diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl index e95e21f131e96..2989d4da96c1b 100644 --- a/Documentation/ABI/testing/debugfs-cxl +++ b/Documentation/ABI/testing/debugfs-cxl @@ -19,6 +19,20 @@ Description: is returned to the user. The inject_poison attribute is only visible for devices supporting the capability. + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + DATA LOSS RISK: For CXL persistent memory (PMEM) devices, + poison injection can result in permanent data loss. Injected + poison may render data permanently inaccessible even after + clearing, as the clear operation writes zeros and does not + recover original data. + + SYSTEM STABILITY RISK: For volatile memory, poison injection + can cause kernel crashes, system instability, or unpredictable + behavior if the poisoned addresses are accessed by running code + or critical kernel structures. What: /sys/kernel/debug/cxl/memX/clear_poison Date: April, 2023 @@ -35,6 +49,79 @@ Description: The clear_poison attribute is only visible for devices supporting the capability. + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the + specified address range and removes the address from the poison + list. It does NOT recover or restore original data that may have + been present before poison injection. Any original data at the + cleared address is permanently lost and replaced with zeros. + + CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing + purposes only and should not be used as a data repair tool. + Clearing poison is fundamentally different from data recovery + or error correction. + +What: /sys/kernel/debug/cxl/regionX/inject_poison +Date: August, 2025 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) When a Host Physical Address (HPA) is written to this + attribute, the region driver translates it to a Device + Physical Address (DPA) and identifies the corresponding + memdev. It then sends an inject poison command to that memdev + at the translated DPA. Refer to the memdev ABI entry at: + /sys/kernel/debug/cxl/memX/inject_poison for the detailed + behavior. This attribute is only visible if all memdevs + participating in the region support both inject and clear + poison commands. + + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + DATA LOSS RISK: For CXL persistent memory (PMEM) devices, + poison injection can result in permanent data loss. Injected + poison may render data permanently inaccessible even after + clearing, as the clear operation writes zeros and does not + recover original data. + + SYSTEM STABILITY RISK: For volatile memory, poison injection + can cause kernel crashes, system instability, or unpredictable + behavior if the poisoned addresses are accessed by running code + or critical kernel structures. + +What: /sys/kernel/debug/cxl/regionX/clear_poison +Date: August, 2025 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) When a Host Physical Address (HPA) is written to this + attribute, the region driver translates it to a Device + Physical Address (DPA) and identifies the corresponding + memdev. It then sends a clear poison command to that memdev + at the translated DPA. Refer to the memdev ABI entry at: + /sys/kernel/debug/cxl/memX/clear_poison for the detailed + behavior. This attribute is only visible if all memdevs + participating in the region support both inject and clear + poison commands. + + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the + specified address range and removes the address from the poison + list. It does NOT recover or restore original data that may have + been present before poison injection. Any original data at the + cleared address is permanently lost and replaced with zeros. + + CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing + purposes only and should not be used as a data repair tool. + Clearing poison is fundamentally different from data recovery + or error correction. + What: /sys/kernel/debug/cxl/einj_types Date: January, 2024 KernelVersion: v6.9 diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst index 42e1e78353f38..46590510e931f 100644 --- a/Documentation/PCI/pci-error-recovery.rst +++ b/Documentation/PCI/pci-error-recovery.rst @@ -102,6 +102,8 @@ Possible return values are:: PCI_ERS_RESULT_NEED_RESET, /* Device driver wants slot to be reset. */ PCI_ERS_RESULT_DISCONNECT, /* Device has completely failed, is unrecoverable */ PCI_ERS_RESULT_RECOVERED, /* Device driver is fully recovered and operational */ + PCI_ERS_RESULT_NO_AER_DRIVER, /* No AER capabilities registered for the driver */ + PCI_ERS_RESULT_PANIC, /* System is unstable, panic. Is CXL specific */ }; A driver does not have to implement all of these callbacks; however, @@ -116,6 +118,10 @@ The actual steps taken by a platform to recover from a PCI error event will be platform-dependent, but will follow the general sequence described below. +PCI_ERS_RESULT_PANIC is currently unique to CXL and handled in CXL +cxl_do_recovery(). The PCI pcie_do_recovery() routine does not report or +handle PCI_ERS_RESULT_PANIC. + STEP 0: Error Event ------------------- A PCI bus error is detected by the PCI hardware. On powerpc, the slot diff --git a/Documentation/driver-api/cxl/conventions.rst b/Documentation/driver-api/cxl/conventions.rst index da347a81a237a..e37336d7b116e 100644 --- a/Documentation/driver-api/cxl/conventions.rst +++ b/Documentation/driver-api/cxl/conventions.rst @@ -45,3 +45,138 @@ Detailed Description of the Change ---------------------------------- + + +Resolve conflict between CFMWS, Platform Memory Holes, and Endpoint Decoders +============================================================================ + +Document +-------- + +CXL Revision 3.2, Version 1.0 + +License +------- + +SPDX-License Identifier: CC-BY-4.0 + +Creator/Contributors +-------------------- + +- Fabio M. De Francesco, Intel +- Dan J. Williams, Intel +- Mahesh Natu, Intel + +Summary of the Change +--------------------- + +According to the current Compute Express Link (CXL) Specifications (Revision +3.2, Version 1.0), the CXL Fixed Memory Window Structure (CFMWS) describes zero +or more Host Physical Address (HPA) windows associated with each CXL Host +Bridge. Each window represents a contiguous HPA range that may be interleaved +across one or more targets, including CXL Host Bridges. Each window has a set +of restrictions that govern its usage. It is the Operating System-directed +configuration and Power Management (OSPM) responsibility to utilize each window +for the specified use. + +Table 9-22 of the current CXL Specifications states that the Window Size field +contains the total number of consecutive bytes of HPA this window describes. +This value must be a multiple of the Number of Interleave Ways (NIW) * 256 MB. + +Platform Firmware (BIOS) might reserve physical addresses below 4 GB where a +memory gap such as the Low Memory Hole for PCIe MMIO may exist. In such cases, +the CFMWS Range Size may not adhere to the NIW * 256 MB rule. + +The HPA represents the actual physical memory address space that the CXL devices +can decode and respond to, while the System Physical Address (SPA), a related +but distinct concept, represents the system-visible address space that users can +direct transaction to and so it excludes reserved regions. + +BIOS publishes CFMWS to communicate the active SPA ranges that, on platforms +with LMH's, map to a strict subset of the HPA. The SPA range trims out the hole, +resulting in lost capacity in the Endpoints with no SPA to map to that part of +the HPA range that intersects the hole. + +E.g, an x86 platform with two CFMWS and an LMH starting at 2 GB: + + +--------+------------+-------------------+------------------+-------------------+------+ + | Window | CFMWS Base | CFMWS Size | HDM Decoder Base | HDM Decoder Size | Ways | + +========+============+===================+==================+===================+======+ + |  0 | 0 GB | 2 GB | 0 GB | 3 GB | 12 | + +--------+------------+-------------------+------------------+-------------------+------+ + |  1 | 4 GB | NIW*256MB Aligned | 4 GB | NIW*256MB Aligned | 12 | + +--------+------------+-------------------+------------------+-------------------+------+ + +HDM decoder base and HDM decoder size represent all the 12 Endpoint Decoders of +a 12 ways region and all the intermediate Switch Decoders. They are configured +by the BIOS according to the NIW * 256MB rule, resulting in a HPA range size of +3GB. Instead, the CFMWS Base and CFMWS Size are used to configure the Root +Decoder HPA range that results smaller (2GB) than that of the Switch and +Endpoint Decoders in the hierarchy (3GB). + +This creates 2 issues which lead to a failure to construct a region: + +1) A mismatch in region size between root and any HDM decoder. The root decoders + will always be smaller due to the trim. + +2) The trim causes the root decoder to violate the (NIW * 256MB) rule. + +This change allows a region with a base address of 0GB to bypass these checks to +allow for region creation with the trimmed root decoder address range. + +This change does not allow for any other arbitrary region to violate these +checks - it is intended exclusively to enable x86 platforms which map CXL memory +under 4GB. + +Despite the HDM decoders covering the PCIE hole HPA region, it is expected that +the platform will never route address accesses to the CXL complex because the +root decoder only covers the trimmed region (which excludes this). This is +outside the ability of Linux to enforce. + +On the example platform, only the first 2GB will be potentially usable, but +Linux, aiming to adhere to the current specifications, fails to construct +Regions and attach Endpoint and intermediate Switch Decoders to them. + +There are several points of failure that due to the expectation that the Root +Decoder HPA size, that is equal to the CFMWS from which it is configured, has +to be greater or equal to the matching Switch and Endpoint HDM Decoders. + +In order to succeed with construction and attachment, Linux must construct a +Region with Root Decoder HPA range size, and then attach to that all the +intermediate Switch Decoders and Endpoint Decoders that belong to the hierarchy +regardless of their range sizes. + +Benefits of the Change +---------------------- + +Without the change, the OSPM wouldn't match intermediate Switch and Endpoint +Decoders with Root Decoders configured with CFMWS HPA sizes that don't align +with the NIW * 256MB constraint, and so it leads to lost memdev capacity. + +This change allows the OSPM to construct Regions and attach intermediate Switch +and Endpoint Decoders to them, so that the addressable part of the memory +devices total capacity is made available to the users. + +References +---------- + +Compute Express Link Specification Revision 3.2, Version 1.0 + + +Detailed Description of the Change +---------------------------------- + +The description of the Window Size field in table 9-22 needs to account for +platforms with Low Memory Holes, where SPA ranges might be subsets of the +endpoints HPA. Therefore, it has to be changed to the following: + +"The total number of consecutive bytes of HPA this window represents. This value +shall be a multiple of NIW * 256 MB. + +On platforms that reserve physical addresses below 4 GB, such as the Low Memory +Hole for PCIe MMIO on x86, an instance of CFMWS whose Base HPA range is 0 might +have a size that doesn't align with the NIW * 256 MB constraint. + +Note that the matching intermediate Switch Decoders and the Endpoint Decoders +HPA range sizes must still align to the above-mentioned rule, but the memory +capacity that exceeds the CFMWS window size won't be accessible.". diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst index 1330f3f52129a..282c1102dd819 100644 --- a/Documentation/driver-api/cxl/maturity-map.rst +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -173,7 +173,7 @@ Accelerator User Flow Support ----------------- -* [0] Inject & clear poison by HPA +* [2] Inject & clear poison by region offset Details ======= diff --git a/Documentation/driver-api/cxl/platform/bios-and-efi.rst b/Documentation/driver-api/cxl/platform/bios-and-efi.rst index 645322632cc9b..a9aa0ccd92af7 100644 --- a/Documentation/driver-api/cxl/platform/bios-and-efi.rst +++ b/Documentation/driver-api/cxl/platform/bios-and-efi.rst @@ -202,7 +202,7 @@ future and such a configuration should be avoided. Memory Holes ------------ -If your platform includes memory holes intersparsed between your CXL memory, it +If your platform includes memory holes interspersed between your CXL memory, it is recommended to utilize multiple decoders to cover these regions of memory, rather than try to program the decoders to accept the entire range and expect Linux to manage the overlap. diff --git a/debian.nvidia-6.17/config/annotations b/debian.nvidia-6.17/config/annotations index 4fb025e692b3a..1406e71970e4b 100644 --- a/debian.nvidia-6.17/config/annotations +++ b/debian.nvidia-6.17/config/annotations @@ -204,6 +204,37 @@ CONFIG_UBUNTU_ODM_DRIVERS note<'Disable all Ubuntu ODM dri CONFIG_ULTRASOC_SMB policy<{'arm64': 'n'}> CONFIG_ULTRASOC_SMB note<'Required for Grace enablement'> +CONFIG_CXL_BUS policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_BUS note<'Changed to bool for CXL Type-2 device support'> + +CONFIG_CXL_PCI policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_PCI note<'Changed to bool for CXL Type-2 device support'> + +CONFIG_CXL_MEM policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_MEM note<'Changed to y due to CXL_BUS being bool'> + +CONFIG_CXL_PORT policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_PORT note<'Changed to y due to CXL_BUS being bool'> + +CONFIG_FWCTL policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_FWCTL note<'Selected by CXL_BUS when bool'> + +CONFIG_CXL_RAS policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_RAS note<'CXL RAS error handling support'> + +CONFIG_CXL_RCH_RAS policy<{'amd64': 'n', 'arm64': 'n'}> +CONFIG_CXL_RCH_RAS note<'RAS support for Restricted CXL Host defined in CXL1.1'> + +CONFIG_SFC_CXL policy<{'amd64': 'n', 'arm64': 'n'}> +CONFIG_SFC_CXL note<'Solarflare SFC9100-family CXL Type-2 device support'> + +CONFIG_ACPI_APEI_EINJ policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_ACPI_APEI_EINJ note<'Required for CONFIG_ACPI_APEI_EINJ_CXL'> + + +CONFIG_ACPI_APEI_EINJ_CXL policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_ACPI_APEI_EINJ_CXL note<'CXL protocol error injection support via APEI EINJ'> + # ---- Annotations without notes ---- diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 9085375830605..11e4483685c9c 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -74,7 +74,6 @@ struct memory_target { struct node_cache_attrs cache_attrs; u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE]; bool registered; - bool ext_updated; /* externally updated */ }; struct memory_initiator { @@ -368,35 +367,6 @@ static void hmat_update_target_access(struct memory_target *target, } } -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access) -{ - struct memory_target *target; - int pxm; - - if (nid == NUMA_NO_NODE) - return -EINVAL; - - pxm = node_to_pxm(nid); - guard(mutex)(&target_lock); - target = find_mem_target(pxm); - if (!target) - return -ENODEV; - - hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY, - coord->read_latency, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY, - coord->write_latency, access); - hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH, - coord->read_bandwidth, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH, - coord->write_bandwidth, access); - target->ext_updated = true; - - return 0; -} -EXPORT_SYMBOL_GPL(hmat_update_target_coordinates); - static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc) { struct memory_locality *loc; @@ -773,10 +743,6 @@ static void hmat_update_target_attrs(struct memory_target *target, u32 best = 0; int i; - /* Don't update if an external agent has changed the data. */ - if (target->ext_updated) - return; - /* Don't update for generic port if there's no device handle */ if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL || access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) && diff --git a/drivers/base/node.c b/drivers/base/node.c index 67b01d5797377..3e2329ccb618d 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -248,6 +248,44 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, } EXPORT_SYMBOL_GPL(node_set_perf_attrs); +/** + * node_update_perf_attrs - Update the performance values for given access class + * @nid: Node identifier to be updated + * @coord: Heterogeneous memory performance coordinates + * @access: The access class for the given attributes + */ +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access) +{ + struct node_access_nodes *access_node; + struct node *node; + int i; + + if (WARN_ON_ONCE(!node_online(nid))) + return; + + node = node_devices[nid]; + list_for_each_entry(access_node, &node->access_list, list_node) { + if (access_node->access != access) + continue; + + access_node->coord = *coord; + for (i = 0; access_attrs[i]; i++) { + sysfs_notify(&access_node->dev.kobj, + NULL, access_attrs[i]->name); + } + break; + } + + /* When setting CPU access coordinates, update mempolicy */ + if (access != ACCESS_COORDINATE_CPU) + return; + + if (mempolicy_set_node_perf(nid, coord)) + pr_info("failed to set mempolicy attrs for node %d\n", nid); +} +EXPORT_SYMBOL_GPL(node_update_perf_attrs); + /** * struct node_cache_info - Internal tracking for memory node caches * @dev: Device represeting the cache level diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 48b7314afdb88..94a3102ce86be 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig CXL_BUS - tristate "CXL (Compute Express Link) Devices Support" + bool "CXL (Compute Express Link) Devices Support" depends on PCI select FW_LOADER select FW_UPLOAD @@ -20,8 +20,9 @@ menuconfig CXL_BUS if CXL_BUS config CXL_PCI - tristate "PCI manageability" + bool "PCI manageability" default CXL_BUS + select CXL_MEM help The CXL specification defines a "CXL memory device" sub-class in the PCI "memory controller" base class of devices. Device's identified by @@ -29,12 +30,12 @@ config CXL_PCI memory to be mapped into the system address map (Host-managed Device Memory (HDM)). - Say 'y/m' to enable a driver that will attach to CXL memory expander + Say 'y' to enable a driver that will attach to CXL memory expander devices enumerated by the memory device class code for configuration and management primarily via the mailbox interface. See Chapter 2.3 Type 3 CXL Device in the CXL 2.0 specification for more details. - If unsure say 'm'. + If unsure say 'y'. config CXL_MEM_RAW_COMMANDS bool "RAW Command Interface for Memory Devices" @@ -89,7 +90,6 @@ config CXL_PMEM config CXL_MEM tristate "CXL: Memory Expansion" - depends on CXL_PCI default CXL_BUS help The CXL.mem protocol allows a device to act as a provider of "System @@ -233,4 +233,15 @@ config CXL_MCE def_bool y depends on X86_MCE && MEMORY_FAILURE +config CXL_RAS + def_bool y + depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI + +config CXL_RCH_RAS + bool "CXL: Restricted CXL Host (RCH) protocol error handling" + def_bool n + depends on CXL_RAS + help + RAS support for Restricted CXL Host (RCH) defined in CXL1.1. + endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index 2caa90fa4bf25..ff6add88b6ae2 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -12,10 +12,8 @@ obj-$(CONFIG_CXL_PORT) += cxl_port.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o obj-$(CONFIG_CXL_MEM) += cxl_mem.o -obj-$(CONFIG_CXL_PCI) += cxl_pci.o cxl_port-y := port.o cxl_acpi-y := acpi.o cxl_pmem-y := pmem.o security.o cxl_mem-y := mem.o -cxl_pci-y := pci.o diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 87f0ed3f3f51f..bd2e282ca93a0 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -20,8 +20,7 @@ static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); - -static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) +static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr) { struct cxl_cxims_data *cximsd = cxlrd->platform_data; int hbiw = cxlrd->cxlsd.nr_targets; @@ -30,19 +29,23 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) /* No xormaps for host bridge interleave ways of 1 or 3 */ if (hbiw == 1 || hbiw == 3) - return hpa; + return addr; /* - * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore - * the position bit to its value before the xormap was applied at - * HPA->DPA translation. + * In regions using XOR interleave arithmetic the CXL HPA may not + * be the same as the SPA. This helper performs the SPA->CXL HPA + * or the CXL HPA->SPA translation. Since XOR is self-inverting, + * so is this function. + * + * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) applying the + * xormaps will toggle a position bit. * * pos is the lowest set bit in an XORMAP - * val is the XORALLBITS(HPA & XORMAP) + * val is the XORALLBITS(addr & XORMAP) * * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS * as an operation that outputs a single bit by XORing all the - * bits in the input (hpa & xormap). Implement XORALLBITS using + * bits in the input (addr & xormap). Implement XORALLBITS using * hweight64(). If the hamming weight is even the XOR of those * bits results in val==0, if odd the XOR result is val==1. */ @@ -51,11 +54,11 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) if (!cximsd->xormaps[i]) continue; pos = __ffs(cximsd->xormaps[i]); - val = (hweight64(hpa & cximsd->xormaps[i]) & 1); - hpa = (hpa & ~(1ULL << pos)) | (val << pos); + val = (hweight64(addr & cximsd->xormaps[i]) & 1); + addr = (addr & ~(1ULL << pos)) | (val << pos); } - return hpa; + return addr; } struct cxl_cxims_context { @@ -113,9 +116,9 @@ static unsigned long cfmws_to_decoder_flags(int restrictions) { unsigned long flags = CXL_DECODER_F_ENABLE; - if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2) + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_DEVMEM) flags |= CXL_DECODER_F_TYPE2; - if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE3) + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM) flags |= CXL_DECODER_F_TYPE3; if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_VOLATILE) flags |= CXL_DECODER_F_RAM; @@ -398,7 +401,6 @@ DEFINE_FREE(del_cxl_resource, struct resource *, if (_T) del_cxl_resource(_T)) static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, struct cxl_cfmws_context *ctx) { - int target_map[CXL_DECODER_MAX_INTERLEAVE]; struct cxl_port *root_port = ctx->root_port; struct cxl_cxims_context cxims_ctx; struct device *dev = ctx->dev; @@ -416,8 +418,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, rc = eig_to_granularity(cfmws->granularity, &ig); if (rc) return rc; - for (i = 0; i < ways; i++) - target_map[i] = cfmws->interleave_targets[i]; struct resource *res __free(del_cxl_resource) = alloc_cxl_resource( cfmws->base_hpa, cfmws->window_size, ctx->id++); @@ -443,6 +443,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, .end = cfmws->base_hpa + cfmws->window_size - 1, }; cxld->interleave_ways = ways; + for (i = 0; i < ways; i++) + cxld->target_map[i] = cfmws->interleave_targets[i]; /* * Minimize the x1 granularity to advertise support for any * valid region granularity @@ -472,10 +474,16 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, cxlrd->qos_class = cfmws->qtg_id; - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) - cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa; + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { + cxlrd->ops = kzalloc(sizeof(*cxlrd->ops), GFP_KERNEL); + if (!cxlrd->ops) + return -ENOMEM; + + cxlrd->ops->hpa_to_spa = cxl_apply_xor_maps; + cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps; + } - rc = cxl_decoder_add(cxld, target_map); + rc = cxl_decoder_add(cxld); if (rc) return rc; diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 5ad8fef210b5c..2937d0ddcce27 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -14,9 +14,11 @@ cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o -cxl_core-y += ras.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o cxl_core-$(CONFIG_CXL_MCE) += mce.o cxl_core-$(CONFIG_CXL_FEATURES) += features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o +cxl_core-$(CONFIG_CXL_RAS) += ras.o +cxl_core-$(CONFIG_CXL_RCH_RAS) += ras_rch.o +cxl_core-$(CONFIG_CXL_PCI) += pci_drv.o diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c0af645425f4a..c4bd6e8a0cf03 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -338,7 +338,7 @@ static int match_cxlrd_hb(struct device *dev, void *data) guard(rwsem_read)(&cxl_rwsem.region); for (int i = 0; i < cxlsd->nr_targets; i++) { - if (host_bridge == cxlsd->target[i]->dport_dev) + if (cxlsd->target[i] && host_bridge == cxlsd->target[i]->dport_dev) return 1; } @@ -440,8 +440,8 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, } *tbl = (struct acpi_cdat_sslbis_table *)header; int size = sizeof(header->cdat) + sizeof(tbl->sslbis_header); struct acpi_cdat_sslbis *sslbis; - struct cxl_port *port = arg; - struct device *dev = &port->dev; + struct cxl_dport *dport = arg; + struct device *dev = &dport->port->dev; int remain, entries, i; u16 len; @@ -467,8 +467,6 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, u16 y = le16_to_cpu((__force __le16)tbl->entries[i].porty_id); __le64 le_base; __le16 le_val; - struct cxl_dport *dport; - unsigned long index; u16 dsp_id; u64 val; @@ -499,28 +497,27 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base), sslbis->data_type); - xa_for_each(&port->dports, index, dport) { - if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || - dsp_id == dport->port_id) { - cxl_access_coordinate_set(dport->coord, - sslbis->data_type, - val); - } + if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || + dsp_id == dport->port_id) { + cxl_access_coordinate_set(dport->coord, + sslbis->data_type, val); + return 0; } } return 0; } -void cxl_switch_parse_cdat(struct cxl_port *port) +void cxl_switch_parse_cdat(struct cxl_dport *dport) { + struct cxl_port *port = dport->port; int rc; if (!port->cdat.table) return; rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler, - port, port->cdat.table, port->cdat.length); + dport, port->cdat.table, port->cdat.length); rc = cdat_table_parse_output(rc); if (rc) dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc); @@ -1075,14 +1072,3 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth; } } - -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access) -{ - return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); -} - -bool cxl_need_node_perf_attrs_update(int nid) -{ - return !acpi_node_backed_by_real_pxm(nid); -} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 2669f251d6775..9a6775845afe4 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -4,8 +4,10 @@ #ifndef __CXL_CORE_H__ #define __CXL_CORE_H__ +#include #include #include +#include extern const struct device_type cxl_nvdimm_bridge_type; extern const struct device_type cxl_nvdimm_type; @@ -13,11 +15,6 @@ extern const struct device_type cxl_pmu_type; extern struct attribute_group cxl_base_attribute_group; -enum cxl_detach_mode { - DETACH_ONLY, - DETACH_INVALIDATE, -}; - #ifdef CONFIG_CXL_REGION extern struct device_attribute dev_attr_create_pmem_region; extern struct device_attribute dev_attr_create_ram_region; @@ -89,6 +86,8 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, struct dentry *cxl_debugfs_create_dir(const char *dir); int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, enum cxl_partition_mode mode); +struct cxl_memdev_state; +int cxl_mem_get_partition_info(struct cxl_memdev_state *mds); int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size); int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); @@ -135,17 +134,78 @@ enum cxl_poison_trace_type { CXL_POISON_TRACE_CLEAR, }; +enum poison_cmd_enabled_bits; +bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, + enum poison_cmd_enabled_bits cmd); + long cxl_pci_get_latency(struct pci_dev *pdev); int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access); -bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +#ifdef CONFIG_CXL_RAS int cxl_ras_init(void); void cxl_ras_exit(void); +pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, + void __iomem *ras_base); +void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base); +pci_ers_result_t cxl_error_detected(struct device *dev); +void cxl_cor_error_detected(struct device *dev); +pci_ers_result_t pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t error); +void pci_cor_error_detected(struct pci_dev *pdev); +pci_ers_result_t cxl_port_error_detected(struct device *dev); +void cxl_port_cor_error_detected(struct device *dev); +void cxl_mask_proto_interrupts(struct device *dev); +void cxl_unmask_proto_interrupts(struct device *dev); +#else +static inline int cxl_ras_init(void) +{ + return 0; +} +static inline void cxl_ras_exit(void) { } +static inline pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, + void __iomem *ras_base) +{ + return PCI_ERS_RESULT_NONE; +} +static inline void cxl_handle_cor_ras(struct device *dev, u64 serial, + void __iomem *ras_base) { } +static inline pci_ers_result_t pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t error) +{ + return PCI_ERS_RESULT_NONE; +} +static inline void pci_cor_error_detected(struct pci_dev *pdev) { } +static inline void cxl_port_cor_error_detected(struct device *dev) { } +static inline pci_ers_result_t cxl_port_error_detected(struct device *dev) +{ + return PCI_ERS_RESULT_NONE; +} +static inline void cxl_unmask_proto_interrupts(struct device *dev) { } +static inline void cxl_mask_proto_interrupts(struct device *dev) { } +#endif /* CONFIG_CXL_RAS */ + +/* Restricted CXL Host specific RAS functions */ +#ifdef CONFIG_CXL_RCH_RAS +void cxl_dport_map_rch_aer(struct cxl_dport *dport); +void cxl_disable_rch_root_ints(struct cxl_dport *dport); +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); +#else +static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { } +static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { } +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } +#endif /* CONFIG_CXL_RCH_RAS */ + int cxl_gpf_port_setup(struct cxl_dport *dport); +struct cxl_port *find_cxl_port(struct device *dport_dev, + struct cxl_dport **dport); +struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev); + +struct cxl_hdm; +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info); +int cxl_port_get_possible_dports(struct cxl_port *port); #ifdef CONFIG_CXL_FEATURES struct cxl_feat_entry * @@ -160,4 +220,16 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, u16 *return_code); #endif +#ifdef CONFIG_CXL_PCI +bool cxl_pci_drv_bound(struct pci_dev *pdev); +int cxl_pci_driver_init(void); +void cxl_pci_driver_exit(void); +#else +static inline bool cxl_pci_drv_bound(struct pci_dev *pdev) { return false; }; +static inline int cxl_pci_driver_init(void) { return 0; } +static inline void cxl_pci_driver_exit(void) { } +#endif + +resource_size_t cxl_rcd_component_reg_phys(struct device *dev, + struct cxl_dport *dport); #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e930191057c04..6c85a849bee79 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "cxlmem.h" #include "core.h" @@ -21,12 +22,11 @@ struct cxl_rwsem cxl_rwsem = { .dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa), }; -static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, - int *target_map) +static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld) { int rc; - rc = cxl_decoder_add_locked(cxld, target_map); + rc = cxl_decoder_add_locked(cxld); if (rc) { put_device(&cxld->dev); dev_err(&port->dev, "Failed to add decoder\n"); @@ -50,12 +50,9 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, * are claimed and passed to the single dport. Disable the range until the first * CXL region is enumerated / activated. */ -int devm_cxl_add_passthrough_decoder(struct cxl_port *port) +static int devm_cxl_add_passthrough_decoder(struct cxl_port *port) { struct cxl_switch_decoder *cxlsd; - struct cxl_dport *dport = NULL; - int single_port_map[1]; - unsigned long index; struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); /* @@ -71,13 +68,8 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port) device_lock_assert(&port->dev); - xa_for_each(&port->dports, index, dport) - break; - single_port_map[0] = dport->port_id; - - return add_hdm_decoder(port, &cxlsd->cxld, single_port_map); + return add_hdm_decoder(port, &cxlsd->cxld); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, "CXL"); static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) { @@ -101,6 +93,7 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) { struct cxl_hdm *cxlhdm; + struct cxl_port *port; void __iomem *hdm; u32 ctrl; int i; @@ -114,6 +107,19 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) if (!hdm) return true; + port = cxlhdm->port; + if (is_cxl_endpoint(port)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + /* + * Type-2 accelerators (DEVMEM) have HDM decoders programmed + * by firmware/BIOS - never use DVSEC emulation. + * Type-3 memory (CLASSMEM) may still need DVSEC emulation + * fallback in certain firmware configurations. + */ + if (cxlmd->cxlds->type == CXL_DEVTYPE_DEVMEM) + return false; + } + /* * If HDM decoders are present and the driver is in control of * Mem_Enable skip DVSEC based emulation @@ -147,8 +153,8 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) * @port: cxl_port to map * @info: cached DVSEC range register info */ -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) +static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, + struct cxl_endpoint_dvsec_info *info) { struct cxl_register_map *reg_map = &port->reg_map; struct device *dev = &port->dev; @@ -197,13 +203,12 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, */ if (should_emulate_decoders(info)) { dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges, - info->ranges > 1 ? "s" : ""); + str_plural(info->ranges)); cxlhdm->decoder_count = info->ranges; } return cxlhdm; } -EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, "CXL"); static void __cxl_dpa_debug(struct seq_file *file, struct resource *r, int depth) { @@ -556,6 +561,12 @@ bool cxl_resource_contains_addr(const struct resource *res, const resource_size_ return resource_contains(res, &_addr); } +/** + * cxl_dpa_free - release DPA (Device Physical Address) + * @cxled: endpoint decoder linked to the DPA + * + * Returns 0 or error. + */ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); @@ -582,6 +593,7 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) devm_cxl_dpa_release(cxled); return 0; } +EXPORT_SYMBOL_NS_GPL(cxl_dpa_free, "CXL"); int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, enum cxl_partition_mode mode) @@ -613,6 +625,82 @@ int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, return 0; } +static int find_free_decoder(struct device *dev, const void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_port *port; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + port = cxled_to_port(cxled); + + return cxled->cxld.id == (port->hdm_end + 1); +} + +static struct cxl_endpoint_decoder * +cxl_find_free_decoder(struct cxl_memdev *cxlmd) +{ + struct cxl_port *endpoint = cxlmd->endpoint; + struct device *dev; + + guard(rwsem_read)(&cxl_rwsem.dpa); + dev = device_find_child(&endpoint->dev, NULL, + find_free_decoder); + if (!dev) + return NULL; + + return to_cxl_endpoint_decoder(dev); +} + +/** + * cxl_request_dpa - search and reserve DPA given input constraints + * @cxlmd: memdev with an endpoint port with available decoders + * @mode: CXL partition mode (ram vs pmem) + * @alloc: dpa size required + * + * Returns a pointer to a 'struct cxl_endpoint_decoder' on success or + * an errno encoded pointer on failure. + * + * Given that a region needs to allocate from limited HPA capacity it + * may be the case that a device has more mappable DPA capacity than + * available HPA. The expectation is that @alloc is a driver known + * value based on the device capacity but which could not be fully + * available due to HPA constraints. + * + * Returns a pinned cxl_decoder with at least @alloc bytes of capacity + * reserved, or an error pointer. The caller is also expected to own the + * lifetime of the memdev registration associated with the endpoint to + * pin the decoder registered as well. + */ +struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd, + enum cxl_partition_mode mode, + resource_size_t alloc) +{ + int rc; + + if (!IS_ALIGNED(alloc, SZ_256M)) + return ERR_PTR(-EINVAL); + + struct cxl_endpoint_decoder *cxled __free(put_cxled) = + cxl_find_free_decoder(cxlmd); + + if (!cxled) + return ERR_PTR(-ENODEV); + + rc = cxl_dpa_set_part(cxled, mode); + if (rc) + return ERR_PTR(rc); + + rc = cxl_dpa_alloc(cxled, alloc); + if (rc) + return ERR_PTR(rc); + + return no_free_ptr(cxled); +} +EXPORT_SYMBOL_NS_GPL(cxl_request_dpa, "CXL"); + static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); @@ -696,6 +784,45 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size) return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled); } +static int find_committed_decoder(struct device *dev, const void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_port *port; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + port = cxled_to_port(cxled); + + return cxled->cxld.id == (port->hdm_end); +} + +struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd, + struct cxl_region **cxlr) +{ + struct cxl_port *endpoint = cxlmd->endpoint; + struct cxl_endpoint_decoder *cxled; + struct device *cxled_dev; + + if (IS_ERR_OR_NULL(endpoint)) + return NULL; + + guard(rwsem_read)(&cxl_rwsem.dpa); + cxled_dev = device_find_child(&endpoint->dev, NULL, + find_committed_decoder); + + if (!cxled_dev) + return NULL; + + cxled = to_cxl_endpoint_decoder(cxled_dev); + *cxlr = cxled->cxld.region; + + /* Reference transferred to caller - caller must put_device() */ + return cxled; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_committed_decoder, "CXL"); + static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl) { u16 eig; @@ -984,7 +1111,7 @@ static int cxl_setup_hdm_decoder_from_dvsec( } static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, - int *target_map, void __iomem *hdm, int which, + void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) { struct cxl_endpoint_decoder *cxled = NULL; @@ -1104,7 +1231,7 @@ static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which)); target_list.value = (hi << 32) + lo; for (i = 0; i < cxld->interleave_ways; i++) - target_map[i] = target_list.target_id[i]; + cxld->target_map[i] = target_list.target_id[i]; return 0; } @@ -1169,8 +1296,8 @@ static void cxl_settle_decoders(struct cxl_hdm *cxlhdm) * @cxlhdm: Structure to populate with HDM capabilities * @info: cached DVSEC range register info */ -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) +static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; struct cxl_port *port = cxlhdm->port; @@ -1180,7 +1307,6 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxl_settle_decoders(cxlhdm); for (i = 0; i < cxlhdm->decoder_count; i++) { - int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; int rc, target_count = cxlhdm->target_count; struct cxl_decoder *cxld; @@ -1208,8 +1334,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxld = &cxlsd->cxld; } - rc = init_hdm_decoder(port, cxld, target_map, hdm, i, - &dpa_base, info); + rc = init_hdm_decoder(port, cxld, hdm, i, &dpa_base, info); if (rc) { if (rc == -ENOSPC) continue; @@ -1219,7 +1344,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, put_device(&cxld->dev); return rc; } - rc = add_hdm_decoder(port, cxld, target_map); + rc = add_hdm_decoder(port, cxld); if (rc) { dev_warn(&port->dev, "Failed to add decoder%d.%d\n", port->id, i); @@ -1229,4 +1354,71 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return 0; } -EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, "CXL"); + +/** + * __devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders + * @port: CXL port context + * + * Return 0 or -errno on error + */ +int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm; + + if (is_cxl_root(port) || is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + cxlhdm = devm_cxl_setup_hdm(port, NULL); + if (!IS_ERR(cxlhdm)) + return devm_cxl_enumerate_decoders(cxlhdm, NULL); + + if (PTR_ERR(cxlhdm) != -ENODEV) { + dev_err(&port->dev, "Failed to map HDM decoder capability\n"); + return PTR_ERR(cxlhdm); + } + + if (cxl_port_get_possible_dports(port) == 1) { + dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); + return devm_cxl_add_passthrough_decoder(port); + } + + dev_err(&port->dev, "HDM decoder capability not found\n"); + return -ENXIO; +} +EXPORT_SYMBOL_NS_GPL(__devm_cxl_switch_port_decoders_setup, "CXL"); + +/** + * devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders + * @port: CXL port context + * + * Return 0 or -errno on error + */ +int devm_cxl_endpoint_decoders_setup(struct cxl_port *port) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_endpoint_dvsec_info info = { .port = port }; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_hdm *cxlhdm; + int rc; + + if (!is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + rc = cxl_dvsec_rr_decode(cxlds, &info); + if (rc < 0) + return rc; + + cxlhdm = devm_cxl_setup_hdm(port, &info); + if (IS_ERR(cxlhdm)) { + if (PTR_ERR(cxlhdm) == -ENODEV) + dev_err(&port->dev, "HDM decoder registers not found\n"); + return PTR_ERR(cxlhdm); + } + + rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info); + if (rc) + return rc; + + return devm_cxl_enumerate_decoders(cxlhdm, &info); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_endpoint_decoders_setup, "CXL"); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index fa6dd0c94656f..d57a0c2d39fb6 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1144,7 +1144,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, "CXL"); * * See CXL @8.2.9.5.2.1 Get Partition Info */ -static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) +int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_partition_info pi; @@ -1300,55 +1300,6 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) return -EBUSY; } -static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) -{ - int i = info->nr_partitions; - - if (size == 0) - return; - - info->part[i].range = (struct range) { - .start = start, - .end = start + size - 1, - }; - info->part[i].mode = mode; - info->nr_partitions++; -} - -int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) -{ - struct cxl_dev_state *cxlds = &mds->cxlds; - struct device *dev = cxlds->dev; - int rc; - - if (!cxlds->media_ready) { - info->size = 0; - return 0; - } - - info->size = mds->total_bytes; - - if (mds->partition_align_bytes == 0) { - add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); - add_part(info, mds->volatile_only_bytes, - mds->persistent_only_bytes, CXL_PARTMODE_PMEM); - return 0; - } - - rc = cxl_mem_get_partition_info(mds); - if (rc) { - dev_err(dev, "Failed to query partition information\n"); - return rc; - } - - add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); - add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, - CXL_PARTMODE_PMEM); - - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); - int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; @@ -1514,23 +1465,21 @@ int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL"); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec) { struct cxl_memdev_state *mds; int rc; - mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); + mds = devm_cxl_dev_state_create(dev, CXL_DEVTYPE_CLASSMEM, serial, + dvsec, struct cxl_memdev_state, cxlds, + true); if (!mds) { dev_err(dev, "No memory available\n"); return ERR_PTR(-ENOMEM); } mutex_init(&mds->event.log_lock); - mds->cxlds.dev = dev; - mds->cxlds.reg_map.host = dev; - mds->cxlds.cxl_mbox.host = dev; - mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; - mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier); if (rc == -EOPNOTSUPP) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index c569e00a511f4..2d4828831ce1a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -7,7 +7,9 @@ #include #include #include +#include #include +#include "private.h" #include "trace.h" #include "core.h" @@ -200,6 +202,14 @@ static ssize_t security_erase_store(struct device *dev, static struct device_attribute dev_attr_security_erase = __ATTR(erase, 0200, NULL, security_erase_store); +bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, + enum poison_cmd_enabled_bits cmd) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + + return test_bit(cmd, mds->poison.enabled_cmds); +} + static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; @@ -276,7 +286,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) return 0; } -int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_inject_poison inject; @@ -288,13 +298,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) - return rc; - - ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) - return rc; + lockdep_assert_held(&cxl_rwsem.dpa); + lockdep_assert_held(&cxl_rwsem.region); rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) @@ -324,9 +329,24 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) return 0; } + +int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + return cxl_inject_poison_locked(cxlmd, dpa); +} EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, "CXL"); -int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_clear_poison clear; @@ -338,13 +358,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) - return rc; - - ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) - return rc; + lockdep_assert_held(&cxl_rwsem.dpa); + lockdep_assert_held(&cxl_rwsem.region); rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) @@ -383,6 +398,21 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) return 0; } + +int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + return cxl_clear_poison_locked(cxlmd, dpa); +} EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, "CXL"); static struct attribute *cxl_memdev_attributes[] = { @@ -549,12 +579,85 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; +static const struct device_type cxl_accel_memdev_type = { + .name = "cxl_accel_memdev", + .release = cxl_memdev_release, + .devnode = cxl_memdev_devnode, +}; + bool is_cxl_memdev(const struct device *dev) { - return dev->type == &cxl_memdev_type; + return (dev->type == &cxl_memdev_type || + dev->type == &cxl_accel_memdev_type); } EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); +static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) +{ + int i = info->nr_partitions; + + if (size == 0) + return; + + info->part[i].range = (struct range) { + .start = start, + .end = start + size - 1, + }; + info->part[i].mode = mode; + info->nr_partitions++; +} + +int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) +{ + struct cxl_dev_state *cxlds = &mds->cxlds; + struct device *dev = cxlds->dev; + int rc; + + if (!cxlds->media_ready) { + info->size = 0; + return 0; + } + + info->size = mds->total_bytes; + + if (mds->partition_align_bytes == 0) { + add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->volatile_only_bytes, + mds->persistent_only_bytes, CXL_PARTMODE_PMEM); + return 0; + } + + rc = cxl_mem_get_partition_info(mds); + if (rc) { + dev_err(dev, "Failed to query partition information\n"); + return rc; + } + + add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, + CXL_PARTMODE_PMEM); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); + +/** + * cxl_set_capacity: initialize dpa by a driver without a mailbox. + * + * @cxlds: pointer to cxl_dev_state + * @capacity: device volatile memory size + */ +int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity) +{ + struct cxl_dpa_info range_info = { + .size = capacity, + }; + + add_part(&range_info, 0, capacity, CXL_PARTMODE_RAM); + return cxl_dpa_setup(cxlds, &range_info); +} +EXPORT_SYMBOL_NS_GPL(cxl_set_capacity, "CXL"); + /** * set_exclusive_cxl_commands() - atomically disable user cxl commands * @mds: The device state to operate on @@ -620,42 +723,61 @@ static void detach_memdev(struct work_struct *work) static struct lock_class_key cxl_memdev_key; -static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, - const struct file_operations *fops) +static void cxl_dev_state_init(struct cxl_dev_state *cxlds, struct device *dev, + enum cxl_devtype type, u64 serial, u16 dvsec, + bool has_mbox) { - struct cxl_memdev *cxlmd; - struct device *dev; - struct cdev *cdev; - int rc; + *cxlds = (struct cxl_dev_state) { + .dev = dev, + .type = type, + .serial = serial, + .cxl_dvsec = dvsec, + .reg_map.host = dev, + .reg_map.resource = CXL_RESOURCE_NONE, + }; - cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); - if (!cxlmd) - return ERR_PTR(-ENOMEM); + if (has_mbox) + cxlds->cxl_mbox.host = dev; +} - rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL); - if (rc < 0) - goto err; - cxlmd->id = rc; - cxlmd->depth = -1; +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox) +{ + struct cxl_dev_state *cxlds = devm_kzalloc(dev, size, GFP_KERNEL); - dev = &cxlmd->dev; - device_initialize(dev); - lockdep_set_class(&dev->mutex, &cxl_memdev_key); - dev->parent = cxlds->dev; - dev->bus = &cxl_bus_type; - dev->devt = MKDEV(cxl_mem_major, cxlmd->id); - dev->type = &cxl_memdev_type; - device_set_pm_not_required(dev); - INIT_WORK(&cxlmd->detach_work, detach_memdev); + if (!cxlds) + return NULL; - cdev = &cxlmd->cdev; - cdev_init(cdev, fops); - return cxlmd; + cxl_dev_state_init(cxlds, dev, type, serial, dvsec, has_mbox); + return cxlds; +} +EXPORT_SYMBOL_NS_GPL(_devm_cxl_dev_state_create, "CXL"); -err: - kfree(cxlmd); - return ERR_PTR(rc); +struct cxl_memdev *devm_cxl_memdev_add_or_reset(struct device *host, + struct cxl_memdev *cxlmd) +{ + struct device *dev = &cxlmd->dev; + struct cdev *cdev = &cxlmd->cdev; + int rc; + + rc = cdev_device_add(cdev, dev); + if (rc) { + /* + * The cdev was briefly live, shutdown any ioctl operations that + * saw that state. + */ + cxl_memdev_shutdown(dev); + put_device(dev); + return ERR_PTR(rc); + } + rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); + if (rc) + return ERR_PTR(rc); + return cxlmd; } +EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_add_or_reset, "CXL"); static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd, unsigned long arg) @@ -1023,50 +1145,51 @@ static const struct file_operations cxl_memdev_fops = { .llseek = noop_llseek, }; -struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds) +struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops) { struct cxl_memdev *cxlmd; struct device *dev; struct cdev *cdev; int rc; - cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops); - if (IS_ERR(cxlmd)) - return cxlmd; + cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); + if (!cxlmd) + return ERR_PTR(-ENOMEM); - dev = &cxlmd->dev; - rc = dev_set_name(dev, "mem%d", cxlmd->id); - if (rc) + rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL); + if (rc < 0) goto err; - /* - * Activate ioctl operations, no cxl_memdev_rwsem manipulation - * needed as this is ordered with cdev_add() publishing the device. - */ + cxlmd->id = rc; + cxlmd->depth = -1; + cxlmd->ops = ops; + cxlmd->endpoint = ERR_PTR(-ENXIO); cxlmd->cxlds = cxlds; cxlds->cxlmd = cxlmd; - cdev = &cxlmd->cdev; - rc = cdev_device_add(cdev, dev); - if (rc) - goto err; + dev = &cxlmd->dev; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_memdev_key); + dev->parent = cxlds->dev; + dev->bus = &cxl_bus_type; + dev->devt = MKDEV(cxl_mem_major, cxlmd->id); + if (cxlds->type == CXL_DEVTYPE_DEVMEM) + dev->type = &cxl_accel_memdev_type; + else + dev->type = &cxl_memdev_type; + device_set_pm_not_required(dev); + INIT_WORK(&cxlmd->detach_work, detach_memdev); - rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); - if (rc) - return ERR_PTR(rc); + cdev = &cxlmd->cdev; + cdev_init(cdev, &cxl_memdev_fops); return cxlmd; err: - /* - * The cdev was briefly live, shutdown any ioctl operations that - * saw that state. - */ - cxl_memdev_shutdown(dev); - put_device(dev); + kfree(cxlmd); return ERR_PTR(rc); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL"); +EXPORT_SYMBOL_NS_GPL(cxl_memdev_alloc, "CXL"); static void sanitize_teardown_notifier(void *data) { diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index b50551601c2e4..90a0763e72c4b 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,53 @@ static unsigned short media_ready_timeout = 60; module_param(media_ready_timeout, ushort, 0644); MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready"); +static int pci_get_port_num(struct pci_dev *pdev) +{ + u32 lnkcap; + int type; + + type = pci_pcie_type(pdev); + if (type != PCI_EXP_TYPE_DOWNSTREAM && type != PCI_EXP_TYPE_ROOT_PORT) + return -EINVAL; + + if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, + &lnkcap)) + return -ENXIO; + + return FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); +} + +/** + * __devm_cxl_add_dport_by_dev - allocate a dport by dport device + * @port: cxl_port that hosts the dport + * @dport_dev: 'struct device' of the dport + * + * Returns the allocated dport on success or ERR_PTR() of -errno on error + */ +struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_register_map map; + struct pci_dev *pdev; + int port_num, rc; + + if (!dev_is_pci(dport_dev)) + return ERR_PTR(-EINVAL); + + pdev = to_pci_dev(dport_dev); + port_num = pci_get_port_num(pdev); + if (port_num < 0) + return ERR_PTR(port_num); + + rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + return ERR_PTR(rc); + + device_lock_assert(&port->dev); + return devm_cxl_add_dport(port, dport_dev, port_num, map.resource); +} +EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL"); + struct cxl_walk_context { struct pci_bus *bus; struct cxl_port *port; @@ -111,19 +159,19 @@ static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) int rc, i; u32 temp; - if (id > CXL_DVSEC_RANGE_MAX) + if (id > PCI_DVSEC_CXL_RANGE_MAX) return -EINVAL; /* Check MEM INFO VALID bit first, give up after 1s */ i = 1; do { rc = pci_read_config_dword(pdev, - d + CXL_DVSEC_RANGE_SIZE_LOW(id), + d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp); + valid = FIELD_GET(PCI_DVSEC_CXL_MEM_INFO_VALID, temp); if (valid) break; msleep(1000); @@ -147,17 +195,17 @@ static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id) int rc, i; u32 temp; - if (id > CXL_DVSEC_RANGE_MAX) + if (id > PCI_DVSEC_CXL_RANGE_MAX) return -EINVAL; /* Check MEM ACTIVE bit, up to 60s timeout by default */ for (i = media_ready_timeout; i; i--) { rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + active = FIELD_GET(PCI_DVSEC_CXL_MEM_ACTIVE, temp); if (active) break; msleep(1000); @@ -186,11 +234,11 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) u16 cap; rc = pci_read_config_word(pdev, - d + CXL_DVSEC_CAP_OFFSET, &cap); + d + PCI_DVSEC_CXL_CAP_OFFSET, &cap); if (rc) return rc; - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT_MASK, cap); for (i = 0; i < hdm_count; i++) { rc = cxl_dvsec_mem_range_valid(cxlds, i); if (rc) @@ -218,16 +266,16 @@ static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val) u16 ctrl; int rc; - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL_OFFSET, &ctrl); if (rc < 0) return rc; - if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val) + if ((ctrl & PCI_DVSEC_CXL_MEM_ENABLE) == val) return 1; - ctrl &= ~CXL_DVSEC_MEM_ENABLE; + ctrl &= ~PCI_DVSEC_CXL_MEM_ENABLE; ctrl |= val; - rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl); + rc = pci_write_config_word(pdev, d + PCI_DVSEC_CXL_CTRL_OFFSET, ctrl); if (rc < 0) return rc; @@ -243,7 +291,7 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) { int rc; - rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE); + rc = cxl_set_mem_enable(cxlds, PCI_DVSEC_CXL_MEM_ENABLE); if (rc < 0) return rc; if (rc > 0) @@ -305,11 +353,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return -ENXIO; } - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CAP_OFFSET, &cap); if (rc) return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + if (!(cap & PCI_DVSEC_CXL_MEM_CAPABLE)) { dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; } @@ -320,7 +368,7 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * driver is for a spec defined class code which must be CXL.mem * capable, there is no point in continuing to enable CXL.mem. */ - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT_MASK, cap); if (!hdm_count || hdm_count > 2) return -EINVAL; @@ -329,11 +377,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * disabled, and they will remain moot after the HDM Decoder * capability is enabled. */ - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL_OFFSET, &ctrl); if (rc) return rc; - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + info->mem_enabled = FIELD_GET(PCI_DVSEC_CXL_MEM_ENABLE, ctrl); if (!info->mem_enabled) return 0; @@ -346,35 +394,35 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return rc; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i), &temp); if (rc) return rc; size = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(i), &temp); if (rc) return rc; - size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + size |= temp & PCI_DVSEC_CXL_MEM_SIZE_LOW_MASK; if (!size) { continue; } rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), &temp); if (rc) return rc; base = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), &temp); if (rc) return rc; - base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + base |= temp & PCI_DVSEC_CXL_MEM_BASE_LOW_MASK; info->dvsec_range[ranges++] = (struct range) { .start = base, @@ -664,324 +712,6 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - void __iomem *addr; - u32 status; - - if (!ras_base) - return; - - addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); - } -} - -static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); -} - -/* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(void __iomem *ras_base, u32 *log) -{ - void __iomem *addr; - u32 *log_addr; - int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - - addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; - log_addr = log; - - for (i = 0; i < log_u32_size; i++) { - *log_addr = readl(addr); - log_addr++; - addr += sizeof(u32); - } -} - -/* - * Log the state of the RAS status registers and prepare them to log the - * next error status. Return 1 if reset needed. - */ -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - u32 hl[CXL_HEADERLOG_SIZE_U32]; - void __iomem *addr; - u32 status; - u32 fe; - - if (!ras_base) - return false; - - addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) - return false; - - /* If multiple errors, log header points to first error from ctrl reg */ - if (hweight32(status) > 1) { - void __iomem *rcc_addr = - ras_base + CXL_RAS_CAP_CONTROL_OFFSET; - - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, - readl(rcc_addr))); - } else { - fe = status; - } - - header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); - writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); - - return true; -} - -static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_ras(cxlds, cxlds->regs.ras); -} - -#ifdef CONFIG_PCIEAER_CXL - -static void cxl_dport_map_rch_aer(struct cxl_dport *dport) -{ - resource_size_t aer_phys; - struct device *host; - u16 aer_cap; - - aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); - if (aer_cap) { - host = dport->reg_map.host; - aer_phys = aer_cap + dport->rcrb.base; - dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys, - sizeof(struct aer_capability_regs)); - } -} - -static void cxl_dport_map_ras(struct cxl_dport *dport) -{ - struct cxl_register_map *map = &dport->reg_map; - struct device *dev = dport->dport_dev; - - if (!map->component_map.ras.valid) - dev_dbg(dev, "RAS registers not found\n"); - else if (cxl_map_component_regs(map, &dport->regs.component, - BIT(CXL_CM_CAP_CAP_ID_RAS))) - dev_dbg(dev, "Failed to map RAS capability.\n"); -} - -static void cxl_disable_rch_root_ints(struct cxl_dport *dport) -{ - void __iomem *aer_base = dport->regs.dport_aer; - u32 aer_cmd_mask, aer_cmd; - - if (!aer_base) - return; - - /* - * Disable RCH root port command interrupts. - * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors - * - * This sequence may not be necessary. CXL spec states disabling - * the root cmd register's interrupts is required. But, PCI spec - * shows these are disabled by default on reset. - */ - aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | - PCI_ERR_ROOT_CMD_NONFATAL_EN | - PCI_ERR_ROOT_CMD_FATAL_EN); - aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); - aer_cmd &= ~aer_cmd_mask; - writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); -} - -/** - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport - * @dport: the cxl_dport that needs to be initialized - * @host: host device for devm operations - */ -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) -{ - dport->reg_map.host = host; - cxl_dport_map_ras(dport); - - if (dport->rch) { - struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); - - if (!host_bridge->native_aer) - return; - - cxl_dport_map_rch_aer(dport); - cxl_disable_rch_root_ints(dport); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); - -static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return __cxl_handle_cor_ras(cxlds, dport->regs.ras); -} - -static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return __cxl_handle_ras(cxlds, dport->regs.ras); -} - -/* - * Copy the AER capability registers using 32 bit read accesses. - * This is necessary because RCRB AER capability is MMIO mapped. Clear the - * status after copying. - * - * @aer_base: base address of AER capability block in RCRB - * @aer_regs: destination for copying AER capability - */ -static bool cxl_rch_get_aer_info(void __iomem *aer_base, - struct aer_capability_regs *aer_regs) -{ - int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); - u32 *aer_regs_buf = (u32 *)aer_regs; - int n; - - if (!aer_base) - return false; - - /* Use readl() to guarantee 32-bit accesses */ - for (n = 0; n < read_cnt; n++) - aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); - - writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); - writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); - - return true; -} - -/* Get AER severity. Return false if there is no error. */ -static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, - int *severity) -{ - if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { - if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) - *severity = AER_FATAL; - else - *severity = AER_NONFATAL; - return true; - } - - if (aer_regs->cor_status & ~aer_regs->cor_mask) { - *severity = AER_CORRECTABLE; - return true; - } - - return false; -} - -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - struct aer_capability_regs aer_regs; - struct cxl_dport *dport; - int severity; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return; - - if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) - return; - - if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) - return; - - pci_print_aer(pdev, severity, &aer_regs); - - if (severity == AER_CORRECTABLE) - cxl_handle_rdport_cor_ras(cxlds, dport); - else - cxl_handle_rdport_ras(cxlds, dport); -} - -#else -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } -#endif - -void cxl_cor_error_detected(struct pci_dev *pdev) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct device *dev = &cxlds->cxlmd->dev; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - - cxl_handle_endpoint_cor_ras(cxlds); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); - -pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - bool ue; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. - */ - ue = cxl_handle_endpoint_ras(cxlds); - } - - - switch (state) { - case pci_channel_io_normal: - if (ue) { - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - } - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - dev_warn(&pdev->dev, - "%s: frozen state error detected, disable CXL.mem\n", - dev_name(dev)); - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - dev_warn(&pdev->dev, - "failure state error detected, request disconnect\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; -} -EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); - static int cxl_flit_size(struct pci_dev *pdev) { if (cxl_pci_flit_256(pdev)) @@ -1046,6 +776,68 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL"); +static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, + struct cxl_register_map *map, + struct cxl_dport *dport) +{ + resource_size_t component_reg_phys; + + *map = (struct cxl_register_map) { + .host = &pdev->dev, + .resource = CXL_RESOURCE_NONE, + }; + + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); + if (component_reg_phys == CXL_RESOURCE_NONE) + return -ENXIO; + + map->resource = component_reg_phys; + map->reg_type = CXL_REGLOC_RBI_COMPONENT; + map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; + + return 0; +} + +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + int rc; + + rc = cxl_find_regblock(pdev, type, map); + + /* + * If the Register Locator DVSEC does not exist, check if it + * is an RCH and try to extract the Component Registers from + * an RCRB. + */ + if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { + struct cxl_dport *dport; + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + rc = cxl_rcrb_get_comp_regs(pdev, map, dport); + if (rc) + return rc; + + rc = cxl_dport_map_rcd_linkcap(pdev, dport); + if (rc) + return rc; + + } else if (rc) { + return rc; + } + + return cxl_setup_regs(map); +} +EXPORT_SYMBOL_NS_GPL(cxl_pci_setup_regs, "CXL"); + int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) { int speed, bw; @@ -1100,7 +892,7 @@ u16 cxl_gpf_get_dvsec(struct device *dev) is_port = false; dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); + is_port ? PCI_DVSEC_CXL_PORT_GPF : PCI_DVSEC_CXL_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", is_port ? "Port" : "Device"); @@ -1116,14 +908,14 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) switch (phase) { case 1: - offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL_OFFSET; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE_MASK; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE_MASK; break; case 2: - offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL_OFFSET; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE_MASK; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE_MASK; break; default: return -EINVAL; @@ -1169,3 +961,45 @@ int cxl_gpf_port_setup(struct cxl_dport *dport) return 0; } + +static int count_dports(struct pci_dev *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + int type = pci_pcie_type(pdev); + + if (pdev->bus != ctx->bus) + return 0; + if (!pci_is_pcie(pdev)) + return 0; + if (type != ctx->type) + return 0; + + ctx->count++; + return 0; +} + +int cxl_port_get_possible_dports(struct cxl_port *port) +{ + struct pci_bus *bus = cxl_port_to_pci_bus(port); + struct cxl_walk_context ctx; + int type; + + if (!bus) { + dev_err(&port->dev, "No PCI bus found for port %s\n", + dev_name(&port->dev)); + return -ENXIO; + } + + if (pci_is_root_bus(bus)) + type = PCI_EXP_TYPE_ROOT_PORT; + else + type = PCI_EXP_TYPE_DOWNSTREAM; + + ctx = (struct cxl_walk_context) { + .bus = bus, + .type = type, + }; + pci_walk_bus(bus, count_dports, &ctx); + + return ctx.count; +} diff --git a/drivers/cxl/pci.c b/drivers/cxl/core/pci_drv.c similarity index 92% rename from drivers/cxl/pci.c rename to drivers/cxl/core/pci_drv.c index bd100ac31672d..4a812765217e4 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/core/pci_drv.c @@ -11,11 +11,13 @@ #include #include #include +#include #include #include "cxlmem.h" #include "cxlpci.h" #include "cxl.h" #include "pmu.h" +#include "core/core.h" /** * DOC: cxl pci @@ -465,76 +467,6 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) return 0; } -/* - * Assume that any RCIEP that emits the CXL memory expander class code - * is an RCD - */ -static bool is_cxl_restricted(struct pci_dev *pdev) -{ - return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; -} - -static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, - struct cxl_register_map *map, - struct cxl_dport *dport) -{ - resource_size_t component_reg_phys; - - *map = (struct cxl_register_map) { - .host = &pdev->dev, - .resource = CXL_RESOURCE_NONE, - }; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); - if (component_reg_phys == CXL_RESOURCE_NONE) - return -ENXIO; - - map->resource = component_reg_phys; - map->reg_type = CXL_REGLOC_RBI_COMPONENT; - map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; - - return 0; -} - -static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) -{ - int rc; - - rc = cxl_find_regblock(pdev, type, map); - - /* - * If the Register Locator DVSEC does not exist, check if it - * is an RCH and try to extract the Component Registers from - * an RCRB. - */ - if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { - struct cxl_dport *dport; - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - rc = cxl_rcrb_get_comp_regs(pdev, map, dport); - if (rc) - return rc; - - rc = cxl_dport_map_rcd_linkcap(pdev, dport); - if (rc) - return rc; - - } else if (rc) { - return rc; - } - - return cxl_setup_regs(map); -} - static int cxl_pci_ras_unmask(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); @@ -911,6 +843,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) int rc, pmu_count; unsigned int i; bool irq_avail; + u16 dvsec; /* * Double check the anonymous union trickery in struct cxl_regs @@ -924,19 +857,18 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; pci_set_master(pdev); - mds = cxl_memdev_state_create(&pdev->dev); + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + pci_warn(pdev, "Device DVSEC not present, skip CXL.mem init\n"); + + mds = cxl_memdev_state_create(&pdev->dev, pci_get_dsn(pdev), dvsec); if (IS_ERR(mds)) return PTR_ERR(mds); cxlds = &mds->cxlds; pci_set_drvdata(pdev, cxlds); cxlds->rcd = is_cxl_restricted(pdev); - cxlds->serial = pci_get_dsn(pdev); - cxlds->cxl_dvsec = pci_find_dvsec_capability( - pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); - if (!cxlds->cxl_dvsec) - dev_warn(&pdev->dev, - "Device DVSEC not present, skip CXL.mem init\n"); rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) @@ -1006,7 +938,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) dev_dbg(&pdev->dev, "No CXL Features discovered\n"); - cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); + cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds, NULL); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); @@ -1112,11 +1044,11 @@ static void cxl_reset_done(struct pci_dev *pdev) } } -static const struct pci_error_handlers cxl_error_handlers = { - .error_detected = cxl_error_detected, +static const struct pci_error_handlers pci_error_handlers = { + .error_detected = pci_error_detected, .slot_reset = cxl_slot_reset, .resume = cxl_error_resume, - .cor_error_detected = cxl_cor_error_detected, + .cor_error_detected = pci_cor_error_detected, .reset_done = cxl_reset_done, }; @@ -1124,13 +1056,24 @@ static struct pci_driver cxl_pci_driver = { .name = KBUILD_MODNAME, .id_table = cxl_mem_pci_tbl, .probe = cxl_pci_probe, - .err_handler = &cxl_error_handlers, + .err_handler = &pci_error_handlers, .dev_groups = cxl_rcd_groups, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; +bool cxl_pci_drv_bound(struct pci_dev *pdev) +{ + device_lock_assert(&pdev->dev); + + if (pdev->driver != &cxl_pci_driver) + pr_err_ratelimited("%s device not bound to CXL PCI driver\n", + pci_name(pdev)); + + return (pdev->driver == &cxl_pci_driver); +} + #define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) static void cxl_handle_cper_event(enum cxl_event_type ev_type, struct cxl_cper_event_rec *rec) @@ -1177,7 +1120,7 @@ static void cxl_cper_work_fn(struct work_struct *work) } static DECLARE_WORK(cxl_cper_work, cxl_cper_work_fn); -static int __init cxl_pci_driver_init(void) +int __init cxl_pci_driver_init(void) { int rc; @@ -1192,15 +1135,9 @@ static int __init cxl_pci_driver_init(void) return rc; } -static void __exit cxl_pci_driver_exit(void) +void cxl_pci_driver_exit(void) { cxl_cper_unregister_work(&cxl_cper_work); cancel_work_sync(&cxl_cper_work); pci_unregister_driver(&cxl_pci_driver); } - -module_init(cxl_pci_driver_init); -module_exit(cxl_pci_driver_exit); -MODULE_DESCRIPTION("CXL: PCI manageability"); -MODULE_LICENSE("GPL v2"); -MODULE_IMPORT_NS("CXL"); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 29197376b18e3..f7f597c83181a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,15 @@ static DEFINE_IDA(cxl_port_ida); static DEFINE_XARRAY(cxl_root_buses); +/* + * The terminal device in PCI is NULL and @platform_bus + * for platform devices (for cxl_test) + */ +static bool is_cxl_host_bridge(struct device *dev) +{ + return (!dev || dev == &platform_bus); +} + int cxl_num_decoders_committed(struct cxl_port *port) { lockdep_assert_held(&cxl_rwsem.region); @@ -450,6 +460,7 @@ static void cxl_root_decoder_release(struct device *dev) if (atomic_read(&cxlrd->region_id) >= 0) memregion_free(atomic_read(&cxlrd->region_id)); __cxl_decoder_release(&cxlrd->cxlsd.cxld); + kfree(cxlrd->ops); kfree(cxlrd); } @@ -740,6 +751,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, xa_init(&port->dports); xa_init(&port->endpoints); xa_init(&port->regions); + port->component_reg_phys = CXL_RESOURCE_NONE; device_initialize(dev); lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth); @@ -858,9 +870,7 @@ static int cxl_port_add(struct cxl_port *port, if (rc) return rc; - rc = cxl_port_setup_regs(port, component_reg_phys); - if (rc) - return rc; + port->component_reg_phys = component_reg_phys; } else { rc = dev_set_name(dev, "root%d", port->id); if (rc) @@ -1173,6 +1183,23 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); + /* + * Setup port register if this is the first dport showed up. Having + * a dport also means that there is at least 1 active link. + */ + if (port->nr_dports == 1 && + port->component_reg_phys != CXL_RESOURCE_NONE) { + rc = cxl_port_setup_regs(port, port->component_reg_phys); + if (rc) { + port->nr_dports--; + xa_erase(&port->dports, (unsigned long)dport->dport_dev); + return ERR_PTR(rc); + } + port->component_reg_phys = CXL_RESOURCE_NONE; + if (!is_cxl_endpoint(port) && dev_is_pci(port->uport_dev)) + cxl_uport_init_ras_reporting(port, &port->dev); + } + get_device(dport_dev); rc = devm_add_action_or_reset(host, cxl_dport_remove, dport); if (rc) @@ -1335,26 +1362,11 @@ static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx) return NULL; } -static struct cxl_port *find_cxl_port(struct device *dport_dev, - struct cxl_dport **dport) -{ - struct cxl_find_port_ctx ctx = { - .dport_dev = dport_dev, - .dport = dport, - }; - struct cxl_port *port; - - port = __find_cxl_port(&ctx); - return port; -} - -static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, - struct device *dport_dev, - struct cxl_dport **dport) +struct cxl_port *find_cxl_port(struct device *dport_dev, + struct cxl_dport **dport) { struct cxl_find_port_ctx ctx = { .dport_dev = dport_dev, - .parent_port = parent_port, .dport = dport, }; struct cxl_port *port; @@ -1423,16 +1435,29 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL"); * through ->remove(). This "bottom-up" removal selectively removes individual * child ports manually. This depends on devm_cxl_add_port() to not change is * devm action registration order, and for dports to have already been - * destroyed by reap_dports(). + * destroyed by del_dports(). */ static void delete_switch_port(struct cxl_port *port) { + cxl_mask_proto_interrupts(port->uport_dev); + if (port->parent_dport) + cxl_mask_proto_interrupts(port->parent_dport->dport_dev); + devm_release_action(port->dev.parent, cxl_unlink_parent_dport, port); devm_release_action(port->dev.parent, cxl_unlink_uport, port); devm_release_action(port->dev.parent, unregister_port, port); } -static void reap_dports(struct cxl_port *port) +static void del_dport(struct cxl_dport *dport) +{ + struct cxl_port *port = dport->port; + + devm_release_action(&port->dev, cxl_dport_unlink, dport); + devm_release_action(&port->dev, cxl_dport_remove, dport); + devm_kfree(&port->dev, dport); +} + +static void del_dports(struct cxl_port *port) { struct cxl_dport *dport; unsigned long index; @@ -1440,9 +1465,8 @@ static void reap_dports(struct cxl_port *port) device_lock_assert(&port->dev); xa_for_each(&port->dports, index, dport) { - devm_release_action(&port->dev, cxl_dport_unlink, dport); - devm_release_action(&port->dev, cxl_dport_remove, dport); - devm_kfree(&port->dev, dport); + cxl_mask_proto_interrupts(dport->dport_dev); + del_dport(dport); } } @@ -1470,6 +1494,8 @@ static void cxl_detach_ep(void *data) { struct cxl_memdev *cxlmd = data; + cxl_mask_proto_interrupts(cxlmd->cxlds->dev); + for (int i = cxlmd->depth - 1; i >= 1; i--) { struct cxl_port *port, *parent_port; struct detach_ctx ctx = { @@ -1501,7 +1527,7 @@ static void cxl_detach_ep(void *data) */ died = true; port->dead = true; - reap_dports(port); + del_dports(port); } device_unlock(&port->dev); @@ -1532,16 +1558,160 @@ static resource_size_t find_component_registers(struct device *dev) return map.resource; } +static int match_port_by_uport(struct device *dev, const void *data) +{ + const struct device *uport_dev = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + return uport_dev == port->uport_dev; +} + +/* + * Function takes a device reference on the port device. Caller should do a + * put_device() when done. + */ +struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev) +{ + struct device *dev; + + dev = bus_find_device(&cxl_bus_type, NULL, uport_dev, match_port_by_uport); + if (dev) + return to_cxl_port(dev); + return NULL; +} +EXPORT_SYMBOL_NS_GPL(find_cxl_port_by_uport, "CXL"); + +static int update_decoder_targets(struct device *dev, void *data) +{ + struct cxl_dport *dport = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_decoder *cxld; + int i; + + if (!is_switch_decoder(dev)) + return 0; + + cxlsd = to_cxl_switch_decoder(dev); + cxld = &cxlsd->cxld; + guard(rwsem_write)(&cxl_rwsem.region); + + for (i = 0; i < cxld->interleave_ways; i++) { + if (cxld->target_map[i] == dport->port_id) { + cxlsd->target[i] = dport; + dev_dbg(dev, "dport%d found in target list, index %d\n", + dport->port_id, i); + return 0; + } + } + + return 0; +} + +DEFINE_FREE(del_cxl_dport, struct cxl_dport *, if (!IS_ERR_OR_NULL(_T)) del_dport(_T)) +static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_dport *dport; + int rc; + + device_lock_assert(&port->dev); + if (!port->dev.driver) + return ERR_PTR(-ENXIO); + + dport = cxl_find_dport_by_dev(port, dport_dev); + if (dport) { + dev_dbg(&port->dev, "dport%d:%s already exists\n", + dport->port_id, dev_name(dport_dev)); + return ERR_PTR(-EBUSY); + } + + struct cxl_dport *new_dport __free(del_cxl_dport) = + devm_cxl_add_dport_by_dev(port, dport_dev); + if (IS_ERR(new_dport)) + return new_dport; + + cxl_switch_parse_cdat(new_dport); + + cxl_dport_init_ras_reporting(new_dport, &port->dev); + + if (ida_is_empty(&port->decoder_ida)) { + rc = devm_cxl_switch_port_decoders_setup(port); + if (rc) + return ERR_PTR(rc); + dev_dbg(&port->dev, "first dport%d:%s added with decoders\n", + new_dport->port_id, dev_name(dport_dev)); + return no_free_ptr(new_dport); + } + + /* New dport added, update the decoder targets */ + device_for_each_child(&port->dev, new_dport, update_decoder_targets); + + dev_dbg(&port->dev, "dport%d:%s added\n", new_dport->port_id, + dev_name(dport_dev)); + + return no_free_ptr(new_dport); +} + +static struct cxl_dport *devm_cxl_create_port(struct device *ep_dev, + struct cxl_port *parent_port, + struct cxl_dport *parent_dport, + struct device *uport_dev, + struct device *dport_dev) +{ + resource_size_t component_reg_phys; + + device_lock_assert(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_warn(ep_dev, + "port %s:%s:%s disabled, failed to enumerate CXL.mem\n", + dev_name(&parent_port->dev), dev_name(uport_dev), + dev_name(dport_dev)); + } + + struct cxl_port *port __free(put_cxl_port) = + find_cxl_port_by_uport(uport_dev); + if (!port) { + component_reg_phys = find_component_registers(uport_dev); + port = devm_cxl_add_port(&parent_port->dev, uport_dev, + component_reg_phys, parent_dport); + if (IS_ERR(port)) + return ERR_CAST(port); + + /* + * retry to make sure a port is found. a port device + * reference is taken. + */ + port = find_cxl_port_by_uport(uport_dev); + if (!port) + return ERR_PTR(-ENODEV); + + dev_dbg(ep_dev, "created port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport_dev)); + } else { + /* + * Port was created before right before this function is + * called. Signal the caller to deal with it. + */ + return ERR_PTR(-EAGAIN); + } + + guard(device)(&port->dev); + return cxl_port_add_dport(port, dport_dev); +} + static int add_port_attach_ep(struct cxl_memdev *cxlmd, struct device *uport_dev, struct device *dport_dev) { struct device *dparent = grandparent(dport_dev); struct cxl_dport *dport, *parent_dport; - resource_size_t component_reg_phys; int rc; - if (!dparent) { + if (is_cxl_host_bridge(dparent)) { /* * The iteration reached the topology root without finding the * CXL-root 'cxl_port' on a previous iteration, fail for now to @@ -1553,42 +1723,31 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, } struct cxl_port *parent_port __free(put_cxl_port) = - find_cxl_port(dparent, &parent_dport); + find_cxl_port_by_uport(dparent->parent); if (!parent_port) { /* iterate to create this parent_port */ return -EAGAIN; } - /* - * Definition with __free() here to keep the sequence of - * dereferencing the device of the port before the parent_port releasing. - */ - struct cxl_port *port __free(put_cxl_port) = NULL; scoped_guard(device, &parent_port->dev) { - if (!parent_port->dev.driver) { - dev_warn(&cxlmd->dev, - "port %s:%s disabled, failed to enumerate CXL.mem\n", - dev_name(&parent_port->dev), dev_name(uport_dev)); - return -ENXIO; + parent_dport = cxl_find_dport_by_dev(parent_port, dparent); + if (!parent_dport) { + parent_dport = cxl_port_add_dport(parent_port, dparent); + if (IS_ERR(parent_dport)) + return PTR_ERR(parent_dport); } - port = find_cxl_port_at(parent_port, dport_dev, &dport); - if (!port) { - component_reg_phys = find_component_registers(uport_dev); - port = devm_cxl_add_port(&parent_port->dev, uport_dev, - component_reg_phys, parent_dport); - if (IS_ERR(port)) - return PTR_ERR(port); - - /* retry find to pick up the new dport information */ - port = find_cxl_port_at(parent_port, dport_dev, &dport); - if (!port) - return -ENXIO; + dport = devm_cxl_create_port(&cxlmd->dev, parent_port, + parent_dport, uport_dev, + dport_dev); + if (IS_ERR(dport)) { + /* Port already exists, restart iteration */ + if (PTR_ERR(dport) == -EAGAIN) + return 0; + return PTR_ERR(dport); } } - dev_dbg(&cxlmd->dev, "add to new port %s:%s\n", - dev_name(&port->dev), dev_name(port->uport_dev)); rc = cxl_add_ep(dport, &cxlmd->dev); if (rc == -EBUSY) { /* @@ -1598,9 +1757,30 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, rc = -ENXIO; } + cxl_unmask_proto_interrupts(cxlmd->cxlds->dev); + return rc; } +static struct cxl_dport *find_or_add_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_dport *dport; + + device_lock_assert(&port->dev); + dport = cxl_find_dport_by_dev(port, dport_dev); + if (!dport) { + dport = cxl_port_add_dport(port, dport_dev); + if (IS_ERR(dport)) + return dport; + + /* New dport added, restart iteration */ + return ERR_PTR(-EAGAIN); + } + + return dport; +} + int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) { struct device *dev = &cxlmd->dev; @@ -1629,11 +1809,7 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) struct device *uport_dev; struct cxl_dport *dport; - /* - * The terminal "grandparent" in PCI is NULL and @platform_bus - * for platform devices - */ - if (!dport_dev || dport_dev == &platform_bus) + if (is_cxl_host_bridge(dport_dev)) return 0; uport_dev = dport_dev->parent; @@ -1647,14 +1823,30 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) dev_name(iter), dev_name(dport_dev), dev_name(uport_dev)); struct cxl_port *port __free(put_cxl_port) = - find_cxl_port(dport_dev, &dport); + find_cxl_port_by_uport(uport_dev); if (port) { dev_dbg(&cxlmd->dev, "found already registered port %s:%s\n", dev_name(&port->dev), dev_name(port->uport_dev)); + + /* + * RP port enumerated by cxl_acpi without dport will + * have the dport added here. + */ + scoped_guard(device, &port->dev) { + dport = find_or_add_dport(port, dport_dev); + if (IS_ERR(dport)) { + if (PTR_ERR(dport) == -EAGAIN) + goto retry; + return PTR_ERR(dport); + } + } + rc = cxl_add_ep(dport, &cxlmd->dev); + cxl_unmask_proto_interrupts(cxlmd->cxlds->dev); + /* * If the endpoint already exists in the port's list, * that's ok, it was added on a previous pass. @@ -1704,24 +1896,24 @@ struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd, EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, "CXL"); static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, - struct cxl_port *port, int *target_map) + struct cxl_port *port) { + struct cxl_decoder *cxld = &cxlsd->cxld; int i; - if (!target_map) - return 0; - device_lock_assert(&port->dev); if (xa_empty(&port->dports)) - return -EINVAL; + return 0; guard(rwsem_write)(&cxl_rwsem.region); for (i = 0; i < cxlsd->cxld.interleave_ways; i++) { - struct cxl_dport *dport = find_dport(port, target_map[i]); + struct cxl_dport *dport = find_dport(port, cxld->target_map[i]); - if (!dport) - return -ENXIO; + if (!dport) { + /* dport may be activated later */ + continue; + } cxlsd->target[i] = dport; } @@ -1910,9 +2102,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL"); /** * cxl_decoder_add_locked - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl__decoder_alloc() - * @target_map: A list of downstream ports that this decoder can direct memory - * traffic to. These numbers should correspond with the port number - * in the PCIe Link Capabilities structure. * * Certain types of decoders may not have any targets. The main example of this * is an endpoint device. A more awkward example is a hostbridge whose root @@ -1926,7 +2115,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL"); * Return: Negative error code if the decoder wasn't properly configured; else * returns 0. */ -int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) +int cxl_decoder_add_locked(struct cxl_decoder *cxld) { struct cxl_port *port; struct device *dev; @@ -1947,7 +2136,7 @@ int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) if (!is_endpoint_decoder(dev)) { struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev); - rc = decoder_populate_targets(cxlsd, port, target_map); + rc = decoder_populate_targets(cxlsd, port); if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) { dev_err(&port->dev, "Failed to populate active decoder targets\n"); @@ -1966,9 +2155,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL"); /** * cxl_decoder_add - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl__decoder_alloc() - * @target_map: A list of downstream ports that this decoder can direct memory - * traffic to. These numbers should correspond with the port number - * in the PCIe Link Capabilities structure. * * This is the unlocked variant of cxl_decoder_add_locked(). * See cxl_decoder_add_locked(). @@ -1976,7 +2162,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL"); * Context: Process context. Takes and releases the device lock of the port that * owns the @cxld. */ -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) +int cxl_decoder_add(struct cxl_decoder *cxld) { struct cxl_port *port; @@ -1989,7 +2175,7 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); guard(device)(&port->dev); - return cxl_decoder_add_locked(cxld, target_map); + return cxl_decoder_add_locked(cxld); } EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); @@ -2336,8 +2522,14 @@ static __init int cxl_core_init(void) if (rc) goto err_ras; + rc = cxl_pci_driver_init(); + if (rc) + goto err_pci; + return 0; +err_pci: + cxl_ras_exit(); err_ras: cxl_region_exit(); err_region: @@ -2351,6 +2543,7 @@ static __init int cxl_core_init(void) static void cxl_core_exit(void) { + cxl_pci_driver_exit(); cxl_ras_exit(); cxl_region_exit(); bus_unregister(&cxl_bus_type); diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 2731ba3a07993..46ce96a89814b 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "trace.h" static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, @@ -12,7 +13,7 @@ static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, { u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; - trace_cxl_port_aer_correctable_error(&pdev->dev, status); + trace_cxl_aer_correctable_error(&pdev->dev, status, 0); } static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, @@ -27,8 +28,8 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, else fe = status; - trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe, - ras_cap.header_log); + trace_cxl_aer_uncorrectable_error(&pdev->dev, status, fe, + ras_cap.header_log, 0); } static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd, @@ -36,7 +37,7 @@ static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd, { u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; - trace_cxl_aer_correctable_error(cxlmd, status); + trace_cxl_aer_correctable_error(&cxlmd->dev, status, cxlmd->cxlds->serial); } static void @@ -44,6 +45,7 @@ cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd, struct cxl_ras_capability_regs ras_cap) { u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; + struct cxl_dev_state *cxlds = cxlmd->cxlds; u32 fe; if (hweight32(status) > 1) @@ -52,8 +54,9 @@ cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd, else fe = status; - trace_cxl_aer_uncorrectable_error(cxlmd, status, fe, - ras_cap.header_log); + trace_cxl_aer_uncorrectable_error(&cxlmd->dev, status, fe, + ras_cap.header_log, + cxlds->serial); } static int match_memdev_by_parent(struct device *dev, const void *uport) @@ -114,13 +117,578 @@ static void cxl_cper_prot_err_work_fn(struct work_struct *work) } static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn); +static bool is_pcie_endpoint(struct pci_dev *pdev) +{ + return pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT; +} + +void cxl_unmask_proto_interrupts(struct device *dev) +{ + if (!dev || !dev_is_pci(dev)) + return; + + struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(to_pci_dev(dev)); + + if (!pdev->aer_cap) { + pdev->aer_cap = pci_find_ext_capability(pdev, + PCI_EXT_CAP_ID_ERR); + if (!pdev->aer_cap) + return; + } + + pci_aer_unmask_internal_errors(pdev); +} + +void cxl_mask_proto_interrupts(struct device *dev) +{ + if (!dev || !dev_is_pci(dev)) + return; + + struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(to_pci_dev(dev)); + + if (!pdev->aer_cap) { + pdev->aer_cap = pci_find_ext_capability(pdev, + PCI_EXT_CAP_ID_ERR); + if (!pdev->aer_cap) + return; + } + + pci_aer_mask_internal_errors(pdev); +} + +static void cxl_dport_map_ras(struct cxl_dport *dport) +{ + struct cxl_register_map *map = &dport->reg_map; + struct device *dev = dport->dport_dev; + + if (!map->component_map.ras.valid) + dev_dbg(dev, "RAS registers not found\n"); + else if (cxl_map_component_regs(map, &dport->regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(dev, "Failed to map RAS capability.\n"); +} + +static void __iomem *cxl_get_ras_base(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + switch (pci_pcie_type(pdev)) { + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_DOWNSTREAM: + { + struct cxl_dport *dport; + struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport); + + if (!dport) { + pci_err(pdev, "Failed to find the CXL device"); + return NULL; + } + return dport->regs.ras; + } + case PCI_EXP_TYPE_UPSTREAM: + { + struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev); + + if (!port) { + pci_err(pdev, "Failed to find the CXL device"); + return NULL; + } + return port->uport_regs.ras; + } + } + + dev_warn_once(dev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev)); + return NULL; +} + +/* + * Return 'struct cxl_port *' parent CXL port of dev's + * + * Reference count increments on success + * + * dev: Find the parent port of this dev + */ +static struct cxl_port *get_cxl_port(struct pci_dev *pdev) +{ + switch (pci_pcie_type(pdev)) { + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_DOWNSTREAM: + { + struct cxl_dport *dport; + struct cxl_port *port = find_cxl_port(&pdev->dev, &dport); + + if (!port) { + pci_err(pdev, "Failed to find the CXL device"); + return NULL; + } + return port; + } + case PCI_EXP_TYPE_UPSTREAM: + { + struct cxl_port *port = find_cxl_port_by_uport(&pdev->dev); + + if (!port) { + pci_err(pdev, "Failed to find the CXL device"); + return NULL; + } + return port; + } + case PCI_EXP_TYPE_ENDPOINT: + { + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_port *port = cxlds->cxlmd->endpoint; + + get_device(&port->dev); + return port; + } + } + pci_warn_once(pdev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev)); + return NULL; +} + +/** + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport + * @dport: the cxl_dport that needs to be initialized + * @host: host device for devm operations + */ +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) +{ + dport->reg_map.host = host; + cxl_dport_map_ras(dport); + + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); + + if (!host_bridge->native_aer) + return; + + cxl_dport_map_rch_aer(dport); + cxl_disable_rch_root_ints(dport); + return; + } + + cxl_unmask_proto_interrupts(dport->dport_dev); +} +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); + +void cxl_uport_init_ras_reporting(struct cxl_port *port, + struct device *host) +{ + struct cxl_register_map *map = &port->reg_map; + + map->host = host; + if (cxl_map_component_regs(map, &port->uport_regs, + BIT(CXL_CM_CAP_CAP_ID_RAS))) { + dev_dbg(&port->dev, "Failed to map RAS capability\n"); + return; + } + + cxl_unmask_proto_interrupts(port->uport_dev); +} +EXPORT_SYMBOL_NS_GPL(cxl_uport_init_ras_reporting, "CXL"); + +static bool device_lock_if(struct device *dev, bool cond) +{ + if (cond) + device_lock(dev); + return cond; +} + +static void device_unlock_if(struct device *dev, bool take) +{ + if (take) + device_unlock(dev); +} + +/** + * cxl_report_error_detected + * @dev: Device being reported + * @data: Result + * @err_pdev: Device with initial detected error. Is locked immediately + * after KFIFO dequeue. + */ +static int cxl_report_error_detected(struct device *dev, void *data, struct pci_dev *err_pdev) +{ + bool need_lock = (dev != &err_pdev->dev); + pci_ers_result_t vote, *result = data; + struct pci_dev *pdev; + + if (!dev || !dev_is_pci(dev)) + return 0; + pdev = to_pci_dev(dev); + + device_lock_if(&pdev->dev, need_lock); + if (is_pcie_endpoint(pdev) && !cxl_pci_drv_bound(pdev)) { + device_unlock_if(&pdev->dev, need_lock); + return PCI_ERS_RESULT_NONE; + } + + if (pdev->aer_cap) + pci_clear_and_set_config_dword(pdev, + pdev->aer_cap + PCI_ERR_COR_STATUS, + 0, PCI_ERR_COR_INTERNAL); + + if (is_pcie_endpoint(pdev)) { + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + + device_lock_if(&cxlds->cxlmd->dev, need_lock); + vote = cxl_error_detected(&cxlds->cxlmd->dev); + device_unlock_if(&cxlds->cxlmd->dev, need_lock); + } else { + vote = cxl_port_error_detected(dev); + } + + pcie_clear_device_status(pdev); + *result = pcie_ers_merge_result(*result, vote); + device_unlock_if(&pdev->dev, need_lock); + + return 0; +} + +static int match_port_by_parent_dport(struct device *dev, const void *dport_dev) +{ + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + + return port->parent_dport->dport_dev == dport_dev; +} + +/** + * cxl_walk_port + * + * @port: Port be traversed into + * @cb: Callback for handling the CXL Ports + * @userdata: Result + * @err_pdev: Device with initial detected error. Is locked immediately + * after KFIFO dequeue. + */ +static void cxl_walk_port(struct cxl_port *port, + int (*cb)(struct device *, void *, struct pci_dev *), + void *userdata, + struct pci_dev *err_pdev) +{ + struct cxl_port *err_port __free(put_cxl_port) = get_cxl_port(err_pdev); + bool need_lock = (port != err_port); + struct cxl_dport *dport = NULL; + unsigned long index; + + device_lock_if(&port->dev, need_lock); + if (is_cxl_endpoint(port)) { + cb(port->uport_dev->parent, userdata, err_pdev); + device_unlock_if(&port->dev, need_lock); + return; + } + + if (port->uport_dev && dev_is_pci(port->uport_dev)) + cb(port->uport_dev, userdata, err_pdev); + + /* + * Iterate over the set of Downstream Ports recorded in port->dports (XArray): + * - For each dport, attempt to find a child CXL Port whose parent dport + * match. + * - Invoke the provided callback on the dport's device. + * - If a matching child CXL Port device is found, recurse into that port to + * continue the walk. + */ + xa_for_each(&port->dports, index, dport) + { + struct device *child_port_dev __free(put_device) = + bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev, + match_port_by_parent_dport); + + cb(dport->dport_dev, userdata, err_pdev); + if (child_port_dev) + cxl_walk_port(to_cxl_port(child_port_dev), cb, userdata, err_pdev); + } + device_unlock_if(&port->dev, need_lock); +} + +static void cxl_do_recovery(struct pci_dev *pdev) +{ + pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; + struct cxl_port *port __free(put_cxl_port) = get_cxl_port(pdev); + + if (!port) { + pci_err(pdev, "Failed to find the CXL device\n"); + return; + } + + cxl_walk_port(port, cxl_report_error_detected, &status, pdev); + if (status == PCI_ERS_RESULT_PANIC) + panic("CXL cachemem error."); + + /* + * If we have native control of AER, clear error status in the device + * that detected the error. If the platform retained control of AER, + * it is responsible for clearing this status. In that case, the + * signaling device may not even be visible to the OS. + */ + if (cxl_error_is_native(pdev)) { + pcie_clear_device_status(pdev); + pci_aer_clear_nonfatal_status(pdev); + pci_aer_clear_fatal_status(pdev); + } +} + +void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base) +{ + void __iomem *addr; + u32 status; + + if (!ras_base) { + dev_warn_once(dev, "CXL RAS register block is not mapped"); + return; + } + + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK)) + return; + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + + trace_cxl_aer_correctable_error(dev, status, serial); +} + +/* CXL spec rev3.0 8.2.4.16.1 */ +static void header_log_copy(void __iomem *ras_base, u32 *log) +{ + void __iomem *addr; + u32 *log_addr; + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); + + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; + log_addr = log; + + for (i = 0; i < log_u32_size; i++) { + *log_addr = readl(addr); + log_addr++; + addr += sizeof(u32); + } +} + +/* + * Log the state of the RAS status registers and prepare them to log the + * next error status. Return 1 if reset needed. + */ +pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base) +{ + u32 hl[CXL_HEADERLOG_SIZE_U32]; + void __iomem *addr; + u32 status; + u32 fe; + + if (!ras_base) { + dev_warn_once(dev, "CXL RAS register block is not mapped"); + return PCI_ERS_RESULT_NONE; + } + + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) + return PCI_ERS_RESULT_NONE; + + /* If multiple errors, log header points to first error from ctrl reg */ + if (hweight32(status) > 1) { + void __iomem *rcc_addr = + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; + + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + readl(rcc_addr))); + } else { + fe = status; + } + + header_log_copy(ras_base, hl); + trace_cxl_aer_uncorrectable_error(dev, status, fe, hl, serial); + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); + + return PCI_ERS_RESULT_PANIC; +} + +void cxl_port_cor_error_detected(struct device *dev) +{ + void __iomem *ras_base = cxl_get_ras_base(dev); + + cxl_handle_cor_ras(dev, 0, ras_base); +} +EXPORT_SYMBOL_NS_GPL(cxl_port_cor_error_detected, "CXL"); + +pci_ers_result_t cxl_port_error_detected(struct device *dev) +{ + void __iomem *ras_base = cxl_get_ras_base(dev); + + return cxl_handle_ras(dev, 0, ras_base); +} +EXPORT_SYMBOL_NS_GPL(cxl_port_error_detected, "CXL"); + +void cxl_cor_error_detected(struct device *dev) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + + device_lock_assert(cxlds->dev); + device_lock_assert(&cxlmd->dev); + + if (!dev->driver) { + dev_warn(cxlds->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); + +void pci_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds; + + device_lock_assert(&pdev->dev); + if (!cxl_pci_drv_bound(pdev)) + return; + + cxlds = pci_get_drvdata(pdev); + guard(device)(&cxlds->cxlmd->dev); + + cxl_cor_error_detected(&cxlds->cxlmd->dev); +} +EXPORT_SYMBOL_NS_GPL(pci_cor_error_detected, "CXL"); + +pci_ers_result_t cxl_error_detected(struct device *dev) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + + device_lock_assert(cxlds->dev); + device_lock_assert(&cxlmd->dev); + + if (!dev->driver) { + dev_warn(cxlds->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + return cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); +} +EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); + +pci_ers_result_t pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t error) +{ + struct cxl_dev_state *cxlds; + pci_ers_result_t rc; + + device_lock_assert(&pdev->dev); + if (!cxl_pci_drv_bound(pdev)) + return PCI_ERS_RESULT_NONE; + + cxlds = pci_get_drvdata(pdev); + guard(device)(&cxlds->cxlmd->dev); + + rc = cxl_error_detected(&cxlds->cxlmd->dev); + if (rc == PCI_ERS_RESULT_PANIC) + panic("CXL cachemem error."); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(pci_error_detected, "CXL"); + +static void cxl_handle_proto_error(struct cxl_proto_err_work_data *err_info) +{ + struct pci_dev *pdev = err_info->pdev; + + if (err_info->severity == AER_CORRECTABLE) { + + if (pdev->aer_cap) + pci_clear_and_set_config_dword(pdev, + pdev->aer_cap + PCI_ERR_COR_STATUS, + 0, PCI_ERR_COR_INTERNAL); + + if (is_pcie_endpoint(pdev)) { + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + cxl_cor_error_detected(&cxlds->cxlmd->dev); + } else { + cxl_port_cor_error_detected(&pdev->dev); + } + + pcie_clear_device_status(pdev); + } else { + cxl_do_recovery(pdev); + } +} + +static void cxl_proto_err_work_fn(struct work_struct *work) +{ + struct cxl_proto_err_work_data wd; + + while (cxl_proto_err_kfifo_get(&wd)) { + struct pci_dev *pdev __free(pci_dev_put) = wd.pdev; + struct device *cxlmd_dev; + + if (!pdev) { + pr_err_ratelimited("NULL PCI device passed in AER-CXL KFIFO\n"); + continue; + } + + guard(device)(&pdev->dev); + if (is_pcie_endpoint(pdev)) { + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + + if (!cxl_pci_drv_bound(pdev)) + continue; + cxlmd_dev = &cxlds->cxlmd->dev; + } else { + cxlmd_dev = NULL; + } + + /* Lock the CXL parent Port */ + struct cxl_port *port __free(put_cxl_port) = get_cxl_port(pdev); + if (!port) + continue; + guard(device)(&port->dev); + + device_lock_if(cxlmd_dev, cxlmd_dev); + cxl_handle_proto_error(&wd); + device_unlock_if(cxlmd_dev, cxlmd_dev); + } +} + +static struct work_struct cxl_proto_err_work; +static DECLARE_WORK(cxl_proto_err_work, cxl_proto_err_work_fn); + int cxl_ras_init(void) { - return cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work); + if (cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work)) + pr_err("Failed to initialize CXL RAS CPER\n"); + + cxl_register_proto_err_work(&cxl_proto_err_work); + + return 0; } void cxl_ras_exit(void) { cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); cancel_work_sync(&cxl_cper_prot_err_work); + + cxl_unregister_proto_err_work(); + cancel_work_sync(&cxl_proto_err_work); } diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c new file mode 100644 index 0000000000000..421dd1bcfc9c0 --- /dev/null +++ b/drivers/cxl/core/ras_rch.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ + +#include +#include +#include +#include +#include "trace.h" + +void cxl_dport_map_rch_aer(struct cxl_dport *dport) +{ + resource_size_t aer_phys; + struct device *host; + u16 aer_cap; + + aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); + if (aer_cap) { + host = dport->reg_map.host; + aer_phys = aer_cap + dport->rcrb.base; + dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys, + sizeof(struct aer_capability_regs)); + } +} + +void cxl_disable_rch_root_ints(struct cxl_dport *dport) +{ + void __iomem *aer_base = dport->regs.dport_aer; + u32 aer_cmd_mask, aer_cmd; + + if (!aer_base) + return; + + /* + * Disable RCH root port command interrupts. + * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors + * + * This sequence may not be necessary. CXL spec states disabling + * the root cmd register's interrupts is required. But, PCI spec + * shows these are disabled by default on reset. + */ + aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | + PCI_ERR_ROOT_CMD_NONFATAL_EN | + PCI_ERR_ROOT_CMD_FATAL_EN); + aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); + aer_cmd &= ~aer_cmd_mask; + writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); +} + +/* + * Copy the AER capability registers using 32 bit read accesses. + * This is necessary because RCRB AER capability is MMIO mapped. Clear the + * status after copying. + * + * @aer_base: base address of AER capability block in RCRB + * @aer_regs: destination for copying AER capability + */ +static bool cxl_rch_get_aer_info(void __iomem *aer_base, + struct aer_capability_regs *aer_regs) +{ + int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); + u32 *aer_regs_buf = (u32 *)aer_regs; + int n; + + if (!aer_base) + return false; + + /* Use readl() to guarantee 32-bit accesses */ + for (n = 0; n < read_cnt; n++) + aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); + + writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); + writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); + + return true; +} + +/* Get AER severity. Return false if there is no error. */ +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, + int *severity) +{ + if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { + if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) + *severity = AER_FATAL; + else + *severity = AER_NONFATAL; + return true; + } + + if (aer_regs->cor_status & ~aer_regs->cor_mask) { + *severity = AER_CORRECTABLE; + return true; + } + + return false; +} + +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct aer_capability_regs aer_regs; + struct cxl_dport *dport; + int severity; + + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return; + + if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) + return; + + if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) + return; + + pci_print_aer(pdev, severity, &aer_regs); + if (severity == AER_CORRECTABLE) + cxl_handle_cor_ras(&cxlds->cxlmd->dev, 0, dport->regs.ras); + else + cxl_handle_ras(&cxlds->cxlmd->dev, 0, dport->regs.ras); +} diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index adebbb1db5078..32d9b9bdc53a6 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2,6 +2,7 @@ /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ #include #include +#include #include #include #include @@ -10,6 +11,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -30,6 +32,12 @@ * 3. Decoder targets */ +/* + * nodemask that sets per node when the access_coordinates for the node has + * been updated by the CXL memory hotplug notifier. + */ +static nodemask_t nodemask_region_seen = NODE_MASK_NONE; + static struct cxl_region *to_cxl_region(struct device *dev); #define __ACCESS_ATTR_RO(_level, _name) { \ @@ -483,22 +491,14 @@ static ssize_t interleave_ways_show(struct device *dev, static const struct attribute_group *get_cxl_region_target_group(void); -static ssize_t interleave_ways_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_ways(struct cxl_region *cxlr, int val) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - unsigned int val, save; - int rc; + int save, rc; u8 iw; - rc = kstrtouint(buf, 0, &val); - if (rc) - return rc; - rc = ways_to_eiw(val, &iw); if (rc) return rc; @@ -513,9 +513,7 @@ static ssize_t interleave_ways_store(struct device *dev, return -EINVAL; } - ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; @@ -523,10 +521,31 @@ static ssize_t interleave_ways_store(struct device *dev, save = p->interleave_ways; p->interleave_ways = val; rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); - if (rc) { + if (rc) p->interleave_ways = save; + + return rc; +} + +static ssize_t interleave_ways_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + + rc = set_interleave_ways(cxlr, val); + if (rc) return rc; - } return len; } @@ -546,21 +565,14 @@ static ssize_t interleave_granularity_show(struct device *dev, return sysfs_emit(buf, "%d\n", p->interleave_granularity); } -static ssize_t interleave_granularity_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_granularity(struct cxl_region *cxlr, int val) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - int rc, val; + int rc; u16 ig; - rc = kstrtoint(buf, 0, &val); - if (rc) - return rc; - rc = granularity_to_eig(val, &ig); if (rc) return rc; @@ -576,14 +588,32 @@ static ssize_t interleave_granularity_store(struct device *dev, if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity) return -EINVAL; - ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; - + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; p->interleave_granularity = val; + return 0; +} + +static ssize_t interleave_granularity_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + int rc, val; + + rc = kstrtoint(buf, 0, &val); + if (rc) + return rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + + rc = set_interleave_granularity(cxlr, val); + if (rc) + return rc; return len; } @@ -703,6 +733,171 @@ static int free_hpa(struct cxl_region *cxlr) return 0; } +struct cxlrd_max_context { + struct device * const *host_bridges; + int interleave_ways; + unsigned long flags; + resource_size_t max_hpa; + struct cxl_root_decoder *cxlrd; +}; + +static int find_max_hpa(struct device *dev, void *data) +{ + struct cxlrd_max_context *ctx = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_root_decoder *cxlrd; + struct resource *res, *prev; + struct cxl_decoder *cxld; + resource_size_t free = 0; + resource_size_t max; + int found = 0; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + cxlsd = &cxlrd->cxlsd; + cxld = &cxlsd->cxld; + + if ((cxld->flags & ctx->flags) != ctx->flags) { + dev_dbg(dev, "flags not matching: %08lx vs %08lx\n", + cxld->flags, ctx->flags); + return 0; + } + + for (int i = 0; i < ctx->interleave_ways; i++) { + for (int j = 0; j < ctx->interleave_ways; j++) { + if (ctx->host_bridges[i] == cxlsd->target[j]->dport_dev) { + found++; + break; + } + } + } + + if (found != ctx->interleave_ways) { + dev_dbg(dev, + "Not enough host bridges. Found %d for %d interleave ways requested\n", + found, ctx->interleave_ways); + return 0; + } + + /* + * Walk the root decoder resource range relying on cxl_rwsem.region to + * preclude sibling arrival/departure and find the largest free space + * gap. + */ + lockdep_assert_held_read(&cxl_rwsem.region); + res = cxlrd->res->child; + + /* With no resource child the whole parent resource is available */ + if (!res) + max = resource_size(cxlrd->res); + else + max = 0; + + for (prev = NULL; res; prev = res, res = res->sibling) { + + if (!prev && res->start == cxlrd->res->start && + res->end == cxlrd->res->end) { + max = resource_size(cxlrd->res); + break; + } + /* + * Sanity check for preventing arithmetic problems below as a + * resource with size 0 could imply using the end field below + * when set to unsigned zero - 1 or all f in hex. + */ + if (prev && !resource_size(prev)) + continue; + + if (!prev && res->start > cxlrd->res->start) { + free = res->start - cxlrd->res->start; + max = max(free, max); + } + if (prev && res->start > prev->end + 1) { + free = res->start - prev->end - 1; + max = max(free, max); + } + } + + if (prev && prev->end + 1 < cxlrd->res->end + 1) { + free = cxlrd->res->end - prev->end; + max = max(free, max); + } + + dev_dbg(cxlrd_dev(cxlrd), "found %pa bytes of free space\n", &max); + if (max > ctx->max_hpa) { + if (ctx->cxlrd) + put_device(cxlrd_dev(ctx->cxlrd)); + get_device(cxlrd_dev(cxlrd)); + ctx->cxlrd = cxlrd; + ctx->max_hpa = max; + } + return 0; +} + +/** + * cxl_get_hpa_freespace - find a root decoder with free capacity per constraints + * @cxlmd: the mem device requiring the HPA + * @interleave_ways: number of entries in @host_bridges + * @flags: CXL_DECODER_F flags for selecting RAM vs PMEM, and Type2 device + * @max_avail_contig: output parameter of max contiguous bytes available in the + * returned decoder + * + * Returns a pointer to a struct cxl_root_decoder + * + * The return tuple of a 'struct cxl_root_decoder' and 'bytes available given + * in (@max_avail_contig))' is a point in time snapshot. If by the time the + * caller goes to use this decoder and its capacity is reduced then caller needs + * to loop and retry. + * + * The returned root decoder has an elevated reference count that needs to be + * put with cxl_put_root_decoder(cxlrd). + */ +struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, + int interleave_ways, + unsigned long flags, + resource_size_t *max_avail_contig) +{ + struct cxlrd_max_context ctx = { + .flags = flags, + .interleave_ways = interleave_ways, + }; + struct cxl_port *root_port; + struct cxl_port *endpoint; + + endpoint = cxlmd->endpoint; + if (!endpoint) { + dev_dbg(&cxlmd->dev, "endpoint not linked to memdev\n"); + return ERR_PTR(-ENXIO); + } + + ctx.host_bridges = &endpoint->host_bridge; + + struct cxl_root *root __free(put_cxl_root) = find_cxl_root(endpoint); + if (!root) { + dev_dbg(&endpoint->dev, "endpoint is not related to a root port\n"); + return ERR_PTR(-ENXIO); + } + + root_port = &root->port; + scoped_guard(rwsem_read, &cxl_rwsem.region) + device_for_each_child(&root_port->dev, &ctx, find_max_hpa); + + if (!ctx.cxlrd) + return ERR_PTR(-ENOMEM); + + *max_avail_contig = ctx.max_hpa; + return ctx.cxlrd; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_hpa_freespace, "CXL"); + +void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd) +{ + put_device(cxlrd_dev(cxlrd)); +} +EXPORT_SYMBOL_NS_GPL(cxl_put_root_decoder, "CXL"); + static ssize_t size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -1468,9 +1663,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, dev_name(port->uport_dev), dev_name(&port->dev), __func__, cxld->interleave_ways, cxld->interleave_granularity, - (cxld->flags & CXL_DECODER_F_ENABLE) ? - "enabled" : - "disabled", + str_enabled_disabled(cxld->flags & CXL_DECODER_F_ENABLE), cxld->hpa_range.start, cxld->hpa_range.end); return -ENXIO; } @@ -1510,8 +1703,10 @@ static int cxl_port_setup_targets(struct cxl_port *port, cxl_rr->nr_targets_set); return -ENXIO; } - } else + } else { cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; + cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id; + } inc = 1; out_target_set: cxl_rr->nr_targets_set += inc; @@ -2191,6 +2386,7 @@ int cxl_decoder_detach(struct cxl_region *cxlr, } return 0; } +EXPORT_SYMBOL_NS_GPL(cxl_decoder_detach, "CXL"); static int __attach_target(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos, @@ -2385,7 +2581,7 @@ static struct cxl_region *to_cxl_region(struct device *dev) return container_of(dev, struct cxl_region, dev); } -static void unregister_region(void *_cxlr) +void unregister_region(void *_cxlr) { struct cxl_region *cxlr = _cxlr; struct cxl_region_params *p = &cxlr->params; @@ -2404,6 +2600,7 @@ static void unregister_region(void *_cxlr) cxl_region_iomem_release(cxlr); put_device(&cxlr->dev); } +EXPORT_SYMBOL_NS_GPL(unregister_region, "CXL"); static struct lock_class_key cxl_region_key; @@ -2442,14 +2639,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { if (cxlr->coord[i].read_bandwidth) { - rc = 0; - if (cxl_need_node_perf_attrs_update(nid)) - node_set_perf_attrs(nid, &cxlr->coord[i], i); - else - rc = cxl_update_hmat_access_coordinates(nid, cxlr, i); - - if (rc == 0) - cset++; + node_update_perf_attrs(nid, &cxlr->coord[i], i); + cset++; } } @@ -2487,6 +2678,10 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, if (nid != region_nid) return NOTIFY_DONE; + /* No action needed if node bit already set */ + if (node_test_and_set(nid, nodemask_region_seen)) + return NOTIFY_DONE; + if (!cxl_region_update_coordinates(cxlr, nid)) return NOTIFY_DONE; @@ -2569,6 +2764,29 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, return ERR_PTR(rc); } +/** + * cxl_get_region_range - obtain range linked to a CXL region + * + * @region: a pointer to struct cxl_region + * @range: a pointer to a struct range to be set + * + * Returns 0 or error. + */ +int cxl_get_region_range(struct cxl_region *region, struct range *range) +{ + if (WARN_ON_ONCE(!region)) + return -ENODEV; + + if (!region->params.res) + return -ENOSPC; + + range->start = region->params.res->start; + range->end = region->params.res->end; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_region_range, "CXL"); + static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf) { return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); @@ -2587,7 +2805,8 @@ static ssize_t create_ram_region_show(struct device *dev, } static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, - enum cxl_partition_mode mode, int id) + enum cxl_partition_mode mode, int id, + enum cxl_decoder_type target_type) { int rc; @@ -2609,7 +2828,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, return ERR_PTR(-EBUSY); } - return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM); + return devm_cxl_add_region(cxlrd, id, mode, target_type); } static ssize_t create_region_store(struct device *dev, const char *buf, @@ -2623,7 +2842,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf, if (rc != 1) return -EINVAL; - cxlr = __create_region(cxlrd, mode, id); + cxlr = __create_region(cxlrd, mode, id, CXL_DECODER_HOSTONLYMEM); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); @@ -2675,6 +2894,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name) return to_cxl_region(region_dev); } +static void drop_region(struct cxl_region *cxlr) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_port *port = cxlrd_to_port(cxlrd); + + devm_release_action(port->uport_dev, unregister_region, cxlr); +} + static ssize_t delete_region_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -2918,6 +3145,16 @@ static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) return false; } +static bool has_hpa_to_spa(struct cxl_root_decoder *cxlrd) +{ + return cxlrd->ops && cxlrd->ops->hpa_to_spa; +} + +static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd) +{ + return cxlrd->ops && cxlrd->ops->spa_to_hpa; +} + u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) { @@ -2972,8 +3209,8 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, hpa = hpa_offset + p->res->start + p->cache_size; /* Root decoder translation overrides typical modulo decode */ - if (cxlrd->hpa_to_spa) - hpa = cxlrd->hpa_to_spa(cxlrd, hpa); + if (has_hpa_to_spa(cxlrd)) + hpa = cxlrd->ops->hpa_to_spa(cxlrd, hpa); if (!cxl_resource_contains_addr(p->res, hpa)) { dev_dbg(&cxlr->dev, @@ -2982,12 +3219,107 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, } /* Simple chunk check, by pos & gran, only applies to modulo decodes */ - if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) + if (!has_hpa_to_spa(cxlrd) && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) return ULLONG_MAX; return hpa; } +struct dpa_result { + struct cxl_memdev *cxlmd; + u64 dpa; +}; + +static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset, + struct dpa_result *result) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_endpoint_decoder *cxled; + u64 hpa, hpa_offset, dpa_offset; + u64 bits_upper, bits_lower; + u64 shifted, rem, temp; + u16 eig = 0; + u8 eiw = 0; + int pos; + + lockdep_assert_held(&cxl_rwsem.region); + lockdep_assert_held(&cxl_rwsem.dpa); + + /* Input validation ensures valid ways and gran */ + granularity_to_eig(p->interleave_granularity, &eig); + ways_to_eiw(p->interleave_ways, &eiw); + + /* + * If the root decoder has SPA to CXL HPA callback, use it. Otherwise + * CXL HPA is assumed to equal SPA. + */ + if (has_spa_to_hpa(cxlrd)) { + hpa = cxlrd->ops->spa_to_hpa(cxlrd, p->res->start + offset); + hpa_offset = hpa - p->res->start; + } else { + hpa_offset = offset; + } + /* + * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13 + * eiw < 8 + * Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8]. + * Per spec "remove IW bits starting with bit position IG+8" + * eiw >= 8 + * Position is not explicitly stored in HPA_OFFSET bits. It is + * derived from the modulo operation of the upper bits using + * the total number of interleave ways. + */ + if (eiw < 8) { + pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0); + } else { + shifted = hpa_offset >> (eig + 8); + div64_u64_rem(shifted, p->interleave_ways, &rem); + pos = rem; + } + if (pos < 0 || pos >= p->nr_targets) { + dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n", + pos, p->nr_targets); + return -ENXIO; + } + + /* + * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13 + * Lower bits [IG+7:0] pass through unchanged + * (eiw < 8) + * Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW) + * Clear the position bits to isolate upper section, then + * reverse the left shift by eiw that occurred during DPA->HPA + * (eiw >= 8) + * Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3 + * Extract upper bits from the correct bit range and divide by 3 + * to recover the original DPA upper bits + */ + bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0); + if (eiw < 8) { + temp = hpa_offset &= ~((u64)GENMASK(eig + eiw + 8 - 1, 0)); + dpa_offset = temp >> eiw; + } else { + bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3); + dpa_offset = bits_upper << (eig + 8); + } + dpa_offset |= bits_lower; + + /* Look-up and return the result: a memdev and a DPA */ + for (int i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + if (cxled->pos != pos) + continue; + result->cxlmd = cxled_to_memdev(cxled); + result->dpa = cxl_dpa_resource_start(cxled) + dpa_offset; + + return 0; + } + dev_err(&cxlr->dev, "No device found for position %d\n", pos); + + return -ENXIO; +} + static struct lock_class_key cxl_pmem_region_key; static int cxl_pmem_region_alloc(struct cxl_region *cxlr) @@ -3400,14 +3732,12 @@ static int __construct_region(struct cxl_region *cxlr, return 0; } -/* Establish an empty region covering the given HPA range */ -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, +static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd, struct cxl_endpoint_decoder *cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *port = cxlrd_to_port(cxlrd); struct cxl_dev_state *cxlds = cxlmd->cxlds; - int rc, part = READ_ONCE(cxled->part); + int part = READ_ONCE(cxled->part); struct cxl_region *cxlr; if (part < 0 || part >= cxlds->nr_partitions) { @@ -3420,16 +3750,30 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, do { cxlr = __create_region(cxlrd, cxlds->part[part].mode, - atomic_read(&cxlrd->region_id)); + atomic_read(&cxlrd->region_id), + cxled->cxld.target_type); } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); - if (IS_ERR(cxlr)) { + if (IS_ERR(cxlr)) dev_err(cxlmd->dev.parent, "%s:%s: %s failed assign region: %ld\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, PTR_ERR(cxlr)); + + return cxlr; +} + +/* Establish an empty region covering the given HPA range */ +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_port *port = cxlrd_to_port(cxlrd); + struct cxl_region *cxlr; + int rc; + + cxlr = construct_region_begin(cxlrd, cxled); + if (IS_ERR(cxlr)) return cxlr; - } rc = __construct_region(cxlr, cxlrd, cxled); if (rc) { @@ -3440,6 +3784,104 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, return cxlr; } +DEFINE_FREE(cxl_region_drop, struct cxl_region *, if (_T) drop_region(_T)) + +static struct cxl_region * +__construct_new_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder **cxled, int ways) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled[0]); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region_params *p; + resource_size_t size = 0; + int rc, i; + + struct cxl_region *cxlr __free(cxl_region_drop) = + construct_region_begin(cxlrd, cxled[0]); + if (IS_ERR(cxlr)) + return cxlr; + + guard(rwsem_write)(&cxl_rwsem.region); + + /* + * Sanity check. This should not happen with an accel driver handling + * the region creation. + */ + p = &cxlr->params; + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s unexpected region state\n", + dev_name(&cxlmd->dev), dev_name(&cxled[0]->cxld.dev), + __func__); + return ERR_PTR(-EBUSY); + } + + rc = set_interleave_ways(cxlr, ways); + if (rc) + return ERR_PTR(rc); + + rc = set_interleave_granularity(cxlr, cxld->interleave_granularity); + if (rc) + return ERR_PTR(rc); + + scoped_guard(rwsem_read, &cxl_rwsem.dpa) { + for (i = 0; i < ways; i++) { + if (!cxled[i]->dpa_res) + return ERR_PTR(-EINVAL); + size += resource_size(cxled[i]->dpa_res); + } + + rc = alloc_hpa(cxlr, size); + if (rc) + return ERR_PTR(rc); + + for (i = 0; i < ways; i++) { + rc = cxl_region_attach(cxlr, cxled[i], i); + if (rc) + return ERR_PTR(rc); + } + } + + rc = cxl_region_decode_commit(cxlr); + if (rc) + return ERR_PTR(rc); + + p->state = CXL_CONFIG_COMMIT; + + return no_free_ptr(cxlr); +} + +/** + * cxl_create_region - Establish a region given an endpoint decoder + * @cxlrd: root decoder to allocate HPA + * @cxled: endpoint decoders with reserved DPA capacity + * @ways: interleave ways required + * + * Returns a fully formed region in the commit state and attached to the + * cxl_region driver. + */ +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder **cxled, + int ways) +{ + struct cxl_region *cxlr; + + mutex_lock(&cxlrd->range_lock); + cxlr = __construct_new_region(cxlrd, cxled, ways); + mutex_unlock(&cxlrd->range_lock); + if (IS_ERR(cxlr)) + return cxlr; + + if (device_attach(&cxlr->dev) <= 0) { + dev_err(&cxlr->dev, "failed to create region\n"); + drop_region(cxlr); + return ERR_PTR(-ENODEV); + } + + return cxlr; +} +EXPORT_SYMBOL_NS_GPL(cxl_create_region, "CXL"); + static struct cxl_region * cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa) { @@ -3547,6 +3989,105 @@ static void shutdown_notifiers(void *_cxlr) unregister_mt_adistance_algorithm(&cxlr->adist_notifier); } +static void remove_debugfs(void *dentry) +{ + debugfs_remove_recursive(dentry); +} + +static int validate_region_offset(struct cxl_region *cxlr, u64 offset) +{ + struct cxl_region_params *p = &cxlr->params; + resource_size_t region_size; + u64 hpa; + + if (offset < p->cache_size) { + dev_err(&cxlr->dev, + "Offset %#llx is within extended linear cache %pa\n", + offset, &p->cache_size); + return -EINVAL; + } + + region_size = resource_size(p->res); + if (offset >= region_size) { + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n", + offset, ®ion_size); + return -EINVAL; + } + + hpa = p->res->start + offset; + if (hpa < p->res->start || hpa > p->res->end) { + dev_err(&cxlr->dev, "HPA %#llx not in region %pr\n", hpa, + p->res); + return -EINVAL; + } + + return 0; +} + +static int cxl_region_debugfs_poison_inject(void *data, u64 offset) +{ + struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL }; + struct cxl_region *cxlr = data; + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + if (validate_region_offset(cxlr, offset)) + return -EINVAL; + + rc = region_offset_to_dpa_result(cxlr, offset, &result); + if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { + dev_dbg(&cxlr->dev, + "Failed to resolve DPA for region offset %#llx rc %d\n", + offset, rc); + + return rc ? rc : -EINVAL; + } + + return cxl_inject_poison_locked(result.cxlmd, result.dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL, + cxl_region_debugfs_poison_inject, "%llx\n"); + +static int cxl_region_debugfs_poison_clear(void *data, u64 offset) +{ + struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL }; + struct cxl_region *cxlr = data; + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + if (validate_region_offset(cxlr, offset)) + return -EINVAL; + + rc = region_offset_to_dpa_result(cxlr, offset, &result); + if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { + dev_dbg(&cxlr->dev, + "Failed to resolve DPA for region offset %#llx rc %d\n", + offset, rc); + + return rc ? rc : -EINVAL; + } + + return cxl_clear_poison_locked(result.cxlmd, result.dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, + cxl_region_debugfs_poison_clear, "%llx\n"); + static int cxl_region_can_probe(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; @@ -3576,12 +4117,20 @@ static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; + bool poison_supported = true; int rc; rc = cxl_region_can_probe(cxlr); if (rc) return rc; + /* + * HDM-D[B] (device-memory) regions have accelerator specific usage. + * Skip device-dax registration. + */ + if (cxlr->type == CXL_DECODER_DEVMEM) + return 0; + /* * From this point on any path that changes the region's state away from * CXL_CONFIG_COMMIT is also responsible for releasing the driver. @@ -3599,6 +4148,31 @@ static int cxl_region_probe(struct device *dev) if (rc) return rc; + /* Create poison attributes if all memdevs support the capabilities */ + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + + if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) || + !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR)) { + poison_supported = false; + break; + } + } + + if (poison_supported) { + struct dentry *dentry; + + dentry = cxl_debugfs_create_dir(dev_name(dev)); + debugfs_create_file("inject_poison", 0200, dentry, cxlr, + &cxl_poison_inject_fops); + debugfs_create_file("clear_poison", 0200, dentry, cxlr, + &cxl_poison_clear_fops); + rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); + if (rc) + return rc; + } + switch (cxlr->mode) { case CXL_PARTMODE_PMEM: rc = devm_cxl_region_edac_register(cxlr); diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 5ca7b0eed568b..dcf444f1fe48d 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -271,10 +272,10 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, "CXL"); static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, struct cxl_register_map *map) { - u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); - int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + u8 reg_type = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); + int bar = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BIR_MASK, reg_lo); u64 offset = ((u64)reg_hi << 32) | - (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + (reg_lo & PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW_MASK); if (offset > pci_resource_len(pdev, bar)) { dev_warn(&pdev->dev, @@ -311,15 +312,15 @@ static int __cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_ty }; regloc = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_REG_LOCATOR); + PCI_DVSEC_CXL_REG_LOCATOR); if (!regloc) return -ENXIO; pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); - regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; - regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; + regloc += PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1_OFFSET; + regblocks = (regloc_size - PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1_OFFSET) / 8; for (i = 0; i < regblocks; i++, regloc += 8) { u32 reg_lo, reg_hi; @@ -641,4 +642,3 @@ resource_size_t cxl_rcd_component_reg_phys(struct device *dev, return CXL_RESOURCE_NONE; return __rcrb_to_component(dev, &dport->rcrb, CXL_RCRB_UPSTREAM); } -EXPORT_SYMBOL_NS_GPL(cxl_rcd_component_reg_phys, "CXL"); diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index a972e4ef19368..69f8a0efd9241 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -48,40 +48,13 @@ { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ ) -TRACE_EVENT(cxl_port_aer_uncorrectable_error, - TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl), - TP_ARGS(dev, status, fe, hl), - TP_STRUCT__entry( - __string(device, dev_name(dev)) - __string(host, dev_name(dev->parent)) - __field(u32, status) - __field(u32, first_error) - __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) - ), - TP_fast_assign( - __assign_str(device); - __assign_str(host); - __entry->status = status; - __entry->first_error = fe; - /* - * Embed the 512B headerlog data for user app retrieval and - * parsing, but no need to print this in the trace buffer. - */ - memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); - ), - TP_printk("device=%s host=%s status: '%s' first_error: '%s'", - __get_str(device), __get_str(host), - show_uc_errs(__entry->status), - show_uc_errs(__entry->first_error) - ) -); - TRACE_EVENT(cxl_aer_uncorrectable_error, - TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl), - TP_ARGS(cxlmd, status, fe, hl), + TP_PROTO(const struct device *cxlmd, u32 status, u32 fe, u32 *hl, + u64 serial), + TP_ARGS(cxlmd, status, fe, hl, serial), TP_STRUCT__entry( - __string(memdev, dev_name(&cxlmd->dev)) - __string(host, dev_name(cxlmd->dev.parent)) + __string(memdev, dev_name(cxlmd)) + __string(host, dev_name(cxlmd->parent)) __field(u64, serial) __field(u32, status) __field(u32, first_error) @@ -90,7 +63,7 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, TP_fast_assign( __assign_str(memdev); __assign_str(host); - __entry->serial = cxlmd->cxlds->serial; + __entry->serial = serial; __entry->status = status; __entry->first_error = fe; /* @@ -124,38 +97,19 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ ) -TRACE_EVENT(cxl_port_aer_correctable_error, - TP_PROTO(struct device *dev, u32 status), - TP_ARGS(dev, status), - TP_STRUCT__entry( - __string(device, dev_name(dev)) - __string(host, dev_name(dev->parent)) - __field(u32, status) - ), - TP_fast_assign( - __assign_str(device); - __assign_str(host); - __entry->status = status; - ), - TP_printk("device=%s host=%s status='%s'", - __get_str(device), __get_str(host), - show_ce_errs(__entry->status) - ) -); - TRACE_EVENT(cxl_aer_correctable_error, - TP_PROTO(const struct cxl_memdev *cxlmd, u32 status), - TP_ARGS(cxlmd, status), + TP_PROTO(const struct device *cxlmd, u32 status, u64 serial), + TP_ARGS(cxlmd, status, serial), TP_STRUCT__entry( - __string(memdev, dev_name(&cxlmd->dev)) - __string(host, dev_name(cxlmd->dev.parent)) + __string(memdev, dev_name(cxlmd)) + __string(host, dev_name(cxlmd->parent)) __field(u64, serial) __field(u32, status) ), TP_fast_assign( __assign_str(memdev); __assign_str(host); - __entry->serial = cxlmd->cxlds->serial; + __entry->serial = serial; __entry->status = status; ), TP_printk("memdev=%s host=%s serial=%lld: status: '%s'", diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 847e37be42c47..06a111392c3b9 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -12,6 +12,7 @@ #include #include #include +#include extern const struct nvdimm_security_ops *cxl_security_ops; @@ -38,10 +39,6 @@ extern const struct nvdimm_security_ops *cxl_security_ops; #define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) #define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) -#define CXL_CM_CAP_CAP_ID_RAS 0x2 -#define CXL_CM_CAP_CAP_ID_HDM 0x5 -#define CXL_CM_CAP_CAP_HDM_VERSION 1 - /* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */ #define CXL_HDM_DECODER_CAP_OFFSET 0x0 #define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) @@ -201,104 +198,10 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48) #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20 -/* - * Using struct_group() allows for per register-block-type helper routines, - * without requiring block-type agnostic code to include the prefix. - */ -struct cxl_regs { - /* - * Common set of CXL Component register block base pointers - * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure - * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure - */ - struct_group_tagged(cxl_component_regs, component, - void __iomem *hdm_decoder; - void __iomem *ras; - ); - /* - * Common set of CXL Device register block base pointers - * @status: CXL 2.0 8.2.8.3 Device Status Registers - * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers - * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers - */ - struct_group_tagged(cxl_device_regs, device_regs, - void __iomem *status, *mbox, *memdev; - ); - - struct_group_tagged(cxl_pmu_regs, pmu_regs, - void __iomem *pmu; - ); - - /* - * RCH downstream port specific RAS register - * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB - */ - struct_group_tagged(cxl_rch_regs, rch_regs, - void __iomem *dport_aer; - ); - - /* - * RCD upstream port specific PCIe cap register - * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB - */ - struct_group_tagged(cxl_rcd_regs, rcd_regs, - void __iomem *rcd_pcie_cap; - ); -}; - -struct cxl_reg_map { - bool valid; - int id; - unsigned long offset; - unsigned long size; -}; - -struct cxl_component_reg_map { - struct cxl_reg_map hdm_decoder; - struct cxl_reg_map ras; -}; - -struct cxl_device_reg_map { - struct cxl_reg_map status; - struct cxl_reg_map mbox; - struct cxl_reg_map memdev; -}; - -struct cxl_pmu_reg_map { - struct cxl_reg_map pmu; -}; - -/** - * struct cxl_register_map - DVSEC harvested register block mapping parameters - * @host: device for devm operations and logging - * @base: virtual base of the register-block-BAR + @block_offset - * @resource: physical resource base of the register block - * @max_size: maximum mapping size to perform register search - * @reg_type: see enum cxl_regloc_type - * @component_map: cxl_reg_map for component registers - * @device_map: cxl_reg_maps for device registers - * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units - */ -struct cxl_register_map { - struct device *host; - void __iomem *base; - resource_size_t resource; - resource_size_t max_size; - u8 reg_type; - union { - struct cxl_component_reg_map component_map; - struct cxl_device_reg_map device_map; - struct cxl_pmu_reg_map pmu_map; - }; -}; - void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); void cxl_probe_device_regs(struct device *dev, void __iomem *base, struct cxl_device_reg_map *map); -int cxl_map_component_regs(const struct cxl_register_map *map, - struct cxl_component_regs *regs, - unsigned long map_mask); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); @@ -312,28 +215,11 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); int cxl_setup_regs(struct cxl_register_map *map); struct cxl_dport; -resource_size_t cxl_rcd_component_reg_phys(struct device *dev, - struct cxl_dport *dport); int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 -/* - * cxl_decoder flags that define the type of memory / devices this - * decoder supports as well as configuration lock status See "CXL 2.0 - * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. - * Additionally indicate whether decoder settings were autodetected, - * user customized. - */ -#define CXL_DECODER_F_RAM BIT(0) -#define CXL_DECODER_F_PMEM BIT(1) -#define CXL_DECODER_F_TYPE2 BIT(2) -#define CXL_DECODER_F_TYPE3 BIT(3) -#define CXL_DECODER_F_LOCK BIT(4) -#define CXL_DECODER_F_ENABLE BIT(5) -#define CXL_DECODER_F_MASK GENMASK(5, 0) - enum cxl_decoder_type { CXL_DECODER_DEVMEM = 2, CXL_DECODER_HOSTONLYMEM = 3, @@ -357,6 +243,9 @@ enum cxl_decoder_type { * @target_type: accelerator vs expander (type2 vs type3) selector * @region: currently assigned region for this decoder * @flags: memory type capabilities and locking + * @target_map: cached copy of hardware port-id list, available at init + * before all @dport objects have been instantiated. While + * dport id is 8bit, CFMWS interleave targets are 32bits. * @commit: device/decoder-type specific callback to commit settings to hw * @reset: device/decoder-type specific callback to reset hw settings */ @@ -369,6 +258,7 @@ struct cxl_decoder { enum cxl_decoder_type target_type; struct cxl_region *region; unsigned long flags; + u32 target_map[CXL_DECODER_MAX_INTERLEAVE]; int (*commit)(struct cxl_decoder *cxld); void (*reset)(struct cxl_decoder *cxld); }; @@ -419,27 +309,35 @@ struct cxl_switch_decoder { }; struct cxl_root_decoder; -typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); +/** + * struct cxl_rd_ops - CXL root decoder callback operations + * @hpa_to_spa: Convert host physical address to system physical address + * @spa_to_hpa: Convert system physical address to host physical address + */ +struct cxl_rd_ops { + u64 (*hpa_to_spa)(struct cxl_root_decoder *cxlrd, u64 hpa); + u64 (*spa_to_hpa)(struct cxl_root_decoder *cxlrd, u64 spa); +}; /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent resource for region allocations * @cache_size: extended linear cache size if exists, otherwise zero. * @region_id: region id for next region provisioning event - * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range * @qos_class: QoS performance class cookie + * @ops: CXL root decoder operations * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { struct resource *res; resource_size_t cache_size; atomic_t region_id; - cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; struct mutex range_lock; int qos_class; + struct cxl_rd_ops *ops; struct cxl_switch_decoder cxlsd; }; @@ -485,11 +383,6 @@ struct cxl_region_params { resource_size_t cache_size; }; -enum cxl_partition_mode { - CXL_PARTMODE_RAM, - CXL_PARTMODE_PMEM, -}; - /* * Indicate whether this region has been assembled by autodetection or * userspace assembly. Prevent endpoint decoders outside of automatic @@ -587,6 +480,7 @@ struct cxl_dax_region { * @parent_dport: dport that points to this port in the parent * @decoder_ida: allocator for decoder ids * @reg_map: component and ras register mapping parameters + * @uport_regs: mapped component registers * @nr_dports: number of entries in @dports * @hdm_end: track last allocated HDM decoder instance for allocation ordering * @commit_end: cursor to track highest committed decoder for commit ordering @@ -595,6 +489,7 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds + * @component_reg_phys: Physical address of component register */ struct cxl_port { struct device dev; @@ -607,6 +502,7 @@ struct cxl_port { struct cxl_dport *parent_dport; struct ida decoder_ida; struct cxl_register_map reg_map; + struct cxl_component_regs uport_regs; int nr_dports; int hdm_end; int commit_end; @@ -618,6 +514,7 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; + resource_size_t component_reg_phys; }; /** @@ -743,6 +640,7 @@ struct cxl_root *find_cxl_root(struct cxl_port *port); DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev)) DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) +DEFINE_FREE(put_cxled, struct cxl_endpoint_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxld.dev)) DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev)) DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) @@ -762,28 +660,23 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, resource_size_t rcrb); -#ifdef CONFIG_PCIEAER_CXL -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport); -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); -#else -static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, - struct device *host) { } -#endif - struct cxl_decoder *to_cxl_decoder(struct device *dev); struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev); bool is_root_decoder(struct device *dev); + +#define cxlrd_dev(cxlrd) (&(cxlrd)->cxlsd.cxld.dev) + bool is_switch_decoder(struct device *dev); bool is_endpoint_decoder(struct device *dev); struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); +int cxl_decoder_add(struct cxl_decoder *cxld); struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port); -int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); +int cxl_decoder_add_locked(struct cxl_decoder *cxld); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); static inline int cxl_root_decoder_autoremove(struct device *host, struct cxl_root_decoder *cxlrd) @@ -806,12 +699,10 @@ struct cxl_endpoint_dvsec_info { struct range dvsec_range[2]; }; -struct cxl_hdm; -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info); -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info); -int devm_cxl_add_passthrough_decoder(struct cxl_port *port); +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port); +int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port); +int devm_cxl_endpoint_decoders_setup(struct cxl_port *port); + struct cxl_dev_state; int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info); @@ -890,7 +781,7 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); -void cxl_switch_parse_cdat(struct cxl_port *port); +void cxl_switch_parse_cdat(struct cxl_dport *dport); int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); @@ -905,6 +796,10 @@ void cxl_coordinates_combine(struct access_coordinate *out, struct access_coordinate *c2); bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); +struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); /* * Unit test builds overrides this to __weak, find the 'strong' version @@ -915,4 +810,21 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #endif u16 cxl_gpf_get_dvsec(struct device *dev); + +/* + * Declaration for functions that are mocked by cxl_test that are called by + * cxl_core. The respective functions are defined as __foo() and called by + * cxl_core as foo(). The macros below ensures that those functions would + * exist as foo(). See tools/testing/cxl/cxl_core_exports.c and + * tools/testing/cxl/exports.h for setting up the mock functions. The dance + * is done to avoid a circular dependency where cxl_core calls a function that + * ends up being a mock function and goes to * cxl_test where it calls a + * cxl_core function. + */ +#ifndef CXL_TEST_ENABLE +#define DECLARE_TESTABLE(x) __##x +#define devm_cxl_add_dport_by_dev DECLARE_TESTABLE(devm_cxl_add_dport_by_dev) +#define devm_cxl_switch_port_decoders_setup DECLARE_TESTABLE(devm_cxl_switch_port_decoders_setup) +#endif + #endif /* __CXL_H__ */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 751478dfc4106..1eaf4e57554e0 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -43,6 +43,7 @@ * @cxl_nvb: coordinate removal of @cxl_nvd if present * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem * @endpoint: connection to the CXL port topology for this memory device + * @ops: incremental caller specific probe routine * @id: id number of this memdev instance. * @depth: endpoint port depth * @scrub_cycle: current scrub cycle set for this device @@ -59,6 +60,7 @@ struct cxl_memdev { struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; struct cxl_port *endpoint; + const struct cxl_memdev_ops *ops; int id; int depth; u8 scrub_cycle; @@ -95,8 +97,6 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) return is_cxl_memdev(port->uport_dev); } -struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds); int devm_cxl_sanitize_setup_notifier(struct device *host, struct cxl_memdev *cxlmd); struct cxl_memdev_state; @@ -105,8 +105,6 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); -#define CXL_NR_PARTITIONS_MAX 2 - struct cxl_dpa_info { u64 size; struct cxl_dpa_part_info { @@ -365,87 +363,6 @@ struct cxl_security_state { struct kernfs_node *sanitize_node; }; -/* - * enum cxl_devtype - delineate type-2 from a generic type-3 device - * @CXL_DEVTYPE_DEVMEM - Vendor specific CXL Type-2 device implementing HDM-D or - * HDM-DB, no requirement that this device implements a - * mailbox, or other memory-device-standard manageability - * flows. - * @CXL_DEVTYPE_CLASSMEM - Common class definition of a CXL Type-3 device with - * HDM-H and class-mandatory memory device registers - */ -enum cxl_devtype { - CXL_DEVTYPE_DEVMEM, - CXL_DEVTYPE_CLASSMEM, -}; - -/** - * struct cxl_dpa_perf - DPA performance property entry - * @dpa_range: range for DPA address - * @coord: QoS performance data (i.e. latency, bandwidth) - * @cdat_coord: raw QoS performance data from CDAT - * @qos_class: QoS Class cookies - */ -struct cxl_dpa_perf { - struct range dpa_range; - struct access_coordinate coord[ACCESS_COORDINATE_MAX]; - struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; - int qos_class; -}; - -/** - * struct cxl_dpa_partition - DPA partition descriptor - * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) - * @perf: performance attributes of the partition from CDAT - * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... - */ -struct cxl_dpa_partition { - struct resource res; - struct cxl_dpa_perf perf; - enum cxl_partition_mode mode; -}; - -/** - * struct cxl_dev_state - The driver device state - * - * cxl_dev_state represents the CXL driver/device state. It provides an - * interface to mailbox commands as well as some cached data about the device. - * Currently only memory devices are represented. - * - * @dev: The device associated with this CXL state - * @cxlmd: The device representing the CXL.mem capabilities of @dev - * @reg_map: component and ras register mapping parameters - * @regs: Parsed register blocks - * @cxl_dvsec: Offset to the PCIe device DVSEC - * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) - * @media_ready: Indicate whether the device media is usable - * @dpa_res: Overall DPA resource tree for the device - * @part: DPA partition array - * @nr_partitions: Number of DPA partitions - * @serial: PCIe Device Serial Number - * @type: Generic Memory Class device or Vendor Specific Memory device - * @cxl_mbox: CXL mailbox context - * @cxlfs: CXL features context - */ -struct cxl_dev_state { - struct device *dev; - struct cxl_memdev *cxlmd; - struct cxl_register_map reg_map; - struct cxl_regs regs; - int cxl_dvsec; - bool rcd; - bool media_ready; - struct resource dpa_res; - struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; - unsigned int nr_partitions; - u64 serial; - enum cxl_devtype type; - struct cxl_mailbox cxl_mbox; -#ifdef CONFIG_CXL_FEATURES - struct cxl_features_state *cxlfs; -#endif -}; - static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) { /* @@ -850,7 +767,8 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_memdev_state *mds); int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev); +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, @@ -869,6 +787,8 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, int cxl_trigger_poison_list(struct cxl_memdev *cxlmd); int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa); int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa); +int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa); +int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa); #ifdef CONFIG_CXL_EDAC_MEM_FEATURES int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 67ad5b007498e..53760ce31af80 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -7,89 +7,12 @@ #define CXL_MEMORY_PROGIF 0x10 -/* - * See section 8.1 Configuration Space Registers in the CXL 2.0 - * Specification. Names are taken straight from the specification with "CXL" and - * "DVSEC" redundancies removed. When obvious, abbreviations may be used. - */ -#define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) - -/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ -#define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_CACHE_CAPABLE BIT(0) -#define CXL_DVSEC_MEM_CAPABLE BIT(2) -#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) -#define CXL_DVSEC_CACHE_WBI_CAPABLE BIT(6) -#define CXL_DVSEC_CXL_RST_CAPABLE BIT(7) -#define CXL_DVSEC_CXL_RST_TIMEOUT_MASK GENMASK(10, 8) -#define CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE BIT(11) -#define CXL_DVSEC_CTRL_OFFSET 0xC -#define CXL_DVSEC_MEM_ENABLE BIT(2) -#define CXL_DVSEC_CTRL2_OFFSET 0x10 -#define CXL_DVSEC_DISABLE_CACHING BIT(0) -#define CXL_DVSEC_INIT_CACHE_WBI BIT(1) -#define CXL_DVSEC_INIT_CXL_RESET BIT(2) -#define CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE BIT(3) -#define CXL_DVSEC_STATUS2_OFFSET 0x12 -#define CXL_DVSEC_CACHE_INVALID BIT(0) -#define CXL_DVSEC_CXL_RST_COMPLETE BIT(1) -#define CXL_DVSEC_CXL_RESET_ERR BIT(2) -#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + ((i) * 0x10)) -#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + ((i) * 0x10)) -#define CXL_DVSEC_MEM_INFO_VALID BIT(0) -#define CXL_DVSEC_MEM_ACTIVE BIT(1) -#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) -#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + ((i) * 0x10)) -#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + ((i) * 0x10)) -#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) - -#define CXL_DVSEC_RANGE_MAX 2 - -/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ -#define CXL_DVSEC_FUNCTION_MAP 2 - -/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */ -#define CXL_DVSEC_PORT_EXTENSIONS 3 - -/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ -#define CXL_DVSEC_PORT_GPF 4 -#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8) -#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8) - -/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ -#define CXL_DVSEC_DEVICE_GPF 5 - -/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ -#define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 - -/* CXL 2.0 8.1.9: Register Locator DVSEC */ -#define CXL_DVSEC_REG_LOCATOR 8 -#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC -#define CXL_DVSEC_REG_LOCATOR_BIR_MASK GENMASK(2, 0) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16) - /* * NOTE: Currently all the functions which are enabled for CXL require their * vectors to be in the first 16. Use this as the default max. */ #define CXL_PCI_DEFAULT_MAX_VECTORS 16 -/* Register Block Identifier (RBI) */ -enum cxl_regloc_type { - CXL_REGLOC_RBI_EMPTY = 0, - CXL_REGLOC_RBI_COMPONENT, - CXL_REGLOC_RBI_VIRT, - CXL_REGLOC_RBI_MEMDEV, - CXL_REGLOC_RBI_PMU, - CXL_REGLOC_RBI_TYPES -}; - /* * Table Access DOE, CDAT Read Entry Response * @@ -141,12 +64,30 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) return lnksta2 & PCI_EXP_LNKSTA2_FLIT; } +/* + * Assume that the caller has already validated that @pdev has CXL + * capabilities, any RCiEP with CXL capabilities is treated as a + * Restricted CXL Device (RCD) and finds upstream port and endpoint + * registers in a Root Complex Register Block (RCRB). + */ +static inline bool is_cxl_restricted(struct pci_dev *pdev) +{ + return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; +} + int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info); void read_cdat_data(struct cxl_port *port); -void cxl_cor_error_detected(struct pci_dev *pdev); -pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state); + +#ifdef CONFIG_CXL_RAS +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); +void cxl_uport_init_ras_reporting(struct cxl_port *port, + struct device *host); +#else +static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, + struct device *host) { } +static inline void cxl_uport_init_ras_reporting(struct cxl_port *port, + struct device *host) { } +#endif + #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 6e6777b7bafb5..6d0f2f0b332a2 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -7,6 +7,7 @@ #include "cxlmem.h" #include "cxlpci.h" +#include "private.h" /** * DOC: cxl mem @@ -45,44 +46,6 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data) return 0; } -static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, - struct cxl_dport *parent_dport) -{ - struct cxl_port *parent_port = parent_dport->port; - struct cxl_port *endpoint, *iter, *down; - int rc; - - /* - * Now that the path to the root is established record all the - * intervening ports in the chain. - */ - for (iter = parent_port, down = NULL; !is_cxl_root(iter); - down = iter, iter = to_cxl_port(iter->dev.parent)) { - struct cxl_ep *ep; - - ep = cxl_ep_load(iter, cxlmd); - ep->next = down; - } - - /* Note: endpoint port component registers are derived from @cxlds */ - endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, - parent_dport); - if (IS_ERR(endpoint)) - return PTR_ERR(endpoint); - - rc = cxl_endpoint_autoremove(cxlmd, endpoint); - if (rc) - return rc; - - if (!endpoint->dev.driver) { - dev_err(&cxlmd->dev, "%s failed probe\n", - dev_name(&endpoint->dev)); - return -ENXIO; - } - - return 0; -} - static int cxl_debugfs_poison_inject(void *data, u64 dpa) { struct cxl_memdev *cxlmd = data; @@ -103,6 +66,26 @@ static int cxl_debugfs_poison_clear(void *data, u64 dpa) DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, cxl_debugfs_poison_clear, "%llx\n"); +static void cxl_memdev_poison_enable(struct cxl_memdev_state *mds, + struct cxl_memdev *cxlmd, + struct dentry *dentry) +{ + /* + * Avoid poison debugfs for DEVMEM aka accelerators as they rely on + * cxl_memdev_state. + */ + if (!mds) + return; + + if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) + debugfs_create_file("inject_poison", 0200, dentry, cxlmd, + &cxl_poison_inject_fops); + + if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) + debugfs_create_file("clear_poison", 0200, dentry, cxlmd, + &cxl_poison_clear_fops); +} + static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); @@ -130,12 +113,7 @@ static int cxl_mem_probe(struct device *dev) dentry = cxl_debugfs_create_dir(dev_name(dev)); debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show); - if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) - debugfs_create_file("inject_poison", 0200, dentry, cxlmd, - &cxl_poison_inject_fops); - if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) - debugfs_create_file("clear_poison", 0200, dentry, cxlmd, - &cxl_poison_clear_fops); + cxl_memdev_poison_enable(mds, cxlmd, dentry); rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); if (rc) @@ -166,7 +144,8 @@ static int cxl_mem_probe(struct device *dev) else endpoint_parent = &parent_port->dev; - cxl_dport_init_ras_reporting(dport, dev); + if (dport->rch) + cxl_dport_init_ras_reporting(dport, dev); scoped_guard(device, endpoint_parent) { if (!endpoint_parent->driver) { @@ -180,6 +159,12 @@ static int cxl_mem_probe(struct device *dev) return rc; } + if (cxlmd->ops) { + rc = cxlmd->ops->probe(cxlmd); + if (rc) + return rc; + } + rc = devm_cxl_memdev_edac_register(cxlmd); if (rc) dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc); @@ -201,6 +186,55 @@ static int cxl_mem_probe(struct device *dev) return devm_add_action_or_reset(dev, enable_suspend, NULL); } +/** + * devm_cxl_add_memdev - Add a CXL memory device + * @host: devres alloc/release context and parent for the memdev + * @cxlds: CXL device state to associate with the memdev + * @ops: optional operations to run in cxl_mem::{probe,remove}() context + * + * Upon return the device will have had a chance to attach to the + * cxl_mem driver, but may fail if the CXL topology is not ready + * (hardware CXL link down, or software platform CXL root not attached) + */ +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops) +{ + struct cxl_memdev *cxlmd = cxl_memdev_alloc(cxlds, ops); + int rc; + + if (IS_ERR(cxlmd)) + return cxlmd; + + rc = dev_set_name(&cxlmd->dev, "mem%d", cxlmd->id); + if (rc) { + put_device(&cxlmd->dev); + return ERR_PTR(rc); + } + + cxlmd = devm_cxl_memdev_add_or_reset(host, cxlmd); + if (IS_ERR(cxlmd)) + return cxlmd; + + /* + * If ops is provided fail if the driver is not attached upon + * return. The ->endpoint ERR_PTR may have a more precise error + * code to convey. Note that failure here could be the result of + * a race to teardown the CXL port topology. I.e. + * cxl_mem_probe() could have succeeded and then cxl_mem unbound + * before the lock is acquired. + */ + guard(device)(&cxlmd->dev); + if (ops && !cxlmd->dev.driver) { + if (IS_ERR(cxlmd->endpoint)) + return ERR_CAST(cxlmd->endpoint); + return ERR_PTR(-ENXIO); + } + + return cxlmd; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL"); + static ssize_t trigger_poison_list_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -217,16 +251,24 @@ static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); -static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +static bool cxl_poison_attr_visible(struct kobject *kobj, struct attribute *a) { struct device *dev = kobj_to_dev(kobj); struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - if (a == &dev_attr_trigger_poison_list.attr) - if (!test_bit(CXL_POISON_ENABLED_LIST, - mds->poison.enabled_cmds)) - return 0; + if (!mds || + !test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) + return false; + + return true; +} + +static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +{ + if (a == &dev_attr_trigger_poison_list.attr && + !cxl_poison_attr_visible(kobj, a)) + return 0; return a->mode; } @@ -249,6 +291,7 @@ static struct cxl_driver cxl_mem_driver = { .id = CXL_DEVICE_MEMORY_EXPANDER, .drv = { .dev_groups = cxl_mem_groups, + .probe_type = PROBE_FORCE_SYNCHRONOUS, }, }; @@ -258,8 +301,3 @@ MODULE_DESCRIPTION("CXL: Memory Expansion"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS("CXL"); MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); -/* - * create_endpoint() wants to validate port driver attach immediately after - * endpoint registration. - */ -MODULE_SOFTDEP("pre: cxl_port"); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index cf32dc50b7a61..ef65d983e1c82 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -6,6 +6,7 @@ #include "cxlmem.h" #include "cxlpci.h" +#include "private.h" /** * DOC: cxl port @@ -59,55 +60,20 @@ static int discover_region(struct device *dev, void *unused) static int cxl_switch_port_probe(struct cxl_port *port) { - struct cxl_hdm *cxlhdm; - int rc; + /* Reset nr_dports for rebind of driver */ + port->nr_dports = 0; /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); - rc = devm_cxl_port_enumerate_dports(port); - if (rc < 0) - return rc; - - cxl_switch_parse_cdat(port); - - cxlhdm = devm_cxl_setup_hdm(port, NULL); - if (!IS_ERR(cxlhdm)) - return devm_cxl_enumerate_decoders(cxlhdm, NULL); - - if (PTR_ERR(cxlhdm) != -ENODEV) { - dev_err(&port->dev, "Failed to map HDM decoder capability\n"); - return PTR_ERR(cxlhdm); - } - - if (rc == 1) { - dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); - return devm_cxl_add_passthrough_decoder(port); - } - - dev_err(&port->dev, "HDM decoder capability not found\n"); - return -ENXIO; + return 0; } static int cxl_endpoint_port_probe(struct cxl_port *port) { - struct cxl_endpoint_dvsec_info info = { .port = port }; struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_hdm *cxlhdm; int rc; - rc = cxl_dvsec_rr_decode(cxlds, &info); - if (rc < 0) - return rc; - - cxlhdm = devm_cxl_setup_hdm(port, &info); - if (IS_ERR(cxlhdm)) { - if (PTR_ERR(cxlhdm) == -ENODEV) - dev_err(&port->dev, "HDM decoder registers not found\n"); - return PTR_ERR(cxlhdm); - } - /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); cxl_endpoint_parse_cdat(port); @@ -117,11 +83,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info); - if (rc) - return rc; - - rc = devm_cxl_enumerate_decoders(cxlhdm, &info); + rc = devm_cxl_endpoint_decoders_setup(port); if (rc) return rc; @@ -195,10 +157,50 @@ static struct cxl_driver cxl_port_driver = { .probe = cxl_port_probe, .id = CXL_DEVICE_PORT, .drv = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, .dev_groups = cxl_port_attribute_groups, }, }; +int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, + struct cxl_dport *parent_dport) +{ + struct cxl_port *parent_port = parent_dport->port; + struct cxl_port *endpoint, *iter, *down; + int rc; + + /* + * Now that the path to the root is established record all the + * intervening ports in the chain. + */ + for (iter = parent_port, down = NULL; !is_cxl_root(iter); + down = iter, iter = to_cxl_port(iter->dev.parent)) { + struct cxl_ep *ep; + + ep = cxl_ep_load(iter, cxlmd); + ep->next = down; + } + + /* Note: endpoint port component registers are derived from @cxlds */ + endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, + parent_dport); + if (IS_ERR(endpoint)) + return PTR_ERR(endpoint); + + rc = cxl_endpoint_autoremove(cxlmd, endpoint); + if (rc) + return rc; + + if (!endpoint->dev.driver) { + dev_err(&cxlmd->dev, "%s failed probe\n", + dev_name(&endpoint->dev)); + return -ENXIO; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_endpoint, "CXL"); + static int __init cxl_port_init(void) { return cxl_driver_register(&cxl_port_driver); diff --git a/drivers/cxl/private.h b/drivers/cxl/private.h new file mode 100644 index 0000000000000..167a538efd18d --- /dev/null +++ b/drivers/cxl/private.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Intel Corporation. */ + +/* + * Private interfaces betwen common drivers ("cxl_mem", "cxl_port") and + * the cxl_core. + */ +#ifndef __CXL_PRIVATE_H__ +#define __CXL_PRIVATE_H__ +struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops); +struct cxl_memdev *devm_cxl_memdev_add_or_reset(struct device *host, + struct cxl_memdev *cxlmd); +int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, + struct cxl_dport *parent_dport); +#endif /* __CXL_PRIVATE_H__ */ diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index c4c43434f3143..e959d9b4f4cef 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -66,6 +66,16 @@ config SFC_MCDI_LOGGING Driver-Interface) commands and responses, allowing debugging of driver/firmware interaction. The tracing is actually enabled by a sysfs file 'mcdi_logging' under the PCI device. +config SFC_CXL + bool "Solarflare SFC9100-family CXL support" + depends on SFC && CXL_BUS >= SFC + depends on CXL_REGION + default SFC + help + This enables SFC CXL support if the kernel is configuring CXL for + using CTPIO with CXL.mem. The SFC device with CXL support and + with a CXL-aware firmware can be used for minimizing latencies + when sending through CTPIO. source "drivers/net/ethernet/sfc/falcon/Kconfig" source "drivers/net/ethernet/sfc/siena/Kconfig" diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index d99039ec468d6..bb0f1891cde65 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -13,6 +13,7 @@ sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \ mae.o tc.o tc_bindings.o tc_counters.o \ tc_encap_actions.o tc_conntrack.o +sfc-$(CONFIG_SFC_CXL) += efx_cxl.o obj-$(CONFIG_SFC) += sfc.o obj-$(CONFIG_SFC_FALCON) += falcon/ diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index fcec81f862ec5..52c25148844c9 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -24,6 +24,7 @@ #include #include #include +#include "efx_cxl.h" /* Hardware control for EF10 architecture including 'Huntington'. */ @@ -106,7 +107,7 @@ static int efx_ef10_get_vf_index(struct efx_nic *efx) static int efx_ef10_init_datapath_caps(struct efx_nic *efx) { - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V4_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V7_OUT_LEN); struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t outlen; int rc; @@ -177,6 +178,12 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) efx->num_mac_stats); } + if (outlen < MC_CMD_GET_CAPABILITIES_V7_OUT_LEN) + nic_data->datapath_caps3 = 0; + else + nic_data->datapath_caps3 = MCDI_DWORD(outbuf, + GET_CAPABILITIES_V7_OUT_FLAGS3); + return 0; } @@ -919,6 +926,9 @@ static void efx_ef10_forget_old_piobufs(struct efx_nic *efx) static void efx_ef10_remove(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; +#ifdef CONFIG_SFC_CXL + struct efx_probe_data *probe_data; +#endif int rc; #ifdef CONFIG_SFC_SRIOV @@ -949,7 +959,12 @@ static void efx_ef10_remove(struct efx_nic *efx) efx_mcdi_rx_free_indir_table(efx); +#ifdef CONFIG_SFC_CXL + probe_data = container_of(efx, struct efx_probe_data, efx); + if (nic_data->wc_membase && !probe_data->cxl_pio_in_use) +#else if (nic_data->wc_membase) +#endif iounmap(nic_data->wc_membase); rc = efx_mcdi_free_vis(efx); @@ -1140,6 +1155,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) unsigned int channel_vis, pio_write_vi_base, max_vis; struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int uc_mem_map_size, wc_mem_map_size; +#ifdef CONFIG_SFC_CXL + struct efx_probe_data *probe_data; +#endif void __iomem *membase; int rc; @@ -1263,8 +1281,26 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) iounmap(efx->membase); efx->membase = membase; - /* Set up the WC mapping if needed */ - if (wc_mem_map_size) { + if (!wc_mem_map_size) + goto skip_pio; + + /* Set up the WC mapping */ + +#ifdef CONFIG_SFC_CXL + probe_data = container_of(efx, struct efx_probe_data, efx); + if ((nic_data->datapath_caps3 & + (1 << MC_CMD_GET_CAPABILITIES_V7_OUT_CXL_CONFIG_ENABLE_LBN)) && + probe_data->cxl_pio_initialised) { + /* Using PIO through CXL mapping? */ + nic_data->pio_write_vi_base = pio_write_vi_base; + nic_data->pio_write_base = probe_data->cxl->ctpio_cxl + + (pio_write_vi_base * efx->vi_stride + + ER_DZ_TX_PIOBUF - uc_mem_map_size); + probe_data->cxl_pio_in_use = true; + } else +#endif + { + /* Using legacy PIO BAR mapping */ nic_data->wc_membase = ioremap_wc(efx->membase_phys + uc_mem_map_size, wc_mem_map_size); @@ -1279,12 +1315,13 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) nic_data->wc_membase + (pio_write_vi_base * efx->vi_stride + ER_DZ_TX_PIOBUF - uc_mem_map_size); - - rc = efx_ef10_link_piobufs(efx); - if (rc) - efx_ef10_free_piobufs(efx); } + rc = efx_ef10_link_piobufs(efx); + if (rc) + efx_ef10_free_piobufs(efx); + +skip_pio: netif_dbg(efx, probe, efx->net_dev, "memory BAR at %pa (virtual %p+%x UC, %p+%x WC)\n", &efx->membase_phys, efx->membase, uc_mem_map_size, diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 112e55b98ed3b..537668278375b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -34,6 +34,7 @@ #include "selftest.h" #include "sriov.h" #include "efx_devlink.h" +#include "efx_cxl.h" #include "mcdi_port_common.h" #include "mcdi_pcol.h" @@ -981,12 +982,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_pci_remove_main(efx); efx_fini_io(efx); + + probe_data = container_of(efx, struct efx_probe_data, efx); + efx_cxl_exit(probe_data); + pci_dbg(efx->pci_dev, "shutdown successful\n"); efx_fini_devlink_and_unlock(efx); efx_fini_struct(efx); free_netdev(efx->net_dev); - probe_data = container_of(efx, struct efx_probe_data, efx); kfree(probe_data); }; @@ -1190,6 +1194,15 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (rc) goto fail2; + /* A successful cxl initialization implies a CXL region created to be + * used for PIO buffers. If there is no CXL support, or initialization + * fails, efx_cxl_pio_initialised will be false and legacy PIO buffers + * defined at specific PCI BAR regions will be used. + */ + rc = efx_cxl_init(probe_data); + if (rc) + pci_err(pci_dev, "CXL initialization failed with error %d\n", rc); + rc = efx_pci_probe_post_io(efx); if (rc) { /* On failure, retry once immediately. diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c new file mode 100644 index 0000000000000..27919dcf61877 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ + +#include + +#include +#include +#include "net_driver.h" +#include "efx_cxl.h" +#include "efx.h" + +#define EFX_CTPIO_BUFFER_SIZE SZ_256M + +int efx_cxl_init(struct efx_probe_data *probe_data) +{ + struct efx_nic *efx = &probe_data->efx; + struct pci_dev *pci_dev = efx->pci_dev; + resource_size_t max_size; + struct efx_cxl *cxl; + struct range range; + u16 dvsec; + int rc; + + probe_data->cxl_pio_initialised = false; + + /* Is the device configured with and using CXL? */ + if (!pcie_is_cxl(pci_dev)) + return 0; + + dvsec = pci_find_dvsec_capability(pci_dev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) { + pci_err(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability not found\n"); + return 0; + } + + pci_dbg(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability found\n"); + + /* Create a cxl_dev_state embedded in the cxl struct using cxl core api + * specifying no mbox available. + */ + cxl = devm_cxl_dev_state_create(&pci_dev->dev, CXL_DEVTYPE_DEVMEM, + pci_dev->dev.id, dvsec, struct efx_cxl, + cxlds, false); + + if (!cxl) + return -ENOMEM; + + rc = cxl_pci_setup_regs(pci_dev, CXL_REGLOC_RBI_COMPONENT, + &cxl->cxlds.reg_map); + if (rc) { + pci_err(pci_dev, "No component registers\n"); + return rc; + } + + if (!cxl->cxlds.reg_map.component_map.hdm_decoder.valid) { + pci_err(pci_dev, "Expected HDM component register not found\n"); + return -ENODEV; + } + + if (!cxl->cxlds.reg_map.component_map.ras.valid) { + pci_err(pci_dev, "Expected RAS component register not found\n"); + return -ENODEV; + } + + rc = cxl_map_component_regs(&cxl->cxlds.reg_map, + &cxl->cxlds.regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS)); + if (rc) { + pci_err(pci_dev, "Failed to map RAS capability.\n"); + return rc; + } + + /* + * Set media ready explicitly as there are neither mailbox for checking + * this state nor the CXL register involved, both not mandatory for + * type2. + */ + cxl->cxlds.media_ready = true; + + if (cxl_set_capacity(&cxl->cxlds, EFX_CTPIO_BUFFER_SIZE)) { + pci_err(pci_dev, "dpa capacity setup failed\n"); + return -ENODEV; + } + + cxl->cxlmd = devm_cxl_add_memdev(&pci_dev->dev, &cxl->cxlds, NULL); + if (IS_ERR(cxl->cxlmd)) { + pci_err(pci_dev, "CXL accel memdev creation failed"); + return PTR_ERR(cxl->cxlmd); + } + + cxl->cxled = cxl_get_committed_decoder(cxl->cxlmd, &cxl->efx_region); + if (cxl->cxled) { + if (!cxl->efx_region) { + pci_err(pci_dev, "CXL found committed decoder without a region"); + return -ENODEV; + } + rc = cxl_get_region_range(cxl->efx_region, &range); + if (rc) { + pci_err(pci_dev, + "CXL getting regions params from a committed decoder failed"); + return rc; + } + + cxl->ctpio_cxl = ioremap(range.start, range.end - range.start + 1); + if (!cxl->ctpio_cxl) { + pci_err(pci_dev, "CXL ioremap region (%pra) failed", &range); + return -ENOMEM; + } + cxl->hdm_was_committed = true; + } else { + cxl->cxlrd = cxl_get_hpa_freespace(cxl->cxlmd, 1, + CXL_DECODER_F_RAM | + CXL_DECODER_F_TYPE2, + &max_size); + + if (IS_ERR(cxl->cxlrd)) { + dev_err(&pci_dev->dev, "cxl_get_hpa_freespace failed\n"); + return PTR_ERR(cxl->cxlrd); + } + + if (max_size < EFX_CTPIO_BUFFER_SIZE) { + dev_err(&pci_dev->dev, "%s: not enough free HPA space %pap < %u\n", + __func__, &max_size, EFX_CTPIO_BUFFER_SIZE); + cxl_put_root_decoder(cxl->cxlrd); + return -ENOSPC; + } + + cxl->cxled = cxl_request_dpa(cxl->cxlmd, CXL_PARTMODE_RAM, + EFX_CTPIO_BUFFER_SIZE); + if (IS_ERR(cxl->cxled)) { + pci_err(pci_dev, "CXL accel request DPA failed"); + cxl_put_root_decoder(cxl->cxlrd); + return PTR_ERR(cxl->cxled); + } + + cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1); + if (IS_ERR(cxl->efx_region)) { + pci_err(pci_dev, "CXL accel create region failed"); + rc = PTR_ERR(cxl->efx_region); + goto err_dpa; + } + + rc = cxl_get_region_range(cxl->efx_region, &range); + if (rc) { + pci_err(pci_dev, "CXL getting regions params failed"); + goto err_detach; + } + + cxl->ctpio_cxl = ioremap(range.start, range.end - range.start + 1); + if (!cxl->ctpio_cxl) { + pci_err(pci_dev, "CXL ioremap region (%pra) failed", &range); + rc = -ENOMEM; + goto err_detach; + } + } + + probe_data->cxl = cxl; + probe_data->cxl_pio_initialised = true; + + return 0; + +err_detach: + cxl_decoder_detach(NULL, cxl->cxled, 0, DETACH_INVALIDATE); + unregister_region(cxl->efx_region); +err_dpa: + cxl_put_root_decoder(cxl->cxlrd); + cxl_dpa_free(cxl->cxled); + return rc; +} + +void efx_cxl_exit(struct efx_probe_data *probe_data) +{ + if (!probe_data->cxl) + return; + + iounmap(probe_data->cxl->ctpio_cxl); + cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, + DETACH_INVALIDATE); + + if (!probe_data->cxl->hdm_was_committed) { + cxl_dpa_free(probe_data->cxl->cxled); + cxl_put_root_decoder(probe_data->cxl->cxlrd); + } else { + /* Release decoder reference from cxl_get_committed_decoder() */ + if (probe_data->cxl->cxled) + put_device(&probe_data->cxl->cxled->cxld.dev); + } + + unregister_region(probe_data->cxl->efx_region); +} + +MODULE_IMPORT_NS("CXL"); diff --git a/drivers/net/ethernet/sfc/efx_cxl.h b/drivers/net/ethernet/sfc/efx_cxl.h new file mode 100644 index 0000000000000..9a92e386695bb --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_cxl.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_CXL_H +#define EFX_CXL_H + +#ifdef CONFIG_SFC_CXL + +#include + +struct cxl_root_decoder; +struct cxl_port; +struct cxl_endpoint_decoder; +struct cxl_region; +struct efx_probe_data; + +struct efx_cxl { + struct cxl_dev_state cxlds; + struct cxl_memdev *cxlmd; + struct cxl_root_decoder *cxlrd; + struct cxl_port *endpoint; + struct cxl_endpoint_decoder *cxled; + bool hdm_was_committed; + struct cxl_region *efx_region; + void __iomem *ctpio_cxl; +}; + +int efx_cxl_init(struct efx_probe_data *probe_data); +void efx_cxl_exit(struct efx_probe_data *probe_data); +#else +static inline int efx_cxl_init(struct efx_probe_data *probe_data) { return 0; } +static inline void efx_cxl_exit(struct efx_probe_data *probe_data) {} +#endif +#endif diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index b98c259f672db..bea4eecdf842d 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1197,14 +1197,26 @@ struct efx_nic { atomic_t n_rx_noskb_drops; }; +#ifdef CONFIG_SFC_CXL +struct efx_cxl; +#endif + /** * struct efx_probe_data - State after hardware probe * @pci_dev: The PCI device * @efx: Efx NIC details + * @cxl: details of related cxl objects + * @cxl_pio_initialised: cxl initialization outcome. + * @cxl_pio_in_use: PIO using CXL mapping */ struct efx_probe_data { struct pci_dev *pci_dev; struct efx_nic efx; +#ifdef CONFIG_SFC_CXL + struct efx_cxl *cxl; + bool cxl_pio_initialised; + bool cxl_pio_in_use; +#endif }; static inline struct efx_nic *efx_netdev_priv(struct net_device *dev) diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 9fa5c4c713abd..c87cc9214690b 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -152,6 +152,8 @@ enum { * %MC_CMD_GET_CAPABILITIES response) * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of * %MC_CMD_GET_CAPABILITIES response) + * @datapath_caps3: Further Capabilities of datapath firmware (FLAGS3 field of + * %MC_CMD_GET_CAPABILITIES response) * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot @@ -186,6 +188,7 @@ struct efx_ef10_nic_data { bool must_check_datapath_caps; u32 datapath_caps; u32 datapath_caps2; + u32 datapath_caps3; unsigned int rx_dpcpu_fw_id; unsigned int tx_dpcpu_fw_id; bool must_probe_vswitching; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9a6943688e6db..ec876d46264c2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -30,7 +30,6 @@ #include #include #include -#include "../cxl/cxlpci.h" #include "pci.h" DEFINE_MUTEX(pci_slot_mutex); @@ -2334,6 +2333,7 @@ void pcie_clear_device_status(struct pci_dev *dev) pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); } +EXPORT_SYMBOL_GPL(pcie_clear_device_status); #endif /** @@ -5059,7 +5059,9 @@ static bool cxl_sbr_masked(struct pci_dev *dev) if (!dvsec) return false; - rc = pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_PORT_CTL, ®); + rc = pci_read_config_word(dev, + dvsec + PCI_DVSEC_CXL_PORT_CTL, + ®); if (rc || PCI_POSSIBLE_ERROR(reg)) return false; @@ -5134,151 +5136,6 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe) return rc; } -static int cxl_reset_prepare(struct pci_dev *dev, u16 dvsec) -{ - u32 timeout_us = 100, timeout_tot_us = 10000; - u16 reg, cap; - int rc; - - if (!pci_wait_for_pending_transaction(dev)) - pci_err(dev, "timed out waiting for pending transaction; performing cxl reset anyway\n"); - - /* Check if the device is cache capable. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, &cap); - if (rc) - return rc; - - if (!(cap & CXL_DVSEC_CACHE_CAPABLE)) - return 0; - - /* Disable cache. WB and invalidate cache if capability is advertised */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); - if (rc) - return rc; - reg |= CXL_DVSEC_DISABLE_CACHING; - /* - * DEVCTL2 bits are written only once. So check WB+I capability while - * keeping disable caching set. - */ - if (cap & CXL_DVSEC_CACHE_WBI_CAPABLE) - reg |= CXL_DVSEC_INIT_CACHE_WBI; - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - - /* - * From Section 9.6: "Software may leverage the cache size reported in - * the DVSEC CXL Capability2 register to compute a suitable timeout - * value". - * Given there is no conversion factor for cache size -> timeout, - * setting timer for default 10ms. - */ - do { - if (timeout_tot_us == 0) - return -ETIMEDOUT; - usleep_range(timeout_us, timeout_us + 1); - timeout_tot_us -= timeout_us; - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, - ®); - if (rc) - return rc; - } while (!(reg & CXL_DVSEC_CACHE_INVALID)); - - return 0; -} - -static int cxl_reset_init(struct pci_dev *dev, u16 dvsec) -{ - /* - * Timeout values ref CXL Spec v3.2 Ch 8 Control and Status Registers, - * under section 8.1.3.1 DVSEC CXL Capability. - */ - u32 reset_timeouts_ms[] = { 10, 100, 1000, 10000, 100000 }; - u16 reg; - u32 timeout_ms; - int rc, ind; - - /* Check if CXL Reset MEM CLR is supported. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); - if (rc) - return rc; - - if (reg & CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE) { - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, - ®); - if (rc) - return rc; - - reg |= CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE; - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - } - - /* Read timeout value. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); - if (rc) - return rc; - ind = FIELD_GET(CXL_DVSEC_CXL_RST_TIMEOUT_MASK, reg); - timeout_ms = reset_timeouts_ms[ind]; - - /* Write reset config. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); - if (rc) - return rc; - - reg |= CXL_DVSEC_INIT_CXL_RESET; - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - - /* Wait till timeout and then check reset status is complete. */ - msleep(timeout_ms); - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_STATUS2_OFFSET, ®); - if (rc) - return rc; - if (reg & CXL_DVSEC_CXL_RESET_ERR || - ~reg & CXL_DVSEC_CXL_RST_COMPLETE) - return -ETIMEDOUT; - - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); - if (rc) - return rc; - reg &= (~CXL_DVSEC_DISABLE_CACHING); - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - - return 0; -} - -/** - * cxl_reset - initiate a cxl reset - * @dev: device to reset - * @probe: if true, return 0 if device can be reset this way - * - * Initiate a cxl reset on @dev. - */ -static int cxl_reset(struct pci_dev *dev, bool probe) -{ - u16 dvsec, reg; - int rc; - - dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_PCIE_DEVICE); - if (!dvsec) - return -ENOTTY; - - /* Check if CXL Reset is supported. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); - if (rc) - return -ENOTTY; - - if ((reg & CXL_DVSEC_CXL_RST_CAPABLE) == 0) - return -ENOTTY; - - if (probe) - return 0; - - rc = cxl_reset_prepare(dev, dvsec); - if (rc) - return rc; - - return cxl_reset_init(dev, dvsec); -} - void pci_dev_lock(struct pci_dev *dev) { /* block PM suspend, driver probe, etc. */ @@ -5365,7 +5222,6 @@ const struct pci_reset_fn_method pci_reset_fn_methods[] = { { pci_dev_acpi_reset, .name = "acpi" }, { pcie_reset_flr, .name = "flr" }, { pci_af_flr, .name = "af_flr" }, - { cxl_reset, .name = "cxl_reset" }, { pci_pm_reset, .name = "pm" }, { pci_reset_bus_function, .name = "bus" }, { cxl_reset_bus_function, .name = "cxl_bus" }, diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index ee7b515125826..82c6b7a498e6c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -137,7 +137,6 @@ void pci_refresh_power_state(struct pci_dev *dev); int pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); int pci_finish_runtime_suspend(struct pci_dev *dev); -void pcie_clear_device_status(struct pci_dev *dev); void pcie_clear_root_pme_status(struct pci_dev *dev); bool pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); @@ -607,31 +606,54 @@ static inline bool pci_dev_binding_disallowed(struct pci_dev *dev) #define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ +/** + * struct aer_err_info - AER Error Information + * @dev: Devices reporting error + * @ratelimit_print: Flag to log or not log the devices' error. 0=NotLog/1=Log + * @error_devnum: Number of devices reporting an error + * @level: printk level to use in logging + * @id: Value from register PCI_ERR_ROOT_ERR_SRC + * @severity: AER severity, 0-UNCOR Non-fatal, 1-UNCOR fatal, 2-COR + * @root_ratelimit_print: Flag to log or not log the root's error. 0=NotLog/1=Log + * @multi_error_valid: If multiple errors are reported + * @first_error: First reported error + * @is_cxl: Bus type error: 0-PCI Bus error, 1-CXL Bus error + * @tlp_header_valid: Indicates if TLP field contains error information + * @status: COR/UNCOR error status + * @mask: COR/UNCOR mask + * @tlp: Transaction packet information + */ struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; int ratelimit_print[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; - const char *level; /* printk level */ + const char *level; unsigned int id:16; - unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ - unsigned int root_ratelimit_print:1; /* 0=skip, 1=print */ + unsigned int severity:2; + unsigned int root_ratelimit_print:1; unsigned int __pad1:4; unsigned int multi_error_valid:1; unsigned int first_error:5; - unsigned int __pad2:2; + unsigned int __pad2:1; + bool is_cxl:1; unsigned int tlp_header_valid:1; - unsigned int status; /* COR/UNCOR Error Status */ - unsigned int mask; /* COR/UNCOR Error Mask */ - struct pcie_tlp_log tlp; /* TLP Header */ + unsigned int status; + unsigned int mask; + struct pcie_tlp_log tlp; }; int aer_get_device_error_info(struct aer_err_info *info, int i); void aer_print_error(struct aer_err_info *info, int i); +static inline const char *aer_err_bus(struct aer_err_info *info) +{ + return info->is_cxl ? "CXL" : "PCIe"; +} + int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, unsigned int tlp_len, bool flit, struct pcie_tlp_log *log); @@ -1067,7 +1089,6 @@ void pci_restore_aer_state(struct pci_dev *dev); static inline void pci_no_aer(void) { } static inline void pci_aer_init(struct pci_dev *d) { } static inline void pci_aer_exit(struct pci_dev *d) { } -static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } static inline int pci_aer_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline void pci_save_aer_state(struct pci_dev *dev) { } @@ -1210,4 +1231,24 @@ static inline int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int inde (PCI_CONF1_ADDRESS(bus, dev, func, reg) | \ PCI_CONF1_EXT_REG(reg)) +struct aer_err_info; + +#ifdef CONFIG_CXL_RCH_RAS +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info); +void cxl_rch_enable_rcec(struct pci_dev *rcec); +#else +static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { } +static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } +#endif + +#ifdef CONFIG_CXL_RAS +bool is_internal_error(struct aer_err_info *info); +bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info); +void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info); +#else +static inline bool is_internal_error(struct aer_err_info *info) { return false; } +static inline bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info) { return false; } +static inline void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info) { } +#endif + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 173829aa02e60..72992b3ea417b 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -8,6 +8,8 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o obj-y += aspm.o obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o +obj-$(CONFIG_CXL_RCH_RAS) += aer_cxl_rch.o +obj-$(CONFIG_CXL_RAS) += aer_cxl_vh.o obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 3dba9c0c6ae11..4b187dc4f1d0a 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -287,6 +287,7 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) if (status) pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); } +EXPORT_SYMBOL_GPL(pci_aer_clear_fatal_status); /** * pci_aer_raw_clear_status - Clear AER error registers. @@ -844,6 +845,7 @@ void aer_print_error(struct aer_err_info *info, int i) struct pci_dev *dev; int layer, agent, id; const char *level = info->level; + const char *bus_type = aer_err_bus(info); if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES)) return; @@ -852,23 +854,23 @@ void aer_print_error(struct aer_err_info *info, int i) id = pci_dev_id(dev); pci_dev_aer_stats_incr(dev, info); - trace_aer_event(pci_name(dev), (info->status & ~info->mask), + trace_aer_event(pci_name(dev), bus_type, (info->status & ~info->mask), info->severity, info->tlp_header_valid, &info->tlp); if (!info->ratelimit_print[i]) return; if (!info->status) { - pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", - aer_error_severity_string[info->severity]); + pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", + bus_type, aer_error_severity_string[info->severity]); goto out; } layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", - aer_error_severity_string[info->severity], + aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n", + bus_type, aer_error_severity_string[info->severity], aer_error_layer[layer], aer_agent_string[agent]); aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", @@ -902,6 +904,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer); void pci_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer) { + const char *bus_type; int layer, agent, tlp_header_valid = 0; u32 status, mask; struct aer_err_info info = { @@ -922,9 +925,12 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, info.status = status; info.mask = mask; + info.is_cxl = pcie_is_cxl(dev); + + bus_type = aer_err_bus(&info); pci_dev_aer_stats_incr(dev, &info); - trace_aer_event(pci_name(dev), (status & ~mask), + trace_aer_event(pci_name(dev), bus_type, (status & ~mask), aer_severity, tlp_header_valid, &aer->header_log); if (!aer_ratelimit(dev, info.severity)) @@ -1094,8 +1100,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -#ifdef CONFIG_PCIEAER_CXL - /** * pci_aer_unmask_internal_errors - unmask internal errors * @dev: pointer to the pci_dev data structure @@ -1106,7 +1110,7 @@ static bool find_source_device(struct pci_dev *parent, * Note: AER must be enabled and supported by the device which must be * checked in advance, e.g. with pcie_aer_is_native(). */ -static void pci_aer_unmask_internal_errors(struct pci_dev *dev) +void pci_aer_unmask_internal_errors(struct pci_dev *dev) { int aer = dev->aer_cap; u32 mask; @@ -1119,119 +1123,28 @@ static void pci_aer_unmask_internal_errors(struct pci_dev *dev) mask &= ~PCI_ERR_COR_INTERNAL; pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); } +EXPORT_SYMBOL_GPL(pci_aer_unmask_internal_errors); -static bool is_cxl_mem_dev(struct pci_dev *dev) -{ - /* - * The capability, status, and control fields in Device 0, - * Function 0 DVSEC control the CXL functionality of the - * entire device (CXL 3.0, 8.1.3). - */ - if (dev->devfn != PCI_DEVFN(0, 0)) - return false; - - /* - * CXL Memory Devices must have the 502h class code set (CXL - * 3.0, 8.1.12.1). - */ - if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) - return false; - - return true; -} - -static bool cxl_error_is_native(struct pci_dev *dev) -{ - struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); - - return (pcie_ports_native || host->native_aer); -} - -static bool is_internal_error(struct aer_err_info *info) -{ - if (info->severity == AER_CORRECTABLE) - return info->status & PCI_ERR_COR_INTERNAL; - - return info->status & PCI_ERR_UNC_INTN; -} - -static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) -{ - struct aer_err_info *info = (struct aer_err_info *)data; - const struct pci_error_handlers *err_handler; - - if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) - return 0; - - /* Protect dev->driver */ - device_lock(&dev->dev); - - err_handler = dev->driver ? dev->driver->err_handler : NULL; - if (!err_handler) - goto out; - - if (info->severity == AER_CORRECTABLE) { - if (err_handler->cor_error_detected) - err_handler->cor_error_detected(dev); - } else if (err_handler->error_detected) { - if (info->severity == AER_NONFATAL) - err_handler->error_detected(dev, pci_channel_io_normal); - else if (info->severity == AER_FATAL) - err_handler->error_detected(dev, pci_channel_io_frozen); - } -out: - device_unlock(&dev->dev); - return 0; -} - -static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) -{ - /* - * Internal errors of an RCEC indicate an AER error in an - * RCH's downstream port. Check and handle them in the CXL.mem - * device driver. - */ - if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && - is_internal_error(info)) - pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); -} - -static int handles_cxl_error_iter(struct pci_dev *dev, void *data) -{ - bool *handles_cxl = data; - - if (!*handles_cxl) - *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); - - /* Non-zero terminates iteration */ - return *handles_cxl; -} - -static bool handles_cxl_errors(struct pci_dev *rcec) -{ - bool handles_cxl = false; - - if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && - pcie_aer_is_native(rcec)) - pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); - - return handles_cxl; -} - -static void cxl_rch_enable_rcec(struct pci_dev *rcec) +/** + * pci_aer_mask_internal_errors - mask internal errors + * @dev: pointer to the pcie_dev data structure + * + * Masks internal errors in the Uncorrectable and Correctable Error + * Mask registers. + * + * Note: AER must be enabled and supported by the device which must be + * checked in advance, e.g. with pcie_aer_is_native(). + */ +void pci_aer_mask_internal_errors(struct pci_dev *dev) { - if (!handles_cxl_errors(rcec)) - return; + int aer = dev->aer_cap; - pci_aer_unmask_internal_errors(rcec); - pci_info(rcec, "CXL: Internal errors unmasked"); + pci_clear_and_set_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, + 0, PCI_ERR_UNC_INTN); + pci_clear_and_set_config_dword(dev, aer + PCI_ERR_COR_MASK, + 0, PCI_ERR_COR_INTERNAL); } - -#else -static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { } -static inline void cxl_rch_handle_error(struct pci_dev *dev, - struct aer_err_info *info) { } -#endif +EXPORT_SYMBOL_GPL(pci_aer_mask_internal_errors); /** * pci_aer_handle_error - handle logging error into an event log @@ -1269,7 +1182,10 @@ static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info) static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) { cxl_rch_handle_error(dev, info); - pci_aer_handle_error(dev, info); + if (is_cxl_error(dev, info)) + cxl_forward_error(dev, info); + else + pci_aer_handle_error(dev, info); pci_dev_put(dev); } @@ -1379,6 +1295,7 @@ int aer_get_device_error_info(struct aer_err_info *info, int i) /* Must reset in this function */ info->status = 0; info->tlp_header_valid = 0; + info->is_cxl = pcie_is_cxl(dev); /* The device might not support AER */ if (!aer) diff --git a/drivers/pci/pcie/aer_cxl_rch.c b/drivers/pci/pcie/aer_cxl_rch.c new file mode 100644 index 0000000000000..f4d160f181690 --- /dev/null +++ b/drivers/pci/pcie/aer_cxl_rch.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 AMD Corporation. All rights reserved. */ + +#include +#include +#include +#include "../pci.h" + +static bool is_cxl_mem_dev(struct pci_dev *dev) +{ + /* + * The capability, status, and control fields in Device 0, + * Function 0 DVSEC control the CXL functionality of the + * entire device (CXL 3.0, 8.1.3). + */ + if (dev->devfn != PCI_DEVFN(0, 0)) + return false; + + /* + * CXL Memory Devices must have the 502h class code set (CXL + * 3.0, 8.1.12.1). + */ + if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) + return false; + + return true; +} + +static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *info = (struct aer_err_info *)data; + const struct pci_error_handlers *err_handler; + + if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) + return 0; + + guard(device)(&dev->dev); + + err_handler = dev->driver ? dev->driver->err_handler : NULL; + if (!err_handler) + return 0; + + if (info->severity == AER_CORRECTABLE) { + if (err_handler->cor_error_detected) + err_handler->cor_error_detected(dev); + } else if (err_handler->error_detected) { + if (info->severity == AER_NONFATAL) + err_handler->error_detected(dev, pci_channel_io_normal); + else if (info->severity == AER_FATAL) + err_handler->error_detected(dev, pci_channel_io_frozen); + } + return 0; +} + +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) +{ + /* + * Internal errors of an RCEC indicate an AER error in an + * RCH's downstream port. Check and handle them in the CXL.mem + * device driver. + */ + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && + is_internal_error(info)) + pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); +} + +static int handles_cxl_error_iter(struct pci_dev *dev, void *data) +{ + bool *handles_cxl = data; + + if (!*handles_cxl) + *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); + + /* Non-zero terminates iteration */ + return *handles_cxl; +} + +static bool handles_cxl_errors(struct pci_dev *rcec) +{ + bool handles_cxl = false; + + if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && + pcie_aer_is_native(rcec)) + pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); + + return handles_cxl; +} + +void cxl_rch_enable_rcec(struct pci_dev *rcec) +{ + if (!handles_cxl_errors(rcec)) + return; + + pci_aer_unmask_internal_errors(rcec); + pci_info(rcec, "CXL: Internal errors unmasked"); +} diff --git a/drivers/pci/pcie/aer_cxl_vh.c b/drivers/pci/pcie/aer_cxl_vh.c new file mode 100644 index 0000000000000..c2510ac3cb139 --- /dev/null +++ b/drivers/pci/pcie/aer_cxl_vh.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ + +#include +#include +#include +#include +#include "../pci.h" + +#define CXL_ERROR_SOURCES_MAX 128 + +struct cxl_proto_err_kfifo { + struct work_struct *work; + struct rw_semaphore rw_sema; + DECLARE_KFIFO(fifo, struct cxl_proto_err_work_data, + CXL_ERROR_SOURCES_MAX); +}; + +static struct cxl_proto_err_kfifo cxl_proto_err_kfifo = { + .rw_sema = __RWSEM_INITIALIZER(cxl_proto_err_kfifo.rw_sema) +}; + +bool cxl_error_is_native(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + return (pcie_ports_native || host->native_aer); +} +EXPORT_SYMBOL_NS_GPL(cxl_error_is_native, "CXL"); + +bool is_internal_error(struct aer_err_info *info) +{ + if (info->severity == AER_CORRECTABLE) + return info->status & PCI_ERR_COR_INTERNAL; + + return info->status & PCI_ERR_UNC_INTN; +} +EXPORT_SYMBOL_NS_GPL(is_internal_error, "CXL"); + +bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info) +{ + if (!info || !info->is_cxl) + return false; + + if ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT) && + (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) && + (pci_pcie_type(pdev) != PCI_EXP_TYPE_UPSTREAM) && + (pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM)) + return false; + + return is_internal_error(info); +} +EXPORT_SYMBOL_NS_GPL(is_cxl_error, "CXL"); + +void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info) +{ + struct cxl_proto_err_work_data wd = (struct cxl_proto_err_work_data) { + .severity = info->severity, + .pdev = pci_dev_get(pdev) + }; + + guard(rwsem_write)(&cxl_proto_err_kfifo.rw_sema); + + if (!cxl_proto_err_kfifo.work) { + pci_dev_put(pdev); + dev_warn_once(&pdev->dev, "CXL driver is unregistered. Unable to forward error."); + return; + } + + if (!kfifo_put(&cxl_proto_err_kfifo.fifo, wd)) { + dev_err_ratelimited(&pdev->dev, "AER-CXL kfifo overflow\n"); + return; + } + + schedule_work(cxl_proto_err_kfifo.work); +} +EXPORT_SYMBOL_NS_GPL(cxl_forward_error, "CXL"); + +void cxl_register_proto_err_work(struct work_struct *work) +{ + guard(rwsem_write)(&cxl_proto_err_kfifo.rw_sema); + cxl_proto_err_kfifo.work = work; +} +EXPORT_SYMBOL_NS_GPL(cxl_register_proto_err_work, "CXL"); + +void cxl_unregister_proto_err_work(void) +{ + guard(rwsem_write)(&cxl_proto_err_kfifo.rw_sema); + cxl_proto_err_kfifo.work = NULL; +} +EXPORT_SYMBOL_NS_GPL(cxl_unregister_proto_err_work, "CXL"); + +int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd) +{ + guard(rwsem_read)(&cxl_proto_err_kfifo.rw_sema); + return kfifo_get(&cxl_proto_err_kfifo.fifo, wd); +} +EXPORT_SYMBOL_NS_GPL(cxl_proto_err_kfifo_get, "CXL"); diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index e85b9cd5fec1b..feefce97c6872 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -21,9 +21,12 @@ #include "portdrv.h" #include "../pci.h" -static pci_ers_result_t merge_result(enum pci_ers_result orig, - enum pci_ers_result new) +pci_ers_result_t pcie_ers_merge_result(enum pci_ers_result orig, + enum pci_ers_result new) { + if (new == PCI_ERS_RESULT_PANIC) + return PCI_ERS_RESULT_PANIC; + if (new == PCI_ERS_RESULT_NO_AER_DRIVER) return PCI_ERS_RESULT_NO_AER_DRIVER; @@ -45,6 +48,7 @@ static pci_ers_result_t merge_result(enum pci_ers_result orig, return orig; } +EXPORT_SYMBOL(pcie_ers_merge_result); static int report_error_detected(struct pci_dev *dev, pci_channel_state_t state, @@ -81,7 +85,7 @@ static int report_error_detected(struct pci_dev *dev, vote = err_handler->error_detected(dev, state); } pci_uevent_ers(dev, vote); - *result = merge_result(*result, vote); + *result = pcie_ers_merge_result(*result, vote); device_unlock(&dev->dev); return 0; } @@ -127,7 +131,7 @@ static int report_mmio_enabled(struct pci_dev *dev, void *data) err_handler = pdrv->err_handler; vote = err_handler->mmio_enabled(dev); - *result = merge_result(*result, vote); + *result = pcie_ers_merge_result(*result, vote); out: device_unlock(&dev->dev); return 0; @@ -147,7 +151,7 @@ static int report_slot_reset(struct pci_dev *dev, void *data) err_handler = pdrv->err_handler; vote = err_handler->slot_reset(dev); - *result = merge_result(*result, vote); + *result = pcie_ers_merge_result(*result, vote); out: device_unlock(&dev->dev); return 0; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index eb084877bb043..ff9d8e14a6a4e 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1692,6 +1692,34 @@ static void set_pcie_thunderbolt(struct pci_dev *dev) dev->is_thunderbolt = 1; } +static void set_pcie_cxl(struct pci_dev *dev) +{ + struct pci_dev *parent; + u16 dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_FLEXBUS_PORT); + if (dvsec) { + u16 cap; + + pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_FLEXBUS_STATUS_OFFSET, &cap); + + dev->is_cxl = FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_STATUS_CACHE_MASK, cap) || + FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_STATUS_MEM_MASK, cap); + } + + if (!pci_is_pcie(dev) || + !(pci_pcie_type(dev) == PCI_EXP_TYPE_ENDPOINT || + pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM)) + return; + + /* + * Update parent's CXL state because alternate protocol training + * may have changed + */ + parent = pci_upstream_bridge(dev); + if (parent) + set_pcie_cxl(parent); +} + static void set_pcie_untrusted(struct pci_dev *dev) { struct pci_dev *parent = pci_upstream_bridge(dev); @@ -2022,6 +2050,8 @@ int pci_setup_device(struct pci_dev *dev) /* Need to have dev->cfg_size ready */ set_pcie_thunderbolt(dev); + set_pcie_cxl(dev); + set_pcie_untrusted(dev); if (pci_is_pcie(dev)) diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 99fd1588ff382..eb787dfbd2fa0 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -560,8 +560,8 @@ struct acpi_cedt_cfmws_target_element { /* Values for Restrictions field above */ -#define ACPI_CEDT_CFMWS_RESTRICT_TYPE2 (1) -#define ACPI_CEDT_CFMWS_RESTRICT_TYPE3 (1<<1) +#define ACPI_CEDT_CFMWS_RESTRICT_DEVMEM (1) +#define ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM (1<<1) #define ACPI_CEDT_CFMWS_RESTRICT_VOLATILE (1<<2) #define ACPI_CEDT_CFMWS_RESTRICT_PMEM (1<<3) #define ACPI_CEDT_CFMWS_RESTRICT_FIXED (1<<4) diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h new file mode 100644 index 0000000000000..e6176677ea940 --- /dev/null +++ b/include/cxl/cxl.h @@ -0,0 +1,296 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. */ +/* Copyright(c) 2025 Advanced Micro Devices, Inc. */ + +#ifndef __CXL_CXL_H__ +#define __CXL_CXL_H__ + +#include +#include +#include +#include + +/** + * enum cxl_devtype - delineate type-2 from a generic type-3 device + * @CXL_DEVTYPE_DEVMEM: Vendor specific CXL Type-2 device implementing HDM-D or + * HDM-DB, no requirement that this device implements a + * mailbox, or other memory-device-standard manageability + * flows. + * @CXL_DEVTYPE_CLASSMEM: Common class definition of a CXL Type-3 device with + * HDM-H and class-mandatory memory device registers + */ +enum cxl_devtype { + CXL_DEVTYPE_DEVMEM, + CXL_DEVTYPE_CLASSMEM, +}; + +struct device; + +/* + * Using struct_group() allows for per register-block-type helper routines, + * without requiring block-type agnostic code to include the prefix. + */ +struct cxl_regs { + /* + * Common set of CXL Component register block base pointers + * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure + * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure + */ + struct_group_tagged(cxl_component_regs, component, + void __iomem *hdm_decoder; + void __iomem *ras; + ); + /* + * Common set of CXL Device register block base pointers + * @status: CXL 2.0 8.2.8.3 Device Status Registers + * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers + * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers + */ + struct_group_tagged(cxl_device_regs, device_regs, + void __iomem *status, *mbox, *memdev; + ); + + struct_group_tagged(cxl_pmu_regs, pmu_regs, + void __iomem *pmu; + ); + + /* + * RCH downstream port specific RAS register + * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB + */ + struct_group_tagged(cxl_rch_regs, rch_regs, + void __iomem *dport_aer; + ); + + /* + * RCD upstream port specific PCIe cap register + * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB + */ + struct_group_tagged(cxl_rcd_regs, rcd_regs, + void __iomem *rcd_pcie_cap; + ); +}; + +#define CXL_CM_CAP_CAP_ID_RAS 0x2 +#define CXL_CM_CAP_CAP_ID_HDM 0x5 +#define CXL_CM_CAP_CAP_HDM_VERSION 1 + +struct cxl_reg_map { + bool valid; + int id; + unsigned long offset; + unsigned long size; +}; + +struct cxl_component_reg_map { + struct cxl_reg_map hdm_decoder; + struct cxl_reg_map ras; +}; + +struct cxl_device_reg_map { + struct cxl_reg_map status; + struct cxl_reg_map mbox; + struct cxl_reg_map memdev; +}; + +struct cxl_pmu_reg_map { + struct cxl_reg_map pmu; +}; + +/** + * struct cxl_register_map - DVSEC harvested register block mapping parameters + * @host: device for devm operations and logging + * @base: virtual base of the register-block-BAR + @block_offset + * @resource: physical resource base of the register block + * @max_size: maximum mapping size to perform register search + * @reg_type: see enum cxl_regloc_type + * @component_map: cxl_reg_map for component registers + * @device_map: cxl_reg_maps for device registers + * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units + */ +struct cxl_register_map { + struct device *host; + void __iomem *base; + resource_size_t resource; + resource_size_t max_size; + u8 reg_type; + union { + struct cxl_component_reg_map component_map; + struct cxl_device_reg_map device_map; + struct cxl_pmu_reg_map pmu_map; + }; +}; + +/** + * struct cxl_dpa_perf - DPA performance property entry + * @dpa_range: range for DPA address + * @coord: QoS performance data (i.e. latency, bandwidth) + * @cdat_coord: raw QoS performance data from CDAT + * @qos_class: QoS Class cookies + */ +struct cxl_dpa_perf { + struct range dpa_range; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; + int qos_class; +}; + +enum cxl_partition_mode { + CXL_PARTMODE_RAM, + CXL_PARTMODE_PMEM, +}; + +/** + * struct cxl_dpa_partition - DPA partition descriptor + * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) + * @perf: performance attributes of the partition from CDAT + * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... + */ +struct cxl_dpa_partition { + struct resource res; + struct cxl_dpa_perf perf; + enum cxl_partition_mode mode; +}; + +#define CXL_NR_PARTITIONS_MAX 2 + +/* + * cxl_decoder flags that define the type of memory / devices this + * decoder supports as well as configuration lock status See "CXL 2.0 + * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. + * Additionally indicate whether decoder settings were autodetected, + * user customized. + */ +#define CXL_DECODER_F_RAM BIT(0) +#define CXL_DECODER_F_PMEM BIT(1) +#define CXL_DECODER_F_TYPE2 BIT(2) +#define CXL_DECODER_F_TYPE3 BIT(3) +#define CXL_DECODER_F_LOCK BIT(4) +#define CXL_DECODER_F_ENABLE BIT(5) +#define CXL_DECODER_F_MASK GENMASK(5, 0) + +struct cxl_memdev_ops { + int (*probe)(struct cxl_memdev *cxlmd); +}; + +/** + * struct cxl_dev_state - The driver device state + * + * cxl_dev_state represents the CXL driver/device state. It provides an + * interface to mailbox commands as well as some cached data about the device. + * Currently only memory devices are represented. + * + * @dev: The device associated with this CXL state + * @cxlmd: The device representing the CXL.mem capabilities of @dev + * @reg_map: component and ras register mapping parameters + * @regs: Parsed register blocks + * @cxl_dvsec: Offset to the PCIe device DVSEC + * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) + * @media_ready: Indicate whether the device media is usable + * @dpa_res: Overall DPA resource tree for the device + * @part: DPA partition array + * @nr_partitions: Number of DPA partitions + * @serial: PCIe Device Serial Number + * @type: Generic Memory Class device or Vendor Specific Memory device + * @cxl_mbox: CXL mailbox context + * @cxlfs: CXL features context + */ +struct cxl_dev_state { + /* public for Type2 drivers */ + struct device *dev; + struct cxl_memdev *cxlmd; + + /* private for Type2 drivers */ + struct cxl_register_map reg_map; + struct cxl_regs regs; + int cxl_dvsec; + bool rcd; + bool media_ready; + struct resource dpa_res; + struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; + unsigned int nr_partitions; + u64 serial; + enum cxl_devtype type; + struct cxl_mailbox cxl_mbox; +#ifdef CONFIG_CXL_FEATURES + struct cxl_features_state *cxlfs; +#endif +}; + +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox); + +/** + * cxl_dev_state_create - safely create and cast a cxl dev state embedded in a + * driver specific struct. + * + * @parent: device behind the request + * @type: CXL device type + * @serial: device identification + * @dvsec: dvsec capability offset + * @drv_struct: driver struct embedding a cxl_dev_state struct + * @member: drv_struct member as cxl_dev_state + * @mbox: true if mailbox supported + * + * Returns a pointer to the drv_struct allocated and embedding a cxl_dev_state + * struct initialized. + * + * Introduced for Type2 driver support. + */ +#define devm_cxl_dev_state_create(parent, type, serial, dvsec, drv_struct, member, mbox) \ + ({ \ + static_assert(__same_type(struct cxl_dev_state, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ + sizeof(drv_struct), mbox); \ + }) + +/** + * cxl_map_component_regs - map cxl component registers + * + * @map: cxl register map to update with the mappings + * @regs: cxl component registers to work with + * @map_mask: cxl component regs to map + * + * Returns integer: success (0) or error (-ENOMEM) + * + * Made public for Type2 driver support. + */ +int cxl_map_component_regs(const struct cxl_register_map *map, + struct cxl_component_regs *regs, + unsigned long map_mask); +int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops); +struct cxl_region; +struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd, + struct cxl_region **cxlr); +struct range; +int cxl_get_region_range(struct cxl_region *region, struct range *range); +enum cxl_detach_mode { + DETACH_ONLY, + DETACH_INVALIDATE, +}; + +int cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + enum cxl_detach_mode mode); +void unregister_region(void *_cxlr); +struct cxl_port; +struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, + int interleave_ways, + unsigned long flags, + resource_size_t *max); +void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd); +struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd, + enum cxl_partition_mode mode, + resource_size_t alloc); +int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder **cxled, + int ways); +#endif /* __CXL_CXL_H__ */ diff --git a/include/cxl/pci.h b/include/cxl/pci.h new file mode 100644 index 0000000000000..a172439f08c60 --- /dev/null +++ b/include/cxl/pci.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ + +#ifndef __CXL_CXL_PCI_H__ +#define __CXL_CXL_PCI_H__ + +/* Register Block Identifier (RBI) */ +enum cxl_regloc_type { + CXL_REGLOC_RBI_EMPTY = 0, + CXL_REGLOC_RBI_COMPONENT, + CXL_REGLOC_RBI_VIRT, + CXL_REGLOC_RBI_MEMDEV, + CXL_REGLOC_RBI_PMU, + CXL_REGLOC_RBI_TYPES +}; + +struct cxl_register_map; + +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); +#endif diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 42cbeaba2a510..0c6087ea979b2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1637,18 +1637,6 @@ static inline void acpi_use_parent_companion(struct device *dev) ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } -#ifdef CONFIG_ACPI_HMAT -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access); -#else -static inline int hmat_update_target_coordinates(int nid, - struct access_coordinate *coord, - enum access_coordinate_class access) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ACPI_NUMA bool acpi_node_backed_by_real_pxm(int nid); #else diff --git a/include/linux/aer.h b/include/linux/aer.h index 02940be66324e..2b89bd940ac15 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -10,6 +10,7 @@ #include #include +#include #define AER_NONFATAL 0 #define AER_FATAL 1 @@ -53,15 +54,43 @@ struct aer_capability_regs { u16 uncor_err_source; }; +/** + * struct cxl_proto_err_work_data - Error information used in CXL error handling + * @severity: AER severity + * @pdev: PCI device detecting the error + */ +struct cxl_proto_err_work_data { + int severity; + struct pci_dev *pdev; +}; + #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); +void pci_aer_clear_fatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); +void pci_aer_unmask_internal_errors(struct pci_dev *dev); +void pci_aer_mask_internal_errors(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } +static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } +static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } +static inline void pci_aer_mask_internal_errors(struct pci_dev *dev) { } +#endif + +#ifdef CONFIG_CXL_RAS +bool cxl_error_is_native(struct pci_dev *dev); +int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd); +void cxl_register_proto_err_work(struct work_struct *work); +void cxl_unregister_proto_err_work(void); +#else +static inline bool cxl_error_is_native(struct pci_dev *dev) { return false; } +static inline int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd) { return 0; } +static inline void cxl_register_proto_err_work(struct work_struct *work) { } +static inline void cxl_unregister_proto_err_work(void) { } #endif void pci_print_aer(struct pci_dev *dev, int aer_severity, diff --git a/include/linux/memory.h b/include/linux/memory.h index 2a770e7c6ab1e..55f0a47c85ebf 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -115,13 +115,13 @@ struct notifier_block; struct mem_section; /* - * Priorities for the hotplug memory callback routines (stored in decreasing - * order in the callback chain) + * Priorities for the hotplug memory callback routines. Invoked from + * high to low. Higher priorities correspond to higher numbers. */ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -#define HMAT_CALLBACK_PRI 2 #define CXL_CALLBACK_PRI 5 +#define HMAT_CALLBACK_PRI 6 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 diff --git a/include/linux/node.h b/include/linux/node.h index 2c7529335b21a..866e3323f1fdc 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -85,6 +85,8 @@ struct node_cache_attrs { void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, enum access_coordinate_class access); +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -96,6 +98,12 @@ static inline void node_set_perf_attrs(unsigned int nid, enum access_coordinate_class access) { } + +static inline void node_update_perf_attrs(unsigned int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ +} #endif struct node { diff --git a/include/linux/pci.h b/include/linux/pci.h index a5837cd74faad..546f9e1c4b13c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -51,7 +51,7 @@ PCI_STATUS_PARITY) /* Number of reset methods used in pci_reset_fn_methods array in pci.c */ -#define PCI_NUM_RESET_METHODS 9 +#define PCI_NUM_RESET_METHODS 8 #define PCI_RESET_PROBE true #define PCI_RESET_DO_RESET false @@ -466,6 +466,7 @@ struct pci_dev { unsigned int is_pciehp:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ + unsigned int is_cxl:1; /* Compute Express Link (CXL) */ /* * Devices marked being untrusted are the ones that can potentially * execute DMA attacks and similar. They are typically connected @@ -773,6 +774,11 @@ static inline bool pci_is_display(struct pci_dev *pdev) return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY; } +static inline bool pcie_is_cxl(struct pci_dev *pci_dev) +{ + return pci_dev->is_cxl; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else @@ -891,6 +897,9 @@ enum pci_ers_result { /* No AER capabilities registered for the driver */ PCI_ERS_RESULT_NO_AER_DRIVER = (__force pci_ers_result_t) 6, + + /* System is unstable, panic. Is CXL specific */ + PCI_ERS_RESULT_PANIC = (__force pci_ers_result_t) 7, }; /* PCI bus error event callbacks */ @@ -1884,8 +1893,17 @@ static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } #ifdef CONFIG_PCIEAER bool pci_aer_available(void); +void pcie_clear_device_status(struct pci_dev *dev); +pci_ers_result_t pcie_ers_merge_result(enum pci_ers_result orig, + enum pci_ers_result new); #else static inline bool pci_aer_available(void) { return false; } +static inline void pcie_clear_device_status(struct pci_dev *dev) { } +static inline pci_ers_result_t pcie_ers_merge_result(enum pci_ers_result orig, + enum pci_ers_result new) +{ + return PCI_ERS_RESULT_NONE; +} #endif bool pci_ats_disabled(void); diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index fecfeb7c8be7f..9bbb2e0ee5d22 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -298,15 +298,17 @@ TRACE_EVENT(non_standard_event, TRACE_EVENT(aer_event, TP_PROTO(const char *dev_name, + const char *bus_type, const u32 status, const u8 severity, const u8 tlp_header_valid, struct pcie_tlp_log *tlp), - TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp), + TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp), TP_STRUCT__entry( __string( dev_name, dev_name ) + __string( bus_type, bus_type ) __field( u32, status ) __field( u8, severity ) __field( u8, tlp_header_valid) @@ -315,6 +317,7 @@ TRACE_EVENT(aer_event, TP_fast_assign( __assign_str(dev_name); + __assign_str(bus_type); __entry->status = status; __entry->severity = severity; __entry->tlp_header_valid = tlp_header_valid; @@ -326,8 +329,8 @@ TRACE_EVENT(aer_event, } ), - TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n", - __get_str(dev_name), + TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n", + __get_str(dev_name), __get_str(bus_type), __entry->severity == AER_CORRECTABLE ? "Corrected" : __entry->severity == AER_FATAL ? "Fatal" : "Uncorrected, non-fatal", diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index bfa9ada355c9b..6d1671e97b9c7 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1234,10 +1234,65 @@ /* Deprecated old name, replaced with PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE */ #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE -/* Compute Express Link (CXL r3.1, sec 8.1.5) */ -#define PCI_DVSEC_CXL_PORT 3 -#define PCI_DVSEC_CXL_PORT_CTL 0x0c -#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 +/* Compute Express Link (CXL r3.2, sec 8.1) + * + * Note that CXL DVSEC id 3 and 7 to be ignored when the CXL link state + * is "disconnected" (CXL r3.2, sec 9.12.3). Re-enumerate these + * registers on downstream link-up events. + */ + +#define PCI_DVSEC_HEADER1_LENGTH_MASK __GENMASK(31, 20) + +/* CXL 3.2 8.1.3: PCIe DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE 0 +#define PCI_DVSEC_CXL_CAP_OFFSET 0xA +#define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2) +#define PCI_DVSEC_CXL_HDM_COUNT_MASK __GENMASK(5, 4) +#define PCI_DVSEC_CXL_CTRL_OFFSET 0xC +#define PCI_DVSEC_CXL_MEM_ENABLE _BITUL(2) +#define PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_INFO_VALID _BITUL(0) +#define PCI_DVSEC_CXL_MEM_ACTIVE _BITUL(1) +#define PCI_DVSEC_CXL_MEM_SIZE_LOW_MASK __GENMASK(31, 28) +#define PCI_DVSEC_CXL_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_BASE_LOW_MASK __GENMASK(31, 28) + +#define PCI_DVSEC_CXL_RANGE_MAX 2 + +/* CXL 3.2 8.1.4: Non-CXL Function Map DVSEC */ +#define PCI_DVSEC_CXL_FUNCTION_MAP 2 + +/* CXL 3.2 8.1.5: Extensions DVSEC for Ports */ +#define PCI_DVSEC_CXL_PORT 3 +#define PCI_DVSEC_CXL_PORT_CTL 0x0c +#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 + +/* CXL 3.2 8.1.6: GPF DVSEC for CXL Port */ +#define PCI_DVSEC_CXL_PORT_GPF 4 +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE_MASK __GENMASK(3, 0) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE_MASK __GENMASK(11, 8) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE_MASK __GENMASK(3, 0) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE_MASK __GENMASK(11, 8) + +/* CXL 3.2 8.1.7: GPF DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE_GPF 5 + +/* CXL 3.2 8.1.8: PCIe DVSEC for Flex Bus Port */ +#define PCI_DVSEC_CXL_FLEXBUS_PORT 7 +#define PCI_DVSEC_CXL_FLEXBUS_STATUS_OFFSET 0xE +#define PCI_DVSEC_CXL_FLEXBUS_STATUS_CACHE_MASK _BITUL(0) +#define PCI_DVSEC_CXL_FLEXBUS_STATUS_MEM_MASK _BITUL(2) + +/* CXL 3.2 8.1.9: Register Locator DVSEC */ +#define PCI_DVSEC_CXL_REG_LOCATOR 8 +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1_OFFSET 0xC +#define PCI_DVSEC_CXL_REG_LOCATOR_BIR_MASK __GENMASK(2, 0) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID_MASK __GENMASK(15, 8) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW_MASK __GENMASK(31, 16) /* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ #define CXL_DVSEC_PCIE_DEVICE 0 diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index d07f14cb7aa45..d8b8272ef87bd 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -5,22 +5,19 @@ ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=nvdimm_bus_register ldflags-y += --wrap=devm_cxl_port_enumerate_dports -ldflags-y += --wrap=devm_cxl_setup_hdm -ldflags-y += --wrap=devm_cxl_add_passthrough_decoder -ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready -ldflags-y += --wrap=cxl_hdm_decode_init -ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat ldflags-y += --wrap=cxl_dport_init_ras_reporting +ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl CXL_CORE_SRC := $(DRIVERS)/cxl/core ccflags-y := -I$(srctree)/drivers/cxl/ ccflags-y += -D__mock=__weak +ccflags-y += -DCXL_TEST_ENABLE=1 ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/ obj-m += cxl_acpi.o @@ -61,12 +58,14 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o -cxl_core-y += $(CXL_CORE_SRC)/ras.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o +cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o +cxl_core-$(CONFIG_CXL_RCH_RAS) += $(CXL_CORE_SRC)/ras_rch.o +cxl_core-$(CONFIG_CXL_PCI) += $(CXL_CORE_SRC)/pci_drv.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c index f088792a8925f..6754de35598d5 100644 --- a/tools/testing/cxl/cxl_core_exports.c +++ b/tools/testing/cxl/cxl_core_exports.c @@ -2,6 +2,28 @@ /* Copyright(c) 2022 Intel Corporation. All rights reserved. */ #include "cxl.h" +#include "exports.h" /* Exporting of cxl_core symbols that are only used by cxl_test */ EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL"); + +cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev = + __devm_cxl_add_dport_by_dev; +EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL"); + +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + return _devm_cxl_add_dport_by_dev(port, dport_dev); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL"); + +cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup = + __devm_cxl_switch_port_decoders_setup; +EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL"); + +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + return _devm_cxl_switch_port_decoders_setup(port); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL"); diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h new file mode 100644 index 0000000000000..7ebee7c0bd67e --- /dev/null +++ b/tools/testing/cxl/exports.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2025 Intel Corporation */ +#ifndef __MOCK_CXL_EXPORTS_H_ +#define __MOCK_CXL_EXPORTS_H_ + +typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port, + struct device *dport_dev); +extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev; + +typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port); +extern cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup; + +#endif diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index f4dceecf7e335..cb18ee41a7cf8 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -210,7 +210,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -225,7 +225,7 @@ static struct { }, .interleave_ways = 1, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -240,7 +240,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -255,7 +255,7 @@ static struct { }, .interleave_ways = 1, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -270,7 +270,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -285,7 +285,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M, @@ -302,7 +302,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -318,7 +318,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 1, .granularity = 0, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -334,7 +334,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 8, .granularity = 1, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_512M * 6UL, @@ -643,15 +643,8 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port, return cxlhdm; } -static int mock_cxl_add_passthrough_decoder(struct cxl_port *port) -{ - dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n"); - return -EOPNOTSUPP; -} - - struct target_map_ctx { - int *target_map; + u32 *target_map; int index; int target_count; }; @@ -894,15 +887,21 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) */ if (WARN_ON(!dev)) continue; + cxlsd = to_cxl_switch_decoder(dev); if (i == 0) { /* put cxl_mem.4 second in the decode order */ - if (pdev->id == 4) + if (pdev->id == 4) { cxlsd->target[1] = dport; - else + cxld->target_map[1] = dport->port_id; + } else { cxlsd->target[0] = dport; - } else + cxld->target_map[0] = dport->port_id; + } + } else { cxlsd->target[0] = dport; + cxld->target_map[0] = dport->port_id; + } cxld = &cxlsd->cxld; cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->flags = CXL_DECODER_F_ENABLE; @@ -955,9 +954,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, target_count = NR_CXL_SWITCH_PORTS; for (i = 0; i < NR_CXL_PORT_DECODERS; i++) { - int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; struct target_map_ctx ctx = { - .target_map = target_map, .target_count = target_count, }; struct cxl_decoder *cxld; @@ -986,6 +983,8 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxld = &cxled->cxld; } + ctx.target_map = cxld->target_map; + mock_init_hdm_decoder(cxld); if (target_count) { @@ -997,7 +996,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, } } - rc = cxl_decoder_add_locked(cxld, target_map); + rc = cxl_decoder_add_locked(cxld); if (rc) { put_device(&cxld->dev); dev_err(&port->dev, "Failed to add decoder\n"); @@ -1013,10 +1012,42 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return 0; } -static int mock_cxl_port_enumerate_dports(struct cxl_port *port) +static int __mock_cxl_decoders_setup(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm; + + cxlhdm = mock_cxl_setup_hdm(port, NULL); + if (IS_ERR(cxlhdm)) { + if (PTR_ERR(cxlhdm) != -ENODEV) + dev_err(&port->dev, "Failed to map HDM decoder capability\n"); + return PTR_ERR(cxlhdm); + } + + return mock_cxl_enumerate_decoders(cxlhdm, NULL); +} + +static int mock_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + if (is_cxl_root(port) || is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + return __mock_cxl_decoders_setup(port); +} + +static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port) +{ + if (!is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + return __mock_cxl_decoders_setup(port); +} + +static int get_port_array(struct cxl_port *port, + struct platform_device ***port_array, + int *port_array_size) { struct platform_device **array; - int i, array_size; + int array_size; if (port->depth == 1) { if (is_multi_bridge(port->uport_dev)) { @@ -1050,6 +1081,22 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return -ENXIO; } + *port_array = array; + *port_array_size = array_size; + + return 0; +} + +static int mock_cxl_port_enumerate_dports(struct cxl_port *port) +{ + struct platform_device **array; + int i, array_size; + int rc; + + rc = get_port_array(port, &array, &array_size); + if (rc) + return rc; + for (i = 0; i < array_size; i++) { struct platform_device *pdev = array[i]; struct cxl_dport *dport; @@ -1071,6 +1118,36 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return 0; } +static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + struct platform_device **array; + int rc, i, array_size; + + rc = get_port_array(port, &array, &array_size); + if (rc) + return ERR_PTR(rc); + + for (i = 0; i < array_size; i++) { + struct platform_device *pdev = array[i]; + + if (pdev->dev.parent != port->uport_dev) { + dev_dbg(&port->dev, "%s: mismatch parent %s\n", + dev_name(port->uport_dev), + dev_name(pdev->dev.parent)); + continue; + } + + if (&pdev->dev != dport_dev) + continue; + + return devm_cxl_add_dport(port, &pdev->dev, pdev->id, + CXL_RESOURCE_NONE); + } + + return ERR_PTR(-ENODEV); +} + /* * Faking the cxl_dpa_perf for the memdev when appropriate. */ @@ -1127,11 +1204,11 @@ static struct cxl_mock_ops cxl_mock_ops = { .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, + .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup, + .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup, .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, - .devm_cxl_setup_hdm = mock_cxl_setup_hdm, - .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder, - .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders, .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat, + .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index d533481672b78..6fbe0af3e8f88 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1717,7 +1717,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mds = cxl_memdev_state_create(dev); + mds = cxl_memdev_state_create(dev, pdev->id + 1, 0); if (IS_ERR(mds)) return PTR_ERR(mds); @@ -1733,7 +1733,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); - cxlds->serial = pdev->id + 1; if (is_rcd(pdev)) cxlds->rcd = true; @@ -1768,7 +1767,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) cxl_mock_add_event_logs(&mdata->mes); - cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); + cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds, NULL); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 1989ae020df3d..995269a75cbd1 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -10,12 +10,21 @@ #include #include #include "mock.h" +#include "../exports.h" static LIST_HEAD(mock); +static struct cxl_dport * +redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); +static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port); + void register_cxl_mock_ops(struct cxl_mock_ops *ops) { list_add_rcu(&ops->list, &mock); + _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev; + _devm_cxl_switch_port_decoders_setup = + redirect_devm_cxl_switch_port_decoders_setup; } EXPORT_SYMBOL_GPL(register_cxl_mock_ops); @@ -23,6 +32,9 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu); void unregister_cxl_mock_ops(struct cxl_mock_ops *ops) { + _devm_cxl_switch_port_decoders_setup = + __devm_cxl_switch_port_decoders_setup; + _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev; list_del_rcu(&ops->list); synchronize_srcu(&cxl_mock_srcu); } @@ -131,55 +143,34 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); -struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) - -{ - int index; - struct cxl_hdm *cxlhdm; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(port->uport_dev)) - cxlhdm = ops->devm_cxl_setup_hdm(port, info); - else - cxlhdm = devm_cxl_setup_hdm(port, info); - put_cxl_mock_ops(index); - - return cxlhdm; -} -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL"); - -int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) +int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port) { int rc, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_add_passthrough_decoder(port); + rc = ops->devm_cxl_switch_port_decoders_setup(port); else - rc = devm_cxl_add_passthrough_decoder(port); + rc = __devm_cxl_switch_port_decoders_setup(port); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL"); -int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) +int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port) { int rc, index; - struct cxl_port *port = cxlhdm->port; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info); + rc = ops->devm_cxl_endpoint_decoders_setup(port); else - rc = devm_cxl_enumerate_decoders(cxlhdm, info); + rc = devm_cxl_endpoint_decoders_setup(port); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL"); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL"); int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) { @@ -211,39 +202,6 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL"); -int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) -{ - int rc = 0, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_dev(cxlds->dev)) - rc = 0; - else - rc = cxl_hdm_decode_init(cxlds, cxlhdm, info); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL"); - -int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) -{ - int rc = 0, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_dev(cxlds->dev)) - rc = 0; - else - rc = cxl_dvsec_rr_decode(cxlds, info); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL"); - struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, @@ -311,6 +269,22 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL"); +struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + struct cxl_dport *dport; + + if (ops && ops->is_mock_port(port->uport_dev)) + dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev); + else + dport = __devm_cxl_add_dport_by_dev(port, dport_dev); + put_cxl_mock_ops(index); + + return dport; +} + MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("cxl_test: emulation module"); MODULE_IMPORT_NS("ACPI"); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index d1b0271d28220..4ed932e76aae8 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -20,12 +20,11 @@ struct cxl_mock_ops { bool (*is_mock_port)(struct device *dev); bool (*is_mock_dev)(struct device *dev); int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port); - struct cxl_hdm *(*devm_cxl_setup_hdm)( - struct cxl_port *port, struct cxl_endpoint_dvsec_info *info); - int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port); - int (*devm_cxl_enumerate_decoders)( - struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info); + int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port); + int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port); void (*cxl_endpoint_parse_cdat)(struct cxl_port *port); + struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port, + struct device *dport_dev); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops);