From 663a7e016142492070b4f7cdc05876032263830c Mon Sep 17 00:00:00 2001 From: Jiandi An Date: Mon, 16 Feb 2026 01:21:32 -0600 Subject: [PATCH 01/80] Revert "NVIDIA: VR: SAUCE: cxl: add support for cxl reset" This reverts commit 0e06082184df86b7fe96d3f4d5a0fd075aac4002. The CXL reset implementation is being reverted to allow "NVIDIA: VR: SAUCE: CXL/PCI: Move CXL DVSEC definitions into uapi/linux/pci_regs.h" to apply cleanly. The reset functionality will be replaced by the version currently being pursued upstream. Signed-off-by: Jiandi An --- drivers/cxl/cxlpci.h | 40 ++++-------- drivers/pci/pci.c | 147 ------------------------------------------- include/linux/pci.h | 2 +- 3 files changed, 14 insertions(+), 175 deletions(-) diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 67ad5b007498e..54e219b0049ea 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -16,33 +16,19 @@ /* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ #define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_CACHE_CAPABLE BIT(0) -#define CXL_DVSEC_MEM_CAPABLE BIT(2) -#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) -#define CXL_DVSEC_CACHE_WBI_CAPABLE BIT(6) -#define CXL_DVSEC_CXL_RST_CAPABLE BIT(7) -#define CXL_DVSEC_CXL_RST_TIMEOUT_MASK GENMASK(10, 8) -#define CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE BIT(11) -#define CXL_DVSEC_CTRL_OFFSET 0xC -#define CXL_DVSEC_MEM_ENABLE BIT(2) -#define CXL_DVSEC_CTRL2_OFFSET 0x10 -#define CXL_DVSEC_DISABLE_CACHING BIT(0) -#define CXL_DVSEC_INIT_CACHE_WBI BIT(1) -#define CXL_DVSEC_INIT_CXL_RESET BIT(2) -#define CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE BIT(3) -#define CXL_DVSEC_STATUS2_OFFSET 0x12 -#define CXL_DVSEC_CACHE_INVALID BIT(0) -#define CXL_DVSEC_CXL_RST_COMPLETE BIT(1) -#define CXL_DVSEC_CXL_RESET_ERR BIT(2) -#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + ((i) * 0x10)) -#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + ((i) * 0x10)) -#define CXL_DVSEC_MEM_INFO_VALID BIT(0) -#define CXL_DVSEC_MEM_ACTIVE BIT(1) -#define 
CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) -#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + ((i) * 0x10)) -#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + ((i) * 0x10)) -#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) +#define CXL_DVSEC_CAP_OFFSET 0xA +#define CXL_DVSEC_MEM_CAPABLE BIT(2) +#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) +#define CXL_DVSEC_CTRL_OFFSET 0xC +#define CXL_DVSEC_MEM_ENABLE BIT(2) +#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define CXL_DVSEC_MEM_INFO_VALID BIT(0) +#define CXL_DVSEC_MEM_ACTIVE BIT(1) +#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) +#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) #define CXL_DVSEC_RANGE_MAX 2 diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 9a6943688e6db..372de7961d2a6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -30,7 +30,6 @@ #include #include #include -#include "../cxl/cxlpci.h" #include "pci.h" DEFINE_MUTEX(pci_slot_mutex); @@ -5134,151 +5133,6 @@ static int cxl_reset_bus_function(struct pci_dev *dev, bool probe) return rc; } -static int cxl_reset_prepare(struct pci_dev *dev, u16 dvsec) -{ - u32 timeout_us = 100, timeout_tot_us = 10000; - u16 reg, cap; - int rc; - - if (!pci_wait_for_pending_transaction(dev)) - pci_err(dev, "timed out waiting for pending transaction; performing cxl reset anyway\n"); - - /* Check if the device is cache capable. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, &cap); - if (rc) - return rc; - - if (!(cap & CXL_DVSEC_CACHE_CAPABLE)) - return 0; - - /* Disable cache. WB and invalidate cache if capability is advertised */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); - if (rc) - return rc; - reg |= CXL_DVSEC_DISABLE_CACHING; - /* - * DEVCTL2 bits are written only once. 
So check WB+I capability while - * keeping disable caching set. - */ - if (cap & CXL_DVSEC_CACHE_WBI_CAPABLE) - reg |= CXL_DVSEC_INIT_CACHE_WBI; - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - - /* - * From Section 9.6: "Software may leverage the cache size reported in - * the DVSEC CXL Capability2 register to compute a suitable timeout - * value". - * Given there is no conversion factor for cache size -> timeout, - * setting timer for default 10ms. - */ - do { - if (timeout_tot_us == 0) - return -ETIMEDOUT; - usleep_range(timeout_us, timeout_us + 1); - timeout_tot_us -= timeout_us; - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, - ®); - if (rc) - return rc; - } while (!(reg & CXL_DVSEC_CACHE_INVALID)); - - return 0; -} - -static int cxl_reset_init(struct pci_dev *dev, u16 dvsec) -{ - /* - * Timeout values ref CXL Spec v3.2 Ch 8 Control and Status Registers, - * under section 8.1.3.1 DVSEC CXL Capability. - */ - u32 reset_timeouts_ms[] = { 10, 100, 1000, 10000, 100000 }; - u16 reg; - u32 timeout_ms; - int rc, ind; - - /* Check if CXL Reset MEM CLR is supported. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); - if (rc) - return rc; - - if (reg & CXL_DVSEC_CXL_RST_MEM_CLR_CAPABLE) { - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, - ®); - if (rc) - return rc; - - reg |= CXL_DVSEC_CXL_RST_MEM_CLR_ENABLE; - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - } - - /* Read timeout value. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); - if (rc) - return rc; - ind = FIELD_GET(CXL_DVSEC_CXL_RST_TIMEOUT_MASK, reg); - timeout_ms = reset_timeouts_ms[ind]; - - /* Write reset config. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); - if (rc) - return rc; - - reg |= CXL_DVSEC_INIT_CXL_RESET; - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - - /* Wait till timeout and then check reset status is complete. 
*/ - msleep(timeout_ms); - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_STATUS2_OFFSET, ®); - if (rc) - return rc; - if (reg & CXL_DVSEC_CXL_RESET_ERR || - ~reg & CXL_DVSEC_CXL_RST_COMPLETE) - return -ETIMEDOUT; - - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, ®); - if (rc) - return rc; - reg &= (~CXL_DVSEC_DISABLE_CACHING); - pci_write_config_word(dev, dvsec + CXL_DVSEC_CTRL2_OFFSET, reg); - - return 0; -} - -/** - * cxl_reset - initiate a cxl reset - * @dev: device to reset - * @probe: if true, return 0 if device can be reset this way - * - * Initiate a cxl reset on @dev. - */ -static int cxl_reset(struct pci_dev *dev, bool probe) -{ - u16 dvsec, reg; - int rc; - - dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_PCIE_DEVICE); - if (!dvsec) - return -ENOTTY; - - /* Check if CXL Reset is supported. */ - rc = pci_read_config_word(dev, dvsec + CXL_DVSEC_CAP_OFFSET, ®); - if (rc) - return -ENOTTY; - - if ((reg & CXL_DVSEC_CXL_RST_CAPABLE) == 0) - return -ENOTTY; - - if (probe) - return 0; - - rc = cxl_reset_prepare(dev, dvsec); - if (rc) - return rc; - - return cxl_reset_init(dev, dvsec); -} - void pci_dev_lock(struct pci_dev *dev) { /* block PM suspend, driver probe, etc. 
*/ @@ -5365,7 +5219,6 @@ const struct pci_reset_fn_method pci_reset_fn_methods[] = { { pci_dev_acpi_reset, .name = "acpi" }, { pcie_reset_flr, .name = "flr" }, { pci_af_flr, .name = "af_flr" }, - { cxl_reset, .name = "cxl_reset" }, { pci_pm_reset, .name = "pm" }, { pci_reset_bus_function, .name = "bus" }, { cxl_reset_bus_function, .name = "cxl_bus" }, diff --git a/include/linux/pci.h b/include/linux/pci.h index a5837cd74faad..1bdfd152eb1f8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -51,7 +51,7 @@ PCI_STATUS_PARITY) /* Number of reset methods used in pci_reset_fn_methods array in pci.c */ -#define PCI_NUM_RESET_METHODS 9 +#define PCI_NUM_RESET_METHODS 8 #define PCI_RESET_PROBE true #define PCI_RESET_DO_RESET false From 9873464d7fd4a62eaa84218f3df8cb52836b69fe Mon Sep 17 00:00:00 2001 From: Xichao Zhao Date: Mon, 11 Aug 2025 20:25:19 +0800 Subject: [PATCH 02/80] cxl/hdm: Use str_plural() to simplify the code Use the string choice helper function str_plural() to simplify the code. Signed-off-by: Xichao Zhao Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20250811122519.543554-1-zhao.xichao@vivo.com Signed-off-by: Dave Jiang (cherry picked from commit 22fb4ad898853323f4943de3e0dc555915547ccc) Signed-off-by: Jiandi An --- drivers/cxl/core/hdm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index e930191057c04..777b8ac0c49c1 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -197,7 +197,7 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, */ if (should_emulate_decoders(info)) { dev_dbg(dev, "Fallback map %d range register%s\n", info->ranges, - info->ranges > 1 ? 
"s" : ""); + str_plural(info->ranges)); cxlhdm->decoder_count = info->ranges; } From 97c08e97974bd09b070e8f8ba461f02bcbeaad66 Mon Sep 17 00:00:00 2001 From: Nai-Chen Cheng Date: Tue, 12 Aug 2025 00:49:46 +0800 Subject: [PATCH 03/80] cxl/region: use str_enabled_disabled() instead of ternary operator Replace ternary operator with str_enabled_disabled() helper to enhance code readability and consistency. [dj: Fix spelling in commit log and subject. ] Signed-off-by: Nai-Chen Cheng Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250812-cxl-region-string-choices-v1-1-50200b0bc782@gmail.com Signed-off-by: Dave Jiang (cherry picked from commit 733c4e9bcec9c481afee3891218277d9ecd06599) Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index adebbb1db5078..5c581b175013c 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -1468,9 +1469,7 @@ static int cxl_port_setup_targets(struct cxl_port *port, dev_name(port->uport_dev), dev_name(&port->dev), __func__, cxld->interleave_ways, cxld->interleave_granularity, - (cxld->flags & CXL_DECODER_F_ENABLE) ? - "enabled" : - "disabled", + str_enabled_disabled(cxld->flags & CXL_DECODER_F_ENABLE), cxld->hpa_range.start, cxld->hpa_range.end); return -ENXIO; } From c00d37ad3f0aa7f553d9ea780d6c7e9bbe042744 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 4 Aug 2025 01:00:09 -0700 Subject: [PATCH 04/80] cxl: Move hpa_to_spa callback to a new root decoder ops structure The root decoder's HPA to SPA translation logic was implemented using a single function pointer. In preparation for additional per-decoder callbacks, convert this into a struct cxl_rd_ops and move the hpa_to_spa pointer into it. 
To avoid maintaining a static ops instance populated with mostly NULL pointers, allocate the ops structure dynamically only when a platform requires overrides (e.g. XOR interleave decoding). The setup can be extended as additional callbacks are added. Co-developed-by: Dave Jiang Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/818530c82c351a9c0d3a204f593068dd2126a5a9.1754290144.git.alison.schofield@intel.com Signed-off-by: Dave Jiang (cherry picked from commit 524b2b76f365fb90a7f894ac17261ea760464e2c) Signed-off-by: Jiandi An --- drivers/cxl/acpi.c | 10 +++++++--- drivers/cxl/core/port.c | 1 + drivers/cxl/core/region.c | 11 ++++++++--- drivers/cxl/cxl.h | 12 +++++++++--- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 87f0ed3f3f51f..de5f08122aa92 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -20,7 +20,6 @@ static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); - static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) { struct cxl_cxims_data *cximsd = cxlrd->platform_data; @@ -472,8 +471,13 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, cxlrd->qos_class = cfmws->qtg_id; - if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) - cxlrd->hpa_to_spa = cxl_xor_hpa_to_spa; + if (cfmws->interleave_arithmetic == ACPI_CEDT_CFMWS_ARITHMETIC_XOR) { + cxlrd->ops = kzalloc(sizeof(*cxlrd->ops), GFP_KERNEL); + if (!cxlrd->ops) + return -ENOMEM; + + cxlrd->ops->hpa_to_spa = cxl_xor_hpa_to_spa; + } rc = cxl_decoder_add(cxld, target_map); if (rc) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 29197376b18e3..8f36ff413f5d5 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -450,6 +450,7 @@ static void cxl_root_decoder_release(struct device *dev) if (atomic_read(&cxlrd->region_id) >= 0) 
memregion_free(atomic_read(&cxlrd->region_id)); __cxl_decoder_release(&cxlrd->cxlsd.cxld); + kfree(cxlrd->ops); kfree(cxlrd); } diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 5c581b175013c..ef1f69ba8899d 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2917,6 +2917,11 @@ static bool cxl_is_hpa_in_chunk(u64 hpa, struct cxl_region *cxlr, int pos) return false; } +static bool has_hpa_to_spa(struct cxl_root_decoder *cxlrd) +{ + return cxlrd->ops && cxlrd->ops->hpa_to_spa; +} + u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) { @@ -2971,8 +2976,8 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, hpa = hpa_offset + p->res->start + p->cache_size; /* Root decoder translation overrides typical modulo decode */ - if (cxlrd->hpa_to_spa) - hpa = cxlrd->hpa_to_spa(cxlrd, hpa); + if (has_hpa_to_spa(cxlrd)) + hpa = cxlrd->ops->hpa_to_spa(cxlrd, hpa); if (!cxl_resource_contains_addr(p->res, hpa)) { dev_dbg(&cxlr->dev, @@ -2981,7 +2986,7 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, } /* Simple chunk check, by pos & gran, only applies to modulo decodes */ - if (!cxlrd->hpa_to_spa && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) + if (!has_hpa_to_spa(cxlrd) && (!cxl_is_hpa_in_chunk(hpa, cxlr, pos))) return ULLONG_MAX; return hpa; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 847e37be42c47..4b247ab188833 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -419,27 +419,33 @@ struct cxl_switch_decoder { }; struct cxl_root_decoder; -typedef u64 (*cxl_hpa_to_spa_fn)(struct cxl_root_decoder *cxlrd, u64 hpa); +/** + * struct cxl_rd_ops - CXL root decoder callback operations + * @hpa_to_spa: Convert host physical address to system physical address + */ +struct cxl_rd_ops { + u64 (*hpa_to_spa)(struct cxl_root_decoder *cxlrd, u64 hpa); +}; /** * struct cxl_root_decoder - Static platform CXL address decoder * @res: host / parent 
resource for region allocations * @cache_size: extended linear cache size if exists, otherwise zero. * @region_id: region id for next region provisioning event - * @hpa_to_spa: translate CXL host-physical-address to Platform system-physical-address * @platform_data: platform specific configuration data * @range_lock: sync region autodiscovery by address range * @qos_class: QoS performance class cookie + * @ops: CXL root decoder operations * @cxlsd: base cxl switch decoder */ struct cxl_root_decoder { struct resource *res; resource_size_t cache_size; atomic_t region_id; - cxl_hpa_to_spa_fn hpa_to_spa; void *platform_data; struct mutex range_lock; int qos_class; + struct cxl_rd_ops *ops; struct cxl_switch_decoder cxlsd; }; From abd99e40864af94ddd5b8d28b1b71e045d6f7459 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 4 Aug 2025 01:00:10 -0700 Subject: [PATCH 05/80] cxl: Define a SPA->CXL HPA root decoder callback for XOR Math When DPA->SPA translation was introduced, it included a helper that applied the XOR maps to do the CXL HPA -> SPA translation for XOR region interleaves. In preparation for adding SPA->DPA address translation, introduce the reverse callback. The root decoder callback is defined generically and not all usages may be self inverting like this XOR function. Add another root decoder callback that is the spa_to_hpa function. Update the existing cxl_xor_hpa_to_spa() with a name that reflects what it does without directionality: cxl_apply_xor_maps(), a generic parameter: addr replaces hpa, and code comments stating that the function supports the translation in either direction. 
Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/79d9d72230c599cae94d7221781ead6392ae6d3f.1754290144.git.alison.schofield@intel.com Signed-off-by: Dave Jiang (cherry picked from commit b83ee9614a3ec196111f0ae54335b99700f78b45) Signed-off-by: Jiandi An --- drivers/cxl/acpi.c | 27 ++++++++++++++++----------- drivers/cxl/cxl.h | 2 ++ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index de5f08122aa92..b9ba1c33e4d24 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -20,7 +20,7 @@ static const guid_t acpi_cxl_qtg_id_guid = GUID_INIT(0xF365F9A6, 0xA7DE, 0x4071, 0xA6, 0x6A, 0xB4, 0x0C, 0x0B, 0x4F, 0x8E, 0x52); -static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) +static u64 cxl_apply_xor_maps(struct cxl_root_decoder *cxlrd, u64 addr) { struct cxl_cxims_data *cximsd = cxlrd->platform_data; int hbiw = cxlrd->cxlsd.nr_targets; @@ -29,19 +29,23 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) /* No xormaps for host bridge interleave ways of 1 or 3 */ if (hbiw == 1 || hbiw == 3) - return hpa; + return addr; /* - * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) restore - * the position bit to its value before the xormap was applied at - * HPA->DPA translation. + * In regions using XOR interleave arithmetic the CXL HPA may not + * be the same as the SPA. This helper performs the SPA->CXL HPA + * or the CXL HPA->SPA translation. Since XOR is self-inverting, + * so is this function. + * + * For root decoders using xormaps (hbiw: 2,4,6,8,12,16) applying the + * xormaps will toggle a position bit. * * pos is the lowest set bit in an XORMAP - * val is the XORALLBITS(HPA & XORMAP) + * val is the XORALLBITS(addr & XORMAP) * * XORALLBITS: The CXL spec (3.1 Table 9-22) defines XORALLBITS * as an operation that outputs a single bit by XORing all the - * bits in the input (hpa & xormap). 
Implement XORALLBITS using + * bits in the input (addr & xormap). Implement XORALLBITS using * hweight64(). If the hamming weight is even the XOR of those * bits results in val==0, if odd the XOR result is val==1. */ @@ -50,11 +54,11 @@ static u64 cxl_xor_hpa_to_spa(struct cxl_root_decoder *cxlrd, u64 hpa) if (!cximsd->xormaps[i]) continue; pos = __ffs(cximsd->xormaps[i]); - val = (hweight64(hpa & cximsd->xormaps[i]) & 1); - hpa = (hpa & ~(1ULL << pos)) | (val << pos); + val = (hweight64(addr & cximsd->xormaps[i]) & 1); + addr = (addr & ~(1ULL << pos)) | (val << pos); } - return hpa; + return addr; } struct cxl_cxims_context { @@ -476,7 +480,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, if (!cxlrd->ops) return -ENOMEM; - cxlrd->ops->hpa_to_spa = cxl_xor_hpa_to_spa; + cxlrd->ops->hpa_to_spa = cxl_apply_xor_maps; + cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps; } rc = cxl_decoder_add(cxld, target_map); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 4b247ab188833..4fe3df06f57a3 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -422,9 +422,11 @@ struct cxl_root_decoder; /** * struct cxl_rd_ops - CXL root decoder callback operations * @hpa_to_spa: Convert host physical address to system physical address + * @spa_to_hpa: Convert system physical address to host physical address */ struct cxl_rd_ops { u64 (*hpa_to_spa)(struct cxl_root_decoder *cxlrd, u64 hpa); + u64 (*spa_to_hpa)(struct cxl_root_decoder *cxlrd, u64 spa); }; /** From 5cb3a5919ec6563ed231811e3cd64d2140b03f9e Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 4 Aug 2025 01:00:11 -0700 Subject: [PATCH 06/80] cxl/region: Introduce SPA to DPA address translation Add infrastructure to translate System Physical Addresses (SPA) to Device Physical Addresses (DPA) within CXL regions. This capability will be used by follow-on patches that add poison inject and clear operations at the region level. The SPA-to-DPA translation process follows these steps: 1. 
Apply root decoder transformations (SPA to HPA) if configured. 2. Extract the position in region interleave from the HPA offset. 3. Extract the DPA offset from the HPA offset. 4. Use position to find endpoint decoder. 5. Use endpoint decoder to find memdev and calculate DPA from offset. 6. Return the result - a memdev and a DPA. It is Step 1 above that makes this a driver level operation and not work we can push to user space. Rather than exporting the XOR maps for root decoders configured with XOR interleave, the driver performs this complex calculation for the user. Steps 2 and 3 follow the CXL Spec 3.2 Section 8.2.4.20.13 Implementation Note: Device Decode Logic. These calculations mirror much of the logic introduced earlier in DPA to SPA translation, see cxl_dpa_to_hpa(), where the driver needed to reverse the spec defined 'Device Decode Logic'. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/422f0e27742c6ca9a11f7cd83e6ba9fa1a8d0c74.1754290144.git.alison.schofield@intel.com Signed-off-by: Dave Jiang (cherry picked from commit dc181170491bda9944f95ca39017667fe7fd767d) Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 101 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index ef1f69ba8899d..5892de29b470b 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2922,6 +2922,11 @@ static bool has_hpa_to_spa(struct cxl_root_decoder *cxlrd) return cxlrd->ops && cxlrd->ops->hpa_to_spa; } +static bool has_spa_to_hpa(struct cxl_root_decoder *cxlrd) +{ + return cxlrd->ops && cxlrd->ops->spa_to_hpa; +} + u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, u64 dpa) { @@ -2992,6 +2997,102 @@ u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd, return hpa; } +struct dpa_result { + struct cxl_memdev *cxlmd; + u64 dpa; +}; + +static int __maybe_unused 
region_offset_to_dpa_result(struct cxl_region *cxlr, + u64 offset, + struct dpa_result *result) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_endpoint_decoder *cxled; + u64 hpa, hpa_offset, dpa_offset; + u64 bits_upper, bits_lower; + u64 shifted, rem, temp; + u16 eig = 0; + u8 eiw = 0; + int pos; + + lockdep_assert_held(&cxl_rwsem.region); + lockdep_assert_held(&cxl_rwsem.dpa); + + /* Input validation ensures valid ways and gran */ + granularity_to_eig(p->interleave_granularity, &eig); + ways_to_eiw(p->interleave_ways, &eiw); + + /* + * If the root decoder has SPA to CXL HPA callback, use it. Otherwise + * CXL HPA is assumed to equal SPA. + */ + if (has_spa_to_hpa(cxlrd)) { + hpa = cxlrd->ops->spa_to_hpa(cxlrd, p->res->start + offset); + hpa_offset = hpa - p->res->start; + } else { + hpa_offset = offset; + } + /* + * Interleave position: CXL Spec 3.2 Section 8.2.4.20.13 + * eiw < 8 + * Position is in the IW bits at HPA_OFFSET[IG+8+IW-1:IG+8]. + * Per spec "remove IW bits starting with bit position IG+8" + * eiw >= 8 + * Position is not explicitly stored in HPA_OFFSET bits. It is + * derived from the modulo operation of the upper bits using + * the total number of interleave ways. 
+ */ + if (eiw < 8) { + pos = (hpa_offset >> (eig + 8)) & GENMASK(eiw - 1, 0); + } else { + shifted = hpa_offset >> (eig + 8); + div64_u64_rem(shifted, p->interleave_ways, &rem); + pos = rem; + } + if (pos < 0 || pos >= p->nr_targets) { + dev_dbg(&cxlr->dev, "Invalid position %d for %d targets\n", + pos, p->nr_targets); + return -ENXIO; + } + + /* + * DPA offset: CXL Spec 3.2 Section 8.2.4.20.13 + * Lower bits [IG+7:0] pass through unchanged + * (eiw < 8) + * Per spec: DPAOffset[51:IG+8] = (HPAOffset[51:IG+IW+8] >> IW) + * Clear the position bits to isolate upper section, then + * reverse the left shift by eiw that occurred during DPA->HPA + * (eiw >= 8) + * Per spec: DPAOffset[51:IG+8] = HPAOffset[51:IG+IW] / 3 + * Extract upper bits from the correct bit range and divide by 3 + * to recover the original DPA upper bits + */ + bits_lower = hpa_offset & GENMASK_ULL(eig + 7, 0); + if (eiw < 8) { + temp = hpa_offset &= ~((u64)GENMASK(eig + eiw + 8 - 1, 0)); + dpa_offset = temp >> eiw; + } else { + bits_upper = div64_u64(hpa_offset >> (eig + eiw), 3); + dpa_offset = bits_upper << (eig + 8); + } + dpa_offset |= bits_lower; + + /* Look-up and return the result: a memdev and a DPA */ + for (int i = 0; i < p->nr_targets; i++) { + cxled = p->targets[i]; + if (cxled->pos != pos) + continue; + result->cxlmd = cxled_to_memdev(cxled); + result->dpa = cxl_dpa_resource_start(cxled) + dpa_offset; + + return 0; + } + dev_err(&cxlr->dev, "No device found for position %d\n", pos); + + return -ENXIO; +} + static struct lock_class_key cxl_pmem_region_key; static int cxl_pmem_region_alloc(struct cxl_region *cxlr) From 0476df848abe38ff00523c05ce5ccb270091d69a Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 4 Aug 2025 01:00:12 -0700 Subject: [PATCH 07/80] cxl/core: Add locked variants of the poison inject and clear funcs The core functions that validate and send inject and clear commands to the memdev devices require holding both the dpa_rwsem and the region_rwsem. 
In preparation for another caller of these functions that must hold the locks upon entry, split the work into a locked and unlocked pair. Consideration was given to moving the locking to both callers, however, the existing caller is not in the core (mem.c) and cannot access the locks. Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/1d601f586975195733984ca63d1b5789bbe8690f.1754290144.git.alison.schofield@intel.com Signed-off-by: Dave Jiang (cherry picked from commit 25a0207828bc52f1ebb6588f9417eb43ca4960a3) Signed-off-by: Jiandi An --- drivers/cxl/core/memdev.c | 52 +++++++++++++++++++++++++++------------ drivers/cxl/cxlmem.h | 2 ++ 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index c569e00a511f4..90d3390d9c7c6 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -276,7 +276,7 @@ static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa) return 0; } -int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +int cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_inject_poison inject; @@ -288,13 +288,8 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) - return rc; - - ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) - return rc; + lockdep_assert_held(&cxl_rwsem.dpa); + lockdep_assert_held(&cxl_rwsem.region); rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) @@ -324,9 +319,24 @@ int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) return 0; } + +int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = 
ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + return cxl_inject_poison_locked(cxlmd, dpa); +} EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, "CXL"); -int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa) { struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox; struct cxl_mbox_clear_poison clear; @@ -338,13 +348,8 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; - ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) - return rc; - - ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); - if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) - return rc; + lockdep_assert_held(&cxl_rwsem.dpa); + lockdep_assert_held(&cxl_rwsem.region); rc = cxl_validate_poison_dpa(cxlmd, dpa); if (rc) @@ -383,6 +388,21 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) return 0; } + +int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa) +{ + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + return cxl_clear_poison_locked(cxlmd, dpa); +} EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, "CXL"); static struct attribute *cxl_memdev_attributes[] = { diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 751478dfc4106..434031a0c1f74 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -869,6 +869,8 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len, int cxl_trigger_poison_list(struct cxl_memdev *cxlmd); int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa); int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa); +int 
cxl_inject_poison_locked(struct cxl_memdev *cxlmd, u64 dpa); +int cxl_clear_poison_locked(struct cxl_memdev *cxlmd, u64 dpa); #ifdef CONFIG_CXL_EDAC_MEM_FEATURES int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd); From 0ff555190015c623ecbdbf53f29ce39bc66983de Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Mon, 4 Aug 2025 01:00:13 -0700 Subject: [PATCH 08/80] cxl/region: Add inject and clear poison by region offset Add CXL region debugfs attributes to inject and clear poison based on an offset into the region. These new interfaces allow users to operate on poison at the region level without needing to resolve Device Physical Addresses (DPA) or target individual memdevs. The implementation uses a new helper, region_offset_to_dpa_result() that applies decoder interleave logic, including XOR-based address decoding when applicable. Note that XOR decodes rely on driver internal xormaps which are not exposed to userspace. So, this support is not only a simplification of poison operations that could be done using existing per memdev operations, but also it enables this functionality for XOR interleaved regions for the first time. New debugfs attributes are added in /sys/kernel/debug/cxl/regionX/: inject_poison and clear_poison. These are only exposed if all memdevs participating in the region support both inject and clear commands, ensuring consistent and reliable behavior across multi-device regions. If tracing is enabled, these operations are logged as cxl_poison events in /sys/kernel/tracing/trace. The ABI documentation warns users of the significant risks that come with using these capabilities. A CXL Maturity Map update shows this user flow is now supported. 
Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/f3fd8628ab57ea79704fb2d645902cd499c066af.1754290144.git.alison.schofield@intel.com Signed-off-by: Dave Jiang (cherry picked from commit c3dd67681c70cc95cc2c889b1b58a1667bb1c48b) Signed-off-by: Jiandi An --- Documentation/ABI/testing/debugfs-cxl | 87 ++++++++++++ Documentation/driver-api/cxl/maturity-map.rst | 2 +- drivers/cxl/core/core.h | 4 + drivers/cxl/core/memdev.c | 8 ++ drivers/cxl/core/region.c | 131 +++++++++++++++++- 5 files changed, 228 insertions(+), 4 deletions(-) diff --git a/Documentation/ABI/testing/debugfs-cxl b/Documentation/ABI/testing/debugfs-cxl index e95e21f131e96..2989d4da96c1b 100644 --- a/Documentation/ABI/testing/debugfs-cxl +++ b/Documentation/ABI/testing/debugfs-cxl @@ -19,6 +19,20 @@ Description: is returned to the user. The inject_poison attribute is only visible for devices supporting the capability. + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + DATA LOSS RISK: For CXL persistent memory (PMEM) devices, + poison injection can result in permanent data loss. Injected + poison may render data permanently inaccessible even after + clearing, as the clear operation writes zeros and does not + recover original data. + + SYSTEM STABILITY RISK: For volatile memory, poison injection + can cause kernel crashes, system instability, or unpredictable + behavior if the poisoned addresses are accessed by running code + or critical kernel structures. What: /sys/kernel/debug/cxl/memX/clear_poison Date: April, 2023 @@ -35,6 +49,79 @@ Description: The clear_poison attribute is only visible for devices supporting the capability. + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. 
+ + CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the + specified address range and removes the address from the poison + list. It does NOT recover or restore original data that may have + been present before poison injection. Any original data at the + cleared address is permanently lost and replaced with zeros. + + CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing + purposes only and should not be used as a data repair tool. + Clearing poison is fundamentally different from data recovery + or error correction. + +What: /sys/kernel/debug/cxl/regionX/inject_poison +Date: August, 2025 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) When a Host Physical Address (HPA) is written to this + attribute, the region driver translates it to a Device + Physical Address (DPA) and identifies the corresponding + memdev. It then sends an inject poison command to that memdev + at the translated DPA. Refer to the memdev ABI entry at: + /sys/kernel/debug/cxl/memX/inject_poison for the detailed + behavior. This attribute is only visible if all memdevs + participating in the region support both inject and clear + poison commands. + + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + DATA LOSS RISK: For CXL persistent memory (PMEM) devices, + poison injection can result in permanent data loss. Injected + poison may render data permanently inaccessible even after + clearing, as the clear operation writes zeros and does not + recover original data. + + SYSTEM STABILITY RISK: For volatile memory, poison injection + can cause kernel crashes, system instability, or unpredictable + behavior if the poisoned addresses are accessed by running code + or critical kernel structures. 
+ +What: /sys/kernel/debug/cxl/regionX/clear_poison +Date: August, 2025 +Contact: linux-cxl@vger.kernel.org +Description: + (WO) When a Host Physical Address (HPA) is written to this + attribute, the region driver translates it to a Device + Physical Address (DPA) and identifies the corresponding + memdev. It then sends a clear poison command to that memdev + at the translated DPA. Refer to the memdev ABI entry at: + /sys/kernel/debug/cxl/memX/clear_poison for the detailed + behavior. This attribute is only visible if all memdevs + participating in the region support both inject and clear + poison commands. + + TEST-ONLY INTERFACE: This interface is intended for testing + and validation purposes only. It is not a data repair mechanism + and should never be used on production systems or live data. + + CLEAR IS NOT DATA RECOVERY: This operation writes zeros to the + specified address range and removes the address from the poison + list. It does NOT recover or restore original data that may have + been present before poison injection. Any original data at the + cleared address is permanently lost and replaced with zeros. + + CLEAR IS NOT A REPAIR MECHANISM: This interface is for testing + purposes only and should not be used as a data repair tool. + Clearing poison is fundamentally different from data recovery + or error correction. 
+ What: /sys/kernel/debug/cxl/einj_types Date: January, 2024 KernelVersion: v6.9 diff --git a/Documentation/driver-api/cxl/maturity-map.rst b/Documentation/driver-api/cxl/maturity-map.rst index 1330f3f52129a..282c1102dd819 100644 --- a/Documentation/driver-api/cxl/maturity-map.rst +++ b/Documentation/driver-api/cxl/maturity-map.rst @@ -173,7 +173,7 @@ Accelerator User Flow Support ----------------- -* [0] Inject & clear poison by HPA +* [2] Inject & clear poison by region offset Details ======= diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 2669f251d6775..eac8cc1bdaa07 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -135,6 +135,10 @@ enum cxl_poison_trace_type { CXL_POISON_TRACE_CLEAR, }; +enum poison_cmd_enabled_bits; +bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, + enum poison_cmd_enabled_bits cmd); + long cxl_pci_get_latency(struct pci_dev *pdev); int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 90d3390d9c7c6..e370d733e4400 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -200,6 +200,14 @@ static ssize_t security_erase_store(struct device *dev, static struct device_attribute dev_attr_security_erase = __ATTR(erase, 0200, NULL, security_erase_store); +bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, + enum poison_cmd_enabled_bits cmd) +{ + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); + + return test_bit(cmd, mds->poison.enabled_cmds); +} + static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 5892de29b470b..04d326c274875 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2,6 +2,7 @@ /* Copyright(c) 2022 Intel Corporation. 
All rights reserved. */ #include #include +#include #include #include #include @@ -3002,9 +3003,8 @@ struct dpa_result { u64 dpa; }; -static int __maybe_unused region_offset_to_dpa_result(struct cxl_region *cxlr, - u64 offset, - struct dpa_result *result) +static int region_offset_to_dpa_result(struct cxl_region *cxlr, u64 offset, + struct dpa_result *result) { struct cxl_region_params *p = &cxlr->params; struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); @@ -3652,6 +3652,105 @@ static void shutdown_notifiers(void *_cxlr) unregister_mt_adistance_algorithm(&cxlr->adist_notifier); } +static void remove_debugfs(void *dentry) +{ + debugfs_remove_recursive(dentry); +} + +static int validate_region_offset(struct cxl_region *cxlr, u64 offset) +{ + struct cxl_region_params *p = &cxlr->params; + resource_size_t region_size; + u64 hpa; + + if (offset < p->cache_size) { + dev_err(&cxlr->dev, + "Offset %#llx is within extended linear cache %#llx\n", + offset, p->cache_size); + return -EINVAL; + } + + region_size = resource_size(p->res); + if (offset >= region_size) { + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %#llx\n", + offset, region_size); + return -EINVAL; + } + + hpa = p->res->start + offset; + if (hpa < p->res->start || hpa > p->res->end) { + dev_err(&cxlr->dev, "HPA %#llx not in region %pr\n", hpa, + p->res); + return -EINVAL; + } + + return 0; +} + +static int cxl_region_debugfs_poison_inject(void *data, u64 offset) +{ + struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL }; + struct cxl_region *cxlr = data; + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + if (validate_region_offset(cxlr, offset)) + return -EINVAL; + + rc = region_offset_to_dpa_result(cxlr, offset, &result); + if (rc || !result.cxlmd || 
result.dpa == ULLONG_MAX) { + dev_dbg(&cxlr->dev, + "Failed to resolve DPA for region offset %#llx rc %d\n", + offset, rc); + + return rc ? rc : -EINVAL; + } + + return cxl_inject_poison_locked(result.cxlmd, result.dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_inject_fops, NULL, + cxl_region_debugfs_poison_inject, "%llx\n"); + +static int cxl_region_debugfs_poison_clear(void *data, u64 offset) +{ + struct dpa_result result = { .dpa = ULLONG_MAX, .cxlmd = NULL }; + struct cxl_region *cxlr = data; + int rc; + + ACQUIRE(rwsem_read_intr, region_rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, ®ion_rwsem))) + return rc; + + ACQUIRE(rwsem_read_intr, dpa_rwsem)(&cxl_rwsem.dpa); + if ((rc = ACQUIRE_ERR(rwsem_read_intr, &dpa_rwsem))) + return rc; + + if (validate_region_offset(cxlr, offset)) + return -EINVAL; + + rc = region_offset_to_dpa_result(cxlr, offset, &result); + if (rc || !result.cxlmd || result.dpa == ULLONG_MAX) { + dev_dbg(&cxlr->dev, + "Failed to resolve DPA for region offset %#llx rc %d\n", + offset, rc); + + return rc ? 
rc : -EINVAL; + } + + return cxl_clear_poison_locked(result.cxlmd, result.dpa); +} + +DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, + cxl_region_debugfs_poison_clear, "%llx\n"); + static int cxl_region_can_probe(struct cxl_region *cxlr) { struct cxl_region_params *p = &cxlr->params; @@ -3681,6 +3780,7 @@ static int cxl_region_probe(struct device *dev) { struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; + bool poison_supported = true; int rc; rc = cxl_region_can_probe(cxlr); @@ -3704,6 +3804,31 @@ static int cxl_region_probe(struct device *dev) if (rc) return rc; + /* Create poison attributes if all memdevs support the capabilities */ + for (int i = 0; i < p->nr_targets; i++) { + struct cxl_endpoint_decoder *cxled = p->targets[i]; + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + + if (!cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_INJECT) || + !cxl_memdev_has_poison_cmd(cxlmd, CXL_POISON_ENABLED_CLEAR)) { + poison_supported = false; + break; + } + } + + if (poison_supported) { + struct dentry *dentry; + + dentry = cxl_debugfs_create_dir(dev_name(dev)); + debugfs_create_file("inject_poison", 0200, dentry, cxlr, + &cxl_poison_inject_fops); + debugfs_create_file("clear_poison", 0200, dentry, cxlr, + &cxl_poison_clear_fops); + rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); + if (rc) + return rc; + } + switch (cxlr->mode) { case CXL_PARTMODE_PMEM: rc = devm_cxl_region_edac_register(cxlr); From 83f361be372ed0045e4e11825f95f11eef8fba81 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 18 Aug 2025 08:39:53 -0700 Subject: [PATCH 09/80] cxl: Fix emit of type resource_size_t argument for validate_region_offset() 0day reported warnings of: drivers/cxl/core/region.c:3664:25: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 4 has type 'resource_size_t' {aka 'unsigned int'} [-Wformat=] drivers/cxl/core/region.c:3671:37: warning: format '%llx' expects argument of type 
'long long unsigned int', but argument 4 has type 'resource_size_t' {aka 'unsigned int'} [-Wformat=] Replace %#llx with %pr to emit resource_size_t arguments. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202508160513.NAZ9i9rQ-lkp@intel.com/ Cc: Alison Schofield Reviewed-by: Alison Schofield Link: https://patch.msgid.link/20250818153953.3658952-1-dave.jiang@intel.com Signed-off-by: Dave Jiang (cherry picked from commit e6a9530b3ee7407b70b60e4df70688db0d239e1a) Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 04d326c274875..d9d65229eb58a 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3665,15 +3665,15 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset) if (offset < p->cache_size) { dev_err(&cxlr->dev, - "Offset %#llx is within extended linear cache %#llx\n", - offset, p->cache_size); + "Offset %#llx is within extended linear cache %pr\n", + offset, &p->cache_size); return -EINVAL; } region_size = resource_size(p->res); if (offset >= region_size) { - dev_err(&cxlr->dev, "Offset %#llx exceeds region size %#llx\n", - offset, region_size); + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pr\n", + offset, ®ion_size); return -EINVAL; } From f36e40dbe3c8c793dfa9e543b24f60b4527b3ffc Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:04 -0700 Subject: [PATCH 10/80] mm/memory_hotplug: Update comment for hotplug memory callback priorities Add clarification to comment for memory hotplug callback ordering as the current comment does not provide clear language on which callback happens first. 
Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-2-dave.jiang@intel.com Signed-off-by: Dave Jiang (cherry picked from commit 65128868bb3b0621d2d8e71f19852675a064b373) Signed-off-by: Jiandi An --- include/linux/memory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/memory.h b/include/linux/memory.h index 2a770e7c6ab1e..d231a2323331a 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -115,8 +115,8 @@ struct notifier_block; struct mem_section; /* - * Priorities for the hotplug memory callback routines (stored in decreasing - * order in the callback chain) + * Priorities for the hotplug memory callback routines. Invoked from + * high to low. Higher priorities correspond to higher numbers. */ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 From a1c6edc5de7e6dfb6c1e93e8237471b973cbe44f Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:05 -0700 Subject: [PATCH 11/80] drivers/base/node: Add a helper function node_update_perf_attrs() Add helper function node_update_perf_attrs() to allow update of node access coordinates computed by an external agent such as CXL. The helper allows updating of coordinates after the attribute being created by HMAT. 
Acked-by: David Hildenbrand Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-3-dave.jiang@intel.com Signed-off-by: Dave Jiang (cherry picked from commit b57fc652ca24ada3b0c888327f9944ed21559286) Signed-off-by: Jiandi An --- drivers/base/node.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/node.h | 8 ++++++++ 2 files changed, 46 insertions(+) diff --git a/drivers/base/node.c b/drivers/base/node.c index 67b01d5797377..3e2329ccb618d 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -248,6 +248,44 @@ void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, } EXPORT_SYMBOL_GPL(node_set_perf_attrs); +/** + * node_update_perf_attrs - Update the performance values for given access class + * @nid: Node identifier to be updated + * @coord: Heterogeneous memory performance coordinates + * @access: The access class for the given attributes + */ +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access) +{ + struct node_access_nodes *access_node; + struct node *node; + int i; + + if (WARN_ON_ONCE(!node_online(nid))) + return; + + node = node_devices[nid]; + list_for_each_entry(access_node, &node->access_list, list_node) { + if (access_node->access != access) + continue; + + access_node->coord = *coord; + for (i = 0; access_attrs[i]; i++) { + sysfs_notify(&access_node->dev.kobj, + NULL, access_attrs[i]->name); + } + break; + } + + /* When setting CPU access coordinates, update mempolicy */ + if (access != ACCESS_COORDINATE_CPU) + return; + + if (mempolicy_set_node_perf(nid, coord)) + pr_info("failed to set mempolicy attrs for node %d\n", nid); +} +EXPORT_SYMBOL_GPL(node_update_perf_attrs); + /** * struct node_cache_info - Internal tracking for memory node caches * @dev: Device represeting the cache level diff --git a/include/linux/node.h b/include/linux/node.h index 2c7529335b21a..866e3323f1fdc 100644 --- a/include/linux/node.h +++ 
b/include/linux/node.h @@ -85,6 +85,8 @@ struct node_cache_attrs { void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs); void node_set_perf_attrs(unsigned int nid, struct access_coordinate *coord, enum access_coordinate_class access); +void node_update_perf_attrs(unsigned int nid, struct access_coordinate *coord, + enum access_coordinate_class access); #else static inline void node_add_cache(unsigned int nid, struct node_cache_attrs *cache_attrs) @@ -96,6 +98,12 @@ static inline void node_set_perf_attrs(unsigned int nid, enum access_coordinate_class access) { } + +static inline void node_update_perf_attrs(unsigned int nid, + struct access_coordinate *coord, + enum access_coordinate_class access) +{ +} #endif struct node { From 0dce4a1c0df919aa0d0ad073dfd0b961a3ddacbc Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:06 -0700 Subject: [PATCH 12/80] cxl, acpi/hmat: Update CXL access coordinates directly instead of through HMAT The current implementation of CXL memory hotplug notifier gets called before the HMAT memory hotplug notifier. The CXL driver calculates the access coordinates (bandwidth and latency values) for the CXL end to end path (i.e. CPU to endpoint). When the CXL region is onlined, the CXL memory hotplug notifier writes the access coordinates to the HMAT target structs. Then the HMAT memory hotplug notifier is called and it creates the access coordinates for the node sysfs attributes. During testing on an Intel platform, it was found that although the newly calculated coordinates were pushed to sysfs, the sysfs attributes for the access coordinates showed up with the wrong initiator. The system has 4 nodes (0, 1, 2, 3) where node 0 and 1 are CPU nodes and node 2 and 3 are CXL nodes. 
The expectation is that node 2 would show up as a target to node 0: /sys/devices/system/node/node2/access0/initiators/node0 However it was observed that node 2 showed up as a target under node 1: /sys/devices/system/node/node2/access0/initiators/node1 The original intent of the 'ext_updated' flag in HMAT handling code was to stop HMAT memory hotplug callback from clobbering the access coordinates after CXL has injected its calculated coordinates and replaced the generic target access coordinates provided by the HMAT table in the HMAT target structs. However the flag is hacky at best and blocks the updates from other CXL regions that are onlined in the same node later on. Remove the 'ext_updated' flag usage and just update the access coordinates for the nodes directly without touching HMAT target data. The hotplug memory callback ordering is changed. Instead of changing CXL, move HMAT back so there's room for the levels rather than have CXL share the same level as SLAB_CALLBACK_PRI. The change will result in the CXL callback being executed after the HMAT callback. With the change, the CXL hotplug memory notifier runs after the HMAT callback. The HMAT callback will create the node sysfs attributes for access coordinates. The CXL callback will write the access coordinates to the now created node sysfs attributes directly and will not pollute the HMAT target values. A nodemask is introduced to keep track if a node has been updated and prevents further updates. 
Fixes: 067353a46d8c ("cxl/region: Add memory hotplug notifier for cxl region") Cc: stable@vger.kernel.org Tested-by: Marc Herbert Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-4-dave.jiang@intel.com Signed-off-by: Dave Jiang (cherry picked from commit 2e454fb8056df6da4bba7d89a57bf60e217463c0) Signed-off-by: Jiandi An --- drivers/acpi/numa/hmat.c | 6 ------ drivers/cxl/core/cdat.c | 5 ----- drivers/cxl/core/core.h | 1 - drivers/cxl/core/region.c | 20 ++++++++++++-------- include/linux/memory.h | 2 +- 5 files changed, 13 insertions(+), 21 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 9085375830605..f153a2c18f1d9 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -74,7 +74,6 @@ struct memory_target { struct node_cache_attrs cache_attrs; u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE]; bool registered; - bool ext_updated; /* externally updated */ }; struct memory_initiator { @@ -391,7 +390,6 @@ int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, coord->read_bandwidth, access); hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH, coord->write_bandwidth, access); - target->ext_updated = true; return 0; } @@ -773,10 +771,6 @@ static void hmat_update_target_attrs(struct memory_target *target, u32 best = 0; int i; - /* Don't update if an external agent has changed the data. 
*/ - if (target->ext_updated) - return; - /* Don't update for generic port if there's no device handle */ if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL || access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) && diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c0af645425f4a..c891fd618cfda 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -1081,8 +1081,3 @@ int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, { return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); } - -bool cxl_need_node_perf_attrs_update(int nid) -{ - return !acpi_node_backed_by_real_pxm(nid); -} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index eac8cc1bdaa07..e5157a328f30c 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -143,7 +143,6 @@ long cxl_pci_get_latency(struct pci_dev *pdev); int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, enum access_coordinate_class access); -bool cxl_need_node_perf_attrs_update(int nid); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index d9d65229eb58a..238b148768148 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -32,6 +32,12 @@ * 3. Decoder targets */ +/* + * nodemask that sets per node when the access_coordinates for the node has + * been updated by the CXL memory hotplug notifier. 
+ */ +static nodemask_t nodemask_region_seen = NODE_MASK_NONE; + static struct cxl_region *to_cxl_region(struct device *dev); #define __ACCESS_ATTR_RO(_level, _name) { \ @@ -2442,14 +2448,8 @@ static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { if (cxlr->coord[i].read_bandwidth) { - rc = 0; - if (cxl_need_node_perf_attrs_update(nid)) - node_set_perf_attrs(nid, &cxlr->coord[i], i); - else - rc = cxl_update_hmat_access_coordinates(nid, cxlr, i); - - if (rc == 0) - cset++; + node_update_perf_attrs(nid, &cxlr->coord[i], i); + cset++; } } @@ -2487,6 +2487,10 @@ static int cxl_region_perf_attrs_callback(struct notifier_block *nb, if (nid != region_nid) return NOTIFY_DONE; + /* No action needed if node bit already set */ + if (node_test_and_set(nid, nodemask_region_seen)) + return NOTIFY_DONE; + if (!cxl_region_update_coordinates(cxlr, nid)) return NOTIFY_DONE; diff --git a/include/linux/memory.h b/include/linux/memory.h index d231a2323331a..55f0a47c85ebf 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -120,8 +120,8 @@ struct mem_section; */ #define DEFAULT_CALLBACK_PRI 0 #define SLAB_CALLBACK_PRI 1 -#define HMAT_CALLBACK_PRI 2 #define CXL_CALLBACK_PRI 5 +#define HMAT_CALLBACK_PRI 6 #define MM_COMPUTE_BATCH_PRI 10 #define CPUSET_CALLBACK_PRI 10 #define MEMTIER_HOTPLUG_PRI 100 From 8daa7a00b7b076a8b223719c642959180836d042 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 15:29:07 -0700 Subject: [PATCH 13/80] acpi/hmat: Remove now unused hmat_update_target_coordinates() Remove deadcode since CXL no longer calls hmat_update_target_coordinates(). 
Reviewed-by: Jonathan Cameron Link: https://patch.msgid.link/20250829222907.1290912-5-dave.jiang@intel.com Signed-off-by: Dave Jiang (cherry picked from commit e99ecbc4c89adf551cccbbc00b5cb08c50969af6) Signed-off-by: Jiandi An --- drivers/acpi/numa/hmat.c | 28 ---------------------------- drivers/cxl/core/cdat.c | 6 ------ drivers/cxl/core/core.h | 2 -- include/linux/acpi.h | 12 ------------ 4 files changed, 48 deletions(-) diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index f153a2c18f1d9..11e4483685c9c 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -367,34 +367,6 @@ static void hmat_update_target_access(struct memory_target *target, } } -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access) -{ - struct memory_target *target; - int pxm; - - if (nid == NUMA_NO_NODE) - return -EINVAL; - - pxm = node_to_pxm(nid); - guard(mutex)(&target_lock); - target = find_mem_target(pxm); - if (!target) - return -ENODEV; - - hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY, - coord->read_latency, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY, - coord->write_latency, access); - hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH, - coord->read_bandwidth, access); - hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH, - coord->write_bandwidth, access); - - return 0; -} -EXPORT_SYMBOL_GPL(hmat_update_target_coordinates); - static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc) { struct memory_locality *loc; diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c891fd618cfda..bca1ec279651d 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -1075,9 +1075,3 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr, cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth; } } - -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum 
access_coordinate_class access) -{ - return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); -} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index e5157a328f30c..5707cd60a8eb0 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -141,8 +141,6 @@ bool cxl_memdev_has_poison_cmd(struct cxl_memdev *cxlmd, long cxl_pci_get_latency(struct pci_dev *pdev); int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); -int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, - enum access_coordinate_class access); int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 42cbeaba2a510..0c6087ea979b2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1637,18 +1637,6 @@ static inline void acpi_use_parent_companion(struct device *dev) ACPI_COMPANION_SET(dev, ACPI_COMPANION(dev->parent)); } -#ifdef CONFIG_ACPI_HMAT -int hmat_update_target_coordinates(int nid, struct access_coordinate *coord, - enum access_coordinate_class access); -#else -static inline int hmat_update_target_coordinates(int nid, - struct access_coordinate *coord, - enum access_coordinate_class access) -{ - return -EOPNOTSUPP; -} -#endif - #ifdef CONFIG_ACPI_NUMA bool acpi_node_backed_by_real_pxm(int nid); #else From c58780a6cc924e93a9922fd43dcf28ce9b0dc554 Mon Sep 17 00:00:00 2001 From: Rakuram Eswaran Date: Mon, 18 Aug 2025 23:23:34 +0530 Subject: [PATCH 14/80] Documentation/driver-api: Fix typo error in cxl Fixed the following typo errors intersparsed ==> interspersed in Documentation/driver-api/cxl/platform/bios-and-efi.rst Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Gregory Price Link: https://patch.msgid.link/20250818175335.5312-1-rakuram.e96@gmail.com Signed-off-by: Dave Jiang (cherry picked from commit a414408126d13d6d5b2d2c4e537295771cc256cb) Signed-off-by: Jiandi An --- 
Documentation/driver-api/cxl/platform/bios-and-efi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/driver-api/cxl/platform/bios-and-efi.rst b/Documentation/driver-api/cxl/platform/bios-and-efi.rst index 645322632cc9b..a9aa0ccd92af7 100644 --- a/Documentation/driver-api/cxl/platform/bios-and-efi.rst +++ b/Documentation/driver-api/cxl/platform/bios-and-efi.rst @@ -202,7 +202,7 @@ future and such a configuration should be avoided. Memory Holes ------------ -If your platform includes memory holes intersparsed between your CXL memory, it +If your platform includes memory holes interspersed between your CXL memory, it is recommended to utilize multiple decoders to cover these regions of memory, rather than try to program the decoders to accept the entire range and expect Linux to manage the overlap. From 441d4f0f74ffb57df9ec6917ba8a2e92ab9e76bf Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 Sep 2025 09:00:34 -0700 Subject: [PATCH 15/80] cxl/acpi: Rename CFMW coherency restrictions ACPICA commit 710745713ad3a2543dbfb70e84764f31f0e46bdc This has been renamed in more recent CXL specs, as type3 (memory expanders) can also use HDM-DB for device coherent memory. Link: https://github.com/acpica/acpica/commit/710745713ad3a2543dbfb70e84764f31f0e46bdc Acked-by: Rafael J. 
Wysocki (Intel) Signed-off-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Gregory Price Reviewed-by: Dave Jiang Link: https://patch.msgid.link/20250908160034.86471-1-dave@stgolabs.net Signed-off-by: Dave Jiang (cherry picked from commit c4272905c37930c19b54fa3549b22899122ce69e) Signed-off-by: Jiandi An --- drivers/cxl/acpi.c | 4 ++-- include/acpi/actbl1.h | 4 ++-- tools/testing/cxl/test/cxl.c | 18 +++++++++--------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index b9ba1c33e4d24..b8f124685f1dc 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -116,9 +116,9 @@ static unsigned long cfmws_to_decoder_flags(int restrictions) { unsigned long flags = CXL_DECODER_F_ENABLE; - if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2) + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_DEVMEM) flags |= CXL_DECODER_F_TYPE2; - if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE3) + if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM) flags |= CXL_DECODER_F_TYPE3; if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_VOLATILE) flags |= CXL_DECODER_F_RAM; diff --git a/include/acpi/actbl1.h b/include/acpi/actbl1.h index 99fd1588ff382..eb787dfbd2fa0 100644 --- a/include/acpi/actbl1.h +++ b/include/acpi/actbl1.h @@ -560,8 +560,8 @@ struct acpi_cedt_cfmws_target_element { /* Values for Restrictions field above */ -#define ACPI_CEDT_CFMWS_RESTRICT_TYPE2 (1) -#define ACPI_CEDT_CFMWS_RESTRICT_TYPE3 (1<<1) +#define ACPI_CEDT_CFMWS_RESTRICT_DEVMEM (1) +#define ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM (1<<1) #define ACPI_CEDT_CFMWS_RESTRICT_VOLATILE (1<<2) #define ACPI_CEDT_CFMWS_RESTRICT_PMEM (1<<3) #define ACPI_CEDT_CFMWS_RESTRICT_FIXED (1<<4) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index f4dceecf7e335..8b5b8d17b8b84 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -210,7 +210,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = 
ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -225,7 +225,7 @@ static struct { }, .interleave_ways = 1, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -240,7 +240,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -255,7 +255,7 @@ static struct { }, .interleave_ways = 1, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -270,7 +270,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 4UL, @@ -285,7 +285,7 @@ static struct { }, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M, @@ -302,7 +302,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 0, .granularity = 4, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -318,7 +318,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 1, .granularity = 0, - 
.restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_256M * 8UL, @@ -334,7 +334,7 @@ static struct { .interleave_arithmetic = ACPI_CEDT_CFMWS_ARITHMETIC_XOR, .interleave_ways = 8, .granularity = 1, - .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | + .restrictions = ACPI_CEDT_CFMWS_RESTRICT_HOSTONLYMEM | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = FAKE_QTG_ID, .window_size = SZ_512M * 6UL, From a36b60382b4b025342681d962eedd2e587282212 Mon Sep 17 00:00:00 2001 From: "Fabio M. De Francesco" Date: Mon, 15 Sep 2025 16:57:20 +0200 Subject: [PATCH 16/80] cxl: Documentation/driver-api/cxl: Describe the x86 Low Memory Hole solution Add documentation on how to resolve conflicts between CXL Fixed Memory Windows, Platform Low Memory Holes, intermediate Switch and Endpoint Decoders. [dj]: Fixed inconsistent spacing after '.' [dj]: Fixed subject line from Alison. [dj]: Removed '::' before table from Bagas. Reviewed-by: Gregory Price Signed-off-by: Fabio M. 
De Francesco Reviewed-by: Bagas Sanjaya Reviewed-by: Alison Schofield Reviewed-by: Dave Jiang Signed-off-by: Dave Jiang (cherry picked from commit c5dca38633daa1e240144bac453cf9065604a413) Signed-off-by: Jiandi An --- Documentation/driver-api/cxl/conventions.rst | 135 +++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/Documentation/driver-api/cxl/conventions.rst b/Documentation/driver-api/cxl/conventions.rst index da347a81a237a..e37336d7b116e 100644 --- a/Documentation/driver-api/cxl/conventions.rst +++ b/Documentation/driver-api/cxl/conventions.rst @@ -45,3 +45,138 @@ Detailed Description of the Change ---------------------------------- + + +Resolve conflict between CFMWS, Platform Memory Holes, and Endpoint Decoders +============================================================================ + +Document +-------- + +CXL Revision 3.2, Version 1.0 + +License +------- + +SPDX-License Identifier: CC-BY-4.0 + +Creator/Contributors +-------------------- + +- Fabio M. De Francesco, Intel +- Dan J. Williams, Intel +- Mahesh Natu, Intel + +Summary of the Change +--------------------- + +According to the current Compute Express Link (CXL) Specifications (Revision +3.2, Version 1.0), the CXL Fixed Memory Window Structure (CFMWS) describes zero +or more Host Physical Address (HPA) windows associated with each CXL Host +Bridge. Each window represents a contiguous HPA range that may be interleaved +across one or more targets, including CXL Host Bridges. Each window has a set +of restrictions that govern its usage. It is the Operating System-directed +configuration and Power Management (OSPM) responsibility to utilize each window +for the specified use. + +Table 9-22 of the current CXL Specifications states that the Window Size field +contains the total number of consecutive bytes of HPA this window describes. +This value must be a multiple of the Number of Interleave Ways (NIW) * 256 MB. 
+ +Platform Firmware (BIOS) might reserve physical addresses below 4 GB where a +memory gap such as the Low Memory Hole for PCIe MMIO may exist. In such cases, +the CFMWS Range Size may not adhere to the NIW * 256 MB rule. + +The HPA represents the actual physical memory address space that the CXL devices +can decode and respond to, while the System Physical Address (SPA), a related +but distinct concept, represents the system-visible address space that users can +direct transaction to and so it excludes reserved regions. + +BIOS publishes CFMWS to communicate the active SPA ranges that, on platforms +with LMH's, map to a strict subset of the HPA. The SPA range trims out the hole, +resulting in lost capacity in the Endpoints with no SPA to map to that part of +the HPA range that intersects the hole. + +E.g, an x86 platform with two CFMWS and an LMH starting at 2 GB: + + +--------+------------+-------------------+------------------+-------------------+------+ + | Window | CFMWS Base | CFMWS Size | HDM Decoder Base | HDM Decoder Size | Ways | + +========+============+===================+==================+===================+======+ + |  0 | 0 GB | 2 GB | 0 GB | 3 GB | 12 | + +--------+------------+-------------------+------------------+-------------------+------+ + |  1 | 4 GB | NIW*256MB Aligned | 4 GB | NIW*256MB Aligned | 12 | + +--------+------------+-------------------+------------------+-------------------+------+ + +HDM decoder base and HDM decoder size represent all the 12 Endpoint Decoders of +a 12 ways region and all the intermediate Switch Decoders. They are configured +by the BIOS according to the NIW * 256MB rule, resulting in a HPA range size of +3GB. Instead, the CFMWS Base and CFMWS Size are used to configure the Root +Decoder HPA range that results smaller (2GB) than that of the Switch and +Endpoint Decoders in the hierarchy (3GB). 
+ +This creates 2 issues which lead to a failure to construct a region: + +1) A mismatch in region size between root and any HDM decoder. The root decoders + will always be smaller due to the trim. + +2) The trim causes the root decoder to violate the (NIW * 256MB) rule. + +This change allows a region with a base address of 0GB to bypass these checks to +allow for region creation with the trimmed root decoder address range. + +This change does not allow for any other arbitrary region to violate these +checks - it is intended exclusively to enable x86 platforms which map CXL memory +under 4GB. + +Despite the HDM decoders covering the PCIE hole HPA region, it is expected that +the platform will never route address accesses to the CXL complex because the +root decoder only covers the trimmed region (which excludes this). This is +outside the ability of Linux to enforce. + +On the example platform, only the first 2GB will be potentially usable, but +Linux, aiming to adhere to the current specifications, fails to construct +Regions and attach Endpoint and intermediate Switch Decoders to them. + +There are several points of failure that due to the expectation that the Root +Decoder HPA size, that is equal to the CFMWS from which it is configured, has +to be greater or equal to the matching Switch and Endpoint HDM Decoders. + +In order to succeed with construction and attachment, Linux must construct a +Region with Root Decoder HPA range size, and then attach to that all the +intermediate Switch Decoders and Endpoint Decoders that belong to the hierarchy +regardless of their range sizes. + +Benefits of the Change +---------------------- + +Without the change, the OSPM wouldn't match intermediate Switch and Endpoint +Decoders with Root Decoders configured with CFMWS HPA sizes that don't align +with the NIW * 256MB constraint, and so it leads to lost memdev capacity. 
+ +This change allows the OSPM to construct Regions and attach intermediate Switch +and Endpoint Decoders to them, so that the addressable part of the memory +devices total capacity is made available to the users. + +References +---------- + +Compute Express Link Specification Revision 3.2, Version 1.0 + + +Detailed Description of the Change +---------------------------------- + +The description of the Window Size field in table 9-22 needs to account for +platforms with Low Memory Holes, where SPA ranges might be subsets of the +endpoints HPA. Therefore, it has to be changed to the following: + +"The total number of consecutive bytes of HPA this window represents. This value +shall be a multiple of NIW * 256 MB. + +On platforms that reserve physical addresses below 4 GB, such as the Low Memory +Hole for PCIe MMIO on x86, an instance of CFMWS whose Base HPA range is 0 might +have a size that doesn't align with the NIW * 256 MB constraint. + +Note that the matching intermediate Switch Decoders and the Endpoint Decoders +HPA range sizes must still align to the above-mentioned rule, but the memory +capacity that exceeds the CFMWS window size won't be accessible.". From 0ac8dce5fd297cc136d4224d33f972904a58978b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 11:09:19 -0700 Subject: [PATCH 17/80] cxl: Add helper to detect top of CXL device topology Add a helper to replace the open code detection of CXL device hierarchy root, or the host bridge. The helper will be used for delayed downstream port (dport) creation. 
Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Dan Williams Reviewed-by: Alison Schofield Reviewed-by: Robert Richter Tested-by: Robert Richter Signed-off-by: Dave Jiang (cherry picked from commit 4fde89539a18d39169a511fda00db65eeba1a8e0) Signed-off-by: Jiandi An --- drivers/cxl/core/port.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 8f36ff413f5d5..66c0c849c4a0d 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -33,6 +33,15 @@ static DEFINE_IDA(cxl_port_ida); static DEFINE_XARRAY(cxl_root_buses); +/* + * The terminal device in PCI is NULL and @platform_bus + * for platform devices (for cxl_test) + */ +static bool is_cxl_host_bridge(struct device *dev) +{ + return (!dev || dev == &platform_bus); +} + int cxl_num_decoders_committed(struct cxl_port *port) { lockdep_assert_held(&cxl_rwsem.region); @@ -1542,7 +1551,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, resource_size_t component_reg_phys; int rc; - if (!dparent) { + if (is_cxl_host_bridge(dparent)) { /* * The iteration reached the topology root without finding the * CXL-root 'cxl_port' on a previous iteration, fail for now to @@ -1630,11 +1639,7 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) struct device *uport_dev; struct cxl_dport *dport; - /* - * The terminal "grandparent" in PCI is NULL and @platform_bus - * for platform devices - */ - if (!dport_dev || dport_dev == &platform_bus) + if (is_cxl_host_bridge(dport_dev)) return 0; uport_dev = dport_dev->parent; From c5128ea6a55eef6302221730c272ca223849bd8b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 11:09:20 -0700 Subject: [PATCH 18/80] cxl: Add helper to delete dport Refactor the code in reap_dports() out to provide a helper function that reaps a single dport. This will be used later in the cleanup path for allocating a dport. 
Renaming to del_port() and del_dports() to mirror devm_cxl_add_dport(). [dj] Fixed up subject per Robert Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Tested-by: Robert Richter Signed-off-by: Dave Jiang (cherry picked from commit 8330671c57c7056ef5e1e8dccfcdda7d5fe6d0b0) Signed-off-by: Jiandi An --- drivers/cxl/core/port.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 66c0c849c4a0d..dbea9feacdddf 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1433,7 +1433,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL"); * through ->remove(). This "bottom-up" removal selectively removes individual * child ports manually. This depends on devm_cxl_add_port() to not change is * devm action registration order, and for dports to have already been - * destroyed by reap_dports(). + * destroyed by del_dports(). */ static void delete_switch_port(struct cxl_port *port) { @@ -1442,18 +1442,24 @@ static void delete_switch_port(struct cxl_port *port) devm_release_action(port->dev.parent, unregister_port, port); } -static void reap_dports(struct cxl_port *port) +static void del_dport(struct cxl_dport *dport) +{ + struct cxl_port *port = dport->port; + + devm_release_action(&port->dev, cxl_dport_unlink, dport); + devm_release_action(&port->dev, cxl_dport_remove, dport); + devm_kfree(&port->dev, dport); +} + +static void del_dports(struct cxl_port *port) { struct cxl_dport *dport; unsigned long index; device_lock_assert(&port->dev); - xa_for_each(&port->dports, index, dport) { - devm_release_action(&port->dev, cxl_dport_unlink, dport); - devm_release_action(&port->dev, cxl_dport_remove, dport); - devm_kfree(&port->dev, dport); - } + xa_for_each(&port->dports, index, dport) + del_dport(dport); } struct detach_ctx { @@ -1511,7 +1517,7 @@ static void cxl_detach_ep(void *data) */ died = true; port->dead = true; 
- reap_dports(port); + del_dports(port); } device_unlock(&port->dev); From d8d4bde72d50d65f357a9b1223ab4bdbc10eeef6 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 11:09:21 -0700 Subject: [PATCH 19/80] cxl: Add a cached copy of target_map to cxl_decoder Add a cached copy of the hardware port-id list that is available at init before all @dport objects have been instantiated. Change is in preparation of delayed dport instantiation. Reviewed-by: Robert Richter Reviewed-by: Jonathan Cameron Tested-by: Robert Richter Reviewed-by: Alison Schofield Signed-off-by: Dave Jiang (cherry picked from commit 02edab6ceefaaf8cb917e864d8c26dbac0ea9686) Signed-off-by: Jiandi An --- drivers/cxl/acpi.c | 7 +++---- drivers/cxl/core/hdm.c | 20 ++++++++------------ drivers/cxl/core/port.c | 22 +++++++--------------- drivers/cxl/core/region.c | 4 +++- drivers/cxl/cxl.h | 8 ++++++-- tools/testing/cxl/test/cxl.c | 8 ++++---- 6 files changed, 31 insertions(+), 38 deletions(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index b8f124685f1dc..bd2e282ca93a0 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -401,7 +401,6 @@ DEFINE_FREE(del_cxl_resource, struct resource *, if (_T) del_cxl_resource(_T)) static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, struct cxl_cfmws_context *ctx) { - int target_map[CXL_DECODER_MAX_INTERLEAVE]; struct cxl_port *root_port = ctx->root_port; struct cxl_cxims_context cxims_ctx; struct device *dev = ctx->dev; @@ -419,8 +418,6 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, rc = eig_to_granularity(cfmws->granularity, &ig); if (rc) return rc; - for (i = 0; i < ways; i++) - target_map[i] = cfmws->interleave_targets[i]; struct resource *res __free(del_cxl_resource) = alloc_cxl_resource( cfmws->base_hpa, cfmws->window_size, ctx->id++); @@ -446,6 +443,8 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, .end = cfmws->base_hpa + cfmws->window_size - 1, }; cxld->interleave_ways = ways; + for (i = 0; i < 
ways; i++) + cxld->target_map[i] = cfmws->interleave_targets[i]; /* * Minimize the x1 granularity to advertise support for any * valid region granularity @@ -484,7 +483,7 @@ static int __cxl_parse_cfmws(struct acpi_cedt_cfmws *cfmws, cxlrd->ops->spa_to_hpa = cxl_apply_xor_maps; } - rc = cxl_decoder_add(cxld, target_map); + rc = cxl_decoder_add(cxld); if (rc) return rc; diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 777b8ac0c49c1..13c53b9c17d13 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -21,12 +21,11 @@ struct cxl_rwsem cxl_rwsem = { .dpa = __RWSEM_INITIALIZER(cxl_rwsem.dpa), }; -static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, - int *target_map) +static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld) { int rc; - rc = cxl_decoder_add_locked(cxld, target_map); + rc = cxl_decoder_add_locked(cxld); if (rc) { put_device(&cxld->dev); dev_err(&port->dev, "Failed to add decoder\n"); @@ -54,7 +53,6 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port) { struct cxl_switch_decoder *cxlsd; struct cxl_dport *dport = NULL; - int single_port_map[1]; unsigned long index; struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev); @@ -73,9 +71,9 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port) xa_for_each(&port->dports, index, dport) break; - single_port_map[0] = dport->port_id; + cxlsd->cxld.target_map[0] = dport->port_id; - return add_hdm_decoder(port, &cxlsd->cxld, single_port_map); + return add_hdm_decoder(port, &cxlsd->cxld); } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, "CXL"); @@ -984,7 +982,7 @@ static int cxl_setup_hdm_decoder_from_dvsec( } static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, - int *target_map, void __iomem *hdm, int which, + void __iomem *hdm, int which, u64 *dpa_base, struct cxl_endpoint_dvsec_info *info) { struct cxl_endpoint_decoder *cxled = NULL; @@ -1104,7 +1102,7 @@ static int init_hdm_decoder(struct 
cxl_port *port, struct cxl_decoder *cxld, hi = readl(hdm + CXL_HDM_DECODER0_TL_HIGH(which)); target_list.value = (hi << 32) + lo; for (i = 0; i < cxld->interleave_ways; i++) - target_map[i] = target_list.target_id[i]; + cxld->target_map[i] = target_list.target_id[i]; return 0; } @@ -1180,7 +1178,6 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxl_settle_decoders(cxlhdm); for (i = 0; i < cxlhdm->decoder_count; i++) { - int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; int rc, target_count = cxlhdm->target_count; struct cxl_decoder *cxld; @@ -1208,8 +1205,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxld = &cxlsd->cxld; } - rc = init_hdm_decoder(port, cxld, target_map, hdm, i, - &dpa_base, info); + rc = init_hdm_decoder(port, cxld, hdm, i, &dpa_base, info); if (rc) { if (rc == -ENOSPC) continue; @@ -1219,7 +1215,7 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, put_device(&cxld->dev); return rc; } - rc = add_hdm_decoder(port, cxld, target_map); + rc = add_hdm_decoder(port, cxld); if (rc) { dev_warn(&port->dev, "Failed to add decoder%d.%d\n", port->id, i); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index dbea9feacdddf..c36e089e53990 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1716,13 +1716,11 @@ struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd, EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, "CXL"); static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, - struct cxl_port *port, int *target_map) + struct cxl_port *port) { + struct cxl_decoder *cxld = &cxlsd->cxld; int i; - if (!target_map) - return 0; - device_lock_assert(&port->dev); if (xa_empty(&port->dports)) @@ -1730,7 +1728,7 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, guard(rwsem_write)(&cxl_rwsem.region); for (i = 0; i < cxlsd->cxld.interleave_ways; i++) { - struct cxl_dport *dport = find_dport(port, target_map[i]); + struct cxl_dport *dport = find_dport(port, 
cxld->target_map[i]); if (!dport) return -ENXIO; @@ -1922,9 +1920,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL"); /** * cxl_decoder_add_locked - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl__decoder_alloc() - * @target_map: A list of downstream ports that this decoder can direct memory - * traffic to. These numbers should correspond with the port number - * in the PCIe Link Capabilities structure. * * Certain types of decoders may not have any targets. The main example of this * is an endpoint device. A more awkward example is a hostbridge whose root @@ -1938,7 +1933,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, "CXL"); * Return: Negative error code if the decoder wasn't properly configured; else * returns 0. */ -int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) +int cxl_decoder_add_locked(struct cxl_decoder *cxld) { struct cxl_port *port; struct device *dev; @@ -1959,7 +1954,7 @@ int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) if (!is_endpoint_decoder(dev)) { struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev); - rc = decoder_populate_targets(cxlsd, port, target_map); + rc = decoder_populate_targets(cxlsd, port); if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) { dev_err(&port->dev, "Failed to populate active decoder targets\n"); @@ -1978,9 +1973,6 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL"); /** * cxl_decoder_add - Add a decoder with targets * @cxld: The cxl decoder allocated by cxl__decoder_alloc() - * @target_map: A list of downstream ports that this decoder can direct memory - * traffic to. These numbers should correspond with the port number - * in the PCIe Link Capabilities structure. * * This is the unlocked variant of cxl_decoder_add_locked(). * See cxl_decoder_add_locked(). @@ -1988,7 +1980,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, "CXL"); * Context: Process context. Takes and releases the device lock of the port that * owns the @cxld. 
*/ -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) +int cxl_decoder_add(struct cxl_decoder *cxld) { struct cxl_port *port; @@ -2001,7 +1993,7 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); guard(device)(&port->dev); - return cxl_decoder_add_locked(cxld, target_map); + return cxl_decoder_add_locked(cxld); } EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, "CXL"); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 238b148768148..32675a70cadf9 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -1516,8 +1516,10 @@ static int cxl_port_setup_targets(struct cxl_port *port, cxl_rr->nr_targets_set); return -ENXIO; } - } else + } else { cxlsd->target[cxl_rr->nr_targets_set] = ep->dport; + cxlsd->cxld.target_map[cxl_rr->nr_targets_set] = ep->dport->port_id; + } inc = 1; out_target_set: cxl_rr->nr_targets_set += inc; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 4fe3df06f57a3..5be51b6abecd7 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -357,6 +357,9 @@ enum cxl_decoder_type { * @target_type: accelerator vs expander (type2 vs type3) selector * @region: currently assigned region for this decoder * @flags: memory type capabilities and locking + * @target_map: cached copy of hardware port-id list, available at init + * before all @dport objects have been instantiated. While + * dport id is 8bit, CFMWS interleave targets are 32bits. 
* @commit: device/decoder-type specific callback to commit settings to hw * @reset: device/decoder-type specific callback to reset hw settings */ @@ -369,6 +372,7 @@ struct cxl_decoder { enum cxl_decoder_type target_type; struct cxl_region *region; unsigned long flags; + u32 target_map[CXL_DECODER_MAX_INTERLEAVE]; int (*commit)(struct cxl_decoder *cxld); void (*reset)(struct cxl_decoder *cxld); }; @@ -789,9 +793,9 @@ struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, unsigned int nr_targets); -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); +int cxl_decoder_add(struct cxl_decoder *cxld); struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port); -int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); +int cxl_decoder_add_locked(struct cxl_decoder *cxld); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); static inline int cxl_root_decoder_autoremove(struct device *host, struct cxl_root_decoder *cxlrd) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 8b5b8d17b8b84..306c5cbc24187 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -651,7 +651,7 @@ static int mock_cxl_add_passthrough_decoder(struct cxl_port *port) struct target_map_ctx { - int *target_map; + u32 *target_map; int index; int target_count; }; @@ -955,9 +955,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, target_count = NR_CXL_SWITCH_PORTS; for (i = 0; i < NR_CXL_PORT_DECODERS; i++) { - int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; struct target_map_ctx ctx = { - .target_map = target_map, .target_count = target_count, }; struct cxl_decoder *cxld; @@ -986,6 +984,8 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, cxld = &cxled->cxld; } + ctx.target_map = cxld->target_map; + mock_init_hdm_decoder(cxld); if 
(target_count) { @@ -997,7 +997,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, } } - rc = cxl_decoder_add_locked(cxld, target_map); + rc = cxl_decoder_add_locked(cxld); if (rc) { put_device(&cxld->dev); dev_err(&port->dev, "Failed to add decoder\n"); From d620777bd7a81ed24f06a7586f5562deb2e10f13 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 11:09:23 -0700 Subject: [PATCH 20/80] cxl/test: Refactor decoder setup to reduce cxl_test burden Group the decoder setup code in switch and endpoint port probe into a single function for each to reduce the number of functions to be mocked in cxl_test. Introduce devm_cxl_switch_port_decoders_setup() and devm_cxl_endpoint_decoders_setup(). These two functions will be mocked instead with some functions optimized out since the mock version does not do anything. Remove devm_cxl_setup_hdm(), devm_cxl_add_passthrough_decoder(), and devm_cxl_enumerate_decoders() in cxl_test mock code. In turn, mock_cxl_add_passthrough_decoder() can be removed since cxl_test does not setup passthrough decoders. __wrap_cxl_hdm_decode_init() and __wrap_cxl_dvsec_rr_decode() can be removed as well since they only return 0 when called. 
[dj: drop 'struct cxl_port' forward declaration (Robert)] Suggested-by: Robert Richter Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Robert Richter Signed-off-by: Dave Jiang (cherry picked from commit 68d5d9734c12fce20ad493fe24738ab2019108c0) Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 5 +++ drivers/cxl/core/hdm.c | 81 +++++++++++++++++++++++++++++++---- drivers/cxl/core/pci.c | 42 ++++++++++++++++++ drivers/cxl/cxl.h | 9 ++-- drivers/cxl/cxlpci.h | 2 - drivers/cxl/port.c | 38 +--------------- tools/testing/cxl/Kbuild | 7 +-- tools/testing/cxl/test/cxl.c | 42 +++++++++++++----- tools/testing/cxl/test/mock.c | 69 ++++------------------------- tools/testing/cxl/test/mock.h | 7 +-- 10 files changed, 169 insertions(+), 133 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 5707cd60a8eb0..1fb66132b7777 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -148,6 +148,11 @@ int cxl_ras_init(void); void cxl_ras_exit(void); int cxl_gpf_port_setup(struct cxl_dport *dport); +struct cxl_hdm; +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info); +int cxl_port_get_possible_dports(struct cxl_port *port); + #ifdef CONFIG_CXL_FEATURES struct cxl_feat_entry * cxl_feature_info(struct cxl_features_state *cxlfs, const uuid_t *uuid); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 13c53b9c17d13..d435178f63b82 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -49,7 +49,7 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld) * are claimed and passed to the single dport. Disable the range until the first * CXL region is enumerated / activated. 
*/ -int devm_cxl_add_passthrough_decoder(struct cxl_port *port) +static int devm_cxl_add_passthrough_decoder(struct cxl_port *port) { struct cxl_switch_decoder *cxlsd; struct cxl_dport *dport = NULL; @@ -75,7 +75,6 @@ int devm_cxl_add_passthrough_decoder(struct cxl_port *port) return add_hdm_decoder(port, &cxlsd->cxld); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, "CXL"); static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) { @@ -145,8 +144,8 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) * @port: cxl_port to map * @info: cached DVSEC range register info */ -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) +static struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, + struct cxl_endpoint_dvsec_info *info) { struct cxl_register_map *reg_map = &port->reg_map; struct device *dev = &port->dev; @@ -201,7 +200,6 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, return cxlhdm; } -EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, "CXL"); static void __cxl_dpa_debug(struct seq_file *file, struct resource *r, int depth) { @@ -1167,8 +1165,8 @@ static void cxl_settle_decoders(struct cxl_hdm *cxlhdm) * @cxlhdm: Structure to populate with HDM capabilities * @info: cached DVSEC range register info */ -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) +static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, + struct cxl_endpoint_dvsec_info *info) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; struct cxl_port *port = cxlhdm->port; @@ -1225,4 +1223,71 @@ int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return 0; } -EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, "CXL"); + +/** + * devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders + * @port: CXL port context + * + * Return 0 or -errno on error + */ +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm; + + 
if (is_cxl_root(port) || is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + cxlhdm = devm_cxl_setup_hdm(port, NULL); + if (!IS_ERR(cxlhdm)) + return devm_cxl_enumerate_decoders(cxlhdm, NULL); + + if (PTR_ERR(cxlhdm) != -ENODEV) { + dev_err(&port->dev, "Failed to map HDM decoder capability\n"); + return PTR_ERR(cxlhdm); + } + + if (cxl_port_get_possible_dports(port) == 1) { + dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); + return devm_cxl_add_passthrough_decoder(port); + } + + dev_err(&port->dev, "HDM decoder capability not found\n"); + return -ENXIO; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL"); + +/** + * devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders + * @port: CXL port context + * + * Return 0 or -errno on error + */ +int devm_cxl_endpoint_decoders_setup(struct cxl_port *port) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + struct cxl_endpoint_dvsec_info info = { .port = port }; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_hdm *cxlhdm; + int rc; + + if (!is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + rc = cxl_dvsec_rr_decode(cxlds, &info); + if (rc < 0) + return rc; + + cxlhdm = devm_cxl_setup_hdm(port, &info); + if (IS_ERR(cxlhdm)) { + if (PTR_ERR(cxlhdm) == -ENODEV) + dev_err(&port->dev, "HDM decoder registers not found\n"); + return PTR_ERR(cxlhdm); + } + + rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info); + if (rc) + return rc; + + return devm_cxl_enumerate_decoders(cxlhdm, &info); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_endpoint_decoders_setup, "CXL"); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index b50551601c2e4..fa02366d35f2d 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1169,3 +1169,45 @@ int cxl_gpf_port_setup(struct cxl_dport *dport) return 0; } + +static int count_dports(struct pci_dev *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + int type = pci_pcie_type(pdev); + + if (pdev->bus != ctx->bus) + return 0; + if 
(!pci_is_pcie(pdev)) + return 0; + if (type != ctx->type) + return 0; + + ctx->count++; + return 0; +} + +int cxl_port_get_possible_dports(struct cxl_port *port) +{ + struct pci_bus *bus = cxl_port_to_pci_bus(port); + struct cxl_walk_context ctx; + int type; + + if (!bus) { + dev_err(&port->dev, "No PCI bus found for port %s\n", + dev_name(&port->dev)); + return -ENXIO; + } + + if (pci_is_root_bus(bus)) + type = PCI_EXP_TYPE_ROOT_PORT; + else + type = PCI_EXP_TYPE_DOWNSTREAM; + + ctx = (struct cxl_walk_context) { + .bus = bus, + .type = type, + }; + pci_walk_bus(bus, count_dports, &ctx); + + return ctx.count; +} diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 5be51b6abecd7..e4f37c143c1ef 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -818,12 +818,9 @@ struct cxl_endpoint_dvsec_info { struct range dvsec_range[2]; }; -struct cxl_hdm; -struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info); -int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info); -int devm_cxl_add_passthrough_decoder(struct cxl_port *port); +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port); +int devm_cxl_endpoint_decoders_setup(struct cxl_port *port); + struct cxl_dev_state; int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, struct cxl_endpoint_dvsec_info *info); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 54e219b0049ea..7ae621e618e79 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -129,8 +129,6 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info); void read_cdat_data(struct cxl_port *port); void cxl_cor_error_detected(struct pci_dev *pdev); pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, diff --git a/drivers/cxl/port.c 
b/drivers/cxl/port.c index cf32dc50b7a61..d8cae2b5bac6c 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -59,7 +59,6 @@ static int discover_region(struct device *dev, void *unused) static int cxl_switch_port_probe(struct cxl_port *port) { - struct cxl_hdm *cxlhdm; int rc; /* Cache the data early to ensure is_visible() works */ @@ -71,43 +70,14 @@ static int cxl_switch_port_probe(struct cxl_port *port) cxl_switch_parse_cdat(port); - cxlhdm = devm_cxl_setup_hdm(port, NULL); - if (!IS_ERR(cxlhdm)) - return devm_cxl_enumerate_decoders(cxlhdm, NULL); - - if (PTR_ERR(cxlhdm) != -ENODEV) { - dev_err(&port->dev, "Failed to map HDM decoder capability\n"); - return PTR_ERR(cxlhdm); - } - - if (rc == 1) { - dev_dbg(&port->dev, "Fallback to passthrough decoder\n"); - return devm_cxl_add_passthrough_decoder(port); - } - - dev_err(&port->dev, "HDM decoder capability not found\n"); - return -ENXIO; + return devm_cxl_switch_port_decoders_setup(port); } static int cxl_endpoint_port_probe(struct cxl_port *port) { - struct cxl_endpoint_dvsec_info info = { .port = port }; struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_hdm *cxlhdm; int rc; - rc = cxl_dvsec_rr_decode(cxlds, &info); - if (rc < 0) - return rc; - - cxlhdm = devm_cxl_setup_hdm(port, &info); - if (IS_ERR(cxlhdm)) { - if (PTR_ERR(cxlhdm) == -ENODEV) - dev_err(&port->dev, "HDM decoder registers not found\n"); - return PTR_ERR(cxlhdm); - } - /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); cxl_endpoint_parse_cdat(port); @@ -117,11 +87,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port) if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds, cxlhdm, &info); - if (rc) - return rc; - - rc = devm_cxl_enumerate_decoders(cxlhdm, &info); + rc = devm_cxl_endpoint_decoders_setup(port); if (rc) return rc; diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index d07f14cb7aa45..51b8ab289eae9 100644 --- 
a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -5,16 +5,13 @@ ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=nvdimm_bus_register ldflags-y += --wrap=devm_cxl_port_enumerate_dports -ldflags-y += --wrap=devm_cxl_setup_hdm -ldflags-y += --wrap=devm_cxl_add_passthrough_decoder -ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready -ldflags-y += --wrap=cxl_hdm_decode_init -ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat ldflags-y += --wrap=cxl_dport_init_ras_reporting +ldflags-y += --wrap=devm_cxl_switch_port_decoders_setup +ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 306c5cbc24187..36dff58275a25 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -643,13 +643,6 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port, return cxlhdm; } -static int mock_cxl_add_passthrough_decoder(struct cxl_port *port) -{ - dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n"); - return -EOPNOTSUPP; -} - - struct target_map_ctx { u32 *target_map; int index; @@ -1013,6 +1006,36 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, return 0; } +static int __mock_cxl_decoders_setup(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm; + + cxlhdm = mock_cxl_setup_hdm(port, NULL); + if (IS_ERR(cxlhdm)) { + if (PTR_ERR(cxlhdm) != -ENODEV) + dev_err(&port->dev, "Failed to map HDM decoder capability\n"); + return PTR_ERR(cxlhdm); + } + + return mock_cxl_enumerate_decoders(cxlhdm, NULL); +} + +static int mock_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + if (is_cxl_root(port) || is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + return 
__mock_cxl_decoders_setup(port); +} + +static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port) +{ + if (!is_cxl_endpoint(port)) + return -EOPNOTSUPP; + + return __mock_cxl_decoders_setup(port); +} + static int mock_cxl_port_enumerate_dports(struct cxl_port *port) { struct platform_device **array; @@ -1127,10 +1150,9 @@ static struct cxl_mock_ops cxl_mock_ops = { .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, + .devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup, + .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup, .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, - .devm_cxl_setup_hdm = mock_cxl_setup_hdm, - .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder, - .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders, .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 1989ae020df3d..f335889b7756a 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -131,55 +131,35 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); -struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port, - struct cxl_endpoint_dvsec_info *info) - -{ - int index; - struct cxl_hdm *cxlhdm; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(port->uport_dev)) - cxlhdm = ops->devm_cxl_setup_hdm(port, info); - else - cxlhdm = devm_cxl_setup_hdm(port, info); - put_cxl_mock_ops(index); - - return cxlhdm; -} -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, "CXL"); - -int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) +int __wrap_devm_cxl_switch_port_decoders_setup(struct cxl_port *port) { int rc, index; struct cxl_mock_ops *ops = 
get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_add_passthrough_decoder(port); + rc = ops->devm_cxl_switch_port_decoders_setup(port); else - rc = devm_cxl_add_passthrough_decoder(port); + rc = devm_cxl_switch_port_decoders_setup(port); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, "CXL"); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_switch_port_decoders_setup, "CXL"); -int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) +int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port) { int rc, index; - struct cxl_port *port = cxlhdm->port; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(port->uport_dev)) - rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info); + rc = ops->devm_cxl_endpoint_decoders_setup(port); else - rc = devm_cxl_enumerate_decoders(cxlhdm, info); + rc = devm_cxl_endpoint_decoders_setup(port); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, "CXL"); +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL"); int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) { @@ -211,39 +191,6 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, "CXL"); -int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_hdm *cxlhdm, - struct cxl_endpoint_dvsec_info *info) -{ - int rc = 0, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_dev(cxlds->dev)) - rc = 0; - else - rc = cxl_hdm_decode_init(cxlds, cxlhdm, info); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, "CXL"); - -int __wrap_cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) -{ - int rc = 0, index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if 
(ops && ops->is_mock_dev(cxlds->dev)) - rc = 0; - else - rc = cxl_dvsec_rr_decode(cxlds, info); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, "CXL"); - struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index d1b0271d28220..9d5ad3fd55ecc 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -20,11 +20,8 @@ struct cxl_mock_ops { bool (*is_mock_port)(struct device *dev); bool (*is_mock_dev)(struct device *dev); int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port); - struct cxl_hdm *(*devm_cxl_setup_hdm)( - struct cxl_port *port, struct cxl_endpoint_dvsec_info *info); - int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port); - int (*devm_cxl_enumerate_decoders)( - struct cxl_hdm *hdm, struct cxl_endpoint_dvsec_info *info); + int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port); + int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port); void (*cxl_endpoint_parse_cdat)(struct cxl_port *port); }; From 3881b4ae1a81ad6f069461fc89dfaba8ac42781c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 11:09:24 -0700 Subject: [PATCH 21/80] cxl: Defer dport allocation for switch ports The current implementation enumerates the dports during the cxl_port driver probe. Without an endpoint connected, the dport may not be active during port probe. This scheme may prevent a valid hardware dport id to be retrieved and MMIO registers to be read when an endpoint is hot-plugged. Move the dport allocation and setup to behind memdev probe so the endpoint is guaranteed to be connected. In the original enumeration behavior, there are 3 phases (or 2 if no CXL switches) for port creation. cxl_acpi() creates a Root Port (RP) from the ACPI0017.N device. 
Through that it enumerates downstream ports composed of ACPI0016.N devices through add_host_bridge_dport(). Once done, it uses add_host_bridge_uport() to create the ports that enumerate the PCI RPs as the dports of these ports. Every time a port is created, the port driver is attached, cxl_switch_porbe_probe() is called and devm_cxl_port_enumerate_dports() is invoked to enumerate and probe the dports. The second phase is if there are any CXL switches. When the pci endpoint device driver (cxl_pci) calls probe, it will add a mem device and triggers the cxl_mem_probe(). cxl_mem_probe() calls devm_cxl_enumerate_ports() and attempts to discovery and create all the ports represent CXL switches. During this phase, a port is created per switch and the attached dports are also enumerated and probed. The last phase is creating endpoint port which happens for all endpoint devices. The new sequence is instead of creating all possible dports at initial port creation, defer port instantiation until a memdev beneath that dport arrives. Introduce devm_cxl_create_or_extend_port() to centralize the creation and extension of ports with new dports as memory devices arrive. As part of this rework, switch decoder target list is amended at runtime as dports show up. While the decoders are allocated during the port driver probe, The decoders must also be updated since previously they were setup when all the dports are setup. Now every time a dport is setup per endpoint, the switch target listing need to be updated with new dport. A guard(rwsem_write) is used to update decoder targets. This is similar to when decoder_populate_target() is called and the decoder programming must be protected. Also the port registers are probed the first time when the first dport shows up. This ensures that the CXL link is established when the port registers are probed. 
[dj] Use ERR_CAST() (Jonathan) Link: https://lore.kernel.org/linux-cxl/20250305100123.3077031-1-rrichter@amd.com/ Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang (cherry picked from commit 4f06d81e7c6a02f850bfe9812295b1e859ab2db0) Signed-off-by: Jiandi An --- drivers/cxl/core/cdat.c | 2 +- drivers/cxl/core/core.h | 2 + drivers/cxl/core/hdm.c | 6 - drivers/cxl/core/pci.c | 46 ++++++++ drivers/cxl/core/port.c | 240 ++++++++++++++++++++++++++++++++-------- drivers/cxl/port.c | 11 +- 6 files changed, 247 insertions(+), 60 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index bca1ec279651d..44c1c778b7cce 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -338,7 +338,7 @@ static int match_cxlrd_hb(struct device *dev, void *data) guard(rwsem_read)(&cxl_rwsem.region); for (int i = 0; i < cxlsd->nr_targets; i++) { - if (host_bridge == cxlsd->target[i]->dport_dev) + if (cxlsd->target[i] && host_bridge == cxlsd->target[i]->dport_dev) return 1; } diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 1fb66132b7777..c7c314a372a95 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -147,6 +147,8 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, int cxl_ras_init(void); void cxl_ras_exit(void); int cxl_gpf_port_setup(struct cxl_dport *dport); +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); struct cxl_hdm; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index d435178f63b82..4ecbf1d23bc59 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -52,8 +52,6 @@ static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld) static int devm_cxl_add_passthrough_decoder(struct cxl_port *port) { struct cxl_switch_decoder *cxlsd; - struct cxl_dport *dport = NULL; - unsigned long index; struct cxl_hdm 
*cxlhdm = dev_get_drvdata(&port->dev); /* @@ -69,10 +67,6 @@ static int devm_cxl_add_passthrough_decoder(struct cxl_port *port) device_lock_assert(&port->dev); - xa_for_each(&port->dports, index, dport) - break; - cxlsd->cxld.target_map[0] = dport->port_id; - return add_hdm_decoder(port, &cxlsd->cxld); } diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index fa02366d35f2d..9ec288ed39aea 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -24,6 +24,52 @@ static unsigned short media_ready_timeout = 60; module_param(media_ready_timeout, ushort, 0644); MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready"); +static int pci_get_port_num(struct pci_dev *pdev) +{ + u32 lnkcap; + int type; + + type = pci_pcie_type(pdev); + if (type != PCI_EXP_TYPE_DOWNSTREAM && type != PCI_EXP_TYPE_ROOT_PORT) + return -EINVAL; + + if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, + &lnkcap)) + return -ENXIO; + + return FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); +} + +/** + * devm_cxl_add_dport_by_dev - allocate a dport by the dport device + * @port: cxl_port that hosts the dport + * @dport_dev: 'struct device' of the dport + * + * Returns the allocated dport on success or ERR_PTR() of -errno on error + */ +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_register_map map; + struct pci_dev *pdev; + int port_num, rc; + + if (!dev_is_pci(dport_dev)) + return ERR_PTR(-EINVAL); + + pdev = to_pci_dev(dport_dev); + port_num = pci_get_port_num(pdev); + if (port_num < 0) + return ERR_PTR(port_num); + + rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + return ERR_PTR(rc); + + device_lock_assert(&port->dev); + return devm_cxl_add_dport(port, dport_dev, port_num, map.resource); +} + struct cxl_walk_context { struct pci_bus *bus; struct cxl_port *port; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index c36e089e53990..c016eaa1e91b0 100644 --- 
a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1358,21 +1358,6 @@ static struct cxl_port *find_cxl_port(struct device *dport_dev, return port; } -static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, - struct device *dport_dev, - struct cxl_dport **dport) -{ - struct cxl_find_port_ctx ctx = { - .dport_dev = dport_dev, - .parent_port = parent_port, - .dport = dport, - }; - struct cxl_port *port; - - port = __find_cxl_port(&ctx); - return port; -} - /* * All users of grandparent() are using it to walk PCIe-like switch port * hierarchy. A PCIe switch is comprised of a bridge device representing the @@ -1548,13 +1533,154 @@ static resource_size_t find_component_registers(struct device *dev) return map.resource; } +static int match_port_by_uport(struct device *dev, const void *data) +{ + const struct device *uport_dev = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + return uport_dev == port->uport_dev; +} + +/* + * Function takes a device reference on the port device. Caller should do a + * put_device() when done. 
+ */ +static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev) +{ + struct device *dev; + + dev = bus_find_device(&cxl_bus_type, NULL, uport_dev, match_port_by_uport); + if (dev) + return to_cxl_port(dev); + return NULL; +} + +static int update_decoder_targets(struct device *dev, void *data) +{ + struct cxl_dport *dport = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_decoder *cxld; + int i; + + if (!is_switch_decoder(dev)) + return 0; + + cxlsd = to_cxl_switch_decoder(dev); + cxld = &cxlsd->cxld; + guard(rwsem_write)(&cxl_rwsem.region); + + for (i = 0; i < cxld->interleave_ways; i++) { + if (cxld->target_map[i] == dport->port_id) { + cxlsd->target[i] = dport; + dev_dbg(dev, "dport%d found in target list, index %d\n", + dport->port_id, i); + return 1; + } + } + + return 0; +} + +DEFINE_FREE(del_cxl_dport, struct cxl_dport *, if (!IS_ERR_OR_NULL(_T)) del_dport(_T)) +static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_dport *dport; + int rc; + + device_lock_assert(&port->dev); + if (!port->dev.driver) + return ERR_PTR(-ENXIO); + + dport = cxl_find_dport_by_dev(port, dport_dev); + if (dport) { + dev_dbg(&port->dev, "dport%d:%s already exists\n", + dport->port_id, dev_name(dport_dev)); + return ERR_PTR(-EBUSY); + } + + struct cxl_dport *new_dport __free(del_cxl_dport) = + devm_cxl_add_dport_by_dev(port, dport_dev); + if (IS_ERR(new_dport)) + return new_dport; + + cxl_switch_parse_cdat(port); + + if (ida_is_empty(&port->decoder_ida)) { + rc = devm_cxl_switch_port_decoders_setup(port); + if (rc) + return ERR_PTR(rc); + dev_dbg(&port->dev, "first dport%d:%s added with decoders\n", + new_dport->port_id, dev_name(dport_dev)); + return no_free_ptr(new_dport); + } + + /* New dport added, update the decoder targets */ + device_for_each_child(&port->dev, new_dport, update_decoder_targets); + + dev_dbg(&port->dev, "dport%d:%s added\n", new_dport->port_id, + dev_name(dport_dev)); + + return 
no_free_ptr(new_dport); +} + +static struct cxl_dport *devm_cxl_create_port(struct device *ep_dev, + struct cxl_port *parent_port, + struct cxl_dport *parent_dport, + struct device *uport_dev, + struct device *dport_dev) +{ + resource_size_t component_reg_phys; + + device_lock_assert(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_warn(ep_dev, + "port %s:%s:%s disabled, failed to enumerate CXL.mem\n", + dev_name(&parent_port->dev), dev_name(uport_dev), + dev_name(dport_dev)); + } + + struct cxl_port *port __free(put_cxl_port) = + find_cxl_port_by_uport(uport_dev); + if (!port) { + component_reg_phys = find_component_registers(uport_dev); + port = devm_cxl_add_port(&parent_port->dev, uport_dev, + component_reg_phys, parent_dport); + if (IS_ERR(port)) + return ERR_CAST(port); + + /* + * retry to make sure a port is found. a port device + * reference is taken. + */ + port = find_cxl_port_by_uport(uport_dev); + if (!port) + return ERR_PTR(-ENODEV); + + dev_dbg(ep_dev, "created port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport_dev)); + } else { + /* + * Port was created before right before this function is + * called. Signal the caller to deal with it. 
+ */ + return ERR_PTR(-EAGAIN); + } + + guard(device)(&port->dev); + return cxl_port_add_dport(port, dport_dev); +} + static int add_port_attach_ep(struct cxl_memdev *cxlmd, struct device *uport_dev, struct device *dport_dev) { struct device *dparent = grandparent(dport_dev); struct cxl_dport *dport, *parent_dport; - resource_size_t component_reg_phys; int rc; if (is_cxl_host_bridge(dparent)) { @@ -1569,42 +1695,31 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, } struct cxl_port *parent_port __free(put_cxl_port) = - find_cxl_port(dparent, &parent_dport); + find_cxl_port_by_uport(dparent->parent); if (!parent_port) { /* iterate to create this parent_port */ return -EAGAIN; } - /* - * Definition with __free() here to keep the sequence of - * dereferencing the device of the port before the parent_port releasing. - */ - struct cxl_port *port __free(put_cxl_port) = NULL; scoped_guard(device, &parent_port->dev) { - if (!parent_port->dev.driver) { - dev_warn(&cxlmd->dev, - "port %s:%s disabled, failed to enumerate CXL.mem\n", - dev_name(&parent_port->dev), dev_name(uport_dev)); - return -ENXIO; + parent_dport = cxl_find_dport_by_dev(parent_port, dparent); + if (!parent_dport) { + parent_dport = cxl_port_add_dport(parent_port, dparent); + if (IS_ERR(parent_dport)) + return PTR_ERR(parent_dport); } - port = find_cxl_port_at(parent_port, dport_dev, &dport); - if (!port) { - component_reg_phys = find_component_registers(uport_dev); - port = devm_cxl_add_port(&parent_port->dev, uport_dev, - component_reg_phys, parent_dport); - if (IS_ERR(port)) - return PTR_ERR(port); - - /* retry find to pick up the new dport information */ - port = find_cxl_port_at(parent_port, dport_dev, &dport); - if (!port) - return -ENXIO; + dport = devm_cxl_create_port(&cxlmd->dev, parent_port, + parent_dport, uport_dev, + dport_dev); + if (IS_ERR(dport)) { + /* Port already exists, restart iteration */ + if (PTR_ERR(dport) == -EAGAIN) + return 0; + return PTR_ERR(dport); } } - 
dev_dbg(&cxlmd->dev, "add to new port %s:%s\n", - dev_name(&port->dev), dev_name(port->uport_dev)); rc = cxl_add_ep(dport, &cxlmd->dev); if (rc == -EBUSY) { /* @@ -1617,6 +1732,25 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, return rc; } +static struct cxl_dport *find_or_add_dport(struct cxl_port *port, + struct device *dport_dev) +{ + struct cxl_dport *dport; + + device_lock_assert(&port->dev); + dport = cxl_find_dport_by_dev(port, dport_dev); + if (!dport) { + dport = cxl_port_add_dport(port, dport_dev); + if (IS_ERR(dport)) + return dport; + + /* New dport added, restart iteration */ + return ERR_PTR(-EAGAIN); + } + + return dport; +} + int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) { struct device *dev = &cxlmd->dev; @@ -1659,12 +1793,26 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) dev_name(iter), dev_name(dport_dev), dev_name(uport_dev)); struct cxl_port *port __free(put_cxl_port) = - find_cxl_port(dport_dev, &dport); + find_cxl_port_by_uport(uport_dev); if (port) { dev_dbg(&cxlmd->dev, "found already registered port %s:%s\n", dev_name(&port->dev), dev_name(port->uport_dev)); + + /* + * RP port enumerated by cxl_acpi without dport will + * have the dport added here. 
+ */ + scoped_guard(device, &port->dev) { + dport = find_or_add_dport(port, dport_dev); + if (IS_ERR(dport)) { + if (PTR_ERR(dport) == -EAGAIN) + goto retry; + return PTR_ERR(dport); + } + } + rc = cxl_add_ep(dport, &cxlmd->dev); /* @@ -1724,14 +1872,16 @@ static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd, device_lock_assert(&port->dev); if (xa_empty(&port->dports)) - return -EINVAL; + return 0; guard(rwsem_write)(&cxl_rwsem.region); for (i = 0; i < cxlsd->cxld.interleave_ways; i++) { struct cxl_dport *dport = find_dport(port, cxld->target_map[i]); - if (!dport) - return -ENXIO; + if (!dport) { + /* dport may be activated later */ + continue; + } cxlsd->target[i] = dport; } diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index d8cae2b5bac6c..51c8f2f84717a 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -59,18 +59,13 @@ static int discover_region(struct device *dev, void *unused) static int cxl_switch_port_probe(struct cxl_port *port) { - int rc; + /* Reset nr_dports for rebind of driver */ + port->nr_dports = 0; /* Cache the data early to ensure is_visible() works */ read_cdat_data(port); - rc = devm_cxl_port_enumerate_dports(port); - if (rc < 0) - return rc; - - cxl_switch_parse_cdat(port); - - return devm_cxl_switch_port_decoders_setup(port); + return 0; } static int cxl_endpoint_port_probe(struct cxl_port *port) From c2204c161677693d75899ec4cd4351f367f991cf Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 8 Jan 2026 11:13:23 +0100 Subject: [PATCH 22/80] cxl/port: Fix target list setup for multiple decoders sharing the same dport If a switch port has more than one decoder that is using the same downstream port, the enumeration of the target lists may fail with: # dmesg | grep target.list update_decoder_targets: cxl decoder1.0: dport3 found in target list, index 3 update_decoder_targets: cxl decoder1.0: dport2 found in target list, index 2 update_decoder_targets: cxl decoder1.0: dport0 found in target list, index 0 
update_decoder_targets: cxl decoder2.0: dport3 found in target list, index 1 update_decoder_targets: cxl decoder4.0: dport3 found in target list, index 1 cxl_mem mem6: failed to find endpoint12:0000:00:01.4 in target list of decoder2.1 cxl_mem mem8: failed to find endpoint13:0000:20:01.4 in target list of decoder4.1 The case, that the same downstream port can be used in multiple target lists, is allowed and possible. Fix the update of the target list. Enumerate all children of the switch port and do not stop the iteration after the first matching target was found. With the fix applied: # dmesg | grep target.list update_decoder_targets: cxl decoder1.0: dport2 found in target list, index 2 update_decoder_targets: cxl decoder1.0: dport0 found in target list, index 0 update_decoder_targets: cxl decoder1.0: dport3 found in target list, index 3 update_decoder_targets: cxl decoder2.0: dport3 found in target list, index 1 update_decoder_targets: cxl decoder2.1: dport3 found in target list, index 1 update_decoder_targets: cxl decoder4.0: dport3 found in target list, index 1 update_decoder_targets: cxl decoder4.1: dport3 found in target list, index 1 Analyzing the conditions when this happens: 1) A dport is shared by multiple decoders. 2) The decoders have interleaving configured (ways > 1). 
The configuration above has the following hierarchy details (fixed version): root0 |_ | | | decoder0.1 | ways: 2 | target_list: 0,1 |_______________________________________ | | | dport0 | dport1 | | port2 port4 | | |___________________ |_____________________ | | | | | | | decoder2.0 decoder2.1 | decoder4.0 decoder4.1 | ways: 2 ways: 2 | ways: 2 ways: 2 | target_list: 2,3 target_list: 2,3 | target_list: 2,3 target_list: 2,3 |___________________ |___________________ | | | | | dport2 | dport3 | dport2 | dport3 | | | | endpoint7 endpoint12 endpoint9 endpoint13 |_ |_ |_ |_ | | | | | | | | | decoder7.0 | decoder12.0 | decoder9.0 | decoder13.0 | decoder7.2 | decoder12.2 | decoder9.2 | decoder13.2 | | | | mem3 mem5 mem6 mem8 Note: Device numbers vary for every boot. Current kernel fails to enumerate endpoint12 and endpoint13 as the target list is not updated for the second decoder. Fixes: 4f06d81e7c6a ("cxl: Defer dport allocation for switch ports") Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Signed-off-by: Robert Richter Link: https://patch.msgid.link/20260108101324.509667-1-rrichter@amd.com Signed-off-by: Dave Jiang (cherry picked from commit 3e8aaacdad4f66641f87ab441fe644b45f8ebdff) Signed-off-by: Jiandi An --- drivers/cxl/core/port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index c016eaa1e91b0..3790dbe4e4344 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1578,7 +1578,7 @@ static int update_decoder_targets(struct device *dev, void *data) cxlsd->target[i] = dport; dev_dbg(dev, "dport%d found in target list, index %d\n", dport->port_id, i); - return 1; + return 0; } } From 09001fd8d776a9d4b1b3269c018a65153cf4660e Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 11:09:25 -0700 Subject: [PATCH 23/80] cxl/test: Add mock version of devm_cxl_add_dport_by_dev() devm_cxl_add_dport_by_dev() outside of cxl_test is done through 
PCI hierarchy. However with cxl_test, it needs to be done through the platform device hierarchy. Add the mock function for devm_cxl_add_dport_by_dev(). When cxl_core calls a cxl_core exported function and that function is mocked by cxl_test, the call chain causes a circular dependency issue. Dan provided a workaround to avoid this issue. Apply the method to changes from the late dport allocation changes in order to enable cxl-test. In cxl_core they are defined with "__" added in front of the function. A macro is used to define the original function names for when non-test version of the kernel is built. A bit of macros and typedefs are used to allow mocking of those functions in cxl_test. Co-developed-by: Dan Williams Signed-off-by: Dan Williams Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Tested-by: Alison Schofield Tested-by: Robert Richter Signed-off-by: Dave Jiang (cherry picked from commit d96eb90d9ca6e4652c8a23d48c94364aa061fdc4) Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 2 -- drivers/cxl/core/pci.c | 7 ++-- drivers/cxl/cxl.h | 20 +++++++++++ tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/cxl_core_exports.c | 12 +++++++ tools/testing/cxl/exports.h | 10 ++++++ tools/testing/cxl/test/cxl.c | 53 ++++++++++++++++++++++++++-- tools/testing/cxl/test/mock.c | 23 ++++++++++++ tools/testing/cxl/test/mock.h | 2 ++ 9 files changed, 123 insertions(+), 7 deletions(-) create mode 100644 tools/testing/cxl/exports.h diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index c7c314a372a95..1fb66132b7777 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -147,8 +147,6 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, int cxl_ras_init(void); void cxl_ras_exit(void); int cxl_gpf_port_setup(struct cxl_dport *dport); -struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, - struct device *dport_dev); struct cxl_hdm; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, diff --git 
a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 9ec288ed39aea..18825e1505d6a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -41,14 +41,14 @@ static int pci_get_port_num(struct pci_dev *pdev) } /** - * devm_cxl_add_dport_by_dev - allocate a dport by the dport device + * __devm_cxl_add_dport_by_dev - allocate a dport by dport device * @port: cxl_port that hosts the dport * @dport_dev: 'struct device' of the dport * * Returns the allocated dport on success or ERR_PTR() of -errno on error */ -struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, - struct device *dport_dev) +struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) { struct cxl_register_map map; struct pci_dev *pdev; @@ -69,6 +69,7 @@ struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, device_lock_assert(&port->dev); return devm_cxl_add_dport(port, dport_dev, port_num, map.resource); } +EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL"); struct cxl_walk_context { struct pci_bus *bus; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index e4f37c143c1ef..ed0df7db628ac 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -914,6 +914,10 @@ void cxl_coordinates_combine(struct access_coordinate *out, struct access_coordinate *c2); bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); +struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); /* * Unit test builds overrides this to __weak, find the 'strong' version @@ -924,4 +928,20 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port); #endif u16 cxl_gpf_get_dvsec(struct device *dev); + +/* + * Declaration for functions that are mocked by cxl_test that are called by + * cxl_core. The respective functions are defined as __foo() and called by + * cxl_core as foo(). 
The macros below ensures that those functions would + * exist as foo(). See tools/testing/cxl/cxl_core_exports.c and + * tools/testing/cxl/exports.h for setting up the mock functions. The dance + * is done to avoid a circular dependency where cxl_core calls a function that + * ends up being a mock function and goes to * cxl_test where it calls a + * cxl_core function. + */ +#ifndef CXL_TEST_ENABLE +#define DECLARE_TESTABLE(x) __##x +#define devm_cxl_add_dport_by_dev DECLARE_TESTABLE(devm_cxl_add_dport_by_dev) +#endif + #endif /* __CXL_H__ */ diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 51b8ab289eae9..81e3795673c5a 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -18,6 +18,7 @@ CXL_SRC := $(DRIVERS)/cxl CXL_CORE_SRC := $(DRIVERS)/cxl/core ccflags-y := -I$(srctree)/drivers/cxl/ ccflags-y += -D__mock=__weak +ccflags-y += -DCXL_TEST_ENABLE=1 ccflags-y += -DTRACE_INCLUDE_PATH=$(CXL_CORE_SRC) -I$(srctree)/drivers/cxl/core/ obj-m += cxl_acpi.o diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c index f088792a8925f..0d18abc1f5a31 100644 --- a/tools/testing/cxl/cxl_core_exports.c +++ b/tools/testing/cxl/cxl_core_exports.c @@ -2,6 +2,18 @@ /* Copyright(c) 2022 Intel Corporation. All rights reserved. 
*/ #include "cxl.h" +#include "exports.h" /* Exporting of cxl_core symbols that are only used by cxl_test */ EXPORT_SYMBOL_NS_GPL(cxl_num_decoders_committed, "CXL"); + +cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev = + __devm_cxl_add_dport_by_dev; +EXPORT_SYMBOL_NS_GPL(_devm_cxl_add_dport_by_dev, "CXL"); + +struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + return _devm_cxl_add_dport_by_dev(port, dport_dev); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL"); diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h new file mode 100644 index 0000000000000..9261ce6f11973 --- /dev/null +++ b/tools/testing/cxl/exports.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2025 Intel Corporation */ +#ifndef __MOCK_CXL_EXPORTS_H_ +#define __MOCK_CXL_EXPORTS_H_ + +typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port, + struct device *dport_dev); +extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev; + +#endif diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 36dff58275a25..b10434236590f 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -1036,10 +1036,12 @@ static int mock_cxl_endpoint_decoders_setup(struct cxl_port *port) return __mock_cxl_decoders_setup(port); } -static int mock_cxl_port_enumerate_dports(struct cxl_port *port) +static int get_port_array(struct cxl_port *port, + struct platform_device ***port_array, + int *port_array_size) { struct platform_device **array; - int i, array_size; + int array_size; if (port->depth == 1) { if (is_multi_bridge(port->uport_dev)) { @@ -1073,6 +1075,22 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return -ENXIO; } + *port_array = array; + *port_array_size = array_size; + + return 0; +} + +static int mock_cxl_port_enumerate_dports(struct cxl_port *port) +{ + struct platform_device **array; + int i, array_size; + int rc; + + rc = 
get_port_array(port, &array, &array_size); + if (rc) + return rc; + for (i = 0; i < array_size; i++) { struct platform_device *pdev = array[i]; struct cxl_dport *dport; @@ -1094,6 +1112,36 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return 0; } +static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + struct platform_device **array; + int rc, i, array_size; + + rc = get_port_array(port, &array, &array_size); + if (rc) + return ERR_PTR(rc); + + for (i = 0; i < array_size; i++) { + struct platform_device *pdev = array[i]; + + if (pdev->dev.parent != port->uport_dev) { + dev_dbg(&port->dev, "%s: mismatch parent %s\n", + dev_name(port->uport_dev), + dev_name(pdev->dev.parent)); + continue; + } + + if (&pdev->dev != dport_dev) + continue; + + return devm_cxl_add_dport(port, &pdev->dev, pdev->id, + CXL_RESOURCE_NONE); + } + + return ERR_PTR(-ENODEV); +} + /* * Faking the cxl_dpa_perf for the memdev when appropriate. */ @@ -1154,6 +1202,7 @@ static struct cxl_mock_ops cxl_mock_ops = { .devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup, .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, .cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat, + .devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index f335889b7756a..e98101f083cd3 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -10,12 +10,18 @@ #include #include #include "mock.h" +#include "../exports.h" static LIST_HEAD(mock); +static struct cxl_dport * +redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev); + void register_cxl_mock_ops(struct cxl_mock_ops *ops) { list_add_rcu(&ops->list, &mock); + _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev; } EXPORT_SYMBOL_GPL(register_cxl_mock_ops); @@ -23,6 +29,7 @@ 
DEFINE_STATIC_SRCU(cxl_mock_srcu); void unregister_cxl_mock_ops(struct cxl_mock_ops *ops) { + _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev; list_del_rcu(&ops->list); synchronize_srcu(&cxl_mock_srcu); } @@ -258,6 +265,22 @@ void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, "CXL"); +struct cxl_dport *redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port, + struct device *dport_dev) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + struct cxl_dport *dport; + + if (ops && ops->is_mock_port(port->uport_dev)) + dport = ops->devm_cxl_add_dport_by_dev(port, dport_dev); + else + dport = __devm_cxl_add_dport_by_dev(port, dport_dev); + put_cxl_mock_ops(index); + + return dport; +} + MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("cxl_test: emulation module"); MODULE_IMPORT_NS("ACPI"); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 9d5ad3fd55ecc..4ed932e76aae8 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -23,6 +23,8 @@ struct cxl_mock_ops { int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port); int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port); void (*cxl_endpoint_parse_cdat)(struct cxl_port *port); + struct cxl_dport *(*devm_cxl_add_dport_by_dev)(struct cxl_port *port, + struct device *dport_dev); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops); From 6b3738e3d24694340199a22d2c54dd261b2583d2 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 11:09:26 -0700 Subject: [PATCH 24/80] cxl/test: Adjust the mock version of devm_cxl_switch_port_decoders_setup() With devm_cxl_switch_port_decoders_setup() being called within cxl_core instead of by the port driver probe, adjustments are needed to deal with circular symbol dependency when this function is being mock'd. Add the appropriate changes to get around the circular dependency. 
Reviewed-by: Alison Schofield Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang (cherry picked from commit 644685abc16b58b3afcc2feb0ac14e86476ca2ed) Signed-off-by: Jiandi An --- drivers/cxl/core/hdm.c | 6 +++--- drivers/cxl/cxl.h | 2 ++ tools/testing/cxl/Kbuild | 1 - tools/testing/cxl/cxl_core_exports.c | 10 ++++++++++ tools/testing/cxl/exports.h | 3 +++ tools/testing/cxl/test/mock.c | 10 +++++++--- 6 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 4ecbf1d23bc59..de78601821e60 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -1219,12 +1219,12 @@ static int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, } /** - * devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders + * __devm_cxl_switch_port_decoders_setup - allocate and setup switch decoders * @port: CXL port context * * Return 0 or -errno on error */ -int devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port) { struct cxl_hdm *cxlhdm; @@ -1248,7 +1248,7 @@ int devm_cxl_switch_port_decoders_setup(struct cxl_port *port) dev_err(&port->dev, "HDM decoder capability not found\n"); return -ENXIO; } -EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL"); +EXPORT_SYMBOL_NS_GPL(__devm_cxl_switch_port_decoders_setup, "CXL"); /** * devm_cxl_endpoint_decoders_setup - allocate and setup endpoint decoders diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index ed0df7db628ac..7374c81f55f44 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -819,6 +819,7 @@ struct cxl_endpoint_dvsec_info { }; int devm_cxl_switch_port_decoders_setup(struct cxl_port *port); +int __devm_cxl_switch_port_decoders_setup(struct cxl_port *port); int devm_cxl_endpoint_decoders_setup(struct cxl_port *port); struct cxl_dev_state; @@ -942,6 +943,7 @@ u16 cxl_gpf_get_dvsec(struct device *dev); #ifndef CXL_TEST_ENABLE #define DECLARE_TESTABLE(x) __##x 
#define devm_cxl_add_dport_by_dev DECLARE_TESTABLE(devm_cxl_add_dport_by_dev) +#define devm_cxl_switch_port_decoders_setup DECLARE_TESTABLE(devm_cxl_switch_port_decoders_setup) #endif #endif /* __CXL_H__ */ diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 81e3795673c5a..0d5ce4b74b9f7 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -10,7 +10,6 @@ ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat ldflags-y += --wrap=cxl_dport_init_ras_reporting -ldflags-y += --wrap=devm_cxl_switch_port_decoders_setup ldflags-y += --wrap=devm_cxl_endpoint_decoders_setup DRIVERS := ../../../drivers diff --git a/tools/testing/cxl/cxl_core_exports.c b/tools/testing/cxl/cxl_core_exports.c index 0d18abc1f5a31..6754de35598d5 100644 --- a/tools/testing/cxl/cxl_core_exports.c +++ b/tools/testing/cxl/cxl_core_exports.c @@ -17,3 +17,13 @@ struct cxl_dport *devm_cxl_add_dport_by_dev(struct cxl_port *port, return _devm_cxl_add_dport_by_dev(port, dport_dev); } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport_by_dev, "CXL"); + +cxl_switch_decoders_setup_fn _devm_cxl_switch_port_decoders_setup = + __devm_cxl_switch_port_decoders_setup; +EXPORT_SYMBOL_NS_GPL(_devm_cxl_switch_port_decoders_setup, "CXL"); + +int devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +{ + return _devm_cxl_switch_port_decoders_setup(port); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_switch_port_decoders_setup, "CXL"); diff --git a/tools/testing/cxl/exports.h b/tools/testing/cxl/exports.h index 9261ce6f11973..7ebee7c0bd67e 100644 --- a/tools/testing/cxl/exports.h +++ b/tools/testing/cxl/exports.h @@ -7,4 +7,7 @@ typedef struct cxl_dport *(*cxl_add_dport_by_dev_fn)(struct cxl_port *port, struct device *dport_dev); extern cxl_add_dport_by_dev_fn _devm_cxl_add_dport_by_dev; +typedef int(*cxl_switch_decoders_setup_fn)(struct cxl_port *port); +extern cxl_switch_decoders_setup_fn 
_devm_cxl_switch_port_decoders_setup; + #endif diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index e98101f083cd3..995269a75cbd1 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -17,11 +17,14 @@ static LIST_HEAD(mock); static struct cxl_dport * redirect_devm_cxl_add_dport_by_dev(struct cxl_port *port, struct device *dport_dev); +static int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port); void register_cxl_mock_ops(struct cxl_mock_ops *ops) { list_add_rcu(&ops->list, &mock); _devm_cxl_add_dport_by_dev = redirect_devm_cxl_add_dport_by_dev; + _devm_cxl_switch_port_decoders_setup = + redirect_devm_cxl_switch_port_decoders_setup; } EXPORT_SYMBOL_GPL(register_cxl_mock_ops); @@ -29,6 +32,8 @@ DEFINE_STATIC_SRCU(cxl_mock_srcu); void unregister_cxl_mock_ops(struct cxl_mock_ops *ops) { + _devm_cxl_switch_port_decoders_setup = + __devm_cxl_switch_port_decoders_setup; _devm_cxl_add_dport_by_dev = __devm_cxl_add_dport_by_dev; list_del_rcu(&ops->list); synchronize_srcu(&cxl_mock_srcu); @@ -138,7 +143,7 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); -int __wrap_devm_cxl_switch_port_decoders_setup(struct cxl_port *port) +int redirect_devm_cxl_switch_port_decoders_setup(struct cxl_port *port) { int rc, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); @@ -146,12 +151,11 @@ int __wrap_devm_cxl_switch_port_decoders_setup(struct cxl_port *port) if (ops && ops->is_mock_port(port->uport_dev)) rc = ops->devm_cxl_switch_port_decoders_setup(port); else - rc = devm_cxl_switch_port_decoders_setup(port); + rc = __devm_cxl_switch_port_decoders_setup(port); put_cxl_mock_ops(index); return rc; } -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_switch_port_decoders_setup, "CXL"); int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port) { From 624c480f9ff22b0aeda4c4ecee197f59e761d3e9 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 11:09:27 
-0700 Subject: [PATCH 25/80] cxl/test: Setup target_map for cxl_test decoder initialization cxl_test uses mock functions for decoder enumeration. Add initialization of the cxld->target_map[] for cxl_test based decoders in the mock functions. Reviewed-by: Jonathan Cameron Tested-by: Robert Richter Reviewed-by: Alison Schofield Signed-off-by: Dave Jiang (cherry picked from commit 87439b598ad962ffc5744e2e0a8b461e78d8d32f) Signed-off-by: Jiandi An --- tools/testing/cxl/test/cxl.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index b10434236590f..cb18ee41a7cf8 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -887,15 +887,21 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) */ if (WARN_ON(!dev)) continue; + cxlsd = to_cxl_switch_decoder(dev); if (i == 0) { /* put cxl_mem.4 second in the decode order */ - if (pdev->id == 4) + if (pdev->id == 4) { cxlsd->target[1] = dport; - else + cxld->target_map[1] = dport->port_id; + } else { cxlsd->target[0] = dport; - } else + cxld->target_map[0] = dport->port_id; + } + } else { cxlsd->target[0] = dport; + cxld->target_map[0] = dport->port_id; + } cxld = &cxlsd->cxld; cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->flags = CXL_DECODER_F_ENABLE; From 1be96d7f338f99b6f5bd18386b37fcd00bd9d33d Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Aug 2025 11:09:28 -0700 Subject: [PATCH 26/80] cxl: Change sslbis handler to only handle single dport While cxl_switch_parse_cdat() is harmless to be run multiple times, it is not efficient in the current scheme where one dport is being updated at a time by the memdev probe path. Change the input parameter to the specific dport being updated to pick up the SSLBIS information for just that dport. 
Reviewed-by: Gregory Price Reviewed-by: Jonathan Cameron Reviewed-by: Li Ming Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Tested-by: Robert Richter Signed-off-by: Dave Jiang (cherry picked from commit d64035a5a37741b25712fb9c2f6aca535c2967ea) Signed-off-by: Jiandi An --- drivers/cxl/core/cdat.c | 23 ++++++++++------------- drivers/cxl/core/port.c | 2 +- drivers/cxl/cxl.h | 2 +- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index 44c1c778b7cce..c4bd6e8a0cf03 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -440,8 +440,8 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, } *tbl = (struct acpi_cdat_sslbis_table *)header; int size = sizeof(header->cdat) + sizeof(tbl->sslbis_header); struct acpi_cdat_sslbis *sslbis; - struct cxl_port *port = arg; - struct device *dev = &port->dev; + struct cxl_dport *dport = arg; + struct device *dev = &dport->port->dev; int remain, entries, i; u16 len; @@ -467,8 +467,6 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, u16 y = le16_to_cpu((__force __le16)tbl->entries[i].porty_id); __le64 le_base; __le16 le_val; - struct cxl_dport *dport; - unsigned long index; u16 dsp_id; u64 val; @@ -499,28 +497,27 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg, val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base), sslbis->data_type); - xa_for_each(&port->dports, index, dport) { - if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || - dsp_id == dport->port_id) { - cxl_access_coordinate_set(dport->coord, - sslbis->data_type, - val); - } + if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT || + dsp_id == dport->port_id) { + cxl_access_coordinate_set(dport->coord, + sslbis->data_type, val); + return 0; } } return 0; } -void cxl_switch_parse_cdat(struct cxl_port *port) +void cxl_switch_parse_cdat(struct cxl_dport *dport) { + struct cxl_port *port = dport->port; int rc; if 
(!port->cdat.table) return; rc = cdat_table_parse(ACPI_CDAT_TYPE_SSLBIS, cdat_sslbis_handler, - port, port->cdat.table, port->cdat.length); + dport, port->cdat.table, port->cdat.length); rc = cdat_table_parse_output(rc); if (rc) dev_dbg(&port->dev, "Failed to parse SSLBIS: %d\n", rc); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 3790dbe4e4344..c063f01b4a759 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1608,7 +1608,7 @@ static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port, if (IS_ERR(new_dport)) return new_dport; - cxl_switch_parse_cdat(port); + cxl_switch_parse_cdat(new_dport); if (ida_is_empty(&port->decoder_ida)) { rc = devm_cxl_switch_port_decoders_setup(port); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 7374c81f55f44..0e0e518031fb0 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -900,7 +900,7 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, #endif void cxl_endpoint_parse_cdat(struct cxl_port *port); -void cxl_switch_parse_cdat(struct cxl_port *port); +void cxl_switch_parse_cdat(struct cxl_dport *dport); int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); From f90594418150f78f47c9c7b741af5189c7f64921 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 14 Aug 2025 15:21:44 -0700 Subject: [PATCH 27/80] cxl: Move port register setup to when first dport appear This patch moves the port register setup to when the first dport appears via the memdev probe path. At this point, the CXL link should be established and the register access is expected to succeed. This change addresses an error message observed when PCIe hotplug is enabled on an Intel platform. The error messages "cxl portN: Couldn't locate the CXL.cache and CXL.mem capability array header" is observed for the host bridge (CHBCR) during cxl_acpi driver probe. 
If the cxl_acpi module probe is running before the CXL link between the endpoint device and the RP is established, then the platform may not have exposed DVSEC ID 3 and/or DVSEC ID 7 blocks which will trigger the error message. This behavior is defined by the CXL spec r3.2 9.12.3 for RPs and DSPs, however the Intel platform also added this behavior to the host bridge. This change also needs the dport enumeration to be moved to the memdev probe path in order to address the issue. This change is not a wholly contained solution by itself. [dj: Add missing var init during port alloc] Suggested-by: Dan Williams Reviewed-by: Jonathan Cameron Tested-by: Robert Richter Reviewed-by: Alison Schofield Reviewed-by: Ira Weiny Signed-off-by: Dave Jiang (cherry picked from commit f6ee24913de24dbda8d49213e1a27f5e1a5204cc) Signed-off-by: Jiandi An --- drivers/cxl/core/port.c | 17 ++++++++++++++--- drivers/cxl/cxl.h | 2 ++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index c063f01b4a759..908cfb78612a0 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -750,6 +750,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport_dev, xa_init(&port->dports); xa_init(&port->endpoints); xa_init(&port->regions); + port->component_reg_phys = CXL_RESOURCE_NONE; device_initialize(dev); lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth); @@ -868,9 +869,7 @@ static int cxl_port_add(struct cxl_port *port, if (rc) return rc; - rc = cxl_port_setup_regs(port, component_reg_phys); - if (rc) - return rc; + port->component_reg_phys = component_reg_phys; } else { rc = dev_set_name(dev, "root%d", port->id); if (rc) @@ -1201,6 +1200,18 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, cxl_debugfs_create_dport_dir(dport); + /* + * Setup port register if this is the first dport showed up. Having + * a dport also means that there is at least 1 active link. 
+ */ + if (port->nr_dports == 1 && + port->component_reg_phys != CXL_RESOURCE_NONE) { + rc = cxl_port_setup_regs(port, port->component_reg_phys); + if (rc) + return ERR_PTR(rc); + port->component_reg_phys = CXL_RESOURCE_NONE; + } + return dport; } diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 0e0e518031fb0..231ddccf89773 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -607,6 +607,7 @@ struct cxl_dax_region { * @cdat: Cached CDAT data * @cdat_available: Should a CDAT attribute be available in sysfs * @pci_latency: Upstream latency in picoseconds + * @component_reg_phys: Physical address of component register */ struct cxl_port { struct device dev; @@ -630,6 +631,7 @@ struct cxl_port { } cdat; bool cdat_available; long pci_latency; + resource_size_t component_reg_phys; }; /** From 4431d5046b2a1d12ce9c1e70a1a5177012f444d2 Mon Sep 17 00:00:00 2001 From: Li Ming Date: Wed, 1 Oct 2025 14:03:37 +0800 Subject: [PATCH 28/80] cxl/port: Avoid missing port component registers setup port->nr_dports is used to represent how many dports added to the cxl port, it will increase in add_dport() when a new dport is being added to the cxl port, but it will not be reduced when a dport is removed from the cxl port. Currently, when the first dport is added to a cxl port, it will trigger component registers setup on the cxl port, the implementation is using port->nr_dports to confirm if the dport is the first dport. A corner case here is that adding dport could fail after port->nr_dports updating and before checking port->nr_dports for component registers setup. If the failure happens during the first dport attaching, it will cause that CXL subsystem has not chance to execute component registers setup for the cxl port. 
the failure flow like below: port->nr_dports = 0 dport 1 adding to the port: add_dport() # port->nr_dports: 1 failed on devm_add_action_or_reset() or sysfs_create_link() return error # port->nr_dports: 1 dport 2 adding to the port: add_dport() # port->nr_dports: 2 no failure skip component registers setup because of port->nr_dports is 2 The solution here is that moving component registers setup closer to add_dport(), so if add_dport() is executed correctly for the first dport, component registers setup on the port will be executed immediately after that. Fixes: f6ee24913de2 ("cxl: Move port register setup to when first dport appear") Signed-off-by: Li Ming Reviewed-by: Dave Jiang Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang (backported from commit 02e7567f5da023524476053a38c54f4f19130959) [jan: fixed nr_dports not decremented on cxl_port_setup_regs failure] Signed-off-by: Jiandi An --- drivers/cxl/core/port.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 908cfb78612a0..43054da7e547b 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1182,6 +1182,21 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, if (rc) return ERR_PTR(rc); + /* + * Setup port register if this is the first dport showed up. Having + * a dport also means that there is at least 1 active link. 
+ */ + if (port->nr_dports == 1 && + port->component_reg_phys != CXL_RESOURCE_NONE) { + rc = cxl_port_setup_regs(port, port->component_reg_phys); + if (rc) { + port->nr_dports--; + xa_erase(&port->dports, (unsigned long)dport->dport_dev); + return ERR_PTR(rc); + } + port->component_reg_phys = CXL_RESOURCE_NONE; + } + get_device(dport_dev); rc = devm_add_action_or_reset(host, cxl_dport_remove, dport); if (rc) @@ -1200,18 +1215,6 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, cxl_debugfs_create_dport_dir(dport); - /* - * Setup port register if this is the first dport showed up. Having - * a dport also means that there is at least 1 active link. - */ - if (port->nr_dports == 1 && - port->component_reg_phys != CXL_RESOURCE_NONE) { - rc = cxl_port_setup_regs(port, port->component_reg_phys); - if (rc) - return ERR_PTR(rc); - port->component_reg_phys = CXL_RESOURCE_NONE; - } - return dport; } From a00440ee55d0203450d309e32a8ec724ead3c7c0 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Tue, 14 Oct 2025 00:31:04 -0700 Subject: [PATCH 29/80] cxl/region: Use %pa printk format to emit resource_size_t KASAN reports a stack-out-of-bounds access in validate_region_offset() while running the cxl-poison.sh unit test because the printk format specifier, %pr format, is not a match for the resource_size_t type of the variables. %pr expects struct resource pointers and attempts to dereference the structure fields, reading beyond the bounds of the stack variables. Since these messages emit an 'A exceeds B' type of message, keep the resource_size_t's and use the %pa specifier to be architecture safe. BUG: KASAN: stack-out-of-bounds in resource_string.isra.0+0xe9a/0x1690 [] Read of size 8 at addr ffff88800a7afb40 by task bash/1397 ... 
[] The buggy address belongs to stack of task bash/1397 [] and is located at offset 56 in frame: [] validate_region_offset+0x0/0x1c0 [cxl_core] Fixes: c3dd67681c70 ("cxl/region: Add inject and clear poison by region offset") Signed-off-by: Alison Schofield Reviewed-by: Dave Jiang Signed-off-by: Dave Jiang (cherry picked from commit 257c4b03a2f7d8c15f79c79b09a561af9734f6c4) Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 32675a70cadf9..149f9bdabbb40 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -3671,14 +3671,14 @@ static int validate_region_offset(struct cxl_region *cxlr, u64 offset) if (offset < p->cache_size) { dev_err(&cxlr->dev, - "Offset %#llx is within extended linear cache %pr\n", + "Offset %#llx is within extended linear cache %pa\n", offset, &p->cache_size); return -EINVAL; } region_size = resource_size(p->res); if (offset >= region_size) { - dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pr\n", + dev_err(&cxlr->dev, "Offset %#llx exceeds region size %pa\n", offset, ®ion_size); return -EINVAL; } From 40a04c34980526da3b89875a47b7f568c4cdbaba Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:41 -0600 Subject: [PATCH 30/80] NVIDIA: VR: SAUCE: CXL/PCI: Move CXL DVSEC definitions into uapi/linux/pci_regs.h The CXL DVSECs are currently defined in cxl/core/cxlpci.h. These are not accessible to other subsystems. Move these to uapi/linux/pci_regs.h. Change DVSEC name formatting to follow the existing PCI format in pci_regs.h. The current format uses CXL_DVSEC_XYZ and the CXL defines must be changed to be PCI_DVSEC_CXL_XYZ to match existing pci_regs.h. Leave PCI_DVSEC_CXL_PORT* defines as-is because they are already defined and may be in use by userspace application(s). Update existing usage to match the name change. 
Update the inline documentation to refer to latest CXL spec version. Signed-off-by: Terry Bowman Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/pci.c | 62 +++++++++++++++++----------------- drivers/cxl/core/regs.c | 12 +++---- drivers/cxl/cxlpci.h | 53 ----------------------------- drivers/cxl/pci.c | 2 +- drivers/pci/pci.c | 4 ++- include/uapi/linux/pci_regs.h | 63 ++++++++++++++++++++++++++++++++--- 6 files changed, 100 insertions(+), 96 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 18825e1505d6a..cbc8defa68487 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -158,19 +158,19 @@ static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id) int rc, i; u32 temp; - if (id > CXL_DVSEC_RANGE_MAX) + if (id > PCI_DVSEC_CXL_RANGE_MAX) return -EINVAL; /* Check MEM INFO VALID bit first, give up after 1s */ i = 1; do { rc = pci_read_config_dword(pdev, - d + CXL_DVSEC_RANGE_SIZE_LOW(id), + d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp); + valid = FIELD_GET(PCI_DVSEC_CXL_MEM_INFO_VALID, temp); if (valid) break; msleep(1000); @@ -194,17 +194,17 @@ static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id) int rc, i; u32 temp; - if (id > CXL_DVSEC_RANGE_MAX) + if (id > PCI_DVSEC_CXL_RANGE_MAX) return -EINVAL; /* Check MEM ACTIVE bit, up to 60s timeout by default */ for (i = media_ready_timeout; i; i--) { rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(id), &temp); if (rc) return rc; - active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + active = FIELD_GET(PCI_DVSEC_CXL_MEM_ACTIVE, temp); if (active) break; msleep(1000); @@ -233,11 +233,11 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) u16 cap; rc = pci_read_config_word(pdev, - d + 
CXL_DVSEC_CAP_OFFSET, &cap); + d + PCI_DVSEC_CXL_CAP_OFFSET, &cap); if (rc) return rc; - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT_MASK, cap); for (i = 0; i < hdm_count; i++) { rc = cxl_dvsec_mem_range_valid(cxlds, i); if (rc) @@ -265,16 +265,16 @@ static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val) u16 ctrl; int rc; - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL_OFFSET, &ctrl); if (rc < 0) return rc; - if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val) + if ((ctrl & PCI_DVSEC_CXL_MEM_ENABLE) == val) return 1; - ctrl &= ~CXL_DVSEC_MEM_ENABLE; + ctrl &= ~PCI_DVSEC_CXL_MEM_ENABLE; ctrl |= val; - rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl); + rc = pci_write_config_word(pdev, d + PCI_DVSEC_CXL_CTRL_OFFSET, ctrl); if (rc < 0) return rc; @@ -290,7 +290,7 @@ static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds) { int rc; - rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE); + rc = cxl_set_mem_enable(cxlds, PCI_DVSEC_CXL_MEM_ENABLE); if (rc < 0) return rc; if (rc > 0) @@ -352,11 +352,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return -ENXIO; } - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CAP_OFFSET, &cap); if (rc) return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + if (!(cap & PCI_DVSEC_CXL_MEM_CAPABLE)) { dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; } @@ -367,7 +367,7 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * driver is for a spec defined class code which must be CXL.mem * capable, there is no point in continuing to enable CXL.mem. 
*/ - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + hdm_count = FIELD_GET(PCI_DVSEC_CXL_HDM_COUNT_MASK, cap); if (!hdm_count || hdm_count > 2) return -EINVAL; @@ -376,11 +376,11 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, * disabled, and they will remain moot after the HDM Decoder * capability is enabled. */ - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + rc = pci_read_config_word(pdev, d + PCI_DVSEC_CXL_CTRL_OFFSET, &ctrl); if (rc) return rc; - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + info->mem_enabled = FIELD_GET(PCI_DVSEC_CXL_MEM_ENABLE, ctrl); if (!info->mem_enabled) return 0; @@ -393,35 +393,35 @@ int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds, return rc; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i), &temp); if (rc) return rc; size = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_SIZE_LOW(i), &temp); if (rc) return rc; - size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + size |= temp & PCI_DVSEC_CXL_MEM_SIZE_LOW_MASK; if (!size) { continue; } rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_HIGH(i), &temp); if (rc) return rc; base = (u64)temp << 32; rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + pdev, d + PCI_DVSEC_CXL_RANGE_BASE_LOW(i), &temp); if (rc) return rc; - base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + base |= temp & PCI_DVSEC_CXL_MEM_BASE_LOW_MASK; info->dvsec_range[ranges++] = (struct range) { .start = base, @@ -1147,7 +1147,7 @@ u16 cxl_gpf_get_dvsec(struct device *dev) is_port = false; dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF); + is_port ? PCI_DVSEC_CXL_PORT_GPF : PCI_DVSEC_CXL_DEVICE_GPF); if (!dvsec) dev_warn(dev, "%s GPF DVSEC not present\n", is_port ? 
"Port" : "Device"); @@ -1163,14 +1163,14 @@ static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase) switch (phase) { case 1: - offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL_OFFSET; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE_MASK; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE_MASK; break; case 2: - offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET; - base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK; - scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK; + offset = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL_OFFSET; + base = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE_MASK; + scale = PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE_MASK; break; default: return -EINVAL; diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index 5ca7b0eed568b..fb70ffbba72d3 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -271,10 +271,10 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, "CXL"); static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi, struct cxl_register_map *map) { - u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); - int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + u8 reg_type = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); + int bar = FIELD_GET(PCI_DVSEC_CXL_REG_LOCATOR_BIR_MASK, reg_lo); u64 offset = ((u64)reg_hi << 32) | - (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + (reg_lo & PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW_MASK); if (offset > pci_resource_len(pdev, bar)) { dev_warn(&pdev->dev, @@ -311,15 +311,15 @@ static int __cxl_find_regblock_instance(struct pci_dev *pdev, enum cxl_regloc_ty }; regloc = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, - CXL_DVSEC_REG_LOCATOR); + PCI_DVSEC_CXL_REG_LOCATOR); if (!regloc) return -ENXIO; pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, 
®loc_size); regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); - regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; - regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; + regloc += PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1_OFFSET; + regblocks = (regloc_size - PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1_OFFSET) / 8; for (i = 0; i < regblocks; i++, regloc += 8) { u32 reg_lo, reg_hi; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 7ae621e618e79..4985dbd900693 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -7,59 +7,6 @@ #define CXL_MEMORY_PROGIF 0x10 -/* - * See section 8.1 Configuration Space Registers in the CXL 2.0 - * Specification. Names are taken straight from the specification with "CXL" and - * "DVSEC" redundancies removed. When obvious, abbreviations may be used. - */ -#define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) - -/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ -#define CXL_DVSEC_PCIE_DEVICE 0 -#define CXL_DVSEC_CAP_OFFSET 0xA -#define CXL_DVSEC_MEM_CAPABLE BIT(2) -#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) -#define CXL_DVSEC_CTRL_OFFSET 0xC -#define CXL_DVSEC_MEM_ENABLE BIT(2) -#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) -#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) -#define CXL_DVSEC_MEM_INFO_VALID BIT(0) -#define CXL_DVSEC_MEM_ACTIVE BIT(1) -#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) -#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) -#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) -#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) - -#define CXL_DVSEC_RANGE_MAX 2 - -/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ -#define CXL_DVSEC_FUNCTION_MAP 2 - -/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */ -#define CXL_DVSEC_PORT_EXTENSIONS 3 - -/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ -#define CXL_DVSEC_PORT_GPF 4 -#define CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C -#define CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK GENMASK(3, 0) -#define 
CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK GENMASK(11, 8) -#define CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK GENMASK(3, 0) -#define CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK GENMASK(11, 8) - -/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ -#define CXL_DVSEC_DEVICE_GPF 5 - -/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ -#define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 - -/* CXL 2.0 8.1.9: Register Locator DVSEC */ -#define CXL_DVSEC_REG_LOCATOR 8 -#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC -#define CXL_DVSEC_REG_LOCATOR_BIR_MASK GENMASK(2, 0) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8) -#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16) - /* * NOTE: Currently all the functions which are enabled for CXL require their * vectors to be in the first 16. Use this as the default max. diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index bd100ac31672d..bd95be1f3d5c5 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -933,7 +933,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) cxlds->rcd = is_cxl_restricted(pdev); cxlds->serial = pci_get_dsn(pdev); cxlds->cxl_dvsec = pci_find_dvsec_capability( - pdev, PCI_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); + pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE); if (!cxlds->cxl_dvsec) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 372de7961d2a6..73b586e4efa68 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -5058,7 +5058,9 @@ static bool cxl_sbr_masked(struct pci_dev *dev) if (!dvsec) return false; - rc = pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_PORT_CTL, ®); + rc = pci_read_config_word(dev, + dvsec + PCI_DVSEC_CXL_PORT_CTL, + ®); if (rc || PCI_POSSIBLE_ERROR(reg)) return false; diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index bfa9ada355c9b..6d1671e97b9c7 100644 --- 
a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -1234,10 +1234,65 @@ /* Deprecated old name, replaced with PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE */ #define PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE -/* Compute Express Link (CXL r3.1, sec 8.1.5) */ -#define PCI_DVSEC_CXL_PORT 3 -#define PCI_DVSEC_CXL_PORT_CTL 0x0c -#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 +/* Compute Express Link (CXL r3.2, sec 8.1) + * + * Note that CXL DVSEC id 3 and 7 to be ignored when the CXL link state + * is "disconnected" (CXL r3.2, sec 9.12.3). Re-enumerate these + * registers on downstream link-up events. + */ + +#define PCI_DVSEC_HEADER1_LENGTH_MASK __GENMASK(31, 20) + +/* CXL 3.2 8.1.3: PCIe DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE 0 +#define PCI_DVSEC_CXL_CAP_OFFSET 0xA +#define PCI_DVSEC_CXL_MEM_CAPABLE _BITUL(2) +#define PCI_DVSEC_CXL_HDM_COUNT_MASK __GENMASK(5, 4) +#define PCI_DVSEC_CXL_CTRL_OFFSET 0xC +#define PCI_DVSEC_CXL_MEM_ENABLE _BITUL(2) +#define PCI_DVSEC_CXL_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_INFO_VALID _BITUL(0) +#define PCI_DVSEC_CXL_MEM_ACTIVE _BITUL(1) +#define PCI_DVSEC_CXL_MEM_SIZE_LOW_MASK __GENMASK(31, 28) +#define PCI_DVSEC_CXL_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define PCI_DVSEC_CXL_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define PCI_DVSEC_CXL_MEM_BASE_LOW_MASK __GENMASK(31, 28) + +#define PCI_DVSEC_CXL_RANGE_MAX 2 + +/* CXL 3.2 8.1.4: Non-CXL Function Map DVSEC */ +#define PCI_DVSEC_CXL_FUNCTION_MAP 2 + +/* CXL 3.2 8.1.5: Extensions DVSEC for Ports */ +#define PCI_DVSEC_CXL_PORT 3 +#define PCI_DVSEC_CXL_PORT_CTL 0x0c +#define PCI_DVSEC_CXL_PORT_CTL_UNMASK_SBR 0x00000001 + +/* CXL 3.2 8.1.6: GPF DVSEC for CXL Port */ +#define PCI_DVSEC_CXL_PORT_GPF 4 +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_CONTROL_OFFSET 0x0C +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_BASE_MASK __GENMASK(3, 0) +#define 
PCI_DVSEC_CXL_PORT_GPF_PHASE_1_TMO_SCALE_MASK __GENMASK(11, 8) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_CONTROL_OFFSET 0xE +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_BASE_MASK __GENMASK(3, 0) +#define PCI_DVSEC_CXL_PORT_GPF_PHASE_2_TMO_SCALE_MASK __GENMASK(11, 8) + +/* CXL 3.2 8.1.7: GPF DVSEC for CXL Device */ +#define PCI_DVSEC_CXL_DEVICE_GPF 5 + +/* CXL 3.2 8.1.8: PCIe DVSEC for Flex Bus Port */ +#define PCI_DVSEC_CXL_FLEXBUS_PORT 7 +#define PCI_DVSEC_CXL_FLEXBUS_STATUS_OFFSET 0xE +#define PCI_DVSEC_CXL_FLEXBUS_STATUS_CACHE_MASK _BITUL(0) +#define PCI_DVSEC_CXL_FLEXBUS_STATUS_MEM_MASK _BITUL(2) + +/* CXL 3.2 8.1.9: Register Locator DVSEC */ +#define PCI_DVSEC_CXL_REG_LOCATOR 8 +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK1_OFFSET 0xC +#define PCI_DVSEC_CXL_REG_LOCATOR_BIR_MASK __GENMASK(2, 0) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_ID_MASK __GENMASK(15, 8) +#define PCI_DVSEC_CXL_REG_LOCATOR_BLOCK_OFF_LOW_MASK __GENMASK(31, 16) /* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ #define CXL_DVSEC_PCIE_DEVICE 0 From f3dd0fc905c91a96549e90bcecbc3b0f213d547c Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:42 -0600 Subject: [PATCH 31/80] NVIDIA: VR: SAUCE: PCI/CXL: Introduce pcie_is_cxl() CXL and AER drivers need the ability to identify CXL devices. Introduce set_pcie_cxl() with logic checking for CXL.mem or CXL.cache status in the CXL Flexbus DVSEC status register. The CXL Flexbus DVSEC presence is used because it is required for all the CXL PCIe devices.[1] Add boolean 'struct pci_dev::is_cxl' with the purpose to cache the CXL CXL.cache and CXl.mem status. In the case the device is an EP or USP, call set_pcie_cxl() on behalf of the parent downstream device. Once a device is created there is possibilty the parent training or CXL state was updated as well. This will make certain the correct parent CXL state is cached. Add function pcie_is_cxl() to return 'struct pci_dev::is_cxl'. 
[1] CXL 3.1 Spec, 8.1.1 PCIe Designated Vendor-Specific Extended Capability (DVSEC) ID Assignment, Table 8-2 Signed-off-by: Terry Bowman Reviewed-by: Ira Weiny Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Alejandro Lucero Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) [jan: fixed no null check after call to pci_upstream_bridge() in set_pcie_cxl()] Signed-off-by: Jiandi An --- drivers/pci/probe.c | 30 ++++++++++++++++++++++++++++++ include/linux/pci.h | 6 ++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index eb084877bb043..ff9d8e14a6a4e 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1692,6 +1692,34 @@ static void set_pcie_thunderbolt(struct pci_dev *dev) dev->is_thunderbolt = 1; } +static void set_pcie_cxl(struct pci_dev *dev) +{ + struct pci_dev *parent; + u16 dvsec = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_FLEXBUS_PORT); + if (dvsec) { + u16 cap; + + pci_read_config_word(dev, dvsec + PCI_DVSEC_CXL_FLEXBUS_STATUS_OFFSET, &cap); + + dev->is_cxl = FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_STATUS_CACHE_MASK, cap) || + FIELD_GET(PCI_DVSEC_CXL_FLEXBUS_STATUS_MEM_MASK, cap); + } + + if (!pci_is_pcie(dev) || + !(pci_pcie_type(dev) == PCI_EXP_TYPE_ENDPOINT || + pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM)) + return; + + /* + * Update parent's CXL state because alternate protocol training + * may have changed + */ + parent = pci_upstream_bridge(dev); + if (parent) + set_pcie_cxl(parent); +} + static void set_pcie_untrusted(struct pci_dev *dev) { struct pci_dev *parent = pci_upstream_bridge(dev); @@ -2022,6 +2050,8 @@ int pci_setup_device(struct pci_dev *dev) /* Need to have dev->cfg_size ready */ set_pcie_thunderbolt(dev); + set_pcie_cxl(dev); + set_pcie_untrusted(dev); if (pci_is_pcie(dev)) diff --git a/include/linux/pci.h b/include/linux/pci.h index 
1bdfd152eb1f8..a03cdd8c96122 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -466,6 +466,7 @@ struct pci_dev { unsigned int is_pciehp:1; unsigned int shpc_managed:1; /* SHPC owned by shpchp */ unsigned int is_thunderbolt:1; /* Thunderbolt controller */ + unsigned int is_cxl:1; /* Compute Express Link (CXL) */ /* * Devices marked being untrusted are the ones that can potentially * execute DMA attacks and similar. They are typically connected @@ -773,6 +774,11 @@ static inline bool pci_is_display(struct pci_dev *pdev) return (pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY; } +static inline bool pcie_is_cxl(struct pci_dev *pci_dev) +{ + return pci_dev->is_cxl; +} + #define for_each_pci_bridge(dev, bus) \ list_for_each_entry(dev, &bus->devices, bus_list) \ if (!pci_is_bridge(dev)) {} else From df32d3ad55505d6dbd7004bfbacf76e06a5b1b06 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:43 -0600 Subject: [PATCH 32/80] NVIDIA: VR: SAUCE: cxl/pci: Remove unnecessary CXL Endpoint handling helper functions The CXL driver's cxl_handle_endpoint_cor_ras()/cxl_handle_endpoint_ras() are unnecessary helper functions used only for Endpoints. Remove these functions as they are not common for all CXL devices and do not provide value for EP handling. Rename __cxl_handle_ras to cxl_handle_ras() and __cxl_handle_cor_ras() to cxl_handle_cor_ras(). 
Signed-off-by: Terry Bowman Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/pci.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index cbc8defa68487..3ac90ff6e3d3a 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -711,8 +711,8 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) +static void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { void __iomem *addr; u32 status; @@ -728,11 +728,6 @@ static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, } } -static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras); -} - /* CXL spec rev3.0 8.2.4.16.1 */ static void header_log_copy(void __iomem *ras_base, u32 *log) { @@ -754,8 +749,8 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. 
*/ -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) +static bool cxl_handle_ras(struct cxl_dev_state *cxlds, + void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; @@ -788,11 +783,6 @@ static bool __cxl_handle_ras(struct cxl_dev_state *cxlds, return true; } -static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds) -{ - return __cxl_handle_ras(cxlds, cxlds->regs.ras); -} - #ifdef CONFIG_PCIEAER_CXL static void cxl_dport_map_rch_aer(struct cxl_dport *dport) @@ -871,13 +861,13 @@ EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, struct cxl_dport *dport) { - return __cxl_handle_cor_ras(cxlds, dport->regs.ras); + return cxl_handle_cor_ras(cxlds, dport->regs.ras); } static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, struct cxl_dport *dport) { - return __cxl_handle_ras(cxlds, dport->regs.ras); + return cxl_handle_ras(cxlds, dport->regs.ras); } /* @@ -974,7 +964,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev) if (cxlds->rcd) cxl_handle_rdport_errors(cxlds); - cxl_handle_endpoint_cor_ras(cxlds); + cxl_handle_cor_ras(cxlds, cxlds->regs.ras); } } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); @@ -1003,7 +993,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. */ - ue = cxl_handle_endpoint_ras(cxlds); + ue = cxl_handle_ras(cxlds, cxlds->regs.ras); } From ed19eda28333e58ee5dda1acea1e58d2ea903d91 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:44 -0600 Subject: [PATCH 33/80] NVIDIA: VR: SAUCE: cxl/pci: Remove unnecessary CXL RCH handling helper functions cxl_handle_rdport_cor_ras() and cxl_handle_rdport_ras() are specific to Restricted CXL Host (RCH) handling. 
Improve readability and maintainability by replacing these and instead using the common cxl_handle_cor_ras() and cxl_handle_ras() functions. Signed-off-by: Terry Bowman Reviewed-by: Alejandro Lucero Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/pci.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 3ac90ff6e3d3a..a0f53a20fa61b 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -858,18 +858,6 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); -static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return cxl_handle_cor_ras(cxlds, dport->regs.ras); -} - -static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds, - struct cxl_dport *dport) -{ - return cxl_handle_ras(cxlds, dport->regs.ras); -} - /* * Copy the AER capability registers using 32 bit read accesses. * This is necessary because RCRB AER capability is MMIO mapped. Clear the @@ -939,9 +927,9 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) pci_print_aer(pdev, severity, &aer_regs); if (severity == AER_CORRECTABLE) - cxl_handle_rdport_cor_ras(cxlds, dport); + cxl_handle_cor_ras(cxlds, dport->regs.ras); else - cxl_handle_rdport_ras(cxlds, dport); + cxl_handle_ras(cxlds, dport->regs.ras); } #else From b6efb5eac1ec6a29842c1b6beb7a6f3fd813417b Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Tue, 4 Nov 2025 11:02:45 -0600 Subject: [PATCH 34/80] NVIDIA: VR: SAUCE: cxl: Remove CXL VH handling in CONFIG_PCIEAER_CXL conditional blocks from core/pci.c Create new config CONFIG_CXL_RAS and put all CXL RAS items behind the config. The config will depend on CPER and PCIE AER to build. 
Move the related VH RAS code from core/pci.c to core/ras.c. Restricted CXL host (RCH) RAS functions will be moved in a future patch. Cc: Robert Richter Cc: Terry Bowman Reviewed-by: Joshua Hahn Reviewed-by: Jonathan Cameron Signed-off-by: Dave Jiang Reviewed-by: Dan Williams Reviewed-by: Alison Schofield Co-developed-by: Terry Bowman Signed-off-by: Terry Bowman (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/Kconfig | 4 + drivers/cxl/core/Makefile | 2 +- drivers/cxl/core/core.h | 31 +++++++ drivers/cxl/core/pci.c | 189 +------------------------------------- drivers/cxl/core/ras.c | 176 +++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 8 -- drivers/cxl/cxlpci.h | 16 ++++ tools/testing/cxl/Kbuild | 2 +- 8 files changed, 233 insertions(+), 195 deletions(-) diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 48b7314afdb88..217888992c882 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -233,4 +233,8 @@ config CXL_MCE def_bool y depends on X86_MCE && MEMORY_FAILURE +config CXL_RAS + def_bool y + depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI + endif diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 5ad8fef210b5c..b2930cc54f8ba 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -14,9 +14,9 @@ cxl_core-y += pci.o cxl_core-y += hdm.o cxl_core-y += pmu.o cxl_core-y += cdat.o -cxl_core-y += ras.o cxl_core-$(CONFIG_TRACING) += trace.o cxl_core-$(CONFIG_CXL_REGION) += region.o cxl_core-$(CONFIG_CXL_MCE) += mce.o cxl_core-$(CONFIG_CXL_FEATURES) += features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o +cxl_core-$(CONFIG_CXL_RAS) += ras.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 1fb66132b7777..bc818de87cccc 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -144,8 +144,39 @@ int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c); int 
cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, struct access_coordinate *c); +#ifdef CONFIG_CXL_RAS int cxl_ras_init(void); void cxl_ras_exit(void); +bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); +void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); +#else +static inline int cxl_ras_init(void) +{ + return 0; +} + +static inline void cxl_ras_exit(void) +{ +} + +static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +{ + return false; +} +static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { } +#endif /* CONFIG_CXL_RAS */ + +/* Restricted CXL Host specific RAS functions */ +#ifdef CONFIG_CXL_RAS +void cxl_dport_map_rch_aer(struct cxl_dport *dport); +void cxl_disable_rch_root_ints(struct cxl_dport *dport); +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); +#else +static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { } +static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { } +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } +#endif /* CONFIG_CXL_RAS */ + int cxl_gpf_port_setup(struct cxl_dport *dport); struct cxl_hdm; diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index a0f53a20fa61b..cd73cea93282f 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -711,81 +711,8 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -static void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - void __iomem *addr; - u32 status; - - if (!ras_base) - return; - - addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); - } -} - -/* CXL spec rev3.0 8.2.4.16.1 */ -static void header_log_copy(void __iomem 
*ras_base, u32 *log) -{ - void __iomem *addr; - u32 *log_addr; - int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); - - addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; - log_addr = log; - - for (i = 0; i < log_u32_size; i++) { - *log_addr = readl(addr); - log_addr++; - addr += sizeof(u32); - } -} - -/* - * Log the state of the RAS status registers and prepare them to log the - * next error status. Return 1 if reset needed. - */ -static bool cxl_handle_ras(struct cxl_dev_state *cxlds, - void __iomem *ras_base) -{ - u32 hl[CXL_HEADERLOG_SIZE_U32]; - void __iomem *addr; - u32 status; - u32 fe; - - if (!ras_base) - return false; - - addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; - status = readl(addr); - if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) - return false; - - /* If multiple errors, log header points to first error from ctrl reg */ - if (hweight32(status) > 1) { - void __iomem *rcc_addr = - ras_base + CXL_RAS_CAP_CONTROL_OFFSET; - - fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, - readl(rcc_addr))); - } else { - fe = status; - } - - header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); - writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); - - return true; -} - -#ifdef CONFIG_PCIEAER_CXL - -static void cxl_dport_map_rch_aer(struct cxl_dport *dport) +#ifdef CONFIG_CXL_RAS +void cxl_dport_map_rch_aer(struct cxl_dport *dport) { resource_size_t aer_phys; struct device *host; @@ -800,19 +727,7 @@ static void cxl_dport_map_rch_aer(struct cxl_dport *dport) } } -static void cxl_dport_map_ras(struct cxl_dport *dport) -{ - struct cxl_register_map *map = &dport->reg_map; - struct device *dev = dport->dport_dev; - - if (!map->component_map.ras.valid) - dev_dbg(dev, "RAS registers not found\n"); - else if (cxl_map_component_regs(map, &dport->regs.component, - BIT(CXL_CM_CAP_CAP_ID_RAS))) - dev_dbg(dev, "Failed to map RAS capability.\n"); -} - -static void cxl_disable_rch_root_ints(struct cxl_dport *dport) 
+void cxl_disable_rch_root_ints(struct cxl_dport *dport) { void __iomem *aer_base = dport->regs.dport_aer; u32 aer_cmd_mask, aer_cmd; @@ -836,28 +751,6 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); } -/** - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport - * @dport: the cxl_dport that needs to be initialized - * @host: host device for devm operations - */ -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) -{ - dport->reg_map.host = host; - cxl_dport_map_ras(dport); - - if (dport->rch) { - struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev); - - if (!host_bridge->native_aer) - return; - - cxl_dport_map_rch_aer(dport); - cxl_disable_rch_root_ints(dport); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); - /* * Copy the AER capability registers using 32 bit read accesses. * This is necessary because RCRB AER capability is MMIO mapped. 
Clear the @@ -906,7 +799,7 @@ static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, return false; } -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); struct aer_capability_regs aer_regs; @@ -931,82 +824,8 @@ static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) else cxl_handle_ras(cxlds, dport->regs.ras); } - -#else -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } #endif -void cxl_cor_error_detected(struct pci_dev *pdev) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct device *dev = &cxlds->cxlmd->dev; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - - cxl_handle_cor_ras(cxlds, cxlds->regs.ras); - } -} -EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); - -pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) -{ - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - bool ue; - - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return PCI_ERS_RESULT_DISCONNECT; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. 
- */ - ue = cxl_handle_ras(cxlds, cxlds->regs.ras); - } - - - switch (state) { - case pci_channel_io_normal: - if (ue) { - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - } - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - dev_warn(&pdev->dev, - "%s: frozen state error detected, disable CXL.mem\n", - dev_name(dev)); - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - dev_warn(&pdev->dev, - "failure state error detected, request disconnect\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; -} -EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); - static int cxl_flit_size(struct pci_dev *pdev) { if (cxl_pci_flit_256(pdev)) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 2731ba3a07993..b933030b8e1e7 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "trace.h" static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, @@ -124,3 +125,178 @@ void cxl_ras_exit(void) cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); cancel_work_sync(&cxl_cper_prot_err_work); } + +static void cxl_dport_map_ras(struct cxl_dport *dport) +{ + struct cxl_register_map *map = &dport->reg_map; + struct device *dev = dport->dport_dev; + + if (!map->component_map.ras.valid) + dev_dbg(dev, "RAS registers not found\n"); + else if (cxl_map_component_regs(map, &dport->regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(dev, "Failed to map RAS capability.\n"); +} + +/** + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport + * @dport: the cxl_dport that needs to be initialized + * @host: host device for devm operations + */ +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) +{ + dport->reg_map.host = host; + cxl_dport_map_ras(dport); + + if (dport->rch) { + struct pci_host_bridge *host_bridge = 
to_pci_host_bridge(dport->dport_dev); + + if (!host_bridge->native_aer) + return; + + cxl_dport_map_rch_aer(dport); + cxl_disable_rch_root_ints(dport); + } +} +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); + +void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +{ + void __iomem *addr; + u32 status; + + if (!ras_base) + return; + + addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + trace_cxl_aer_correctable_error(cxlds->cxlmd, status); + } +} + +/* CXL spec rev3.0 8.2.4.16.1 */ +static void header_log_copy(void __iomem *ras_base, u32 *log) +{ + void __iomem *addr; + u32 *log_addr; + int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32); + + addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET; + log_addr = log; + + for (i = 0; i < log_u32_size; i++) { + *log_addr = readl(addr); + log_addr++; + addr += sizeof(u32); + } +} + +/* + * Log the state of the RAS status registers and prepare them to log the + * next error status. Return 1 if reset needed. 
+ */ +bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +{ + u32 hl[CXL_HEADERLOG_SIZE_U32]; + void __iomem *addr; + u32 status; + u32 fe; + + if (!ras_base) + return false; + + addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; + status = readl(addr); + if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) + return false; + + /* If multiple errors, log header points to first error from ctrl reg */ + if (hweight32(status) > 1) { + void __iomem *rcc_addr = + ras_base + CXL_RAS_CAP_CONTROL_OFFSET; + + fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK, + readl(rcc_addr))); + } else { + fe = status; + } + + header_log_copy(ras_base, hl); + trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); + writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); + + return true; +} + +void cxl_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct device *dev = &cxlds->cxlmd->dev; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + cxl_handle_cor_ras(cxlds, cxlds->regs.ras); + } +} +EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); + +pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_memdev *cxlmd = cxlds->cxlmd; + struct device *dev = &cxlmd->dev; + bool ue; + + scoped_guard(device, dev) { + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; + } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. 
On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + ue = cxl_handle_ras(cxlds, cxlds->regs.ras); + } + + + switch (state) { + case pci_channel_io_normal: + if (ue) { + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + } + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + dev_warn(&pdev->dev, + "%s: frozen state error detected, disable CXL.mem\n", + dev_name(dev)); + device_release_driver(dev); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + dev_warn(&pdev->dev, + "failure state error detected, request disconnect\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + return PCI_ERS_RESULT_NEED_RESET; +} +EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 231ddccf89773..259ed4b676e13 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -776,14 +776,6 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port, struct device *dport_dev, int port_id, resource_size_t rcrb); -#ifdef CONFIG_PCIEAER_CXL -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport); -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); -#else -static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, - struct device *host) { } -#endif - struct cxl_decoder *to_cxl_decoder(struct device *dev); struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 4985dbd900693..0c8b6ee7b6de5 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -77,7 +77,23 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; void read_cdat_data(struct cxl_port *port); + +#ifdef CONFIG_CXL_RAS void 
cxl_cor_error_detected(struct pci_dev *pdev); pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state); +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); +#else +static inline void cxl_cor_error_detected(struct pci_dev *pdev) { } + +static inline pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + return PCI_ERS_RESULT_NONE; +} + +static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, + struct device *host) { } +#endif + #endif /* __CXL_PCI_H__ */ diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 0d5ce4b74b9f7..927fbb6c061f3 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -58,12 +58,12 @@ cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-y += $(CXL_CORE_SRC)/cdat.o -cxl_core-y += $(CXL_CORE_SRC)/ras.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o +cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o From a3c6df7dc0f5dd9d235589610e68c945d2eebc4c Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:46 -0600 Subject: [PATCH 35/80] NVIDIA: VR: SAUCE: cxl: Move CXL driver's RCH error handling into core/ras_rch.c Restricted CXL Host (RCH) protocol error handling uses a procedure distinct from the CXL Virtual Hierarchy (VH) handling. This is because of the differences in the RCH and VH topologies. Improve the maintainability and add ability to enable/disable RCH handling. Move and combine the RCH handling code into a single block conditionally compiled with the CONFIG_CXL_RCH_RAS kernel config. 
Signed-off-by: Terry Bowman (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/Kconfig | 7 +++ drivers/cxl/core/Makefile | 1 + drivers/cxl/core/core.h | 5 +- drivers/cxl/core/pci.c | 115 ----------------------------------- drivers/cxl/core/ras_rch.c | 120 +++++++++++++++++++++++++++++++++++++ tools/testing/cxl/Kbuild | 1 + 6 files changed, 132 insertions(+), 117 deletions(-) create mode 100644 drivers/cxl/core/ras_rch.c diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 217888992c882..ffe6ad981434d 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -237,4 +237,11 @@ config CXL_RAS def_bool y depends on ACPI_APEI_GHES && PCIEAER && CXL_PCI +config CXL_RCH_RAS + bool "CXL: Restricted CXL Host (RCH) protocol error handling" + def_bool n + depends on CXL_RAS + help + RAS support for Restricted CXL Host (RCH) defined in CXL1.1. + endif diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index b2930cc54f8ba..fa1d4aed28b90 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -20,3 +20,4 @@ cxl_core-$(CONFIG_CXL_MCE) += mce.o cxl_core-$(CONFIG_CXL_FEATURES) += features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o cxl_core-$(CONFIG_CXL_RAS) += ras.o +cxl_core-$(CONFIG_CXL_RCH_RAS) += ras_rch.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index bc818de87cccc..c30ab7c25a925 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -4,6 +4,7 @@ #ifndef __CXL_CORE_H__ #define __CXL_CORE_H__ +#include #include #include @@ -167,7 +168,7 @@ static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem #endif /* CONFIG_CXL_RAS */ /* Restricted CXL Host specific RAS functions */ -#ifdef CONFIG_CXL_RAS +#ifdef CONFIG_CXL_RCH_RAS void cxl_dport_map_rch_aer(struct cxl_dport *dport); void cxl_disable_rch_root_ints(struct cxl_dport *dport); void cxl_handle_rdport_errors(struct 
cxl_dev_state *cxlds); @@ -175,7 +176,7 @@ void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds); static inline void cxl_dport_map_rch_aer(struct cxl_dport *dport) { } static inline void cxl_disable_rch_root_ints(struct cxl_dport *dport) { } static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } -#endif /* CONFIG_CXL_RAS */ +#endif /* CONFIG_CXL_RCH_RAS */ int cxl_gpf_port_setup(struct cxl_dport *dport); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index cd73cea93282f..a66f7a84b5c82 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -711,121 +711,6 @@ void read_cdat_data(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL"); -#ifdef CONFIG_CXL_RAS -void cxl_dport_map_rch_aer(struct cxl_dport *dport) -{ - resource_size_t aer_phys; - struct device *host; - u16 aer_cap; - - aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); - if (aer_cap) { - host = dport->reg_map.host; - aer_phys = aer_cap + dport->rcrb.base; - dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys, - sizeof(struct aer_capability_regs)); - } -} - -void cxl_disable_rch_root_ints(struct cxl_dport *dport) -{ - void __iomem *aer_base = dport->regs.dport_aer; - u32 aer_cmd_mask, aer_cmd; - - if (!aer_base) - return; - - /* - * Disable RCH root port command interrupts. - * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors - * - * This sequence may not be necessary. CXL spec states disabling - * the root cmd register's interrupts is required. But, PCI spec - * shows these are disabled by default on reset. - */ - aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | - PCI_ERR_ROOT_CMD_NONFATAL_EN | - PCI_ERR_ROOT_CMD_FATAL_EN); - aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); - aer_cmd &= ~aer_cmd_mask; - writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); -} - -/* - * Copy the AER capability registers using 32 bit read accesses. - * This is necessary because RCRB AER capability is MMIO mapped. 
Clear the - * status after copying. - * - * @aer_base: base address of AER capability block in RCRB - * @aer_regs: destination for copying AER capability - */ -static bool cxl_rch_get_aer_info(void __iomem *aer_base, - struct aer_capability_regs *aer_regs) -{ - int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); - u32 *aer_regs_buf = (u32 *)aer_regs; - int n; - - if (!aer_base) - return false; - - /* Use readl() to guarantee 32-bit accesses */ - for (n = 0; n < read_cnt; n++) - aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); - - writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); - writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); - - return true; -} - -/* Get AER severity. Return false if there is no error. */ -static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, - int *severity) -{ - if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { - if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) - *severity = AER_FATAL; - else - *severity = AER_NONFATAL; - return true; - } - - if (aer_regs->cor_status & ~aer_regs->cor_mask) { - *severity = AER_CORRECTABLE; - return true; - } - - return false; -} - -void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - struct aer_capability_regs aer_regs; - struct cxl_dport *dport; - int severity; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return; - - if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) - return; - - if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) - return; - - pci_print_aer(pdev, severity, &aer_regs); - - if (severity == AER_CORRECTABLE) - cxl_handle_cor_ras(cxlds, dport->regs.ras); - else - cxl_handle_ras(cxlds, dport->regs.ras); -} -#endif - static int cxl_flit_size(struct pci_dev *pdev) { if (cxl_pci_flit_256(pdev)) diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c new file mode 100644 index 
0000000000000..f6de5492a8b70 --- /dev/null +++ b/drivers/cxl/core/ras_rch.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */ + +#include +#include +#include +#include +#include "trace.h" + +void cxl_dport_map_rch_aer(struct cxl_dport *dport) +{ + resource_size_t aer_phys; + struct device *host; + u16 aer_cap; + + aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base); + if (aer_cap) { + host = dport->reg_map.host; + aer_phys = aer_cap + dport->rcrb.base; + dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys, + sizeof(struct aer_capability_regs)); + } +} + +void cxl_disable_rch_root_ints(struct cxl_dport *dport) +{ + void __iomem *aer_base = dport->regs.dport_aer; + u32 aer_cmd_mask, aer_cmd; + + if (!aer_base) + return; + + /* + * Disable RCH root port command interrupts. + * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors + * + * This sequence may not be necessary. CXL spec states disabling + * the root cmd register's interrupts is required. But, PCI spec + * shows these are disabled by default on reset. + */ + aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN | + PCI_ERR_ROOT_CMD_NONFATAL_EN | + PCI_ERR_ROOT_CMD_FATAL_EN); + aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND); + aer_cmd &= ~aer_cmd_mask; + writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND); +} + +/* + * Copy the AER capability registers using 32 bit read accesses. + * This is necessary because RCRB AER capability is MMIO mapped. Clear the + * status after copying. 
+ * + * @aer_base: base address of AER capability block in RCRB + * @aer_regs: destination for copying AER capability + */ +static bool cxl_rch_get_aer_info(void __iomem *aer_base, + struct aer_capability_regs *aer_regs) +{ + int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32); + u32 *aer_regs_buf = (u32 *)aer_regs; + int n; + + if (!aer_base) + return false; + + /* Use readl() to guarantee 32-bit accesses */ + for (n = 0; n < read_cnt; n++) + aer_regs_buf[n] = readl(aer_base + n * sizeof(u32)); + + writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS); + writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS); + + return true; +} + +/* Get AER severity. Return false if there is no error. */ +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs, + int *severity) +{ + if (aer_regs->uncor_status & ~aer_regs->uncor_mask) { + if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV) + *severity = AER_FATAL; + else + *severity = AER_NONFATAL; + return true; + } + + if (aer_regs->cor_status & ~aer_regs->cor_mask) { + *severity = AER_CORRECTABLE; + return true; + } + + return false; +} + +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct aer_capability_regs aer_regs; + struct cxl_dport *dport; + int severity; + + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return; + + if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs)) + return; + + if (!cxl_rch_get_aer_severity(&aer_regs, &severity)) + return; + + pci_print_aer(pdev, severity, &aer_regs); + if (severity == AER_CORRECTABLE) + cxl_handle_cor_ras(cxlds, dport->regs.ras); + else + cxl_handle_ras(cxlds, dport->regs.ras); +} diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 927fbb6c061f3..6905f8e710abc 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -64,6 +64,7 @@ cxl_core-$(CONFIG_CXL_MCE) += 
$(CXL_CORE_SRC)/mce.o cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o +cxl_core-$(CONFIG_CXL_RCH_RAS) += $(CXL_CORE_SRC)/ras_rch.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o From 60015f7edbe3ef0a8a8c34af5b31ebef323bb53e Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:47 -0600 Subject: [PATCH 36/80] NVIDIA: VR: SAUCE: CXL/AER: Replace device_lock() in cxl_rch_handle_error_iter() with guard() lock cxl_rch_handle_error_iter() includes a call to device_lock() using a goto for multiple return paths. Improve readability and maintainability by using the guard() lock variant. Signed-off-by: Terry Bowman (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/pci/pcie/aer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 3dba9c0c6ae11..bc4725924f13b 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1163,12 +1163,11 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) return 0; - /* Protect dev->driver */ - device_lock(&dev->dev); + guard(device)(&dev->dev); err_handler = dev->driver ? 
dev->driver->err_handler : NULL; if (!err_handler) - goto out; + return 0; if (info->severity == AER_CORRECTABLE) { if (err_handler->cor_error_detected) @@ -1179,8 +1178,6 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) else if (info->severity == AER_FATAL) err_handler->error_detected(dev, pci_channel_io_frozen); } -out: - device_unlock(&dev->dev); return 0; } From d9f5dfe8201c2fb602e53406d68fb0cd8fd99f51 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:48 -0600 Subject: [PATCH 37/80] NVIDIA: VR: SAUCE: CXL/AER: Move AER drivers RCH error handling into pcie/aer_cxl_rch.c The restricted CXL Host (RCH) AER error handling logic currently resides in the AER driver file, drivers/pci/pcie/aer.c. CXL specific changes are conditionally compiled using #ifdefs. Improve the AER driver maintainability by separating the RCH specific logic from the AER driver's core functionality and removing the ifdefs. Introduce drivers/pci/pcie/aer_cxl_rch.c for moving the RCH AER logic into. Conditionally compile the file using the CONFIG_CXL_RCH_RAS Kconfig. Move the CXL logic into the new file but leave helper functions in aer.c for now as they will be moved in future patch for CXL virtual hierarchy handling. Export the handler functions as needed. Export pci_aer_unmask_internal_errors() allowing for all subsystems to use. Avoid multiple declaration moves and export cxl_error_is_native() now to allow access from cxl_core. Inorder to maintain compilation after the move other changes are required. Change cxl_rch_handle_error() & cxl_rch_enable_rcec() to be non-static inorder for accessing from the AER driver in aer.c. Update the new file with the SPDX and 2023 AMD copyright notations because the RCH bits were initally contributed in 2023 by AMD. 
Signed-off-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/pci/pci.h | 16 +++++ drivers/pci/pcie/Makefile | 1 + drivers/pci/pcie/aer.c | 105 +++------------------------------ drivers/pci/pcie/aer_cxl_rch.c | 96 ++++++++++++++++++++++++++++++ include/linux/aer.h | 8 +++ 5 files changed, 128 insertions(+), 98 deletions(-) create mode 100644 drivers/pci/pcie/aer_cxl_rch.c diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index ee7b515125826..10961a2fd5a21 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1210,4 +1210,20 @@ static inline int pci_msix_write_tph_tag(struct pci_dev *pdev, unsigned int inde (PCI_CONF1_ADDRESS(bus, dev, func, reg) | \ PCI_CONF1_EXT_REG(reg)) +struct aer_err_info; + +#ifdef CONFIG_CXL_RCH_RAS +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info); +void cxl_rch_enable_rcec(struct pci_dev *rcec); +#else +static inline void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) { } +static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } +#endif + +#ifdef CONFIG_CXL_RAS +bool is_internal_error(struct aer_err_info *info); +#else +static inline bool is_internal_error(struct aer_err_info *info) { return false; } +#endif + #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 173829aa02e60..970e7cbc5b34d 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o obj-y += aspm.o obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o +obj-$(CONFIG_CXL_RCH_RAS) += aer_cxl_rch.o obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index bc4725924f13b..b2416d472581d 100644 --- 
a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1106,7 +1106,7 @@ static bool find_source_device(struct pci_dev *parent, * Note: AER must be enabled and supported by the device which must be * checked in advance, e.g. with pcie_aer_is_native(). */ -static void pci_aer_unmask_internal_errors(struct pci_dev *dev) +void pci_aer_unmask_internal_errors(struct pci_dev *dev) { int aer = dev->aer_cap; u32 mask; @@ -1119,116 +1119,25 @@ static void pci_aer_unmask_internal_errors(struct pci_dev *dev) mask &= ~PCI_ERR_COR_INTERNAL; pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask); } +EXPORT_SYMBOL_GPL(pci_aer_unmask_internal_errors); -static bool is_cxl_mem_dev(struct pci_dev *dev) -{ - /* - * The capability, status, and control fields in Device 0, - * Function 0 DVSEC control the CXL functionality of the - * entire device (CXL 3.0, 8.1.3). - */ - if (dev->devfn != PCI_DEVFN(0, 0)) - return false; - - /* - * CXL Memory Devices must have the 502h class code set (CXL - * 3.0, 8.1.12.1). - */ - if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) - return false; - - return true; -} - -static bool cxl_error_is_native(struct pci_dev *dev) +bool cxl_error_is_native(struct pci_dev *dev) { struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); return (pcie_ports_native || host->native_aer); } +EXPORT_SYMBOL_NS_GPL(cxl_error_is_native, "CXL"); -static bool is_internal_error(struct aer_err_info *info) +bool is_internal_error(struct aer_err_info *info) { if (info->severity == AER_CORRECTABLE) return info->status & PCI_ERR_COR_INTERNAL; return info->status & PCI_ERR_UNC_INTN; } - -static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) -{ - struct aer_err_info *info = (struct aer_err_info *)data; - const struct pci_error_handlers *err_handler; - - if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) - return 0; - - guard(device)(&dev->dev); - - err_handler = dev->driver ? 
dev->driver->err_handler : NULL; - if (!err_handler) - return 0; - - if (info->severity == AER_CORRECTABLE) { - if (err_handler->cor_error_detected) - err_handler->cor_error_detected(dev); - } else if (err_handler->error_detected) { - if (info->severity == AER_NONFATAL) - err_handler->error_detected(dev, pci_channel_io_normal); - else if (info->severity == AER_FATAL) - err_handler->error_detected(dev, pci_channel_io_frozen); - } - return 0; -} - -static void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) -{ - /* - * Internal errors of an RCEC indicate an AER error in an - * RCH's downstream port. Check and handle them in the CXL.mem - * device driver. - */ - if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && - is_internal_error(info)) - pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); -} - -static int handles_cxl_error_iter(struct pci_dev *dev, void *data) -{ - bool *handles_cxl = data; - - if (!*handles_cxl) - *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); - - /* Non-zero terminates iteration */ - return *handles_cxl; -} - -static bool handles_cxl_errors(struct pci_dev *rcec) -{ - bool handles_cxl = false; - - if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && - pcie_aer_is_native(rcec)) - pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); - - return handles_cxl; -} - -static void cxl_rch_enable_rcec(struct pci_dev *rcec) -{ - if (!handles_cxl_errors(rcec)) - return; - - pci_aer_unmask_internal_errors(rcec); - pci_info(rcec, "CXL: Internal errors unmasked"); -} - -#else -static inline void cxl_rch_enable_rcec(struct pci_dev *dev) { } -static inline void cxl_rch_handle_error(struct pci_dev *dev, - struct aer_err_info *info) { } -#endif +EXPORT_SYMBOL_NS_GPL(is_internal_error, "CXL"); +#endif /* CONFIG_CXL_RAS */ /** * pci_aer_handle_error - handle logging error into an event log diff --git a/drivers/pci/pcie/aer_cxl_rch.c b/drivers/pci/pcie/aer_cxl_rch.c new file mode 100644 index 0000000000000..f4d160f181690 --- 
/dev/null +++ b/drivers/pci/pcie/aer_cxl_rch.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 AMD Corporation. All rights reserved. */ + +#include +#include +#include +#include "../pci.h" + +static bool is_cxl_mem_dev(struct pci_dev *dev) +{ + /* + * The capability, status, and control fields in Device 0, + * Function 0 DVSEC control the CXL functionality of the + * entire device (CXL 3.0, 8.1.3). + */ + if (dev->devfn != PCI_DEVFN(0, 0)) + return false; + + /* + * CXL Memory Devices must have the 502h class code set (CXL + * 3.0, 8.1.12.1). + */ + if ((dev->class >> 8) != PCI_CLASS_MEMORY_CXL) + return false; + + return true; +} + +static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data) +{ + struct aer_err_info *info = (struct aer_err_info *)data; + const struct pci_error_handlers *err_handler; + + if (!is_cxl_mem_dev(dev) || !cxl_error_is_native(dev)) + return 0; + + guard(device)(&dev->dev); + + err_handler = dev->driver ? dev->driver->err_handler : NULL; + if (!err_handler) + return 0; + + if (info->severity == AER_CORRECTABLE) { + if (err_handler->cor_error_detected) + err_handler->cor_error_detected(dev); + } else if (err_handler->error_detected) { + if (info->severity == AER_NONFATAL) + err_handler->error_detected(dev, pci_channel_io_normal); + else if (info->severity == AER_FATAL) + err_handler->error_detected(dev, pci_channel_io_frozen); + } + return 0; +} + +void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info) +{ + /* + * Internal errors of an RCEC indicate an AER error in an + * RCH's downstream port. Check and handle them in the CXL.mem + * device driver. 
+ */ + if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC && + is_internal_error(info)) + pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info); +} + +static int handles_cxl_error_iter(struct pci_dev *dev, void *data) +{ + bool *handles_cxl = data; + + if (!*handles_cxl) + *handles_cxl = is_cxl_mem_dev(dev) && cxl_error_is_native(dev); + + /* Non-zero terminates iteration */ + return *handles_cxl; +} + +static bool handles_cxl_errors(struct pci_dev *rcec) +{ + bool handles_cxl = false; + + if (pci_pcie_type(rcec) == PCI_EXP_TYPE_RC_EC && + pcie_aer_is_native(rcec)) + pcie_walk_rcec(rcec, handles_cxl_error_iter, &handles_cxl); + + return handles_cxl; +} + +void cxl_rch_enable_rcec(struct pci_dev *rcec) +{ + if (!handles_cxl_errors(rcec)) + return; + + pci_aer_unmask_internal_errors(rcec); + pci_info(rcec, "CXL: Internal errors unmasked"); +} diff --git a/include/linux/aer.h b/include/linux/aer.h index 02940be66324e..2ef8205639963 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -56,12 +56,20 @@ struct aer_capability_regs { #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); +void pci_aer_unmask_internal_errors(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } +static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } +#endif + +#ifdef CONFIG_CXL_RAS +bool cxl_error_is_native(struct pci_dev *dev); +#else +static inline bool cxl_error_is_native(struct pci_dev *dev) { return false; } #endif void pci_print_aer(struct pci_dev *dev, int aer_severity, From 1c2ecaf0664ea32f7863207e77e05571458e5313 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:49 -0600 Subject: [PATCH 38/80] NVIDIA: VR: SAUCE: PCI/AER: Report CXL or PCIe bus error type in trace logging The AER service driver and aer_event tracing currently log 
'PCIe Bus Type' for all errors. Update the driver and aer_event tracing to log 'CXL Bus Type' for CXL device errors. This requires the AER can identify and distinguish between PCIe errors and CXL errors. Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in aer_get_device_error_info() and pci_print_aer(). Update the aer_event trace routine to accept a bus type string parameter. Signed-off-by: Terry Bowman Reviewed-by: Ira Weiny Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/pci/pci.h | 37 ++++++++++++++++++++++++++++++------- drivers/pci/pcie/aer.c | 18 ++++++++++++------ include/ras/ras_event.h | 9 ++++++--- 3 files changed, 48 insertions(+), 16 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 10961a2fd5a21..b592170baebb5 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -607,31 +607,54 @@ static inline bool pci_dev_binding_disallowed(struct pci_dev *dev) #define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */ +/** + * struct aer_err_info - AER Error Information + * @dev: Devices reporting error + * @ratelimit_print: Flag to log or not log the devices' error. 0=NotLog/1=Log + * @error_devnum: Number of devices reporting an error + * @level: printk level to use in logging + * @id: Value from register PCI_ERR_ROOT_ERR_SRC + * @severity: AER severity, 0-UNCOR Non-fatal, 1-UNCOR fatal, 2-COR + * @root_ratelimit_print: Flag to log or not log the root's error. 
0=NotLog/1=Log + * @multi_error_valid: If multiple errors are reported + * @first_error: First reported error + * @is_cxl: Bus type error: 0-PCI Bus error, 1-CXL Bus error + * @tlp_header_valid: Indicates if TLP field contains error information + * @status: COR/UNCOR error status + * @mask: COR/UNCOR mask + * @tlp: Transaction packet information + */ struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; int ratelimit_print[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; - const char *level; /* printk level */ + const char *level; unsigned int id:16; - unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ - unsigned int root_ratelimit_print:1; /* 0=skip, 1=print */ + unsigned int severity:2; + unsigned int root_ratelimit_print:1; unsigned int __pad1:4; unsigned int multi_error_valid:1; unsigned int first_error:5; - unsigned int __pad2:2; + unsigned int __pad2:1; + bool is_cxl:1; unsigned int tlp_header_valid:1; - unsigned int status; /* COR/UNCOR Error Status */ - unsigned int mask; /* COR/UNCOR Error Mask */ - struct pcie_tlp_log tlp; /* TLP Header */ + unsigned int status; + unsigned int mask; + struct pcie_tlp_log tlp; }; int aer_get_device_error_info(struct aer_err_info *info, int i); void aer_print_error(struct aer_err_info *info, int i); +static inline const char *aer_err_bus(struct aer_err_info *info) +{ + return info->is_cxl ? 
"CXL" : "PCIe"; +} + int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, unsigned int tlp_len, bool flit, struct pcie_tlp_log *log); diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index b2416d472581d..2d9f0e959a4c5 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -844,6 +844,7 @@ void aer_print_error(struct aer_err_info *info, int i) struct pci_dev *dev; int layer, agent, id; const char *level = info->level; + const char *bus_type = aer_err_bus(info); if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES)) return; @@ -852,23 +853,23 @@ void aer_print_error(struct aer_err_info *info, int i) id = pci_dev_id(dev); pci_dev_aer_stats_incr(dev, info); - trace_aer_event(pci_name(dev), (info->status & ~info->mask), + trace_aer_event(pci_name(dev), bus_type, (info->status & ~info->mask), info->severity, info->tlp_header_valid, &info->tlp); if (!info->ratelimit_print[i]) return; if (!info->status) { - pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", - aer_error_severity_string[info->severity]); + pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", + bus_type, aer_error_severity_string[info->severity]); goto out; } layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", - aer_error_severity_string[info->severity], + aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n", + bus_type, aer_error_severity_string[info->severity], aer_error_layer[layer], aer_agent_string[agent]); aer_printk(level, dev, " device [%04x:%04x] error status/mask=%08x/%08x\n", @@ -902,6 +903,7 @@ EXPORT_SYMBOL_GPL(cper_severity_to_aer); void pci_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer) { + const char *bus_type; int layer, agent, tlp_header_valid = 0; u32 status, mask; struct aer_err_info info = { @@ -922,9 
+924,12 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, info.status = status; info.mask = mask; + info.is_cxl = pcie_is_cxl(dev); + + bus_type = aer_err_bus(&info); pci_dev_aer_stats_incr(dev, &info); - trace_aer_event(pci_name(dev), (status & ~mask), + trace_aer_event(pci_name(dev), bus_type, (status & ~mask), aer_severity, tlp_header_valid, &aer->header_log); if (!aer_ratelimit(dev, info.severity)) @@ -1285,6 +1290,7 @@ int aer_get_device_error_info(struct aer_err_info *info, int i) /* Must reset in this function */ info->status = 0; info->tlp_header_valid = 0; + info->is_cxl = pcie_is_cxl(dev); /* The device might not support AER */ if (!aer) diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index fecfeb7c8be7f..9bbb2e0ee5d22 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -298,15 +298,17 @@ TRACE_EVENT(non_standard_event, TRACE_EVENT(aer_event, TP_PROTO(const char *dev_name, + const char *bus_type, const u32 status, const u8 severity, const u8 tlp_header_valid, struct pcie_tlp_log *tlp), - TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp), + TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp), TP_STRUCT__entry( __string( dev_name, dev_name ) + __string( bus_type, bus_type ) __field( u32, status ) __field( u8, severity ) __field( u8, tlp_header_valid) @@ -315,6 +317,7 @@ TRACE_EVENT(aer_event, TP_fast_assign( __assign_str(dev_name); + __assign_str(bus_type); __entry->status = status; __entry->severity = severity; __entry->tlp_header_valid = tlp_header_valid; @@ -326,8 +329,8 @@ TRACE_EVENT(aer_event, } ), - TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n", - __get_str(dev_name), + TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n", + __get_str(dev_name), __get_str(bus_type), __entry->severity == AER_CORRECTABLE ? "Corrected" : __entry->severity == AER_FATAL ? 
"Fatal" : "Uncorrected, non-fatal", From 1a879f91c62a1acf19dbb6816422e857bb6cf668 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:50 -0600 Subject: [PATCH 39/80] NVIDIA: VR: SAUCE: cxl/pci: Update RAS handler interfaces to also support CXL Ports CXL PCIe Port Protocol Error handling support will be added to the CXL drivers in the future. In preparation, rename the existing interfaces to support handling all CXL PCIe Port Protocol Errors. The driver's RAS support functions currently rely on a 'struct cxl_dev_state' type parameter, which is not available for CXL Port devices. However, since the same CXL RAS capability structure is needed across most CXL components and devices, a common handling approach should be adopted. To accommodate this, update the __cxl_handle_cor_ras() and __cxl_handle_ras() functions to use a `struct device` instead of `struct cxl_dev_state`. No functional changes are introduced. [1] CXL 3.1 Spec, 8.2.4 CXL.cache and CXL.mem Registers Signed-off-by: Terry Bowman Reviewed-by: Alejandro Lucero Reviewed-by: Ira Weiny Reviewed-by: Gregory Price Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 15 ++++++--------- drivers/cxl/core/ras.c | 12 ++++++------ drivers/cxl/core/ras_rch.c | 4 ++-- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index c30ab7c25a925..1a419b35fa59b 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -7,6 +7,7 @@ #include #include #include +#include extern const struct device_type cxl_nvdimm_bridge_type; extern const struct device_type cxl_nvdimm_type; @@ -148,23 +149,19 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, #ifdef CONFIG_CXL_RAS int cxl_ras_init(void); void 
cxl_ras_exit(void); -bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); -void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base); +bool cxl_handle_ras(struct device *dev, void __iomem *ras_base); +void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base); #else static inline int cxl_ras_init(void) { return 0; } - -static inline void cxl_ras_exit(void) -{ -} - -static inline bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +static inline void cxl_ras_exit(void) { } +static inline bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) { return false; } -static inline void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) { } +static inline void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { } #endif /* CONFIG_CXL_RAS */ /* Restricted CXL Host specific RAS functions */ diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index b933030b8e1e7..72908f3ced775 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -160,7 +160,7 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); -void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { void __iomem *addr; u32 status; @@ -172,7 +172,7 @@ void cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(cxlds->cxlmd, status); + trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status); } } @@ -197,7 +197,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. 
*/ -bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) +bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; @@ -224,7 +224,7 @@ bool cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base) } header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl); + trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; @@ -246,7 +246,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev) if (cxlds->rcd) cxl_handle_rdport_errors(cxlds); - cxl_handle_cor_ras(cxlds, cxlds->regs.ras); + cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); } } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); @@ -275,7 +275,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. */ - ue = cxl_handle_ras(cxlds, cxlds->regs.ras); + ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); } diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c index f6de5492a8b70..4d2babe8d2063 100644 --- a/drivers/cxl/core/ras_rch.c +++ b/drivers/cxl/core/ras_rch.c @@ -114,7 +114,7 @@ void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) pci_print_aer(pdev, severity, &aer_regs); if (severity == AER_CORRECTABLE) - cxl_handle_cor_ras(cxlds, dport->regs.ras); + cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras); else - cxl_handle_ras(cxlds, dport->regs.ras); + cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras); } From f139cf6d5669f7036c125fdba0ebca36d3a19cde Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:51 -0600 Subject: [PATCH 40/80] NVIDIA: VR: SAUCE: cxl/pci: Log message if RAS registers are unmapped The CXL RAS handlers do not currently log if the RAS registers are unmapped. 
This is needed in order to help debug CXL error handling. Update the CXL driver to log a warning message if the RAS register block is unmapped during RAS error handling. Signed-off-by: Terry Bowman Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/ras.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 72908f3ced775..0320c391f2019 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -165,8 +165,10 @@ void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) void __iomem *addr; u32 status; - if (!ras_base) + if (!ras_base) { + dev_warn_once(dev, "CXL RAS register block is not mapped"); return; + } addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; status = readl(addr); @@ -204,8 +206,10 @@ bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) u32 status; u32 fe; - if (!ras_base) + if (!ras_base) { + dev_warn_once(dev, "CXL RAS register block is not mapped"); return false; + } addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; status = readl(addr); From 53aa86d576eb3ffeea80d34f1cd49fd217faa5ec Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:52 -0600 Subject: [PATCH 41/80] NVIDIA: VR: SAUCE: cxl/pci: Unify CXL trace logging for CXL Endpoints and CXL Ports CXL currently has separate trace routines for CXL Port errors and CXL Endpoint errors. This is inconvenient for the user because they must enable 2 sets of trace routines. Make updates to the trace logging such that a single trace routine logs both CXL Endpoint and CXL Port protocol errors. Keep the trace log fields 'memdev' and 'host'. While these are not accurate for non-Endpoints the fields will remain as-is to prevent breaking userspace RAS trace consumers. 
Add serial number parameter to the trace logging. This is used for EPs and 0 is provided for CXL port devices without a serial number. Leave the correctable and uncorrectable trace routines' TP_STRUCT__entry() unchanged with respect to member data types and order. Below is output of correctable and uncorrectable protocol error logging. CXL Root Port and CXL Endpoint examples are included below. Root Port: cxl_aer_correctable_error: memdev=0000:0c:00.0 host=pci0000:0c serial: 0 status='CRC Threshold Hit' cxl_aer_uncorrectable_error: memdev=0000:0c:00.0 host=pci0000:0c serial: 0 status: 'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity Error' Endpoint: cxl_aer_correctable_error: memdev=mem3 host=0000:0f:00.0 serial=0 status='CRC Threshold Hit' cxl_aer_uncorrectable_error: memdev=mem3 host=0000:0f:00.0 serial: 0 status: 'Cache Byte Enable Parity Error' first_error: 'Cache Byte Enable Parity Error' Signed-off-by: Terry Bowman Reviewed-by: Shiju Jose Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 4 +-- drivers/cxl/core/ras.c | 26 ++++++++------- drivers/cxl/core/ras_rch.c | 4 +-- drivers/cxl/core/trace.h | 68 ++++++-------------------------------- 4 files changed, 29 insertions(+), 73 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 1a419b35fa59b..e47ae7365ce0b 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -149,8 +149,8 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, #ifdef CONFIG_CXL_RAS int cxl_ras_init(void); void cxl_ras_exit(void); -bool cxl_handle_ras(struct device *dev, void __iomem *ras_base); -void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base); +bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base); +void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem 
*ras_base); #else static inline int cxl_ras_init(void) { diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 0320c391f2019..599c88f0b376c 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -13,7 +13,7 @@ static void cxl_cper_trace_corr_port_prot_err(struct pci_dev *pdev, { u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; - trace_cxl_port_aer_correctable_error(&pdev->dev, status); + trace_cxl_aer_correctable_error(&pdev->dev, status, 0); } static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, @@ -28,8 +28,8 @@ static void cxl_cper_trace_uncorr_port_prot_err(struct pci_dev *pdev, else fe = status; - trace_cxl_port_aer_uncorrectable_error(&pdev->dev, status, fe, - ras_cap.header_log); + trace_cxl_aer_uncorrectable_error(&pdev->dev, status, fe, + ras_cap.header_log, 0); } static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd, @@ -37,7 +37,7 @@ static void cxl_cper_trace_corr_prot_err(struct cxl_memdev *cxlmd, { u32 status = ras_cap.cor_status & ~ras_cap.cor_mask; - trace_cxl_aer_correctable_error(cxlmd, status); + trace_cxl_aer_correctable_error(&cxlmd->dev, status, cxlmd->cxlds->serial); } static void @@ -45,6 +45,7 @@ cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd, struct cxl_ras_capability_regs ras_cap) { u32 status = ras_cap.uncor_status & ~ras_cap.uncor_mask; + struct cxl_dev_state *cxlds = cxlmd->cxlds; u32 fe; if (hweight32(status) > 1) @@ -53,8 +54,9 @@ cxl_cper_trace_uncorr_prot_err(struct cxl_memdev *cxlmd, else fe = status; - trace_cxl_aer_uncorrectable_error(cxlmd, status, fe, - ras_cap.header_log); + trace_cxl_aer_uncorrectable_error(&cxlmd->dev, status, fe, + ras_cap.header_log, + cxlds->serial); } static int match_memdev_by_parent(struct device *dev, const void *uport) @@ -160,7 +162,7 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); -void cxl_handle_cor_ras(struct device *dev, void 
__iomem *ras_base) +void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base) { void __iomem *addr; u32 status; @@ -174,7 +176,7 @@ void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) status = readl(addr); if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(to_cxl_memdev(dev), status); + trace_cxl_aer_correctable_error(dev, status, serial); } } @@ -199,7 +201,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) +bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; @@ -228,7 +230,7 @@ bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) } header_log_copy(ras_base, hl); - trace_cxl_aer_uncorrectable_error(to_cxl_memdev(dev), status, fe, hl); + trace_cxl_aer_uncorrectable_error(dev, status, fe, hl, serial); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); return true; @@ -250,7 +252,7 @@ void cxl_cor_error_detected(struct pci_dev *pdev) if (cxlds->rcd) cxl_handle_rdport_errors(cxlds); - cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); + cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); } } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); @@ -279,7 +281,7 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. 
*/ - ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->regs.ras); + ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); } diff --git a/drivers/cxl/core/ras_rch.c b/drivers/cxl/core/ras_rch.c index 4d2babe8d2063..421dd1bcfc9c0 100644 --- a/drivers/cxl/core/ras_rch.c +++ b/drivers/cxl/core/ras_rch.c @@ -114,7 +114,7 @@ void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) pci_print_aer(pdev, severity, &aer_regs); if (severity == AER_CORRECTABLE) - cxl_handle_cor_ras(&cxlds->cxlmd->dev, dport->regs.ras); + cxl_handle_cor_ras(&cxlds->cxlmd->dev, 0, dport->regs.ras); else - cxl_handle_ras(&cxlds->cxlmd->dev, dport->regs.ras); + cxl_handle_ras(&cxlds->cxlmd->dev, 0, dport->regs.ras); } diff --git a/drivers/cxl/core/trace.h b/drivers/cxl/core/trace.h index a972e4ef19368..69f8a0efd9241 100644 --- a/drivers/cxl/core/trace.h +++ b/drivers/cxl/core/trace.h @@ -48,40 +48,13 @@ { CXL_RAS_UC_IDE_RX_ERR, "IDE Rx Error" } \ ) -TRACE_EVENT(cxl_port_aer_uncorrectable_error, - TP_PROTO(struct device *dev, u32 status, u32 fe, u32 *hl), - TP_ARGS(dev, status, fe, hl), - TP_STRUCT__entry( - __string(device, dev_name(dev)) - __string(host, dev_name(dev->parent)) - __field(u32, status) - __field(u32, first_error) - __array(u32, header_log, CXL_HEADERLOG_SIZE_U32) - ), - TP_fast_assign( - __assign_str(device); - __assign_str(host); - __entry->status = status; - __entry->first_error = fe; - /* - * Embed the 512B headerlog data for user app retrieval and - * parsing, but no need to print this in the trace buffer. 
- */ - memcpy(__entry->header_log, hl, CXL_HEADERLOG_SIZE); - ), - TP_printk("device=%s host=%s status: '%s' first_error: '%s'", - __get_str(device), __get_str(host), - show_uc_errs(__entry->status), - show_uc_errs(__entry->first_error) - ) -); - TRACE_EVENT(cxl_aer_uncorrectable_error, - TP_PROTO(const struct cxl_memdev *cxlmd, u32 status, u32 fe, u32 *hl), - TP_ARGS(cxlmd, status, fe, hl), + TP_PROTO(const struct device *cxlmd, u32 status, u32 fe, u32 *hl, + u64 serial), + TP_ARGS(cxlmd, status, fe, hl, serial), TP_STRUCT__entry( - __string(memdev, dev_name(&cxlmd->dev)) - __string(host, dev_name(cxlmd->dev.parent)) + __string(memdev, dev_name(cxlmd)) + __string(host, dev_name(cxlmd->parent)) __field(u64, serial) __field(u32, status) __field(u32, first_error) @@ -90,7 +63,7 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, TP_fast_assign( __assign_str(memdev); __assign_str(host); - __entry->serial = cxlmd->cxlds->serial; + __entry->serial = serial; __entry->status = status; __entry->first_error = fe; /* @@ -124,38 +97,19 @@ TRACE_EVENT(cxl_aer_uncorrectable_error, { CXL_RAS_CE_PHYS_LAYER_ERR, "Received Error From Physical Layer" } \ ) -TRACE_EVENT(cxl_port_aer_correctable_error, - TP_PROTO(struct device *dev, u32 status), - TP_ARGS(dev, status), - TP_STRUCT__entry( - __string(device, dev_name(dev)) - __string(host, dev_name(dev->parent)) - __field(u32, status) - ), - TP_fast_assign( - __assign_str(device); - __assign_str(host); - __entry->status = status; - ), - TP_printk("device=%s host=%s status='%s'", - __get_str(device), __get_str(host), - show_ce_errs(__entry->status) - ) -); - TRACE_EVENT(cxl_aer_correctable_error, - TP_PROTO(const struct cxl_memdev *cxlmd, u32 status), - TP_ARGS(cxlmd, status), + TP_PROTO(const struct device *cxlmd, u32 status, u64 serial), + TP_ARGS(cxlmd, status, serial), TP_STRUCT__entry( - __string(memdev, dev_name(&cxlmd->dev)) - __string(host, dev_name(cxlmd->dev.parent)) + __string(memdev, dev_name(cxlmd)) + __string(host, 
dev_name(cxlmd->parent)) __field(u64, serial) __field(u32, status) ), TP_fast_assign( __assign_str(memdev); __assign_str(host); - __entry->serial = cxlmd->cxlds->serial; + __entry->serial = serial; __entry->status = status; ), TP_printk("memdev=%s host=%s serial=%lld: status: '%s'", From 4f43fe020768fed2417c2734a8d1934166625746 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:53 -0600 Subject: [PATCH 42/80] NVIDIA: VR: SAUCE: cxl/pci: Update cxl_handle_cor_ras() to return early if no RAS errors Update cxl_handle_cor_ras() to exit early in the case there is no RAS errors detected after applying the status mask. This change will make the correctable handler's implementation consistent with the uncorrectable handler, cxl_handle_ras(). Signed-off-by: Terry Bowman Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/ras.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 599c88f0b376c..246dfe56617a2 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -174,10 +174,11 @@ void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base) addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET; status = readl(addr); - if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) { - writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); - trace_cxl_aer_correctable_error(dev, status, serial); - } + if (!(status & CXL_RAS_CORRECTABLE_STATUS_MASK)) + return; + writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr); + + trace_cxl_aer_correctable_error(dev, status, serial); } /* CXL spec rev3.0 8.2.4.16.1 */ From 392aaada6713ca23ee64d0efe58087df781243fc Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:54 -0600 Subject: [PATCH 43/80] 
NVIDIA: VR: SAUCE: cxl/pci: Map CXL Endpoint Port and CXL Switch Port RAS registers CXL Endpoint (EP) Ports may include Root Ports (RP) or Downstream Switch Ports (DSP). CXL RPs and DSPs contain RAS registers that require memory mapping to enable RAS logging. This initialization is currently missing and must be added for CXL RPs and DSPs. Update cxl_dport_init_ras_reporting() to support RP and DSP RAS mapping. Add alongside the existing Restricted CXL Host Downstream Port RAS mapping. Update cxl_endpoint_port_probe() to invoke cxl_dport_init_ras_reporting(). This will initiate the RAS mapping for CXL RPs and DSPs when each CXL EP is created and added to the EP port. Make a call to cxl_port_setup_regs() in cxl_port_add(). This will probe the Upstream Port's CXL capabilities' physical location to be used in mapping the RAS registers. Signed-off-by: Terry Bowman (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/port.c | 4 ++++ drivers/cxl/core/ras.c | 12 ++++++++++++ drivers/cxl/cxl.h | 2 ++ drivers/cxl/cxlpci.h | 4 ++++ drivers/cxl/mem.c | 3 ++- 5 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 43054da7e547b..3e8a1d634547f 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1195,6 +1195,8 @@ __devm_cxl_add_dport(struct cxl_port *port, struct device *dport_dev, return ERR_PTR(rc); } port->component_reg_phys = CXL_RESOURCE_NONE; + if (!is_cxl_endpoint(port) && dev_is_pci(port->uport_dev)) + cxl_uport_init_ras_reporting(port, &port->dev); } get_device(dport_dev); @@ -1624,6 +1626,8 @@ static struct cxl_dport *cxl_port_add_dport(struct cxl_port *port, cxl_switch_parse_cdat(new_dport); + cxl_dport_init_ras_reporting(new_dport, &port->dev); + if (ida_is_empty(&port->decoder_ida)) { rc = devm_cxl_switch_port_decoders_setup(port); if (rc) diff --git a/drivers/cxl/core/ras.c 
b/drivers/cxl/core/ras.c index 246dfe56617a2..19d9ffe885bfb 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -162,6 +162,18 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); +void cxl_uport_init_ras_reporting(struct cxl_port *port, + struct device *host) +{ + struct cxl_register_map *map = &port->reg_map; + + map->host = host; + if (cxl_map_component_regs(map, &port->uport_regs, + BIT(CXL_CM_CAP_CAP_ID_RAS))) + dev_dbg(&port->dev, "Failed to map RAS capability\n"); +} +EXPORT_SYMBOL_NS_GPL(cxl_uport_init_ras_reporting, "CXL"); + void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base) { void __iomem *addr; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 259ed4b676e13..b7654d40dc9ee 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -599,6 +599,7 @@ struct cxl_dax_region { * @parent_dport: dport that points to this port in the parent * @decoder_ida: allocator for decoder ids * @reg_map: component and ras register mapping parameters + * @uport_regs: mapped component registers * @nr_dports: number of entries in @dports * @hdm_end: track last allocated HDM decoder instance for allocation ordering * @commit_end: cursor to track highest committed decoder for commit ordering @@ -620,6 +621,7 @@ struct cxl_port { struct cxl_dport *parent_dport; struct ida decoder_ida; struct cxl_register_map reg_map; + struct cxl_component_regs uport_regs; int nr_dports; int hdm_end; int commit_end; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 0c8b6ee7b6de5..a0a491e7b5b9a 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -83,6 +83,8 @@ void cxl_cor_error_detected(struct pci_dev *pdev); pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, pci_channel_state_t state); void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); +void cxl_uport_init_ras_reporting(struct cxl_port *port, + 
struct device *host); #else static inline void cxl_cor_error_detected(struct pci_dev *pdev) { } @@ -94,6 +96,8 @@ static inline pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) { } +static inline void cxl_uport_init_ras_reporting(struct cxl_port *port, + struct device *host) { } #endif #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 6e6777b7bafb5..d2155f45240da 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -166,7 +166,8 @@ static int cxl_mem_probe(struct device *dev) else endpoint_parent = &parent_port->dev; - cxl_dport_init_ras_reporting(dport, dev); + if (dport->rch) + cxl_dport_init_ras_reporting(dport, dev); scoped_guard(device, endpoint_parent) { if (!endpoint_parent->driver) { From 210eb9c48ae30c2d98956bbeb6d97819fc1fa488 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:55 -0600 Subject: [PATCH 44/80] NVIDIA: VR: SAUCE: CXL/PCI: Introduce PCI_ERS_RESULT_PANIC The CXL driver's error handling for uncorrectable errors (UCE) will be updated in the future. A required change is for the error handlers to force a system panic when a UCE is detected. Introduce PCI_ERS_RESULT_PANIC as an 'enum pci_ers_result' type. This will be used by CXL UCE fatal and non-fatal recovery in future patches. Update PCIe recovery documentation with details of PCI_ERS_RESULT_PANIC.
Signed-off-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- Documentation/PCI/pci-error-recovery.rst | 6 ++++++ include/linux/pci.h | 3 +++ 2 files changed, 9 insertions(+) diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst index 42e1e78353f38..46590510e931f 100644 --- a/Documentation/PCI/pci-error-recovery.rst +++ b/Documentation/PCI/pci-error-recovery.rst @@ -102,6 +102,8 @@ Possible return values are:: PCI_ERS_RESULT_NEED_RESET, /* Device driver wants slot to be reset. */ PCI_ERS_RESULT_DISCONNECT, /* Device has completely failed, is unrecoverable */ PCI_ERS_RESULT_RECOVERED, /* Device driver is fully recovered and operational */ + PCI_ERS_RESULT_NO_AER_DRIVER, /* No AER capabilities registered for the driver */ + PCI_ERS_RESULT_PANIC, /* System is unstable, panic. Is CXL specific */ }; A driver does not have to implement all of these callbacks; however, @@ -116,6 +118,10 @@ The actual steps taken by a platform to recover from a PCI error event will be platform-dependent, but will follow the general sequence described below. +PCI_ERS_RESULT_PANIC is currently unique to CXL and handled in CXL +cxl_do_recovery(). The PCI pcie_do_recovery() routine does not report or +handle PCI_ERS_RESULT_PANIC. + STEP 0: Error Event ------------------- A PCI bus error is detected by the PCI hardware. On powerpc, the slot diff --git a/include/linux/pci.h b/include/linux/pci.h index a03cdd8c96122..35aa99fee4600 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -897,6 +897,9 @@ enum pci_ers_result { /* No AER capabilities registered for the driver */ PCI_ERS_RESULT_NO_AER_DRIVER = (__force pci_ers_result_t) 6, + + /* System is unstable, panic. 
Is CXL specific */ + PCI_ERS_RESULT_PANIC = (__force pci_ers_result_t) 7, }; /* PCI bus error event callbacks */ From 03af83eb41e6c8b7d66ede9a72d8682b38dbe219 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:56 -0600 Subject: [PATCH 45/80] NVIDIA: VR: SAUCE: CXL/AER: Introduce pcie/aer_cxl_vh.c in AER driver for forwarding CXL errors CXL virtual hierarchy (VH) RAS handling for CXL Port devices will be added soon. This requires a notification mechanism for the AER driver to share the AER interrupt with the CXL driver. The notification will be used as an indication for the CXL drivers to handle and log the CXL RAS errors. Note, 'CXL protocol error' terminology will refer to CXL VH and not CXL RCH errors unless specifically noted going forward. Introduce a new file in the AER driver to handle the CXL protocol errors named pci/pcie/aer_cxl_vh.c. Add a kfifo work queue to be used by the AER and CXL drivers. The AER driver will be the sole kfifo producer adding work and the cxl_core will be the sole kfifo consumer removing work. Add the boilerplate kfifo support. Encapsulate the kfifo, RW semaphore, and work pointer in a single structure. Add CXL work queue handler registration functions in the AER driver. Export the functions allowing CXL driver to access. Implement registration functions for the CXL driver to assign or clear the work handler function. Synchronize accesses using the RW semaphore. Introduce 'struct cxl_proto_err_work_data' to serve as the kfifo work data. This will contain a reference to the erring PCI device and the error severity. This will be used when the work is dequeued by the cxl_core driver. 
Signed-off-by: Terry Bowman Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) [jan: Fix potential use after free race condition by adding pci_dev_get in the kfifo producer] [jan: Fix duplicated include of linux/pci.h] Signed-off-by: Jiandi An --- drivers/pci/pci.h | 4 ++ drivers/pci/pcie/Makefile | 1 + drivers/pci/pcie/aer.c | 25 ++------- drivers/pci/pcie/aer_cxl_vh.c | 95 +++++++++++++++++++++++++++++++++++ include/linux/aer.h | 17 +++++++ 5 files changed, 121 insertions(+), 21 deletions(-) create mode 100644 drivers/pci/pcie/aer_cxl_vh.c diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b592170baebb5..758f2f40ce4e7 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1245,8 +1245,12 @@ static inline void cxl_rch_enable_rcec(struct pci_dev *rcec) { } #ifdef CONFIG_CXL_RAS bool is_internal_error(struct aer_err_info *info); +bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info); +void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info); #else static inline bool is_internal_error(struct aer_err_info *info) { return false; } +static inline bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info) { return false; } +static inline void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info) { } #endif #endif /* DRIVERS_PCI_H */ diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 970e7cbc5b34d..72992b3ea417b 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o bwctrl.o obj-y += aspm.o obj-$(CONFIG_PCIEAER) += aer.o err.o tlp.o obj-$(CONFIG_CXL_RCH_RAS) += aer_cxl_rch.o +obj-$(CONFIG_CXL_RAS) += aer_cxl_vh.o obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 2d9f0e959a4c5..11b16833ee35a 
100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1099,8 +1099,6 @@ static bool find_source_device(struct pci_dev *parent, return true; } -#ifdef CONFIG_PCIEAER_CXL - /** * pci_aer_unmask_internal_errors - unmask internal errors * @dev: pointer to the pci_dev data structure @@ -1126,24 +1124,6 @@ void pci_aer_unmask_internal_errors(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_aer_unmask_internal_errors); -bool cxl_error_is_native(struct pci_dev *dev) -{ - struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); - - return (pcie_ports_native || host->native_aer); -} -EXPORT_SYMBOL_NS_GPL(cxl_error_is_native, "CXL"); - -bool is_internal_error(struct aer_err_info *info) -{ - if (info->severity == AER_CORRECTABLE) - return info->status & PCI_ERR_COR_INTERNAL; - - return info->status & PCI_ERR_UNC_INTN; -} -EXPORT_SYMBOL_NS_GPL(is_internal_error, "CXL"); -#endif /* CONFIG_CXL_RAS */ - /** * pci_aer_handle_error - handle logging error into an event log * @dev: pointer to pci_dev data structure of error source device @@ -1180,7 +1160,10 @@ static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info) static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) { cxl_rch_handle_error(dev, info); - pci_aer_handle_error(dev, info); + if (is_cxl_error(dev, info)) + cxl_forward_error(dev, info); + else + pci_aer_handle_error(dev, info); pci_dev_put(dev); } diff --git a/drivers/pci/pcie/aer_cxl_vh.c b/drivers/pci/pcie/aer_cxl_vh.c new file mode 100644 index 0000000000000..c1e767d1ead32 --- /dev/null +++ b/drivers/pci/pcie/aer_cxl_vh.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2025 AMD Corporation. All rights reserved. 
*/ + +#include +#include +#include +#include +#include "../pci.h" + +#define CXL_ERROR_SOURCES_MAX 128 + +struct cxl_proto_err_kfifo { + struct work_struct *work; + struct rw_semaphore rw_sema; + DECLARE_KFIFO(fifo, struct cxl_proto_err_work_data, + CXL_ERROR_SOURCES_MAX); +}; + +static struct cxl_proto_err_kfifo cxl_proto_err_kfifo = { + .rw_sema = __RWSEM_INITIALIZER(cxl_proto_err_kfifo.rw_sema) +}; + +bool cxl_error_is_native(struct pci_dev *dev) +{ + struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); + + return (pcie_ports_native || host->native_aer); +} +EXPORT_SYMBOL_NS_GPL(cxl_error_is_native, "CXL"); + +bool is_internal_error(struct aer_err_info *info) +{ + if (info->severity == AER_CORRECTABLE) + return info->status & PCI_ERR_COR_INTERNAL; + + return info->status & PCI_ERR_UNC_INTN; +} +EXPORT_SYMBOL_NS_GPL(is_internal_error, "CXL"); + +bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info) +{ + if (!info || !info->is_cxl) + return false; + + if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT) + return false; + + return is_internal_error(info); +} +EXPORT_SYMBOL_NS_GPL(is_cxl_error, "CXL"); + +void cxl_forward_error(struct pci_dev *pdev, struct aer_err_info *info) +{ + struct cxl_proto_err_work_data wd = (struct cxl_proto_err_work_data) { + .severity = info->severity, + .pdev = pci_dev_get(pdev) + }; + + guard(rwsem_write)(&cxl_proto_err_kfifo.rw_sema); + + if (!cxl_proto_err_kfifo.work) { + pci_dev_put(pdev); + dev_warn_once(&pdev->dev, "CXL driver is unregistered. 
Unable to forward error."); + return; + } + + if (!kfifo_put(&cxl_proto_err_kfifo.fifo, wd)) { + dev_err_ratelimited(&pdev->dev, "AER-CXL kfifo overflow\n"); + return; + } + + schedule_work(cxl_proto_err_kfifo.work); +} +EXPORT_SYMBOL_NS_GPL(cxl_forward_error, "CXL"); + +void cxl_register_proto_err_work(struct work_struct *work) +{ + guard(rwsem_write)(&cxl_proto_err_kfifo.rw_sema); + cxl_proto_err_kfifo.work = work; +} +EXPORT_SYMBOL_NS_GPL(cxl_register_proto_err_work, "CXL"); + +void cxl_unregister_proto_err_work(void) +{ + guard(rwsem_write)(&cxl_proto_err_kfifo.rw_sema); + cxl_proto_err_kfifo.work = NULL; +} +EXPORT_SYMBOL_NS_GPL(cxl_unregister_proto_err_work, "CXL"); + +int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd) +{ + guard(rwsem_read)(&cxl_proto_err_kfifo.rw_sema); + return kfifo_get(&cxl_proto_err_kfifo.fifo, wd); +} +EXPORT_SYMBOL_NS_GPL(cxl_proto_err_kfifo_get, "CXL"); diff --git a/include/linux/aer.h b/include/linux/aer.h index 2ef8205639963..6b2c87d1b5b6d 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -10,6 +10,7 @@ #include #include +#include #define AER_NONFATAL 0 #define AER_FATAL 1 @@ -53,6 +54,16 @@ struct aer_capability_regs { u16 uncor_err_source; }; +/** + * struct cxl_proto_err_work_data - Error information used in CXL error handling + * @severity: AER severity + * @pdev: PCI device detecting the error + */ +struct cxl_proto_err_work_data { + int severity; + struct pci_dev *pdev; +}; + #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); @@ -68,8 +79,14 @@ static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } #ifdef CONFIG_CXL_RAS bool cxl_error_is_native(struct pci_dev *dev); +int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd); +void cxl_register_proto_err_work(struct work_struct *work); +void cxl_unregister_proto_err_work(void); #else static inline bool cxl_error_is_native(struct pci_dev *dev) { 
return false; } +static inline int cxl_proto_err_kfifo_get(struct cxl_proto_err_work_data *wd) { return 0; } +static inline void cxl_register_proto_err_work(struct work_struct *work) { } +static inline void cxl_unregister_proto_err_work(void) { } #endif void pci_print_aer(struct pci_dev *dev, int aer_severity, From 129a255f961fbc1eb1c9ec48509fdd670ee1e94e Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:57 -0600 Subject: [PATCH 46/80] NVIDIA: VR: SAUCE: cxl: Introduce cxl_pci_drv_bound() to check for bound driver CXL devices handle protocol errors via driver-specific callbacks rather than the generic pci_driver::err_handlers by default. The callbacks are implemented in the cxl_pci driver and are not part of struct pci_driver, so cxl_core must verify that a device is actually bound to the cxl_pci module's driver before invoking the callbacks (the device could be bound to another driver, e.g. VFIO). However, cxl_core can not reference symbols in the cxl_pci module because it creates a circular dependency. This prevents cxl_core from checking the EP's bound driver and calling the callbacks. To fix this, move drivers/cxl/pci.c into drivers/cxl/core/pci_drv.c and build it as part of the cxl_core module. Compile into cxl_core using CXL_PCI and CXL_CORE Kconfig dependencies. This removes the standalone cxl_pci module, consolidates the cxl_pci driver code into cxl_core, and eliminates the circular dependency so cxl_core can safely perform bound-driver checks and invoke the CXL PCI callbacks. Introduce cxl_pci_drv_bound() to return boolean depending on if the PCI EP parameter is bound to a CXL driver instance. This will be used in future patch when dequeuing work from the kfifo. 
Signed-off-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham Reviewed-by: Jonathan Cameron (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) [jan: Fixed cxl_pci_driver_init() rc ignored in cxl_core_init()] Signed-off-by: Jiandi An --- drivers/cxl/Kconfig | 6 +++--- drivers/cxl/Makefile | 2 -- drivers/cxl/core/Makefile | 1 + drivers/cxl/core/core.h | 9 +++++++++ drivers/cxl/{pci.c => core/pci_drv.c} | 21 +++++++++++++-------- drivers/cxl/core/port.c | 7 +++++++ tools/testing/cxl/Kbuild | 1 + 7 files changed, 34 insertions(+), 13 deletions(-) rename drivers/cxl/{pci.c => core/pci_drv.c} (99%) diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index ffe6ad981434d..360c78fa7e976 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -20,7 +20,7 @@ menuconfig CXL_BUS if CXL_BUS config CXL_PCI - tristate "PCI manageability" + bool "PCI manageability" default CXL_BUS help The CXL specification defines a "CXL memory device" sub-class in the @@ -29,12 +29,12 @@ config CXL_PCI memory to be mapped into the system address map (Host-managed Device Memory (HDM)). - Say 'y/m' to enable a driver that will attach to CXL memory expander + Say 'y' to enable a driver that will attach to CXL memory expander devices enumerated by the memory device class code for configuration and management primarily via the mailbox interface. See Chapter 2.3 Type 3 CXL Device in the CXL 2.0 specification for more details. - If unsure say 'm'. + If unsure say 'y'. 
config CXL_MEM_RAW_COMMANDS bool "RAW Command Interface for Memory Devices" diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index 2caa90fa4bf25..ff6add88b6ae2 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -12,10 +12,8 @@ obj-$(CONFIG_CXL_PORT) += cxl_port.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o obj-$(CONFIG_CXL_MEM) += cxl_mem.o -obj-$(CONFIG_CXL_PCI) += cxl_pci.o cxl_port-y := port.o cxl_acpi-y := acpi.o cxl_pmem-y := pmem.o security.o cxl_mem-y := mem.o -cxl_pci-y := pci.o diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index fa1d4aed28b90..2937d0ddcce27 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -21,3 +21,4 @@ cxl_core-$(CONFIG_CXL_FEATURES) += features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o cxl_core-$(CONFIG_CXL_RAS) += ras.o cxl_core-$(CONFIG_CXL_RCH_RAS) += ras_rch.o +cxl_core-$(CONFIG_CXL_PCI) += pci_drv.o diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index e47ae7365ce0b..61c6726744d75 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -195,4 +195,13 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid, u16 *return_code); #endif +#ifdef CONFIG_CXL_PCI +bool cxl_pci_drv_bound(struct pci_dev *pdev); +int cxl_pci_driver_init(void); +void cxl_pci_driver_exit(void); +#else +static inline bool cxl_pci_drv_bound(struct pci_dev *pdev) { return false; }; +static inline int cxl_pci_driver_init(void) { return 0; } +static inline void cxl_pci_driver_exit(void) { } +#endif #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/pci.c b/drivers/cxl/core/pci_drv.c similarity index 99% rename from drivers/cxl/pci.c rename to drivers/cxl/core/pci_drv.c index bd95be1f3d5c5..06f2fd993cb0e 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/core/pci_drv.c @@ -1131,6 +1131,17 @@ static struct pci_driver cxl_pci_driver = { }, }; +bool cxl_pci_drv_bound(struct pci_dev *pdev) +{ + 
device_lock_assert(&pdev->dev); + + if (pdev->driver != &cxl_pci_driver) + pr_err_ratelimited("%s device not bound to CXL PCI driver\n", + pci_name(pdev)); + + return (pdev->driver == &cxl_pci_driver); +} + #define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0) static void cxl_handle_cper_event(enum cxl_event_type ev_type, struct cxl_cper_event_rec *rec) @@ -1177,7 +1188,7 @@ static void cxl_cper_work_fn(struct work_struct *work) } static DECLARE_WORK(cxl_cper_work, cxl_cper_work_fn); -static int __init cxl_pci_driver_init(void) +int __init cxl_pci_driver_init(void) { int rc; @@ -1192,15 +1203,9 @@ static int __init cxl_pci_driver_init(void) return rc; } -static void __exit cxl_pci_driver_exit(void) +void cxl_pci_driver_exit(void) { cxl_cper_unregister_work(&cxl_cper_work); cancel_work_sync(&cxl_cper_work); pci_unregister_driver(&cxl_pci_driver); } - -module_init(cxl_pci_driver_init); -module_exit(cxl_pci_driver_exit); -MODULE_DESCRIPTION("CXL: PCI manageability"); -MODULE_LICENSE("GPL v2"); -MODULE_IMPORT_NS("CXL"); diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 3e8a1d634547f..ef6094dccfc01 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -2508,8 +2508,14 @@ static __init int cxl_core_init(void) if (rc) goto err_ras; + rc = cxl_pci_driver_init(); + if (rc) + goto err_pci; + return 0; +err_pci: + cxl_ras_exit(); err_ras: cxl_region_exit(); err_region: @@ -2523,6 +2529,7 @@ static __init int cxl_core_init(void) static void cxl_core_exit(void) { + cxl_pci_driver_exit(); cxl_ras_exit(); cxl_region_exit(); bus_unregister(&cxl_bus_type); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 6905f8e710abc..d8b8272ef87bd 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -65,6 +65,7 @@ cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o cxl_core-$(CONFIG_CXL_RAS) += $(CXL_CORE_SRC)/ras.o 
cxl_core-$(CONFIG_CXL_RCH_RAS) += $(CXL_CORE_SRC)/ras_rch.o +cxl_core-$(CONFIG_CXL_PCI) += $(CXL_CORE_SRC)/pci_drv.o cxl_core-y += config_check.o cxl_core-y += cxl_core_test.o cxl_core-y += cxl_core_exports.o From 3f6c9ef168704b995f6385d94138fa84bf2452dd Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:58 -0600 Subject: [PATCH 47/80] NVIDIA: VR: SAUCE: cxl: Change CXL handlers to use guard() instead of scoped_guard() The CXL protocol error handlers use scoped_guard() to guarantee access to the underlying CXL memory device. Improve readability and reduce complexity by changing the current scoped_guard() to be guard(). Signed-off-by: Terry Bowman (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/ras.c | 53 +++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 19d9ffe885bfb..cb712772de5c3 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -254,19 +254,19 @@ void cxl_cor_error_detected(struct pci_dev *pdev) struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); struct device *dev = &cxlds->cxlmd->dev; - scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return; - } - - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); + guard(device)(dev); - cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return; } + + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + + cxl_handle_cor_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); @@ -278,25 +278,24 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, struct device *dev = &cxlmd->dev; bool ue; - 
scoped_guard(device, dev) { - if (!dev->driver) { - dev_warn(&pdev->dev, - "%s: memdev disabled, abort error handling\n", - dev_name(dev)); - return PCI_ERS_RESULT_DISCONNECT; - } + guard(device)(dev); - if (cxlds->rcd) - cxl_handle_rdport_errors(cxlds); - /* - * A frozen channel indicates an impending reset which is fatal to - * CXL.mem operation, and will likely crash the system. On the off - * chance the situation is recoverable dump the status of the RAS - * capability registers and bounce the active state of the memdev. - */ - ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); + if (!dev->driver) { + dev_warn(&pdev->dev, + "%s: memdev disabled, abort error handling\n", + dev_name(dev)); + return PCI_ERS_RESULT_DISCONNECT; } + if (cxlds->rcd) + cxl_handle_rdport_errors(cxlds); + /* + * A frozen channel indicates an impending reset which is fatal to + * CXL.mem operation, and will likely crash the system. On the off + * chance the situation is recoverable dump the status of the RAS + * capability registers and bounce the active state of the memdev. + */ + ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); switch (state) { case pci_channel_io_normal: From 6332a97c433a858eb8254cdf54f50085f1ff292f Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:02:59 -0600 Subject: [PATCH 48/80] NVIDIA: VR: SAUCE: cxl/pci: Introduce CXL protocol error handlers for Endpoints CXL Endpoint protocol errors are currently handled by generic PCI error handlers. However, uncorrectable errors (UCEs) require CXL.mem protocol- specific handling logic that the PCI handlers cannot provide. Add dedicated CXL protocol error handlers for CXL Endpoints. Rename the existing cxl_error_handlers to pci_error_handlers to better reflect their purpose and maintain naming consistency. Update the PCI error handlers to invoke the new CXL protocol handlers when the endpoint is operating in CXL.mem mode. 
Implement cxl_handle_ras() to return PCI_ERS_RESULT_NONE or PCI_ERS_RESULT_PANIC. Remove unnecessary result checks from the previous endpoint UCE handler since CXL UCE recovery is not implemented in this patch. Add device lock assertions to protect against concurrent device or RAS register removal during error handling. Two devices require locking for CXL endpoints: 1. The PCI device (pdev->dev) - RAS registers are allocated and mapped using devm_* functions with this device as the host. Locking prevents the RAS registers from being unmapped until after error handling completes. 2. The CXL memory device (cxlmd->dev) - Holds a reference to the RAS registers accessed during error handling. Locking prevents the memory device and its RAS register references from being removed during error handling. The lock assertions added here will be satisfied by device locks introduced in a subsequent patch. A future patch will extend the CXL UCE handler to support full UCE recovery. Signed-off-by: Terry Bowman Reviewed-by: Kuppuswamy Sathyanarayanan (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) [jan: fixed container of on wrong device type causing memory corruption in pci_cor_error_detected] Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 22 +++++++-- drivers/cxl/core/pci_drv.c | 9 ++-- drivers/cxl/core/ras.c | 97 +++++++++++++++++++++++--------------- drivers/cxl/cxlpci.h | 11 ----- 4 files changed, 82 insertions(+), 57 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 61c6726744d75..b2c0ccd6803f9 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -149,19 +149,33 @@ int cxl_port_get_switch_dport_bandwidth(struct cxl_port *port, #ifdef CONFIG_CXL_RAS int cxl_ras_init(void); void cxl_ras_exit(void); -bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base); +pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, + void __iomem *ras_base); void 
cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base); +pci_ers_result_t cxl_error_detected(struct device *dev); +void cxl_cor_error_detected(struct device *dev); +pci_ers_result_t pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t error); +void pci_cor_error_detected(struct pci_dev *pdev); #else static inline int cxl_ras_init(void) { return 0; } static inline void cxl_ras_exit(void) { } -static inline bool cxl_handle_ras(struct device *dev, void __iomem *ras_base) +static inline pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, + void __iomem *ras_base) { - return false; + return PCI_ERS_RESULT_NONE; } -static inline void cxl_handle_cor_ras(struct device *dev, void __iomem *ras_base) { } +static inline void cxl_handle_cor_ras(struct device *dev, u64 serial, + void __iomem *ras_base) { } +static inline pci_ers_result_t pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t error) +{ + return PCI_ERS_RESULT_NONE; +} +static inline void pci_cor_error_detected(struct pci_dev *pdev) { } #endif /* CONFIG_CXL_RAS */ /* Restricted CXL Host specific RAS functions */ diff --git a/drivers/cxl/core/pci_drv.c b/drivers/cxl/core/pci_drv.c index 06f2fd993cb0e..bc3c959f7eb63 100644 --- a/drivers/cxl/core/pci_drv.c +++ b/drivers/cxl/core/pci_drv.c @@ -16,6 +16,7 @@ #include "cxlpci.h" #include "cxl.h" #include "pmu.h" +#include "core/core.h" /** * DOC: cxl pci @@ -1112,11 +1113,11 @@ static void cxl_reset_done(struct pci_dev *pdev) } } -static const struct pci_error_handlers cxl_error_handlers = { - .error_detected = cxl_error_detected, +static const struct pci_error_handlers pci_error_handlers = { + .error_detected = pci_error_detected, .slot_reset = cxl_slot_reset, .resume = cxl_error_resume, - .cor_error_detected = cxl_cor_error_detected, + .cor_error_detected = pci_cor_error_detected, .reset_done = cxl_reset_done, }; @@ -1124,7 +1125,7 @@ static struct pci_driver cxl_pci_driver = { .name = KBUILD_MODNAME, .id_table = 
cxl_mem_pci_tbl, .probe = cxl_pci_probe, - .err_handler = &cxl_error_handlers, + .err_handler = &pci_error_handlers, .dev_groups = cxl_rcd_groups, .driver = { .probe_type = PROBE_PREFER_ASYNCHRONOUS, diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index cb712772de5c3..abb09d4025fd6 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -128,6 +128,11 @@ void cxl_ras_exit(void) cancel_work_sync(&cxl_cper_prot_err_work); } +static bool is_pcie_endpoint(struct pci_dev *pdev) +{ + return pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT; +} + static void cxl_dport_map_ras(struct cxl_dport *dport) { struct cxl_register_map *map = &dport->reg_map; @@ -214,7 +219,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log) * Log the state of the RAS status registers and prepare them to log the * next error status. Return 1 if reset needed. */ -bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base) +pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base) { u32 hl[CXL_HEADERLOG_SIZE_U32]; void __iomem *addr; @@ -223,13 +228,13 @@ bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base) if (!ras_base) { dev_warn_once(dev, "CXL RAS register block is not mapped"); - return false; + return PCI_ERS_RESULT_NONE; } addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET; status = readl(addr); if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK)) - return false; + return PCI_ERS_RESULT_NONE; /* If multiple errors, log header points to first error from ctrl reg */ if (hweight32(status) > 1) { @@ -246,18 +251,19 @@ bool cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ras_base) trace_cxl_aer_uncorrectable_error(dev, status, fe, hl, serial); writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr); - return true; + return PCI_ERS_RESULT_PANIC; } -void cxl_cor_error_detected(struct pci_dev *pdev) +void cxl_cor_error_detected(struct device *dev) { - struct cxl_dev_state *cxlds = 
pci_get_drvdata(pdev); - struct device *dev = &cxlds->cxlmd->dev; + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; - guard(device)(dev); + device_lock_assert(cxlds->dev); + device_lock_assert(&cxlmd->dev); if (!dev->driver) { - dev_warn(&pdev->dev, + dev_warn(cxlds->dev, "%s: memdev disabled, abort error handling\n", dev_name(dev)); return; @@ -270,18 +276,31 @@ void cxl_cor_error_detected(struct pci_dev *pdev) } EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL"); -pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state) +void pci_cor_error_detected(struct pci_dev *pdev) +{ + struct cxl_dev_state *cxlds; + + device_lock_assert(&pdev->dev); + if (!cxl_pci_drv_bound(pdev)) + return; + + cxlds = pci_get_drvdata(pdev); + guard(device)(&cxlds->cxlmd->dev); + + cxl_cor_error_detected(&cxlds->cxlmd->dev); +} +EXPORT_SYMBOL_NS_GPL(pci_cor_error_detected, "CXL"); + +pci_ers_result_t cxl_error_detected(struct device *dev) { - struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); - struct cxl_memdev *cxlmd = cxlds->cxlmd; - struct device *dev = &cxlmd->dev; - bool ue; + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; - guard(device)(dev); + device_lock_assert(cxlds->dev); + device_lock_assert(&cxlmd->dev); if (!dev->driver) { - dev_warn(&pdev->dev, + dev_warn(cxlds->dev, "%s: memdev disabled, abort error handling\n", dev_name(dev)); return PCI_ERS_RESULT_DISCONNECT; @@ -289,32 +308,34 @@ pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, if (cxlds->rcd) cxl_handle_rdport_errors(cxlds); + /* * A frozen channel indicates an impending reset which is fatal to * CXL.mem operation, and will likely crash the system. On the off * chance the situation is recoverable dump the status of the RAS * capability registers and bounce the active state of the memdev. 
*/ - ue = cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); - - switch (state) { - case pci_channel_io_normal: - if (ue) { - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - } - return PCI_ERS_RESULT_CAN_RECOVER; - case pci_channel_io_frozen: - dev_warn(&pdev->dev, - "%s: frozen state error detected, disable CXL.mem\n", - dev_name(dev)); - device_release_driver(dev); - return PCI_ERS_RESULT_NEED_RESET; - case pci_channel_io_perm_failure: - dev_warn(&pdev->dev, - "failure state error detected, request disconnect\n"); - return PCI_ERS_RESULT_DISCONNECT; - } - return PCI_ERS_RESULT_NEED_RESET; + return cxl_handle_ras(&cxlds->cxlmd->dev, cxlds->serial, cxlds->regs.ras); } EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL"); + +pci_ers_result_t pci_error_detected(struct pci_dev *pdev, + pci_channel_state_t error) +{ + struct cxl_dev_state *cxlds; + pci_ers_result_t rc; + + device_lock_assert(&pdev->dev); + if (!cxl_pci_drv_bound(pdev)) + return PCI_ERS_RESULT_NONE; + + cxlds = pci_get_drvdata(pdev); + guard(device)(&cxlds->cxlmd->dev); + + rc = cxl_error_detected(&cxlds->cxlmd->dev); + if (rc == PCI_ERS_RESULT_PANIC) + panic("CXL cachemem error."); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(pci_error_detected, "CXL"); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index a0a491e7b5b9a..3526e6d75f796 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -79,21 +79,10 @@ struct cxl_dev_state; void read_cdat_data(struct cxl_port *port); #ifdef CONFIG_CXL_RAS -void cxl_cor_error_detected(struct pci_dev *pdev); -pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t state); void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host); void cxl_uport_init_ras_reporting(struct cxl_port *port, struct device *host); #else -static inline void cxl_cor_error_detected(struct pci_dev *pdev) { } - -static inline pci_ers_result_t cxl_error_detected(struct pci_dev *pdev, - pci_channel_state_t 
state) -{ - return PCI_ERS_RESULT_NONE; -} - static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) { } static inline void cxl_uport_init_ras_reporting(struct cxl_port *port, From e1d1d1263a22fd0968511543f56ca82e8ca647a6 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:03:00 -0600 Subject: [PATCH 49/80] NVIDIA: VR: SAUCE: CXL/PCI: Introduce CXL Port protocol error handlers Add CXL protocol error handlers for CXL Port devices (Root Ports, Downstream Ports, and Upstream Ports). Implement cxl_port_cor_error_detected() and cxl_port_error_detected() to handle correctable and uncorrectable errors respectively. Introduce cxl_get_ras_base() to retrieve the cached RAS register base address for a given CXL port. This function supports CXL Root Ports, Downstream Ports, and Upstream Ports by returning their previously mapped RAS register addresses. Add device lock assertions to protect against concurrent device or RAS register removal during error handling. The port error handlers require two device locks: 1. The port's CXL parent device - RAS registers are mapped using devm_* functions with the parent port as the host. Locking the parent prevents the RAS registers from being unmapped during error handling. 2. The PCI device (pdev->dev) - Locking prevents concurrent modifications to the PCI device structure during error handling. The lock assertions added here will be satisfied by device locks introduced in a subsequent patch. Introduce get_pci_cxl_host_dev() to return the device responsible for managing the RAS register mapping. This function increments the reference count on the host device to prevent premature resource release during error handling. The caller is responsible for decrementing the reference count. For CXL endpoints, which manage resources without a separate host device, this function returns NULL. 
Update the AER driver's is_cxl_error() to recognize CXL Port devices in addition to CXL Endpoints, as both now have CXL-specific error handlers. Signed-off-by: Terry Bowman Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 10 +++++++ drivers/cxl/core/port.c | 7 ++--- drivers/cxl/core/ras.c | 49 +++++++++++++++++++++++++++++++++++ drivers/pci/pcie/aer_cxl_vh.c | 5 +++- 4 files changed, 67 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index b2c0ccd6803f9..046ec65ed1476 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -157,6 +157,8 @@ void cxl_cor_error_detected(struct device *dev); pci_ers_result_t pci_error_detected(struct pci_dev *pdev, pci_channel_state_t error); void pci_cor_error_detected(struct pci_dev *pdev); +pci_ers_result_t cxl_port_error_detected(struct device *dev); +void cxl_port_cor_error_detected(struct device *dev); #else static inline int cxl_ras_init(void) { @@ -176,6 +178,11 @@ static inline pci_ers_result_t pci_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_NONE; } static inline void pci_cor_error_detected(struct pci_dev *pdev) { } +static inline void cxl_port_cor_error_detected(struct device *dev) { } +static inline pci_ers_result_t cxl_port_error_detected(struct device *dev) +{ + return PCI_ERS_RESULT_NONE; +} #endif /* CONFIG_CXL_RAS */ /* Restricted CXL Host specific RAS functions */ @@ -190,6 +197,9 @@ static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { } #endif /* CONFIG_CXL_RCH_RAS */ int cxl_gpf_port_setup(struct cxl_dport *dport); +struct cxl_port *find_cxl_port(struct device *dport_dev, + struct cxl_dport **dport); +struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev); struct cxl_hdm; int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm 
*cxlhdm, diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index ef6094dccfc01..66d42086084c2 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1361,8 +1361,8 @@ static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx) return NULL; } -static struct cxl_port *find_cxl_port(struct device *dport_dev, - struct cxl_dport **dport) +struct cxl_port *find_cxl_port(struct device *dport_dev, + struct cxl_dport **dport) { struct cxl_find_port_ctx ctx = { .dport_dev = dport_dev, @@ -1565,7 +1565,7 @@ static int match_port_by_uport(struct device *dev, const void *data) * Function takes a device reference on the port device. Caller should do a * put_device() when done. */ -static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev) +struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev) { struct device *dev; @@ -1574,6 +1574,7 @@ static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev) return to_cxl_port(dev); return NULL; } +EXPORT_SYMBOL_NS_GPL(find_cxl_port_by_uport, "CXL"); static int update_decoder_targets(struct device *dev, void *data) { diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index abb09d4025fd6..10b8b94c9d23b 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -145,6 +145,39 @@ static void cxl_dport_map_ras(struct cxl_dport *dport) dev_dbg(dev, "Failed to map RAS capability.\n"); } +static void __iomem *cxl_get_ras_base(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + + switch (pci_pcie_type(pdev)) { + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_DOWNSTREAM: + { + struct cxl_dport *dport; + struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport); + + if (!dport) { + pci_err(pdev, "Failed to find the CXL device"); + return NULL; + } + return dport->regs.ras; + } + case PCI_EXP_TYPE_UPSTREAM: + { + struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev); + + if (!port) { + 
pci_err(pdev, "Failed to find the CXL device"); + return NULL; + } + return port->uport_regs.ras; + } + } + + dev_warn_once(dev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev)); + return NULL; +} + /** * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport * @dport: the cxl_dport that needs to be initialized @@ -254,6 +287,22 @@ pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, void __iomem *ra return PCI_ERS_RESULT_PANIC; } +void cxl_port_cor_error_detected(struct device *dev) +{ + void __iomem *ras_base = cxl_get_ras_base(dev); + + cxl_handle_cor_ras(dev, 0, ras_base); +} +EXPORT_SYMBOL_NS_GPL(cxl_port_cor_error_detected, "CXL"); + +pci_ers_result_t cxl_port_error_detected(struct device *dev) +{ + void __iomem *ras_base = cxl_get_ras_base(dev); + + return cxl_handle_ras(dev, 0, ras_base); +} +EXPORT_SYMBOL_NS_GPL(cxl_port_error_detected, "CXL"); + void cxl_cor_error_detected(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); diff --git a/drivers/pci/pcie/aer_cxl_vh.c b/drivers/pci/pcie/aer_cxl_vh.c index c1e767d1ead32..c2510ac3cb139 100644 --- a/drivers/pci/pcie/aer_cxl_vh.c +++ b/drivers/pci/pcie/aer_cxl_vh.c @@ -42,7 +42,10 @@ bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info) if (!info || !info->is_cxl) return false; - if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT) + if ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT) && + (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) && + (pci_pcie_type(pdev) != PCI_EXP_TYPE_UPSTREAM) && + (pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM)) return false; return is_internal_error(info); From c7c888e0b0dbee0e2713b67efc119d1f6e549424 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:03:01 -0600 Subject: [PATCH 50/80] NVIDIA: VR: SAUCE: PCI/AER: Dequeue forwarded CXL error The AER driver now forwards CXL protocol errors to the CXL driver via a kfifo. 
The CXL driver must consume these work items, initiate protocol error handling, and ensure RAS mappings remain valid throughout processing. Implement cxl_proto_err_work_fn() to dequeue work items forwarded by the AER service driver and begin protocol error processing by calling cxl_handle_proto_error(). Add a PCI device lock on &pdev->dev within cxl_proto_err_work_fn() to keep the PCI device structure valid during handling. Locking an Endpoint will also defer RAS unmapping until the device is unlocked. For Endpoints, add a lock on CXL memory device cxlds->dev. The CXL memory device structure holds the RAS register reference needed during error handling. Add lock for the parent CXL Port for Root Ports, Downstream Ports, and Upstream Ports to prevent destruction of structures holding mapped RAS addresses while they are in use. Invoke cxl_do_recovery() for uncorrectable errors. Treat this as a stub for now; implement its functionality in a future patch. Export pci_clean_device_status() to enable cleanup of AER status following error handling. 
Signed-off-by: Terry Bowman Reviewed-by: Kuppuswamy Sathyanarayanan (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) [jan: remove extra pci_dev_get(wd.pdev) in the consumer in cxl_proto_err_work_fn()] [jan: change to continue instead of return to avoid dropping kfifo entries in cxl_proto_err_work_fn()] [jan: retrive cxl_dev_state in if (is_pcie_endpoint()) in cxl_handle_proto_error()] Signed-off-by: Jiandi An --- drivers/cxl/core/ras.c | 154 ++++++++++++++++++++++++++++++++++++++--- drivers/pci/pci.c | 1 + drivers/pci/pci.h | 1 - include/linux/pci.h | 2 + 4 files changed, 146 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 10b8b94c9d23b..cdb80a83b403c 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -117,17 +117,6 @@ static void cxl_cper_prot_err_work_fn(struct work_struct *work) } static DECLARE_WORK(cxl_cper_prot_err_work, cxl_cper_prot_err_work_fn); -int cxl_ras_init(void) -{ - return cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work); -} - -void cxl_ras_exit(void) -{ - cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); - cancel_work_sync(&cxl_cper_prot_err_work); -} - static bool is_pcie_endpoint(struct pci_dev *pdev) { return pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT; @@ -178,6 +167,51 @@ static void __iomem *cxl_get_ras_base(struct device *dev) return NULL; } +/* + * Return 'struct cxl_port *' parent CXL port of dev's + * + * Reference count increments on success + * + * dev: Find the parent port of this dev + */ +static struct cxl_port *get_cxl_port(struct pci_dev *pdev) +{ + switch (pci_pcie_type(pdev)) { + case PCI_EXP_TYPE_ROOT_PORT: + case PCI_EXP_TYPE_DOWNSTREAM: + { + struct cxl_dport *dport; + struct cxl_port *port = find_cxl_port(&pdev->dev, &dport); + + if (!port) { + pci_err(pdev, "Failed to find the CXL device"); + return NULL; + } + return port; + } + case PCI_EXP_TYPE_UPSTREAM: + { + struct cxl_port *port = 
find_cxl_port_by_uport(&pdev->dev); + + if (!port) { + pci_err(pdev, "Failed to find the CXL device"); + return NULL; + } + return port; + } + case PCI_EXP_TYPE_ENDPOINT: + { + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + struct cxl_port *port = cxlds->cxlmd->endpoint; + + get_device(&port->dev); + return port; + } + } + pci_warn_once(pdev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev)); + return NULL; +} + /** * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport * @dport: the cxl_dport that needs to be initialized @@ -212,6 +246,23 @@ void cxl_uport_init_ras_reporting(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(cxl_uport_init_ras_reporting, "CXL"); +static bool device_lock_if(struct device *dev, bool cond) +{ + if (cond) + device_lock(dev); + return cond; +} + +static void device_unlock_if(struct device *dev, bool take) +{ + if (take) + device_unlock(dev); +} + +static void cxl_do_recovery(struct pci_dev *pdev) +{ +} + void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base) { void __iomem *addr; @@ -388,3 +439,84 @@ pci_ers_result_t pci_error_detected(struct pci_dev *pdev, return rc; } EXPORT_SYMBOL_NS_GPL(pci_error_detected, "CXL"); + +static void cxl_handle_proto_error(struct cxl_proto_err_work_data *err_info) +{ + struct pci_dev *pdev = err_info->pdev; + + if (err_info->severity == AER_CORRECTABLE) { + + if (pdev->aer_cap) + pci_clear_and_set_config_dword(pdev, + pdev->aer_cap + PCI_ERR_COR_STATUS, + 0, PCI_ERR_COR_INTERNAL); + + if (is_pcie_endpoint(pdev)) { + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + cxl_cor_error_detected(&cxlds->cxlmd->dev); + } else { + cxl_port_cor_error_detected(&pdev->dev); + } + + pcie_clear_device_status(pdev); + } else { + cxl_do_recovery(pdev); + } +} + +static void cxl_proto_err_work_fn(struct work_struct *work) +{ + struct cxl_proto_err_work_data wd; + + while (cxl_proto_err_kfifo_get(&wd)) { + struct pci_dev *pdev __free(pci_dev_put) = wd.pdev; + struct 
device *cxlmd_dev; + + if (!pdev) { + pr_err_ratelimited("NULL PCI device passed in AER-CXL KFIFO\n"); + continue; + } + + guard(device)(&pdev->dev); + if (is_pcie_endpoint(pdev)) { + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + + if (!cxl_pci_drv_bound(pdev)) + continue; + cxlmd_dev = &cxlds->cxlmd->dev; + device_lock_if(cxlmd_dev, cxlmd_dev); + } else { + cxlmd_dev = NULL; + } + + struct cxl_port *port __free(put_cxl_port) = get_cxl_port(pdev); + if (!port) + continue; + guard(device)(&port->dev); + + cxl_handle_proto_error(&wd); + device_unlock_if(cxlmd_dev, cxlmd_dev); + } +} + +static struct work_struct cxl_proto_err_work; +static DECLARE_WORK(cxl_proto_err_work, cxl_proto_err_work_fn); + +int cxl_ras_init(void) +{ + if (cxl_cper_register_prot_err_work(&cxl_cper_prot_err_work)) + pr_err("Failed to initialize CXL RAS CPER\n"); + + cxl_register_proto_err_work(&cxl_proto_err_work); + + return 0; +} + +void cxl_ras_exit(void) +{ + cxl_cper_unregister_prot_err_work(&cxl_cper_prot_err_work); + cancel_work_sync(&cxl_cper_prot_err_work); + + cxl_unregister_proto_err_work(); + cancel_work_sync(&cxl_proto_err_work); +} diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 73b586e4efa68..ec876d46264c2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2333,6 +2333,7 @@ void pcie_clear_device_status(struct pci_dev *dev) pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta); pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta); } +EXPORT_SYMBOL_GPL(pcie_clear_device_status); #endif /** diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 758f2f40ce4e7..0ce95fa1db6d9 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -137,7 +137,6 @@ void pci_refresh_power_state(struct pci_dev *dev); int pci_power_up(struct pci_dev *dev); void pci_disable_enabled_device(struct pci_dev *dev); int pci_finish_runtime_suspend(struct pci_dev *dev); -void pcie_clear_device_status(struct pci_dev *dev); void pcie_clear_root_pme_status(struct pci_dev *dev); bool 
pci_check_pme_status(struct pci_dev *dev); void pci_pme_wakeup_bus(struct pci_bus *bus); diff --git a/include/linux/pci.h b/include/linux/pci.h index 35aa99fee4600..eb38b08d3e2d3 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1893,8 +1893,10 @@ static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } #ifdef CONFIG_PCIEAER bool pci_aer_available(void); +void pcie_clear_device_status(struct pci_dev *dev); #else static inline bool pci_aer_available(void) { return false; } +static inline void pcie_clear_device_status(struct pci_dev *dev) { } #endif bool pci_ats_disabled(void); From 579482c66568463898797f730799e8c27dd30c71 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:03:02 -0600 Subject: [PATCH 51/80] NVIDIA: VR: SAUCE: CXL/PCI: Export and rename merge_result() to pci_ers_merge_result() CXL uncorrectable errors (UCE) will soon be handled separately from the PCI AER handling. The merge_result() function can be made common to use in both handling paths. Rename the PCI subsystem's merge_result() to be pci_ers_merge_result(). Export pci_ers_merge_result() to make available for the CXL and other drivers to use. Update pci_ers_merge_result() to support recently introduced PCI_ERS_RESULT_PANIC result. 
Signed-off-by: Terry Bowman (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/pci/pcie/err.c | 14 +++++++++----- include/linux/pci.h | 7 +++++++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index e85b9cd5fec1b..feefce97c6872 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -21,9 +21,12 @@ #include "portdrv.h" #include "../pci.h" -static pci_ers_result_t merge_result(enum pci_ers_result orig, - enum pci_ers_result new) +pci_ers_result_t pcie_ers_merge_result(enum pci_ers_result orig, + enum pci_ers_result new) { + if (new == PCI_ERS_RESULT_PANIC) + return PCI_ERS_RESULT_PANIC; + if (new == PCI_ERS_RESULT_NO_AER_DRIVER) return PCI_ERS_RESULT_NO_AER_DRIVER; @@ -45,6 +48,7 @@ static pci_ers_result_t merge_result(enum pci_ers_result orig, return orig; } +EXPORT_SYMBOL(pcie_ers_merge_result); static int report_error_detected(struct pci_dev *dev, pci_channel_state_t state, @@ -81,7 +85,7 @@ static int report_error_detected(struct pci_dev *dev, vote = err_handler->error_detected(dev, state); } pci_uevent_ers(dev, vote); - *result = merge_result(*result, vote); + *result = pcie_ers_merge_result(*result, vote); device_unlock(&dev->dev); return 0; } @@ -127,7 +131,7 @@ static int report_mmio_enabled(struct pci_dev *dev, void *data) err_handler = pdrv->err_handler; vote = err_handler->mmio_enabled(dev); - *result = merge_result(*result, vote); + *result = pcie_ers_merge_result(*result, vote); out: device_unlock(&dev->dev); return 0; @@ -147,7 +151,7 @@ static int report_slot_reset(struct pci_dev *dev, void *data) err_handler = pdrv->err_handler; vote = err_handler->slot_reset(dev); - *result = merge_result(*result, vote); + *result = pcie_ers_merge_result(*result, vote); out: device_unlock(&dev->dev); return 0; diff --git a/include/linux/pci.h b/include/linux/pci.h index eb38b08d3e2d3..546f9e1c4b13c 100644 
--- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1894,9 +1894,16 @@ static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } #ifdef CONFIG_PCIEAER bool pci_aer_available(void); void pcie_clear_device_status(struct pci_dev *dev); +pci_ers_result_t pcie_ers_merge_result(enum pci_ers_result orig, + enum pci_ers_result new); #else static inline bool pci_aer_available(void) { return false; } static inline void pcie_clear_device_status(struct pci_dev *dev) { } +static inline pci_ers_result_t pcie_ers_merge_result(enum pci_ers_result orig, + enum pci_ers_result new) +{ + return PCI_ERS_RESULT_NONE; +} #endif bool pci_ats_disabled(void); From eab2c4dd284e792c05ea9f51b4bca837c67855f2 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:03:03 -0600 Subject: [PATCH 52/80] NVIDIA: VR: SAUCE: CXL/PCI: Introduce CXL uncorrectable protocol error recovery Implement cxl_do_recovery() to handle uncorrectable protocol errors (UCE), following the design of pcie_do_recovery(). Unlike PCIe, all CXL UCEs are treated as fatal and trigger a kernel panic to avoid potential CXL memory corruption. Add cxl_walk_port(), analogous to pci_walk_bridge(), to traverse the CXL topology from the error source through downstream CXL ports and endpoints. Introduce cxl_report_error_detected(), mirroring PCI's report_error_detected(), and implement device locking for the affected subtree. Endpoints require locking the PCI device (pdev->dev) and the CXL memdev (cxlmd->dev). CXL ports require locking the PCI device (pdev->dev) and the parent CXL port. The device locks should be taken early where possible. The initially reporting device will be locked after kfifo dequeue. Iterated devices will be locked in cxl_report_error_detected() and must lock the iterated devices except for the first device as it has already been locked. Export pci_aer_clear_fatal_status() for use when a UCE is not present. 
Signed-off-by: Terry Bowman (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) [jan: Fix NULL dereference in cxl_proto_err_work_fun() when get_cxl_port() returns NULL] Signed-off-by: Jiandi An --- drivers/cxl/core/ras.c | 133 ++++++++++++++++++++++++++++++++++++++++- drivers/pci/pci.h | 1 - drivers/pci/pcie/aer.c | 1 + include/linux/aer.h | 2 + 4 files changed, 135 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index cdb80a83b403c..35b5916eabc17 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -259,8 +259,138 @@ static void device_unlock_if(struct device *dev, bool take) device_unlock(dev); } +/** + * cxl_report_error_detected + * @dev: Device being reported + * @data: Result + * @err_pdev: Device with initial detected error. Is locked immediately + * after KFIFO dequeue. + */ +static int cxl_report_error_detected(struct device *dev, void *data, struct pci_dev *err_pdev) +{ + bool need_lock = (dev != &err_pdev->dev); + pci_ers_result_t vote, *result = data; + struct pci_dev *pdev; + + if (!dev || !dev_is_pci(dev)) + return 0; + pdev = to_pci_dev(dev); + + device_lock_if(&pdev->dev, need_lock); + if (is_pcie_endpoint(pdev) && !cxl_pci_drv_bound(pdev)) { + device_unlock_if(&pdev->dev, need_lock); + return PCI_ERS_RESULT_NONE; + } + + if (pdev->aer_cap) + pci_clear_and_set_config_dword(pdev, + pdev->aer_cap + PCI_ERR_COR_STATUS, + 0, PCI_ERR_COR_INTERNAL); + + if (is_pcie_endpoint(pdev)) { + struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); + + device_lock_if(&cxlds->cxlmd->dev, need_lock); + vote = cxl_error_detected(&cxlds->cxlmd->dev); + device_unlock_if(&cxlds->cxlmd->dev, need_lock); + } else { + vote = cxl_port_error_detected(dev); + } + + pcie_clear_device_status(pdev); + *result = pcie_ers_merge_result(*result, vote); + device_unlock_if(&pdev->dev, need_lock); + + return 0; +} + +static int match_port_by_parent_dport(struct device *dev, const void 
*dport_dev) +{ + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + + port = to_cxl_port(dev); + + return port->parent_dport->dport_dev == dport_dev; +} + +/** + * cxl_walk_port + * + * @port: Port be traversed into + * @cb: Callback for handling the CXL Ports + * @userdata: Result + * @err_pdev: Device with initial detected error. Is locked immediately + * after KFIFO dequeue. + */ +static void cxl_walk_port(struct cxl_port *port, + int (*cb)(struct device *, void *, struct pci_dev *), + void *userdata, + struct pci_dev *err_pdev) +{ + struct cxl_port *err_port __free(put_cxl_port) = get_cxl_port(err_pdev); + bool need_lock = (port != err_port); + struct cxl_dport *dport = NULL; + unsigned long index; + + device_lock_if(&port->dev, need_lock); + if (is_cxl_endpoint(port)) { + cb(port->uport_dev->parent, userdata, err_pdev); + device_unlock_if(&port->dev, need_lock); + return; + } + + if (port->uport_dev && dev_is_pci(port->uport_dev)) + cb(port->uport_dev, userdata, err_pdev); + + /* + * Iterate over the set of Downstream Ports recorded in port->dports (XArray): + * - For each dport, attempt to find a child CXL Port whose parent dport + * match. + * - Invoke the provided callback on the dport's device. + * - If a matching child CXL Port device is found, recurse into that port to + * continue the walk. 
+ */ + xa_for_each(&port->dports, index, dport) + { + struct device *child_port_dev __free(put_device) = + bus_find_device(&cxl_bus_type, &port->dev, dport->dport_dev, + match_port_by_parent_dport); + + cb(dport->dport_dev, userdata, err_pdev); + if (child_port_dev) + cxl_walk_port(to_cxl_port(child_port_dev), cb, userdata, err_pdev); + } + device_unlock_if(&port->dev, need_lock); +} + static void cxl_do_recovery(struct pci_dev *pdev) { + pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; + struct cxl_port *port __free(put_cxl_port) = get_cxl_port(pdev); + + if (!port) { + pci_err(pdev, "Failed to find the CXL device\n"); + return; + } + + cxl_walk_port(port, cxl_report_error_detected, &status, pdev); + if (status == PCI_ERS_RESULT_PANIC) + panic("CXL cachemem error."); + + /* + * If we have native control of AER, clear error status in the device + * that detected the error. If the platform retained control of AER, + * it is responsible for clearing this status. In that case, the + * signaling device may not even be visible to the OS. 
+ */ + if (cxl_error_is_native(pdev)) { + pcie_clear_device_status(pdev); + pci_aer_clear_nonfatal_status(pdev); + pci_aer_clear_fatal_status(pdev); + } } void cxl_handle_cor_ras(struct device *dev, u64 serial, void __iomem *ras_base) @@ -484,16 +614,17 @@ static void cxl_proto_err_work_fn(struct work_struct *work) if (!cxl_pci_drv_bound(pdev)) continue; cxlmd_dev = &cxlds->cxlmd->dev; - device_lock_if(cxlmd_dev, cxlmd_dev); } else { cxlmd_dev = NULL; } + /* Lock the CXL parent Port */ struct cxl_port *port __free(put_cxl_port) = get_cxl_port(pdev); if (!port) continue; guard(device)(&port->dev); + device_lock_if(cxlmd_dev, cxlmd_dev); cxl_handle_proto_error(&wd); device_unlock_if(cxlmd_dev, cxlmd_dev); } diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 0ce95fa1db6d9..82c6b7a498e6c 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -1089,7 +1089,6 @@ void pci_restore_aer_state(struct pci_dev *dev); static inline void pci_no_aer(void) { } static inline void pci_aer_init(struct pci_dev *d) { } static inline void pci_aer_exit(struct pci_dev *d) { } -static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } static inline int pci_aer_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline int pci_aer_raw_clear_status(struct pci_dev *dev) { return -EINVAL; } static inline void pci_save_aer_state(struct pci_dev *dev) { } diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 11b16833ee35a..0c7370f9573e1 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -287,6 +287,7 @@ void pci_aer_clear_fatal_status(struct pci_dev *dev) if (status) pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, status); } +EXPORT_SYMBOL_GPL(pci_aer_clear_fatal_status); /** * pci_aer_raw_clear_status - Clear AER error registers. 
diff --git a/include/linux/aer.h b/include/linux/aer.h index 6b2c87d1b5b6d..64aef69fb546b 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -66,6 +66,7 @@ struct cxl_proto_err_work_data { #if defined(CONFIG_PCIEAER) int pci_aer_clear_nonfatal_status(struct pci_dev *dev); +void pci_aer_clear_fatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); void pci_aer_unmask_internal_errors(struct pci_dev *dev); #else @@ -73,6 +74,7 @@ static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { return -EINVAL; } +static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } #endif From b91e1d4788e02a1a419222e736cdb52eabe5dacf Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:03:04 -0600 Subject: [PATCH 53/80] NVIDIA: VR: SAUCE: CXL/PCI: Enable CXL protocol errors during CXL Port probe CXL protocol errors are not enabled for all CXL devices after boot. These must be enabled inorder to process CXL protocol errors. Introduce cxl_unmask_proto_interrupts() to call pci_aer_unmask_internal_errors(). pci_aer_unmask_internal_errors() expects the pdev->aer_cap is initialized. But, dev->aer_cap is not initialized for CXL Upstream Switch Ports and CXL Downstream Switch Ports. Initialize the dev->aer_cap if necessary. Enable AER correctable internal errors and uncorrectable internal errors for all CXL devices. 
Signed-off-by: Terry Bowman Reviewed-by: Jonathan Cameron Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 4 ++++ drivers/cxl/core/port.c | 4 ++++ drivers/cxl/core/ras.c | 26 +++++++++++++++++++++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 046ec65ed1476..a7a0838c8f23a 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -159,6 +159,8 @@ pci_ers_result_t pci_error_detected(struct pci_dev *pdev, void pci_cor_error_detected(struct pci_dev *pdev); pci_ers_result_t cxl_port_error_detected(struct device *dev); void cxl_port_cor_error_detected(struct device *dev); +void cxl_mask_proto_interrupts(struct device *dev); +void cxl_unmask_proto_interrupts(struct device *dev); #else static inline int cxl_ras_init(void) { @@ -183,6 +185,8 @@ static inline pci_ers_result_t cxl_port_error_detected(struct device *dev) { return PCI_ERS_RESULT_NONE; } +static inline void cxl_unmask_proto_interrupts(struct device *dev) { } +static inline void cxl_mask_proto_interrupts(struct device *dev) { } #endif /* CONFIG_CXL_RAS */ /* Restricted CXL Host specific RAS functions */ diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 66d42086084c2..88e13c453914b 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1748,6 +1748,8 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, rc = -ENXIO; } + cxl_unmask_proto_interrupts(cxlmd->cxlds->dev); + return rc; } @@ -1834,6 +1836,8 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) rc = cxl_add_ep(dport, &cxlmd->dev); + cxl_unmask_proto_interrupts(cxlmd->cxlds->dev); + /* * If the endpoint already exists in the port's list, * that's ok, it was added on a previous pass. 
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 35b5916eabc17..15ab9b07c9e88 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -122,6 +122,23 @@ static bool is_pcie_endpoint(struct pci_dev *pdev) return pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT; } +void cxl_unmask_proto_interrupts(struct device *dev) +{ + if (!dev || !dev_is_pci(dev)) + return; + + struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(to_pci_dev(dev)); + + if (!pdev->aer_cap) { + pdev->aer_cap = pci_find_ext_capability(pdev, + PCI_EXT_CAP_ID_ERR); + if (!pdev->aer_cap) + return; + } + + pci_aer_unmask_internal_errors(pdev); +} + static void cxl_dport_map_ras(struct cxl_dport *dport) { struct cxl_register_map *map = &dport->reg_map; @@ -230,7 +247,10 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) cxl_dport_map_rch_aer(dport); cxl_disable_rch_root_ints(dport); + return; } + + cxl_unmask_proto_interrupts(dport->dport_dev); } EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL"); @@ -241,8 +261,12 @@ void cxl_uport_init_ras_reporting(struct cxl_port *port, map->host = host; if (cxl_map_component_regs(map, &port->uport_regs, - BIT(CXL_CM_CAP_CAP_ID_RAS))) + BIT(CXL_CM_CAP_CAP_ID_RAS))) { dev_dbg(&port->dev, "Failed to map RAS capability\n"); + return; + } + + cxl_unmask_proto_interrupts(port->uport_dev); } EXPORT_SYMBOL_NS_GPL(cxl_uport_init_ras_reporting, "CXL"); From 47a674c2e24dfc28ecb93ac35580c7d2aa3093ed Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Tue, 4 Nov 2025 11:03:05 -0600 Subject: [PATCH 54/80] NVIDIA: VR: SAUCE: CXL/PCI: Disable CXL protocol error interrupts during CXL Port cleanup During CXL device cleanup the CXL PCIe Port device interrupts remain enabled. This potentially allows unnecessary interrupt processing on behalf of the CXL errors while the device is destroyed. Disable CXL protocol errors by setting the CXL devices' AER mask register. 
Introduce pci_aer_mask_internal_errors() similar to pci_aer_unmask_internal_errors(). Add to the AER service driver allowing other subsystems to use. Introduce cxl_mask_proto_interrupts() to call pci_aer_mask_internal_errors(). Add calls to cxl_mask_proto_interrupts() within CXL Port teardown for CXL Root Ports, CXL Downstream Switch Ports, CXL Upstream Switch Ports, and CXL Endpoints. Follow the same "bottom-up" approach used during CXL Port teardown. Signed-off-by: Terry Bowman Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron (backported from https://lore.kernel.org/linux-cxl/20251104170305.4163840-1-terry.bowman@amd.com/) [jan: add aer_cap check in cxl_mask_proto_interrupt()] Signed-off-by: Jiandi An --- drivers/cxl/core/port.c | 10 +++++++++- drivers/cxl/core/ras.c | 17 +++++++++++++++++ drivers/pci/pcie/aer.c | 21 +++++++++++++++++++++ include/linux/aer.h | 2 ++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 88e13c453914b..0a4138bd05454 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -1438,6 +1438,10 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL"); */ static void delete_switch_port(struct cxl_port *port) { + cxl_mask_proto_interrupts(port->uport_dev); + if (port->parent_dport) + cxl_mask_proto_interrupts(port->parent_dport->dport_dev); + devm_release_action(port->dev.parent, cxl_unlink_parent_dport, port); devm_release_action(port->dev.parent, cxl_unlink_uport, port); devm_release_action(port->dev.parent, unregister_port, port); @@ -1459,8 +1463,10 @@ static void del_dports(struct cxl_port *port) device_lock_assert(&port->dev); - xa_for_each(&port->dports, index, dport) + xa_for_each(&port->dports, index, dport) { + cxl_mask_proto_interrupts(dport->dport_dev); del_dport(dport); + } } struct detach_ctx { @@ -1487,6 +1493,8 @@ static void cxl_detach_ep(void *data) { struct cxl_memdev *cxlmd = data; + cxl_mask_proto_interrupts(cxlmd->cxlds->dev); + for (int i 
= cxlmd->depth - 1; i >= 1; i--) { struct cxl_port *port, *parent_port; struct detach_ctx ctx = { diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c index 15ab9b07c9e88..46ce96a89814b 100644 --- a/drivers/cxl/core/ras.c +++ b/drivers/cxl/core/ras.c @@ -139,6 +139,23 @@ void cxl_unmask_proto_interrupts(struct device *dev) pci_aer_unmask_internal_errors(pdev); } +void cxl_mask_proto_interrupts(struct device *dev) +{ + if (!dev || !dev_is_pci(dev)) + return; + + struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(to_pci_dev(dev)); + + if (!pdev->aer_cap) { + pdev->aer_cap = pci_find_ext_capability(pdev, + PCI_EXT_CAP_ID_ERR); + if (!pdev->aer_cap) + return; + } + + pci_aer_mask_internal_errors(pdev); +} + static void cxl_dport_map_ras(struct cxl_dport *dport) { struct cxl_register_map *map = &dport->reg_map; diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 0c7370f9573e1..4b187dc4f1d0a 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1125,6 +1125,27 @@ void pci_aer_unmask_internal_errors(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_aer_unmask_internal_errors); +/** + * pci_aer_mask_internal_errors - mask internal errors + * @dev: pointer to the pcie_dev data structure + * + * Masks internal errors in the Uncorrectable and Correctable Error + * Mask registers. + * + * Note: AER must be enabled and supported by the device which must be + * checked in advance, e.g. with pcie_aer_is_native(). 
+ */ +void pci_aer_mask_internal_errors(struct pci_dev *dev) +{ + int aer = dev->aer_cap; + + pci_clear_and_set_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, + 0, PCI_ERR_UNC_INTN); + pci_clear_and_set_config_dword(dev, aer + PCI_ERR_COR_MASK, + 0, PCI_ERR_COR_INTERNAL); +} +EXPORT_SYMBOL_GPL(pci_aer_mask_internal_errors); + /** * pci_aer_handle_error - handle logging error into an event log * @dev: pointer to pci_dev data structure of error source device diff --git a/include/linux/aer.h b/include/linux/aer.h index 64aef69fb546b..2b89bd940ac15 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -69,6 +69,7 @@ int pci_aer_clear_nonfatal_status(struct pci_dev *dev); void pci_aer_clear_fatal_status(struct pci_dev *dev); int pcie_aer_is_native(struct pci_dev *dev); void pci_aer_unmask_internal_errors(struct pci_dev *dev); +void pci_aer_mask_internal_errors(struct pci_dev *dev); #else static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) { @@ -77,6 +78,7 @@ static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev) static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { } static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; } static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { } +static inline void pci_aer_mask_internal_errors(struct pci_dev *dev) { } #endif #ifdef CONFIG_CXL_RAS From ebc76f8c87a282cbe455a3c6bee687e491868809 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 5 Dec 2025 11:52:24 +0000 Subject: [PATCH 55/80] NVIDIA: VR: SAUCE: cxl/mem: Arrange for always-synchronous memdev attach In preparation for CXL accelerator drivers that have a hard dependency on CXL capability initialization, arrange for the endpoint probe result to be conveyed to the caller of devm_cxl_add_memdev(). As it stands cxl_pci does not care about the attach state of the cxl_memdev because all generic memory expansion functionality can be handled by the cxl_core. 
For accelerators, that driver needs to perform driver specific initialization if CXL is available, or execute a fallback to PCIe only operation. By moving devm_cxl_add_memdev() to cxl_mem.ko it removes async module loading as one reason that a memdev may not be attached upon return from devm_cxl_add_memdev(). The diff is busy as this moves cxl_memdev_alloc() down below the definition of cxl_memdev_fops and introduces devm_cxl_memdev_add_or_reset() to preclude needing to export more symbols from the cxl_core. Signed-off-by: Dan Williams (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/Kconfig | 4 +- drivers/cxl/core/memdev.c | 97 ++++++++++++++++----------------- drivers/cxl/mem.c | 30 ++++++++++++ drivers/cxl/private.h | 11 +++++ 4 files changed, 83 insertions(+), 59 deletions(-) create mode 100644 drivers/cxl/private.h diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 360c78fa7e976..94a3102ce86be 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig CXL_BUS - tristate "CXL (Compute Express Link) Devices Support" + bool "CXL (Compute Express Link) Devices Support" depends on PCI select FW_LOADER select FW_UPLOAD @@ -22,6 +22,7 @@ if CXL_BUS config CXL_PCI bool "PCI manageability" default CXL_BUS + select CXL_MEM help The CXL specification defines a "CXL memory device" sub-class in the PCI "memory controller" base class of devices. 
Device's identified by @@ -89,7 +90,6 @@ config CXL_PMEM config CXL_MEM tristate "CXL: Memory Expansion" - depends on CXL_PCI default CXL_BUS help The CXL.mem protocol allows a device to act as a provider of "System diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index e370d733e4400..3152e9ef41fc4 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -8,6 +8,7 @@ #include #include #include +#include "private.h" #include "trace.h" #include "core.h" @@ -648,42 +649,29 @@ static void detach_memdev(struct work_struct *work) static struct lock_class_key cxl_memdev_key; -static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, - const struct file_operations *fops) +struct cxl_memdev *devm_cxl_memdev_add_or_reset(struct device *host, + struct cxl_memdev *cxlmd) { - struct cxl_memdev *cxlmd; - struct device *dev; - struct cdev *cdev; + struct device *dev = &cxlmd->dev; + struct cdev *cdev = &cxlmd->cdev; int rc; - cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); - if (!cxlmd) - return ERR_PTR(-ENOMEM); - - rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL); - if (rc < 0) - goto err; - cxlmd->id = rc; - cxlmd->depth = -1; - - dev = &cxlmd->dev; - device_initialize(dev); - lockdep_set_class(&dev->mutex, &cxl_memdev_key); - dev->parent = cxlds->dev; - dev->bus = &cxl_bus_type; - dev->devt = MKDEV(cxl_mem_major, cxlmd->id); - dev->type = &cxl_memdev_type; - device_set_pm_not_required(dev); - INIT_WORK(&cxlmd->detach_work, detach_memdev); - - cdev = &cxlmd->cdev; - cdev_init(cdev, fops); + rc = cdev_device_add(cdev, dev); + if (rc) { + /* + * The cdev was briefly live, shutdown any ioctl operations that + * saw that state. 
+ */ + cxl_memdev_shutdown(dev); + put_device(dev); + return ERR_PTR(rc); + } + rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); + if (rc) + return ERR_PTR(rc); return cxlmd; - -err: - kfree(cxlmd); - return ERR_PTR(rc); } +EXPORT_SYMBOL_NS_GPL(devm_cxl_memdev_add_or_reset, "CXL"); static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd, unsigned long arg) @@ -1051,50 +1039,45 @@ static const struct file_operations cxl_memdev_fops = { .llseek = noop_llseek, }; -struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds) +struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds) { struct cxl_memdev *cxlmd; struct device *dev; struct cdev *cdev; int rc; - cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops); - if (IS_ERR(cxlmd)) - return cxlmd; + cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL); + if (!cxlmd) + return ERR_PTR(-ENOMEM); - dev = &cxlmd->dev; - rc = dev_set_name(dev, "mem%d", cxlmd->id); - if (rc) + rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL); + if (rc < 0) goto err; - /* - * Activate ioctl operations, no cxl_memdev_rwsem manipulation - * needed as this is ordered with cdev_add() publishing the device. - */ + cxlmd->id = rc; + cxlmd->depth = -1; cxlmd->cxlds = cxlds; cxlds->cxlmd = cxlmd; - cdev = &cxlmd->cdev; - rc = cdev_device_add(cdev, dev); - if (rc) - goto err; + dev = &cxlmd->dev; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_memdev_key); + dev->parent = cxlds->dev; + dev->bus = &cxl_bus_type; + dev->devt = MKDEV(cxl_mem_major, cxlmd->id); + dev->type = &cxl_memdev_type; + device_set_pm_not_required(dev); + INIT_WORK(&cxlmd->detach_work, detach_memdev); - rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd); - if (rc) - return ERR_PTR(rc); + cdev = &cxlmd->cdev; + cdev_init(cdev, &cxl_memdev_fops); return cxlmd; err: - /* - * The cdev was briefly live, shutdown any ioctl operations that - * saw that state. 
- */ - cxl_memdev_shutdown(dev); - put_device(dev); + kfree(cxlmd); return ERR_PTR(rc); } -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL"); +EXPORT_SYMBOL_NS_GPL(cxl_memdev_alloc, "CXL"); static void sanitize_teardown_notifier(void *data) { diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index d2155f45240da..ac354fee704ca 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -7,6 +7,7 @@ #include "cxlmem.h" #include "cxlpci.h" +#include "private.h" /** * DOC: cxl mem @@ -202,6 +203,34 @@ static int cxl_mem_probe(struct device *dev) return devm_add_action_or_reset(dev, enable_suspend, NULL); } +/** + * devm_cxl_add_memdev - Add a CXL memory device + * @host: devres alloc/release context and parent for the memdev + * @cxlds: CXL device state to associate with the memdev + * + * Upon return the device will have had a chance to attach to the + * cxl_mem driver, but may fail if the CXL topology is not ready + * (hardware CXL link down, or software platform CXL root not attached) + */ +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds) +{ + struct cxl_memdev *cxlmd = cxl_memdev_alloc(cxlds); + int rc; + + if (IS_ERR(cxlmd)) + return cxlmd; + + rc = dev_set_name(&cxlmd->dev, "mem%d", cxlmd->id); + if (rc) { + put_device(&cxlmd->dev); + return ERR_PTR(rc); + } + + return devm_cxl_memdev_add_or_reset(host, cxlmd); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL"); + static ssize_t trigger_poison_list_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) @@ -250,6 +279,7 @@ static struct cxl_driver cxl_mem_driver = { .id = CXL_DEVICE_MEMORY_EXPANDER, .drv = { .dev_groups = cxl_mem_groups, + .probe_type = PROBE_FORCE_SYNCHRONOUS, }, }; diff --git a/drivers/cxl/private.h b/drivers/cxl/private.h new file mode 100644 index 0000000000000..eff425822af32 --- /dev/null +++ b/drivers/cxl/private.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2025 Intel Corporation. 
*/ + +/* Private interfaces betwen common drivers ("cxl_mem") and the cxl_core */ + +#ifndef __CXL_PRIVATE_H__ +#define __CXL_PRIVATE_H__ +struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds); +struct cxl_memdev *devm_cxl_memdev_add_or_reset(struct device *host, + struct cxl_memdev *cxlmd); +#endif /* __CXL_PRIVATE_H__ */ From d42c8094b4652141c005651324f63c610e7775a5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 5 Dec 2025 11:52:25 +0000 Subject: [PATCH 56/80] NVIDIA: VR: SAUCE: cxl/port: Arrange for always synchronous endpoint attach Make it so that upon return from devm_cxl_add_endpoint() that cxl_mem_probe() can assume that the endpoint has had a chance to complete cxl_port_probe(). I.e. cxl_port module loading has completed prior to device registration. MODULE_SOFTDEP() is not sufficient for this purpose, but a hard link-time dependency is reliable. Signed-off-by: Dan Williams (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/mem.c | 43 ------------------------------------------- drivers/cxl/port.c | 41 +++++++++++++++++++++++++++++++++++++++++ drivers/cxl/private.h | 8 ++++++-- 3 files changed, 47 insertions(+), 45 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index ac354fee704ca..8569c01bf3c20 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -46,44 +46,6 @@ static int cxl_mem_dpa_show(struct seq_file *file, void *data) return 0; } -static int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, - struct cxl_dport *parent_dport) -{ - struct cxl_port *parent_port = parent_dport->port; - struct cxl_port *endpoint, *iter, *down; - int rc; - - /* - * Now that the path to the root is established record all the - * intervening ports in the chain. 
- */ - for (iter = parent_port, down = NULL; !is_cxl_root(iter); - down = iter, iter = to_cxl_port(iter->dev.parent)) { - struct cxl_ep *ep; - - ep = cxl_ep_load(iter, cxlmd); - ep->next = down; - } - - /* Note: endpoint port component registers are derived from @cxlds */ - endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, - parent_dport); - if (IS_ERR(endpoint)) - return PTR_ERR(endpoint); - - rc = cxl_endpoint_autoremove(cxlmd, endpoint); - if (rc) - return rc; - - if (!endpoint->dev.driver) { - dev_err(&cxlmd->dev, "%s failed probe\n", - dev_name(&endpoint->dev)); - return -ENXIO; - } - - return 0; -} - static int cxl_debugfs_poison_inject(void *data, u64 dpa) { struct cxl_memdev *cxlmd = data; @@ -289,8 +251,3 @@ MODULE_DESCRIPTION("CXL: Memory Expansion"); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS("CXL"); MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); -/* - * create_endpoint() wants to validate port driver attach immediately after - * endpoint registration. - */ -MODULE_SOFTDEP("pre: cxl_port"); diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index 51c8f2f84717a..ef65d983e1c82 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -6,6 +6,7 @@ #include "cxlmem.h" #include "cxlpci.h" +#include "private.h" /** * DOC: cxl port @@ -156,10 +157,50 @@ static struct cxl_driver cxl_port_driver = { .probe = cxl_port_probe, .id = CXL_DEVICE_PORT, .drv = { + .probe_type = PROBE_FORCE_SYNCHRONOUS, .dev_groups = cxl_port_attribute_groups, }, }; +int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, + struct cxl_dport *parent_dport) +{ + struct cxl_port *parent_port = parent_dport->port; + struct cxl_port *endpoint, *iter, *down; + int rc; + + /* + * Now that the path to the root is established record all the + * intervening ports in the chain. 
+ */ + for (iter = parent_port, down = NULL; !is_cxl_root(iter); + down = iter, iter = to_cxl_port(iter->dev.parent)) { + struct cxl_ep *ep; + + ep = cxl_ep_load(iter, cxlmd); + ep->next = down; + } + + /* Note: endpoint port component registers are derived from @cxlds */ + endpoint = devm_cxl_add_port(host, &cxlmd->dev, CXL_RESOURCE_NONE, + parent_dport); + if (IS_ERR(endpoint)) + return PTR_ERR(endpoint); + + rc = cxl_endpoint_autoremove(cxlmd, endpoint); + if (rc) + return rc; + + if (!endpoint->dev.driver) { + dev_err(&cxlmd->dev, "%s failed probe\n", + dev_name(&endpoint->dev)); + return -ENXIO; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_endpoint, "CXL"); + static int __init cxl_port_init(void) { return cxl_driver_register(&cxl_port_driver); diff --git a/drivers/cxl/private.h b/drivers/cxl/private.h index eff425822af32..93ff0101dd4bb 100644 --- a/drivers/cxl/private.h +++ b/drivers/cxl/private.h @@ -1,11 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2025 Intel Corporation. */ -/* Private interfaces betwen common drivers ("cxl_mem") and the cxl_core */ - +/* + * Private interfaces betwen common drivers ("cxl_mem", "cxl_port") and + * the cxl_core. + */ #ifndef __CXL_PRIVATE_H__ #define __CXL_PRIVATE_H__ struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds); struct cxl_memdev *devm_cxl_memdev_add_or_reset(struct device *host, struct cxl_memdev *cxlmd); +int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, + struct cxl_dport *parent_dport); #endif /* __CXL_PRIVATE_H__ */ From 693cf4a2659b6e578d392375a499e3474af8c46e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 5 Dec 2025 11:52:26 +0000 Subject: [PATCH 57/80] NVIDIA: VR: SAUCE: cxl/mem: Introduce a memdev creation ->probe() operation Allow for a driver to pass a routine to be called in cxl_mem_probe() context. 
This ability mirrors the semantics of faux_device_create() and allows CXL-topology-attach dependent logic to be run on behalf of the caller. This capability is needed for CXL accelerator device drivers that need to make decisions about enabling CXL dependent functionality in the device, or falling back to PCIe-only operation. The probe callback runs after the port topology is successfully attached for the given memdev. Signed-off-by: Dan Williams (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/memdev.c | 5 ++++- drivers/cxl/core/pci_drv.c | 2 +- drivers/cxl/cxlmem.h | 10 +++++++++- drivers/cxl/mem.c | 33 ++++++++++++++++++++++++++++++--- drivers/cxl/private.h | 3 ++- tools/testing/cxl/test/mem.c | 2 +- 6 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 3152e9ef41fc4..fd64f558c8fdb 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -1039,7 +1039,8 @@ static const struct file_operations cxl_memdev_fops = { .llseek = noop_llseek, }; -struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds) +struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops) { struct cxl_memdev *cxlmd; struct device *dev; @@ -1056,6 +1057,8 @@ struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds) cxlmd->id = rc; cxlmd->depth = -1; + cxlmd->ops = ops; + cxlmd->endpoint = ERR_PTR(-ENXIO); cxlmd->cxlds = cxlds; cxlds->cxlmd = cxlmd; diff --git a/drivers/cxl/core/pci_drv.c b/drivers/cxl/core/pci_drv.c index bc3c959f7eb63..f43590062efd6 100644 --- a/drivers/cxl/core/pci_drv.c +++ b/drivers/cxl/core/pci_drv.c @@ -1007,7 +1007,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) dev_dbg(&pdev->dev, "No CXL Features discovered\n"); - cxlmd = 
devm_cxl_add_memdev(&pdev->dev, cxlds, NULL); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 434031a0c1f74..63b1957fddda0 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -34,6 +34,10 @@ (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) != \ CXLMDEV_RESET_NEEDED_NOT) +struct cxl_memdev_ops { + int (*probe)(struct cxl_memdev *cxlmd); +}; + /** * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device * @dev: driver core device object @@ -43,6 +47,7 @@ * @cxl_nvb: coordinate removal of @cxl_nvd if present * @cxl_nvd: optional bridge to an nvdimm if the device supports pmem * @endpoint: connection to the CXL port topology for this memory device + * @ops: incremental caller specific probe routine * @id: id number of this memdev instance. * @depth: endpoint port depth * @scrub_cycle: current scrub cycle set for this device @@ -59,6 +64,7 @@ struct cxl_memdev { struct cxl_nvdimm_bridge *cxl_nvb; struct cxl_nvdimm *cxl_nvd; struct cxl_port *endpoint; + const struct cxl_memdev_ops *ops; int id; int depth; u8 scrub_cycle; @@ -96,7 +102,9 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) } struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds); + struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops); + int devm_cxl_sanitize_setup_notifier(struct device *host, struct cxl_memdev *cxlmd); struct cxl_memdev_state; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 8569c01bf3c20..b36d8bb812a36 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -144,6 +144,12 @@ static int cxl_mem_probe(struct device *dev) return rc; } + if (cxlmd->ops) { + rc = cxlmd->ops->probe(cxlmd); + if (rc) + return rc; + } + rc = devm_cxl_memdev_edac_register(cxlmd); if (rc) dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc); @@ -169,15 +175,17 @@ static int cxl_mem_probe(struct device *dev) * devm_cxl_add_memdev - Add a CXL memory device * 
@host: devres alloc/release context and parent for the memdev * @cxlds: CXL device state to associate with the memdev + * @ops: optional operations to run in cxl_mem::{probe,remove}() context * * Upon return the device will have had a chance to attach to the * cxl_mem driver, but may fail if the CXL topology is not ready * (hardware CXL link down, or software platform CXL root not attached) */ struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds) + struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops) { - struct cxl_memdev *cxlmd = cxl_memdev_alloc(cxlds); + struct cxl_memdev *cxlmd = cxl_memdev_alloc(cxlds, ops); int rc; if (IS_ERR(cxlmd)) @@ -189,7 +197,26 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host, return ERR_PTR(rc); } - return devm_cxl_memdev_add_or_reset(host, cxlmd); + cxlmd = devm_cxl_memdev_add_or_reset(host, cxlmd); + if (IS_ERR(cxlmd)) + return cxlmd; + + /* + * If ops is provided fail if the driver is not attached upon + * return. The ->endpoint ERR_PTR may have a more precise error + * code to convey. Note that failure here could be the result of + * a race to teardown the CXL port topology. I.e. + * cxl_mem_probe() could have succeeded and then cxl_mem unbound + * before the lock is acquired. 
+ */ + guard(device)(&cxlmd->dev); + if (ops && !cxlmd->dev.driver) { + if (IS_ERR(cxlmd->endpoint)) + return ERR_CAST(cxlmd->endpoint); + return ERR_PTR(-ENXIO); + } + + return cxlmd; } EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, "CXL"); diff --git a/drivers/cxl/private.h b/drivers/cxl/private.h index 93ff0101dd4bb..167a538efd18d 100644 --- a/drivers/cxl/private.h +++ b/drivers/cxl/private.h @@ -7,7 +7,8 @@ */ #ifndef __CXL_PRIVATE_H__ #define __CXL_PRIVATE_H__ -struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds); +struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops); struct cxl_memdev *devm_cxl_memdev_add_or_reset(struct device *host, struct cxl_memdev *cxlmd); int devm_cxl_add_endpoint(struct device *host, struct cxl_memdev *cxlmd, diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index d533481672b78..33d06ec5a4b98 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1768,7 +1768,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) cxl_mock_add_event_logs(&mdata->mes); - cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds); + cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlds, NULL); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); From a8672108e02b75a65e7544ec4ca21a341b6519a4 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:27 +0000 Subject: [PATCH 58/80] NVIDIA: VR: SAUCE: cxl: Add type2 device basic support Differentiate CXL memory expanders (type 3) from CXL device accelerators (type 2) with a new function for initializing cxl_dev_state and a macro for helping accel drivers to embed cxl_dev_state inside a private struct. Move structs to include/cxl as the size of the accel driver private struct embedding cxl_dev_state needs to know the size of this struct. Use same new initialization with the type3 pci driver. 
Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/mbox.c | 12 +- drivers/cxl/core/memdev.c | 32 +++++ drivers/cxl/core/pci_drv.c | 14 +-- drivers/cxl/cxl.h | 97 +-------------- drivers/cxl/cxlmem.h | 86 +------------ include/cxl/cxl.h | 226 +++++++++++++++++++++++++++++++++++ tools/testing/cxl/test/mem.c | 3 +- 7 files changed, 274 insertions(+), 196 deletions(-) create mode 100644 include/cxl/cxl.h diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index fa6dd0c94656f..bee84d0101d1a 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1514,23 +1514,21 @@ int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host) } EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL"); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev) +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec) { struct cxl_memdev_state *mds; int rc; - mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL); + mds = devm_cxl_dev_state_create(dev, CXL_DEVTYPE_CLASSMEM, serial, + dvsec, struct cxl_memdev_state, cxlds, + true); if (!mds) { dev_err(dev, "No memory available\n"); return ERR_PTR(-ENOMEM); } mutex_init(&mds->event.log_lock); - mds->cxlds.dev = dev; - mds->cxlds.reg_map.host = dev; - mds->cxlds.cxl_mbox.host = dev; - mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE; - mds->cxlds.type = CXL_DEVTYPE_CLASSMEM; rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier); if (rc == -EOPNOTSUPP) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index fd64f558c8fdb..1dd6f02940301 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -649,6 +649,38 @@ static void detach_memdev(struct work_struct *work) static struct lock_class_key 
cxl_memdev_key; +static void cxl_dev_state_init(struct cxl_dev_state *cxlds, struct device *dev, + enum cxl_devtype type, u64 serial, u16 dvsec, + bool has_mbox) +{ + *cxlds = (struct cxl_dev_state) { + .dev = dev, + .type = type, + .serial = serial, + .cxl_dvsec = dvsec, + .reg_map.host = dev, + .reg_map.resource = CXL_RESOURCE_NONE, + }; + + if (has_mbox) + cxlds->cxl_mbox.host = dev; +} + +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox) +{ + struct cxl_dev_state *cxlds = devm_kzalloc(dev, size, GFP_KERNEL); + + if (!cxlds) + return NULL; + + cxl_dev_state_init(cxlds, dev, type, serial, dvsec, has_mbox); + return cxlds; +} +EXPORT_SYMBOL_NS_GPL(_devm_cxl_dev_state_create, "CXL"); + struct cxl_memdev *devm_cxl_memdev_add_or_reset(struct device *host, struct cxl_memdev *cxlmd) { diff --git a/drivers/cxl/core/pci_drv.c b/drivers/cxl/core/pci_drv.c index f43590062efd6..b4b8350ba44d0 100644 --- a/drivers/cxl/core/pci_drv.c +++ b/drivers/cxl/core/pci_drv.c @@ -912,6 +912,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) int rc, pmu_count; unsigned int i; bool irq_avail; + u16 dvsec; /* * Double check the anonymous union trickery in struct cxl_regs @@ -925,19 +926,18 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; pci_set_master(pdev); - mds = cxl_memdev_state_create(&pdev->dev); + dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) + pci_warn(pdev, "Device DVSEC not present, skip CXL.mem init\n"); + + mds = cxl_memdev_state_create(&pdev->dev, pci_get_dsn(pdev), dvsec); if (IS_ERR(mds)) return PTR_ERR(mds); cxlds = &mds->cxlds; pci_set_drvdata(pdev, cxlds); cxlds->rcd = is_cxl_restricted(pdev); - cxlds->serial = pci_get_dsn(pdev); - cxlds->cxl_dvsec = pci_find_dvsec_capability( - pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE); - if 
(!cxlds->cxl_dvsec) - dev_warn(&pdev->dev, - "Device DVSEC not present, skip CXL.mem init\n"); rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index b7654d40dc9ee..1517250b0ec2a 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -12,6 +12,7 @@ #include #include #include +#include extern const struct nvdimm_security_ops *cxl_security_ops; @@ -201,97 +202,6 @@ static inline int ways_to_eiw(unsigned int ways, u8 *eiw) #define CXLDEV_MBOX_BG_CMD_COMMAND_VENDOR_MASK GENMASK_ULL(63, 48) #define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20 -/* - * Using struct_group() allows for per register-block-type helper routines, - * without requiring block-type agnostic code to include the prefix. - */ -struct cxl_regs { - /* - * Common set of CXL Component register block base pointers - * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure - * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure - */ - struct_group_tagged(cxl_component_regs, component, - void __iomem *hdm_decoder; - void __iomem *ras; - ); - /* - * Common set of CXL Device register block base pointers - * @status: CXL 2.0 8.2.8.3 Device Status Registers - * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers - * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers - */ - struct_group_tagged(cxl_device_regs, device_regs, - void __iomem *status, *mbox, *memdev; - ); - - struct_group_tagged(cxl_pmu_regs, pmu_regs, - void __iomem *pmu; - ); - - /* - * RCH downstream port specific RAS register - * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB - */ - struct_group_tagged(cxl_rch_regs, rch_regs, - void __iomem *dport_aer; - ); - - /* - * RCD upstream port specific PCIe cap register - * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB - */ - struct_group_tagged(cxl_rcd_regs, rcd_regs, - void __iomem *rcd_pcie_cap; - ); -}; - -struct cxl_reg_map { - bool valid; - int id; - unsigned long offset; - unsigned long size; -}; - -struct cxl_component_reg_map { - 
struct cxl_reg_map hdm_decoder; - struct cxl_reg_map ras; -}; - -struct cxl_device_reg_map { - struct cxl_reg_map status; - struct cxl_reg_map mbox; - struct cxl_reg_map memdev; -}; - -struct cxl_pmu_reg_map { - struct cxl_reg_map pmu; -}; - -/** - * struct cxl_register_map - DVSEC harvested register block mapping parameters - * @host: device for devm operations and logging - * @base: virtual base of the register-block-BAR + @block_offset - * @resource: physical resource base of the register block - * @max_size: maximum mapping size to perform register search - * @reg_type: see enum cxl_regloc_type - * @component_map: cxl_reg_map for component registers - * @device_map: cxl_reg_maps for device registers - * @pmu_map: cxl_reg_maps for CXL Performance Monitoring Units - */ -struct cxl_register_map { - struct device *host; - void __iomem *base; - resource_size_t resource; - resource_size_t max_size; - u8 reg_type; - union { - struct cxl_component_reg_map component_map; - struct cxl_device_reg_map device_map; - struct cxl_pmu_reg_map pmu_map; - }; -}; - void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); void cxl_probe_device_regs(struct device *dev, void __iomem *base, @@ -497,11 +407,6 @@ struct cxl_region_params { resource_size_t cache_size; }; -enum cxl_partition_mode { - CXL_PARTMODE_RAM, - CXL_PARTMODE_PMEM, -}; - /* * Indicate whether this region has been assembled by autodetection or * userspace assembly. 
Prevent endpoint decoders outside of automatic diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 63b1957fddda0..05f4cb5aaed0d 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -113,8 +113,6 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled, resource_size_t base, resource_size_t len, resource_size_t skipped); -#define CXL_NR_PARTITIONS_MAX 2 - struct cxl_dpa_info { u64 size; struct cxl_dpa_part_info { @@ -373,87 +371,6 @@ struct cxl_security_state { struct kernfs_node *sanitize_node; }; -/* - * enum cxl_devtype - delineate type-2 from a generic type-3 device - * @CXL_DEVTYPE_DEVMEM - Vendor specific CXL Type-2 device implementing HDM-D or - * HDM-DB, no requirement that this device implements a - * mailbox, or other memory-device-standard manageability - * flows. - * @CXL_DEVTYPE_CLASSMEM - Common class definition of a CXL Type-3 device with - * HDM-H and class-mandatory memory device registers - */ -enum cxl_devtype { - CXL_DEVTYPE_DEVMEM, - CXL_DEVTYPE_CLASSMEM, -}; - -/** - * struct cxl_dpa_perf - DPA performance property entry - * @dpa_range: range for DPA address - * @coord: QoS performance data (i.e. latency, bandwidth) - * @cdat_coord: raw QoS performance data from CDAT - * @qos_class: QoS Class cookies - */ -struct cxl_dpa_perf { - struct range dpa_range; - struct access_coordinate coord[ACCESS_COORDINATE_MAX]; - struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; - int qos_class; -}; - -/** - * struct cxl_dpa_partition - DPA partition descriptor - * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) - * @perf: performance attributes of the partition from CDAT - * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... - */ -struct cxl_dpa_partition { - struct resource res; - struct cxl_dpa_perf perf; - enum cxl_partition_mode mode; -}; - -/** - * struct cxl_dev_state - The driver device state - * - * cxl_dev_state represents the CXL driver/device state. 
It provides an - * interface to mailbox commands as well as some cached data about the device. - * Currently only memory devices are represented. - * - * @dev: The device associated with this CXL state - * @cxlmd: The device representing the CXL.mem capabilities of @dev - * @reg_map: component and ras register mapping parameters - * @regs: Parsed register blocks - * @cxl_dvsec: Offset to the PCIe device DVSEC - * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) - * @media_ready: Indicate whether the device media is usable - * @dpa_res: Overall DPA resource tree for the device - * @part: DPA partition array - * @nr_partitions: Number of DPA partitions - * @serial: PCIe Device Serial Number - * @type: Generic Memory Class device or Vendor Specific Memory device - * @cxl_mbox: CXL mailbox context - * @cxlfs: CXL features context - */ -struct cxl_dev_state { - struct device *dev; - struct cxl_memdev *cxlmd; - struct cxl_register_map reg_map; - struct cxl_regs regs; - int cxl_dvsec; - bool rcd; - bool media_ready; - struct resource dpa_res; - struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; - unsigned int nr_partitions; - u64 serial; - enum cxl_devtype type; - struct cxl_mailbox cxl_mbox; -#ifdef CONFIG_CXL_FEATURES - struct cxl_features_state *cxlfs; -#endif -}; - static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds) { /* @@ -858,7 +775,8 @@ int cxl_dev_state_identify(struct cxl_memdev_state *mds); int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_memdev_state *mds); int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info); -struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev); +struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev, u64 serial, + u16 dvsec); void set_exclusive_cxl_commands(struct cxl_memdev_state *mds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_memdev_state *mds, diff --git 
a/include/cxl/cxl.h b/include/cxl/cxl.h new file mode 100644 index 0000000000000..13d448686189c --- /dev/null +++ b/include/cxl/cxl.h @@ -0,0 +1,226 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2020 Intel Corporation. */ +/* Copyright(c) 2025 Advanced Micro Devices, Inc. */ + +#ifndef __CXL_CXL_H__ +#define __CXL_CXL_H__ + +#include +#include +#include + +/** + * enum cxl_devtype - delineate type-2 from a generic type-3 device + * @CXL_DEVTYPE_DEVMEM: Vendor specific CXL Type-2 device implementing HDM-D or + * HDM-DB, no requirement that this device implements a + * mailbox, or other memory-device-standard manageability + * flows. + * @CXL_DEVTYPE_CLASSMEM: Common class definition of a CXL Type-3 device with + * HDM-H and class-mandatory memory device registers + */ +enum cxl_devtype { + CXL_DEVTYPE_DEVMEM, + CXL_DEVTYPE_CLASSMEM, +}; + +struct device; + +/* + * Using struct_group() allows for per register-block-type helper routines, + * without requiring block-type agnostic code to include the prefix. 
+ */ +struct cxl_regs { + /* + * Common set of CXL Component register block base pointers + * @hdm_decoder: CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure + * @ras: CXL 2.0 8.2.5.9 CXL RAS Capability Structure + */ + struct_group_tagged(cxl_component_regs, component, + void __iomem *hdm_decoder; + void __iomem *ras; + ); + /* + * Common set of CXL Device register block base pointers + * @status: CXL 2.0 8.2.8.3 Device Status Registers + * @mbox: CXL 2.0 8.2.8.4 Mailbox Registers + * @memdev: CXL 2.0 8.2.8.5 Memory Device Registers + */ + struct_group_tagged(cxl_device_regs, device_regs, + void __iomem *status, *mbox, *memdev; + ); + + struct_group_tagged(cxl_pmu_regs, pmu_regs, + void __iomem *pmu; + ); + + /* + * RCH downstream port specific RAS register + * @aer: CXL 3.0 8.2.1.1 RCH Downstream Port RCRB + */ + struct_group_tagged(cxl_rch_regs, rch_regs, + void __iomem *dport_aer; + ); + + /* + * RCD upstream port specific PCIe cap register + * @pcie_cap: CXL 3.0 8.2.1.2 RCD Upstream Port RCRB + */ + struct_group_tagged(cxl_rcd_regs, rcd_regs, + void __iomem *rcd_pcie_cap; + ); +}; + +struct cxl_reg_map { + bool valid; + int id; + unsigned long offset; + unsigned long size; +}; + +struct cxl_component_reg_map { + struct cxl_reg_map hdm_decoder; + struct cxl_reg_map ras; +}; + +struct cxl_device_reg_map { + struct cxl_reg_map status; + struct cxl_reg_map mbox; + struct cxl_reg_map memdev; +}; + +struct cxl_pmu_reg_map { + struct cxl_reg_map pmu; +}; + +/** + * struct cxl_register_map - DVSEC harvested register block mapping parameters + * @host: device for devm operations and logging + * @base: virtual base of the register-block-BAR + @block_offset + * @resource: physical resource base of the register block + * @max_size: maximum mapping size to perform register search + * @reg_type: see enum cxl_regloc_type + * @component_map: cxl_reg_map for component registers + * @device_map: cxl_reg_maps for device registers + * @pmu_map: cxl_reg_maps for CXL 
Performance Monitoring Units + */ +struct cxl_register_map { + struct device *host; + void __iomem *base; + resource_size_t resource; + resource_size_t max_size; + u8 reg_type; + union { + struct cxl_component_reg_map component_map; + struct cxl_device_reg_map device_map; + struct cxl_pmu_reg_map pmu_map; + }; +}; + +/** + * struct cxl_dpa_perf - DPA performance property entry + * @dpa_range: range for DPA address + * @coord: QoS performance data (i.e. latency, bandwidth) + * @cdat_coord: raw QoS performance data from CDAT + * @qos_class: QoS Class cookies + */ +struct cxl_dpa_perf { + struct range dpa_range; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate cdat_coord[ACCESS_COORDINATE_MAX]; + int qos_class; +}; + +enum cxl_partition_mode { + CXL_PARTMODE_RAM, + CXL_PARTMODE_PMEM, +}; + +/** + * struct cxl_dpa_partition - DPA partition descriptor + * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res) + * @perf: performance attributes of the partition from CDAT + * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic... + */ +struct cxl_dpa_partition { + struct resource res; + struct cxl_dpa_perf perf; + enum cxl_partition_mode mode; +}; + +#define CXL_NR_PARTITIONS_MAX 2 + +/** + * struct cxl_dev_state - The driver device state + * + * cxl_dev_state represents the CXL driver/device state. It provides an + * interface to mailbox commands as well as some cached data about the device. + * Currently only memory devices are represented. 
+ * + * @dev: The device associated with this CXL state + * @cxlmd: The device representing the CXL.mem capabilities of @dev + * @reg_map: component and ras register mapping parameters + * @regs: Parsed register blocks + * @cxl_dvsec: Offset to the PCIe device DVSEC + * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH) + * @media_ready: Indicate whether the device media is usable + * @dpa_res: Overall DPA resource tree for the device + * @part: DPA partition array + * @nr_partitions: Number of DPA partitions + * @serial: PCIe Device Serial Number + * @type: Generic Memory Class device or Vendor Specific Memory device + * @cxl_mbox: CXL mailbox context + * @cxlfs: CXL features context + */ +struct cxl_dev_state { + /* public for Type2 drivers */ + struct device *dev; + struct cxl_memdev *cxlmd; + + /* private for Type2 drivers */ + struct cxl_register_map reg_map; + struct cxl_regs regs; + int cxl_dvsec; + bool rcd; + bool media_ready; + struct resource dpa_res; + struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX]; + unsigned int nr_partitions; + u64 serial; + enum cxl_devtype type; + struct cxl_mailbox cxl_mbox; +#ifdef CONFIG_CXL_FEATURES + struct cxl_features_state *cxlfs; +#endif +}; + +struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, + enum cxl_devtype type, + u64 serial, u16 dvsec, + size_t size, bool has_mbox); + +/** + * cxl_dev_state_create - safely create and cast a cxl dev state embedded in a + * driver specific struct. + * + * @parent: device behind the request + * @type: CXL device type + * @serial: device identification + * @dvsec: dvsec capability offset + * @drv_struct: driver struct embedding a cxl_dev_state struct + * @member: drv_struct member as cxl_dev_state + * @mbox: true if mailbox supported + * + * Returns a pointer to the drv_struct allocated and embedding a cxl_dev_state + * struct initialized. + * + * Introduced for Type2 driver support. 
+ */ +#define devm_cxl_dev_state_create(parent, type, serial, dvsec, drv_struct, member, mbox) \ + ({ \ + static_assert(__same_type(struct cxl_dev_state, \ + ((drv_struct *)NULL)->member)); \ + static_assert(offsetof(drv_struct, member) == 0); \ + (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ + sizeof(drv_struct), mbox); \ + }) +#endif /* __CXL_CXL_H__ */ diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 33d06ec5a4b98..6fbe0af3e8f88 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1717,7 +1717,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mds = cxl_memdev_state_create(dev); + mds = cxl_memdev_state_create(dev, pdev->id + 1, 0); if (IS_ERR(mds)) return PTR_ERR(mds); @@ -1733,7 +1733,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); - cxlds->serial = pdev->id + 1; if (is_rcd(pdev)) cxlds->rcd = true; From 69bf7ddcea5bf4339c6442be1200bf5b875789aa Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:28 +0000 Subject: [PATCH 59/80] NVIDIA: VR: SAUCE: sfc: add cxl support Add CXL initialization based on new CXL API for accel drivers and make it dependent on kernel CXL configuration. 
Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Acked-by: Edward Cree Reviewed-by: Alison Schofield Reviewed-by: Dan Williams Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/net/ethernet/sfc/Kconfig | 9 +++++ drivers/net/ethernet/sfc/Makefile | 1 + drivers/net/ethernet/sfc/efx.c | 15 ++++++- drivers/net/ethernet/sfc/efx_cxl.c | 56 +++++++++++++++++++++++++++ drivers/net/ethernet/sfc/efx_cxl.h | 40 +++++++++++++++++++ drivers/net/ethernet/sfc/net_driver.h | 10 +++++ 6 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/sfc/efx_cxl.c create mode 100644 drivers/net/ethernet/sfc/efx_cxl.h diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index c4c43434f3143..979f2801e2a8e 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -66,6 +66,15 @@ config SFC_MCDI_LOGGING Driver-Interface) commands and responses, allowing debugging of driver/firmware interaction. The tracing is actually enabled by a sysfs file 'mcdi_logging' under the PCI device. +config SFC_CXL + bool "Solarflare SFC9100-family CXL support" + depends on SFC && CXL_BUS >= SFC + default SFC + help + This enables SFC CXL support if the kernel is configuring CXL for + using CTPIO with CXL.mem. The SFC device with CXL support and + with a CXL-aware firmware can be used for minimizing latencies + when sending through CTPIO. 
source "drivers/net/ethernet/sfc/falcon/Kconfig" source "drivers/net/ethernet/sfc/siena/Kconfig" diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index d99039ec468d6..bb0f1891cde65 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -13,6 +13,7 @@ sfc-$(CONFIG_SFC_SRIOV) += sriov.o ef10_sriov.o ef100_sriov.o ef100_rep.o \ mae.o tc.o tc_bindings.o tc_counters.o \ tc_encap_actions.o tc_conntrack.o +sfc-$(CONFIG_SFC_CXL) += efx_cxl.o obj-$(CONFIG_SFC) += sfc.o obj-$(CONFIG_SFC_FALCON) += falcon/ diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 112e55b98ed3b..537668278375b 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -34,6 +34,7 @@ #include "selftest.h" #include "sriov.h" #include "efx_devlink.h" +#include "efx_cxl.h" #include "mcdi_port_common.h" #include "mcdi_pcol.h" @@ -981,12 +982,15 @@ static void efx_pci_remove(struct pci_dev *pci_dev) efx_pci_remove_main(efx); efx_fini_io(efx); + + probe_data = container_of(efx, struct efx_probe_data, efx); + efx_cxl_exit(probe_data); + pci_dbg(efx->pci_dev, "shutdown successful\n"); efx_fini_devlink_and_unlock(efx); efx_fini_struct(efx); free_netdev(efx->net_dev); - probe_data = container_of(efx, struct efx_probe_data, efx); kfree(probe_data); }; @@ -1190,6 +1194,15 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (rc) goto fail2; + /* A successful cxl initialization implies a CXL region created to be + * used for PIO buffers. If there is no CXL support, or initialization + * fails, cxl_pio_initialised will be false and legacy PIO buffers + * defined at specific PCI BAR regions will be used. + */ + rc = efx_cxl_init(probe_data); + if (rc) + pci_err(pci_dev, "CXL initialization failed with error %d\n", rc); + rc = efx_pci_probe_post_io(efx); if (rc) { /* On failure, retry once immediately. 
diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c new file mode 100644 index 0000000000000..8e0481d8dced6 --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/**************************************************************************** + * + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. + */ + +#include + +#include "net_driver.h" +#include "efx_cxl.h" + +#define EFX_CTPIO_BUFFER_SIZE SZ_256M + +int efx_cxl_init(struct efx_probe_data *probe_data) +{ + struct efx_nic *efx = &probe_data->efx; + struct pci_dev *pci_dev = efx->pci_dev; + struct efx_cxl *cxl; + u16 dvsec; + + probe_data->cxl_pio_initialised = false; + + /* Is the device configured with and using CXL? */ + if (!pcie_is_cxl(pci_dev)) + return 0; + + dvsec = pci_find_dvsec_capability(pci_dev, PCI_VENDOR_ID_CXL, + PCI_DVSEC_CXL_DEVICE); + if (!dvsec) { + pci_err(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability not found\n"); + return 0; + } + + pci_dbg(pci_dev, "CXL_DVSEC_PCIE_DEVICE capability found\n"); + + /* Create a cxl_dev_state embedded in the cxl struct using cxl core api + * specifying no mbox available. + */ + cxl = devm_cxl_dev_state_create(&pci_dev->dev, CXL_DEVTYPE_DEVMEM, + pci_dev->dev.id, dvsec, struct efx_cxl, + cxlds, false); + + if (!cxl) + return -ENOMEM; + + probe_data->cxl = cxl; + + return 0; +} + +void efx_cxl_exit(struct efx_probe_data *probe_data) +{ +} + +MODULE_IMPORT_NS("CXL"); diff --git a/drivers/net/ethernet/sfc/efx_cxl.h b/drivers/net/ethernet/sfc/efx_cxl.h new file mode 100644 index 0000000000000..961639cef692e --- /dev/null +++ b/drivers/net/ethernet/sfc/efx_cxl.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/**************************************************************************** + * Driver for AMD network controllers and boards + * Copyright (C) 2025, Advanced Micro Devices, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_CXL_H +#define EFX_CXL_H + +#ifdef CONFIG_SFC_CXL + +#include + +struct cxl_root_decoder; +struct cxl_port; +struct cxl_endpoint_decoder; +struct cxl_region; +struct efx_probe_data; + +struct efx_cxl { + struct cxl_dev_state cxlds; + struct cxl_memdev *cxlmd; + struct cxl_root_decoder *cxlrd; + struct cxl_port *endpoint; + struct cxl_endpoint_decoder *cxled; + struct cxl_region *efx_region; + void __iomem *ctpio_cxl; +}; + +int efx_cxl_init(struct efx_probe_data *probe_data); +void efx_cxl_exit(struct efx_probe_data *probe_data); +#else +static inline int efx_cxl_init(struct efx_probe_data *probe_data) { return 0; } +static inline void efx_cxl_exit(struct efx_probe_data *probe_data) {} +#endif +#endif diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index b98c259f672db..3964b2c56609c 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1197,14 +1197,24 @@ struct efx_nic { atomic_t n_rx_noskb_drops; }; +#ifdef CONFIG_SFC_CXL +struct efx_cxl; +#endif + /** * struct efx_probe_data - State after hardware probe * @pci_dev: The PCI device * @efx: Efx NIC details + * @cxl: details of related cxl objects + * @cxl_pio_initialised: cxl initialization outcome. 
*/ struct efx_probe_data { struct pci_dev *pci_dev; struct efx_nic efx; +#ifdef CONFIG_SFC_CXL + struct efx_cxl *cxl; + bool cxl_pio_initialised; +#endif }; static inline struct efx_nic *efx_netdev_priv(struct net_device *dev) From 05e993aba662189e4e95b5bee4cca15ee093c6f7 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:29 +0000 Subject: [PATCH 60/80] NVIDIA: VR: SAUCE: cxl: Move pci generic code Inside cxl/core/pci.c there are helpers for CXL PCIe initialization meanwhile cxl/pci_drv.c implements the functionality for a Type3 device initialization. Move helper functions from cxl/core/pci_drv.c to cxl/core/pci.c in order to be exported and shared with CXL Type2 device initialization. Signed-off-by: Alejandro Lucero Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham Reviewed-by: Fan Ni Reviewed-by: Jonathan Cameron Reviewed-by: Alison Schofield Reviewed-by: Dan Williams (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 3 ++ drivers/cxl/core/pci.c | 62 +++++++++++++++++++++++++++++++++ drivers/cxl/core/pci_drv.c | 70 -------------------------------------- drivers/cxl/core/regs.c | 1 - drivers/cxl/cxl.h | 2 -- drivers/cxl/cxlpci.h | 13 +++++++ 6 files changed, 78 insertions(+), 73 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index a7a0838c8f23a..2b2d3af0b5ec5 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -232,4 +232,7 @@ static inline bool cxl_pci_drv_bound(struct pci_dev *pdev) { return false; }; static inline int cxl_pci_driver_init(void) { return 0; } static inline void cxl_pci_driver_exit(void) { } #endif + +resource_size_t cxl_rcd_component_reg_phys(struct device *dev, + struct cxl_dport *dport); #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index a66f7a84b5c82..566d57ba0579e 100644 --- a/drivers/cxl/core/pci.c +++ 
b/drivers/cxl/core/pci.c @@ -775,6 +775,68 @@ bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL"); +static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, + struct cxl_register_map *map, + struct cxl_dport *dport) +{ + resource_size_t component_reg_phys; + + *map = (struct cxl_register_map) { + .host = &pdev->dev, + .resource = CXL_RESOURCE_NONE, + }; + + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); + if (component_reg_phys == CXL_RESOURCE_NONE) + return -ENXIO; + + map->resource = component_reg_phys; + map->reg_type = CXL_REGLOC_RBI_COMPONENT; + map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; + + return 0; +} + +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + int rc; + + rc = cxl_find_regblock(pdev, type, map); + + /* + * If the Register Locator DVSEC does not exist, check if it + * is an RCH and try to extract the Component Registers from + * an RCRB. 
+ */ + if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { + struct cxl_dport *dport; + struct cxl_port *port __free(put_cxl_port) = + cxl_pci_find_port(pdev, &dport); + if (!port) + return -EPROBE_DEFER; + + rc = cxl_rcrb_get_comp_regs(pdev, map, dport); + if (rc) + return rc; + + rc = cxl_dport_map_rcd_linkcap(pdev, dport); + if (rc) + return rc; + + } else if (rc) { + return rc; + } + + return cxl_setup_regs(map); +} +EXPORT_SYMBOL_NS_GPL(cxl_pci_setup_regs, "CXL"); + int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c) { int speed, bw; diff --git a/drivers/cxl/core/pci_drv.c b/drivers/cxl/core/pci_drv.c index b4b8350ba44d0..761779528eb56 100644 --- a/drivers/cxl/core/pci_drv.c +++ b/drivers/cxl/core/pci_drv.c @@ -466,76 +466,6 @@ static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail) return 0; } -/* - * Assume that any RCIEP that emits the CXL memory expander class code - * is an RCD - */ -static bool is_cxl_restricted(struct pci_dev *pdev) -{ - return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; -} - -static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, - struct cxl_register_map *map, - struct cxl_dport *dport) -{ - resource_size_t component_reg_phys; - - *map = (struct cxl_register_map) { - .host = &pdev->dev, - .resource = CXL_RESOURCE_NONE, - }; - - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); - if (component_reg_phys == CXL_RESOURCE_NONE) - return -ENXIO; - - map->resource = component_reg_phys; - map->reg_type = CXL_REGLOC_RBI_COMPONENT; - map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; - - return 0; -} - -static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) -{ - int rc; - - rc = cxl_find_regblock(pdev, type, map); - - /* - * If the Register Locator DVSEC does not exist, check if it - * is 
an RCH and try to extract the Component Registers from - * an RCRB. - */ - if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { - struct cxl_dport *dport; - struct cxl_port *port __free(put_cxl_port) = - cxl_pci_find_port(pdev, &dport); - if (!port) - return -EPROBE_DEFER; - - rc = cxl_rcrb_get_comp_regs(pdev, map, dport); - if (rc) - return rc; - - rc = cxl_dport_map_rcd_linkcap(pdev, dport); - if (rc) - return rc; - - } else if (rc) { - return rc; - } - - return cxl_setup_regs(map); -} - static int cxl_pci_ras_unmask(struct pci_dev *pdev) { struct cxl_dev_state *cxlds = pci_get_drvdata(pdev); diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index fb70ffbba72d3..fc7fbd4f39d23 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -641,4 +641,3 @@ resource_size_t cxl_rcd_component_reg_phys(struct device *dev, return CXL_RESOURCE_NONE; return __rcrb_to_component(dev, &dport->rcrb, CXL_RCRB_UPSTREAM); } -EXPORT_SYMBOL_NS_GPL(cxl_rcd_component_reg_phys, "CXL"); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 1517250b0ec2a..536c9d99e0e6d 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -222,8 +222,6 @@ int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map); int cxl_setup_regs(struct cxl_register_map *map); struct cxl_dport; -resource_size_t cxl_rcd_component_reg_phys(struct device *dev, - struct cxl_dport *dport); int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_RESOURCE_NONE ((resource_size_t) -1) diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 3526e6d75f796..24aba9ff6d2e3 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -74,6 +74,17 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev) return lnksta2 & PCI_EXP_LNKSTA2_FLIT; } +/* + * Assume that the caller has already validated that @pdev has CXL + * capabilities, any RCiEP with CXL capabilities is treated as a + * Restricted CXL 
Device (RCD) and finds upstream port and endpoint + * registers in a Root Complex Register Block (RCRB). + */ +static inline bool is_cxl_restricted(struct pci_dev *pdev) +{ + return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; +} + int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; void read_cdat_data(struct cxl_port *port); @@ -89,4 +100,6 @@ static inline void cxl_uport_init_ras_reporting(struct cxl_port *port, struct device *host) { } #endif +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); #endif /* __CXL_PCI_H__ */ From 8d9cb7417c833dd472883d89c3bf771b5eb4ba68 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:30 +0000 Subject: [PATCH 61/80] NVIDIA: VR: SAUCE: cxl/sfc: Map cxl component regs Export cxl core functions for a Type2 driver being able to discover and map the device component registers. Use it in sfc driver cxl initialization. Signed-off-by: Alejandro Lucero Reviewed-by: Dan Williams Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/pci.c | 1 + drivers/cxl/core/pci_drv.c | 1 + drivers/cxl/core/port.c | 1 + drivers/cxl/core/regs.c | 1 + drivers/cxl/cxl.h | 7 ------ drivers/cxl/cxlpci.h | 12 ---------- drivers/net/ethernet/sfc/efx_cxl.c | 35 ++++++++++++++++++++++++++++++ include/cxl/cxl.h | 19 ++++++++++++++++ include/cxl/pci.h | 21 ++++++++++++++++++ 9 files changed, 79 insertions(+), 19 deletions(-) create mode 100644 include/cxl/pci.h diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 566d57ba0579e..90a0763e72c4b 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cxl/core/pci_drv.c b/drivers/cxl/core/pci_drv.c index 
761779528eb56..4a812765217e4 100644 --- a/drivers/cxl/core/pci_drv.c +++ b/drivers/cxl/core/pci_drv.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "cxlmem.h" #include "cxlpci.h" diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 0a4138bd05454..f7f597c83181a 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index fc7fbd4f39d23..dcf444f1fe48d 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 536c9d99e0e6d..d7ddca6f71154 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -39,10 +39,6 @@ extern const struct nvdimm_security_ops *cxl_security_ops; #define CXL_CM_CAP_HDR_ARRAY_SIZE_MASK GENMASK(31, 24) #define CXL_CM_CAP_PTR_MASK GENMASK(31, 20) -#define CXL_CM_CAP_CAP_ID_RAS 0x2 -#define CXL_CM_CAP_CAP_ID_HDM 0x5 -#define CXL_CM_CAP_CAP_HDM_VERSION 1 - /* HDM decoders CXL 2.0 8.2.5.12 CXL HDM Decoder Capability Structure */ #define CXL_HDM_DECODER_CAP_OFFSET 0x0 #define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) @@ -206,9 +202,6 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map); void cxl_probe_device_regs(struct device *dev, void __iomem *base, struct cxl_device_reg_map *map); -int cxl_map_component_regs(const struct cxl_register_map *map, - struct cxl_component_regs *regs, - unsigned long map_mask); int cxl_map_device_regs(const struct cxl_register_map *map, struct cxl_device_regs *regs); int cxl_map_pmu_regs(struct cxl_register_map *map, struct cxl_pmu_regs *regs); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 24aba9ff6d2e3..53760ce31af80 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -13,16 +13,6 @@ */ #define 
CXL_PCI_DEFAULT_MAX_VECTORS 16 -/* Register Block Identifier (RBI) */ -enum cxl_regloc_type { - CXL_REGLOC_RBI_EMPTY = 0, - CXL_REGLOC_RBI_COMPONENT, - CXL_REGLOC_RBI_VIRT, - CXL_REGLOC_RBI_MEMDEV, - CXL_REGLOC_RBI_PMU, - CXL_REGLOC_RBI_TYPES -}; - /* * Table Access DOE, CDAT Read Entry Response * @@ -100,6 +90,4 @@ static inline void cxl_uport_init_ras_reporting(struct cxl_port *port, struct device *host) { } #endif -int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 8e0481d8dced6..34126bc4826c8 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -7,6 +7,8 @@ #include +#include +#include #include "net_driver.h" #include "efx_cxl.h" @@ -18,6 +20,7 @@ int efx_cxl_init(struct efx_probe_data *probe_data) struct pci_dev *pci_dev = efx->pci_dev; struct efx_cxl *cxl; u16 dvsec; + int rc; probe_data->cxl_pio_initialised = false; @@ -44,6 +47,38 @@ int efx_cxl_init(struct efx_probe_data *probe_data) if (!cxl) return -ENOMEM; + rc = cxl_pci_setup_regs(pci_dev, CXL_REGLOC_RBI_COMPONENT, + &cxl->cxlds.reg_map); + if (rc) { + pci_err(pci_dev, "No component registers\n"); + return rc; + } + + if (!cxl->cxlds.reg_map.component_map.hdm_decoder.valid) { + pci_err(pci_dev, "Expected HDM component register not found\n"); + return -ENODEV; + } + + if (!cxl->cxlds.reg_map.component_map.ras.valid) { + pci_err(pci_dev, "Expected RAS component register not found\n"); + return -ENODEV; + } + + rc = cxl_map_component_regs(&cxl->cxlds.reg_map, + &cxl->cxlds.regs.component, + BIT(CXL_CM_CAP_CAP_ID_RAS)); + if (rc) { + pci_err(pci_dev, "Failed to map RAS capability.\n"); + return rc; + } + + /* + * Set media ready explicitly as there are neither mailbox for checking + * this state nor the CXL register involved, both not mandatory for + * type2. 
+ */ + cxl->cxlds.media_ready = true; + probe_data->cxl = cxl; return 0; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 13d448686189c..7f2e23bce1f78 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -70,6 +70,10 @@ struct cxl_regs { ); }; +#define CXL_CM_CAP_CAP_ID_RAS 0x2 +#define CXL_CM_CAP_CAP_ID_HDM 0x5 +#define CXL_CM_CAP_CAP_HDM_VERSION 1 + struct cxl_reg_map { bool valid; int id; @@ -223,4 +227,19 @@ struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, (drv_struct *)_devm_cxl_dev_state_create(parent, type, serial, dvsec, \ sizeof(drv_struct), mbox); \ }) + +/** + * cxl_map_component_regs - map cxl component registers + * + * @map: cxl register map to update with the mappings + * @regs: cxl component registers to work with + * @map_mask: cxl component regs to map + * + * Returns integer: success (0) or error (-ENOMEM) + * + * Made public for Type2 driver support. + */ +int cxl_map_component_regs(const struct cxl_register_map *map, + struct cxl_component_regs *regs, + unsigned long map_mask); #endif /* __CXL_CXL_H__ */ diff --git a/include/cxl/pci.h b/include/cxl/pci.h new file mode 100644 index 0000000000000..a172439f08c60 --- /dev/null +++ b/include/cxl/pci.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2020 Intel Corporation. All rights reserved. 
*/ + +#ifndef __CXL_CXL_PCI_H__ +#define __CXL_CXL_PCI_H__ + +/* Register Block Identifier (RBI) */ +enum cxl_regloc_type { + CXL_REGLOC_RBI_EMPTY = 0, + CXL_REGLOC_RBI_COMPONENT, + CXL_REGLOC_RBI_VIRT, + CXL_REGLOC_RBI_MEMDEV, + CXL_REGLOC_RBI_PMU, + CXL_REGLOC_RBI_TYPES +}; + +struct cxl_register_map; + +int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); +#endif From 71f28dc1df8d5e2f2af8749d4fc0a4b22051747f Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:31 +0000 Subject: [PATCH 62/80] NVIDIA: VR: SAUCE: cxl/sfc: Initialize dpa without a mailbox Type3 relies on mailbox CXL_MBOX_OP_IDENTIFY command for initializing memdev state params which end up being used for DPA initialization. Allow a Type2 driver to initialize DPA simply by giving the size of its volatile hardware partition. Move related functions to memdev. Add sfc driver as the client. Signed-off-by: Alejandro Lucero Reviewed-by: Dan Williams Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham Reviewed-by: Jonathan Cameron (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 2 + drivers/cxl/core/mbox.c | 51 +---------------------- drivers/cxl/core/memdev.c | 66 ++++++++++++++++++++++++++++++ drivers/net/ethernet/sfc/efx_cxl.c | 5 +++ include/cxl/cxl.h | 1 + 5 files changed, 75 insertions(+), 50 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 2b2d3af0b5ec5..1c17268561391 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -91,6 +91,8 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, struct dentry *cxl_debugfs_create_dir(const char *dir); int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, enum cxl_partition_mode mode); +struct cxl_memdev_state; +int cxl_mem_get_partition_info(struct cxl_memdev_state *mds); int cxl_dpa_alloc(struct 
cxl_endpoint_decoder *cxled, u64 size); int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled); diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index bee84d0101d1a..d57a0c2d39fb6 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -1144,7 +1144,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, "CXL"); * * See CXL @8.2.9.5.2.1 Get Partition Info */ -static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) +int cxl_mem_get_partition_info(struct cxl_memdev_state *mds) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_partition_info pi; @@ -1300,55 +1300,6 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd) return -EBUSY; } -static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) -{ - int i = info->nr_partitions; - - if (size == 0) - return; - - info->part[i].range = (struct range) { - .start = start, - .end = start + size - 1, - }; - info->part[i].mode = mode; - info->nr_partitions++; -} - -int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) -{ - struct cxl_dev_state *cxlds = &mds->cxlds; - struct device *dev = cxlds->dev; - int rc; - - if (!cxlds->media_ready) { - info->size = 0; - return 0; - } - - info->size = mds->total_bytes; - - if (mds->partition_align_bytes == 0) { - add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); - add_part(info, mds->volatile_only_bytes, - mds->persistent_only_bytes, CXL_PARTMODE_PMEM); - return 0; - } - - rc = cxl_mem_get_partition_info(mds); - if (rc) { - dev_err(dev, "Failed to query partition information\n"); - return rc; - } - - add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); - add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, - CXL_PARTMODE_PMEM); - - return 0; -} -EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); - int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 
*count) { struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 1dd6f02940301..e5def6f08f1cf 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -584,6 +584,72 @@ bool is_cxl_memdev(const struct device *dev) } EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); +static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode) +{ + int i = info->nr_partitions; + + if (size == 0) + return; + + info->part[i].range = (struct range) { + .start = start, + .end = start + size - 1, + }; + info->part[i].mode = mode; + info->nr_partitions++; +} + +int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info) +{ + struct cxl_dev_state *cxlds = &mds->cxlds; + struct device *dev = cxlds->dev; + int rc; + + if (!cxlds->media_ready) { + info->size = 0; + return 0; + } + + info->size = mds->total_bytes; + + if (mds->partition_align_bytes == 0) { + add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->volatile_only_bytes, + mds->persistent_only_bytes, CXL_PARTMODE_PMEM); + return 0; + } + + rc = cxl_mem_get_partition_info(mds); + if (rc) { + dev_err(dev, "Failed to query partition information\n"); + return rc; + } + + add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM); + add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes, + CXL_PARTMODE_PMEM); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL"); + +/** + * cxl_set_capacity: initialize dpa by a driver without a mailbox. 
+ * + * @cxlds: pointer to cxl_dev_state + * @capacity: device volatile memory size + */ +int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity) +{ + struct cxl_dpa_info range_info = { + .size = capacity, + }; + + add_part(&range_info, 0, capacity, CXL_PARTMODE_RAM); + return cxl_dpa_setup(cxlds, &range_info); +} +EXPORT_SYMBOL_NS_GPL(cxl_set_capacity, "CXL"); + /** * set_exclusive_cxl_commands() - atomically disable user cxl commands * @mds: The device state to operate on diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 34126bc4826c8..0b10a2e6aceb6 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -79,6 +79,11 @@ int efx_cxl_init(struct efx_probe_data *probe_data) */ cxl->cxlds.media_ready = true; + if (cxl_set_capacity(&cxl->cxlds, EFX_CTPIO_BUFFER_SIZE)) { + pci_err(pci_dev, "dpa capacity setup failed\n"); + return -ENODEV; + } + probe_data->cxl = cxl; return 0; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 7f2e23bce1f78..fb2f8f2395d50 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -242,4 +242,5 @@ struct cxl_dev_state *_devm_cxl_dev_state_create(struct device *dev, int cxl_map_component_regs(const struct cxl_register_map *map, struct cxl_component_regs *regs, unsigned long map_mask); +int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); #endif /* __CXL_CXL_H__ */ From c71ef2b2754460c80451d2ff50e58a8e70c9e6e1 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:32 +0000 Subject: [PATCH 63/80] NVIDIA: VR: SAUCE: cxl: Prepare memdev creation for type2 Current cxl core is relying on a CXL_DEVTYPE_CLASSMEM type device when creating a memdev leading to problems when obtaining cxl_memdev_state references from a CXL_DEVTYPE_DEVMEM type. Modify check for obtaining cxl_memdev_state adding CXL_DEVTYPE_DEVMEM support. Make devm_cxl_add_memdev accessible from a accel driver. 
Signed-off-by: Alejandro Lucero Reviewed-by: Ben Cheatham Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Alison Schofield Reviewed-by: Dan Williams (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/memdev.c | 15 +++++++++++-- drivers/cxl/cxlmem.h | 8 ------- drivers/cxl/mem.c | 45 +++++++++++++++++++++++++++++---------- include/cxl/cxl.h | 7 ++++++ 4 files changed, 54 insertions(+), 21 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index e5def6f08f1cf..2d4828831ce1a 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include "private.h" #include "trace.h" @@ -578,9 +579,16 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; +static const struct device_type cxl_accel_memdev_type = { + .name = "cxl_accel_memdev", + .release = cxl_memdev_release, + .devnode = cxl_memdev_devnode, +}; + bool is_cxl_memdev(const struct device *dev) { - return dev->type == &cxl_memdev_type; + return (dev->type == &cxl_memdev_type || + dev->type == &cxl_accel_memdev_type); } EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, "CXL"); @@ -1166,7 +1174,10 @@ struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev->parent = cxlds->dev; dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); - dev->type = &cxl_memdev_type; + if (cxlds->type == CXL_DEVTYPE_DEVMEM) + dev->type = &cxl_accel_memdev_type; + else + dev->type = &cxl_memdev_type; device_set_pm_not_required(dev); INIT_WORK(&cxlmd->detach_work, detach_memdev); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 05f4cb5aaed0d..1eaf4e57554e0 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -34,10 +34,6 @@ (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) != \ CXLMDEV_RESET_NEEDED_NOT) -struct cxl_memdev_ops { - int 
(*probe)(struct cxl_memdev *cxlmd); -}; - /** * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device * @dev: driver core device object @@ -101,10 +97,6 @@ static inline bool is_cxl_endpoint(struct cxl_port *port) return is_cxl_memdev(port->uport_dev); } -struct cxl_memdev *devm_cxl_add_memdev(struct device *host, - struct cxl_dev_state *cxlds, - const struct cxl_memdev_ops *ops); - int devm_cxl_sanitize_setup_notifier(struct device *host, struct cxl_memdev *cxlmd); struct cxl_memdev_state; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index b36d8bb812a36..6d0f2f0b332a2 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -66,6 +66,26 @@ static int cxl_debugfs_poison_clear(void *data, u64 dpa) DEFINE_DEBUGFS_ATTRIBUTE(cxl_poison_clear_fops, NULL, cxl_debugfs_poison_clear, "%llx\n"); +static void cxl_memdev_poison_enable(struct cxl_memdev_state *mds, + struct cxl_memdev *cxlmd, + struct dentry *dentry) +{ + /* + * Avoid poison debugfs for DEVMEM aka accelerators as they rely on + * cxl_memdev_state. 
+ */ + if (!mds) + return; + + if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) + debugfs_create_file("inject_poison", 0200, dentry, cxlmd, + &cxl_poison_inject_fops); + + if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) + debugfs_create_file("clear_poison", 0200, dentry, cxlmd, + &cxl_poison_clear_fops); +} + static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); @@ -93,12 +113,7 @@ static int cxl_mem_probe(struct device *dev) dentry = cxl_debugfs_create_dir(dev_name(dev)); debugfs_create_devm_seqfile(dev, "dpamem", dentry, cxl_mem_dpa_show); - if (test_bit(CXL_POISON_ENABLED_INJECT, mds->poison.enabled_cmds)) - debugfs_create_file("inject_poison", 0200, dentry, cxlmd, - &cxl_poison_inject_fops); - if (test_bit(CXL_POISON_ENABLED_CLEAR, mds->poison.enabled_cmds)) - debugfs_create_file("clear_poison", 0200, dentry, cxlmd, - &cxl_poison_clear_fops); + cxl_memdev_poison_enable(mds, cxlmd, dentry); rc = devm_add_action_or_reset(dev, remove_debugfs, dentry); if (rc) @@ -236,16 +251,24 @@ static ssize_t trigger_poison_list_store(struct device *dev, } static DEVICE_ATTR_WO(trigger_poison_list); -static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +static bool cxl_poison_attr_visible(struct kobject *kobj, struct attribute *a) { struct device *dev = kobj_to_dev(kobj); struct cxl_memdev *cxlmd = to_cxl_memdev(dev); struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds); - if (a == &dev_attr_trigger_poison_list.attr) - if (!test_bit(CXL_POISON_ENABLED_LIST, - mds->poison.enabled_cmds)) - return 0; + if (!mds || + !test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) + return false; + + return true; +} + +static umode_t cxl_mem_visible(struct kobject *kobj, struct attribute *a, int n) +{ + if (a == &dev_attr_trigger_poison_list.attr && + !cxl_poison_attr_visible(kobj, a)) + return 0; return a->mode; } diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 
fb2f8f2395d50..043fc31c764e9 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -153,6 +153,10 @@ struct cxl_dpa_partition { #define CXL_NR_PARTITIONS_MAX 2 +struct cxl_memdev_ops { + int (*probe)(struct cxl_memdev *cxlmd); +}; + /** * struct cxl_dev_state - The driver device state * @@ -243,4 +247,7 @@ int cxl_map_component_regs(const struct cxl_register_map *map, struct cxl_component_regs *regs, unsigned long map_mask); int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); +struct cxl_memdev *devm_cxl_add_memdev(struct device *host, + struct cxl_dev_state *cxlds, + const struct cxl_memdev_ops *ops); #endif /* __CXL_CXL_H__ */ From 11e616e2a87fb2191161f70f4666781a927eabc0 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:33 +0000 Subject: [PATCH 64/80] NVIDIA: VR: SAUCE: sfc: create type2 cxl memdev Use cxl API for creating a cxl memory device using the type2 cxl_dev_state struct. Signed-off-by: Alejandro Lucero Reviewed-by: Martin Habets Reviewed-by: Fan Ni Acked-by: Edward Cree Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/net/ethernet/sfc/efx_cxl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 0b10a2e6aceb6..f6eda93e67e2d 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -84,6 +84,12 @@ int efx_cxl_init(struct efx_probe_data *probe_data) return -ENODEV; } + cxl->cxlmd = devm_cxl_add_memdev(&pci_dev->dev, &cxl->cxlds, NULL); + if (IS_ERR(cxl->cxlmd)) { + pci_err(pci_dev, "CXL accel memdev creation failed"); + return PTR_ERR(cxl->cxlmd); + } + probe_data->cxl = cxl; return 0; From 49c3fd4875d8abcde8117261c6973ec7a571432d Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:34 +0000 Subject: [PATCH 65/80] NVIDIA: VR: 
SAUCE: cxl/hdm: Add support for getting region from committed decoder A Type2 device configured by the BIOS can already have its HDM committed. Add a cxl_get_committed_decoder() function for checking so after memdev creation. A CXL region should have been created during memdev initialization, therefore a Type2 driver can ask for such a region for working with the HPA. If the HDM is not committed, a Type2 driver will create the region after obtaining proper HPA and DPA space. Signed-off-by: Alejandro Lucero (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) [jan: Change if (!endpoint) to if (IS_ERR_OR_NULL(endpoint)) in cxl_get_committed_decoder()] [jan: Fix dangling pointer by removing the put_device(cxled_dev) from cxl_get_committed_decoder()] [jan: Preserve DVSEC emulation for Type-3 devices that have unprogrammed HDM decoders] Signed-off-by: Jiandi An --- drivers/cxl/core/hdm.c | 53 ++++++++++++++++++++++++++++++++++++++++++ include/cxl/cxl.h | 3 +++ 2 files changed, 56 insertions(+) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index de78601821e60..9be199caaf2be 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -92,6 +92,7 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) { struct cxl_hdm *cxlhdm; + struct cxl_port *port; void __iomem *hdm; u32 ctrl; int i; @@ -105,6 +106,19 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info) if (!hdm) return true; + port = cxlhdm->port; + if (is_cxl_endpoint(port)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev); + /* + * Type-2 accelerators (DEVMEM) have HDM decoders programmed + * by firmware/BIOS - never use DVSEC emulation. + * Type-3 memory (CLASSMEM) may still need DVSEC emulation + * fallback in certain firmware configurations. 
+ */ + if (cxlmd->cxlds->type == CXL_DEVTYPE_DEVMEM) + return false; + } + /* * If HDM decoders are present and the driver is in control of * Mem_Enable skip DVSEC based emulation @@ -686,6 +700,45 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size) return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled); } +static int find_committed_decoder(struct device *dev, const void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_port *port; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + port = cxled_to_port(cxled); + + return cxled->cxld.id == (port->hdm_end); +} + +struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd, + struct cxl_region **cxlr) +{ + struct cxl_port *endpoint = cxlmd->endpoint; + struct cxl_endpoint_decoder *cxled; + struct device *cxled_dev; + + if (IS_ERR_OR_NULL(endpoint)) + return NULL; + + guard(rwsem_read)(&cxl_rwsem.dpa); + cxled_dev = device_find_child(&endpoint->dev, NULL, + find_committed_decoder); + + if (!cxled_dev) + return NULL; + + cxled = to_cxl_endpoint_decoder(cxled_dev); + *cxlr = cxled->cxld.region; + + /* Reference transferred to caller - caller must put_device() */ + return cxled; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_committed_decoder, "CXL"); + static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl) { u16 eig; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 043fc31c764e9..2ff3c19c684c5 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -250,4 +250,7 @@ int cxl_set_capacity(struct cxl_dev_state *cxlds, u64 capacity); struct cxl_memdev *devm_cxl_add_memdev(struct device *host, struct cxl_dev_state *cxlds, const struct cxl_memdev_ops *ops); +struct cxl_region; +struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd, + struct cxl_region **cxlr); #endif /* __CXL_CXL_H__ */ From b17b6a228817b9ad8f0683effdf851adeac943b5 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 
2025 11:52:35 +0000 Subject: [PATCH 66/80] NVIDIA: VR: SAUCE: cxl: Add function for obtaining region range A CXL region struct contains the physical address to work with. Type2 drivers can create a CXL region but have no access to the related struct as it is defined as private by the kernel CXL core. Add a function for getting the cxl region range to be used for mapping such memory range by a Type2 driver. Signed-off-by: Alejandro Lucero Reviewed-by: Zhi Wang Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 23 +++++++++++++++++++++++ include/cxl/cxl.h | 2 ++ 2 files changed, 25 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 149f9bdabbb40..4b54c2d06d6ae 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2575,6 +2575,29 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, return ERR_PTR(rc); } +/** + * cxl_get_region_range - obtain range linked to a CXL region + * + * @region: a pointer to struct cxl_region + * @range: a pointer to a struct range to be set + * + * Returns 0 or error. 
+ */ +int cxl_get_region_range(struct cxl_region *region, struct range *range) +{ + if (WARN_ON_ONCE(!region)) + return -ENODEV; + + if (!region->params.res) + return -ENOSPC; + + range->start = region->params.res->start; + range->end = region->params.res->end; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_region_range, "CXL"); + static ssize_t __create_region_show(struct cxl_root_decoder *cxlrd, char *buf) { return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id)); diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 2ff3c19c684c5..f02dd817b40fa 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -253,4 +253,6 @@ struct cxl_memdev *devm_cxl_add_memdev(struct device *host, struct cxl_region; struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd, struct cxl_region **cxlr); +struct range; +int cxl_get_region_range(struct cxl_region *region, struct range *range); #endif /* __CXL_CXL_H__ */ From 7bd6e2bf17bf8bb383c4416b845f49688131fa41 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:36 +0000 Subject: [PATCH 67/80] NVIDIA: VR: SAUCE: cxl: Export functions for unwinding cxl by accelerators Add unregister_region() and cxl_decoder_detach() to the accelerator driver API for a clean exit. 
Signed-off-by: Alejandro Lucero (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/core.h | 5 ----- drivers/cxl/core/region.c | 4 +++- include/cxl/cxl.h | 9 +++++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 1c17268561391..9a6775845afe4 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -15,11 +15,6 @@ extern const struct device_type cxl_pmu_type; extern struct attribute_group cxl_base_attribute_group; -enum cxl_detach_mode { - DETACH_ONLY, - DETACH_INVALIDATE, -}; - #ifdef CONFIG_CXL_REGION extern struct device_attribute dev_attr_create_pmem_region; extern struct device_attribute dev_attr_create_ram_region; diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 4b54c2d06d6ae..491d47dd0d25d 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2199,6 +2199,7 @@ int cxl_decoder_detach(struct cxl_region *cxlr, } return 0; } +EXPORT_SYMBOL_NS_GPL(cxl_decoder_detach, "CXL"); static int __attach_target(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos, @@ -2393,7 +2394,7 @@ static struct cxl_region *to_cxl_region(struct device *dev) return container_of(dev, struct cxl_region, dev); } -static void unregister_region(void *_cxlr) +void unregister_region(void *_cxlr) { struct cxl_region *cxlr = _cxlr; struct cxl_region_params *p = &cxlr->params; @@ -2412,6 +2413,7 @@ static void unregister_region(void *_cxlr) cxl_region_iomem_release(cxlr); put_device(&cxlr->dev); } +EXPORT_SYMBOL_NS_GPL(unregister_region, "CXL"); static struct lock_class_key cxl_region_key; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index f02dd817b40fa..b8683c75dfde7 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -255,4 +255,13 @@ struct cxl_endpoint_decoder *cxl_get_committed_decoder(struct cxl_memdev *cxlmd, struct cxl_region 
**cxlr); struct range; int cxl_get_region_range(struct cxl_region *region, struct range *range); +enum cxl_detach_mode { + DETACH_ONLY, + DETACH_INVALIDATE, +}; + +int cxl_decoder_detach(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled, int pos, + enum cxl_detach_mode mode); +void unregister_region(void *_cxlr); #endif /* __CXL_CXL_H__ */ From a8397ceb6625daf8dd87557fd16fdbc85b237a70 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:37 +0000 Subject: [PATCH 68/80] NVIDIA: VR: SAUCE: sfc: obtain decoder and region if committed by firmware Check if device HDM is already committed during firmware/BIOS initialization. A CXL region should exist if so after memdev allocation/initialization. Get HPA from region and map it. Signed-off-by: Alejandro Lucero (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) [jan: The SFC caller must add put_device(&cxled->cxld.dev) in cleanup path] Signed-off-by: Jiandi An --- drivers/net/ethernet/sfc/efx_cxl.c | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index f6eda93e67e2d..3d462aedb745f 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -19,6 +19,7 @@ int efx_cxl_init(struct efx_probe_data *probe_data) struct efx_nic *efx = &probe_data->efx; struct pci_dev *pci_dev = efx->pci_dev; struct efx_cxl *cxl; + struct range range; u16 dvsec; int rc; @@ -90,6 +91,26 @@ int efx_cxl_init(struct efx_probe_data *probe_data) return PTR_ERR(cxl->cxlmd); } + cxl->cxled = cxl_get_committed_decoder(cxl->cxlmd, &cxl->efx_region); + if (cxl->cxled) { + if (!cxl->efx_region) { + pci_err(pci_dev, "CXL found committed decoder without a region"); + return -ENODEV; + } + rc = cxl_get_region_range(cxl->efx_region, &range); + if (rc) { + pci_err(pci_dev, + "CXL getting regions params from a committed decoder failed"); + 
return rc; + } + + cxl->ctpio_cxl = ioremap(range.start, range.end - range.start + 1); + if (!cxl->ctpio_cxl) { + pci_err(pci_dev, "CXL ioremap region (%pra) failed", &range); + return -ENOMEM; + } + } + probe_data->cxl = cxl; return 0; @@ -97,6 +118,16 @@ int efx_cxl_init(struct efx_probe_data *probe_data) void efx_cxl_exit(struct efx_probe_data *probe_data) { + if (!probe_data->cxl) + return; + + iounmap(probe_data->cxl->ctpio_cxl); + cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, DETACH_INVALIDATE); + unregister_region(probe_data->cxl->efx_region); + + /* Release decoder reference from cxl_get_committed_decoder() */ + if (probe_data->cxl->cxled) + put_device(&probe_data->cxl->cxled->cxld.dev); } MODULE_IMPORT_NS("CXL"); From 192fd8e4301f6fd8b51ae9807bff042523f0e409 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:38 +0000 Subject: [PATCH 69/80] NVIDIA: VR: SAUCE: cxl: Define a driver interface for HPA free space enumeration CXL region creation involves allocating capacity from Device Physical Address (DPA) and assigning it to decode a given Host Physical Address (HPA). Before determining how much DPA to allocate the amount of available HPA must be determined. Also, not all HPA is created equal, some HPA targets RAM, some targets PMEM, some is prepared for device-memory flows like HDM-D and HDM-DB, and some is HDM-H (host-only). In order to support Type2 CXL devices, wrap all of those concerns into an API that retrieves a root decoder (platform CXL window) that fits the specified constraints and the capacity available for a new region. Add a complementary function for releasing the reference to such root decoder. 
Based on https://lore.kernel.org/linux-cxl/168592159290.1948938.13522227102445462976.stgit@dwillia2-xfh.jf.intel.com/ Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) [jan: Fix HPA free space calculation reporting 2 extra bytes] Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 165 ++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 3 + include/cxl/cxl.h | 6 ++ 3 files changed, 174 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 491d47dd0d25d..50ed086339bba 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -711,6 +711,171 @@ static int free_hpa(struct cxl_region *cxlr) return 0; } +struct cxlrd_max_context { + struct device * const *host_bridges; + int interleave_ways; + unsigned long flags; + resource_size_t max_hpa; + struct cxl_root_decoder *cxlrd; +}; + +static int find_max_hpa(struct device *dev, void *data) +{ + struct cxlrd_max_context *ctx = data; + struct cxl_switch_decoder *cxlsd; + struct cxl_root_decoder *cxlrd; + struct resource *res, *prev; + struct cxl_decoder *cxld; + resource_size_t free = 0; + resource_size_t max; + int found = 0; + + if (!is_root_decoder(dev)) + return 0; + + cxlrd = to_cxl_root_decoder(dev); + cxlsd = &cxlrd->cxlsd; + cxld = &cxlsd->cxld; + + if ((cxld->flags & ctx->flags) != ctx->flags) { + dev_dbg(dev, "flags not matching: %08lx vs %08lx\n", + cxld->flags, ctx->flags); + return 0; + } + + for (int i = 0; i < ctx->interleave_ways; i++) { + for (int j = 0; j < ctx->interleave_ways; j++) { + if (ctx->host_bridges[i] == cxlsd->target[j]->dport_dev) { + found++; + break; + } + } + } + + if (found != ctx->interleave_ways) { + dev_dbg(dev, + "Not enough host bridges. 
Found %d for %d interleave ways requested\n", + found, ctx->interleave_ways); + return 0; + } + + /* + * Walk the root decoder resource range relying on cxl_rwsem.region to + * preclude sibling arrival/departure and find the largest free space + * gap. + */ + lockdep_assert_held_read(&cxl_rwsem.region); + res = cxlrd->res->child; + + /* With no resource child the whole parent resource is available */ + if (!res) + max = resource_size(cxlrd->res); + else + max = 0; + + for (prev = NULL; res; prev = res, res = res->sibling) { + + if (!prev && res->start == cxlrd->res->start && + res->end == cxlrd->res->end) { + max = resource_size(cxlrd->res); + break; + } + /* + * Sanity check for preventing arithmetic problems below as a + * resource with size 0 could imply using the end field below + * when set to unsigned zero - 1 or all f in hex. + */ + if (prev && !resource_size(prev)) + continue; + + if (!prev && res->start > cxlrd->res->start) { + free = res->start - cxlrd->res->start; + max = max(free, max); + } + if (prev && res->start > prev->end + 1) { + free = res->start - prev->end - 1; + max = max(free, max); + } + } + + if (prev && prev->end + 1 < cxlrd->res->end + 1) { + free = cxlrd->res->end - prev->end; + max = max(free, max); + } + + dev_dbg(cxlrd_dev(cxlrd), "found %pa bytes of free space\n", &max); + if (max > ctx->max_hpa) { + if (ctx->cxlrd) + put_device(cxlrd_dev(ctx->cxlrd)); + get_device(cxlrd_dev(cxlrd)); + ctx->cxlrd = cxlrd; + ctx->max_hpa = max; + } + return 0; +} + +/** + * cxl_get_hpa_freespace - find a root decoder with free capacity per constraints + * @cxlmd: the mem device requiring the HPA + * @interleave_ways: number of entries in @host_bridges + * @flags: CXL_DECODER_F flags for selecting RAM vs PMEM, and Type2 device + * @max_avail_contig: output parameter of max contiguous bytes available in the + * returned decoder + * + * Returns a pointer to a struct cxl_root_decoder + * + * The return tuple of a 'struct cxl_root_decoder' and 'bytes 
available given + * in (@max_avail_contig))' is a point in time snapshot. If by the time the + * caller goes to use this decoder and its capacity is reduced then caller needs + * to loop and retry. + * + * The returned root decoder has an elevated reference count that needs to be + * put with cxl_put_root_decoder(cxlrd). + */ +struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, + int interleave_ways, + unsigned long flags, + resource_size_t *max_avail_contig) +{ + struct cxlrd_max_context ctx = { + .flags = flags, + .interleave_ways = interleave_ways, + }; + struct cxl_port *root_port; + struct cxl_port *endpoint; + + endpoint = cxlmd->endpoint; + if (!endpoint) { + dev_dbg(&cxlmd->dev, "endpoint not linked to memdev\n"); + return ERR_PTR(-ENXIO); + } + + ctx.host_bridges = &endpoint->host_bridge; + + struct cxl_root *root __free(put_cxl_root) = find_cxl_root(endpoint); + if (!root) { + dev_dbg(&endpoint->dev, "endpoint is not related to a root port\n"); + return ERR_PTR(-ENXIO); + } + + root_port = &root->port; + scoped_guard(rwsem_read, &cxl_rwsem.region) + device_for_each_child(&root_port->dev, &ctx, find_max_hpa); + + if (!ctx.cxlrd) + return ERR_PTR(-ENOMEM); + + *max_avail_contig = ctx.max_hpa; + return ctx.cxlrd; +} +EXPORT_SYMBOL_NS_GPL(cxl_get_hpa_freespace, "CXL"); + +void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd) +{ + put_device(cxlrd_dev(cxlrd)); +} +EXPORT_SYMBOL_NS_GPL(cxl_put_root_decoder, "CXL"); + static ssize_t size_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index d7ddca6f71154..78845e0e3e4fd 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -679,6 +679,9 @@ struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev); struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev); struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev); bool is_root_decoder(struct device *dev); + 
+#define cxlrd_dev(cxlrd) (&(cxlrd)->cxlsd.cxld.dev) + bool is_switch_decoder(struct device *dev); bool is_endpoint_decoder(struct device *dev); struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port, diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index b8683c75dfde7..f138bb4c25600 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -264,4 +264,10 @@ int cxl_decoder_detach(struct cxl_region *cxlr, struct cxl_endpoint_decoder *cxled, int pos, enum cxl_detach_mode mode); void unregister_region(void *_cxlr); +struct cxl_port; +struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, + int interleave_ways, + unsigned long flags, + resource_size_t *max); +void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd); #endif /* __CXL_CXL_H__ */ From f96b711633528505809863b96650b1589f53cfb8 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:39 +0000 Subject: [PATCH 70/80] NVIDIA: VR: SAUCE: sfc: get root decoder Use cxl api for getting HPA (Host Physical Address) to use from a CXL root decoder. 
Signed-off-by: Alejandro Lucero Reviewed-by: Martin Habets Acked-by: Edward Cree Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/cxl.h | 15 ------------ drivers/net/ethernet/sfc/Kconfig | 1 + drivers/net/ethernet/sfc/efx_cxl.c | 38 ++++++++++++++++++++++++------ drivers/net/ethernet/sfc/efx_cxl.h | 1 + include/cxl/cxl.h | 15 ++++++++++++ 5 files changed, 48 insertions(+), 22 deletions(-) diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 78845e0e3e4fd..5441a296c351f 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -220,21 +220,6 @@ int cxl_dport_map_rcd_linkcap(struct pci_dev *pdev, struct cxl_dport *dport); #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 -/* - * cxl_decoder flags that define the type of memory / devices this - * decoder supports as well as configuration lock status See "CXL 2.0 - * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. - * Additionally indicate whether decoder settings were autodetected, - * user customized. 
- */ -#define CXL_DECODER_F_RAM BIT(0) -#define CXL_DECODER_F_PMEM BIT(1) -#define CXL_DECODER_F_TYPE2 BIT(2) -#define CXL_DECODER_F_TYPE3 BIT(3) -#define CXL_DECODER_F_LOCK BIT(4) -#define CXL_DECODER_F_ENABLE BIT(5) -#define CXL_DECODER_F_MASK GENMASK(5, 0) - enum cxl_decoder_type { CXL_DECODER_DEVMEM = 2, CXL_DECODER_HOSTONLYMEM = 3, diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig index 979f2801e2a8e..e959d9b4f4cef 100644 --- a/drivers/net/ethernet/sfc/Kconfig +++ b/drivers/net/ethernet/sfc/Kconfig @@ -69,6 +69,7 @@ config SFC_MCDI_LOGGING config SFC_CXL bool "Solarflare SFC9100-family CXL support" depends on SFC && CXL_BUS >= SFC + depends on CXL_REGION default SFC help This enables SFC CXL support if the kernel is configuring CXL for diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 3d462aedb745f..89e1b4a784504 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -18,6 +18,7 @@ int efx_cxl_init(struct efx_probe_data *probe_data) { struct efx_nic *efx = &probe_data->efx; struct pci_dev *pci_dev = efx->pci_dev; + resource_size_t max_size; struct efx_cxl *cxl; struct range range; u16 dvsec; @@ -109,6 +110,24 @@ int efx_cxl_init(struct efx_probe_data *probe_data) pci_err(pci_dev, "CXL ioremap region (%pra) failed", &range); return -ENOMEM; } + cxl->hdm_was_committed = true; + } else { + cxl->cxlrd = cxl_get_hpa_freespace(cxl->cxlmd, 1, + CXL_DECODER_F_RAM | + CXL_DECODER_F_TYPE2, + &max_size); + + if (IS_ERR(cxl->cxlrd)) { + dev_err(&pci_dev->dev, "cxl_get_hpa_freespace failed\n"); + return PTR_ERR(cxl->cxlrd); + } + + if (max_size < EFX_CTPIO_BUFFER_SIZE) { + dev_err(&pci_dev->dev, "%s: not enough free HPA space %pap < %u\n", + __func__, &max_size, EFX_CTPIO_BUFFER_SIZE); + cxl_put_root_decoder(cxl->cxlrd); + return -ENOSPC; + } } probe_data->cxl = cxl; @@ -121,13 +140,18 @@ void efx_cxl_exit(struct efx_probe_data *probe_data) if (!probe_data->cxl) 
return; - iounmap(probe_data->cxl->ctpio_cxl); - cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, DETACH_INVALIDATE); - unregister_region(probe_data->cxl->efx_region); - - /* Release decoder reference from cxl_get_committed_decoder() */ - if (probe_data->cxl->cxled) - put_device(&probe_data->cxl->cxled->cxld.dev); + if (probe_data->cxl->hdm_was_committed) { + iounmap(probe_data->cxl->ctpio_cxl); + cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, + DETACH_INVALIDATE); + unregister_region(probe_data->cxl->efx_region); + + /* Release decoder reference from cxl_get_committed_decoder() */ + if (probe_data->cxl->cxled) + put_device(&probe_data->cxl->cxled->cxld.dev); + } else { + cxl_put_root_decoder(probe_data->cxl->cxlrd); + } } MODULE_IMPORT_NS("CXL"); diff --git a/drivers/net/ethernet/sfc/efx_cxl.h b/drivers/net/ethernet/sfc/efx_cxl.h index 961639cef692e..9a92e386695bb 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.h +++ b/drivers/net/ethernet/sfc/efx_cxl.h @@ -27,6 +27,7 @@ struct efx_cxl { struct cxl_root_decoder *cxlrd; struct cxl_port *endpoint; struct cxl_endpoint_decoder *cxled; + bool hdm_was_committed; struct cxl_region *efx_region; void __iomem *ctpio_cxl; }; diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index f138bb4c25600..6fe5c15bd3c5d 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -153,6 +153,21 @@ struct cxl_dpa_partition { #define CXL_NR_PARTITIONS_MAX 2 +/* + * cxl_decoder flags that define the type of memory / devices this + * decoder supports as well as configuration lock status See "CXL 2.0 + * 8.2.5.12.7 CXL HDM Decoder 0 Control Register" for details. + * Additionally indicate whether decoder settings were autodetected, + * user customized. 
+ */ +#define CXL_DECODER_F_RAM BIT(0) +#define CXL_DECODER_F_PMEM BIT(1) +#define CXL_DECODER_F_TYPE2 BIT(2) +#define CXL_DECODER_F_TYPE3 BIT(3) +#define CXL_DECODER_F_LOCK BIT(4) +#define CXL_DECODER_F_ENABLE BIT(5) +#define CXL_DECODER_F_MASK GENMASK(5, 0) + struct cxl_memdev_ops { int (*probe)(struct cxl_memdev *cxlmd); }; From 962afbc5427abbffa23ea8c048149f3e2c0c9a1e Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:40 +0000 Subject: [PATCH 71/80] NVIDIA: VR: SAUCE: cxl: Define a driver interface for DPA allocation Region creation involves finding available DPA (device-physical-address) capacity to map into HPA (host-physical-address) space. In order to support CXL Type2 devices, define an API, cxl_request_dpa(), that tries to allocate the DPA memory the driver requires to operate. The memory requested should not be bigger than the max available HPA obtained previously with cxl_get_hpa_freespace(). Based on https://lore.kernel.org/linux-cxl/168592158743.1948938.7622563891193802610.stgit@dwillia2-xfh.jf.intel.com/ Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/hdm.c | 84 ++++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxl.h | 1 + include/cxl/cxl.h | 5 +++ 3 files changed, 90 insertions(+) diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c index 9be199caaf2be..6c85a849bee79 100644 --- a/drivers/cxl/core/hdm.c +++ b/drivers/cxl/core/hdm.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "cxlmem.h" #include "core.h" @@ -560,6 +561,12 @@ bool cxl_resource_contains_addr(const struct resource *res, const resource_size_ +/** + * cxl_dpa_free - release DPA (Device Physical Address) + * @cxled: endpoint decoder linked to the DPA + * + * Returns 0 or error. 
+ */ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) { struct cxl_port *port = cxled_to_port(cxled); @@ -586,6 +593,7 @@ int cxl_dpa_free(struct cxl_endpoint_decoder *cxled) devm_cxl_dpa_release(cxled); return 0; } +EXPORT_SYMBOL_NS_GPL(cxl_dpa_free, "CXL"); int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, enum cxl_partition_mode mode) @@ -617,6 +625,82 @@ int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled, return 0; } +static int find_free_decoder(struct device *dev, const void *data) +{ + struct cxl_endpoint_decoder *cxled; + struct cxl_port *port; + + if (!is_endpoint_decoder(dev)) + return 0; + + cxled = to_cxl_endpoint_decoder(dev); + port = cxled_to_port(cxled); + + return cxled->cxld.id == (port->hdm_end + 1); +} + +static struct cxl_endpoint_decoder * +cxl_find_free_decoder(struct cxl_memdev *cxlmd) +{ + struct cxl_port *endpoint = cxlmd->endpoint; + struct device *dev; + + guard(rwsem_read)(&cxl_rwsem.dpa); + dev = device_find_child(&endpoint->dev, NULL, + find_free_decoder); + if (!dev) + return NULL; + + return to_cxl_endpoint_decoder(dev); +} + +/** + * cxl_request_dpa - search and reserve DPA given input constraints + * @cxlmd: memdev with an endpoint port with available decoders + * @mode: CXL partition mode (ram vs pmem) + * @alloc: dpa size required + * + * Returns a pointer to a 'struct cxl_endpoint_decoder' on success or + * an errno encoded pointer on failure. + * + * Given that a region needs to allocate from limited HPA capacity it + * may be the case that a device has more mappable DPA capacity than + * available HPA. The expectation is that @alloc is a driver known + * value based on the device capacity but which could not be fully + * available due to HPA constraints. + * + * Returns a pinned cxl_decoder with at least @alloc bytes of capacity + * reserved, or an error pointer. 
The caller is also expected to own the + * lifetime of the memdev registration associated with the endpoint to + * pin the decoder registered as well. + */ +struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd, + enum cxl_partition_mode mode, + resource_size_t alloc) +{ + int rc; + + if (!IS_ALIGNED(alloc, SZ_256M)) + return ERR_PTR(-EINVAL); + + struct cxl_endpoint_decoder *cxled __free(put_cxled) = + cxl_find_free_decoder(cxlmd); + + if (!cxled) + return ERR_PTR(-ENODEV); + + rc = cxl_dpa_set_part(cxled, mode); + if (rc) + return ERR_PTR(rc); + + rc = cxl_dpa_alloc(cxled, alloc); + if (rc) + return ERR_PTR(rc); + + return no_free_ptr(cxled); +} +EXPORT_SYMBOL_NS_GPL(cxl_request_dpa, "CXL"); + static int __cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, u64 size) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 5441a296c351f..06a111392c3b9 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -640,6 +640,7 @@ struct cxl_root *find_cxl_root(struct cxl_port *port); DEFINE_FREE(put_cxl_root, struct cxl_root *, if (_T) put_device(&_T->port.dev)) DEFINE_FREE(put_cxl_port, struct cxl_port *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) +DEFINE_FREE(put_cxled, struct cxl_endpoint_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxld.dev)) DEFINE_FREE(put_cxl_root_decoder, struct cxl_root_decoder *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->cxlsd.cxld.dev)) DEFINE_FREE(put_cxl_region, struct cxl_region *, if (!IS_ERR_OR_NULL(_T)) put_device(&_T->dev)) diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 6fe5c15bd3c5d..7bd88e6b8598e 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -7,6 +7,7 @@ #include #include +#include #include /** @@ -285,4 +286,8 @@ struct cxl_root_decoder *cxl_get_hpa_freespace(struct cxl_memdev *cxlmd, unsigned long flags, resource_size_t *max); void cxl_put_root_decoder(struct cxl_root_decoder *cxlrd); +struct cxl_endpoint_decoder 
*cxl_request_dpa(struct cxl_memdev *cxlmd, + enum cxl_partition_mode mode, + resource_size_t alloc); +int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); #endif /* __CXL_CXL_H__ */ From a9dab9592d9d68a46e329b2834254895696a05f1 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:41 +0000 Subject: [PATCH 72/80] NVIDIA: VR: SAUCE: sfc: get endpoint decoder Use cxl api for getting DPA (Device Physical Address) to use through an endpoint decoder. Signed-off-by: Alejandro Lucero Reviewed-by: Martin Habets Acked-by: Edward Cree Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/net/ethernet/sfc/efx_cxl.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 89e1b4a784504..1b943c29eaa61 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -128,6 +128,14 @@ int efx_cxl_init(struct efx_probe_data *probe_data) cxl_put_root_decoder(cxl->cxlrd); return -ENOSPC; } + + cxl->cxled = cxl_request_dpa(cxl->cxlmd, CXL_PARTMODE_RAM, + EFX_CTPIO_BUFFER_SIZE); + if (IS_ERR(cxl->cxled)) { + pci_err(pci_dev, "CXL accel request DPA failed"); + cxl_put_root_decoder(cxl->cxlrd); + return PTR_ERR(cxl->cxled); + } } probe_data->cxl = cxl; @@ -150,6 +158,7 @@ void efx_cxl_exit(struct efx_probe_data *probe_data) if (probe_data->cxl->cxled) put_device(&probe_data->cxl->cxled->cxld.dev); } else { + cxl_dpa_free(probe_data->cxl->cxled); cxl_put_root_decoder(probe_data->cxl->cxlrd); } } From fca7c05a6a11c272462abd8b0ba94771d1a1fadd Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:42 +0000 Subject: [PATCH 73/80] NVIDIA: VR: SAUCE: cxl: Make region type based on endpoint type Current code is expecting Type3 or CXL_DECODER_HOSTONLYMEM devices only. 
Support for Type2 implies region type needs to be based on the endpoint type HDM-D[B] instead. Signed-off-by: Alejandro Lucero Reviewed-by: Zhi Wang Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham Reviewed-by: Alison Schofield Reviewed-by: Davidlohr Bueso Reviewed-by: Davidlohr Bueso (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 50ed086339bba..c3b07045b9aa4 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2783,7 +2783,8 @@ static ssize_t create_ram_region_show(struct device *dev, } static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, - enum cxl_partition_mode mode, int id) + enum cxl_partition_mode mode, int id, + enum cxl_decoder_type target_type) { int rc; @@ -2805,7 +2806,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, return ERR_PTR(-EBUSY); } - return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM); + return devm_cxl_add_region(cxlrd, id, mode, target_type); } static ssize_t create_region_store(struct device *dev, const char *buf, @@ -2819,7 +2820,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf, if (rc != 1) return -EINVAL; - cxlr = __create_region(cxlrd, mode, id); + cxlr = __create_region(cxlrd, mode, id, CXL_DECODER_HOSTONLYMEM); if (IS_ERR(cxlr)) return PTR_ERR(cxlr); @@ -3721,7 +3722,8 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, do { cxlr = __create_region(cxlrd, cxlds->part[part].mode, - atomic_read(&cxlrd->region_id)); + atomic_read(&cxlrd->region_id), + cxled->cxld.target_type); } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); if (IS_ERR(cxlr)) { From a5b7643d797fb4b04b59588d53fa7f12f0a1f140 Mon Sep 17 00:00:00 2001 
From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:43 +0000 Subject: [PATCH 74/80] NVIDIA: VR: SAUCE: cxl/region: Factor out interleave ways setup Region creation based on Type3 devices is triggered from user space allowing memory combination through interleaving. In preparation for kernel driven region creation, that is Type2 drivers triggering region creation backed with its advertised CXL memory, factor out a common helper from the user-sysfs region setup for interleave ways. Signed-off-by: Alejandro Lucero Reviewed-by: Zhi Wang Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham Reviewed-by: Alison Schofield (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 43 ++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c3b07045b9aa4..fff1cc304c649 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -491,22 +491,14 @@ static ssize_t interleave_ways_show(struct device *dev, static const struct attribute_group *get_cxl_region_target_group(void); -static ssize_t interleave_ways_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_ways(struct cxl_region *cxlr, int val) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - unsigned int val, save; - int rc; + int save, rc; u8 iw; - rc = kstrtouint(buf, 0, &val); - if (rc) - return rc; - rc = ways_to_eiw(val, &iw); if (rc) return rc; @@ -521,9 +513,7 @@ static ssize_t interleave_ways_store(struct device *dev, return -EINVAL; } - ACQUIRE(rwsem_write_kill, 
rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; @@ -531,10 +521,31 @@ static ssize_t interleave_ways_store(struct device *dev, save = p->interleave_ways; p->interleave_ways = val; rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group()); - if (rc) { + if (rc) p->interleave_ways = save; + + return rc; +} + +static ssize_t interleave_ways_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = to_cxl_region(dev); + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + + rc = set_interleave_ways(cxlr, val); + if (rc) return rc; - } return len; } From 6136b2d14d036b1b640d51c2c4b3caa4170cb972 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:44 +0000 Subject: [PATCH 75/80] NVIDIA: VR: SAUCE: cxl/region: Factor out interleave granularity setup Region creation based on Type3 devices is triggered from user space allowing memory combination through interleaving. In preparation for kernel driven region creation, that is Type2 drivers triggering region creation backed with its advertised CXL memory, factor out a common helper from the user-sysfs region setup forinterleave granularity. 
Signed-off-by: Alejandro Lucero Reviewed-by: Zhi Wang Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Reviewed-by: Ben Cheatham Reviewed-by: Alison Schofield (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index fff1cc304c649..f1e301addba05 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -565,21 +565,14 @@ static ssize_t interleave_granularity_show(struct device *dev, return sysfs_emit(buf, "%d\n", p->interleave_granularity); } -static ssize_t interleave_granularity_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) +static int set_interleave_granularity(struct cxl_region *cxlr, int val) { - struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent); + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; - struct cxl_region *cxlr = to_cxl_region(dev); struct cxl_region_params *p = &cxlr->params; - int rc, val; + int rc; u16 ig; - rc = kstrtoint(buf, 0, &val); - if (rc) - return rc; - rc = granularity_to_eig(val, &ig); if (rc) return rc; @@ -595,14 +588,32 @@ static ssize_t interleave_granularity_store(struct device *dev, if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity) return -EINVAL; - ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); - if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) - return rc; - + lockdep_assert_held_write(&cxl_rwsem.region); if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) return -EBUSY; p->interleave_granularity = val; + return 0; +} + +static ssize_t interleave_granularity_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct cxl_region *cxlr = 
to_cxl_region(dev); + int rc, val; + + rc = kstrtoint(buf, 0, &val); + if (rc) + return rc; + + ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region); + if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem))) + return rc; + + rc = set_interleave_granularity(cxlr, val); + if (rc) + return rc; return len; } From 757ac63eff340c06392350745c0fbd0feeabff95 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:45 +0000 Subject: [PATCH 76/80] NVIDIA: VR: SAUCE: cxl: Allow region creation by type2 drivers Creating a CXL region requires userspace intervention through the cxl sysfs files. Type2 support should allow accelerator drivers to create such cxl region from kernel code. Adding that functionality and integrating it with current support for memory expanders. Based on https://lore.kernel.org/linux-cxl/168592159835.1948938.1647215579839222774.stgit@dwillia2-xfh.jf.intel.com/ Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) [jan: Fix hardcoded position 0 for all interleave ways in cxl_create_region()] Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 129 ++++++++++++++++++++++++++++++++++++-- include/cxl/cxl.h | 3 + 2 files changed, 126 insertions(+), 6 deletions(-) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index f1e301addba05..d906a36ca0850 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2894,6 +2894,14 @@ cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name) return to_cxl_region(region_dev); } +static void drop_region(struct cxl_region *cxlr) +{ + struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent); + struct cxl_port *port = cxlrd_to_port(cxlrd); + + devm_release_action(port->uport_dev, unregister_region, cxlr); +} + static ssize_t delete_region_store(struct device *dev, struct device_attribute *attr, const char *buf, 
size_t len) @@ -3724,14 +3732,12 @@ static int __construct_region(struct cxl_region *cxlr, return 0; } -/* Establish an empty region covering the given HPA range */ -static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, +static struct cxl_region *construct_region_begin(struct cxl_root_decoder *cxlrd, struct cxl_endpoint_decoder *cxled) { struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); - struct cxl_port *port = cxlrd_to_port(cxlrd); struct cxl_dev_state *cxlds = cxlmd->cxlds; - int rc, part = READ_ONCE(cxled->part); + int part = READ_ONCE(cxled->part); struct cxl_region *cxlr; if (part < 0 || part >= cxlds->nr_partitions) { @@ -3748,13 +3754,26 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, cxled->cxld.target_type); } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY); - if (IS_ERR(cxlr)) { + if (IS_ERR(cxlr)) dev_err(cxlmd->dev.parent, "%s:%s: %s failed assign region: %ld\n", dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), __func__, PTR_ERR(cxlr)); + + return cxlr; +} + +/* Establish an empty region covering the given HPA range */ +static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_port *port = cxlrd_to_port(cxlrd); + struct cxl_region *cxlr; + int rc; + + cxlr = construct_region_begin(cxlrd, cxled); + if (IS_ERR(cxlr)) return cxlr; - } rc = __construct_region(cxlr, cxlrd, cxled); if (rc) { @@ -3765,6 +3784,104 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd, return cxlr; } +DEFINE_FREE(cxl_region_drop, struct cxl_region *, if (_T) drop_region(_T)) + +static struct cxl_region * +__construct_new_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder **cxled, int ways) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled[0]); + struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld; + struct cxl_region_params *p; + resource_size_t size = 0; + int rc, i; + + struct cxl_region *cxlr 
__free(cxl_region_drop) = + construct_region_begin(cxlrd, cxled[0]); + if (IS_ERR(cxlr)) + return cxlr; + + guard(rwsem_write)(&cxl_rwsem.region); + + /* + * Sanity check. This should not happen with an accel driver handling + * the region creation. + */ + p = &cxlr->params; + if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) { + dev_err(cxlmd->dev.parent, + "%s:%s: %s unexpected region state\n", + dev_name(&cxlmd->dev), dev_name(&cxled[0]->cxld.dev), + __func__); + return ERR_PTR(-EBUSY); + } + + rc = set_interleave_ways(cxlr, ways); + if (rc) + return ERR_PTR(rc); + + rc = set_interleave_granularity(cxlr, cxld->interleave_granularity); + if (rc) + return ERR_PTR(rc); + + scoped_guard(rwsem_read, &cxl_rwsem.dpa) { + for (i = 0; i < ways; i++) { + if (!cxled[i]->dpa_res) + return ERR_PTR(-EINVAL); + size += resource_size(cxled[i]->dpa_res); + } + + rc = alloc_hpa(cxlr, size); + if (rc) + return ERR_PTR(rc); + + for (i = 0; i < ways; i++) { + rc = cxl_region_attach(cxlr, cxled[i], i); + if (rc) + return ERR_PTR(rc); + } + } + + rc = cxl_region_decode_commit(cxlr); + if (rc) + return ERR_PTR(rc); + + p->state = CXL_CONFIG_COMMIT; + + return no_free_ptr(cxlr); +} + +/** + * cxl_create_region - Establish a region given an endpoint decoder + * @cxlrd: root decoder to allocate HPA + * @cxled: endpoint decoders with reserved DPA capacity + * @ways: interleave ways required + * + * Returns a fully formed region in the commit state and attached to the + * cxl_region driver. 
+ */ +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder **cxled, + int ways) +{ + struct cxl_region *cxlr; + + mutex_lock(&cxlrd->range_lock); + cxlr = __construct_new_region(cxlrd, cxled, ways); + mutex_unlock(&cxlrd->range_lock); + if (IS_ERR(cxlr)) + return cxlr; + + if (device_attach(&cxlr->dev) <= 0) { + dev_err(&cxlr->dev, "failed to create region\n"); + drop_region(cxlr); + return ERR_PTR(-ENODEV); + } + + return cxlr; +} +EXPORT_SYMBOL_NS_GPL(cxl_create_region, "CXL"); + static struct cxl_region * cxl_find_region_by_range(struct cxl_root_decoder *cxlrd, struct range *hpa) { diff --git a/include/cxl/cxl.h b/include/cxl/cxl.h index 7bd88e6b8598e..e6176677ea940 100644 --- a/include/cxl/cxl.h +++ b/include/cxl/cxl.h @@ -290,4 +290,7 @@ struct cxl_endpoint_decoder *cxl_request_dpa(struct cxl_memdev *cxlmd, enum cxl_partition_mode mode, resource_size_t alloc); int cxl_dpa_free(struct cxl_endpoint_decoder *cxled); +struct cxl_region *cxl_create_region(struct cxl_root_decoder *cxlrd, + struct cxl_endpoint_decoder **cxled, + int ways); #endif /* __CXL_CXL_H__ */ From 43ffc35d34e6a9559c7566924d731138a4bb96a0 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:46 +0000 Subject: [PATCH 77/80] NVIDIA: VR: SAUCE: cxl: Avoid dax creation for accelerators By definition a type2 cxl device will use the host managed memory for specific functionality, therefore it should not be available to other uses. 
Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Reviewed-by: Dave Jiang Reviewed-by: Ben Cheatham (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/cxl/core/region.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index d906a36ca0850..32d9b9bdc53a6 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -4124,6 +4124,13 @@ static int cxl_region_probe(struct device *dev) if (rc) return rc; + /* + * HDM-D[B] (device-memory) regions have accelerator specific usage. + * Skip device-dax registration. + */ + if (cxlr->type == CXL_DECODER_DEVMEM) + return 0; + /* * From this point on any path that changes the region's state away from * CXL_CONFIG_COMMIT is also responsible for releasing the driver. From 8953003492056422c747051451b8b716a2efdb77 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:47 +0000 Subject: [PATCH 78/80] NVIDIA: VR: SAUCE: sfc: create cxl region Use cxl api for creating a region using the endpoint decoder related to a DPA range. 
Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) Signed-off-by: Jiandi An --- drivers/net/ethernet/sfc/efx_cxl.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 1b943c29eaa61..805889ce5aa93 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -136,6 +136,14 @@ int efx_cxl_init(struct efx_probe_data *probe_data) cxl_put_root_decoder(cxl->cxlrd); return PTR_ERR(cxl->cxled); } + + cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1); + if (IS_ERR(cxl->efx_region)) { + pci_err(pci_dev, "CXL accel create region failed"); + cxl_put_root_decoder(cxl->cxlrd); + cxl_dpa_free(cxl->cxled); + return PTR_ERR(cxl->efx_region); + } } probe_data->cxl = cxl; @@ -152,15 +160,18 @@ void efx_cxl_exit(struct efx_probe_data *probe_data) iounmap(probe_data->cxl->ctpio_cxl); cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, DETACH_INVALIDATE); - unregister_region(probe_data->cxl->efx_region); /* Release decoder reference from cxl_get_committed_decoder() */ if (probe_data->cxl->cxled) put_device(&probe_data->cxl->cxled->cxld.dev); } else { + cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, + DETACH_INVALIDATE); cxl_dpa_free(probe_data->cxl->cxled); cxl_put_root_decoder(probe_data->cxl->cxlrd); } + + unregister_region(probe_data->cxl->efx_region); } MODULE_IMPORT_NS("CXL"); From 01fda7797206d47b0db8820f82ca2b2b44f0a0c9 Mon Sep 17 00:00:00 2001 From: Alejandro Lucero Date: Fri, 5 Dec 2025 11:52:48 +0000 Subject: [PATCH 79/80] NVIDIA: VR: SAUCE: sfc: support pio mapping based on cxl A PIO buffer is a region of device memory to which the driver can write a packet for TX, with the device handling the transmit doorbell without requiring a DMA for getting the packet data, which helps reducing 
latency in certain exchanges. With CXL mem protocol this latency can be lowered further. With a device supporting CXL and successfully initialised, use the cxl region to map the memory range and use this mapping for PIO buffers. Add the disabling of those CXL-based PIO buffers if the callback for potential cxl endpoint removal by the CXL code happens. Signed-off-by: Alejandro Lucero Reviewed-by: Jonathan Cameron Reviewed-by: Dave Jiang (backported from https://lore.kernel.org/linux-cxl/20251205115248.772945-1-alejandro.lucero-palau@amd.com/) [jan: Fix pio_write_vi_base not set in CXL PIO path] [jan: Fix region leak in efx_cxl_init() error path] Signed-off-by: Jiandi An --- drivers/net/ethernet/sfc/ef10.c | 51 +++++++++++++++++++++++---- drivers/net/ethernet/sfc/efx_cxl.c | 44 ++++++++++++++++------- drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/nic.h | 3 ++ 4 files changed, 81 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index fcec81f862ec5..52c25148844c9 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -24,6 +24,7 @@ #include #include #include +#include "efx_cxl.h" /* Hardware control for EF10 architecture including 'Huntington'. 
*/ @@ -106,7 +107,7 @@ static int efx_ef10_get_vf_index(struct efx_nic *efx) static int efx_ef10_init_datapath_caps(struct efx_nic *efx) { - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V4_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V7_OUT_LEN); struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t outlen; int rc; @@ -177,6 +178,12 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) efx->num_mac_stats); } + if (outlen < MC_CMD_GET_CAPABILITIES_V7_OUT_LEN) + nic_data->datapath_caps3 = 0; + else + nic_data->datapath_caps3 = MCDI_DWORD(outbuf, + GET_CAPABILITIES_V7_OUT_FLAGS3); + return 0; } @@ -919,6 +926,9 @@ static void efx_ef10_forget_old_piobufs(struct efx_nic *efx) static void efx_ef10_remove(struct efx_nic *efx) { struct efx_ef10_nic_data *nic_data = efx->nic_data; +#ifdef CONFIG_SFC_CXL + struct efx_probe_data *probe_data; +#endif int rc; #ifdef CONFIG_SFC_SRIOV @@ -949,7 +959,12 @@ static void efx_ef10_remove(struct efx_nic *efx) efx_mcdi_rx_free_indir_table(efx); +#ifdef CONFIG_SFC_CXL + probe_data = container_of(efx, struct efx_probe_data, efx); + if (nic_data->wc_membase && !probe_data->cxl_pio_in_use) +#else if (nic_data->wc_membase) +#endif iounmap(nic_data->wc_membase); rc = efx_mcdi_free_vis(efx); @@ -1140,6 +1155,9 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) unsigned int channel_vis, pio_write_vi_base, max_vis; struct efx_ef10_nic_data *nic_data = efx->nic_data; unsigned int uc_mem_map_size, wc_mem_map_size; +#ifdef CONFIG_SFC_CXL + struct efx_probe_data *probe_data; +#endif void __iomem *membase; int rc; @@ -1263,8 +1281,26 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) iounmap(efx->membase); efx->membase = membase; - /* Set up the WC mapping if needed */ - if (wc_mem_map_size) { + if (!wc_mem_map_size) + goto skip_pio; + + /* Set up the WC mapping */ + +#ifdef CONFIG_SFC_CXL + probe_data = container_of(efx, struct efx_probe_data, efx); + if ((nic_data->datapath_caps3 & + (1 << 
MC_CMD_GET_CAPABILITIES_V7_OUT_CXL_CONFIG_ENABLE_LBN)) && + probe_data->cxl_pio_initialised) { + /* Using PIO through CXL mapping? */ + nic_data->pio_write_vi_base = pio_write_vi_base; + nic_data->pio_write_base = probe_data->cxl->ctpio_cxl + + (pio_write_vi_base * efx->vi_stride + + ER_DZ_TX_PIOBUF - uc_mem_map_size); + probe_data->cxl_pio_in_use = true; + } else +#endif + { + /* Using legacy PIO BAR mapping */ nic_data->wc_membase = ioremap_wc(efx->membase_phys + uc_mem_map_size, wc_mem_map_size); @@ -1279,12 +1315,13 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx) nic_data->wc_membase + (pio_write_vi_base * efx->vi_stride + ER_DZ_TX_PIOBUF - uc_mem_map_size); - - rc = efx_ef10_link_piobufs(efx); - if (rc) - efx_ef10_free_piobufs(efx); } + rc = efx_ef10_link_piobufs(efx); + if (rc) + efx_ef10_free_piobufs(efx); + +skip_pio: netif_dbg(efx, probe, efx->net_dev, "memory BAR at %pa (virtual %p+%x UC, %p+%x WC)\n", &efx->membase_phys, efx->membase, uc_mem_map_size, diff --git a/drivers/net/ethernet/sfc/efx_cxl.c b/drivers/net/ethernet/sfc/efx_cxl.c index 805889ce5aa93..27919dcf61877 100644 --- a/drivers/net/ethernet/sfc/efx_cxl.c +++ b/drivers/net/ethernet/sfc/efx_cxl.c @@ -11,6 +11,7 @@ #include #include "net_driver.h" #include "efx_cxl.h" +#include "efx.h" #define EFX_CTPIO_BUFFER_SIZE SZ_256M @@ -140,15 +141,36 @@ int efx_cxl_init(struct efx_probe_data *probe_data) cxl->efx_region = cxl_create_region(cxl->cxlrd, &cxl->cxled, 1); if (IS_ERR(cxl->efx_region)) { pci_err(pci_dev, "CXL accel create region failed"); - cxl_put_root_decoder(cxl->cxlrd); - cxl_dpa_free(cxl->cxled); - return PTR_ERR(cxl->efx_region); + rc = PTR_ERR(cxl->efx_region); + goto err_dpa; + } + + rc = cxl_get_region_range(cxl->efx_region, &range); + if (rc) { + pci_err(pci_dev, "CXL getting regions params failed"); + goto err_detach; + } + + cxl->ctpio_cxl = ioremap(range.start, range.end - range.start + 1); + if (!cxl->ctpio_cxl) { + pci_err(pci_dev, "CXL ioremap region (%pra) 
failed", &range); + rc = -ENOMEM; + goto err_detach; } } probe_data->cxl = cxl; + probe_data->cxl_pio_initialised = true; return 0; + +err_detach: + cxl_decoder_detach(NULL, cxl->cxled, 0, DETACH_INVALIDATE); + unregister_region(cxl->efx_region); +err_dpa: + cxl_put_root_decoder(cxl->cxlrd); + cxl_dpa_free(cxl->cxled); + return rc; } void efx_cxl_exit(struct efx_probe_data *probe_data) @@ -156,19 +178,17 @@ void efx_cxl_exit(struct efx_probe_data *probe_data) if (!probe_data->cxl) return; - if (probe_data->cxl->hdm_was_committed) { - iounmap(probe_data->cxl->ctpio_cxl); - cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, - DETACH_INVALIDATE); + iounmap(probe_data->cxl->ctpio_cxl); + cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, + DETACH_INVALIDATE); + if (!probe_data->cxl->hdm_was_committed) { + cxl_dpa_free(probe_data->cxl->cxled); + cxl_put_root_decoder(probe_data->cxl->cxlrd); + } else { /* Release decoder reference from cxl_get_committed_decoder() */ if (probe_data->cxl->cxled) put_device(&probe_data->cxl->cxled->cxld.dev); - } else { - cxl_decoder_detach(NULL, probe_data->cxl->cxled, 0, - DETACH_INVALIDATE); - cxl_dpa_free(probe_data->cxl->cxled); - cxl_put_root_decoder(probe_data->cxl->cxlrd); } unregister_region(probe_data->cxl->efx_region); diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 3964b2c56609c..bea4eecdf842d 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -1207,6 +1207,7 @@ struct efx_cxl; * @efx: Efx NIC details * @cxl: details of related cxl objects * @cxl_pio_initialised: cxl initialization outcome. 
+ * @cxl_pio_in_use: PIO using CXL mapping */ struct efx_probe_data { struct pci_dev *pci_dev; @@ -1214,6 +1215,7 @@ struct efx_probe_data { #ifdef CONFIG_SFC_CXL struct efx_cxl *cxl; bool cxl_pio_initialised; + bool cxl_pio_in_use; #endif }; diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 9fa5c4c713abd..c87cc9214690b 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -152,6 +152,8 @@ enum { * %MC_CMD_GET_CAPABILITIES response) * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of * %MC_CMD_GET_CAPABILITIES response) + * @datapath_caps3: Further Capabilities of datapath firmware (FLAGS3 field of + * %MC_CMD_GET_CAPABILITIES response) * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot @@ -186,6 +188,7 @@ struct efx_ef10_nic_data { bool must_check_datapath_caps; u32 datapath_caps; u32 datapath_caps2; + u32 datapath_caps3; unsigned int rx_dpcpu_fw_id; unsigned int tx_dpcpu_fw_id; bool must_probe_vswitching; From 2df3b8f14d26e772a08d10930e57ad1c9961bbb3 Mon Sep 17 00:00:00 2001 From: Jiandi An Date: Mon, 16 Feb 2026 03:14:27 -0600 Subject: [PATCH 80/80] NVIDIA: VR: SAUCE: [Config] CXL config change for CXL type 2 and CXL RAS support CONFIG_CXL_BUS: Changed to bool for CXL Type-2 device support CONFIG_CXL_PCI: Changed to bool for CXL Type-2 device support CONFIG_CXL_MEM: Changed to y due to CXL_BUS being bool CONFIG_CXL_PORT: Changed to y due to CXL_BUS being bool CONFIG_FWCTL: Selected by CXL_BUS when bool CONFIG_CXL_RAS: CXL RAS error handling support CONFIG_CXL_RCH_RAS: CXL Restricted CXL Host protocol error handling CONFIG_SFC_CXL: Solarflare SFC9100-family CXL Type-2 device support CONFIG_ACPI_APEI_EINJ: Required for CONFIG_ACPI_APEI_EINJ_CXL CONFIG_ACPI_APEI_EINJ_CXL: CXL protocol error injection support via APEI EINJ Signed-off-by: Jiandi An --- 
debian.nvidia-6.17/config/annotations | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/debian.nvidia-6.17/config/annotations b/debian.nvidia-6.17/config/annotations index 4fb025e692b3a..1406e71970e4b 100644 --- a/debian.nvidia-6.17/config/annotations +++ b/debian.nvidia-6.17/config/annotations @@ -204,6 +204,37 @@ CONFIG_UBUNTU_ODM_DRIVERS note<'Disable all Ubuntu ODM dri CONFIG_ULTRASOC_SMB policy<{'arm64': 'n'}> CONFIG_ULTRASOC_SMB note<'Required for Grace enablement'> +CONFIG_CXL_BUS policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_BUS note<'Changed to bool for CXL Type-2 device support'> + +CONFIG_CXL_PCI policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_PCI note<'Changed to bool for CXL Type-2 device support'> + +CONFIG_CXL_MEM policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_MEM note<'Changed to y due to CXL_BUS being bool'> + +CONFIG_CXL_PORT policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_PORT note<'Changed to y due to CXL_BUS being bool'> + +CONFIG_FWCTL policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_FWCTL note<'Selected by CXL_BUS when bool'> + +CONFIG_CXL_RAS policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_CXL_RAS note<'CXL RAS error handling support'> + +CONFIG_CXL_RCH_RAS policy<{'amd64': 'n', 'arm64': 'n'}> +CONFIG_CXL_RCH_RAS note<'RAS support for Restricted CXL Host defined in CXL1.1'> + +CONFIG_SFC_CXL policy<{'amd64': 'n', 'arm64': 'n'}> +CONFIG_SFC_CXL note<'Solarflare SFC9100-family CXL Type-2 device support'> + +CONFIG_ACPI_APEI_EINJ policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_ACPI_APEI_EINJ note<'Required for CONFIG_ACPI_APEI_EINJ_CXL'> + + +CONFIG_ACPI_APEI_EINJ_CXL policy<{'amd64': 'y', 'arm64': 'y'}> +CONFIG_ACPI_APEI_EINJ_CXL note<'CXL protocol error injection support via APEI EINJ'> + # ---- Annotations without notes ----