diff --git a/package-lock.json b/package-lock.json index bdd27102..86333d1e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,7 +8,7 @@ "name": "ipfs-specs-website", "version": "1.0.0", "dependencies": { - "spec-generator": "^1.6.1" + "spec-generator": "^1.7.0" } }, "node_modules/@11ty/dependency-tree": { @@ -5201,6 +5201,16 @@ "markdown-it": "bin/markdown-it.js" } }, + "node_modules/markdown-table": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.4.tgz", + "integrity": "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/marked": { "version": "12.0.2", "resolved": "https://registry.npmjs.org/marked/-/marked-12.0.2.tgz", @@ -5354,6 +5364,107 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/mdast-util-gfm": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.1.0.tgz", + "integrity": "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ==", + "license": "MIT", + "dependencies": { + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-gfm-autolink-literal": "^2.0.0", + "mdast-util-gfm-footnote": "^2.0.0", + "mdast-util-gfm-strikethrough": "^2.0.0", + "mdast-util-gfm-table": "^2.0.0", + "mdast-util-gfm-task-list-item": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-autolink-literal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-autolink-literal/-/mdast-util-gfm-autolink-literal-2.0.1.tgz", + "integrity": "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": 
"^4.0.0", + "ccount": "^2.0.0", + "devlop": "^1.0.0", + "mdast-util-find-and-replace": "^3.0.0", + "micromark-util-character": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-footnote": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-footnote/-/mdast-util-gfm-footnote-2.1.0.tgz", + "integrity": "sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-strikethrough": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-strikethrough/-/mdast-util-gfm-strikethrough-2.0.0.tgz", + "integrity": "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-table": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-table/-/mdast-util-gfm-table-2.0.0.tgz", + "integrity": "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "markdown-table": "^3.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + 
"node_modules/mdast-util-gfm-task-list-item": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-task-list-item/-/mdast-util-gfm-task-list-item-2.0.0.tgz", + "integrity": "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/mdast-util-phrasing": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/mdast-util-phrasing/-/mdast-util-phrasing-4.1.0.tgz", @@ -5565,6 +5676,127 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/micromark-extension-gfm": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-3.0.0.tgz", + "integrity": "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w==", + "license": "MIT", + "dependencies": { + "micromark-extension-gfm-autolink-literal": "^2.0.0", + "micromark-extension-gfm-footnote": "^2.0.0", + "micromark-extension-gfm-strikethrough": "^2.0.0", + "micromark-extension-gfm-table": "^2.0.0", + "micromark-extension-gfm-tagfilter": "^2.0.0", + "micromark-extension-gfm-task-list-item": "^2.0.0", + "micromark-util-combine-extensions": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-autolink-literal": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-autolink-literal/-/micromark-extension-gfm-autolink-literal-2.1.0.tgz", + "integrity": "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw==", + "license": "MIT", + "dependencies": { + 
"micromark-util-character": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-footnote": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-footnote/-/micromark-extension-gfm-footnote-2.1.0.tgz", + "integrity": "sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-core-commonmark": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-strikethrough": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-strikethrough/-/micromark-extension-gfm-strikethrough-2.1.0.tgz", + "integrity": "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-table": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-table/-/micromark-extension-gfm-table-2.1.1.tgz", + "integrity": 
"sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-tagfilter": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-tagfilter/-/micromark-extension-gfm-tagfilter-2.0.0.tgz", + "integrity": "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg==", + "license": "MIT", + "dependencies": { + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-task-list-item": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-task-list-item/-/micromark-extension-gfm-task-list-item-2.1.0.tgz", + "integrity": "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/micromark-factory-destination": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.1.tgz", @@ -8658,6 +8890,24 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/remark-gfm": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.1.tgz", + "integrity": 
"sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-gfm": "^3.0.0", + "micromark-extension-gfm": "^3.0.0", + "remark-parse": "^11.0.0", + "remark-stringify": "^11.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/remark-heading-id": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/remark-heading-id/-/remark-heading-id-1.0.1.tgz", @@ -9320,9 +9570,9 @@ "integrity": "sha512-zC8zGoGkmc8J9ndvml8Xksr1Amk9qBujgbF0JAIWO7kXr43w0h/0GJNM/Vustixu+YE8N/MTrQ7N31FvHUACxQ==" }, "node_modules/spec-generator": { - "version": "1.6.1", - "resolved": "https://registry.npmjs.org/spec-generator/-/spec-generator-1.6.1.tgz", - "integrity": "sha512-yDzubb+cWKPlg82SQSaFeHjHVbKu58tlcvbnAy8yFtxnikUL2c06GViBw7yAOZPYjTS/meZ7vQp61IJ0myG0XQ==", + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/spec-generator/-/spec-generator-1.7.0.tgz", + "integrity": "sha512-U5itp3X8mU84chN0xmwgEtAaq/VL4gbC4AK5EdqJxGydhUhz+OnNUcRTKP9v3k9wYVaXbNIeoOzVBnzrNDV1XQ==", "license": "MIT", "dependencies": { "@11ty/eleventy": "^2.0.1", @@ -9342,6 +9592,7 @@ "pluralize": "^8.0.0", "remark": "^15.0.1", "remark-directive": "^3.0.0", + "remark-gfm": "^4.0.1", "remark-heading-id": "^1.0.1", "remark-html": "^16.0.1", "remark-squeeze-paragraphs": "^6.0.0", diff --git a/package.json b/package.json index 53ed9595..2a9f851c 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,6 @@ "license": "", "private": true, "dependencies": { - "spec-generator": "^1.6.1" + "spec-generator": "^1.7.0" } } diff --git a/src/http-gateways/path-gateway.md b/src/http-gateways/path-gateway.md index c207ae6d..12a16ff9 100644 --- a/src/http-gateways/path-gateway.md +++ b/src/http-gateways/path-gateway.md @@ -37,7 +37,7 @@ thanks: name: Protocol Labs url: https://protocol.ai/ xref: - - url + - rfc3986 
- trustless-gateway - subdomain-gateway - dnslink-gateway @@ -518,7 +518,7 @@ When deserialized responses are enabled, and no explicit response format is provided with the request, and the requested data itself has no built-in content type metadata, implementations SHOULD perform content type sniffing based on file name -(from :ref[url] path, or optional [`filename`](#filename-request-query-parameter) parameter) +(from URI path, or optional [`filename`](#filename-request-query-parameter) parameter) and magic bytes to improve the utility of produced responses. For example: diff --git a/src/ipips/ipip-0518.md b/src/ipips/ipip-0518.md new file mode 100644 index 00000000..c7d6e67d --- /dev/null +++ b/src/ipips/ipip-0518.md @@ -0,0 +1,345 @@ +--- +title: "IPIP-0518: URIs in Routing V1 API via Generic Schema" +date: 2026-02-11 +ipip: proposal +editors: + - name: Marcin Rataj + github: lidel + url: https://lidel.org/ + affiliation: + name: Shipyard + url: https://ipshipyard.com +thanks: + - name: Adin Schmahmann + github: aschmahmann + affiliation: + name: Shipyard + url: https://ipshipyard.com +relatedIssues: + - https://github.com/ipfs/specs/issues/192 + - https://github.com/ipfs/specs/issues/496 + - https://github.com/multiformats/multiaddr/issues/63 + - https://github.com/multiformats/multiaddr/issues/87 + - https://github.com/ipshipyard/roadmaps/issues/15 + - https://github.com/ipfs/specs/pull/518 +order: 518 +tags: ['ipips'] +xref: + - rfc3986 +--- + +## Summary + +Introduce a `generic` record schema for the Delegated Routing V1 HTTP API that supports URIs alongside multiaddrs in the `Addrs` field. Unlike the `peer` schema, which is tied to libp2p PeerIDs and multiaddrs, `generic` supports arbitrary identifiers and address formats including HTTP(S) URLs and other URI schemes. This enables HTTP-only providers, WebSeeds, and other non-libp2p use cases without breaking existing clients. 
+ +## Motivation + +The Delegated Routing V1 HTTP API currently requires all provider records to use the `peer` schema, which mandates a libp2p PeerID as the identifier and multiaddrs as the address format. + +Many IPFS services are primarily accessible via HTTP(S) and do not use libp2p: + +- IPFS Gateways (path and subdomain) +- HTTP-based content providers and pinning services +- WebSeed providers + +Converting HTTP(S) URLs to multiaddrs is lossy and error-prone: + +- HTTP URLs must be encoded as `/dns4/example.com/tcp/80/http` or `/dns4/example.com/tcp/443/https` +- URL-to-multiaddr round-trips are not lossless (see [multiaddr#63](https://github.com/multiformats/multiaddr/issues/63)) +- Multiple implementations handle edge cases differently (default ports, paths, fragments, HTTP basic-auth) +- A single `https://example.com` URL supports HTTP/1.1, HTTP/2, and HTTP/3, but multiaddr requires separate entries per transport +- Requiring multiaddr libraries raises the barrier for lightweight HTTP-only clients + +A new schema decouples provider records from libp2p, allowing the ecosystem to experiment with HTTP-only providers, WebSeeds, alternative protocols, and other novel concepts without vendor lock-in -- no need for explicit entries in [multicodec table.csv](https://github.com/multiformats/multicodec/blob/master/table.csv) or being blocked by ecosystem-wide adoption of a new addressing scheme. Existing clients remain unaffected. + +## Detailed design + +### Generic Schema + +A new `generic` schema is added to the [Known Schemas](https://specs.ipfs.tech/routing/http-routing-v1/#known-schemas) section of the Routing V1 spec. + +```json +{ + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com", "/ip4/1.2.3.4/tcp/5000"], + "Protocols": ["transport-ipfs-gateway-http"] +} +``` + +Fields: + +- `ID`: a string identifier for the provider. Unlike the `peer` schema, this is not restricted to libp2p PeerIDs. 
Implementations SHOULD use identifiers that are self-authenticating (e.g. `did:key`), sufficiently unique, and less than 100 bytes. +- `Addrs`: an optional list of addresses as strings. Addresses are duck-typed by prefix: + - If a string starts with `/`, it is parsed as a [multiaddr](https://github.com/multiformats/multiaddr) + - Otherwise, it is parsed as a URI per :cite[rfc3986] + - Clients MUST skip addresses they cannot parse or do not support and continue with remaining entries. This includes URIs with unrecognized schemes, unsupported multiaddrs, or all multiaddrs if the client only supports URIs. +- `Protocols`: an optional list of transfer protocol names associated with this record. Protocol names are opaque strings with a max length of 63 characters, established by rough consensus across compatible implementations per the [robustness principle](https://specs.ipfs.tech/architecture/principles/#robustness). This is a deliberate departure from the `peer` schema, which suggested protocol names require registration in [multicodec table.csv](https://github.com/multiformats/multicodec/blob/master/table.csv), creating an IANA-like chokepoint for adopting new protocols. The `generic` schema removes this gatekeeping: anyone can return novel addresses and protocol names without external approval, and clients that do not recognize them simply skip them without breaking. + +Servers and caching proxies MUST act as pass-through and return `Addrs` and `Protocols` as-is, unless explicitly filtered by the client via `?filter-addrs` or `?filter-protocols` query parameters. + +To allow for protocol-specific fields and future-proofing, the parser MUST allow unknown fields, and clients MUST ignore fields they do not recognize. + +The total serialized size of a single `generic` record MUST be less than 10 KiB. + +### Supported URI Schemes + +Initially, `https://` SHOULD be supported as the primary URI scheme. + +Other URI schemes (e.g. 
`magnet:`, `foo://`, or any future scheme) MAY appear in `Addrs`. Clients MUST skip URIs with schemes they do not support. This ensures new URI schemes can be introduced over time without breaking existing clients or requiring central coordination. + +### URI Requirements + +URIs in the `Addrs` field: + +- MUST be absolute URIs (not relative references) +- MUST include the scheme (e.g. `https://`, `magnet:`) +- MAY include paths, query parameters, or fragments, but clients MUST handle their presence defensively +- SHOULD point to endpoints that support protocols listed in the `Protocols` field + +### Interaction with `filter-addrs` + +The `filter-addrs` query parameter from [IPIP-0484](https://specs.ipfs.tech/ipips/ipip-0484/) applies to `generic` records the same way it applies to `peer` records: + +- Multiaddr addresses (strings starting with `/`) are filtered by multiaddr protocol name. +- URI addresses (strings not starting with `/`) are filtered by URI scheme name. For example, `?filter-addrs=https` matches `https://example.com`. +- This is naturally consistent: `https` is both a multiaddr protocol name (matching `/dns/example.com/tcp/443/https`) and a URI scheme (matching `https://example.com`). +- `?filter-addrs=unknown` includes `generic` records with no known addresses. +- If no addresses remain after filtering, the `generic` record is omitted from the response. + +### Relationship to Peer Schema + +The `peer` schema remains unchanged. It represents a libp2p node identified by PeerID with multiaddr addresses. The `generic` schema is complementary: + +| | `peer` schema | `generic` schema | +|---|---|---| +| `ID` | libp2p PeerID | any string (e.g. 
`did:key`) | +| `Addrs` | multiaddrs only | multiaddrs and/or URIs | +| use case | libp2p-native providers | HTTP-only, WebSeed, custom protocols | + +Routing servers MAY emit both schema types for the same provider: + +```json +{ + "Providers": [ + { + "Schema": "peer", + "ID": "12D3KooW...", + "Addrs": ["/ip4/192.168.1.1/tcp/4001"], + "Protocols": ["transport-bitswap"] + }, + { + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com"], + "Protocols": ["transport-ipfs-gateway-http"] + } + ] +} +``` + +## Design rationale + +### Why a new schema instead of modifying Peer + +The `peer` schema has a hard dependency on libp2p: `ID` is a PeerID and `Addrs` are multiaddrs. Existing clients parse every entry in `Addrs` as a multiaddr. Introducing URIs into the `Addrs` field of the `peer` schema would cause parse errors in all third-party clients that have not been updated, breaking backward compatibility. + +Previous rollouts of new multiaddr protocols (`/quic-v1`, `/webtransport`, `/webrtc-direct`) did not break clients because those strings still parsed as valid multiaddrs, even when the client could not dial them. URIs are not multiaddrs and will fail multiaddr parsing. + +By introducing a new schema, we leverage the existing requirement that clients MUST skip records with unknown schemas: + +- Existing clients continue to work, only seeing `peer` records they already understand +- Updated clients opt in to `generic` records at their own pace +- No flag day or coordinated upgrade required + +### Incremental migration + +Libp2p-native peers continue using the `peer` schema as-is. The migration only impacts providers that are not actual libp2p peers -- such as HTTP-only Trustless Gateways that today must be shoehorned into the `peer` schema with a synthetic PeerID. During the transition period, routing servers can return both `peer` and `generic` records for the same provider. 
Clients that understand `generic` use the richer address information; others fall back to `peer` records with the synthetic PeerID. + +### Decoupling from libp2p + +The `generic` schema removes the hard requirement on libp2p PeerIDs and multiaddrs. This lowers the barrier for building lightweight IPFS clients that only speak HTTP, and enables experimentation with new provider types (WebSeeds, S3-backed storage) without requiring changes to the libp2p specification or multiaddr registry. + +## User benefit + +### For developers + +- HTTP-only providers and HTTP-only stacks can be built without multiaddr encoding/decoding libraries. Lower cognitive overhead: everyone familiar with `https://` URIs knows how to work with them. +- Alternative URI schemes are also easier to integrate than new multiaddr protocols +- Lightweight HTTP-only IPFS clients become feasible without re-implementing libp2p concepts + +### For service providers + +- HTTP(S) endpoints advertised directly as URLs +- Custom address formats supported without multiaddr registry changes +- Protocol-specific metadata via extra fields + +### For end users + +- Lower barrier for new client implementations increases ecosystem diversity +- HTTP-only providers improve compatibility with web-based IPFS implementations + +## Compatibility + +### Backward compatibility + +Fully backward compatible. Existing clients skip `generic` records because they use an unknown schema. The `peer` schema is unchanged. + +### Forward compatibility + +Unknown fields MUST be ignored by clients. New address formats and protocol-specific fields can be added without breaking existing implementations. + +URIs in `Addrs` are not limited to a specific scheme. Clients parsing a `generic` record MUST skip addresses with unrecognized URI schemes, which allows the ecosystem to introduce addressing beyond `https://` without requiring coordination or simultaneous upgrades. + +### Migration path + +1. 
Routing servers emit `generic` records alongside existing `peer` records +2. Clients add support for `generic` schema at their own pace +3. HTTP-only providers that previously required multiaddr conversion can switch to `generic` with native URI addresses + +## Security + +### URI validation + +Implementations SHOULD validate URIs: + +- Verify the URI scheme is supported (e.g. `https://`) +- Validate URI length limits (practical limit: 2048-8192 characters) +- Apply scheme-specific rate limits where appropriate (e.g. rate-limiting HTTP requests to URIs returning non-success responses) + +### HTTPS preference + +For HTTP-based URIs, implementations SHOULD prefer `https://`. The `http://` scheme SHOULD only be allowed for testing and private LAN deployments, gated behind an explicit opt-in flag. + +### DNS considerations + +HTTP(S) URIs rely on DNS resolution. The same security considerations that apply to `/dns`, `/dns4`, and `/dns6` multiaddrs apply here: + +- DNS responses can be spoofed without DNSSEC +- Clients SHOULD use secure DNS transports where available +- Certificate validation MUST be performed for HTTPS URIs on the public internet + +### ID trust + +The `generic` schema `ID` field is self-reported. Clients SHOULD use self-authenticating identifiers (e.g. `did:key`) and verify signatures where applicable. Reputation and resource allocation decisions SHOULD be tied to `ID`. + +## Alternatives + +### URIs in Peer Schema Addrs field + +Adding URIs directly to the `Addrs` field of the existing `peer` schema was considered. The `peer` schema was introduced in [IPIP-0337](https://specs.ipfs.tech/ipips/ipip-0337/) and has been used in production by multiple independent implementations for years. Changing the semantics of `Addrs` from multiaddr-only to a mixed format would break all third-party clients that parse entries as multiaddrs. 
Unlike new multiaddr protocols, which still parse as valid multiaddrs, URIs are a fundamentally different format and cause parse errors. A new schema avoids this by leveraging the existing unknown-schema-skipping behavior. + +### URI-to-multiaddr conversion + +The status quo requires converting HTTP URLs to multiaddrs like `/dns4/example.com/tcp/443/https`. This conversion is lossy: URI paths, fragments, query parameters, and HTTP/3 transport information are lost. Multiple implementations handle edge cases differently, leading to interoperability issues (see [multiaddr#63](https://github.com/multiformats/multiaddr/issues/63)). It also means libp2p-specific address libraries and parsers have to be implemented by every new client, increasing complexity and raising the barrier for new implementations. + +### Custom multiaddr keyword arguments + +Adding keyword arguments to multiaddr protocols was proposed in [multiaddr#87](https://github.com/multiformats/multiaddr/issues/87). This would increase complexity for all multiaddr implementers without addressing the fundamental desire to use standard URIs. + +### Separate URI field in Peer Schema + +Adding a separate `URIs` field to the `peer` schema would complicate the schema and create ambiguity about which field to check for addresses. A new schema is a cleaner separation: `peer` stays focused on libp2p peers, `generic` handles everything else.
+ +## Test fixtures + +### HTTPS-only provider + +```json +{ + "Providers": [ + { + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com"], + "Protocols": ["transport-ipfs-gateway-http"] + } + ] +} +``` + +### Provider with protocol-specific metadata and custom URI scheme + +```json +{ + "Providers": [ + { + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["foo://custom-storage.example.com/bucket"], + "Protocols": ["example-future-protocol"], + "example-future-protocol": {"version": 2, "features": ["foo"]} + } + ] +} +``` + +Clients that do not recognize the `foo://` URI scheme MUST skip that address. + +### Provider with opaque identifier + +The `ID` field is not restricted to `did:key`. Any string identifier can be used: + +```json +{ + "Providers": [ + { + "Schema": "generic", + "ID": "550e8400-e29b-41d4-a716-446655440000", + "Addrs": ["https://cdn.example.com"], + "Protocols": ["transport-ipfs-gateway-http", "example-future-protocol"] + } + ] +} +``` + +### Mixed response with both schemas + +```json +{ + "Providers": [ + { + "Schema": "peer", + "ID": "12D3KooW...", + "Addrs": [ + "/ip4/192.168.1.1/tcp/4001", + "/ip4/192.168.1.1/udp/4001/quic-v1" + ], + "Protocols": ["transport-bitswap"] + }, + { + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com"], + "Protocols": ["transport-ipfs-gateway-http"] + } + ] +} +``` + +### Filtering with `filter-addrs` + +Given a response containing: + +```json +{ + "Providers": [ + { + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://provider.example.com", "/ip4/1.2.3.4/tcp/443/https"], + "Protocols": ["transport-ipfs-gateway-http"] + } + ] +} +``` + +A request with `?filter-addrs=https` returns both addresses, because `https` matches the URI `https://provider.example.com` by URI scheme and the multiaddr `/ip4/1.2.3.4/tcp/443/https` by multiaddr protocol name. 
+ +A request with `?filter-addrs=tcp` returns only the multiaddr `/ip4/1.2.3.4/tcp/443/https`, because `tcp` does not match the URI scheme `https`. + +A request with `?filter-addrs=!https` omits the record entirely, because all addresses are removed by the negative filter. + +## Copyright + +Copyright and related rights waived via [CC0](https://creativecommons.org/publicdomain/zero/1.0/). diff --git a/src/routing/http-routing-v1.md b/src/routing/http-routing-v1.md index 038adf0c..bb13261a 100644 --- a/src/routing/http-routing-v1.md +++ b/src/routing/http-routing-v1.md @@ -4,7 +4,7 @@ description: > Delegated routing is a mechanism for IPFS implementations to use for offloading content routing, peer routing and naming to another process/server. This specification describes an HTTP API for delegated routing of content, peers, and IPNS. -date: 2025-12-17 +date: 2026-02-11 maturity: reliable editors: - name: Marcin Rataj @@ -50,7 +50,9 @@ thanks: url: https://ipshipyard.com xref: - ipip-0337 + - ipip-0518 - ipns-record + - rfc3986 order: 3 tags: ['routing'] --- @@ -91,16 +93,17 @@ This API uses a standard version prefix in the path, such as `/v1/...`. If a bac Optional `?filter-addrs` to apply Network Address Filtering from [IPIP-484](https://specs.ipfs.tech/ipips/ipip-0484/). -- `?filter-addrs=` optional parameter that indicates which network transports to return by filtering the multiaddrs in the `Addrs` field of the [Peer schema](#peer-schema). -- The value of the `filter-addrs` parameter is a comma-separated (`,` or `%2C`) list of network transport protocol _name strings_ as defined in the [multiaddr protocol registry](https://github.com/multiformats/multiaddr/blob/master/protocols.csv), e.g. `?filter-addrs=tls,webrtc-direct,webtransport`. -- `unknown` can be be passed to include providers whose multiaddrs are unknown, e.g. `?filter-addrs=unknown`. This allows for not removing providers whose multiaddrs are unknown at the time of filtering (e.g. 
keeping DHT results that require additional peer lookup). -- Multiaddrs are filtered by checking if the protocol name appears in any of the multiaddrs (logical OR). -- Negative filtering is done by prefixing the protocol name with `!`, e.g. to skip IPv6 and QUIC addrs: `?filter-addrs=!ip6,!quic-v1`. Note that negative filtering is done by checking if the protocol name does not appear in any of the multiaddrs (logical AND). +- `?filter-addrs=` optional parameter that indicates which network addresses to return by filtering the `Addrs` field of [Peer schema](#peer-schema) and [Generic schema](#generic-schema) provider records. +- The value is a comma-separated (`,` or `%2C`) list of filter names. +- For multiaddr addresses (all addresses in the `peer` schema, and strings starting with `/` in the `generic` schema): filter names are matched against multiaddr protocol _name strings_ as defined in the [multiaddr protocol registry](https://github.com/multiformats/multiaddr/blob/master/protocols.csv), e.g. `?filter-addrs=tls,webrtc-direct,webtransport`. A multiaddr matches a filter if the protocol name appears anywhere in the multiaddr (logical OR). +- For URI addresses in the [Generic schema](#generic-schema) (strings not starting with `/`): filter names are matched against the URI scheme per :cite[rfc3986]. For example, `?filter-addrs=https` matches `https://example.com`. +- `unknown` can be passed to include records whose addresses are unknown, e.g. `?filter-addrs=unknown`. This allows for not removing records whose addresses are unknown at the time of filtering (e.g. keeping DHT results that require additional peer lookup). +- Negative filtering is done by prefixing the name with `!`, e.g. to skip IPv6 and QUIC addrs: `?filter-addrs=!ip6,!quic-v1`. Negative filtering checks that the name does not appear in any of the addresses (logical AND). - If no parameter is passed, the default behavior is to return the original list of addresses unchanged. 
-- If only negative filters are provided, addresses not passing any of the negative filters are included. -- If positive filters are provided, only addresses passing at least one positive filter (and no negative filters) are included. +- If only negative filters are provided, addresses not matching any of the negative filters are included. +- If positive filters are provided, only addresses matching at least one positive filter (and no negative filters) are included. - If both positive and negative filters are provided, the address must pass all negative filters and at least one positive filter to be included. -- If there are no multiaddrs that match the passed transports, the provider is omitted from the response. +- If no addresses remain after filtering, the record is omitted from the response. - Filtering is case-insensitive. ##### `filter-protocols` (providers request query parameter) @@ -110,10 +113,10 @@ Optional `?filter-protocols` to apply IPFS Protocol Filtering from [IPIP-484](ht - The `filter-protocols` parameter is a comma-separated (`,` or `%2C`) list of transfer protocol names, e.g. `?filter-protocols=unknown,transport-bitswap,transport-ipfs-gateway-http`. - Transfer protocols names should be treated as opaque strings and have a max length of 63 characters. A non-exhaustive list of transfer protocols are defined per convention in the [multicodec registry](https://github.com/multiformats/multicodec/blob/3b7b52deb31481790bc4bae984d8675bda4e0c82/table.csv#L149-L151). - Implementations MUST preserve all transfer protocol names when returning a positive result that matches one or more of them. -- A special `unknown` name can be be passed to include providers whose transfer protocol list is empty (unknown), e.g. `?filter-protocols=unknown`. This allows for including providers returned from the DHT that do not contain explicit transfer protocol information. 
-- Providers are filtered by checking if the transfer protocol name appears in the `Protocols` array (logical OR). -- If the provider doesn't match any of the passed transfer protocols, the provider is omitted from the response. -- If a provider passes the filter, it is returned unchanged, i.e. the full set of protocols is returned including protocols that not included in the filter. (note that this is different from `filter-addrs` where only the multiaddrs that pass the filter are returned) +- A special `unknown` name can be passed to include records whose transfer protocol list is empty (unknown), e.g. `?filter-protocols=unknown`. This allows for including providers returned from the DHT that do not contain explicit transfer protocol information. +- Records are filtered by checking if the transfer protocol name appears in the `Protocols` array (logical OR). This applies to both [Peer schema](#peer-schema) and [Generic schema](#generic-schema) records. +- If the record doesn't match any of the passed transfer protocols, it is omitted from the response. +- If a record passes the filter, it is returned unchanged, i.e. the full set of protocols is returned including protocols not included in the filter. (note that this is different from `filter-addrs` where only the addresses that pass the filter are returned) - Filtering is case-insensitive. - If no parameter is passed, the default behavior is to not filter by transfer protocol. @@ -425,6 +428,50 @@ the case, the field MUST be ignored. ::: +### Generic Schema + +The `generic` schema represents a provider that is not necessarily a libp2p peer. Unlike the `peer` schema, identifiers are not limited to PeerIDs and addresses are not limited to multiaddrs. See [IPIP-0518](https://specs.ipfs.tech/ipips/ipip-0518/) for motivation and design rationale. + +```json +{ + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com"], + "Protocols": ["transport-ipfs-gateway-http"] + ... 
+} +``` + +- `ID`: a string identifier for the provider. SHOULD be self-authenticating (e.g. `did:key`) and less than 100 bytes. +- `Addrs`: an optional list of addresses as strings. Addresses are duck-typed based on their format: + - If a string starts with `/`, it is parsed as a [multiaddr][multiaddr] + - Otherwise, it is parsed as a URI per :cite[rfc3986] + - Clients MUST skip addresses they cannot parse or do not support and continue with remaining entries. This includes URIs with unrecognized schemes, unsupported multiaddrs, or all multiaddrs if the client only supports URIs. +- `Protocols`: an optional list of transfer protocol names associated with this record. Protocol names are opaque strings with a max length of 63 characters, established by rough consensus across compatible implementations per the [robustness principle](https://specs.ipfs.tech/architecture/principles/#robustness). + +Servers and caching proxies MUST act as pass-through and return `Addrs` and `Protocols` as-is, unless explicitly filtered by the client via `?filter-addrs` or `?filter-protocols` query parameters. + +The total serialized size of a single `generic` record MUST be less than 10 KiB. + +:::note + +To allow for protocol-specific fields and future-proofing, the parser MUST +allow for unknown fields, and the clients MUST ignore unknown ones. + +Below is an example of a `generic` record with protocol-specific extra fields: + +```json +{ + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://provider.example.com"], + "Protocols": ["example-future-protocol"], + "example-future-protocol": {"version": 2, "features": ["foo"]} +} +``` + +::: + ### Legacy Schemas Legacy schemas include `ID` and optional `Addrs` list just like