From c879baf0ba91ae6f2a18433cbaa5c50cb4e464e7 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Mon, 13 Oct 2025 19:05:18 +0200 Subject: [PATCH 1/3] ipip-518: http(s) urls in routing v1 allows http(s) urls alongside multiaddrs in addrs field --- src/http-gateways/path-gateway.md | 4 +- src/ipips/ipip-0518.md | 290 ++++++++++++++++++++++++++++++ src/routing/http-routing-v1.md | 27 ++- 3 files changed, 311 insertions(+), 10 deletions(-) create mode 100644 src/ipips/ipip-0518.md diff --git a/src/http-gateways/path-gateway.md b/src/http-gateways/path-gateway.md index af4c46c7..68d4ed5c 100644 --- a/src/http-gateways/path-gateway.md +++ b/src/http-gateways/path-gateway.md @@ -35,7 +35,7 @@ editors: name: Protocol Labs url: https://protocol.ai/ xref: - - url + - rfc3986 - trustless-gateway - subdomain-gateway - dnslink-gateway @@ -511,7 +511,7 @@ When deserialized responses are enabled, and no explicit response format is provided with the request, and the requested data itself has no built-in content type metadata, implementations SHOULD perform content type sniffing based on file name -(from :ref[url] path, or optional [`filename`](#filename-request-query-parameter) parameter) +(from URI path, or optional [`filename`](#filename-request-query-parameter) parameter) and magic bytes to improve the utility of produced responses. 
For example: diff --git a/src/ipips/ipip-0518.md b/src/ipips/ipip-0518.md new file mode 100644 index 00000000..188c7aba --- /dev/null +++ b/src/ipips/ipip-0518.md @@ -0,0 +1,290 @@ +--- +title: "IPIP-0518: HTTP(S) URLs in Routing V1 API" +date: 2025-10-13 +ipip: proposal +editors: + - name: Marcin Rataj + github: lidel + url: https://lidel.org/ + affiliation: + name: Shipyard + url: https://ipshipyard.com +relatedIssues: + - https://github.com/ipfs/specs/issues/192 + - https://github.com/ipfs/specs/issues/496 + - https://github.com/multiformats/multiaddr/issues/63 + - https://github.com/multiformats/multiaddr/issues/87 + - https://github.com/ipshipyard/roadmaps/issues/15 + - https://github.com/ipfs/specs/pull/518 +order: 518 +tags: ['ipips'] +xref: + - rfc3986 +--- + +## Summary + +Allow HTTP(S) URLs alongside multiaddrs in the `Addrs` field of the Peer schema in the Delegated Routing V1 HTTP API to enable easier integration with HTTP-based infrastructure. + +## Motivation + +The current Delegated Routing V1 HTTP API requires all peer addresses to be encoded as [multiaddrs](https://github.com/multiformats/multiaddr). 
While multiaddrs provide a flexible and protocol-agnostic way to represent network addresses, many IPFS services are primarily accessible via HTTP(S) endpoints, including: + +- IPFS Gateways (both path and subdomain gateways) +- Delegated routing endpoints themselves +- HTTP-based content providers and pinning services + +Converting HTTP(S) URLs to multiaddrs requires additional complexity: +- HTTP URLs must be encoded as `/dns4/example.com/tcp/80/http` or `/dns4/example.com/tcp/443/https` +- This conversion is not intuitive for developers familiar with web standards +- It does not capture HTTP semantics where the same website can be exposed on both TCP (HTTP/1.1, HTTP/2) and UDP (HTTP/3) +- A single `https://example.com` URL automatically supports multiple transport protocols, but multiaddr representation requires separate entries for each transport +- Parsing multiaddrs back to URLs requires additional libraries and logic + +By allowing native HTTP(S) URLs in the `Addrs` field, we can: +- Simplify integration with existing web infrastructure +- Reduce conversion overhead for HTTP-based services +- Improve developer experience by using familiar URL formats +- Improve interoperability with the wider HTTP and URI ecosystem +- Enable future-proofing for non-HTTP URI schemes in ecosystem experimentation without requiring permission from gatekeepers +- Maintain backward compatibility with existing multiaddr-based implementations + +## Detailed design + +### Changes to the Peer Schema + +The `Addrs` field in the [Peer Schema](https://specs.ipfs.tech/routing/http-routing-v1/#peer-schema) will accept both multiaddr strings and HTTP(S) URL strings: + +```json +{ + "Schema": "peer", + "ID": "bafz...", + "Addrs": [ + "/ip4/192.168.1.1/tcp/4001", + "/dns4/libp2p-peer.example.com/tcp/4001/ws", + "https://trustless-gateway.example.org", + "https://custom-port.example.net:8443" + ], + "Protocols": ["transport-bitswap", ...] 
+} +``` + +### Parsing Logic + +Implementations MUST use the following logic to distinguish between multiaddrs and URLs: + +1. If a string in `Addrs` starts with `/` (forward slash), parse it as a multiaddr +2. Otherwise, attempt to parse it as a URI according to :cite[rfc3986] +3. If neither parsing succeeds, or if the address type is not supported by the implementation, the address MUST be ignored (skipped) +4. Processing MUST continue with the remaining addresses in the array +5. Implementations SHOULD log warnings for addresses they cannot parse or do not support + +This approach ensures forward compatibility: new address types can be introduced without breaking existing clients, as unsupported addresses are simply skipped. + +### Supported URL Schemes + +Initially, only the following URL schemes SHOULD be supported: +- `http://` - HTTP endpoints +- `https://` - HTTPS endpoints + +Future specifications MAY add support for additional schemes. + +### URL Requirements + +URLs in the `Addrs` field: +- MUST be absolute URLs (not relative) +- MUST include the scheme (`http://` or `https://`) +- SHOULD NOT include paths, query parameters, and fragments, but clients MUST account for them being present as part of defensive programming and either act on them, ignore them, or skip such addresses +- SHOULD point to endpoints that support IPFS protocols listed in the `Protocols` field + +### Examples + +#### HTTPS-only Content Provider + +```json +{ + "Schema": "peer", + "ID": "12D3KooWExample...", + "Addrs": [ + "https://trustless-gateway.example.com" + ], + "Protocols": ["transport-ipfs-gateway-http"] +} +``` + +#### Hybrid Peer with Multiple Transports + +```json +{ + "Schema": "peer", + "ID": "12D3KooWExample...", + "Addrs": [ + "/ip4/192.168.1.1/tcp/4001", + "/ip4/192.168.1.1/udp/4001/quic-v1", + "https://my-node.example.org:8080" + ], + "Protocols": ["transport-bitswap", "transport-ipfs-gateway-http"] +} +``` + +## Design rationale + +### Why not create a new field? 
+ +Adding URLs to the existing `Addrs` field rather than creating a new field (e.g., `URLs`) has several advantages: +- Maintains backward compatibility - existing clients continue to work +- Avoids duplication when the same endpoint can be expressed as both multiaddr and URL +- Simplifies the schema without adding complexity +- Follows the principle that addresses are addresses, regardless of encoding + +### Clear disambiguation + +The parsing rule (strings starting with `/` are multiaddrs, others are URIs) provides clear, unambiguous disambiguation: +- Multiaddrs ALWAYS start with `/` by specification +- Valid URLs NEVER start with `/` (they start with a scheme like `http://`) +- This makes parsing deterministic and fast + +### Incremental adoption + +This change allows for incremental adoption: +- Clients that don't understand URLs can simply skip them +- Servers can start including URLs immediately for URL-aware clients +- No flag day or coordinated upgrade required + +## User benefit + +This change benefits multiple user groups: + +### For developers + +- Simplified integration with existing HTTP infrastructure +- No need for multiaddr encoding/decoding libraries for HTTP endpoints +- Clearer, more readable configurations and debugging +- Barrier of adoption is removed: developers can implement HTTP-based routing and retrieval without having to re-implement libp2p concepts like Multiaddr, making it orders of magnitude easier to create light IPFS clients + +### For service providers + +- Easier to advertise HTTP-based services +- Can provide URLs that include paths and query parameters if needed +- Reduced complexity in route announcements + +### For end users + +- Potentially faster connection establishment to HTTP services +- Better compatibility with web-based IPFS implementations +- Lower barrier for creating new clients gives end users more choice and less vendor lock-in +- Provides viable escape path in case any of the open source projects gets captured 
by forces that do not put end user's good first + +## Compatibility + +This IPIP is fully backward and forward compatible: + +### For existing clients + +- Clients that only understand multiaddrs MUST skip URL entries they don't recognize (this is already implemented and proven to work when new protocols like `/quic`, `/quic-v1`, `/webtransport`, and `/webrtc-direct` were rolled out) +- Clients MUST continue processing remaining addresses even when encountering unsupported entries +- No changes required to existing parsing logic for multiaddr strings +- The `Addrs` field remains an array of strings + +### Forward compatibility + +- The requirement to skip unsupported addresses ensures that new address types can be added in the future +- Clients MUST NOT fail when encountering unknown address formats +- This allows the ecosystem to evolve without breaking existing implementations or without the need for permission or central coordination + +### For existing servers + +- Servers can continue sending only multiaddrs +- No changes required if URLs are not used + +### Migration path + +1. Servers can start including both multiaddrs and URLs for the same endpoints +2. Clients can be updated to parse URLs at their own pace +3. Eventually, servers may choose to only send URLs for HTTP(S) endpoints + +## Security + +### URL validation + +Implementations SHOULD validate URLs to prevent security issues: +- Verify the URL scheme is allowed (`http://` or `https://`) +- Consider rate limiting for URL-based connections if non-success (!=200) responses are received +- Validate URL length limits (DNS names are limited to 253 characters; practical URL length is typically 2048-8192 characters depending on implementation) + +### HTTPS preference + +Implementations SHOULD ignore `http://` URLs and only act on `https://` URLs for security and performance (HTTP/2 multiplexing) reasons. 
+ +The `http://` scheme SHOULD be allowed only for testing and private LAN deployments, and only when an explicit opt-in flag is set by the end user. + +### DNS considerations + +URLs rely on DNS resolution, which has different security properties than IP-based multiaddrs. The same rules that apply to `/dns`, `/dns4`, and `/dns6` multiaddrs apply here: +- DNS responses can be spoofed if DNSSEC is not used +- Clients SHOULD use secure DNS transports where available +- Certificate validation MUST be performed for HTTPS URLs + +## Alternatives + +### Separate URL field + +Adding a separate `URLs` field was considered but rejected because: +- It would complicate the schema +- It could lead to confusion about which field to use +- It wouldn't be backward compatible + +### URL-to-multiaddr conversion requirement + +Requiring all HTTP endpoints to be encoded as multiaddrs was the status quo but has proven cumbersome in practice. Multiple implementations on NPM and Golang alone behaved in slightly different fashion around how the schema, default port, optional path, fragment, and HTTP basic-auth were handled. This led to hard-to-debug errors due to multiaddr-URL conversion being ultimately lossy and 1:1 round-trip not being possible (see [multiaddr#63](https://github.com/multiformats/multiaddr/issues/63)). + +### Custom multiaddr protocols with keyword arguments + +Adding keyword arguments to multiaddr protocols was proposed in [multiaddr#87](https://github.com/multiformats/multiaddr/issues/87) to allow expressing `https://` URLs as multiaddrs without losing any information on conversion. This approach was not adopted because it would add even more complexity that multiaddr implementers would have to deal with. + +This solution was not feasible - adding native URI support is better as it removes walls and obstacles, rather than making existing ones taller. 
+ +## Test fixtures + +Implementations can test compatibility using these example responses: + +### Mixed addresses response + +```json +{ + "Providers": [ + { + "Schema": "peer", + "ID": "12D3KooWTest1...", + "Addrs": [ + "/ip4/127.0.0.1/tcp/4001", + "http://localhost:8080", + "/dns4/example.com/tcp/443/https", + "https://example.net" + ], + "Protocols": ["transport-bitswap", "transport-ipfs-gateway-http"] + } + ] +} +``` + +### URL-only response + +```json +{ + "Providers": [ + { + "Schema": "peer", + "ID": "12D3KooWTest2...", + "Addrs": [ + "https://trustless-gateway.example.org" + ], + "Protocols": ["transport-ipfs-gateway-http"] + } + ] +} +``` + +## Copyright + +Copyright and related rights waived via [CC0](https://creativecommons.org/publicdomain/zero/1.0/). \ No newline at end of file diff --git a/src/routing/http-routing-v1.md b/src/routing/http-routing-v1.md index cbd44c34..ad76dd9c 100644 --- a/src/routing/http-routing-v1.md +++ b/src/routing/http-routing-v1.md @@ -36,7 +36,9 @@ editors: url: https://ipshipyard.com xref: - ipip-0337 + - ipip-0518 - ipns-record + - rfc3986 order: 0 tags: ['routing'] --- @@ -52,7 +54,11 @@ As such, human-readable encodings of types are preferred. This specification may ## Common Data Types - CIDs are always string-encoded using a [multibase]-encoded [CIDv1]. -- Multiaddrs are string-encoded according to the [human-readable multiaddr specification][multiaddr]. 
+- Addresses in the `Addrs` field can be: + - Multiaddrs: string-encoded according to the [human-readable multiaddr specification][multiaddr], always starting with `/` + - HTTP(S) URLs: absolute URLs with `http://` or `https://` schemes, parsed as URIs according to :cite[rfc3986] + - Parsing logic: if a string starts with `/`, parse as multiaddr; otherwise, parse as URI + - Unsupported addresses: implementations MUST skip addresses they cannot parse or do not support, and MUST continue processing remaining addresses (see [IPIP-0518](https://specs.ipfs.tech/ipips/ipip-0518/)) - Peer IDs are string-encoded according [PeerID string representation specification][peer-id-representation]: either a Multihash in Base58btc, or a CIDv1 with libp2p-key (`0x72`) codec in Base36 or Base32. - Multibase bytes are string-encoded according to [the Multibase spec][multibase], and SHOULD use base64. - Timestamps are Unix millisecond epoch timestamps. @@ -77,16 +83,18 @@ This API uses a standard version prefix in the path, such as `/v1/...`. If a bac Optional `?filter-addrs` to apply Network Address Filtering from [IPIP-484](https://specs.ipfs.tech/ipips/ipip-0484/). -- `?filter-addrs=` optional parameter that indicates which network transports to return by filtering the multiaddrs in the `Addrs` field of the [Peer schema](#peer-schema). +- `?filter-addrs=` optional parameter that indicates which network transports to return by filtering the addresses in the `Addrs` field of the [Peer schema](#peer-schema). - The value of the `filter-addrs` parameter is a comma-separated (`,` or `%2C`) list of network transport protocol _name strings_ as defined in the [multiaddr protocol registry](https://github.com/multiformats/multiaddr/blob/master/protocols.csv), e.g. `?filter-addrs=tls,webrtc-direct,webtransport`. -- `unknown` can be be passed to include providers whose multiaddrs are unknown, e.g. `?filter-addrs=unknown`. 
This allows for not removing providers whose multiaddrs are unknown at the time of filtering (e.g. keeping DHT results that require additional peer lookup). -- Multiaddrs are filtered by checking if the protocol name appears in any of the multiaddrs (logical OR). -- Negative filtering is done by prefixing the protocol name with `!`, e.g. to skip IPv6 and QUIC addrs: `?filter-addrs=!ip6,!quic-v1`. Note that negative filtering is done by checking if the protocol name does not appear in any of the multiaddrs (logical AND). +- `unknown` can be passed to include providers whose addresses are unknown, e.g. `?filter-addrs=unknown`. This allows for not removing providers whose addresses are unknown at the time of filtering (e.g. keeping DHT results that require additional peer lookup). +- Addresses are filtered by checking if the protocol name appears in any of the multiaddrs, or if the URI scheme matches for HTTP(S) URLs (logical OR in both cases). + - Example: `http` can be passed to include providers whose addresses are HTTP-compatible. This will include `http://` and `https://` URIs, as well as `/http`, `/https`, and `/tls/http` Multiaddrs. + - For the purpose of filtering, implementations SHOULD include `/tls/http` Multiaddrs when `https` is passed as a filter to ensure composed multiaddrs are included in results. +- Negative filtering is done by prefixing the protocol name with `!`, e.g. to skip IPv6 and QUIC addrs: `?filter-addrs=!ip6,!quic-v1`. Note that negative filtering is done by checking if the protocol name does not appear in any of the addresses (logical AND). - If no parameter is passed, the default behavior is to return the original list of addresses unchanged. - If only negative filters are provided, addresses not passing any of the negative filters are included. - If positive filters are provided, only addresses passing at least one positive filter (and no negative filters) are included. 
- If both positive and negative filters are provided, the address must pass all negative filters and at least one positive filter to be included. -- If there are no multiaddrs that match the passed transports, the provider is omitted from the response. +- If there are no addresses that match the passed transports, the provider is omitted from the response. - Filtering is case-insensitive. ##### `filter-protocols` (providers request query parameter) @@ -315,14 +323,17 @@ The `peer` schema represents an arbitrary peer. { "Schema": "peer", "ID": "bafz...", - "Addrs": ["/ip4/..."], + "Addrs": ["/ip4/...", "https://trustless-gateway.example.com"], "Protocols": ["transport-bitswap", ...] ... } ``` - `ID`: the [Peer ID][peer-id] as Multihash in Base58btc or CIDv1 with libp2p-key codec. -- `Addrs`: an optional list of known [multiaddrs][multiaddr] for this peer. +- `Addrs`: an optional list of known addresses for this peer, which can include both: + - [Multiaddrs][multiaddr]: strings starting with `/`, e.g., `/ip4/192.168.1.1/tcp/4001` + - HTTP(S) URLs: absolute URLs with `http://` or `https://` schemes, e.g., `https://trustless-gateway.example.com` + - Implementations MUST skip addresses they cannot parse or do not support and continue with remaining addresses - If missing or empty, it means the router server is missing that information, and the client should use `ID` to lookup updated peer information. - `Protocols`: an optional list of protocols known to be supported by this peer. - If missing or empty, it means the router server is missing that information, and the client should use `ID` and `Addrs` to lookup connect to the peer and use the [libp2p identify protocol](https://github.com/libp2p/specs/tree/master/identify) to learn about supported ones. 
From e74d686e3eff8e1fdf35d4d89786b0b42f9fffe9 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Wed, 11 Feb 2026 23:13:17 +0100 Subject: [PATCH 2/3] ipip-518: pivot to generic schema with URI support pivot IPIP from modifying peer schema to introducing a new `generic` schema that supports URIs alongside multiaddrs. existing clients skip unknown schemas, so this is backward compatible. - generic schema: arbitrary IDs (did:key, UUID), duck-typed Addrs (multiaddrs and/or URIs), opaque protocol names - filter-addrs extended to match URI schemes in generic schema - filter-protocols extended to apply to generic schema records - servers/proxies must pass-through addrs and protocols as-is - 10 KiB record size limit - no gatekeeping: protocol names established by rough consensus, no central registry required --- src/ipips/ipip-0518.md | 359 +++++++++++++++++++-------------- src/routing/http-routing-v1.md | 86 +++++--- 2 files changed, 268 insertions(+), 177 deletions(-) diff --git a/src/ipips/ipip-0518.md b/src/ipips/ipip-0518.md index 188c7aba..c7d6e67d 100644 --- a/src/ipips/ipip-0518.md +++ b/src/ipips/ipip-0518.md @@ -1,6 +1,6 @@ --- -title: "IPIP-0518: HTTP(S) URLs in Routing V1 API" -date: 2025-10-13 +title: "IPIP-0518: URIs in Routing V1 API via Generic Schema" +date: 2026-02-11 ipip: proposal editors: - name: Marcin Rataj @@ -9,6 +9,12 @@ editors: affiliation: name: Shipyard url: https://ipshipyard.com +thanks: + - name: Adin Schmahmann + github: aschmahmann + affiliation: + name: Shipyard + url: https://ipshipyard.com relatedIssues: - https://github.com/ipfs/specs/issues/192 - https://github.com/ipfs/specs/issues/496 @@ -24,267 +30,316 @@ xref: ## Summary -Allow HTTP(S) URLs alongside multiaddrs in the `Addrs` field of the Peer schema in the Delegated Routing V1 HTTP API to enable easier integration with HTTP-based infrastructure. +Introduce a `generic` record schema for the Delegated Routing V1 HTTP API that supports URIs alongside multiaddrs in the `Addrs` field. 
Unlike the `peer` schema, which is tied to libp2p PeerIDs and multiaddrs, `generic` supports arbitrary identifiers and address formats including HTTP(S) URLs and other URI schemes. This enables HTTP-only providers, WebSeeds, and other non-libp2p use cases without breaking existing clients. ## Motivation -The current Delegated Routing V1 HTTP API requires all peer addresses to be encoded as [multiaddrs](https://github.com/multiformats/multiaddr). While multiaddrs provide a flexible and protocol-agnostic way to represent network addresses, many IPFS services are primarily accessible via HTTP(S) endpoints, including: +The Delegated Routing V1 HTTP API currently requires all provider records to use the `peer` schema, which mandates a libp2p PeerID as the identifier and multiaddrs as the address format. -- IPFS Gateways (both path and subdomain gateways) -- Delegated routing endpoints themselves +Many IPFS services are primarily accessible via HTTP(S) and do not use libp2p: + +- IPFS Gateways (path and subdomain) - HTTP-based content providers and pinning services +- WebSeed providers + +Converting HTTP(S) URLs to multiaddrs is lossy and error-prone: -Converting HTTP(S) URLs to multiaddrs requires additional complexity: - HTTP URLs must be encoded as `/dns4/example.com/tcp/80/http` or `/dns4/example.com/tcp/443/https` -- This conversion is not intuitive for developers familiar with web standards -- It does not capture HTTP semantics where the same website can be exposed on both TCP (HTTP/1.1, HTTP/2) and UDP (HTTP/3) -- A single `https://example.com` URL automatically supports multiple transport protocols, but multiaddr representation requires separate entries for each transport -- Parsing multiaddrs back to URLs requires additional libraries and logic - -By allowing native HTTP(S) URLs in the `Addrs` field, we can: -- Simplify integration with existing web infrastructure -- Reduce conversion overhead for HTTP-based services -- Improve developer experience by using 
familiar URL formats -- Improve interoperability with the wider HTTP and URI ecosystem -- Enable future-proofing for non-HTTP URI schemes in ecosystem experimentation without requiring permission from gatekeepers -- Maintain backward compatibility with existing multiaddr-based implementations +- URL-to-multiaddr round-trips are not lossless (see [multiaddr#63](https://github.com/multiformats/multiaddr/issues/63)) +- Multiple implementations handle edge cases differently (default ports, paths, fragments, HTTP basic-auth) +- A single `https://example.com` URL supports HTTP/1.1, HTTP/2, and HTTP/3, but multiaddr requires separate entries per transport +- Requiring multiaddr libraries raises the barrier for lightweight HTTP-only clients + +A new schema decouples provider records from libp2p, allowing the ecosystem to experiment with HTTP-only providers, WebSeeds, alternative protocols, and other novel concepts without vendor lock-in -- no need for explicit entries in [multicodec table.csv](https://github.com/multiformats/multicodec/blob/master/table.csv) or being blocked by ecosystem-wide adoption of a new addressing scheme. Existing clients remain unaffected. ## Detailed design -### Changes to the Peer Schema +### Generic Schema -The `Addrs` field in the [Peer Schema](https://specs.ipfs.tech/routing/http-routing-v1/#peer-schema) will accept both multiaddr strings and HTTP(S) URL strings: +A new `generic` schema is added to the [Known Schemas](https://specs.ipfs.tech/routing/http-routing-v1/#known-schemas) section of the Routing V1 spec. ```json { - "Schema": "peer", - "ID": "bafz...", - "Addrs": [ - "/ip4/192.168.1.1/tcp/4001", - "/dns4/libp2p-peer.example.com/tcp/4001/ws", - "https://trustless-gateway.example.org", - "https://custom-port.example.net:8443" - ], - "Protocols": ["transport-bitswap", ...] 
+ "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com", "/ip4/1.2.3.4/tcp/5000"], + "Protocols": ["transport-ipfs-gateway-http"] } ``` -### Parsing Logic +Fields: -Implementations MUST use the following logic to distinguish between multiaddrs and URLs: +- `ID`: a string identifier for the provider. Unlike the `peer` schema, this is not restricted to libp2p PeerIDs. Implementations SHOULD use identifiers that are self-authenticating (e.g. `did:key`), sufficiently unique, and less than 100 bytes. +- `Addrs`: an optional list of addresses as strings. Addresses are duck-typed by prefix: + - If a string starts with `/`, it is parsed as a [multiaddr](https://github.com/multiformats/multiaddr) + - Otherwise, it is parsed as a URI per :cite[rfc3986] + - Clients MUST skip addresses they cannot parse or do not support and continue with remaining entries. This includes URIs with unrecognized schemes, unsupported multiaddrs, or all multiaddrs if the client only supports URIs. +- `Protocols`: an optional list of transfer protocol names associated with this record. Protocol names are opaque strings with a max length of 63 characters, established by rough consensus across compatible implementations per the [robustness principle](https://specs.ipfs.tech/architecture/principles/#robustness). This is a deliberate departure from the `peer` schema, which suggested protocol names require registration in [multicodec table.csv](https://github.com/multiformats/multicodec/blob/master/table.csv), creating an IANA-like chokepoint for adopting new protocols. The `generic` schema removes this gatekeeping: anyone can return novel addresses and protocol names without external approval, and clients that do not recognize them simply skip them without breaking. -1. If a string in `Addrs` starts with `/` (forward slash), parse it as a multiaddr -2. Otherwise, attempt to parse it as a URI according to :cite[rfc3986] -3. 
If neither parsing succeeds, or if the address type is not supported by the implementation, the address MUST be ignored (skipped) -4. Processing MUST continue with the remaining addresses in the array -5. Implementations SHOULD log warnings for addresses they cannot parse or do not support +Servers and caching proxies MUST act as pass-through and return `Addrs` and `Protocols` as-is, unless explicitly filtered by the client via `?filter-addrs` or `?filter-protocols` query parameters. -This approach ensures forward compatibility: new address types can be introduced without breaking existing clients, as unsupported addresses are simply skipped. +To allow for protocol-specific fields and future-proofing, the parser MUST allow unknown fields, and clients MUST ignore fields they do not recognize. -### Supported URL Schemes +The total serialized size of a single `generic` record MUST be less than 10 KiB. -Initially, only the following URL schemes SHOULD be supported: -- `http://` - HTTP endpoints -- `https://` - HTTPS endpoints +### Supported URI Schemes -Future specifications MAY add support for additional schemes. +Initially, `https://` SHOULD be supported as the primary URI scheme. -### URL Requirements +Other URI schemes (e.g. `magnet:`, `foo://`, or any future scheme) MAY appear in `Addrs`. Clients MUST skip URIs with schemes they do not support. This ensures new URI schemes can be introduced over time without breaking existing clients or requiring central coordination. 
-URLs in the `Addrs` field: -- MUST be absolute URLs (not relative) -- MUST include the scheme (`http://` or `https://`) -- SHOULD NOT include paths, query parameters, and fragments, but clients MUST account for them being present as part of defensive programming and either act on them, ignore them, or skip such addresses -- SHOULD point to endpoints that support IPFS protocols listed in the `Protocols` field +### URI Requirements -### Examples +URIs in the `Addrs` field: -#### HTTPS-only Content Provider +- MUST be absolute URIs (not relative references) +- MUST include the scheme (e.g. `https://`, `magnet:`) +- MAY include paths, query parameters, or fragments, but clients MUST handle their presence defensively +- SHOULD point to endpoints that support protocols listed in the `Protocols` field -```json -{ - "Schema": "peer", - "ID": "12D3KooWExample...", - "Addrs": [ - "https://trustless-gateway.example.com" - ], - "Protocols": ["transport-ipfs-gateway-http"] -} -``` +### Interaction with `filter-addrs` + +The `filter-addrs` query parameter from [IPIP-0484](https://specs.ipfs.tech/ipips/ipip-0484/) applies to `generic` records the same way it applies to `peer` records: + +- Multiaddr addresses (strings starting with `/`) are filtered by multiaddr protocol name. +- URI addresses (strings not starting with `/`) are filtered by URI scheme name. For example, `?filter-addrs=https` matches `https://example.com`. +- This is naturally consistent: `https` is both a multiaddr protocol name (matching `/dns/example.com/tcp/443/https`) and a URI scheme (matching `https://example.com`). +- `?filter-addrs=unknown` includes `generic` records with no known addresses. +- If no addresses remain after filtering, the `generic` record is omitted from the response. + +### Relationship to Peer Schema -#### Hybrid Peer with Multiple Transports +The `peer` schema remains unchanged. It represents a libp2p node identified by PeerID with multiaddr addresses. 
The `generic` schema is complementary: + +| | `peer` schema | `generic` schema | +|---|---|---| +| `ID` | libp2p PeerID | any string (e.g. `did:key`) | +| `Addrs` | multiaddrs only | multiaddrs and/or URIs | +| use case | libp2p-native providers | HTTP-only, WebSeed, custom protocols | + +Routing servers MAY emit both schema types for the same provider: ```json { - "Schema": "peer", - "ID": "12D3KooWExample...", - "Addrs": [ - "/ip4/192.168.1.1/tcp/4001", - "/ip4/192.168.1.1/udp/4001/quic-v1", - "https://my-node.example.org:8080" - ], - "Protocols": ["transport-bitswap", "transport-ipfs-gateway-http"] + "Providers": [ + { + "Schema": "peer", + "ID": "12D3KooW...", + "Addrs": ["/ip4/192.168.1.1/tcp/4001"], + "Protocols": ["transport-bitswap"] + }, + { + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com"], + "Protocols": ["transport-ipfs-gateway-http"] + } + ] } ``` ## Design rationale -### Why not create a new field? - -Adding URLs to the existing `Addrs` field rather than creating a new field (e.g., `URLs`) has several advantages: -- Maintains backward compatibility - existing clients continue to work -- Avoids duplication when the same endpoint can be expressed as both multiaddr and URL -- Simplifies the schema without adding complexity -- Follows the principle that addresses are addresses, regardless of encoding +### Why a new schema instead of modifying Peer -### Clear disambiguation +The `peer` schema has a hard dependency on libp2p: `ID` is a PeerID and `Addrs` are multiaddrs. Existing clients parse every entry in `Addrs` as a multiaddr. Introducing URIs into the `Addrs` field of the `peer` schema would cause parse errors in all third-party clients that have not been updated, breaking backward compatibility. 
-The parsing rule (strings starting with `/` are multiaddrs, others are URIs) provides clear, unambiguous disambiguation: -- Multiaddrs ALWAYS start with `/` by specification -- Valid URLs NEVER start with `/` (they start with a scheme like `http://`) -- This makes parsing deterministic and fast +Previous rollouts of new multiaddr protocols (`/quic-v1`, `/webtransport`, `/webrtc-direct`) did not break clients because those strings still parsed as valid multiaddrs, even when the client could not dial them. URIs are not multiaddrs and will fail multiaddr parsing. -### Incremental adoption +By introducing a new schema, we leverage the existing requirement that clients MUST skip records with unknown schemas: -This change allows for incremental adoption: -- Clients that don't understand URLs can simply skip them -- Servers can start including URLs immediately for URL-aware clients +- Existing clients continue to work, only seeing `peer` records they already understand +- Updated clients opt in to `generic` records at their own pace - No flag day or coordinated upgrade required -## User benefit +### Incremental migration + +Libp2p-native peers continue using the `peer` schema as-is. The migration only impacts providers that are not actual libp2p peers -- such as HTTP-only Trustless Gateways that today must be shoehorned into the `peer` schema with a synthetic PeerID. During the transition period, routing servers can return both `peer` and `generic` records for the same provider. Clients that understand `generic` use the richer address information; others fall back to `peer` records with the synthetic PeerID. + +### Decoupling from libp2p -This change benefits multiple user groups: +The `generic` schema removes the hard requirement on libp2p PeerIDs and multiaddrs. 
This lowers the barrier for building lightweight IPFS clients that only speak HTTP, and enables experimentation with new provider types (WebSeeds, S3-backed storage) without requiring changes to the libp2p specification or multiaddr registry. + +## User benefit ### For developers -- Simplified integration with existing HTTP infrastructure -- No need for multiaddr encoding/decoding libraries for HTTP endpoints -- Clearer, more readable configurations and debugging -- Barrier of adoption is removed: developers can implement HTTP-based routing and retrieval without having to re-implement libp2p concepts like Multiaddr, making it orders of magnitude easier to create light IPFS clients +- HTTP-only providers and HTTP-only stacks can be built without multiaddr encoding/decoding libraries. Lower cognitive overhead: everyone familiar with `https://` URIs knows how to work with them. +- Alternative URI schemes are also easier to integrate than new multiaddr protocols +- Lightweight HTTP-only IPFS clients become feasible without re-implementing libp2p concepts ### For service providers -- Easier to advertise HTTP-based services -- Can provide URLs that include paths and query parameters if needed -- Reduced complexity in route announcements +- HTTP(S) endpoints advertised directly as URLs +- Custom address formats supported without multiaddr registry changes +- Protocol-specific metadata via extra fields ### For end users -- Potentially faster connection establishment to HTTP services -- Better compatibility with web-based IPFS implementations -- Lower barrier for creating new clients gives end users more choice and less vendor lock-in -- Provides viable escape path in case any of the open source projects gets captured by forces that do not put end user's good first +- Lower barrier for new client implementations increases ecosystem diversity +- HTTP-only providers improve compatibility with web-based IPFS implementations ## Compatibility -This IPIP is fully backward and 
forward compatible: +### Backward compatibility -### For existing clients - -- Clients that only understand multiaddrs MUST skip URL entries they don't recognize (this is already implemented and proven to work when new protocols like `/quic`, `/quic-v1`, `/webtransport`, and `/webrtc-direct` were rolled out) -- Clients MUST continue processing remaining addresses even when encountering unsupported entries -- No changes required to existing parsing logic for multiaddr strings -- The `Addrs` field remains an array of strings +Fully backward compatible. Existing clients skip `generic` records because they use an unknown schema. The `peer` schema is unchanged. ### Forward compatibility -- The requirement to skip unsupported addresses ensures that new address types can be added in the future -- Clients MUST NOT fail when encountering unknown address formats -- This allows the ecosystem to evolve without breaking existing implementations or without the need for permission or central coordination - -### For existing servers +Unknown fields MUST be ignored by clients. New address formats and protocol-specific fields can be added without breaking existing implementations. -- Servers can continue sending only multiaddrs -- No changes required if URLs are not used +URIs in `Addrs` are not limited to a specific scheme. Clients parsing a `generic` record MUST skip addresses with unrecognized URI schemes, which allows the ecosystem to introduce addressing beyond `https://` without requiring coordination or simultaneous upgrades. ### Migration path -1. Servers can start including both multiaddrs and URLs for the same endpoints -2. Clients can be updated to parse URLs at their own pace -3. Eventually, servers may choose to only send URLs for HTTP(S) endpoints +1. Routing servers emit `generic` records alongside existing `peer` records +2. Clients add support for `generic` schema at their own pace +3. 
HTTP-only providers that previously required multiaddr conversion can switch to `generic` with native URI addresses ## Security -### URL validation +### URI validation -Implementations SHOULD validate URLs to prevent security issues: -- Verify the URL scheme is allowed (`http://` or `https://`) -- Consider rate limiting for URL-based connections if non-success (!=200) responses are received -- Validate URL length limits (DNS names are limited to 253 characters; practical URL length is typically 2048-8192 characters depending on implementation) +Implementations SHOULD validate URIs: -### HTTPS preference +- Verify the URI scheme is supported (e.g. `https://`) +- Validate URI length limits (practical limit: 2048-8192 characters) +- Apply scheme-specific rate limits where appropriate (e.g. rate-limiting HTTP requests to URIs returning non-success responses) -Implementations SHOULD ignore `http://` URLs and only act on `https://` URLs for security and performance (HTTP/2 multiplexing) reasons. +### HTTPS preference -The `http://` scheme SHOULD be allowed only for testing and private LAN deployments, and only when an explicit opt-in flag is set by the end user. +For HTTP-based URIs, implementations SHOULD prefer `https://`. The `http://` scheme SHOULD only be allowed for testing and private LAN deployments, gated behind an explicit opt-in flag. ### DNS considerations -URLs rely on DNS resolution, which has different security properties than IP-based multiaddrs. The same rules that apply to `/dns`, `/dns4`, and `/dns6` multiaddrs apply here: -- DNS responses can be spoofed if DNSSEC is not used +HTTP(S) URIs rely on DNS resolution. 
The same security considerations that apply to `/dns`, `/dns4`, and `/dns6` multiaddrs apply here: + +- DNS responses can be spoofed without DNSSEC - Clients SHOULD use secure DNS transports where available -- Certificate validation MUST be performed for HTTPS URLs +- Certificate validation MUST be performed for HTTPS URIs on the public internet + +### ID trust + +The `generic` schema `ID` field is self-reported. Clients SHOULD use self-authenticating identifiers (e.g. `did:key`) and verify signatures where applicable. Reputation and resource allocation decisions SHOULD be tied to `ID`. ## Alternatives -### Separate URL field +### URIs in Peer Schema Addrs field + +Adding URIs directly to the `Addrs` field of the existing `peer` schema was considered. The `peer` schema was introduced in [IPIP-0337](https://specs.ipfs.tech/ipips/ipip-0337/) and has been used in production by multiple independent implementations for years. Changing the semantics of `Addrs` from multiaddr-only to a mixed format would break all third-party clients that parse entries as multiaddrs. Unlike new multiaddr protocols which still parse as valid multiaddrs, URIs are a fundamentally different format and cause parse errors. A new schema avoids this by leveraging the existing unknown-schema-skipping behavior. -Adding a separate `URLs` field was considered but rejected because: -- It would complicate the schema -- It could lead to confusion about which field to use -- It wouldn't be backward compatible +### URI-to-multiaddr conversion -### URL-to-multiaddr conversion requirement +The status quo requires converting HTTP URLs to multiaddrs like `/dns4/example.com/tcp/443/https`. This conversion is lossy: URI paths, fragments, query parameters, and HTTP/3 transport information are lost. Multiple implementations handle edge cases differently, leading to interoperability issues (see [multiaddr#63](https://github.com/multiformats/multiaddr/issues/63)). 
It also means libp2p-specific address libraries and parsers have to be implemented by every new client, increasing complexity and raising the barrier for new implementations. -Requiring all HTTP endpoints to be encoded as multiaddrs was the status quo but has proven cumbersome in practice. Multiple implementations on NPM and Golang alone behaved in slightly different fashion around how the schema, default port, optional path, fragment, and HTTP basic-auth were handled. This led to hard-to-debug errors due to multiaddr-URL conversion being ultimately lossy and 1:1 round-trip not being possible (see [multiaddr#63](https://github.com/multiformats/multiaddr/issues/63)). +### Custom multiaddr keyword arguments -### Custom multiaddr protocols with keyword arguments +Adding keyword arguments to multiaddr protocols was proposed in [multiaddr#87](https://github.com/multiformats/multiaddr/issues/87). This would increase complexity for all multiaddr implementers without addressing the fundamental desire to use standard URIs. -Adding keyword arguments to multiaddr protocols was proposed in [multiaddr#87](https://github.com/multiformats/multiaddr/issues/87) to allow expressing `https://` URLs as multiaddrs without losing any information on conversion. This approach was not adopted because it would add even more complexity that multiaddr implementers would have to deal with. +### Separate URI field in Peer Schema -This solution was not feasible - adding native URI support is better as it removes walls and obstacles, rather than making existing ones taller. +Adding a separate `URIs` field to the `peer` schema would complicate the schema and create ambiguity about which field to check for addresses. A new schema is a cleaner separation: `peer` stays focused on libp2p peers, `generic` handles everything else. 
## Test fixtures -Implementations can test compatibility using these example responses: +### HTTPS-only provider -### Mixed addresses response +```json +{ + "Providers": [ + { + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com"], + "Protocols": ["transport-ipfs-gateway-http"] + } + ] +} +``` + +### Provider with protocol-specific metadata and custom URI scheme ```json { "Providers": [ { - "Schema": "peer", - "ID": "12D3KooWTest1...", - "Addrs": [ - "/ip4/127.0.0.1/tcp/4001", - "http://localhost:8080", - "/dns4/example.com/tcp/443/https", - "https://example.net" - ], - "Protocols": ["transport-bitswap", "transport-ipfs-gateway-http"] + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["foo://custom-storage.example.com/bucket"], + "Protocols": ["example-future-protocol"], + "example-future-protocol": {"version": 2, "features": ["foo"]} } ] } ``` -### URL-only response +Clients that do not recognize the `foo://` URI scheme MUST skip that address. + +### Provider with opaque identifier + +The `ID` field is not restricted to `did:key`. 
Any string identifier can be used: + +```json +{ + "Providers": [ + { + "Schema": "generic", + "ID": "550e8400-e29b-41d4-a716-446655440000", + "Addrs": ["https://cdn.example.com"], + "Protocols": ["transport-ipfs-gateway-http", "example-future-protocol"] + } + ] +} +``` + +### Mixed response with both schemas ```json { "Providers": [ { "Schema": "peer", - "ID": "12D3KooWTest2...", + "ID": "12D3KooW...", "Addrs": [ - "https://trustless-gateway.example.org" + "/ip4/192.168.1.1/tcp/4001", + "/ip4/192.168.1.1/udp/4001/quic-v1" ], + "Protocols": ["transport-bitswap"] + }, + { + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com"], "Protocols": ["transport-ipfs-gateway-http"] } ] } ``` +### Filtering with `filter-addrs` + +Given a response containing: + +```json +{ + "Providers": [ + { + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://provider.example.com", "/ip4/1.2.3.4/tcp/443/https"], + "Protocols": ["transport-ipfs-gateway-http"] + } + ] +} +``` + +A request with `?filter-addrs=https` returns both addresses, because `https` matches the URI `https://provider.example.com` by URI scheme and the multiaddr `/ip4/1.2.3.4/tcp/443/https` by multiaddr protocol name. + +A request with `?filter-addrs=tcp` returns only the multiaddr `/ip4/1.2.3.4/tcp/443/https`, because `tcp` does not match the URI scheme `https`. + +A request with `?filter-addrs=!https` omits the record entirely, because all addresses are removed by the negative filter. + ## Copyright -Copyright and related rights waived via [CC0](https://creativecommons.org/publicdomain/zero/1.0/). \ No newline at end of file +Copyright and related rights waived via [CC0](https://creativecommons.org/publicdomain/zero/1.0/). 
diff --git a/src/routing/http-routing-v1.md b/src/routing/http-routing-v1.md index ad76dd9c..b34564d1 100644 --- a/src/routing/http-routing-v1.md +++ b/src/routing/http-routing-v1.md @@ -4,7 +4,7 @@ description: > Delegated routing is a mechanism for IPFS implementations to use for offloading content routing, peer routing and naming to another process/server. This specification describes an HTTP API for delegated routing of content, peers, and IPNS. -date: 2024-10-29 +date: 2026-02-11 maturity: reliable editors: - name: Gus Eggert @@ -54,11 +54,7 @@ As such, human-readable encodings of types are preferred. This specification may ## Common Data Types - CIDs are always string-encoded using a [multibase]-encoded [CIDv1]. -- Addresses in the `Addrs` field can be: - - Multiaddrs: string-encoded according to the [human-readable multiaddr specification][multiaddr], always starting with `/` - - HTTP(S) URLs: absolute URLs with `http://` or `https://` schemes, parsed as URIs according to :cite[rfc3986] - - Parsing logic: if a string starts with `/`, parse as multiaddr; otherwise, parse as URI - - Unsupported addresses: implementations MUST skip addresses they cannot parse or do not support, and MUST continue processing remaining addresses (see [IPIP-0518](https://specs.ipfs.tech/ipips/ipip-0518/)) +- Multiaddrs are string-encoded according to the [human-readable multiaddr specification][multiaddr]. - Peer IDs are string-encoded according [PeerID string representation specification][peer-id-representation]: either a Multihash in Base58btc, or a CIDv1 with libp2p-key (`0x72`) codec in Base36 or Base32. - Multibase bytes are string-encoded according to [the Multibase spec][multibase], and SHOULD use base64. - Timestamps are Unix millisecond epoch timestamps. @@ -83,18 +79,17 @@ This API uses a standard version prefix in the path, such as `/v1/...`. If a bac Optional `?filter-addrs` to apply Network Address Filtering from [IPIP-484](https://specs.ipfs.tech/ipips/ipip-0484/). 
-- `?filter-addrs=` optional parameter that indicates which network transports to return by filtering the addresses in the `Addrs` field of the [Peer schema](#peer-schema). -- The value of the `filter-addrs` parameter is a comma-separated (`,` or `%2C`) list of network transport protocol _name strings_ as defined in the [multiaddr protocol registry](https://github.com/multiformats/multiaddr/blob/master/protocols.csv), e.g. `?filter-addrs=tls,webrtc-direct,webtransport`. -- `unknown` can be be passed to include providers whose addresses are unknown, e.g. `?filter-addrs=unknown`. This allows for not removing providers whose addresses are unknown at the time of filtering (e.g. keeping DHT results that require additional peer lookup). -- Addresses are filtered by checking if the protocol name appears in any of the multiaddrs, or if the URI scheme matches for HTTP(S) URLs (logical OR in both cases). - - Example: `http` can be be passed to include providers whose addresses are HTTP-compatible. This will include `http://` `https://` URIs, and `/http` `/https` and `/tls/http` Multiaddrs. - - For the purpose of filtering, implementations SHOULD include `/tls/http` Multiaddrs when `https` is passed as a filter to ensure composed multiaddrs are included in results. -- Negative filtering is done by prefixing the protocol name with `!`, e.g. to skip IPv6 and QUIC addrs: `?filter-addrs=!ip6,!quic-v1`. Note that negative filtering is done by checking if the protocol name does not appear in any of the addresses (logical AND). +- `?filter-addrs=` optional parameter that indicates which network addresses to return by filtering the `Addrs` field of [Peer schema](#peer-schema) and [Generic schema](#generic-schema) provider records. +- The value is a comma-separated (`,` or `%2C`) list of filter names. 
+- For multiaddr addresses (all addresses in the `peer` schema, and strings starting with `/` in the `generic` schema): filter names are matched against multiaddr protocol _name strings_ as defined in the [multiaddr protocol registry](https://github.com/multiformats/multiaddr/blob/master/protocols.csv), e.g. `?filter-addrs=tls,webrtc-direct,webtransport`. A multiaddr matches a filter if the protocol name appears anywhere in the multiaddr (logical OR). +- For URI addresses in the [Generic schema](#generic-schema) (strings not starting with `/`): filter names are matched against the URI scheme per :cite[rfc3986]. For example, `?filter-addrs=https` matches `https://example.com`. +- `unknown` can be passed to include records whose addresses are unknown, e.g. `?filter-addrs=unknown`. This allows for not removing records whose addresses are unknown at the time of filtering (e.g. keeping DHT results that require additional peer lookup). +- Negative filtering is done by prefixing the name with `!`, e.g. to skip IPv6 and QUIC addrs: `?filter-addrs=!ip6,!quic-v1`. Negative filtering checks that the name does not appear in any of the addresses (logical AND). - If no parameter is passed, the default behavior is to return the original list of addresses unchanged. -- If only negative filters are provided, addresses not passing any of the negative filters are included. -- If positive filters are provided, only addresses passing at least one positive filter (and no negative filters) are included. +- If only negative filters are provided, addresses not matching any of the negative filters are included. +- If positive filters are provided, only addresses matching at least one positive filter (and no negative filters) are included. - If both positive and negative filters are provided, the address must pass all negative filters and at least one positive filter to be included. -- If there are no addresses that match the passed transports, the provider is omitted from the response. 
+- If no addresses remain after filtering, the record is omitted from the response. - Filtering is case-insensitive. ##### `filter-protocols` (providers request query parameter) @@ -104,10 +99,10 @@ Optional `?filter-protocols` to apply IPFS Protocol Filtering from [IPIP-484](ht - The `filter-protocols` parameter is a comma-separated (`,` or `%2C`) list of transfer protocol names, e.g. `?filter-protocols=unknown,transport-bitswap,transport-ipfs-gateway-http`. - Transfer protocols names should be treated as opaque strings and have a max length of 63 characters. A non-exhaustive list of transfer protocols are defined per convention in the [multicodec registry](https://github.com/multiformats/multicodec/blob/3b7b52deb31481790bc4bae984d8675bda4e0c82/table.csv#L149-L151). - Implementations MUST preserve all transfer protocol names when returning a positive result that matches one or more of them. -- A special `unknown` name can be be passed to include providers whose transfer protocol list is empty (unknown), e.g. `?filter-protocols=unknown`. This allows for including providers returned from the DHT that do not contain explicit transfer protocol information. -- Providers are filtered by checking if the transfer protocol name appears in the `Protocols` array (logical OR). -- If the provider doesn't match any of the passed transfer protocols, the provider is omitted from the response. -- If a provider passes the filter, it is returned unchanged, i.e. the full set of protocols is returned including protocols that not included in the filter. (note that this is different from `filter-addrs` where only the multiaddrs that pass the filter are returned) +- A special `unknown` name can be passed to include records whose transfer protocol list is empty (unknown), e.g. `?filter-protocols=unknown`. This allows for including providers returned from the DHT that do not contain explicit transfer protocol information. 
+- Records are filtered by checking if the transfer protocol name appears in the `Protocols` array (logical OR). This applies to both [Peer schema](#peer-schema) and [Generic schema](#generic-schema) records. +- If the record doesn't match any of the passed transfer protocols, it is omitted from the response. +- If a record passes the filter, it is returned unchanged, i.e. the full set of protocols is returned including protocols not included in the filter. (note that this is different from `filter-addrs` where only the addresses that pass the filter are returned) - Filtering is case-insensitive. - If no parameter is passed, the default behavior is to not filter by transfer protocol. @@ -323,17 +318,14 @@ The `peer` schema represents an arbitrary peer. { "Schema": "peer", "ID": "bafz...", - "Addrs": ["/ip4/...", "https://trustless-gateway.example.com"], + "Addrs": ["/ip4/..."], "Protocols": ["transport-bitswap", ...] ... } ``` - `ID`: the [Peer ID][peer-id] as Multihash in Base58btc or CIDv1 with libp2p-key codec. -- `Addrs`: an optional list of known addresses for this peer, which can include both: - - [Multiaddrs][multiaddr]: strings starting with `/`, e.g., `/ip4/192.168.1.1/tcp/4001` - - HTTP(S) URLs: absolute URLs with `http://` or `https://` schemes, e.g., `https://trustless-gateway.example.com` - - Implementations MUST skip addresses they cannot parse or do not support and continue with remaining addresses +- `Addrs`: an optional list of known [multiaddrs][multiaddr] for this peer. - If missing or empty, it means the router server is missing that information, and the client should use `ID` to lookup updated peer information. - `Protocols`: an optional list of protocols known to be supported by this peer. 
- If missing or empty, it means the router server is missing that information, and the client should use `ID` and `Addrs` to lookup connect to the peer and use the [libp2p identify protocol](https://github.com/libp2p/specs/tree/master/identify) to learn about supported ones. @@ -363,6 +355,50 @@ the case, the field MUST be ignored. ::: +### Generic Schema + +The `generic` schema represents a provider that is not necessarily a libp2p peer. Unlike the `peer` schema, identifiers are not limited to PeerIDs and addresses are not limited to multiaddrs. See [IPIP-0518](https://specs.ipfs.tech/ipips/ipip-0518/) for motivation and design rationale. + +```json +{ + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://trustless-gateway.example.com"], + "Protocols": ["transport-ipfs-gateway-http"] + ... +} +``` + +- `ID`: a string identifier for the provider. SHOULD be self-authenticating (e.g. `did:key`) and less than 100 bytes. +- `Addrs`: an optional list of addresses as strings. Addresses are duck-typed based on their format: + - If a string starts with `/`, it is parsed as a [multiaddr][multiaddr] + - Otherwise, it is parsed as a URI per :cite[rfc3986] + - Clients MUST skip addresses they cannot parse or do not support and continue with remaining entries. This includes URIs with unrecognized schemes, unsupported multiaddrs, or all multiaddrs if the client only supports URIs. +- `Protocols`: an optional list of transfer protocol names associated with this record. Protocol names are opaque strings with a max length of 63 characters, established by rough consensus across compatible implementations per the [robustness principle](https://specs.ipfs.tech/architecture/principles/#robustness). + +Servers and caching proxies MUST act as pass-through and return `Addrs` and `Protocols` as-is, unless explicitly filtered by the client via `?filter-addrs` or `?filter-protocols` query parameters. 
+ +The total serialized size of a single `generic` record MUST be less than 10 KiB. + +:::note + +To allow for protocol-specific fields and future-proofing, the parser MUST +allow for unknown fields, and the clients MUST ignore unknown ones. + +Below is an example of a `generic` record with protocol-specific extra fields: + +```json +{ + "Schema": "generic", + "ID": "did:key:z6Mkm1...", + "Addrs": ["https://provider.example.com"], + "Protocols": ["example-future-protocol"], + "example-future-protocol": {"version": 2, "features": ["foo"]} +} +``` + +::: + ### Legacy Schemas Legacy schemas include `ID` and optional `Addrs` list just like From 894f934975379ad0af7262a1dd8b81dafc9e3f41 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Thu, 12 Feb 2026 00:45:14 +0100 Subject: [PATCH 3/3] chore: upgrade spec-generator to 1.7.0 adds remark-gfm support for GFM table rendering --- package-lock.json | 259 +++++++++++++++++++++++++++++++++++++++++++++- package.json | 2 +- 2 files changed, 256 insertions(+), 5 deletions(-) diff --git a/package-lock.json b/package-lock.json index bdd27102..86333d1e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,7 +8,7 @@ "name": "ipfs-specs-website", "version": "1.0.0", "dependencies": { - "spec-generator": "^1.6.1" + "spec-generator": "^1.7.0" } }, "node_modules/@11ty/dependency-tree": { @@ -5201,6 +5201,16 @@ "markdown-it": "bin/markdown-it.js" } }, + "node_modules/markdown-table": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.4.tgz", + "integrity": "sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==", + "license": "MIT", + "funding": { + "type": "github", + "url": "https://github.com/sponsors/wooorm" + } + }, "node_modules/marked": { "version": "12.0.2", "resolved": "https://registry.npmjs.org/marked/-/marked-12.0.2.tgz", @@ -5354,6 +5364,107 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/mdast-util-gfm": { + 
"version": "3.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm/-/mdast-util-gfm-3.1.0.tgz", + "integrity": "sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ==", + "license": "MIT", + "dependencies": { + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-gfm-autolink-literal": "^2.0.0", + "mdast-util-gfm-footnote": "^2.0.0", + "mdast-util-gfm-strikethrough": "^2.0.0", + "mdast-util-gfm-table": "^2.0.0", + "mdast-util-gfm-task-list-item": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-autolink-literal": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-autolink-literal/-/mdast-util-gfm-autolink-literal-2.0.1.tgz", + "integrity": "sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "ccount": "^2.0.0", + "devlop": "^1.0.0", + "mdast-util-find-and-replace": "^3.0.0", + "micromark-util-character": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-footnote": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-footnote/-/mdast-util-gfm-footnote-2.1.0.tgz", + "integrity": "sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.1.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-strikethrough": { + "version": "2.0.0", + "resolved": 
"https://registry.npmjs.org/mdast-util-gfm-strikethrough/-/mdast-util-gfm-strikethrough-2.0.0.tgz", + "integrity": "sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-table": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-table/-/mdast-util-gfm-table-2.0.0.tgz", + "integrity": "sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "markdown-table": "^3.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/mdast-util-gfm-task-list-item": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/mdast-util-gfm-task-list-item/-/mdast-util-gfm-task-list-item-2.0.0.tgz", + "integrity": "sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "devlop": "^1.0.0", + "mdast-util-from-markdown": "^2.0.0", + "mdast-util-to-markdown": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/mdast-util-phrasing": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/mdast-util-phrasing/-/mdast-util-phrasing-4.1.0.tgz", @@ -5565,6 +5676,127 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/micromark-extension-gfm": { + "version": "3.0.0", + "resolved": 
"https://registry.npmjs.org/micromark-extension-gfm/-/micromark-extension-gfm-3.0.0.tgz", + "integrity": "sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w==", + "license": "MIT", + "dependencies": { + "micromark-extension-gfm-autolink-literal": "^2.0.0", + "micromark-extension-gfm-footnote": "^2.0.0", + "micromark-extension-gfm-strikethrough": "^2.0.0", + "micromark-extension-gfm-table": "^2.0.0", + "micromark-extension-gfm-tagfilter": "^2.0.0", + "micromark-extension-gfm-task-list-item": "^2.0.0", + "micromark-util-combine-extensions": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-autolink-literal": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-autolink-literal/-/micromark-extension-gfm-autolink-literal-2.1.0.tgz", + "integrity": "sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw==", + "license": "MIT", + "dependencies": { + "micromark-util-character": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-footnote": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-footnote/-/micromark-extension-gfm-footnote-2.1.0.tgz", + "integrity": "sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-core-commonmark": "^2.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-normalize-identifier": "^2.0.0", + "micromark-util-sanitize-uri": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + 
"micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-strikethrough": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-strikethrough/-/micromark-extension-gfm-strikethrough-2.1.0.tgz", + "integrity": "sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-util-chunked": "^2.0.0", + "micromark-util-classify-character": "^2.0.0", + "micromark-util-resolve-all": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-table": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-table/-/micromark-extension-gfm-table-2.1.1.tgz", + "integrity": "sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-tagfilter": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-tagfilter/-/micromark-extension-gfm-tagfilter-2.0.0.tgz", + "integrity": "sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg==", + "license": "MIT", + "dependencies": { + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, + "node_modules/micromark-extension-gfm-task-list-item": { + 
"version": "2.1.0", + "resolved": "https://registry.npmjs.org/micromark-extension-gfm-task-list-item/-/micromark-extension-gfm-task-list-item-2.1.0.tgz", + "integrity": "sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw==", + "license": "MIT", + "dependencies": { + "devlop": "^1.0.0", + "micromark-factory-space": "^2.0.0", + "micromark-util-character": "^2.0.0", + "micromark-util-symbol": "^2.0.0", + "micromark-util-types": "^2.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/micromark-factory-destination": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/micromark-factory-destination/-/micromark-factory-destination-2.0.1.tgz", @@ -8658,6 +8890,24 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/remark-gfm": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/remark-gfm/-/remark-gfm-4.0.1.tgz", + "integrity": "sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg==", + "license": "MIT", + "dependencies": { + "@types/mdast": "^4.0.0", + "mdast-util-gfm": "^3.0.0", + "micromark-extension-gfm": "^3.0.0", + "remark-parse": "^11.0.0", + "remark-stringify": "^11.0.0", + "unified": "^11.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/unified" + } + }, "node_modules/remark-heading-id": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/remark-heading-id/-/remark-heading-id-1.0.1.tgz", @@ -9320,9 +9570,9 @@ "integrity": "sha512-zC8zGoGkmc8J9ndvml8Xksr1Amk9qBujgbF0JAIWO7kXr43w0h/0GJNM/Vustixu+YE8N/MTrQ7N31FvHUACxQ==" }, "node_modules/spec-generator": { - "version": "1.6.1", - "resolved": "https://registry.npmjs.org/spec-generator/-/spec-generator-1.6.1.tgz", - "integrity": "sha512-yDzubb+cWKPlg82SQSaFeHjHVbKu58tlcvbnAy8yFtxnikUL2c06GViBw7yAOZPYjTS/meZ7vQp61IJ0myG0XQ==", + "version": "1.7.0", + "resolved": 
"https://registry.npmjs.org/spec-generator/-/spec-generator-1.7.0.tgz", + "integrity": "sha512-U5itp3X8mU84chN0xmwgEtAaq/VL4gbC4AK5EdqJxGydhUhz+OnNUcRTKP9v3k9wYVaXbNIeoOzVBnzrNDV1XQ==", "license": "MIT", "dependencies": { "@11ty/eleventy": "^2.0.1", @@ -9342,6 +9592,7 @@ "pluralize": "^8.0.0", "remark": "^15.0.1", "remark-directive": "^3.0.0", + "remark-gfm": "^4.0.1", "remark-heading-id": "^1.0.1", "remark-html": "^16.0.1", "remark-squeeze-paragraphs": "^6.0.0", diff --git a/package.json b/package.json index 53ed9595..2a9f851c 100644 --- a/package.json +++ b/package.json @@ -10,6 +10,6 @@ "license": "", "private": true, "dependencies": { - "spec-generator": "^1.6.1" + "spec-generator": "^1.7.0" } }