From ee037344d75406dce9a6172c2d5912a2c9cdba7d Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Wed, 25 Feb 2026 15:45:18 -0500 Subject: [PATCH 1/7] feat(persistence): add AWS S3 backend with CRUD, versioning, bulk ops and env-gated integration tests - Implement S3 backend behind 's3' feature using aws_sdk_s3 only - Add CRUD, vread, history, bulk export, bulk submit - Add bucket-per-tenant and prefix-per-tenant modes - Integrate with capability matrix - Add env-gated real AWS integration tests - Update documentation - No automatic bucket creation (HeadBucket validation only) --- Cargo.lock | 493 +++++++- crates/persistence/Cargo.toml | 6 +- crates/persistence/README.md | 46 +- crates/persistence/src/backends/mod.rs | 4 +- crates/persistence/src/backends/s3/backend.rs | 251 ++++ .../src/backends/s3/bulk_export.rs | 536 ++++++++ .../src/backends/s3/bulk_submit.rs | 976 +++++++++++++++ crates/persistence/src/backends/s3/bundle.rs | 419 +++++++ crates/persistence/src/backends/s3/client.rs | 301 +++++ crates/persistence/src/backends/s3/config.rs | 152 +++ .../src/backends/s3/docs/README.md | 61 + .../backends/s3/docs/discussion_roadmap.md | 985 +++++++++++++++ .../persistence/src/backends/s3/keyspace.rs | 253 ++++ crates/persistence/src/backends/s3/mod.rs | 21 + crates/persistence/src/backends/s3/models.rs | 34 + crates/persistence/src/backends/s3/storage.rs | 957 ++++++++++++++ crates/persistence/src/backends/s3/tests.rs | 1096 +++++++++++++++++ .../persistence/tests/common/capabilities.rs | 13 +- crates/persistence/tests/s3_tests.rs | 280 +++++ 19 files changed, 6804 insertions(+), 80 deletions(-) create mode 100644 crates/persistence/src/backends/s3/backend.rs create mode 100644 crates/persistence/src/backends/s3/bulk_export.rs create mode 100644 crates/persistence/src/backends/s3/bulk_submit.rs create mode 100644 crates/persistence/src/backends/s3/bundle.rs create mode 100644 crates/persistence/src/backends/s3/client.rs create mode 100644 
crates/persistence/src/backends/s3/config.rs create mode 100644 crates/persistence/src/backends/s3/docs/README.md create mode 100644 crates/persistence/src/backends/s3/docs/discussion_roadmap.md create mode 100644 crates/persistence/src/backends/s3/keyspace.rs create mode 100644 crates/persistence/src/backends/s3/mod.rs create mode 100644 crates/persistence/src/backends/s3/models.rs create mode 100644 crates/persistence/src/backends/s3/storage.rs create mode 100644 crates/persistence/src/backends/s3/tests.rs create mode 100644 crates/persistence/tests/s3_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 1b2c4272..79c38be9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -489,8 +489,8 @@ dependencies = [ "aws-sdk-ssooidc", "aws-sdk-sts", "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-http 0.63.4", + "aws-smithy-json 0.62.4", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -550,7 +550,8 @@ dependencies = [ "aws-credential-types", "aws-sigv4", "aws-smithy-async", - "aws-smithy-http", + "aws-smithy-eventstream", + "aws-smithy-http 0.63.4", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -558,7 +559,9 @@ dependencies = [ "bytes", "bytes-utils", "fastrand", + "http 0.2.12", "http 1.4.0", + "http-body 0.4.6", "http-body 1.0.1", "percent-encoding", "pin-project-lite", @@ -566,6 +569,40 @@ dependencies = [ "uuid", ] +[[package]] +name = "aws-sdk-s3" +version = "1.119.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d65fddc3844f902dfe1864acb8494db5f9342015ee3ab7890270d36fbd2e01c" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http 0.62.6", + "aws-smithy-json 0.61.9", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac", + "http 0.2.12", + "http 1.4.0", + 
"http-body 0.4.6", + "lru", + "percent-encoding", + "regex-lite", + "sha2", + "tracing", + "url", +] + [[package]] name = "aws-sdk-sso" version = "1.94.0" @@ -575,8 +612,8 @@ dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-http 0.63.4", + "aws-smithy-json 0.62.4", "aws-smithy-observability", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -599,8 +636,8 @@ dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-http 0.63.4", + "aws-smithy-json 0.62.4", "aws-smithy-observability", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -623,8 +660,8 @@ dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", - "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-http 0.63.4", + "aws-smithy-json 0.62.4", "aws-smithy-observability", "aws-smithy-query", "aws-smithy-runtime", @@ -646,19 +683,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68f6ae9b71597dc5fd115d52849d7a5556ad9265885ad3492ea8d73b93bbc46e" dependencies = [ "aws-credential-types", - "aws-smithy-http", + "aws-smithy-eventstream", + "aws-smithy-http 0.63.4", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", + "crypto-bigint 0.5.5", "form_urlencoded", "hex", "hmac", "http 0.2.12", "http 1.4.0", + "p256", "percent-encoding", + "ring", "sha2", + "subtle", "time", "tracing", + "zeroize", ] [[package]] @@ -672,6 +715,59 @@ dependencies = [ "tokio", ] +[[package]] +name = "aws-smithy-checksums" +version = "0.63.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87294a084b43d649d967efe58aa1f9e0adc260e13a6938eb904c0ae9b45824ae" +dependencies = [ + "aws-smithy-http 0.62.6", + "aws-smithy-types", + "bytes", + "crc-fast", + "hex", + "http 0.2.12", + "http-body 0.4.6", + "md-5", + "pin-project-lite", + "sha1", + "sha2", + "tracing", +] + +[[package]] +name = 
"aws-smithy-eventstream" +version = "0.60.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c0b3e587fbaa5d7f7e870544508af8ce82ea47cd30376e69e1e37c4ac746f79" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + +[[package]] +name = "aws-smithy-http" +version = "0.62.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b" +dependencies = [ + "aws-smithy-eventstream", + "aws-smithy-runtime-api", + "aws-smithy-types", + "bytes", + "bytes-utils", + "futures-core", + "futures-util", + "http 0.2.12", + "http 1.4.0", + "http-body 0.4.6", + "percent-encoding", + "pin-project-lite", + "pin-utils", + "tracing", +] + [[package]] name = "aws-smithy-http" version = "0.63.4" @@ -702,21 +798,36 @@ dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", "aws-smithy-types", - "h2", + "h2 0.3.27", + "h2 0.4.13", + "http 0.2.12", "http 1.4.0", - "hyper", - "hyper-rustls", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper 1.8.1", + "hyper-rustls 0.24.2", + "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", - "rustls", + "rustls 0.21.12", + "rustls 0.23.36", "rustls-native-certs 0.8.3", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower", "tracing", ] +[[package]] +name = "aws-smithy-json" +version = "0.61.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49fa1213db31ac95288d981476f78d05d9cbb0353d22cdf3472cc05bb02f6551" +dependencies = [ + "aws-smithy-types", +] + [[package]] name = "aws-smithy-json" version = "0.62.4" @@ -752,7 +863,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8fd3dfc18c1ce097cf81fced7192731e63809829c6cbf933c1ec47452d08e1aa" dependencies = [ "aws-smithy-async", - "aws-smithy-http", + "aws-smithy-http 0.63.4", "aws-smithy-http-client", "aws-smithy-observability", "aws-smithy-runtime-api", @@ -796,6 +907,7 @@ dependencies 
= [ "base64-simd", "bytes", "bytes-utils", + "futures-core", "http 0.2.12", "http 1.4.0", "http-body 0.4.6", @@ -808,6 +920,8 @@ dependencies = [ "ryu", "serde", "time", + "tokio", + "tokio-util", ] [[package]] @@ -846,7 +960,7 @@ dependencies = [ "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-util", "itoa", "matchit", @@ -900,7 +1014,7 @@ dependencies = [ "expect-json", "http 1.4.0", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-util", "mime", "pretty_assertions", @@ -929,6 +1043,12 @@ dependencies = [ "tokio", ] +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + [[package]] name = "base64" version = "0.21.7" @@ -1009,16 +1129,16 @@ dependencies = [ "home", "http 1.4.0", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-named-pipe", - "hyper-rustls", + "hyper-rustls 0.27.7", "hyper-util", "hyperlocal", "log", "num", "pin-project-lite", "rand 0.9.2", - "rustls", + "rustls 0.23.36", "rustls-native-certs 0.8.3", "rustls-pemfile", "rustls-pki-types", @@ -1490,6 +1610,12 @@ version = "0.4.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75984efb6ed102a0d42db99afb6c1948f0380d1d91808d5529916e6c08b49d8d" +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "const-random" version = "0.1.18" @@ -1591,6 +1717,19 @@ version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" +[[package]] +name = "crc-fast" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ddc2d09feefeee8bd78101665bd8645637828fa9317f9f292496dbbd8c65ff3" +dependencies = [ + 
"crc", + "digest", + "rand 0.9.2", + "regex", + "rustversion", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -1684,6 +1823,28 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + "rand_core 0.6.4", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "crypto-common" version = "0.1.7" @@ -1821,6 +1982,16 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" +dependencies = [ + "const-oid", + "zeroize", +] + [[package]] name = "deranged" version = "0.5.6" @@ -1970,6 +2141,18 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der", + "elliptic-curve", + "rfc6979", + "signature", +] + [[package]] name = "either" version = "1.15.0" @@ -1997,6 +2180,26 @@ dependencies = [ "void", ] +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" +dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der", + "digest", + "ff", + "generic-array", + "group", + "pkcs8", + "rand_core 0.6.4", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "email_address" version = "0.2.9" @@ -2127,6 +2330,16 @@ dependencies = [ "web-time", ] +[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + [[package]] name = "filetime" version = "0.2.27" @@ -2374,6 +2587,36 @@ dependencies = [ "wasip3", ] +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap 2.13.0", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.13" @@ -2567,6 +2810,7 @@ dependencies = [ "async-trait", "aws-config", "aws-credential-types", + "aws-sdk-s3", "axum", "base64 0.22.1", "cdrs-tokio", @@ -2582,7 +2826,6 @@ dependencies = [ "json-patch", "mongodb", "neo4rs", - "object_store", "parking_lot", "paste", "postgres-types", @@ -2851,6 +3094,30 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "135b12329e5e3ce057a9f972339ea52bc954fe1e9358ef27f95e89716fbc5424" +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.8.1" @@ -2861,7 +3128,7 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", + "h2 0.4.13", "http 1.4.0", "http-body 1.0.1", "httparse", @@ -2881,7 +3148,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73b7d8abf35697b81a825e386fc151e0d503e8cb5fcb93cc8669c376dfd6f278" dependencies = [ "hex", - "hyper", + "hyper 1.8.1", "hyper-util", "pin-project-lite", "tokio", @@ -2889,6 +3156,21 @@ dependencies = [ "winapi", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "tokio", + "tokio-rustls 0.24.1", +] + [[package]] name = "hyper-rustls" version = "0.27.7" @@ -2896,13 +3178,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.4.0", - "hyper", + "hyper 1.8.1", "hyper-util", - "rustls", + "rustls 0.23.36", "rustls-native-certs 0.8.3", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tower-service", ] @@ -2912,7 +3194,7 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b90d566bffbce6a75bd8b09a05aa8c2cb1fabb6cb348f8840c9e4c90a0d83b0" dependencies = [ - "hyper", + "hyper 1.8.1", "hyper-util", "pin-project-lite", "tokio", @@ -2927,7 +3209,7 @@ checksum = 
"70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-util", "native-tls", "tokio", @@ -2947,7 +3229,7 @@ dependencies = [ "futures-util", "http 1.4.0", "http-body 1.0.1", - "hyper", + "hyper 1.8.1", "ipnet", "libc", "percent-encoding", @@ -2968,7 +3250,7 @@ checksum = "986c5ce3b994526b3cd75578e62554abd09f0899d6206de48b3e96ab34ccc8c7" dependencies = [ "hex", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-util", "pin-project-lite", "tokio", @@ -3456,6 +3738,15 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.5", +] + [[package]] name = "lru-slab" version = "0.1.2" @@ -3668,7 +3959,7 @@ dependencies = [ "percent-encoding", "rand 0.9.2", "rustc_version_runtime", - "rustls", + "rustls 0.23.36", "rustversion", "serde", "serde_bytes", @@ -3681,7 +3972,7 @@ dependencies = [ "take_mut", "thiserror 2.0.18", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "typed-builder", "uuid", @@ -3740,7 +4031,7 @@ dependencies = [ "serde", "thiserror 1.0.69", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.4", "url", "webpki-roots 0.26.11", ] @@ -3885,7 +4176,7 @@ dependencies = [ "futures", "httparse", "humantime", - "hyper", + "hyper 1.8.1", "itertools 0.13.0", "md-5", "parking_lot", @@ -4022,6 +4313,17 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2", +] + [[package]] name = "parking_lot" version = "0.12.5" @@ -4284,6 +4586,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.32" @@ -4623,7 +4935,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", + "rustls 0.23.36", "socket2 0.6.2", "thiserror 2.0.18", "tokio", @@ -4643,7 +4955,7 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls", + "rustls 0.23.36", "rustls-pki-types", "slab", "thiserror 2.0.18", @@ -4899,12 +5211,12 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", + "h2 0.4.13", "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper", - "hyper-rustls", + "hyper 1.8.1", + "hyper-rustls 0.27.7", "hyper-tls", "hyper-util", "js-sys", @@ -4914,7 +5226,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.36", "rustls-native-certs 0.8.3", "rustls-pki-types", "serde", @@ -4923,7 +5235,7 @@ dependencies = [ "sync_wrapper", "tokio", "tokio-native-tls", - "tokio-rustls", + "tokio-rustls 0.26.4", "tokio-util", "tower", "tower-http 0.6.8", @@ -4956,6 +5268,17 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + 
"crypto-bigint 0.4.9", + "hmac", + "zeroize", +] + [[package]] name = "ring" version = "0.17.14" @@ -5101,6 +5424,18 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + [[package]] name = "rustls" version = "0.23.36" @@ -5112,7 +5447,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.9", "subtle", "zeroize", ] @@ -5161,6 +5496,16 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.9" @@ -5242,12 +5587,36 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "seahash" version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -5462,6 +5831,16 @@ dependencies = [ 
"libc", ] +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest", + "rand_core 0.6.4", +] + [[package]] name = "simd-adler32" version = "0.3.8" @@ -5539,6 +5918,16 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "stable_deref_trait" version = "1.2.1" @@ -5948,13 +6337,23 @@ dependencies = [ "whoami", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls", + "rustls 0.23.36", "tokio", ] @@ -6034,11 +6433,11 @@ dependencies = [ "axum", "base64 0.22.1", "bytes", - "h2", + "h2 0.4.13", "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-timeout", "hyper-util", "percent-encoding", @@ -6344,7 +6743,7 @@ dependencies = [ "base64 0.22.1", "log", "percent-encoding", - "rustls", + "rustls 0.23.36", "rustls-pki-types", "ureq-proto", "utf-8", diff --git a/crates/persistence/Cargo.toml b/crates/persistence/Cargo.toml index 07694cd8..c257a3c2 100644 --- a/crates/persistence/Cargo.toml +++ b/crates/persistence/Cargo.toml @@ -21,7 +21,7 @@ cassandra = ["dep:cdrs-tokio", "dep:cdrs-tokio-helpers-derive"] mongodb = ["dep:mongodb"] neo4j = ["dep:neo4rs"] elasticsearch = ["dep:elasticsearch"] -s3 = ["dep:object_store", 
"dep:aws-config", "dep:aws-credential-types"] +s3 = ["dep:aws-sdk-s3", "dep:aws-config", "dep:aws-credential-types"] # Configuration advisor binary advisor = ["dep:axum", "dep:tower-http", "dep:tracing-subscriber"] @@ -75,8 +75,8 @@ neo4rs = { version = "0.8", optional = true } # Elasticsearch backend elasticsearch = { version = "8.15.0-alpha.1", optional = true } -# S3/Object storage backend -object_store = { version = "0.11", features = ["aws"], optional = true } +# S3 backend +aws-sdk-s3 = { version = "1", optional = true } aws-config = { version = "1", optional = true } aws-credential-types = { version = "1", optional = true } diff --git a/crates/persistence/README.md b/crates/persistence/README.md index 89465d33..3ec05f93 100644 --- a/crates/persistence/README.md +++ b/crates/persistence/README.md @@ -300,37 +300,37 @@ The matrix below shows which FHIR operations each backend supports. This reflect | Feature | SQLite | PostgreSQL | MongoDB | Cassandra | Neo4j | Elasticsearch | S3 | |---------|--------|------------|---------|-----------|-------|---------------|-----| | **Core Operations** | -| [CRUD](https://build.fhir.org/http.html#crud) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ○ | -| [Versioning (vread)](https://build.fhir.org/http.html#vread) | ✓ | ✓ | ○ | ○ | ○ | ○ | ○ | -| [Optimistic Locking](https://build.fhir.org/http.html#concurrency) | ✓ | ✓ | ○ | ○ | ○ | ✗ | ✗ | -| [Instance History](https://build.fhir.org/http.html#history) | ✓ | ✓ | ○ | ○ | ○ | ✗ | ○ | -| [Type History](https://build.fhir.org/http.html#history) | ✓ | ✓ | ○ | ✗ | ○ | ✗ | ✗ | -| [System History](https://build.fhir.org/http.html#history) | ✓ | ✓ | ○ | ✗ | ○ | ✗ | ✗ | -| [Batch Bundles](https://build.fhir.org/http.html#batch) | ✓ | ✓ | ○ | ○ | ○ | ○ | ○ | -| [Transaction Bundles](https://build.fhir.org/http.html#transaction) | ✓ | ✓ | ○ | ✗ | ○ | ✗ | ✗ | +| [CRUD](https://build.fhir.org/http.html#crud) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ✓ | +| [Versioning (vread)](https://build.fhir.org/http.html#vread) | ✓ | 
✓ | ○ | ○ | ○ | ○ | ✓ | +| [Optimistic Locking](https://build.fhir.org/http.html#concurrency) | ✓ | ✓ | ○ | ○ | ○ | ✗ | ✓ | +| [Instance History](https://build.fhir.org/http.html#history) | ✓ | ✓ | ○ | ○ | ○ | ✗ | ✓ | +| [Type History](https://build.fhir.org/http.html#history) | ✓ | ✓ | ○ | ✗ | ○ | ✗ | ✓ | +| [System History](https://build.fhir.org/http.html#history) | ✓ | ✓ | ○ | ✗ | ○ | ✗ | ✓ | +| [Batch Bundles](https://build.fhir.org/http.html#batch) | ✓ | ✓ | ○ | ○ | ○ | ○ | ✓ | +| [Transaction Bundles](https://build.fhir.org/http.html#transaction) | ✓ | ✓ | ○ | ✗ | ○ | ✗ | ◐ | | [Conditional Operations](https://build.fhir.org/http.html#cond-update) | ✓ | ✓ | ○ | ✗ | ○ | ○ | ✗ | | [Conditional Patch](https://build.fhir.org/http.html#patch) | ✓ | ✓ | ○ | ✗ | ○ | ○ | ✗ | | [Delete History](https://build.fhir.org/http.html#delete) | ✓ | ✓ | ○ | ✗ | ○ | ✗ | ✗ | | **Multitenancy** | -| Shared Schema | ✓ | ✓ | ○ | ○ | ○ | ✓ | ○ | +| Shared Schema | ✓ | ✓ | ○ | ○ | ○ | ✓ | ✓ | | Schema-per-Tenant | ✗ | ○ | ○ | ✗ | ✗ | ✗ | ✗ | -| Database-per-Tenant | ✓ | ○ | ○ | ○ | ○ | ○ | ○ | +| Database-per-Tenant | ✓ | ○ | ○ | ○ | ○ | ○ | ✓ | | Row-Level Security | ✗ | ○ | ✗ | ✗ | ✗ | ✗ | ✗ | | **[Search Parameters](https://build.fhir.org/search.html#ptypes)** | | [String](https://build.fhir.org/search.html#string) | ✓ | ✓ | ○ | ✗ | ○ | ✓ | ✗ | | [Token](https://build.fhir.org/search.html#token) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ✗ | | [Reference](https://build.fhir.org/search.html#reference) | ✓ | ✓ | ○ | ✗ | ○ | ✓ | ✗ | -| [Date](https://build.fhir.org/search.html#date) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ○ | -| [Number](https://build.fhir.org/search.html#number) | ✓ | ✓ | ○ | ✗ | ○ | ✓ | ○ | -| [Quantity](https://build.fhir.org/search.html#quantity) | ✓ | ✓ | ○ | ✗ | ✗ | ✓ | ○ | -| [URI](https://build.fhir.org/search.html#uri) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ○ | +| [Date](https://build.fhir.org/search.html#date) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ✗ | +| [Number](https://build.fhir.org/search.html#number) | ✓ | 
✓ | ○ | ✗ | ○ | ✓ | ✗ | +| [Quantity](https://build.fhir.org/search.html#quantity) | ✓ | ✓ | ○ | ✗ | ✗ | ✓ | ✗ | +| [URI](https://build.fhir.org/search.html#uri) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ✗ | | [Composite](https://build.fhir.org/search.html#composite) | ✓ | ○ | ○ | ✗ | ○ | ✓ | ✗ | | **[Search Modifiers](https://build.fhir.org/search.html#modifiers)** | -| [:exact](https://build.fhir.org/search.html#modifiers) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ○ | +| [:exact](https://build.fhir.org/search.html#modifiers) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ✗ | | [:contains](https://build.fhir.org/search.html#modifiers) | ✓ | ✓ | ○ | ✗ | ○ | ✓ | ✗ | | [:text](https://build.fhir.org/search.html#modifiers) (full-text) | ✓ | ◐ | ○ | ✗ | ✗ | ✓ | ✗ | -| [:not](https://build.fhir.org/search.html#modifiers) | ✓ | ○ | ○ | ✗ | ○ | ✓ | ○ | -| [:missing](https://build.fhir.org/search.html#modifiers) | ✓ | ○ | ○ | ✗ | ○ | ✓ | ○ | +| [:not](https://build.fhir.org/search.html#modifiers) | ✓ | ○ | ○ | ✗ | ○ | ✓ | ✗ | +| [:missing](https://build.fhir.org/search.html#modifiers) | ✓ | ○ | ○ | ✗ | ○ | ✓ | ✗ | | [:above / :below](https://build.fhir.org/search.html#modifiers) | ✗ | †○ | †○ | ✗ | ○ | ✓ | ✗ | | [:in / :not-in](https://build.fhir.org/search.html#modifiers) | ✗ | †○ | †○ | ✗ | ○ | †○ | ✗ | | [:of-type](https://build.fhir.org/search.html#modifiers) | ✓ | ○ | ○ | ✗ | ○ | ✓ | ✗ | @@ -346,13 +346,15 @@ The matrix below shows which FHIR operations each backend supports. 
This reflect | [_revinclude](https://build.fhir.org/search.html#revinclude) | ✓ | ✓ | ○ | ✗ | ○ | ✓ | ✗ | | **[Pagination](https://build.fhir.org/http.html#paging)** | | Offset | ✓ | ✓ | ○ | ✗ | ○ | ✓ | ✗ | -| Cursor (keyset) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ○ | +| Cursor (keyset) | ✓ | ✓ | ○ | ○ | ○ | ✓ | ✗ | | **[Sorting](https://build.fhir.org/search.html#sort)** | | Single field | ✓ | ✓ | ○ | ✗ | ○ | ✓ | ✗ | | Multiple fields | ✓ | ✓ | ○ | ✗ | ○ | ✓ | ✗ | | **[Bulk Operations](https://hl7.org/fhir/uv/bulkdata/)** | -| [Bulk Export](https://hl7.org/fhir/uv/bulkdata/export.html) | ✓ | ✓ | ○ | ○ | ○ | ○ | ○ | -| [Bulk Submit](https://hackmd.io/@argonaut/rJoqHZrPle) | ✓ | ✓ | ○ | ○ | ○ | ○ | ○ | +| [Bulk Export](https://hl7.org/fhir/uv/bulkdata/export.html) | ✓ | ✓ | ○ | ○ | ○ | ○ | ✓ | +| [Bulk Submit](https://hackmd.io/@argonaut/rJoqHZrPle) | ✓ | ✓ | ○ | ○ | ○ | ○ | ✓ | + +The S3 backend is intentionally storage-focused (CRUD/version/history/bulk) and does not act as a full FHIR search engine. For query-heavy deployments, use a DB/search backend as primary query engine and compose S3 as archive/bulk/history storage. 
### Primary/Secondary Role Matrix @@ -368,7 +370,7 @@ Backends can serve as primary (CRUD, versioning, transactions) or secondary (opt | Cassandra alone | Cassandra | — | Planned | High write throughput | | Cassandra + Elasticsearch | Cassandra | Elasticsearch (search) | Planned | Write-heavy + search | | MongoDB alone | MongoDB | — | Planned | Document-centric | -| S3 alone | S3 | — | Planned | Archival/bulk storage | +| S3 alone | S3 | — | ✓ Implemented (storage-focused) | Archival/bulk/history storage | | S3 + Elasticsearch | S3 | Elasticsearch (search) | Planned | Large-scale + search | ### Backend Selection Guide @@ -393,7 +395,7 @@ Backends can serve as primary (CRUD, versioning, transactions) or secondary (opt | `mongodb` | MongoDB document store | mongodb | | `neo4j` | Neo4j graph database | neo4rs | | `elasticsearch` | Elasticsearch search | elasticsearch | -| `s3` | AWS S3 object storage | object_store | +| `s3` | AWS S3 object storage | aws-sdk-s3 | ## Building & Running Storage Backends diff --git a/crates/persistence/src/backends/mod.rs b/crates/persistence/src/backends/mod.rs index dca84266..a1a77f49 100644 --- a/crates/persistence/src/backends/mod.rs +++ b/crates/persistence/src/backends/mod.rs @@ -50,5 +50,5 @@ pub mod postgres; #[cfg(feature = "elasticsearch")] pub mod elasticsearch; // -// #[cfg(feature = "s3")] -// pub mod s3; +#[cfg(feature = "s3")] +pub mod s3; diff --git a/crates/persistence/src/backends/s3/backend.rs b/crates/persistence/src/backends/s3/backend.rs new file mode 100644 index 00000000..7ea9919f --- /dev/null +++ b/crates/persistence/src/backends/s3/backend.rs @@ -0,0 +1,251 @@ +use std::future::Future; +use std::sync::Arc; + +use async_trait::async_trait; + +use crate::core::{Backend, BackendCapability, BackendKind}; +use crate::error::{BackendError, StorageError, StorageResult}; +use crate::tenant::{TenantContext, TenantId}; + +use super::client::{AwsS3Client, S3Api, S3ClientError}; +use super::config::{S3BackendConfig, 
S3TenancyMode};
use super::keyspace::S3Keyspace;

/// AWS S3 backend for object-storage persistence.
#[derive(Clone)]
pub struct S3Backend {
    pub(crate) config: S3BackendConfig,
    // NOTE(review): the generic argument was lost in extraction; `Arc<dyn S3Api>`
    // is reconstructed from the `S3Api` import and the mockable `with_client`
    // test hook below — confirm against the original source.
    pub(crate) client: Arc<dyn S3Api>,
}

impl std::fmt::Debug for S3Backend {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `client` is deliberately omitted: the trait object has no useful Debug.
        f.debug_struct("S3Backend")
            .field("config", &self.config)
            .finish_non_exhaustive()
    }
}

/// Marker connection type: S3 is stateless per request, so "acquiring a
/// connection" is a no-op (see `Backend::acquire`).
#[derive(Debug)]
pub struct S3Connection;

/// Resolved (bucket, key prefix) pair for one tenant.
#[derive(Debug, Clone)]
pub(crate) struct TenantLocation {
    pub bucket: String,
    pub keyspace: S3Keyspace,
}

impl S3Backend {
    /// Creates a new S3 backend using AWS standard credential provider chain.
    pub fn new(config: S3BackendConfig) -> StorageResult<Self> {
        Self::from_env(config)
    }

    /// Creates a new S3 backend using environment/provider chain credentials.
    ///
    /// Falls back to the `AWS_REGION` environment variable when the config does
    /// not pin a region. When `validate_buckets_on_startup` is set, every
    /// configured bucket is probed with `HeadBucket` before returning (buckets
    /// are never created automatically).
    ///
    /// # Errors
    /// Config validation failure, SDK config load failure, or a missing /
    /// inaccessible configured bucket.
    pub fn from_env(mut config: S3BackendConfig) -> StorageResult<Self> {
        config.validate()?;

        if config.region.is_none() {
            config.region = std::env::var("AWS_REGION").ok();
        }

        let sdk_config = block_on(AwsS3Client::load_sdk_config(config.region.as_deref()))?;
        let client = Arc::new(AwsS3Client::from_sdk_config(&sdk_config));

        let backend = Self { config, client };

        if backend.config.validate_buckets_on_startup {
            // Outer `?` unwraps `block_on`'s result, inner `?` the validation.
            block_on(backend.validate_buckets())??;
        }

        Ok(backend)
    }

    /// Test-only constructor that injects a mock client.
    #[cfg(test)]
    pub(crate) fn with_client(
        config: S3BackendConfig,
        client: Arc<dyn S3Api>,
    ) -> StorageResult<Self> {
        config.validate()?;
        Ok(Self { config, client })
    }

    /// Verifies that every configured bucket exists and is reachable
    /// (`HeadBucket`). No bucket is ever created here.
    pub(crate) async fn validate_buckets(&self) -> StorageResult<()> {
        for bucket in self.config.configured_buckets() {
            self.client
                .head_bucket(&bucket)
                .await
                .map_err(|e| self.map_client_error(e))?;
        }
        Ok(())
    }

    /// Resolves the bucket and key prefix for `tenant` according to the
    /// configured tenancy mode:
    ///
    /// - `PrefixPerTenant`: one shared bucket; the tenant id becomes a key prefix.
    /// - `BucketPerTenant`: explicit tenant→bucket map, with an optional default
    ///   bucket available to the system tenant only.
    ///
    /// # Errors
    /// `TenantError::InvalidTenant` when no bucket is mapped for the tenant.
    pub(crate) fn tenant_location(&self, tenant: &TenantContext) -> StorageResult<TenantLocation> {
        let global_prefix = self
            .config
            .prefix
            .as_ref()
            .map(|p| p.trim_matches('/').to_string())
            .filter(|p| !p.is_empty());

        match &self.config.tenancy_mode {
            S3TenancyMode::PrefixPerTenant { bucket } => Ok(TenantLocation {
                bucket: bucket.clone(),
                keyspace: S3Keyspace::new(global_prefix)
                    .with_tenant_prefix(tenant.tenant_id().as_str()),
            }),
            S3TenancyMode::BucketPerTenant {
                tenant_bucket_map,
                default_system_bucket,
            } => {
                let tenant_id = tenant.tenant_id().as_str();
                let bucket = tenant_bucket_map
                    .get(tenant_id)
                    .cloned()
                    .or_else(|| {
                        // Only the system tenant may fall back to the default bucket.
                        if tenant.tenant_id().is_system() {
                            default_system_bucket.clone()
                        } else {
                            None
                        }
                    })
                    .ok_or_else(|| {
                        StorageError::Tenant(crate::error::TenantError::InvalidTenant {
                            tenant_id: TenantId::new(tenant_id),
                        })
                    })?;

                Ok(TenantLocation {
                    bucket,
                    keyspace: S3Keyspace::new(global_prefix),
                })
            }
        }
    }

    /// Translates low-level client errors into the crate's `StorageError` space.
    ///
    /// NOTE(review): `NotFound` is mapped to `Unavailable` — this path appears
    /// to be used for bucket-level probes (missing bucket == backend down);
    /// object-level not-found seems to be handled before reaching here.
    /// Confirm callers uphold that.
    pub(crate) fn map_client_error(&self, error: S3ClientError) -> StorageError {
        match error {
            S3ClientError::NotFound => StorageError::Backend(BackendError::Unavailable {
                backend_name: "s3".to_string(),
                message: "resource not found in S3".to_string(),
            }),
            S3ClientError::PreconditionFailed => StorageError::Backend(BackendError::QueryError {
                message: "S3 precondition failed".to_string(),
            }),
            S3ClientError::Throttled(message) => StorageError::Backend(BackendError::Unavailable {
                backend_name: "s3".to_string(),
                message,
            }),
            S3ClientError::Unavailable(message) => {
                StorageError::Backend(BackendError::Unavailable {
                    backend_name: "s3".to_string(),
                    message,
                })
            }
            S3ClientError::InvalidInput(message) => {
                StorageError::Validation(crate::error::ValidationError::InvalidResource {
                    message,
                    details: Vec::new(),
                })
            }
            S3ClientError::Internal(message) => StorageError::Backend(BackendError::Internal {
                backend_name: "s3".to_string(),
                message,
                source: None,
            }),
        }
    }
}

#[async_trait]
impl Backend for S3Backend {
    type Connection = S3Connection;

    fn kind(&self) -> BackendKind {
        BackendKind::S3
    }

    fn name(&self) -> &'static str {
        "s3"
    }

    fn supports(&self, capability:
BackendCapability) -> bool {
        // Derived from the single capability list below so `supports` and
        // `capabilities` can never drift apart (the original duplicated the
        // full list in a `matches!`).
        self.capabilities().contains(&capability)
    }

    /// Full capability set of the S3 backend: storage-focused (CRUD, versions,
    /// history, bulk, locking, cursor pagination) — deliberately no search.
    fn capabilities(&self) -> Vec<BackendCapability> {
        vec![
            BackendCapability::Crud,
            BackendCapability::Versioning,
            BackendCapability::InstanceHistory,
            BackendCapability::TypeHistory,
            BackendCapability::SystemHistory,
            BackendCapability::OptimisticLocking,
            BackendCapability::CursorPagination,
            BackendCapability::BulkExport,
            BackendCapability::BulkImport,
            BackendCapability::SharedSchema,
            BackendCapability::DatabasePerTenant,
        ]
    }

    /// S3 needs no pooled connections; hands back a stateless marker.
    async fn acquire(&self) -> Result<Self::Connection, BackendError> {
        Ok(S3Connection)
    }

    async fn release(&self, _conn: Self::Connection) {}

    /// Health == every configured bucket answers `HeadBucket`.
    async fn health_check(&self) -> Result<(), BackendError> {
        self.validate_buckets().await.map_err(|err| match err {
            StorageError::Backend(backend_err) => backend_err,
            other => BackendError::Internal {
                backend_name: "s3".to_string(),
                message: other.to_string(),
                source: None,
            },
        })
    }

    async fn initialize(&self) -> Result<(), BackendError> {
        self.health_check().await
    }

    async fn migrate(&self) -> Result<(), BackendError> {
        // No schema migrations for object storage.
+ self.health_check().await + } +} + +fn block_on(future: F) -> StorageResult +where + F: Future, +{ + if let Ok(handle) = tokio::runtime::Handle::try_current() { + Ok(tokio::task::block_in_place(|| handle.block_on(future))) + } else { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .map_err(|e| { + StorageError::Backend(BackendError::Internal { + backend_name: "s3".to_string(), + message: format!("failed to create runtime: {e}"), + source: None, + }) + })?; + Ok(rt.block_on(future)) + } +} diff --git a/crates/persistence/src/backends/s3/bulk_export.rs b/crates/persistence/src/backends/s3/bulk_export.rs new file mode 100644 index 00000000..2234444c --- /dev/null +++ b/crates/persistence/src/backends/s3/bulk_export.rs @@ -0,0 +1,536 @@ +use std::collections::BTreeSet; + +use async_trait::async_trait; +use chrono::Utc; + +use crate::core::bulk_export::{ + BulkExportStorage, ExportDataProvider, ExportJobId, ExportManifest, ExportOutputFile, + ExportProgress, ExportRequest, ExportStatus, NdjsonBatch, TypeExportProgress, +}; +use crate::error::{BulkExportError, StorageError, StorageResult}; +use crate::tenant::TenantContext; + +use super::backend::{S3Backend, TenantLocation}; +use super::models::ExportJobState; + +#[async_trait] +impl BulkExportStorage for S3Backend { + async fn start_export( + &self, + tenant: &TenantContext, + request: ExportRequest, + ) -> StorageResult { + if request.output_format != "application/fhir+ndjson" { + return Err(StorageError::BulkExport( + BulkExportError::UnsupportedFormat { + format: request.output_format, + }, + )); + } + + let active_exports = self.list_exports(tenant, false).await?; + if active_exports.len() >= 5 { + return Err(StorageError::BulkExport( + BulkExportError::TooManyConcurrentExports { max_concurrent: 5 }, + )); + } + + let job_id = ExportJobId::new(); + let progress = ExportProgress::accepted(job_id.clone(), request.level.clone(), Utc::now()); + let state = ExportJobState { + 
request, + progress, + manifest: None, + }; + + self.save_export_state(tenant, &job_id, &state).await?; + + if let Err(err) = self.run_export_job(tenant, &job_id).await { + let _ = self + .mark_export_failed(tenant, &job_id, &err.to_string()) + .await; + } + + Ok(job_id) + } + + async fn get_export_status( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult { + Ok(self.load_export_state(tenant, job_id).await?.progress) + } + + async fn cancel_export( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult<()> { + let mut state = self.load_export_state(tenant, job_id).await?; + + if state.progress.status.is_terminal() { + return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { + job_id: job_id.to_string(), + expected: "accepted or in-progress".to_string(), + actual: state.progress.status.to_string(), + })); + } + + state.progress.status = ExportStatus::Cancelled; + state.progress.completed_at = Some(Utc::now()); + state.progress.error_message = None; + state.progress.current_type = None; + + self.save_export_state(tenant, job_id, &state).await + } + + async fn delete_export( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult<()> { + let location = self.tenant_location(tenant)?; + + if !self.export_job_exists(&location, job_id).await? { + return Err(StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: job_id.to_string(), + })); + } + + let prefix = location.keyspace.export_job_prefix(job_id.as_str()); + for object in self.list_objects_all(&location.bucket, &prefix).await? 
{ + self.delete_object(&location.bucket, &object.key).await?; + } + + Ok(()) + } + + async fn get_export_manifest( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult { + let state = self.load_export_state(tenant, job_id).await?; + + if state.progress.status != ExportStatus::Complete { + return Err(StorageError::BulkExport(BulkExportError::InvalidJobState { + job_id: job_id.to_string(), + expected: "complete".to_string(), + actual: state.progress.status.to_string(), + })); + } + + if let Some(manifest) = state.manifest { + return Ok(manifest); + } + + let location = self.tenant_location(tenant)?; + let manifest_key = location.keyspace.export_job_manifest_key(job_id.as_str()); + let manifest = self + .get_json_object::(&location.bucket, &manifest_key) + .await? + .map(|(manifest, _)| manifest) + .ok_or_else(|| { + StorageError::BulkExport(BulkExportError::InvalidJobState { + job_id: job_id.to_string(), + expected: "complete with manifest".to_string(), + actual: "complete-without-manifest".to_string(), + }) + })?; + + Ok(manifest) + } + + async fn list_exports( + &self, + tenant: &TenantContext, + include_completed: bool, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + let prefix = location.keyspace.export_jobs_prefix(); + + let mut exports = Vec::new(); + for object in self.list_objects_all(&location.bucket, &prefix).await? { + if !object.key.ends_with("/state.json") { + continue; + } + + if let Some((state, _)) = self + .get_json_object::(&location.bucket, &object.key) + .await? 
+ { + if include_completed || state.progress.status.is_active() { + exports.push(state.progress); + } + } + } + + exports.sort_by(|a, b| b.transaction_time.cmp(&a.transaction_time)); + Ok(exports) + } +} + +#[async_trait] +impl ExportDataProvider for S3Backend { + async fn list_export_types( + &self, + tenant: &TenantContext, + request: &ExportRequest, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + + if !request.resource_types.is_empty() { + let mut found = Vec::new(); + for resource_type in &request.resource_types { + let count = self + .count_export_resources(tenant, request, resource_type) + .await?; + if count > 0 { + found.push(resource_type.clone()); + } + } + return Ok(found); + } + + let mut types = BTreeSet::new(); + for key in self.list_current_keys(&location, None).await? { + if let Some(resource_type) = parse_resource_type_from_current_key(&key) { + types.insert(resource_type); + } + } + + Ok(types.into_iter().collect()) + } + + async fn count_export_resources( + &self, + tenant: &TenantContext, + request: &ExportRequest, + resource_type: &str, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let keys = self + .list_current_keys(&location, Some(resource_type)) + .await?; + + let mut count = 0u64; + for key in keys { + let Some((resource, _)) = self + .get_json_object::(&location.bucket, &key) + .await? 
+ else { + continue; + }; + + if resource.is_deleted() { + continue; + } + + if let Some(since) = request.since { + if resource.last_modified() < since { + continue; + } + } + + count += 1; + } + + Ok(count) + } + + async fn fetch_export_batch( + &self, + tenant: &TenantContext, + request: &ExportRequest, + resource_type: &str, + cursor: Option<&str>, + batch_size: u32, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let mut keys = self + .list_current_keys(&location, Some(resource_type)) + .await?; + keys.sort(); + + let mut lines = Vec::new(); + for key in keys { + let Some((resource, _)) = self + .get_json_object::(&location.bucket, &key) + .await? + else { + continue; + }; + + if resource.is_deleted() { + continue; + } + + if let Some(since) = request.since { + if resource.last_modified() < since { + continue; + } + } + + lines.push(serde_json::to_string(resource.content()).map_err(|e| { + StorageError::BulkExport(BulkExportError::WriteError { + message: format!("failed to serialize NDJSON line: {e}"), + }) + })?); + } + + let offset = parse_export_cursor(cursor)?; + let start = offset.min(lines.len()); + let end = start.saturating_add(batch_size as usize).min(lines.len()); + + let batch_lines = lines[start..end].to_vec(); + let is_last = end >= lines.len(); + let next_cursor = if is_last { None } else { Some(end.to_string()) }; + + Ok(NdjsonBatch { + lines: batch_lines, + next_cursor, + is_last, + }) + } +} + +impl S3Backend { + async fn run_export_job( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult<()> { + let location = self.tenant_location(tenant)?; + let mut state = self.load_export_state(tenant, job_id).await?; + + state.progress.status = ExportStatus::InProgress; + state.progress.started_at = Some(Utc::now()); + state.progress.error_message = None; + state.progress.current_type = None; + state.progress.type_progress.clear(); + + self.save_export_state(tenant, job_id, &state).await?; + + let 
resource_types = self.list_export_types(tenant, &state.request).await?; + let mut output_files: Vec = Vec::new(); + + for resource_type in resource_types { + state.progress.current_type = Some(resource_type.clone()); + self.save_export_state(tenant, job_id, &state).await?; + + let mut type_progress = TypeExportProgress::new(resource_type.clone()); + type_progress.total_count = Some( + self.count_export_resources(tenant, &state.request, &resource_type) + .await?, + ); + + let mut cursor: Option = None; + let mut part_lines: Vec = Vec::new(); + let mut part_number: u32 = 1; + + loop { + let batch = self + .fetch_export_batch( + tenant, + &state.request, + &resource_type, + cursor.as_deref(), + state.request.batch_size.max(1), + ) + .await?; + + for line in batch.lines { + part_lines.push(line); + if part_lines.len() >= self.config.bulk_export_part_size as usize { + let written = self + .write_export_part( + &location, + job_id, + &resource_type, + part_number, + &part_lines, + ) + .await?; + output_files.push(written); + type_progress.exported_count += part_lines.len() as u64; + type_progress.cursor_state = batch.next_cursor.clone(); + self.save_export_type_progress(&location, job_id, &type_progress) + .await?; + part_lines.clear(); + part_number += 1; + } + } + + cursor = batch.next_cursor; + if batch.is_last { + break; + } + } + + if !part_lines.is_empty() { + let written = self + .write_export_part(&location, job_id, &resource_type, part_number, &part_lines) + .await?; + output_files.push(written); + type_progress.exported_count += part_lines.len() as u64; + part_lines.clear(); + } + + type_progress.cursor_state = None; + self.save_export_type_progress(&location, job_id, &type_progress) + .await?; + state.progress.type_progress.push(type_progress); + } + + state.progress.status = ExportStatus::Complete; + state.progress.completed_at = Some(Utc::now()); + state.progress.current_type = None; + state.progress.error_message = None; + + let manifest = ExportManifest { 
+ transaction_time: state.progress.transaction_time, + request: format!("$export?job={}", job_id), + requires_access_token: true, + output: output_files, + error: Vec::new(), + message: None, + extension: None, + }; + + state.manifest = Some(manifest.clone()); + + let manifest_key = location.keyspace.export_job_manifest_key(job_id.as_str()); + let manifest_payload = self.serialize_json(&manifest)?; + self.put_json_object( + &location.bucket, + &manifest_key, + &manifest_payload, + None, + None, + ) + .await?; + + self.save_export_state(tenant, job_id, &state).await + } + + async fn write_export_part( + &self, + location: &TenantLocation, + job_id: &ExportJobId, + resource_type: &str, + part_number: u32, + lines: &[String], + ) -> StorageResult { + let key = + location + .keyspace + .export_job_output_key(job_id.as_str(), resource_type, part_number); + let mut body = lines.join("\n"); + body.push('\n'); + + self.put_bytes_object( + &location.bucket, + &key, + body.as_bytes(), + Some("application/fhir+ndjson"), + ) + .await?; + + Ok( + ExportOutputFile::new(resource_type, format!("s3://{}/{}", location.bucket, key)) + .with_count(lines.len() as u64), + ) + } + + async fn export_job_exists( + &self, + location: &TenantLocation, + job_id: &ExportJobId, + ) -> StorageResult { + let key = location.keyspace.export_job_state_key(job_id.as_str()); + Ok(self + .client + .head_object(&location.bucket, &key) + .await + .map_err(|e| self.map_client_error(e))? + .is_some()) + } + + async fn load_export_state( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let key = location.keyspace.export_job_state_key(job_id.as_str()); + self.get_json_object::(&location.bucket, &key) + .await? 
+ .map(|(state, _)| state) + .ok_or_else(|| { + StorageError::BulkExport(BulkExportError::JobNotFound { + job_id: job_id.to_string(), + }) + }) + } + + async fn save_export_state( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + state: &ExportJobState, + ) -> StorageResult<()> { + let location = self.tenant_location(tenant)?; + let key = location.keyspace.export_job_state_key(job_id.as_str()); + let payload = self.serialize_json(state)?; + self.put_json_object(&location.bucket, &key, &payload, None, None) + .await?; + Ok(()) + } + + async fn mark_export_failed( + &self, + tenant: &TenantContext, + job_id: &ExportJobId, + message: &str, + ) -> StorageResult<()> { + let mut state = self.load_export_state(tenant, job_id).await?; + state.progress.status = ExportStatus::Error; + state.progress.completed_at = Some(Utc::now()); + state.progress.current_type = None; + state.progress.error_message = Some(message.to_string()); + self.save_export_state(tenant, job_id, &state).await + } + + async fn save_export_type_progress( + &self, + location: &TenantLocation, + job_id: &ExportJobId, + progress: &TypeExportProgress, + ) -> StorageResult<()> { + let key = location + .keyspace + .export_job_progress_key(job_id.as_str(), &progress.resource_type); + let payload = self.serialize_json(progress)?; + self.put_json_object(&location.bucket, &key, &payload, None, None) + .await?; + Ok(()) + } +} + +fn parse_export_cursor(cursor: Option<&str>) -> StorageResult { + match cursor { + None => Ok(0), + Some(raw) => raw.parse::().map_err(|_| { + StorageError::BulkExport(BulkExportError::InvalidRequest { + message: format!("invalid export cursor: {raw}"), + }) + }), + } +} + +fn parse_resource_type_from_current_key(key: &str) -> Option { + let parts: Vec<&str> = key.split('/').collect(); + let resources_idx = parts.iter().position(|segment| *segment == "resources")?; + parts.get(resources_idx + 1).map(|s| s.to_string()) +} diff --git 
a/crates/persistence/src/backends/s3/bulk_submit.rs b/crates/persistence/src/backends/s3/bulk_submit.rs new file mode 100644 index 00000000..dd87180e --- /dev/null +++ b/crates/persistence/src/backends/s3/bulk_submit.rs @@ -0,0 +1,976 @@ +use async_trait::async_trait; +use chrono::Utc; +use helios_fhir::FhirVersion; +use tokio::io::{AsyncBufRead, AsyncBufReadExt}; +use uuid::Uuid; + +use crate::core::ResourceStorage; +use crate::core::VersionedStorage; +use crate::core::bulk_submit::{ + BulkEntryOutcome, BulkEntryResult, BulkProcessingOptions, BulkSubmitProvider, + BulkSubmitRollbackProvider, ChangeType, EntryCountSummary, ManifestStatus, NdjsonEntry, + StreamProcessingResult, StreamingBulkSubmitProvider, SubmissionChange, SubmissionId, + SubmissionManifest, SubmissionStatus, SubmissionSummary, +}; +use crate::error::{BulkSubmitError, ResourceError, StorageError, StorageResult}; +use crate::tenant::TenantContext; + +use super::backend::{S3Backend, TenantLocation}; +use super::models::{SubmissionManifestState, SubmissionState}; + +#[async_trait] +impl BulkSubmitProvider for S3Backend { + async fn create_submission( + &self, + tenant: &TenantContext, + id: &SubmissionId, + metadata: Option, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let state_key = location + .keyspace + .submit_state_key(&id.submitter, &id.submission_id); + + if self + .client + .head_object(&location.bucket, &state_key) + .await + .map_err(|e| self.map_client_error(e))? 
+ .is_some() + { + return Err(StorageError::BulkSubmit( + BulkSubmitError::DuplicateSubmission { + submitter: id.submitter.clone(), + submission_id: id.submission_id.clone(), + }, + )); + } + + let mut summary = SubmissionSummary::new(id.clone()); + if let Some(metadata) = metadata { + summary = summary.with_metadata(metadata); + } + + let state = SubmissionState { + summary: summary.clone(), + abort_reason: None, + }; + + self.save_submission_state(&location, id, &state).await?; + Ok(summary) + } + + async fn get_submission( + &self, + tenant: &TenantContext, + id: &SubmissionId, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + Ok(self + .load_submission_state_optional(&location, id) + .await? + .map(|s| s.summary)) + } + + async fn list_submissions( + &self, + tenant: &TenantContext, + submitter: Option<&str>, + status: Option, + limit: u32, + offset: u32, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + let prefix = location.keyspace.submit_root_prefix(); + + let mut submissions = Vec::new(); + for object in self.list_objects_all(&location.bucket, &prefix).await? { + if !object.key.ends_with("/state.json") { + continue; + } + + let Some((state, _)) = self + .get_json_object::(&location.bucket, &object.key) + .await? 
+ else { + continue; + }; + + if let Some(submitter_filter) = submitter { + if state.summary.id.submitter != submitter_filter { + continue; + } + } + + if let Some(status_filter) = status { + if state.summary.status != status_filter { + continue; + } + } + + submissions.push(state.summary); + } + + submissions.sort_by(|a, b| b.created_at.cmp(&a.created_at)); + + let start = (offset as usize).min(submissions.len()); + let end = start.saturating_add(limit as usize).min(submissions.len()); + Ok(submissions[start..end].to_vec()) + } + + async fn complete_submission( + &self, + tenant: &TenantContext, + id: &SubmissionId, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let mut state = self.load_submission_state(&location, id).await?; + + if state.summary.status != SubmissionStatus::InProgress { + return Err(StorageError::BulkSubmit(BulkSubmitError::AlreadyComplete { + submission_id: id.submission_id.clone(), + })); + } + + let now = Utc::now(); + state.summary.status = SubmissionStatus::Complete; + state.summary.updated_at = now; + state.summary.completed_at = Some(now); + + self.save_submission_state(&location, id, &state).await?; + Ok(state.summary) + } + + async fn abort_submission( + &self, + tenant: &TenantContext, + id: &SubmissionId, + reason: &str, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let mut state = self.load_submission_state(&location, id).await?; + + if state.summary.status != SubmissionStatus::InProgress { + return Err(StorageError::BulkSubmit(BulkSubmitError::AlreadyComplete { + submission_id: id.submission_id.clone(), + })); + } + + let mut pending_count = 0u64; + let manifests = self.list_manifest_states(&location, id).await?; + for mut manifest in manifests { + if matches!( + manifest.manifest.status, + ManifestStatus::Pending | ManifestStatus::Processing + ) { + pending_count += 1; + manifest.manifest.status = ManifestStatus::Failed; + self.save_manifest_state(&location, id, &manifest).await?; 
+ } + } + + let now = Utc::now(); + state.summary.status = SubmissionStatus::Aborted; + state.summary.updated_at = now; + state.summary.completed_at = Some(now); + state.abort_reason = Some(reason.to_string()); + + self.save_submission_state(&location, id, &state).await?; + Ok(pending_count) + } + + async fn add_manifest( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + manifest_url: Option<&str>, + replaces_manifest_url: Option<&str>, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let mut submission = self.load_submission_state(&location, submission_id).await?; + + match submission.summary.status { + SubmissionStatus::InProgress => {} + SubmissionStatus::Complete => { + return Err(StorageError::BulkSubmit(BulkSubmitError::InvalidState { + submission_id: submission_id.submission_id.clone(), + expected: "in-progress".to_string(), + actual: "complete".to_string(), + })); + } + SubmissionStatus::Aborted => { + return Err(StorageError::BulkSubmit(BulkSubmitError::Aborted { + submission_id: submission_id.submission_id.clone(), + reason: submission + .abort_reason + .clone() + .unwrap_or_else(|| "aborted".to_string()), + })); + } + } + + let manifest_id = Uuid::new_v4().to_string(); + let mut manifest = SubmissionManifest::new(manifest_id); + if let Some(manifest_url) = manifest_url { + manifest = manifest.with_url(manifest_url); + } + if let Some(replaces_manifest_url) = replaces_manifest_url { + manifest = manifest.with_replaces(replaces_manifest_url); + } + + self.save_manifest_state( + &location, + submission_id, + &SubmissionManifestState { + manifest: manifest.clone(), + }, + ) + .await?; + + submission.summary.manifest_count += 1; + submission.summary.updated_at = Utc::now(); + self.save_submission_state(&location, submission_id, &submission) + .await?; + + Ok(manifest) + } + + async fn get_manifest( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + manifest_id: &str, + ) -> StorageResult> { + let 
location = self.tenant_location(tenant)?; + Ok(self + .load_manifest_state_optional(&location, submission_id, manifest_id) + .await? + .map(|state| state.manifest)) + } + + async fn list_manifests( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + let mut manifests = self + .list_manifest_states(&location, submission_id) + .await? + .into_iter() + .map(|state| state.manifest) + .collect::>(); + + manifests.sort_by(|a, b| a.added_at.cmp(&b.added_at)); + Ok(manifests) + } + + async fn process_entries( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + manifest_id: &str, + entries: Vec, + options: &BulkProcessingOptions, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + let mut submission = self.load_submission_state(&location, submission_id).await?; + + match submission.summary.status { + SubmissionStatus::InProgress => {} + SubmissionStatus::Complete => { + return Err(StorageError::BulkSubmit(BulkSubmitError::AlreadyComplete { + submission_id: submission_id.submission_id.clone(), + })); + } + SubmissionStatus::Aborted => { + return Err(StorageError::BulkSubmit(BulkSubmitError::Aborted { + submission_id: submission_id.submission_id.clone(), + reason: submission + .abort_reason + .clone() + .unwrap_or_else(|| "aborted".to_string()), + })); + } + } + + let mut manifest_state = self + .load_manifest_state_optional(&location, submission_id, manifest_id) + .await? 
+ .ok_or_else(|| { + StorageError::BulkSubmit(BulkSubmitError::ManifestNotFound { + submission_id: submission_id.submission_id.clone(), + manifest_id: manifest_id.to_string(), + }) + })?; + + manifest_state.manifest.status = ManifestStatus::Processing; + self.save_manifest_state(&location, submission_id, &manifest_state) + .await?; + + let mut results = Vec::new(); + let mut error_count = 0u32; + + for entry in entries { + if options.max_errors > 0 && error_count >= options.max_errors { + if !options.continue_on_error { + return Err(StorageError::BulkSubmit( + BulkSubmitError::MaxErrorsExceeded { + submission_id: submission_id.submission_id.clone(), + max_errors: options.max_errors, + }, + )); + } + + let skipped = BulkEntryResult::skipped( + entry.line_number, + &entry.resource_type, + "max errors exceeded", + ); + self.persist_entry_result(&location, submission_id, manifest_id, &skipped) + .await?; + results.push(skipped); + continue; + } + + self.persist_raw_entry(&location, submission_id, manifest_id, &entry) + .await?; + + let result = match self + .process_single_entry(tenant, submission_id, manifest_id, &entry, options) + .await + { + Ok(result) => result, + Err(err) => BulkEntryResult::processing_error( + entry.line_number, + &entry.resource_type, + Self::bulk_submit_operation_outcome(&err), + ), + }; + + if result.is_error() { + error_count += 1; + } + + self.persist_entry_result(&location, submission_id, manifest_id, &result) + .await?; + results.push(result); + } + + let success_count = results.iter().filter(|r| r.is_success()).count() as u64; + let failed_count = results.iter().filter(|r| r.is_error()).count() as u64; + let skipped_count = results + .iter() + .filter(|r| r.outcome == BulkEntryOutcome::Skipped) + .count() as u64; + + manifest_state.manifest.total_entries += results.len() as u64; + manifest_state.manifest.processed_entries += results.len() as u64; + manifest_state.manifest.failed_entries += failed_count; + manifest_state.manifest.status = 
if failed_count > 0 { + ManifestStatus::Failed + } else { + ManifestStatus::Completed + }; + + self.save_manifest_state(&location, submission_id, &manifest_state) + .await?; + + submission.summary.total_entries += results.len() as u64; + submission.summary.success_count += success_count; + submission.summary.error_count += failed_count; + submission.summary.skipped_count += skipped_count; + submission.summary.updated_at = Utc::now(); + self.save_submission_state(&location, submission_id, &submission) + .await?; + + Ok(results) + } + + async fn get_entry_results( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + manifest_id: &str, + outcome_filter: Option, + limit: u32, + offset: u32, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + let mut results = self + .load_entry_results(&location, submission_id, manifest_id) + .await?; + + if let Some(filter) = outcome_filter { + results.retain(|r| r.outcome == filter); + } + + results.sort_by_key(|r| r.line_number); + + let start = (offset as usize).min(results.len()); + let end = start.saturating_add(limit as usize).min(results.len()); + Ok(results[start..end].to_vec()) + } + + async fn get_entry_counts( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + manifest_id: &str, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let mut summary = EntryCountSummary::new(); + + for result in self + .load_entry_results(&location, submission_id, manifest_id) + .await? 
+ { + summary.increment(result.outcome); + } + + Ok(summary) + } +} + +#[async_trait] +impl StreamingBulkSubmitProvider for S3Backend { + async fn process_ndjson_stream( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + manifest_id: &str, + resource_type: &str, + mut reader: Box, + options: &BulkProcessingOptions, + ) -> StorageResult { + let mut result = StreamProcessingResult::new(); + let mut line_number = 0u64; + let mut batch = Vec::new(); + + loop { + let mut line = String::new(); + let bytes_read = reader.read_line(&mut line).await.map_err(|e| { + StorageError::BulkSubmit(BulkSubmitError::ParseError { + line: line_number, + message: format!("failed to read line: {e}"), + }) + })?; + + if bytes_read == 0 { + break; + } + + line_number += 1; + result.lines_processed = line_number; + + let line = line.trim(); + if line.is_empty() { + continue; + } + + match NdjsonEntry::parse(line_number, line) { + Ok(entry) => { + if entry.resource_type != resource_type { + result.counts.increment(BulkEntryOutcome::ValidationError); + if !options.continue_on_error + && (options.max_errors == 0 + || result.counts.error_count() >= options.max_errors as u64) + { + return Ok(result.aborted("max errors exceeded")); + } + continue; + } + batch.push(entry); + } + Err(parse_err) => { + result.counts.increment(BulkEntryOutcome::ValidationError); + if !options.continue_on_error + && (options.max_errors == 0 + || result.counts.error_count() >= options.max_errors as u64) + { + return Ok(result.aborted(format!("parse error: {parse_err}"))); + } + } + } + + if batch.len() >= options.batch_size as usize { + let batch_results = self + .process_entries( + tenant, + submission_id, + manifest_id, + std::mem::take(&mut batch), + options, + ) + .await?; + + for entry_result in batch_results { + result.counts.increment(entry_result.outcome); + } + + if !options.continue_on_error + && options.max_errors > 0 + && result.counts.error_count() >= options.max_errors as u64 + { + return 
Ok(result.aborted("max errors exceeded")); + } + } + } + + if !batch.is_empty() { + let batch_results = self + .process_entries(tenant, submission_id, manifest_id, batch, options) + .await?; + for entry_result in batch_results { + result.counts.increment(entry_result.outcome); + } + } + + Ok(result) + } +} + +#[async_trait] +impl BulkSubmitRollbackProvider for S3Backend { + async fn record_change( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + change: &SubmissionChange, + ) -> StorageResult<()> { + let location = self.tenant_location(tenant)?; + let key = location.keyspace.submit_change_key( + &submission_id.submitter, + &submission_id.submission_id, + &change.change_id, + ); + + let payload = self.serialize_json(change)?; + self.put_json_object(&location.bucket, &key, &payload, None, None) + .await?; + Ok(()) + } + + async fn list_changes( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + limit: u32, + offset: u32, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + let mut changes = self.load_changes(&location, submission_id).await?; + changes.sort_by(|a, b| b.changed_at.cmp(&a.changed_at)); + + let start = (offset as usize).min(changes.len()); + let end = start.saturating_add(limit as usize).min(changes.len()); + Ok(changes[start..end].to_vec()) + } + + async fn rollback_change( + &self, + tenant: &TenantContext, + _submission_id: &SubmissionId, + change: &SubmissionChange, + ) -> StorageResult { + match change.change_type { + ChangeType::Create => match self + .delete(tenant, &change.resource_type, &change.resource_id) + .await + { + Ok(()) + | Err(StorageError::Resource(ResourceError::NotFound { .. })) + | Err(StorageError::Resource(ResourceError::Gone { .. 
})) => Ok(true), + Err(err) => Err(err), + }, + ChangeType::Update => { + if let Some(previous_version) = change.previous_version.as_deref() { + if let Some(snapshot) = self + .vread( + tenant, + &change.resource_type, + &change.resource_id, + previous_version, + ) + .await? + { + self.restore_resource_from_snapshot(tenant, &snapshot) + .await?; + return Ok(true); + } + } + + if let Some(previous_content) = &change.previous_content { + if let Some(current) = self + .read(tenant, &change.resource_type, &change.resource_id) + .await? + { + self.update(tenant, ¤t, previous_content.clone()) + .await?; + return Ok(true); + } + } + + Ok(false) + } + } + } +} + +impl S3Backend { + async fn process_single_entry( + &self, + tenant: &TenantContext, + submission_id: &SubmissionId, + manifest_id: &str, + entry: &NdjsonEntry, + options: &BulkProcessingOptions, + ) -> StorageResult { + if let Some(resource_type) = entry.resource.get("resourceType").and_then(|v| v.as_str()) { + if resource_type != entry.resource_type { + return Ok(BulkEntryResult::validation_error( + entry.line_number, + &entry.resource_type, + serde_json::json!({ + "resourceType": "OperationOutcome", + "issue": [{ + "severity": "error", + "code": "invalid", + "diagnostics": format!( + "resourceType mismatch: entry={}, payload={}", + entry.resource_type, resource_type + ) + }] + }), + )); + } + } + + if let Some(id) = entry.resource_id.as_deref() { + match self.read(tenant, &entry.resource_type, id).await { + Ok(Some(current)) => { + if !options.allow_updates { + return Ok(BulkEntryResult::skipped( + entry.line_number, + &entry.resource_type, + "updates not allowed", + )); + } + + let updated = self + .update(tenant, ¤t, entry.resource.clone()) + .await?; + + let change = SubmissionChange::update( + manifest_id, + &entry.resource_type, + updated.id(), + current.version_id(), + updated.version_id(), + current.content().clone(), + ); + self.record_change(tenant, submission_id, &change).await?; + + 
Ok(BulkEntryResult::success( + entry.line_number, + &entry.resource_type, + updated.id(), + false, + )) + } + Ok(None) | Err(StorageError::Resource(ResourceError::Gone { .. })) => { + let created = self + .create( + tenant, + &entry.resource_type, + entry.resource.clone(), + FhirVersion::default(), + ) + .await?; + + let change = SubmissionChange::create( + manifest_id, + &entry.resource_type, + created.id(), + created.version_id(), + ); + self.record_change(tenant, submission_id, &change).await?; + + Ok(BulkEntryResult::success( + entry.line_number, + &entry.resource_type, + created.id(), + true, + )) + } + Err(err) => Err(err), + } + } else { + let created = self + .create( + tenant, + &entry.resource_type, + entry.resource.clone(), + FhirVersion::default(), + ) + .await?; + + let change = SubmissionChange::create( + manifest_id, + &entry.resource_type, + created.id(), + created.version_id(), + ); + self.record_change(tenant, submission_id, &change).await?; + + Ok(BulkEntryResult::success( + entry.line_number, + &entry.resource_type, + created.id(), + true, + )) + } + } + + async fn persist_raw_entry( + &self, + location: &TenantLocation, + submission_id: &SubmissionId, + manifest_id: &str, + entry: &NdjsonEntry, + ) -> StorageResult<()> { + let key = location.keyspace.submit_raw_line_key( + &submission_id.submitter, + &submission_id.submission_id, + manifest_id, + entry.line_number, + ); + + let mut line = serde_json::to_string(&entry.resource).map_err(|e| { + StorageError::BulkSubmit(BulkSubmitError::ParseError { + line: entry.line_number, + message: format!("failed to serialize raw NDJSON entry: {e}"), + }) + })?; + line.push('\n'); + + self.put_bytes_object( + &location.bucket, + &key, + line.as_bytes(), + Some("application/fhir+ndjson"), + ) + .await?; + + Ok(()) + } + + async fn persist_entry_result( + &self, + location: &TenantLocation, + submission_id: &SubmissionId, + manifest_id: &str, + result: &BulkEntryResult, + ) -> StorageResult<()> { + let key = 
location.keyspace.submit_result_line_key( + &submission_id.submitter, + &submission_id.submission_id, + manifest_id, + result.line_number, + ); + let payload = self.serialize_json(result)?; + self.put_json_object(&location.bucket, &key, &payload, None, None) + .await?; + Ok(()) + } + + async fn load_entry_results( + &self, + location: &TenantLocation, + submission_id: &SubmissionId, + manifest_id: &str, + ) -> StorageResult> { + let prefix = format!( + "{}results/{}/", + location + .keyspace + .submit_prefix(&submission_id.submitter, &submission_id.submission_id), + manifest_id + ); + + let mut results = Vec::new(); + for object in self.list_objects_all(&location.bucket, &prefix).await? { + if !object.key.ends_with(".json") { + continue; + } + + if let Some((result, _)) = self + .get_json_object::(&location.bucket, &object.key) + .await? + { + results.push(result); + } + } + + Ok(results) + } + + async fn load_changes( + &self, + location: &TenantLocation, + submission_id: &SubmissionId, + ) -> StorageResult> { + let prefix = format!( + "{}changes/", + location + .keyspace + .submit_prefix(&submission_id.submitter, &submission_id.submission_id) + ); + + let mut changes = Vec::new(); + for object in self.list_objects_all(&location.bucket, &prefix).await? { + if !object.key.ends_with(".json") { + continue; + } + + if let Some((change, _)) = self + .get_json_object::(&location.bucket, &object.key) + .await? + { + changes.push(change); + } + } + + Ok(changes) + } + + async fn load_submission_state( + &self, + location: &TenantLocation, + id: &SubmissionId, + ) -> StorageResult { + self.load_submission_state_optional(location, id) + .await? 
+ .ok_or_else(|| { + StorageError::BulkSubmit(BulkSubmitError::SubmissionNotFound { + submitter: id.submitter.clone(), + submission_id: id.submission_id.clone(), + }) + }) + } + + async fn load_submission_state_optional( + &self, + location: &TenantLocation, + id: &SubmissionId, + ) -> StorageResult> { + let key = location + .keyspace + .submit_state_key(&id.submitter, &id.submission_id); + Ok(self + .get_json_object::(&location.bucket, &key) + .await? + .map(|(state, _)| state)) + } + + async fn save_submission_state( + &self, + location: &TenantLocation, + id: &SubmissionId, + state: &SubmissionState, + ) -> StorageResult<()> { + let key = location + .keyspace + .submit_state_key(&id.submitter, &id.submission_id); + let payload = self.serialize_json(state)?; + self.put_json_object(&location.bucket, &key, &payload, None, None) + .await?; + Ok(()) + } + + async fn load_manifest_state_optional( + &self, + location: &TenantLocation, + submission_id: &SubmissionId, + manifest_id: &str, + ) -> StorageResult> { + let key = location.keyspace.submit_manifest_key( + &submission_id.submitter, + &submission_id.submission_id, + manifest_id, + ); + + Ok(self + .get_json_object::(&location.bucket, &key) + .await? 
+ .map(|(state, _)| state)) + } + + async fn save_manifest_state( + &self, + location: &TenantLocation, + submission_id: &SubmissionId, + state: &SubmissionManifestState, + ) -> StorageResult<()> { + let key = location.keyspace.submit_manifest_key( + &submission_id.submitter, + &submission_id.submission_id, + &state.manifest.manifest_id, + ); + + let payload = self.serialize_json(state)?; + self.put_json_object(&location.bucket, &key, &payload, None, None) + .await?; + Ok(()) + } + + async fn list_manifest_states( + &self, + location: &TenantLocation, + submission_id: &SubmissionId, + ) -> StorageResult> { + let prefix = format!( + "{}manifests/", + location + .keyspace + .submit_prefix(&submission_id.submitter, &submission_id.submission_id) + ); + + let mut manifests = Vec::new(); + for object in self.list_objects_all(&location.bucket, &prefix).await? { + if !object.key.ends_with(".json") { + continue; + } + + if let Some((state, _)) = self + .get_json_object::(&location.bucket, &object.key) + .await? + { + manifests.push(state); + } + } + + Ok(manifests) + } + + fn bulk_submit_operation_outcome(err: &StorageError) -> serde_json::Value { + let code = match err { + StorageError::Validation(_) => "invalid", + StorageError::Tenant(_) => "forbidden", + StorageError::Resource(ResourceError::NotFound { .. }) => "not-found", + StorageError::Resource(ResourceError::Gone { .. }) => "deleted", + StorageError::Resource(ResourceError::AlreadyExists { .. 
}) => "conflict", + StorageError::Concurrency(_) => "conflict", + _ => "exception", + }; + + serde_json::json!({ + "resourceType": "OperationOutcome", + "issue": [{ + "severity": "error", + "code": code, + "diagnostics": err.to_string() + }] + }) + } +} diff --git a/crates/persistence/src/backends/s3/bundle.rs b/crates/persistence/src/backends/s3/bundle.rs new file mode 100644 index 00000000..44351615 --- /dev/null +++ b/crates/persistence/src/backends/s3/bundle.rs @@ -0,0 +1,419 @@ +use std::collections::HashMap; + +use async_trait::async_trait; +use helios_fhir::FhirVersion; +use serde_json::{Value, json}; + +use crate::core::{ + BundleEntry, BundleEntryResult, BundleMethod, BundleProvider, BundleResult, BundleType, + ResourceStorage, VersionedStorage, +}; +use crate::error::{BackendError, ResourceError, StorageError, TransactionError, ValidationError}; +use crate::tenant::TenantContext; +use crate::types::StoredResource; + +use super::backend::S3Backend; + +#[derive(Debug, Clone)] +enum CompensationAction { + Delete { resource_type: String, id: String }, + Restore { snapshot: StoredResource }, +} + +#[async_trait] +impl BundleProvider for S3Backend { + async fn process_transaction( + &self, + tenant: &TenantContext, + entries: Vec, + ) -> Result { + let mut results = Vec::with_capacity(entries.len()); + let mut compensations: Vec = Vec::new(); + let mut reference_map: HashMap = HashMap::new(); + let mut entries = entries; + + for (idx, entry) in entries.iter_mut().enumerate() { + if let Some(resource) = entry.resource.as_mut() { + resolve_bundle_references(resource, &reference_map); + } + + let (result, compensation) = match self.execute_bundle_entry(tenant, entry).await { + Ok(v) => v, + Err(err) => { + let base = format!("entry failed: {err}"); + let message = self + .rollback_compensations(tenant, compensations) + .await + .map(|_| base.clone()) + .unwrap_or_else(|rollback_err| { + format!("{base}; rollback failed: {rollback_err}") + }); + return 
Err(TransactionError::BundleError { + index: idx, + message, + }); + } + }; + + if result.status >= 400 { + let base = format!("entry failed with status {}", result.status); + let message = self + .rollback_compensations(tenant, compensations) + .await + .map(|_| base.clone()) + .unwrap_or_else(|rollback_err| { + format!("{base}; rollback failed: {rollback_err}") + }); + return Err(TransactionError::BundleError { + index: idx, + message, + }); + } + + if entry.method == BundleMethod::Post { + if let (Some(full_url), Some(location)) = (&entry.full_url, &result.location) { + let resolved = location + .split("/_history") + .next() + .unwrap_or(location) + .to_string(); + reference_map.insert(full_url.clone(), resolved); + } + } + + if let Some(compensation) = compensation { + compensations.push(compensation); + } + + results.push(result); + } + + Ok(BundleResult { + bundle_type: BundleType::Transaction, + entries: results, + }) + } + + async fn process_batch( + &self, + tenant: &TenantContext, + entries: Vec, + ) -> crate::error::StorageResult { + let mut results = Vec::with_capacity(entries.len()); + + for entry in &entries { + results.push(self.process_batch_entry(tenant, entry).await); + } + + Ok(BundleResult { + bundle_type: BundleType::Batch, + entries: results, + }) + } +} + +impl S3Backend { + async fn process_batch_entry( + &self, + tenant: &TenantContext, + entry: &BundleEntry, + ) -> BundleEntryResult { + match self.execute_bundle_entry(tenant, entry).await { + Ok((result, _)) => result, + Err(err) => Self::bundle_error_result(&err), + } + } + + async fn execute_bundle_entry( + &self, + tenant: &TenantContext, + entry: &BundleEntry, + ) -> crate::error::StorageResult<(BundleEntryResult, Option)> { + match entry.method { + BundleMethod::Get => { + let (resource_type, id) = self.parse_url(&entry.url)?; + match self.read(tenant, &resource_type, &id).await { + Ok(Some(resource)) => Ok((BundleEntryResult::ok(resource), None)), + Ok(None) => Ok(( + 
BundleEntryResult::error( + 404, + json!({ + "resourceType": "OperationOutcome", + "issue": [{"severity": "error", "code": "not-found"}] + }), + ), + None, + )), + Err(StorageError::Resource(ResourceError::Gone { .. })) => Ok(( + BundleEntryResult::error( + 410, + json!({ + "resourceType": "OperationOutcome", + "issue": [{"severity": "error", "code": "deleted"}] + }), + ), + None, + )), + Err(err) => Err(err), + } + } + BundleMethod::Post => { + let resource = entry.resource.clone().ok_or_else(|| { + StorageError::Validation(ValidationError::MissingRequiredField { + field: "resource".to_string(), + }) + })?; + + let resource_type = resource + .get("resourceType") + .and_then(|v| v.as_str()) + .ok_or_else(|| { + StorageError::Validation(ValidationError::MissingRequiredField { + field: "resourceType".to_string(), + }) + })? + .to_string(); + + let created = self + .create(tenant, &resource_type, resource, FhirVersion::default()) + .await?; + + Ok(( + BundleEntryResult::created(created.clone()), + Some(CompensationAction::Delete { + resource_type: created.resource_type().to_string(), + id: created.id().to_string(), + }), + )) + } + BundleMethod::Put => { + let resource = entry.resource.clone().ok_or_else(|| { + StorageError::Validation(ValidationError::MissingRequiredField { + field: "resource".to_string(), + }) + })?; + + let (resource_type, id) = self.parse_url(&entry.url)?; + + let current = match self.read(tenant, &resource_type, &id).await { + Ok(value) => value, + Err(StorageError::Resource(ResourceError::Gone { .. })) => None, + Err(err) => return Err(err), + }; + + if let Some(existing) = current { + let updated = if let Some(if_match) = entry.if_match.as_deref() { + self.update_with_match(tenant, &resource_type, &id, if_match, resource) + .await? + } else { + self.update(tenant, &existing, resource).await? 
+ }; + + Ok(( + BundleEntryResult::ok(updated), + Some(CompensationAction::Restore { snapshot: existing }), + )) + } else { + let (stored, created) = self + .create_or_update( + tenant, + &resource_type, + &id, + resource, + FhirVersion::default(), + ) + .await?; + + let result = if created { + BundleEntryResult::created(stored.clone()) + } else { + BundleEntryResult::ok(stored.clone()) + }; + + let compensation = if created { + Some(CompensationAction::Delete { + resource_type: stored.resource_type().to_string(), + id: stored.id().to_string(), + }) + } else { + None + }; + + Ok((result, compensation)) + } + } + BundleMethod::Delete => { + let (resource_type, id) = self.parse_url(&entry.url)?; + + let snapshot = self.read(tenant, &resource_type, &id).await.ok().flatten(); + + let delete_result = if let Some(if_match) = entry.if_match.as_deref() { + self.delete_with_match(tenant, &resource_type, &id, if_match) + .await + } else { + self.delete(tenant, &resource_type, &id).await + }; + + match delete_result { + Ok(()) => Ok(( + BundleEntryResult::deleted(), + snapshot.map(|s| CompensationAction::Restore { snapshot: s }), + )), + Err(StorageError::Resource(ResourceError::NotFound { .. })) + | Err(StorageError::Resource(ResourceError::Gone { .. 
})) => { + Ok((BundleEntryResult::deleted(), None)) + } + Err(err) => Err(err), + } + } + BundleMethod::Patch => Ok(( + BundleEntryResult::error( + 501, + json!({ + "resourceType": "OperationOutcome", + "issue": [{ + "severity": "error", + "code": "not-supported", + "diagnostics": "PATCH is not supported by the S3 bundle backend" + }] + }), + ), + None, + )), + } + } + + async fn rollback_compensations( + &self, + tenant: &TenantContext, + compensations: Vec, + ) -> Result<(), String> { + let mut failures = Vec::new(); + + for compensation in compensations.into_iter().rev() { + if let Err(err) = self.apply_compensation(tenant, compensation).await { + failures.push(err.to_string()); + } + } + + if failures.is_empty() { + Ok(()) + } else { + Err(failures.join("; ")) + } + } + + async fn apply_compensation( + &self, + tenant: &TenantContext, + compensation: CompensationAction, + ) -> crate::error::StorageResult<()> { + match compensation { + CompensationAction::Delete { resource_type, id } => { + match self.delete(tenant, &resource_type, &id).await { + Ok(()) + | Err(StorageError::Resource(ResourceError::NotFound { .. })) + | Err(StorageError::Resource(ResourceError::Gone { .. })) => Ok(()), + Err(err) => Err(err), + } + } + CompensationAction::Restore { snapshot } => { + self.restore_resource_from_snapshot(tenant, &snapshot) + .await?; + Ok(()) + } + } + } + + fn bundle_error_result(err: &StorageError) -> BundleEntryResult { + BundleEntryResult::error( + Self::storage_error_status(err), + Self::operation_outcome(err), + ) + } + + fn storage_error_status(err: &StorageError) -> u16 { + match err { + StorageError::Validation(_) | StorageError::Search(_) => 400, + StorageError::Tenant(_) => 403, + StorageError::Resource(ResourceError::NotFound { .. }) => 404, + StorageError::Resource(ResourceError::VersionNotFound { .. }) => 404, + StorageError::Resource(ResourceError::Gone { .. }) => 410, + StorageError::Resource(ResourceError::AlreadyExists { .. 
}) => 409, + StorageError::Concurrency(_) => 409, + StorageError::Backend(BackendError::UnsupportedCapability { .. }) => 501, + StorageError::BulkExport(_) | StorageError::BulkSubmit(_) => 500, + StorageError::Transaction(_) => 409, + StorageError::Backend(_) => 500, + } + } + + fn operation_outcome(err: &StorageError) -> Value { + let code = match err { + StorageError::Validation(_) => "invalid", + StorageError::Tenant(_) => "forbidden", + StorageError::Resource(ResourceError::NotFound { .. }) => "not-found", + StorageError::Resource(ResourceError::VersionNotFound { .. }) => "not-found", + StorageError::Resource(ResourceError::Gone { .. }) => "deleted", + StorageError::Resource(ResourceError::AlreadyExists { .. }) => "conflict", + StorageError::Concurrency(_) => "conflict", + StorageError::Backend(BackendError::UnsupportedCapability { .. }) => "not-supported", + _ => "exception", + }; + + json!({ + "resourceType": "OperationOutcome", + "issue": [{ + "severity": "error", + "code": code, + "diagnostics": err.to_string() + }] + }) + } + + fn parse_url(&self, url: &str) -> crate::error::StorageResult<(String, String)> { + let path = url + .strip_prefix("http://") + .or_else(|| url.strip_prefix("https://")) + .map(|s| s.find('/').map(|idx| &s[idx..]).unwrap_or(s)) + .unwrap_or(url); + + let path = path.trim_start_matches('/'); + let parts: Vec<&str> = path.split('/').filter(|s| !s.is_empty()).collect(); + + if parts.len() >= 2 { + let len = parts.len(); + Ok((parts[len - 2].to_string(), parts[len - 1].to_string())) + } else { + Err(StorageError::Validation( + ValidationError::InvalidReference { + reference: url.to_string(), + message: "URL must be in format ResourceType/id".to_string(), + }, + )) + } + } +} + +fn resolve_bundle_references(value: &mut Value, reference_map: &HashMap) { + match value { + Value::Object(map) => { + if let Some(Value::String(reference)) = map.get("reference") { + if reference.starts_with("urn:uuid:") { + if let Some(resolved) = 
reference_map.get(reference) { + map.insert("reference".to_string(), Value::String(resolved.clone())); + } + } + } + for value in map.values_mut() { + resolve_bundle_references(value, reference_map); + } + } + Value::Array(items) => { + for item in items { + resolve_bundle_references(item, reference_map); + } + } + _ => {} + } +} diff --git a/crates/persistence/src/backends/s3/client.rs b/crates/persistence/src/backends/s3/client.rs new file mode 100644 index 00000000..bd4f83b0 --- /dev/null +++ b/crates/persistence/src/backends/s3/client.rs @@ -0,0 +1,301 @@ +use async_trait::async_trait; +use aws_config::{BehaviorVersion, Region, SdkConfig}; +use aws_sdk_s3::Client; +use aws_sdk_s3::error::ProvideErrorMetadata; +use aws_sdk_s3::primitives::ByteStream; +use chrono::{DateTime, Utc}; + +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct ObjectMetadata { + pub etag: Option, + pub last_modified: Option>, + pub size: i64, +} + +#[derive(Debug, Clone)] +pub struct ObjectData { + pub bytes: Vec, + pub metadata: ObjectMetadata, +} + +#[derive(Debug, Clone)] +#[allow(dead_code)] +pub struct ListObjectItem { + pub key: String, + pub etag: Option, + pub last_modified: Option>, + pub size: i64, +} + +#[derive(Debug, Clone)] +pub struct ListObjectsResult { + pub items: Vec, + pub next_continuation_token: Option, +} + +#[derive(Debug, Clone)] +pub enum S3ClientError { + NotFound, + PreconditionFailed, + Throttled(String), + Unavailable(String), + InvalidInput(String), + Internal(String), +} + +#[async_trait] +pub trait S3Api: Send + Sync { + async fn head_bucket(&self, bucket: &str) -> Result<(), S3ClientError>; + + async fn head_object( + &self, + bucket: &str, + key: &str, + ) -> Result, S3ClientError>; + + async fn get_object( + &self, + bucket: &str, + key: &str, + ) -> Result, S3ClientError>; + + async fn put_object( + &self, + bucket: &str, + key: &str, + body: Vec, + content_type: Option<&str>, + if_match: Option<&str>, + if_none_match: Option<&str>, + ) -> Result; 
+ + async fn delete_object(&self, bucket: &str, key: &str) -> Result<(), S3ClientError>; + + async fn list_objects( + &self, + bucket: &str, + prefix: &str, + continuation: Option<&str>, + max_keys: Option, + ) -> Result; +} + +#[derive(Debug, Clone)] +pub struct AwsS3Client { + client: Client, +} + +impl AwsS3Client { + pub fn from_sdk_config(config: &SdkConfig) -> Self { + Self { + client: Client::new(config), + } + } + + pub async fn load_sdk_config(region: Option<&str>) -> SdkConfig { + let mut loader = aws_config::defaults(BehaviorVersion::latest()); + if let Some(region) = region { + loader = loader.region(Region::new(region.to_string())); + } + loader.load().await + } +} + +#[async_trait] +impl S3Api for AwsS3Client { + async fn head_bucket(&self, bucket: &str) -> Result<(), S3ClientError> { + self.client + .head_bucket() + .bucket(bucket) + .send() + .await + .map_err(map_sdk_error)?; + Ok(()) + } + + async fn head_object( + &self, + bucket: &str, + key: &str, + ) -> Result, S3ClientError> { + match self + .client + .head_object() + .bucket(bucket) + .key(key) + .send() + .await + { + Ok(out) => Ok(Some(ObjectMetadata { + etag: out.e_tag().map(|s| s.to_string()), + last_modified: None, + size: out.content_length().unwrap_or_default(), + })), + Err(err) => { + let mapped = map_sdk_error(err); + if matches!(mapped, S3ClientError::NotFound) { + Ok(None) + } else { + Err(mapped) + } + } + } + } + + async fn get_object( + &self, + bucket: &str, + key: &str, + ) -> Result, S3ClientError> { + match self + .client + .get_object() + .bucket(bucket) + .key(key) + .send() + .await + { + Ok(out) => { + let etag = out.e_tag().map(|s| s.to_string()); + let bytes = out + .body + .collect() + .await + .map_err(|e| { + S3ClientError::Internal(format!("failed to collect object body: {e}")) + })? 
+ .into_bytes() + .to_vec(); + Ok(Some(ObjectData { + metadata: ObjectMetadata { + etag, + last_modified: None, + size: bytes.len() as i64, + }, + bytes, + })) + } + Err(err) => { + let mapped = map_sdk_error(err); + if matches!(mapped, S3ClientError::NotFound) { + Ok(None) + } else { + Err(mapped) + } + } + } + } + + async fn put_object( + &self, + bucket: &str, + key: &str, + body: Vec, + content_type: Option<&str>, + if_match: Option<&str>, + if_none_match: Option<&str>, + ) -> Result { + let mut req = self + .client + .put_object() + .bucket(bucket) + .key(key) + .body(ByteStream::from(body)); + + if let Some(content_type) = content_type { + req = req.content_type(content_type); + } + if let Some(if_match) = if_match { + req = req.if_match(if_match); + } + if let Some(if_none_match) = if_none_match { + req = req.if_none_match(if_none_match); + } + + let out = req.send().await.map_err(map_sdk_error)?; + + Ok(ObjectMetadata { + etag: out.e_tag().map(|s| s.to_string()), + last_modified: None, + size: 0, + }) + } + + async fn delete_object(&self, bucket: &str, key: &str) -> Result<(), S3ClientError> { + self.client + .delete_object() + .bucket(bucket) + .key(key) + .send() + .await + .map_err(map_sdk_error)?; + Ok(()) + } + + async fn list_objects( + &self, + bucket: &str, + prefix: &str, + continuation: Option<&str>, + max_keys: Option, + ) -> Result { + let mut req = self.client.list_objects_v2().bucket(bucket).prefix(prefix); + + if let Some(token) = continuation { + req = req.continuation_token(token); + } + if let Some(max_keys) = max_keys { + req = req.max_keys(max_keys); + } + + let out = req.send().await.map_err(map_sdk_error)?; + let mut items = Vec::new(); + + for item in out.contents() { + if let Some(key) = item.key() { + items.push(ListObjectItem { + key: key.to_string(), + etag: item.e_tag().map(|s| s.to_string()), + last_modified: None, + size: item.size().unwrap_or_default(), + }); + } + } + + Ok(ListObjectsResult { + items, + 
next_continuation_token: out.next_continuation_token().map(|s| s.to_string()), + }) + } +} + +fn map_sdk_error(err: aws_sdk_s3::error::SdkError) -> S3ClientError +where + E: ProvideErrorMetadata + std::fmt::Debug, +{ + let fallback = format!("{err:?}"); + + match err { + aws_sdk_s3::error::SdkError::ServiceError(service_err) => { + let code = service_err.err().code().unwrap_or("Unknown"); + let message = service_err + .err() + .message() + .map(str::to_string) + .unwrap_or_else(|| fallback.clone()); + match code { + "NoSuchKey" | "NotFound" | "NoSuchBucket" => S3ClientError::NotFound, + "PreconditionFailed" => S3ClientError::PreconditionFailed, + "SlowDown" | "Throttling" | "ThrottlingException" => { + S3ClientError::Throttled(message) + } + "InvalidBucketName" | "InvalidArgument" => S3ClientError::InvalidInput(message), + _ => S3ClientError::Internal(message), + } + } + aws_sdk_s3::error::SdkError::TimeoutError(_) => S3ClientError::Unavailable(fallback), + aws_sdk_s3::error::SdkError::DispatchFailure(_) => S3ClientError::Unavailable(fallback), + _ => S3ClientError::Internal(fallback), + } +} diff --git a/crates/persistence/src/backends/s3/config.rs b/crates/persistence/src/backends/s3/config.rs new file mode 100644 index 00000000..5b44a4be --- /dev/null +++ b/crates/persistence/src/backends/s3/config.rs @@ -0,0 +1,152 @@ +use std::collections::{HashMap, HashSet}; + +use serde::{Deserialize, Serialize}; + +use crate::error::{BackendError, StorageError, StorageResult}; + +/// Tenant-to-bucket resolution for S3. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "mode", rename_all = "snake_case")] +pub enum S3TenancyMode { + /// All tenants share one bucket with tenant-specific key prefixes. + PrefixPerTenant { + /// Shared bucket name. + bucket: String, + }, + + /// Each tenant maps to a specific bucket. + /// + /// The system tenant can use `default_system_bucket`. + BucketPerTenant { + /// Explicit tenant -> bucket map. 
+ tenant_bucket_map: HashMap, + /// Optional fallback bucket for `__system__` tenant. + default_system_bucket: Option, + }, +} + +/// Configuration for the AWS S3 backend. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct S3BackendConfig { + /// How tenant data is mapped to buckets/prefixes. + pub tenancy_mode: S3TenancyMode, + + /// Optional global key prefix applied before backend keys. + pub prefix: Option, + + /// AWS region override (falls back to provider chain if unset). + pub region: Option, + + /// Validate all configured buckets on startup with `HeadBucket`. + pub validate_buckets_on_startup: bool, + + /// Max NDJSON lines per export output part. + pub bulk_export_part_size: u32, + + /// Default ingestion batch size for bulk submit processing. + pub bulk_submit_batch_size: u32, +} + +impl Default for S3BackendConfig { + fn default() -> Self { + Self { + tenancy_mode: S3TenancyMode::PrefixPerTenant { + bucket: "hfs".to_string(), + }, + prefix: None, + region: None, + validate_buckets_on_startup: true, + bulk_export_part_size: 10_000, + bulk_submit_batch_size: 100, + } + } +} + +impl S3BackendConfig { + /// Validates configuration invariants. 
+ pub fn validate(&self) -> StorageResult<()> { + if self.bulk_export_part_size == 0 { + return Err(StorageError::Backend(BackendError::Internal { + backend_name: "s3".to_string(), + message: "bulk_export_part_size must be > 0".to_string(), + source: None, + })); + } + + if self.bulk_submit_batch_size == 0 { + return Err(StorageError::Backend(BackendError::Internal { + backend_name: "s3".to_string(), + message: "bulk_submit_batch_size must be > 0".to_string(), + source: None, + })); + } + + match &self.tenancy_mode { + S3TenancyMode::PrefixPerTenant { bucket } => { + if bucket.trim().is_empty() { + return Err(StorageError::Backend(BackendError::Internal { + backend_name: "s3".to_string(), + message: "prefix-per-tenant bucket must not be empty".to_string(), + source: None, + })); + } + } + S3TenancyMode::BucketPerTenant { + tenant_bucket_map, + default_system_bucket, + } => { + if tenant_bucket_map.is_empty() && default_system_bucket.is_none() { + return Err(StorageError::Backend(BackendError::Internal { + backend_name: "s3".to_string(), + message: "bucket-per-tenant requires at least one mapped bucket or default_system_bucket" + .to_string(), + source: None, + })); + } + + if tenant_bucket_map.values().any(|b| b.trim().is_empty()) { + return Err(StorageError::Backend(BackendError::Internal { + backend_name: "s3".to_string(), + message: "bucket-per-tenant mapping contains empty bucket name".to_string(), + source: None, + })); + } + + if default_system_bucket + .as_ref() + .map(|b| b.trim().is_empty()) + .unwrap_or(false) + { + return Err(StorageError::Backend(BackendError::Internal { + backend_name: "s3".to_string(), + message: "default_system_bucket must not be empty when provided" + .to_string(), + source: None, + })); + } + } + } + + Ok(()) + } + + /// Returns a de-duplicated set of all buckets referenced by this config. 
+ pub fn configured_buckets(&self) -> HashSet { + let mut out = HashSet::new(); + match &self.tenancy_mode { + S3TenancyMode::PrefixPerTenant { bucket } => { + out.insert(bucket.clone()); + } + S3TenancyMode::BucketPerTenant { + tenant_bucket_map, + default_system_bucket, + } => { + out.extend(tenant_bucket_map.values().cloned()); + if let Some(bucket) = default_system_bucket { + out.insert(bucket.clone()); + } + } + } + out + } +} diff --git a/crates/persistence/src/backends/s3/docs/README.md b/crates/persistence/src/backends/s3/docs/README.md new file mode 100644 index 00000000..e4b29cfa --- /dev/null +++ b/crates/persistence/src/backends/s3/docs/README.md @@ -0,0 +1,61 @@ +# S3 Backend (`aws_sdk_s3`) Guarantees and Limits + +This backend is an object-storage persistence backend for Helios. It is intentionally focused on storage, versioning/history, and bulk workflows, not advanced FHIR query execution. + +## Scope and Role + +- Primary responsibilities: + - CRUD persistence of resources + - Versioning (`vread`, `list_versions`, optimistic conflict checks) + - Instance/type/system history via immutable history objects plus history index events + - Batch bundles and best-effort transaction bundles (non-atomic with compensating rollback attempts) + - Bulk export (NDJSON objects + manifest/progress state in S3) + - Bulk submit (ingest + raw artifact persistence + rollback change log) + - Tenant isolation through: + - `PrefixPerTenant` + - `BucketPerTenant` (explicit tenant→bucket map) + +- Explicit non-goals for this backend: + - Advanced FHIR search semantics as the primary query engine (`date/number/quantity` comparison semantics, full chained query planning, `_has`, include/revinclude fanout planning, full cursor keyset query engine) + +For query-heavy production deployments, run a DB/search backend as primary query engine and use S3 for bulk/history/archive responsibilities. 
+ +## Object Model + +Resource objects: + +- Current pointer: `.../resources/{type}/{id}/current.json` +- Immutable history version: `.../resources/{type}/{id}/_history/{version}.json` +- Type history event: `.../history/type/{type}/{ts}_{id}_{version}_{suffix}.json` +- System history event: `.../history/system/{ts}_{type}_{id}_{version}_{suffix}.json` + +Bulk export: + +- `.../bulk/export/jobs/{job_id}/state.json` +- `.../bulk/export/jobs/{job_id}/progress/{type}.json` +- `.../bulk/export/jobs/{job_id}/output/{type}/part-{n}.ndjson` +- `.../bulk/export/jobs/{job_id}/manifest.json` + +Bulk submit: + +- `.../bulk/submit/{submitter}/{submission_id}/state.json` +- `.../bulk/submit/{submitter}/{submission_id}/manifests/{manifest_id}.json` +- `.../bulk/submit/{submitter}/{submission_id}/raw/{manifest_id}/line-{line}.ndjson` +- `.../bulk/submit/{submitter}/{submission_id}/results/{manifest_id}/line-{line}.json` +- `.../bulk/submit/{submitter}/{submission_id}/changes/{change_id}.json` + +## Consistency and Transaction Notes + +- The backend never creates buckets. +- Startup/runtime bucket checks use `HeadBucket` only. +- Optimistic locking relies on version checks plus S3 preconditions (`If-Match`, `If-None-Match`) where applicable. +- Transaction bundle behavior is best-effort: + - Entries are applied sequentially. + - On failure, rollback is attempted in reverse order. + - Rollback is not guaranteed under concurrent writes or partial failures. + +## AWS Credentials and Region + +- Uses AWS SDK for Rust (`aws_sdk_s3`) with standard provider chain. +- Region may be provided in config or via `AWS_REGION`. +- Environment credentials (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, optional `AWS_SESSION_TOKEN`) are supported by provider chain behavior. 
diff --git a/crates/persistence/src/backends/s3/docs/discussion_roadmap.md b/crates/persistence/src/backends/s3/docs/discussion_roadmap.md new file mode 100644 index 00000000..f1a59209 --- /dev/null +++ b/crates/persistence/src/backends/s3/docs/discussion_roadmap.md @@ -0,0 +1,985 @@ +Introduction +As I write this in early 2026, I don't think it is an understatement to say that the opportunities and impact that are upon us with AI in healthcare feel like a Cambrian Explosion moment. Healthcare professionals, administrators, and patients alike will be increasingly chatting with, talking directly to, and collaborating with artificial intelligence software systems in entirely new ways. This will need to be done safely and carefully. +What worked five years ago, or even two years ago, is increasingly inadequate for the demands of clinical AI, population health analytics, and real-time decision support. For technical architects navigating this shift, the challenge isn't just scaling storage; it's rethinking the entire data architecture. +This discussion document shares my thoughts about an approach to persistence for the Helios FHIR Server. +This document is an architecture strategy document. In other words, it describes the main motivating direction, building blocks, and key technology ingredients that will make up the persistence design for the Helios FHIR Server. It is not intended to be a comprehensive set of requirements and design, but instead contains enough of a starting point such that readers can understand our approach to persistence, and understand why we decided to make the decisions that we did. +Who should read this? +The Helios FHIR Server is open source software, and is being developed in the open. If you have some interest in persistence design for healthcare software - this document is for you! +My hope is that you will think about the contents of this document, comment and provide feedback!
+AI Is Driving New Requirements on Data +AI workloads have upended traditional assumptions about data access patterns. Training models demand sustained high-throughput reads across massive datasets, while inference requires low-latency access to distributed data sources. In healthcare, this is compounded by the explosive growth of unstructured data. Radiology images, pathology slides, genomic sequences, clinical notes, and waveform data from monitoring devices to name a few. Structured EHR data, once the center of gravity, is increasingly extracted from the EMR and compared with other external data sources. Architectures optimized for transactional workloads simply cannot deliver the performance AI pipelines require, and retrofitting them is often a losing battle. +Separation of Storage and Compute +Decoupling storage from compute has moved from a cloud-native best practice to an architectural necessity, yet many FHIR server implementations haven't caught up. While cloud-based analytics platforms routinely embrace this separation, transactional FHIR servers often remain tightly coupled to their persistence layers, treating database and application as an inseparable unit. This creates painful trade-offs: over-provisioning compute to get adequate storage, or vice versa. A modern FHIR server must separate these concerns as a core architectural principle, allowing the API layer to scale horizontally for request throughput while the persistence layer scales independently for capacity and query performance. In healthcare AI workloads, this separation is especially critical. Spin up GPU clusters for model training without provisioning redundant storage, or expand storage for imaging archives without paying for idle compute. The persistence layer becomes a service with its own scaling characteristics rather than a monolithic dependency. This separation is now expected as a defining characteristic of production-ready FHIR infrastructure. 
+Medallion Architecture Within FHIR Persistence +We have seen our largest petabyte-scale customers transition to a Medallion Architecture strategy for their FHIR data. The bronze layer represents resources as received, preserving original payloads, source system identifiers, and ingestion metadata for auditability and replay. The silver layer applies normalization: terminology mapping, reference resolution, deduplication of resources that represent the same clinical entity, and enforcement of business rules that go beyond FHIR validation. The gold layer materializes optimized views for specific consumers, denormalized patient summaries for clinical applications, flattened tabular projections for analytics, or pre-computed feature sets for ML pipelines. +Hybrid and Multi-Cloud Architectures +The reality for most health IT systems is a hybrid footprint: on-premises data centers housing legacy systems and sensitive workloads, cloud platforms providing elastic compute for AI and analytics, and edge infrastructure at clinical sites. Multi-cloud strategies add another dimension, whether driven by M&A activity, best-of-breed vendor selection, or risk diversification. +Security-First and Zero-Trust Patterns in FHIR Persistence +The persistence layer is where FHIR data lives at rest, making it the most critical surface for security enforcement. Zero-trust principles must be embedded in the persistence design itself, not just the API layer above it. This means encryption at rest as a baseline, but also fine-grained access control at the resource, compartment or even finer-grained levels - ensuring that database-level access cannot bypass FHIR authorization semantics. Audit logging must capture all persistence operations with sufficient detail for HIPAA accounting-of-disclosures requirements. This typically means persisting AuditEvent resources to a separately controlled store. 
Consent enforcement, particularly for sensitive resource types like mental health or substance abuse records under 42 CFR Part 2, often requires persistence-layer support through segmentation, tagging, or dynamic filtering. Treating security as an API-layer concern while leaving the persistence layer permissive creates unacceptable risk. +Data Retention, Tiering, and Cost Optimization +FHIR persistence layers accumulate data over years and decades. Version history, provenance records, and audit logs all create significant cost pressure. Intelligent tiering within the persistence layer moves older resource versions and infrequently accessed resources to lower-cost storage classes while keeping current data on performant storage. The architectural challenge is maintaining query semantics across tiers: a search that spans active and archived resources should work transparently, even if archived retrieval is slower. Retention policies must account for regulatory requirements that vary by resource type. Imaging studies may have different retention mandates than clinical notes. A well-designed persistence layer makes tiering a configuration concern rather than an architectural constraint. +Different Data Technologies for Different Problems +A FHIR persistence layer that commits to a single storage technology is making a bet that one tool can serve all masters. This is a bet that rarely pays off as requirements evolve. The reality is that different access patterns, query types, and workloads have fundamentally different performance characteristics, and no single database technology optimizes for all of them. A patient lookup by identifier, a population-level cohort query, a graph traversal of care team relationships, and a semantic similarity search for clinical trial matching across different terminology code systems are all legitimate operations against FHIR data, yet each performs best on a different underlying technology. 
+Modern FHIR persistence architectures increasingly embrace polyglot persistence, which means routing data to the storage technology best suited for how that data will be accessed, while maintaining a unified FHIR API layer above. +* Relational Databases remain the workhorse for transactional FHIR operations, offering ACID guarantees, mature tooling, and well-understood query optimization for structured data with predictable access patterns. +* NoSQL Databases - particularly document stores - align naturally with FHIR's resource model, persisting resources as complete documents without the impedance mismatch of relational decomposition, and scaling horizontally for high-throughput ingestion. Additionally, Cassandra has been exceptional at handling web-scale data requirements without breaking the bank. +* Data Lakes provide cost-effective, schema-flexible storage for raw FHIR resources and bulk exports, serving as the foundation for large-scale analytics and ML training pipelines that need to process millions of resources. +* Data Warehouses deliver optimized analytical query performance over structured, transformed FHIR data, enabling population health analytics, quality measure computation, and business intelligence workloads that would overwhelm transactional systems. +* Graph Databases excel at traversing relationships. Patient to provider to organization to care team is an example relationship pathway that is represented as references in FHIR but is expensive to navigate through recursive joins in relational systems. +* Vector Databases enable semantic search and similarity matching over embedded representations of clinical text, supporting AI use cases like similar-patient retrieval, terminology matching, and contextual search that go beyond keyword-based FHIR queries.
+* Block Storage provides the high-performance, low-latency foundation for database engines themselves, while also serving large binary attachments, imaging data, scanned documents, and waveforms that are referenced by FHIR resources but impractical to store within the resource payload. +The architectural discipline is not choosing one technology but designing the abstraction layer that routes FHIR operations to the appropriate backend while maintaining consistency, security, and a coherent developer experience. +Positioning the Helios FHIR Server in the FHIR Server Landscape + +The FHIR server landscape can be understood along two architectural dimensions: how tightly the implementation is coupled to its storage technology, and whether the system supports multiple specialized data stores or requires a single backend. +The vertical axis distinguishes between servers with tightly-coupled persistence where the implementation is deeply intertwined with a specific database technology, and those offering an extensible interface layer that abstracts storage concerns behind well-defined interfaces. A FHIR Server built directly on JPA (Java Persistence API) is such an example, meaning its data access patterns, query capabilities, and performance characteristics are fundamentally shaped by relational database assumptions. In contrast, an extensible interface layer defines traits or interfaces that can be implemented for any storage technology, allowing the same FHIR API to sit atop different backends without rewriting core logic. +The horizontal axis captures the difference between single storage backend architectures and polyglot persistence. Polyglot persistence is an architectural pattern where different types of data are routed to the storage technologies best suited for how that data will be accessed. 
For example, a polyglot system might store clinical documents in an object store optimized for large binary content, maintain patient relationships in a graph database for efficient traversal, and keep structured observations in a columnar store for fast analytical queries all while presenting a unified FHIR API to consuming applications. Most existing FHIR servers force all resources into a single database, sacrificing performance and flexibility for implementation simplicity. +The Helios FHIR Server occupies the upper-right quadrant: it combines a trait-based, open-source interface layer built in Rust with native support for polyglot persistence. This architecture allows organizations to optimize storage decisions for their specific access patterns while maintaining full FHIR compliance at the API layer. +Decomposing the FHIR Specification: Separation of Concerns in Persistence Design +The FHIR specification is vast. It defines resource structures, REST interactions, search semantics, terminology operations, versioning behavior, and much more. A monolithic interface, or trait that attempts to capture all of this becomes unwieldy, difficult to implement, and impossible to optimize for specific storage technologies. The Helios FHIR Server persistence design takes a different approach: decompose the specification into cohesive concerns, express each as a focused trait, and compose them to build complete storage backends. +Learning from Diesel: Type-Safe Database Abstractions +Before diving into our trait design, it's worth examining what we can learn from Diesel, Rust's most mature database abstraction layer. Diesel has solved many of the problems we face - multi-backend support, compile-time query validation, extensibility, and its design choices offer valuable lessons. 
+Backend Abstraction via Traits, Not Enums: Diesel defines a Backend trait that captures the differences between database systems (PostgreSQL, MySQL, SQLite) without coupling to specific implementations. The Backend trait specifies how SQL is generated, how bind parameters are collected, and how types are mapped. This allows new backends to be added without modifying core code. This is exactly what we need for polyglot FHIR persistence. +QueryFragment for Composable SQL Generation: Diesel's QueryFragment trait represents any piece of SQL that can be rendered. A WHERE clause, a JOIN, an entire SELECT statement all implement QueryFragment. This composability lets complex queries be built from simple pieces. For FHIR search, we can adopt a similar pattern: each search parameter modifier becomes a fragment that can be composed into complete queries. +Type-Level Query Validation: Diesel catches many errors at compile time by encoding schema information in the type system. While we can't achieve the same level of compile-time validation for dynamic FHIR queries, we can use Rust's type system to ensure that storage backends only claim to support operations they actually implement. +MultiConnection for Runtime Backend Selection: Diesel's #[derive(MultiConnection)] generates an enum that wraps multiple connection types, dispatching operations to the appropriate backend at runtime. This pattern directly applies to polyglot persistence. We can route FHIR operations to different backends based on query characteristics. +Extensibility via sql_function! and Custom Types: Diesel makes it trivial to add custom SQL functions and types. For FHIR, this translates to extensibility for custom search parameters, terminology operations, and backend-specific optimizations. +The Core Resource Storage Trait +At the foundation is the ResourceStorage trait, which handles the fundamental persistence of FHIR resources. 
This trait intentionally knows nothing about search, nothing about REST semantics, nothing about transactions. It simply stores and retrieves resources by type and identifier. +Multitenancy is not optional in this design. Every operation requires a TenantContext, making it impossible at the type level to accidentally execute a query without tenant scoping. There is no "escape hatch" that bypasses tenant isolation. +use async_trait::async_trait; +use serde_json::Value; + +/// Represents a stored FHIR resource with metadata. +pub struct StoredResource { + pub resource_type: String, + pub id: String, + pub version_id: String, + pub last_updated: chrono::DateTime<chrono::Utc>, + pub tenant_id: TenantId, + pub resource: Value, +} + +/// Core trait for resource storage operations. +/// +/// All operations are tenant-scoped. There is no non-tenant code path - +/// the type system enforces that tenant context is always provided. +#[async_trait] +pub trait ResourceStorage: Send + Sync { + /// Creates a new resource within a tenant's scope, assigning an ID if not provided. + async fn create( + &self, + tenant: &TenantContext, + resource: &Value, + ) -> Result<StoredResource, StorageError>; + + /// Reads the current version of a resource within a tenant's scope. + /// Returns NotFound if the resource exists but belongs to a different tenant. + async fn read( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + ) -> Result<Option<StoredResource>, StorageError>; + + /// Updates a resource within a tenant's scope, returning the new version. + async fn update( + &self, + tenant: &TenantContext, + resource: &Value, + ) -> Result<StoredResource, StorageError>; + + /// Deletes a resource within a tenant's scope (soft delete preserving history where supported). + async fn delete( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + ) -> Result<(), StorageError>; + + /// Returns the storage backend identifier for logging and diagnostics.
+ fn backend_name(&self) -> &'static str; +} + +Notice what's absent: there's no if_match parameter for optimistic concurrency, no version-specific reads, no history. Those capabilities belong to separate traits that extend the base functionality. A storage backend that doesn't support versioning simply doesn't implement the versioning trait. +Multitenancy: A Cross-Cutting Concern +Multitenancy has downstream implications for every layer of a FHIR server, from indexing strategy to reference validation to search semantics. By requiring tenant context at the lowest storage layer, we ensure that isolation guarantees propagate upward through the entire system. +Isolation Strategies +There are three fundamental approaches to tenant isolation, each with different trade-offs: +* Database-per-tenant: Strongest isolation, simplest security model, easier compliance story. The downside is operational overhead that grows linearly with tenants. Connection pool management becomes complex, and schema migrations are painful at scale. +* Schema-per-tenant: Good isolation within a single database instance, allows tenant-specific indexing. PostgreSQL handles this well. Still has schema migration coordination challenges. +* Shared schema with tenant discriminator: Most operationally efficient at scale, single migration path. The risk is that every query must include tenant filtering. One missed WHERE clause and you have a data breach. +For SQL-backed FHIR persistence, the shared schema approach with a tenant_id discriminator is pragmatic, but the enforcement layer must be airtight - you literally cannot construct a storage operation without providing tenant context. +Tenant Context as a Type-Level Guarantee +Borrowing from Diesel's approach to type safety, we can make tenant context explicit in the type system. Rather than passing tenant IDs as strings that might be forgotten, we create a wrapper type that must be present for any storage operation: +/// A validated tenant context. 
Operations that access tenant data +/// require this type, making it impossible to forget tenant filtering. +#[derive(Debug, Clone)] +pub struct TenantContext { + tenant_id: TenantId, + /// Permissions determine what operations are allowed + permissions: TenantPermissions, + /// Whether this context can access shared/system resources + can_access_shared: bool, +} + +/// The system tenant for shared resources (terminology, conformance) +pub const SYSTEM_TENANT: TenantId = TenantId::system(); + +/// Marker trait for operations that are tenant-scoped +pub trait TenantScoped { + fn tenant(&self) -> &TenantContext; +} + +Shared Resources and the System Tenant +CodeSystems, ValueSets, StructureDefinitions, and other conformance resources are typically shared across tenants. We designate a "system" tenant that holds these shared resources: +/// Determines whether a resource type should be tenant-specific or shared. +pub trait ResourceTenancy { + /// Returns the tenancy model for a resource type. + fn tenancy_model(&self, resource_type: &str) -> TenancyModel; +} + +pub enum TenancyModel { + /// Resource is always tenant-specific (e.g., Patient, Observation) + TenantScoped, + /// Resource is always shared (e.g., CodeSystem, ValueSet) + Shared, + /// Resource can be either, determined by business rules + Configurable, +} + +Index Design for Multitenancy +Search performance in a multitenant system depends critically on index design. 
The tenant_id must be the leading column in composite indexes: +-- Good: tenant_id leads, enabling efficient tenant-scoped queries +CREATE INDEX idx_patient_identifier ON patient (tenant_id, identifier_system, identifier_value); + +-- Bad: tenant_id not leading, will scan all tenants +CREATE INDEX idx_patient_identifier ON patient (identifier_system, identifier_value, tenant_id); + +Versioning as a Separate Concern +FHIR's versioning model is sophisticated: every update creates a new version, version IDs are opaque strings, and the vread interaction retrieves historical versions. Not all storage backends can efficiently support this. An append-only data lake handles versioning naturally; a key-value store might not. +/// Adds version-aware operations to base storage. +#[async_trait] +pub trait VersionedStorage: ResourceStorage { + /// Reads a specific version of a resource within a tenant's scope. + async fn vread( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + version_id: &str, + ) -> Result, StorageError>; + + /// Updates with optimistic concurrency control. + /// Fails with VersionConflict if current version doesn't match expected. + async fn update_with_match( + &self, + tenant: &TenantContext, + resource: &Value, + expected_version: &str, + ) -> Result; +} + +History: Building on Versioning +History access naturally extends versioning. If a backend can read specific versions, it can also enumerate them: +/// Provides access to resource history. +#[async_trait] +pub trait HistoryProvider: VersionedStorage { + /// Returns the history of a specific resource within a tenant's scope. + async fn history_instance( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + params: &HistoryParams, + ) -> Result; + + /// Returns the history of all resources of a type within a tenant's scope. 
+ async fn history_type( + &self, + tenant: &TenantContext, + resource_type: &str, + params: &HistoryParams, + ) -> Result; + + /// Returns the history of all resources within a tenant's scope. + async fn history_system( + &self, + tenant: &TenantContext, + params: &HistoryParams, + ) -> Result; +} + +/// Parameters for history queries, matching FHIR's _since, _at, _count parameters. +pub struct HistoryParams { + pub since: Option<chrono::DateTime<chrono::Utc>>, + pub at: Option<chrono::DateTime<chrono::Utc>>, + pub count: Option<usize>, +} + +The trait hierarchy HistoryProvider: VersionedStorage: ResourceStorage means that any storage backend supporting history automatically supports versioned reads and basic CRUD - all within tenant boundaries. The type system enforces this relationship. +The Search Abstraction: Decomposing FHIR's Query Model +Search is where the FHIR specification becomes genuinely complex. There are eight search parameter types (number, date, string, token, reference, quantity, uri, composite), sixteen modifiers (:exact, :contains, :not, :missing, :above, :below, :in, :not-in, :of-type, :identifier, :text, :code-text, :text-advanced, :iterate, plus resource type modifiers on references), nine comparison prefixes (eq, ne, lt, le, gt, ge, sa, eb, ap), chained parameters, reverse chaining (_has), _include and _revinclude directives, and advanced filtering via _filter. A single search query can combine all of these, all while respecting tenant boundaries. +Modeling search as a single trait would be a mistake. Instead, we decompose it into layers - and here, Diesel's QueryFragment pattern proves invaluable. +The SearchFragment Pattern (Inspired by Diesel's QueryFragment) +Diesel's QueryFragment trait allows any piece of SQL to be composable. We adapt this pattern for FHIR search, creating fragments that can be combined into complete search queries: +/// A fragment of a FHIR search that can be rendered to a backend-specific query. +/// Inspired by Diesel's QueryFragment pattern.
+pub trait SearchFragment { + /// Renders this fragment to the backend's query representation. + fn apply(&self, builder: &mut B::QueryBuilder) -> Result<(), SearchError>; + + /// Whether this fragment can be efficiently evaluated by the backend. + /// Returns false if the backend would need to do post-filtering. + fn is_native(&self, backend: &B) -> bool; + + /// Estimated cost of evaluating this fragment (for query planning). + fn estimated_cost(&self, backend: &B) -> QueryCost; +} + +/// A search backend that can evaluate SearchFragments. +pub trait SearchBackend: Send + Sync { + type QueryBuilder; + type QueryResult; + + /// Creates a new query builder for this backend. + fn query_builder(&self, resource_type: &str) -> Self::QueryBuilder; + + /// Executes a built query. + async fn execute(&self, query: Self::QueryBuilder) -> Result; +} + +Each search modifier becomes a fragment that knows how to render itself: +/// Fragment for the :exact modifier on string parameters. +pub struct ExactStringMatch { + pub parameter: String, + pub path: FhirPath, + pub value: String, +} + +impl SearchFragment for ExactStringMatch +where + B: SupportsExactMatch, +{ + fn apply(&self, builder: &mut B::QueryBuilder) -> Result<(), SearchError> { + builder.add_exact_string_match(&self.path, &self.value) + } + + fn is_native(&self, _backend: &B) -> bool { + true // Most backends support exact string matching natively + } + + fn estimated_cost(&self, backend: &B) -> QueryCost { + backend.cost_for_exact_match(&self.path) + } +} + +/// Fragment for the :above modifier on token parameters (terminology subsumption). 
+pub struct SubsumesMatch { + pub parameter: String, + pub path: FhirPath, + pub system: String, + pub code: String, +} + +impl SearchFragment for SubsumesMatch +where + B: SupportsTerminologySearch, +{ + fn apply(&self, builder: &mut B::QueryBuilder) -> Result<(), SearchError> { + builder.add_subsumes_match(&self.path, &self.system, &self.code) + } + + fn is_native(&self, backend: &B) -> bool { + // Only native if the backend has integrated terminology support + backend.has_native_terminology() + } + + fn estimated_cost(&self, backend: &B) -> QueryCost { + if self.is_native(backend) { + backend.cost_for_subsumption(&self.path) + } else { + QueryCost::RequiresExpansion // Will need to expand the code set first + } + } +} + +Search Parameter Types +First, we model the search parameter types and their associated matching logic: +/// The type of a search parameter, determining matching semantics. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SearchParamType { + Number, + Date, + String, + Token, + Reference, + Quantity, + Uri, + Composite, + Special, +} + +/// Comparison prefixes for ordered types (number, date, quantity). +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum SearchPrefix { + #[default] + Eq, // equals (default) + Ne, // not equals + Lt, // less than + Le, // less than or equals + Gt, // greater than + Ge, // greater than or equals + Sa, // starts after + Eb, // ends before + Ap, // approximately +} + +/// Modifiers that alter search behavior. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum SearchModifier { + Exact, // String: case-sensitive, full match + Contains, // String: substring match + Text, // Token/Reference: search display text + TextAdvanced, // Token/Reference: advanced text search + CodeText, // Token: search code text + Not, // Token: negation + Missing(bool), // All: test for presence/absence + Above, // Token/Reference/Uri: hierarchical above + Below, // Token/Reference/Uri: hierarchical below + In, // Token: value set membership + NotIn, // Token: value set non-membership + OfType, // Token (Identifier): type-qualified search + Identifier, // Reference: search by identifier + Type(String), // Reference: restrict to resource type + Iterate, // _include/_revinclude: recursive inclusion +} + +The Core Search Trait +The base search trait handles fundamental query execution without advanced features: +/// A parsed search parameter with its value and modifiers. +#[derive(Debug, Clone)] +pub struct SearchParameter { + pub name: String, + pub param_type: SearchParamType, + pub modifier: Option, + pub prefix: Option, + pub values: Vec, // Multiple values = OR +} + +/// A complete search query with all parameters. +#[derive(Debug, Clone, Default)] +pub struct SearchQuery { + /// Filter parameters (AND-joined) + pub parameters: Vec, + /// Sort specifications + pub sort: Vec, + /// Pagination + pub count: Option, + pub offset: Option, + /// Result modifiers + pub summary: Option, + pub elements: Option>, + /// Include directives + pub include: Vec, + pub revinclude: Vec, +} + +/// Base search capability for a storage backend. +#[async_trait] +pub trait SearchProvider: ResourceStorage { + /// Executes a search query against a resource type within a tenant's scope. + async fn search( + &self, + tenant: &TenantContext, + resource_type: &str, + query: &SearchQuery, + ) -> Result; + + +} + +Advanced Search Capabilities as Extension Traits +Not every storage backend can support every search feature. 
A relational database might handle token searches efficiently but struggle with subsumption queries that require terminology reasoning. A vector database might excel at text search but lack native support for date range queries. We model these variations as extension traits. +Chained Search Provider: +/// Adds support for chained parameter searches. +/// +/// Chaining allows searching by properties of referenced resources, +/// e.g., `Observation?patient.name=Smith`. This typically requires +/// join operations or graph traversal and must respect tenant boundaries +/// when following references. +#[async_trait] +pub trait ChainedSearchProvider: SearchProvider { + /// Executes a search with chained parameters within a tenant's scope. + /// + /// The implementation must ensure that chained references do not + /// cross tenant boundaries except for shared resources. + async fn search_with_chain( + &self, + tenant: &TenantContext, + resource_type: &str, + chain: &ChainedParameter, + terminal_condition: &SearchCondition, + ) -> Result; + + /// Executes a reverse chain (_has) search within a tenant's scope. + /// + /// Finds resources that are referenced by other resources matching + /// the given criteria, respecting tenant isolation. + async fn search_reverse_chain( + &self, + tenant: &TenantContext, + resource_type: &str, + has_param: &HasParameter, + ) -> Result; + + /// Returns the maximum chain depth supported by this backend. 
+ fn max_chain_depth(&self) -> usize { + 4 // Reasonable default; deep chains are expensive + } +} + +/// Represents a chained parameter like `patient.organization.name` +pub struct ChainedParameter { + /// The chain of reference parameters to follow + pub chain: Vec, +} + +pub struct ChainLink { + /// The search parameter name (must be a reference type) + pub parameter: String, + /// Optional type restriction for polymorphic references + pub target_type: Option, +} + +/// Represents a _has parameter for reverse chaining +pub struct HasParameter { + /// The resource type that references us + pub referencing_type: String, + /// The reference parameter on that type pointing to us + pub reference_param: String, + /// The condition to apply to the referencing resource + pub condition: SearchCondition, + /// Nested _has for multi-level reverse chains + pub nested: Option>, +} + +Terminology Search Provider: +/// Adds terminology-aware search capabilities. +/// +/// Supports the `:above`, `:below`, `:in`, and `:not-in` modifiers +/// which require understanding of code system hierarchies and value set +/// membership. Terminology resources are typically shared across tenants, +/// but the search itself is tenant-scoped. +#[async_trait] +pub trait TerminologySearchProvider: SearchProvider { + /// Expands a code using `:below` semantics (descendants) within tenant scope. + /// + /// Returns all codes subsumed by the given code. These codes are then + /// used to filter resources belonging to the specified tenant. + async fn expand_below( + &self, + tenant: &TenantContext, + system: &str, + code: &str, + ) -> Result, StorageError>; + + /// Expands a code using `:above` semantics (ancestors) within tenant scope. + async fn expand_above( + &self, + tenant: &TenantContext, + system: &str, + code: &str, + ) -> Result, StorageError>; + + /// Checks value set membership for `:in` modifier within tenant scope. 
+ /// + /// The value set itself may be shared or tenant-specific; the implementation + /// must resolve the correct value set based on tenant context. + async fn check_membership( + &self, + tenant: &TenantContext, + valueset_url: &str, + system: &str, + code: &str, + ) -> Result; + + /// Expands a value set to all member codes within tenant scope. + /// + /// Used for `:in` searches when the backend can efficiently filter + /// by an expanded code list. + async fn expand_valueset( + &self, + tenant: &TenantContext, + valueset_url: &str, + ) -> Result, StorageError>; +} + +pub struct ExpandedCode { + pub system: String, + pub code: String, + pub display: Option, +} + +Text Search Provider: +/// Adds full-text search capabilities. +/// +/// Supports `_text` (narrative search) and `_content` (full resource search) +/// parameters, as well as the `:text` modifier on string parameters. +/// All searches are scoped to the specified tenant. +#[async_trait] +pub trait TextSearchProvider: SearchProvider { + /// Searches resource narratives within a tenant's scope. + /// + /// Matches against the XHTML content in `Resource.text.div`. + async fn search_text( + &self, + tenant: &TenantContext, + resource_type: &str, + text_query: &str, + additional_params: &SearchQuery, + ) -> Result; + + /// Searches full resource content within a tenant's scope. + /// + /// Matches against all string content in the resource JSON. + async fn search_content( + &self, + tenant: &TenantContext, + resource_type: &str, + content_query: &str, + additional_params: &SearchQuery, + ) -> Result; + + /// Executes a text search on a specific parameter within tenant scope. + /// + /// Used for the `:text` modifier on string and token parameters, + /// e.g., `Condition?code:text=heart attack`. + async fn search_parameter_text( + &self, + tenant: &TenantContext, + resource_type: &str, + parameter: &str, + text_query: &str, + ) -> Result; +} + +This decomposition has practical consequences. 
When configuring a polyglot persistence layer, we can route terminology-aware searches to a backend that integrates with a terminology server (perhaps backed by a graph database), while directing simple token matches to a faster document store. The trait system makes these routing decisions explicit and type-safe. +Transactions: When Atomicity Matters +FHIR defines batch and transaction bundles. A batch processes entries independently; a transaction either succeeds completely or fails entirely with no partial effects. This all-or-nothing semantics requires database-level transaction support - something not all storage technologies provide natively. +/// Provides ACID transaction support. +/// +/// Transactions group multiple operations into an atomic unit. All +/// operations within a transaction are tenant-scoped; a single transaction +/// cannot span multiple tenants. +#[async_trait] +pub trait TransactionProvider: ResourceStorage { + /// Begins a new transaction within tenant scope. + /// + /// All operations on the returned Transaction object are scoped + /// to the specified tenant and will be committed or rolled back + /// as a unit. + async fn begin_transaction( + &self, + tenant: &TenantContext, + ) -> Result, StorageError>; +} + +/// An active transaction. +/// +/// Operations within a transaction see their own uncommitted changes +/// but are isolated from concurrent transactions. +#[async_trait] +pub trait Transaction: Send + Sync { + /// Returns the tenant context for this transaction. + fn tenant(&self) -> &TenantContext; + + /// Creates a resource within this transaction. + async fn create(&mut self, resource: &Value) -> Result; + + /// Reads a resource within this transaction (sees uncommitted changes). + async fn read( + &self, + resource_type: &str, + id: &str, + ) -> Result, StorageError>; + + /// Updates a resource within this transaction. 
+    async fn update(&mut self, resource: &Value) -> Result;
+
+    /// Deletes a resource within this transaction.
+    async fn delete(&mut self, resource_type: &str, id: &str) -> Result<(), StorageError>;
+
+    /// Commits all operations in this transaction atomically.
+    async fn commit(self: Box<Self>) -> Result<(), StorageError>;
+
+    /// Rolls back all operations in this transaction.
+    async fn rollback(self: Box<Self>) -> Result<(), StorageError>;
+
+#[derive(Debug, Clone, Copy, Default)]
+pub enum IsolationLevel {
+    #[default]
+    ReadCommitted,
+    RepeatableRead,
+    Serializable,
+}
+
+A storage backend that doesn't support transactions can still handle batch operations. It simply processes each entry independently, accepting that failures may leave partial results. The trait separation makes this distinction clear: code that requires atomicity takes &dyn TransactionProvider, while code that can tolerate partial failures takes &dyn ResourceStorage.
+Audit Events: A Separated Persistence Store
+AuditEvent resources should ideally be stored separately from clinical data. This isn't just a security concern; it's also an architectural one. Audit logs have different access patterns (append-heavy, rarely queried except during investigations), different retention requirements (often longer than clinical data), and different security constraints (must be tamper-evident, may require separate access controls).
+/// Specialized storage for audit events.
+///
+/// Audit storage is intentionally separate from clinical data storage.
+/// It typically has different characteristics:
+/// - Append-only or append-heavy workload
+/// - Different retention policies
+/// - Tamper-evident storage requirements
+/// - Separate access control
+#[async_trait]
+pub trait AuditStorage: Send + Sync {
+    /// Records an audit event. This operation should be highly available
+    /// and should not fail clinical operations if audit storage is degraded.
+ async fn record(&self, tenant: &TenantContext, event: &AuditEvent) -> Result; + + /// Queries audit events within a time range. + async fn query( + &self, + criteria: &AuditQuery, + ) -> Result, AuditError>; + + /// Retrieves audit events for a specific resource (accounting of disclosures). + async fn disclosures_for_resource( + &self, + resource_type: &str, + resource_id: &str, + period: &DateRange, + ) -> Result, AuditError>; +} + +/// Audit query criteria supporting HIPAA accounting requirements. +#[derive(Debug, Clone, Default)] +pub struct AuditQuery { + pub patient_id: Option, + pub agent_id: Option, + pub action: Option, + pub period: Option, + pub resource_type: Option, + pub outcome: Option, +} + +The separation of AuditStorage from ResourceStorage enables critical architectural flexibility. Audit events can flow to a dedicated time-series database optimized for append-only writes, or to an immutable ledger for tamper evidence, or to a separate cloud account for security isolation. +The REST Layer: Mapping HTTP to Storage +The FHIR REST API defines interactions (read, vread, update, create, delete, search, etc.) that map HTTP verbs and URL patterns to operations. This mapping is a separate concern from storage. The same storage backend might be accessed via REST, GraphQL, messaging, or bulk export. +/// Interaction types defined by the FHIR REST specification. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Interaction { + Read, + Vread, + Update, + Patch, + Delete, + History, + Create, + Search, + Capabilities, + Batch, + Transaction, +} + +/// Scope at which an interaction operates. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum InteractionScope { + Instance, // Operations on a specific resource instance + Type, // Operations on a resource type + System, // System-wide operations +} + +/// Result of a REST interaction, capturing both outcome and metadata. 
+pub struct InteractionResult { + pub resource: Option, + pub status: HttpStatus, + pub etag: Option, + pub last_modified: Option>, + pub location: Option, + pub outcome: Option, +} + +/// Orchestrates REST interactions by coordinating storage traits. +#[async_trait] +pub trait RestHandler: Send + Sync { + /// Processes a FHIR REST interaction. + async fn handle( + &self, + interaction: Interaction, + scope: InteractionScope, + context: &InteractionContext, + ) -> Result; +} + +The RestHandler is a coordination layer that combines multiple storage traits to implement FHIR REST semantics. A read interaction needs only ResourceStorage. A vread needs VersionedStorage. A search with _include needs both SearchProvider and ResourceStorage. The REST handler composes these capabilities based on what the request requires and what the storage backend provides. +Capability Statements: Documenting What Storage Supports +The FHIR specification requires servers to publish a CapabilityStatement declaring which interactions, resources, and search parameters they support. When storage backends have different capabilities, this statement must accurately reflect the union of what's available and identify gaps. +Diesel solves a similar problem with its type system. Operations that aren't supported simply don't compile. For FHIR, we need runtime capability discovery because queries are dynamic. We model storage capabilities as a queryable trait that can generate CapabilityStatement fragments: +/// Declares the capabilities of a storage backend. +/// Inspired by Diesel's approach to backend-specific features. +pub trait StorageCapabilities { + /// Returns supported interactions for a resource type. + fn supported_interactions(&self, resource_type: &str) -> Vec; + + /// Returns supported search parameters for a resource type. + fn supported_search_params(&self, resource_type: &str) -> Vec; + + /// Returns supported search modifiers for a parameter type. 
+ fn supported_modifiers(&self, param_type: SearchParamType) -> Vec; + + /// Returns whether chained search is supported. + fn supports_chaining(&self) -> bool; + + /// Returns whether reverse chaining (_has) is supported. + fn supports_reverse_chaining(&self) -> bool; + + /// Returns whether _include is supported. + fn supports_include(&self) -> bool; + + /// Returns whether _revinclude is supported. + fn supports_revinclude(&self) -> bool; + + /// Returns supported transaction isolation levels. + fn supported_isolation_levels(&self) -> Vec; + + /// Generates a FHIR CapabilityStatement fragment for this backend. + fn to_capability_statement(&self) -> Value; +} + +/// Describes support for a specific search parameter. +#[derive(Debug, Clone)] +pub struct SearchParamCapability { + pub name: String, + pub param_type: SearchParamType, + pub modifiers: Vec, + pub prefixes: Vec, + pub documentation: Option, +} + +/// Marker traits for optional capabilities, enabling compile-time +/// capability checking where possible (similar to Diesel's backend features). +pub trait SupportsExactMatch: SearchBackend {} +pub trait SupportsContainsMatch: SearchBackend {} +pub trait SupportsTerminologySearch: SearchBackend {} +pub trait SupportsFullTextSearch: SearchBackend {} +pub trait SupportsChainedSearch: SearchBackend {} +pub trait SupportsReverseChaining: SearchBackend {} + +Dynamic Capability Checking +For operations that can't be checked at compile time, we provide runtime capability checking that fails fast with clear error messages: +/// Validates that a search query can be executed by this backend. +pub trait QueryValidator: StorageCapabilities { + /// Checks if all features required by the query are supported. + fn validate_query(&self, query: &SearchQuery) -> Result<(), UnsupportedFeature>; + + /// Returns which parts of a query would need post-processing. 
+ fn requires_post_processing(&self, query: &SearchQuery) -> Vec; +} + +#[derive(Debug)] +pub struct UnsupportedFeature { + pub feature: String, + pub parameter: Option, + pub suggestion: Option, +} + +impl std::fmt::Display for UnsupportedFeature { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Unsupported feature: {}", self.feature)?; + if let Some(ref param) = self.parameter { + write!(f, " on parameter '{}'", param)?; + } + if let Some(ref suggestion) = self.suggestion { + write!(f, ". Suggestion: {}", suggestion)?; + } + Ok(()) + } +} + +The Feature Support Matrix +Different storage technologies have different strengths. A key deliverable of the Helios FHIR Server's persistence design is a clear feature support matrix that documents what each storage backend provides. This (example, work-in-progress) matrix drives both the CapabilityStatement generation and helps operators choose the right backend for their workload. +Feature PostgreSQL MongoDB Cassandra Neo4j Elasticsearch S3/Parquet +Basic CRUD ✓ ✓ ✓ ✓ ✓ Read-only +Versioning ✓ ✓ ✓ ✓ ✓ ✓ +History ✓ ✓ Limited ✓ ✓ ✓ +Transactions ✓ ✓ Limited ✓ ✗ ✗ +Multitenancy RLS + App Collection/DB Keyspace Labels Index per tenant Prefix-based +Search: String ✓ ✓ Limited ✓ ✓ ✓ +Search: Token ✓ ✓ ✓ ✓ ✓ ✓ +Search: Reference ✓ ✓ Limited ✓ ✓ ✓ +Search: Date Range ✓ ✓ ✓ ✓ ✓ ✓ +Search: Quantity ✓ ✓ Limited Limited ✓ ✓ +Modifier: :exact ✓ ✓ ✓ ✓ ✓ ✓ +Modifier: :contains ✓ ✓ ✗ ✓ ✓ ✗ +Modifier: :not ✓ ✓ ✗ ✓ ✓ ✓ +Modifier: :missing ✓ ✓ Limited ✓ ✓ ✓ +Modifier: :above/:below With terminology With terminology ✗ ✓ With terminology ✗ +Modifier: :in/:not-in With terminology With terminology ✗ ✓ With terminology ✗ +Chained Parameters ✓ ✓ ✗ ✓ Limited ✗ +Reverse Chaining (_has) ✓ ✓ ✗ ✓ ✗ ✗ +_include ✓ ✓ ✗ ✓ ✓ ✗ +_revinclude ✓ ✓ ✗ ✓ ✓ ✗ +Full-text Search ✓ ✓ ✗ ✗ ✓ ✗ +Bulk Export ✓ ✓ ✓ ✓ ✓ ✓ +This matrix isn't static. It's generated from the StorageCapabilities implementations. 
When a new storage backend is added or an existing one gains features, the matrix updates automatically. +Composing Storage Backends (Inspired by Diesel's MultiConnection) +Diesel's MultiConnection derive macro generates an enum that wraps multiple connection types, dispatching to the appropriate backend at runtime. We adapt this pattern for polyglot FHIR persistence, but with intelligent routing based on query characteristics: +/// Routes operations to appropriate storage backends based on capabilities +/// and query characteristics. Similar to Diesel's MultiConnection but with +/// query-aware routing. +pub struct CompositeStorage { + /// Primary transactional store for CRUD operations + primary: Arc, + + /// Search-optimized store (may be the same as primary) + search: Arc, + + /// Terminology service for subsumption queries + terminology: Arc, + + /// Graph store for relationship traversal + graph: Option>, + + /// Full-text search engine + text: Option>, + + /// Audit log store (always separate) + audit: Arc, + + /// Bulk export store + bulk: Arc, + + /// Query cost estimator for routing decisions + cost_estimator: Arc, +} + +The routing logic becomes explicit policy that considers both capabilities and cost: +impl CompositeStorage { + async fn route_search( + &self, + tenant: &TenantContext, + query: &SearchQuery, + ) -> Result { + // If query contains _text or _content, route to text search + if query.has_text_search() { + if let Some(ref text) = self.text { + return text.search(tenant, &query.resource_type, query).await; + } + return Err(StorageError::UnsupportedFeature(UnsupportedFeature { + feature: "full-text search".into(), + parameter: query.text_param_name(), + suggestion: Some("Remove _text/_content parameters or enable Elasticsearch backend".into()), + })); + } + + // If query contains :above or :below modifiers, involve terminology + if query.has_terminology_modifiers() { + return self.search_with_terminology(tenant, query).await; + } + + // If 
query contains chained parameters, prefer graph store + if query.has_chaining() { + if let Some(ref graph) = self.graph { + let graph_cost = self.cost_estimator.estimate_cost(query, graph.as_ref()); + let primary_cost = self.cost_estimator.estimate_cost(query, self.search.as_ref()); + + if graph_cost < primary_cost { + return graph.search(tenant, &query.resource_type, query).await; + } + } + } + + // Default to primary search + self.search.search(tenant, &query.resource_type, query).await + } + + /// Ensures _include and _revinclude respect tenant boundaries. + async fn apply_includes( + &self, + tenant: &TenantContext, + matches: Vec, + query: &SearchQuery, + ) -> Result { + let mut included = Vec::new(); + + // Process _include directives + for include in &query.include { + let resolved = self.search.resolve_includes(tenant, &matches, &[include.clone()]).await?; + included.extend(resolved); + } + + // Process _revinclude directives + for revinclude in &query.revinclude { + let resolved = self.search.resolve_revincludes(tenant, &matches, &[revinclude.clone()]).await?; + included.extend(resolved); + } + + Ok(SearchBundle { + matches, + included, + total: None, + }) + } +} + +/// Estimates the cost of executing a query on different backends. +pub trait QueryCostEstimator: Send + Sync { + fn estimate_cost( + &self, + query: &SearchQuery, + backend: &dyn StorageCapabilities, + ) -> QueryCost; +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum QueryCost { + /// Query can be executed efficiently + Optimal(u64), + /// Query requires some post-processing + Acceptable(u64), + /// Query will be slow, consider alternatives + Expensive(u64), + /// Query requires expanding a code set first + RequiresExpansion, + /// Backend cannot execute this query + Unsupported, +} + +The Path Forward +This trait-based decomposition provides a foundation for building a FHIR persistence layer that can evolve with requirements. 
When AI workloads demand vector similarity search, we add a VectorSearchProvider trait and plug in a vector database. When regulatory requirements demand immutable audit trails, we implement AuditStorage against an append-only ledger. When performance analysis reveals that graph traversals are bottlenecking population health queries, we route those operations to a dedicated graph database. +Extensibility Following Diesel's Model: Just as Diesel's sql_function! macro makes it trivial to add custom SQL functions, our design should make it easy to add custom search parameters and modifiers. A healthcare organization might need a custom :phonetic modifier for patient name matching, or a :geo-near modifier for location-based searches. The SearchFragment pattern enables this: +// Adding a custom phonetic search modifier is straightforward +pub struct PhoneticMatch { + pub parameter: String, + pub path: FhirPath, + pub value: String, + pub algorithm: PhoneticAlgorithm, +} + +impl SearchFragment for PhoneticMatch +where + B: SupportsPhoneticSearch, // Custom capability marker +{ + fn apply(&self, builder: &mut B::QueryBuilder) -> Result<(), SearchError> { + builder.add_phonetic_match(&self.path, &self.value, &self.algorithm) + } + + fn is_native(&self, backend: &B) -> bool { + backend.has_phonetic_support(&self.algorithm) + } + + fn estimated_cost(&self, backend: &B) -> QueryCost { + backend.cost_for_phonetic(&self.path) + } +} + +This is what it means to build FHIR persistence for the AI era: not a monolithic database adapter, but a composable system of specialized capabilities that can be assembled to meet the specific needs of each deployment with tenant isolation, search routing, and extensibility built into the architecture from the start. +Thank you! +I very much look forward to your thoughts on these ideas and to the discussions that follow. +Sincerely,
-Steve diff --git a/crates/persistence/src/backends/s3/keyspace.rs b/crates/persistence/src/backends/s3/keyspace.rs new file mode 100644 index 00000000..2cb3ba63 --- /dev/null +++ b/crates/persistence/src/backends/s3/keyspace.rs @@ -0,0 +1,253 @@ +use chrono::{DateTime, Utc}; + +/// Keyspace builder for S3 object paths. +#[derive(Debug, Clone)] +pub struct S3Keyspace { + base_prefix: Option, +} + +impl S3Keyspace { + pub fn new(base_prefix: Option) -> Self { + let base_prefix = base_prefix + .map(|p| p.trim_matches('/').to_string()) + .filter(|p| !p.is_empty()); + Self { base_prefix } + } + + pub fn with_tenant_prefix(&self, tenant_id: &str) -> Self { + let tenant = tenant_id.trim_matches('/'); + let merged = match &self.base_prefix { + Some(base) => format!("{}/{}", base, tenant), + None => tenant.to_string(), + }; + Self::new(Some(merged)) + } + + pub fn current_resource_key(&self, resource_type: &str, id: &str) -> String { + self.join(&["resources", resource_type, id, "current.json"]) + } + + pub fn history_version_key(&self, resource_type: &str, id: &str, version_id: &str) -> String { + self.join(&[ + "resources", + resource_type, + id, + "_history", + &format!("{}.json", version_id), + ]) + } + + pub fn history_versions_prefix(&self, resource_type: &str, id: &str) -> String { + self.join(&["resources", resource_type, id, "_history/"]) + } + + pub fn resources_prefix(&self) -> String { + self.join(&["resources/"]) + } + + pub fn resource_type_prefix(&self, resource_type: &str) -> String { + self.join(&["resources", resource_type, "/"]) + } + + pub fn history_type_event_key( + &self, + resource_type: &str, + timestamp: DateTime, + id: &str, + version_id: &str, + suffix: &str, + ) -> String { + self.join(&[ + "history", + "type", + resource_type, + &format!( + "{}_{}_{}_{}.json", + timestamp.timestamp_millis(), + sanitize(id), + version_id, + suffix + ), + ]) + } + + pub fn history_system_event_key( + &self, + resource_type: &str, + timestamp: DateTime, + id: 
&str, + version_id: &str, + suffix: &str, + ) -> String { + self.join(&[ + "history", + "system", + &format!( + "{}_{}_{}_{}_{}.json", + timestamp.timestamp_millis(), + sanitize(resource_type), + sanitize(id), + version_id, + suffix + ), + ]) + } + + pub fn history_type_prefix(&self, resource_type: &str) -> String { + self.join(&["history", "type", resource_type, "/"]) + } + + pub fn history_system_prefix(&self) -> String { + self.join(&["history", "system/"]) + } + + pub fn export_job_state_key(&self, job_id: &str) -> String { + self.join(&["bulk", "export", "jobs", job_id, "state.json"]) + } + + pub fn export_job_progress_key(&self, job_id: &str, resource_type: &str) -> String { + self.join(&[ + "bulk", + "export", + "jobs", + job_id, + "progress", + &format!("{}.json", resource_type), + ]) + } + + pub fn export_job_manifest_key(&self, job_id: &str) -> String { + self.join(&["bulk", "export", "jobs", job_id, "manifest.json"]) + } + + pub fn export_job_output_key(&self, job_id: &str, resource_type: &str, part: u32) -> String { + self.join(&[ + "bulk", + "export", + "jobs", + job_id, + "output", + resource_type, + &format!("part-{}.ndjson", part), + ]) + } + + pub fn export_jobs_prefix(&self) -> String { + self.join(&["bulk", "export", "jobs/"]) + } + + pub fn export_job_prefix(&self, job_id: &str) -> String { + self.join(&["bulk", "export", "jobs", job_id, "/"]) + } + + pub fn submit_state_key(&self, submitter: &str, submission_id: &str) -> String { + self.join(&["bulk", "submit", submitter, submission_id, "state.json"]) + } + + pub fn submit_manifest_key( + &self, + submitter: &str, + submission_id: &str, + manifest_id: &str, + ) -> String { + self.join(&[ + "bulk", + "submit", + submitter, + submission_id, + "manifests", + &format!("{}.json", manifest_id), + ]) + } + + pub fn submit_raw_line_key( + &self, + submitter: &str, + submission_id: &str, + manifest_id: &str, + line: u64, + ) -> String { + self.join(&[ + "bulk", + "submit", + submitter, + submission_id, 
+ "raw", + manifest_id, + &format!("line-{}.ndjson", line), + ]) + } + + pub fn submit_result_line_key( + &self, + submitter: &str, + submission_id: &str, + manifest_id: &str, + line: u64, + ) -> String { + self.join(&[ + "bulk", + "submit", + submitter, + submission_id, + "results", + manifest_id, + &format!("line-{}.json", line), + ]) + } + + pub fn submit_change_key( + &self, + submitter: &str, + submission_id: &str, + change_id: &str, + ) -> String { + self.join(&[ + "bulk", + "submit", + submitter, + submission_id, + "changes", + &format!("{}.json", change_id), + ]) + } + + pub fn submit_prefix(&self, submitter: &str, submission_id: &str) -> String { + self.join(&["bulk", "submit", submitter, submission_id, "/"]) + } + + pub fn submit_root_prefix(&self) -> String { + self.join(&["bulk", "submit/"]) + } + + fn join(&self, parts: &[&str]) -> String { + let mut segs: Vec = Vec::new(); + if let Some(prefix) = &self.base_prefix { + segs.push(prefix.clone()); + } + + for part in parts { + let trimmed = part.trim_matches('/'); + if trimmed.is_empty() { + continue; + } + segs.push(trimmed.to_string()); + } + + let mut out = segs.join("/"); + if parts.last().map(|p| p.ends_with('/')).unwrap_or(false) && !out.ends_with('/') { + out.push('/'); + } + out + } +} + +fn sanitize(value: &str) -> String { + value + .chars() + .map(|c| match c { + '/' | '\\' | ' ' => '_', + _ => c, + }) + .collect() +} diff --git a/crates/persistence/src/backends/s3/mod.rs b/crates/persistence/src/backends/s3/mod.rs new file mode 100644 index 00000000..741e5d61 --- /dev/null +++ b/crates/persistence/src/backends/s3/mod.rs @@ -0,0 +1,21 @@ +//! AWS S3 backend implementation. +//! +//! This backend is optimized for object-storage persistence workloads: +//! CRUD, versioning/history, and bulk operations. It is intentionally not a +//! general-purpose FHIR search/query engine. 
+ +mod backend; +mod bulk_export; +mod bulk_submit; +mod bundle; +mod client; +mod config; +mod keyspace; +mod models; +mod storage; + +pub use backend::S3Backend; +pub use config::{S3BackendConfig, S3TenancyMode}; + +#[cfg(test)] +mod tests; diff --git a/crates/persistence/src/backends/s3/models.rs b/crates/persistence/src/backends/s3/models.rs new file mode 100644 index 00000000..b71dc429 --- /dev/null +++ b/crates/persistence/src/backends/s3/models.rs @@ -0,0 +1,34 @@ +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; + +use crate::core::bulk_export::{ExportManifest, ExportProgress, ExportRequest}; +use crate::core::bulk_submit::{SubmissionManifest, SubmissionSummary}; +use crate::core::history::HistoryMethod; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct HistoryIndexEvent { + pub resource_type: String, + pub id: String, + pub version_id: String, + pub timestamp: DateTime, + pub method: HistoryMethod, + pub deleted: bool, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExportJobState { + pub request: ExportRequest, + pub progress: ExportProgress, + pub manifest: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SubmissionState { + pub summary: SubmissionSummary, + pub abort_reason: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SubmissionManifestState { + pub manifest: SubmissionManifest, +} diff --git a/crates/persistence/src/backends/s3/storage.rs b/crates/persistence/src/backends/s3/storage.rs new file mode 100644 index 00000000..842ece85 --- /dev/null +++ b/crates/persistence/src/backends/s3/storage.rs @@ -0,0 +1,957 @@ +use async_trait::async_trait; +use helios_fhir::FhirVersion; +use serde::Serialize; +use serde::de::DeserializeOwned; +use serde_json::Value; +use uuid::Uuid; + +use crate::core::history::{ + HistoryEntry, HistoryMethod, HistoryPage, HistoryParams, InstanceHistoryProvider, + SystemHistoryProvider, TypeHistoryProvider, +}; +use 
crate::core::{ResourceStorage, VersionedStorage, normalize_etag};
use crate::error::{
    BackendError, ConcurrencyError, ResourceError, SearchError, StorageError, StorageResult,
};
use crate::tenant::TenantContext;
use crate::types::{
    CursorValue, Page, PageCursor, PageInfo, Pagination, PaginationMode, ResourceMethod,
    StoredResource,
};

use super::backend::{S3Backend, TenantLocation};
use super::client::{ListObjectItem, ObjectMetadata};
use super::models::HistoryIndexEvent;

/// A current resource together with the S3 ETag it was read under, so
/// callers can perform conditional (If-Match) writes against it.
#[derive(Debug, Clone)]
pub(crate) struct CurrentResourceWithMeta {
    pub resource: StoredResource,
    pub etag: Option<String>,
}

impl S3Backend {
    /// Serializes `value` to JSON bytes, mapping failures to
    /// `BackendError::SerializationError`.
    pub(crate) fn serialize_json<T: Serialize>(&self, value: &T) -> StorageResult<Vec<u8>> {
        serde_json::to_vec(value).map_err(|e| {
            StorageError::Backend(BackendError::SerializationError {
                message: format!("failed to serialize JSON payload: {e}"),
            })
        })
    }

    /// Deserializes JSON bytes, mapping failures to
    /// `BackendError::SerializationError`.
    pub(crate) fn deserialize_json<T: DeserializeOwned>(&self, bytes: &[u8]) -> StorageResult<T> {
        serde_json::from_slice(bytes).map_err(|e| {
            StorageError::Backend(BackendError::SerializationError {
                message: format!("failed to deserialize JSON payload: {e}"),
            })
        })
    }

    /// Writes a JSON object, optionally guarded by `If-Match`/`If-None-Match`
    /// preconditions for optimistic concurrency control.
    // NOTE(review): the return type was reconstructed from the client call
    // (the patch text had its generics stripped) — confirm against
    // `client::put_object`'s signature.
    pub(crate) async fn put_json_object(
        &self,
        bucket: &str,
        key: &str,
        value: &[u8],
        if_match: Option<&str>,
        if_none_match: Option<&str>,
    ) -> StorageResult<ObjectMetadata> {
        self.client
            .put_object(
                bucket,
                key,
                value.to_vec(),
                Some("application/json"),
                if_match,
                if_none_match,
            )
            .await
            .map_err(|e| self.map_client_error(e))
    }

    /// Writes raw bytes with an optional content type and no preconditions.
    // NOTE(review): return type reconstructed — see put_json_object.
    pub(crate) async fn put_bytes_object(
        &self,
        bucket: &str,
        key: &str,
        value: &[u8],
        content_type: Option<&str>,
    ) -> StorageResult<ObjectMetadata> {
        self.client
            .put_object(bucket, key, value.to_vec(), content_type, None, None)
            .await
            .map_err(|e| self.map_client_error(e))
    }

    /// Deletes a single object.
    pub(crate) async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()> {
        self.client
            .delete_object(bucket, key)
            .await
            .map_err(|e| self.map_client_error(e))
    }

    /// Reads and deserializes a JSON object; returns `Ok(None)` when the key
    /// does not exist.
    pub(crate) async fn get_json_object<T: DeserializeOwned>(
        &self,
        bucket: &str,
        key: &str,
    ) -> StorageResult<Option<(T, ObjectMetadata)>> {
        match self.client.get_object(bucket, key).await {
            Ok(Some(object)) => {
                let value = self.deserialize_json::<T>(&object.bytes)?;
                Ok(Some((value, object.metadata)))
            }
            Ok(None) => Ok(None),
            Err(err) => Err(self.map_client_error(err)),
        }
    }

    /// Lists every object under `prefix`, following continuation tokens
    /// (1000 items per page) until the listing is exhausted.
    pub(crate) async fn list_objects_all(
        &self,
        bucket: &str,
        prefix: &str,
    ) -> StorageResult<Vec<ListObjectItem>> {
        let mut out = Vec::new();
        let mut token: Option<String> = None;

        loop {
            let page = self
                .client
                .list_objects(bucket, prefix, token.as_deref(), Some(1000))
                .await
                .map_err(|e| self.map_client_error(e))?;
            out.extend(page.items);
            token = page.next_continuation_token;
            if token.is_none() {
                break;
            }
        }

        Ok(out)
    }

    /// Loads the current version of a resource together with its ETag.
    pub(crate) async fn load_current_with_meta(
        &self,
        tenant: &TenantContext,
        resource_type: &str,
        id: &str,
    ) -> StorageResult<Option<CurrentResourceWithMeta>> {
        let location = self.tenant_location(tenant)?;
        let key = location.keyspace.current_resource_key(resource_type, id);

        let loaded = self
            .get_json_object::<StoredResource>(&location.bucket, &key)
            .await?;

        Ok(loaded.map(|(resource, metadata)| CurrentResourceWithMeta {
            resource,
            etag: metadata.etag,
        }))
    }

    /// Writes the immutable history version plus the type- and system-level
    /// history index events for `resource`.
    ///
    /// The three PUTs are not atomic; a crash between them can leave a
    /// version object without its index events.
    pub(crate) async fn put_history_and_indexes(
        &self,
        location: &TenantLocation,
        resource: &StoredResource,
        method: HistoryMethod,
    ) -> StorageResult<()> {
        let history_key = location.keyspace.history_version_key(
            resource.resource_type(),
            resource.id(),
            resource.version_id(),
        );
        let payload = self.serialize_json(resource)?;
        self.put_json_object(&location.bucket, &history_key, &payload, None, None)
            .await?;

        let event = HistoryIndexEvent {
            resource_type: resource.resource_type().to_string(),
            id: resource.id().to_string(),
            version_id: resource.version_id().to_string(),
            timestamp: resource.last_modified(),
            method,
            deleted: resource.is_deleted(),
        };
        let event_payload = self.serialize_json(&event)?;
        // Random suffix keeps concurrent writers from colliding on the same
        // index key for the same version.
        let suffix = Uuid::new_v4().simple().to_string();

        let type_key = location.keyspace.history_type_event_key(
            resource.resource_type(),
            resource.last_modified(),
            resource.id(),
            resource.version_id(),
            &suffix,
        );
        let system_key = location.keyspace.history_system_event_key(
            resource.resource_type(),
            resource.last_modified(),
            resource.id(),
            resource.version_id(),
            &suffix,
        );

        self.put_json_object(&location.bucket, &type_key, &event_payload, None, None)
            .await?;
        self.put_json_object(&location.bucket, &system_key, &event_payload, None, None)
            .await?;

        Ok(())
    }

    /// Maps a stored resource's recorded method to a history method,
    /// defaulting to DELETE for tombstones and PUT otherwise when no method
    /// was recorded.
    pub(crate) fn history_method_for(resource: &StoredResource) -> HistoryMethod {
        match resource.method() {
            Some(ResourceMethod::Post) => HistoryMethod::Post,
            Some(ResourceMethod::Put) => HistoryMethod::Put,
            Some(ResourceMethod::Patch) => HistoryMethod::Patch,
            Some(ResourceMethod::Delete) => HistoryMethod::Delete,
            None => {
                if resource.is_deleted() {
                    HistoryMethod::Delete
                } else {
                    HistoryMethod::Put
                }
            }
        }
    }

    /// Sorts history entries newest-first and applies offset-based
    /// pagination, encoding offsets as opaque page cursors.
    // NOTE(review): return type reconstructed; `HistoryPage` is imported
    // above and matches `Page::new(...)` — confirm against core::history.
    pub(crate) fn page_history(
        &self,
        mut entries: Vec<HistoryEntry>,
        pagination: &Pagination,
    ) -> StorageResult<HistoryPage> {
        entries.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));

        let total = entries.len();
        let offset = decode_pagination_offset(pagination)?;
        let count = pagination.count as usize;
        let end = offset.saturating_add(count).min(total);

        let items = if offset >= total {
            Vec::new()
        } else {
            entries[offset..end].to_vec()
        };

        let has_next = end < total;
        let has_previous = offset > 0;

        let next_cursor = if has_next {
            Some(PageCursor::new(vec![CursorValue::Number(end as i64)], end.to_string()).encode())
        } else {
            None
        };

        let previous_cursor = if has_previous {
            let prev = offset.saturating_sub(count);
            Some(PageCursor::new(vec![CursorValue::Number(prev as i64)], prev.to_string()).encode())
        } else {
            None
        };

        Ok(Page::new(
            items,
            PageInfo {
                next_cursor,
                previous_cursor,
                total: Some(total as u64),
                has_next,
                has_previous,
            },
        ))
    }

    /// Lists the keys of all `current.json` pointers, optionally limited to
    /// one resource type.
    pub(crate) async fn list_current_keys(
        &self,
        location: &TenantLocation,
        resource_type: Option<&str>,
    ) -> StorageResult<Vec<String>> {
        let prefix = if let Some(resource_type) = resource_type {
            location.keyspace.resource_type_prefix(resource_type)
        } else {
            location.keyspace.resources_prefix()
        };

        let keys = self
            .list_objects_all(&location.bucket, &prefix)
            .await?
            .into_iter()
            .map(|i| i.key)
            .filter(|key| key.ends_with("/current.json"))
            .collect();

        Ok(keys)
    }

    /// Loads history entries by reading every index event under `prefix` and
    /// then fetching the referenced version objects (one GET per event;
    /// events whose version object is missing are silently skipped).
    pub(crate) async fn load_history_event_entries(
        &self,
        location: &TenantLocation,
        prefix: &str,
    ) -> StorageResult<Vec<HistoryEntry>> {
        let mut entries = Vec::new();
        let objects = self.list_objects_all(&location.bucket, prefix).await?;

        for object in objects {
            let Some((event, _)) = self
                .get_json_object::<HistoryIndexEvent>(&location.bucket, &object.key)
                .await?
            else {
                continue;
            };

            let history_key = location.keyspace.history_version_key(
                &event.resource_type,
                &event.id,
                &event.version_id,
            );

            if let Some((resource, _)) = self
                .get_json_object::<StoredResource>(&location.bucket, &history_key)
                .await?
            {
                entries.push(HistoryEntry {
                    resource,
                    method: event.method,
                    timestamp: event.timestamp,
                });
            }
        }

        Ok(entries)
    }

    /// Forces the `resourceType` and `id` fields of a resource body to match
    /// the addressed type/id (a no-op for non-object JSON values).
    pub(crate) fn ensure_resource_shape(
        &self,
        resource_type: &str,
        id: &str,
        mut resource: Value,
    ) -> Value {
        if let Some(object) = resource.as_object_mut() {
            object.insert(
                "resourceType".to_string(),
                Value::String(resource_type.to_string()),
            );
            object.insert("id".to_string(), Value::String(id.to_string()));
        }
        resource
    }

    /// Restores a resource snapshot as the latest version.
    ///
    /// If a current version exists (including tombstones), this writes a new
    /// version from that current pointer. If the resource is missing, this
    /// recreates version `1` from the snapshot content.
+ pub(crate) async fn restore_resource_from_snapshot( + &self, + tenant: &TenantContext, + snapshot: &StoredResource, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let resource_type = snapshot.resource_type(); + let id = snapshot.id(); + let current_key = location.keyspace.current_resource_key(resource_type, id); + + let content = self.ensure_resource_shape(resource_type, id, snapshot.content().clone()); + + if let Some(current) = self + .load_current_with_meta(tenant, resource_type, id) + .await? + { + let restored = current.resource.new_version(content, ResourceMethod::Put); + let payload = self.serialize_json(&restored)?; + self.put_json_object( + &location.bucket, + ¤t_key, + &payload, + current.etag.as_deref(), + None, + ) + .await?; + self.put_history_and_indexes(&location, &restored, HistoryMethod::Put) + .await?; + Ok(restored) + } else { + let restored = StoredResource::new( + resource_type, + id, + tenant.tenant_id().clone(), + content, + snapshot.fhir_version(), + ); + let payload = self.serialize_json(&restored)?; + self.put_json_object(&location.bucket, ¤t_key, &payload, None, Some("*")) + .await?; + self.put_history_and_indexes(&location, &restored, HistoryMethod::Post) + .await?; + Ok(restored) + } + } +} + +#[async_trait] +impl ResourceStorage for S3Backend { + fn backend_name(&self) -> &'static str { + "s3" + } + + async fn create( + &self, + tenant: &TenantContext, + resource_type: &str, + resource: Value, + fhir_version: FhirVersion, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + + let id = resource + .get("id") + .and_then(|v| v.as_str()) + .map(str::to_string) + .unwrap_or_else(|| Uuid::new_v4().to_string()); + + let current_key = location.keyspace.current_resource_key(resource_type, &id); + + if self + .client + .head_object(&location.bucket, ¤t_key) + .await + .map_err(|e| self.map_client_error(e))? 
+ .is_some() + { + return Err(StorageError::Resource(ResourceError::AlreadyExists { + resource_type: resource_type.to_string(), + id, + })); + } + + let content = self.ensure_resource_shape(resource_type, &id, resource); + let stored = StoredResource::new( + resource_type, + &id, + tenant.tenant_id().clone(), + content, + fhir_version, + ); + + let payload = self.serialize_json(&stored)?; + match self + .put_json_object(&location.bucket, ¤t_key, &payload, None, Some("*")) + .await + { + Ok(_) => { + self.put_history_and_indexes(&location, &stored, HistoryMethod::Post) + .await?; + Ok(stored) + } + Err(StorageError::Backend(BackendError::QueryError { .. })) => { + Err(StorageError::Resource(ResourceError::AlreadyExists { + resource_type: resource_type.to_string(), + id, + })) + } + Err(e) => Err(e), + } + } + + async fn create_or_update( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + resource: Value, + fhir_version: FhirVersion, + ) -> StorageResult<(StoredResource, bool)> { + match self.read(tenant, resource_type, id).await { + Ok(Some(current)) => { + let updated = self.update(tenant, ¤t, resource).await?; + Ok((updated, false)) + } + Ok(None) => { + let created = self + .create( + tenant, + resource_type, + self.ensure_resource_shape(resource_type, id, resource), + fhir_version, + ) + .await?; + Ok((created, true)) + } + Err(err) => Err(err), + } + } + + async fn read( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + ) -> StorageResult> { + let Some(current) = self + .load_current_with_meta(tenant, resource_type, id) + .await? 
+ else { + return Ok(None); + }; + + if current.resource.is_deleted() { + return Err(StorageError::Resource(ResourceError::Gone { + resource_type: resource_type.to_string(), + id: id.to_string(), + deleted_at: current.resource.deleted_at(), + })); + } + + Ok(Some(current.resource)) + } + + async fn update( + &self, + tenant: &TenantContext, + current: &StoredResource, + resource: Value, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let resource_type = current.resource_type(); + let id = current.id(); + let current_key = location.keyspace.current_resource_key(resource_type, id); + + let Some(actual) = self + .load_current_with_meta(tenant, resource_type, id) + .await? + else { + return Err(StorageError::Resource(ResourceError::NotFound { + resource_type: resource_type.to_string(), + id: id.to_string(), + })); + }; + + if actual.resource.is_deleted() { + return Err(StorageError::Resource(ResourceError::NotFound { + resource_type: resource_type.to_string(), + id: id.to_string(), + })); + } + + if actual.resource.version_id() != current.version_id() { + return Err(StorageError::Concurrency( + ConcurrencyError::VersionConflict { + resource_type: resource_type.to_string(), + id: id.to_string(), + expected_version: current.version_id().to_string(), + actual_version: actual.resource.version_id().to_string(), + }, + )); + } + + let new_content = self.ensure_resource_shape(resource_type, id, resource); + let updated = actual + .resource + .new_version(new_content, ResourceMethod::Put); + + let payload = self.serialize_json(&updated)?; + match self + .put_json_object( + &location.bucket, + ¤t_key, + &payload, + actual.etag.as_deref(), + None, + ) + .await + { + Ok(_) => { + self.put_history_and_indexes(&location, &updated, HistoryMethod::Put) + .await?; + Ok(updated) + } + Err(StorageError::Backend(BackendError::QueryError { .. })) => { + let latest = self + .load_current_with_meta(tenant, resource_type, id) + .await? 
+ .map(|v| v.resource.version_id().to_string()) + .unwrap_or_else(|| "unknown".to_string()); + Err(StorageError::Concurrency( + ConcurrencyError::VersionConflict { + resource_type: resource_type.to_string(), + id: id.to_string(), + expected_version: current.version_id().to_string(), + actual_version: latest, + }, + )) + } + Err(err) => Err(err), + } + } + + async fn delete( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + ) -> StorageResult<()> { + let location = self.tenant_location(tenant)?; + let current_key = location.keyspace.current_resource_key(resource_type, id); + + let Some(actual) = self + .load_current_with_meta(tenant, resource_type, id) + .await? + else { + return Err(StorageError::Resource(ResourceError::NotFound { + resource_type: resource_type.to_string(), + id: id.to_string(), + })); + }; + + if actual.resource.is_deleted() { + return Err(StorageError::Resource(ResourceError::Gone { + resource_type: resource_type.to_string(), + id: id.to_string(), + deleted_at: actual.resource.deleted_at(), + })); + } + + let deleted = actual.resource.mark_deleted(); + let payload = self.serialize_json(&deleted)?; + + match self + .put_json_object( + &location.bucket, + ¤t_key, + &payload, + actual.etag.as_deref(), + None, + ) + .await + { + Ok(_) => { + self.put_history_and_indexes(&location, &deleted, HistoryMethod::Delete) + .await?; + Ok(()) + } + Err(StorageError::Backend(BackendError::QueryError { .. 
})) => Err( + StorageError::Concurrency(ConcurrencyError::OptimisticLockFailure { + resource_type: resource_type.to_string(), + id: id.to_string(), + expected_etag: actual.etag.unwrap_or_default(), + actual_etag: None, + }), + ), + Err(err) => Err(err), + } + } + + async fn count( + &self, + tenant: &TenantContext, + resource_type: Option<&str>, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let keys = self.list_current_keys(&location, resource_type).await?; + + let mut count = 0u64; + for key in keys { + if let Some((resource, _)) = self + .get_json_object::(&location.bucket, &key) + .await? + { + if !resource.is_deleted() { + count += 1; + } + } + } + + Ok(count) + } +} + +#[async_trait] +impl VersionedStorage for S3Backend { + async fn vread( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + version_id: &str, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + let key = location + .keyspace + .history_version_key(resource_type, id, version_id); + + let resource = self + .get_json_object::(&location.bucket, &key) + .await? + .map(|(r, _)| r); + + Ok(resource) + } + + async fn update_with_match( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + expected_version: &str, + resource: Value, + ) -> StorageResult { + let Some(actual) = self + .load_current_with_meta(tenant, resource_type, id) + .await? 
+ else { + return Err(StorageError::Resource(ResourceError::NotFound { + resource_type: resource_type.to_string(), + id: id.to_string(), + })); + }; + + if actual.resource.is_deleted() { + return Err(StorageError::Resource(ResourceError::NotFound { + resource_type: resource_type.to_string(), + id: id.to_string(), + })); + } + + let expected = normalize_etag(expected_version); + let actual_version = actual.resource.version_id(); + if expected != actual_version { + return Err(StorageError::Concurrency( + ConcurrencyError::VersionConflict { + resource_type: resource_type.to_string(), + id: id.to_string(), + expected_version: expected.to_string(), + actual_version: actual_version.to_string(), + }, + )); + } + + self.update(tenant, &actual.resource, resource).await + } + + async fn delete_with_match( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + expected_version: &str, + ) -> StorageResult<()> { + let Some(actual) = self + .load_current_with_meta(tenant, resource_type, id) + .await? + else { + return Err(StorageError::Resource(ResourceError::NotFound { + resource_type: resource_type.to_string(), + id: id.to_string(), + })); + }; + + let expected = normalize_etag(expected_version); + let actual_version = actual.resource.version_id(); + if expected != actual_version { + return Err(StorageError::Concurrency( + ConcurrencyError::VersionConflict { + resource_type: resource_type.to_string(), + id: id.to_string(), + expected_version: expected.to_string(), + actual_version: actual_version.to_string(), + }, + )); + } + + self.delete(tenant, resource_type, id).await + } + + async fn list_versions( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + ) -> StorageResult> { + let location = self.tenant_location(tenant)?; + let prefix = location.keyspace.history_versions_prefix(resource_type, id); + + let mut versions = Vec::new(); + for object in self.list_objects_all(&location.bucket, &prefix).await? 
{ + let Some(version) = parse_version_from_history_key(&object.key) else { + continue; + }; + versions.push(version); + } + + versions.sort_by_key(|v| v.parse::().unwrap_or_default()); + versions.dedup(); + Ok(versions) + } +} + +#[async_trait] +impl InstanceHistoryProvider for S3Backend { + async fn history_instance( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + params: &HistoryParams, + ) -> StorageResult { + let versions = self.list_versions(tenant, resource_type, id).await?; + let mut entries = Vec::new(); + + for version in versions { + let Some(resource) = self.vread(tenant, resource_type, id, &version).await? else { + continue; + }; + + if !params.include_deleted && resource.is_deleted() { + continue; + } + + if let Some(since) = params.since { + if resource.last_modified() < since { + continue; + } + } + if let Some(before) = params.before { + if resource.last_modified() >= before { + continue; + } + } + + entries.push(HistoryEntry { + method: Self::history_method_for(&resource), + timestamp: resource.last_modified(), + resource, + }); + } + + self.page_history(entries, ¶ms.pagination) + } + + async fn history_instance_count( + &self, + tenant: &TenantContext, + resource_type: &str, + id: &str, + ) -> StorageResult { + Ok(self.list_versions(tenant, resource_type, id).await?.len() as u64) + } +} + +#[async_trait] +impl TypeHistoryProvider for S3Backend { + async fn history_type( + &self, + tenant: &TenantContext, + resource_type: &str, + params: &HistoryParams, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let prefix = location.keyspace.history_type_prefix(resource_type); + let mut entries = self.load_history_event_entries(&location, &prefix).await?; + + entries.retain(|entry| { + (params.include_deleted || !entry.resource.is_deleted()) + && params + .since + .map(|since| entry.timestamp >= since) + .unwrap_or(true) + && params + .before + .map(|before| entry.timestamp < before) + .unwrap_or(true) + }); + 
+ self.page_history(entries, ¶ms.pagination) + } + + async fn history_type_count( + &self, + tenant: &TenantContext, + resource_type: &str, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let prefix = location.keyspace.history_type_prefix(resource_type); + Ok(self + .list_objects_all(&location.bucket, &prefix) + .await? + .len() as u64) + } +} + +#[async_trait] +impl SystemHistoryProvider for S3Backend { + async fn history_system( + &self, + tenant: &TenantContext, + params: &HistoryParams, + ) -> StorageResult { + let location = self.tenant_location(tenant)?; + let prefix = location.keyspace.history_system_prefix(); + let mut entries = self.load_history_event_entries(&location, &prefix).await?; + + entries.retain(|entry| { + (params.include_deleted || !entry.resource.is_deleted()) + && params + .since + .map(|since| entry.timestamp >= since) + .unwrap_or(true) + && params + .before + .map(|before| entry.timestamp < before) + .unwrap_or(true) + }); + + self.page_history(entries, ¶ms.pagination) + } + + async fn history_system_count(&self, tenant: &TenantContext) -> StorageResult { + let location = self.tenant_location(tenant)?; + let prefix = location.keyspace.history_system_prefix(); + Ok(self + .list_objects_all(&location.bucket, &prefix) + .await? 
+ .len() as u64) + } +} + +fn parse_version_from_history_key(key: &str) -> Option { + if !key.ends_with(".json") { + return None; + } + let filename = key.rsplit('/').next()?; + let version = filename.strip_suffix(".json")?; + if version.is_empty() { + None + } else { + Some(version.to_string()) + } +} + +fn decode_pagination_offset(pagination: &Pagination) -> StorageResult { + match &pagination.mode { + PaginationMode::Offset(offset) => Ok(*offset as usize), + PaginationMode::Cursor(None) => Ok(0), + PaginationMode::Cursor(Some(cursor)) => { + if let Some(CursorValue::Number(offset)) = cursor.sort_values().first() { + return Ok((*offset).max(0) as usize); + } + + if let Ok(parsed) = cursor.resource_id().parse::() { + return Ok(parsed); + } + + Err(StorageError::Search(SearchError::InvalidCursor { + cursor: cursor.encode(), + })) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parse_version_key() { + assert_eq!( + parse_version_from_history_key("a/b/3.json"), + Some("3".to_string()) + ); + assert_eq!(parse_version_from_history_key("a/b/.json"), None); + assert_eq!(parse_version_from_history_key("a/b/3"), None); + } +} diff --git a/crates/persistence/src/backends/s3/tests.rs b/crates/persistence/src/backends/s3/tests.rs new file mode 100644 index 00000000..8bb2a248 --- /dev/null +++ b/crates/persistence/src/backends/s3/tests.rs @@ -0,0 +1,1096 @@ +use std::collections::{HashMap, HashSet}; +use std::io::Cursor; +use std::sync::{Arc, Mutex}; + +use async_trait::async_trait; +use chrono::{DateTime, Utc}; +use helios_fhir::FhirVersion; +use serde_json::json; +use tokio::io::BufReader; + +use crate::backends::s3::backend::S3Backend; +use crate::backends::s3::client::{ + ListObjectItem, ListObjectsResult, ObjectData, ObjectMetadata, S3Api, S3ClientError, +}; +use crate::backends::s3::config::{S3BackendConfig, S3TenancyMode}; +use crate::core::bulk_export::{BulkExportStorage, ExportDataProvider, ExportRequest}; +use crate::core::bulk_submit::{ + 
BulkProcessingOptions, BulkSubmitProvider, BulkSubmitRollbackProvider, NdjsonEntry, + StreamingBulkSubmitProvider, SubmissionId, SubmissionStatus, +}; +use crate::core::history::{ + HistoryParams, InstanceHistoryProvider, SystemHistoryProvider, TypeHistoryProvider, +}; +use crate::core::transaction::{BundleEntry, BundleMethod, BundleProvider}; +use crate::core::{ResourceStorage, VersionedStorage}; +use crate::error::{ + BulkExportError, BulkSubmitError, ConcurrencyError, ResourceError, SearchError, StorageError, + TenantError, TransactionError, +}; +use crate::tenant::{TenantContext, TenantId, TenantPermissions}; +use crate::types::{CursorValue, PageCursor, Pagination, PaginationMode}; + +#[derive(Debug, Clone)] +struct MockObject { + body: Vec, + etag: String, + last_modified: DateTime, +} + +#[derive(Debug, Default)] +struct MockState { + buckets: HashSet, + objects: HashMap<(String, String), MockObject>, + etag_counter: u64, + put_count: u64, + fail_put_after: Option, + fail_deletes: bool, +} + +#[derive(Debug, Clone, Default)] +struct MockS3Client { + state: Arc>, +} + +impl MockS3Client { + fn with_buckets(buckets: &[&str]) -> Self { + let mut state = MockState::default(); + state.buckets = buckets.iter().map(|b| (*b).to_string()).collect(); + Self { + state: Arc::new(Mutex::new(state)), + } + } + + fn set_fail_put_after(&self, put_count: u64) { + let mut state = self.state.lock().unwrap(); + state.fail_put_after = Some(put_count); + } + + fn bucket_object_count(&self, bucket: &str) -> usize { + let state = self.state.lock().unwrap(); + state.objects.keys().filter(|(b, _)| b == bucket).count() + } +} + +#[async_trait] +impl S3Api for MockS3Client { + async fn head_bucket(&self, bucket: &str) -> Result<(), S3ClientError> { + let state = self.state.lock().unwrap(); + if state.buckets.contains(bucket) { + Ok(()) + } else { + Err(S3ClientError::NotFound) + } + } + + async fn head_object( + &self, + bucket: &str, + key: &str, + ) -> Result, S3ClientError> { + let 
state = self.state.lock().unwrap(); + Ok(state + .objects + .get(&(bucket.to_string(), key.to_string())) + .map(|object| ObjectMetadata { + etag: Some(object.etag.clone()), + last_modified: Some(object.last_modified), + size: object.body.len() as i64, + })) + } + + async fn get_object( + &self, + bucket: &str, + key: &str, + ) -> Result, S3ClientError> { + let state = self.state.lock().unwrap(); + Ok(state + .objects + .get(&(bucket.to_string(), key.to_string())) + .map(|object| ObjectData { + bytes: object.body.clone(), + metadata: ObjectMetadata { + etag: Some(object.etag.clone()), + last_modified: Some(object.last_modified), + size: object.body.len() as i64, + }, + })) + } + + async fn put_object( + &self, + bucket: &str, + key: &str, + body: Vec, + _content_type: Option<&str>, + if_match: Option<&str>, + if_none_match: Option<&str>, + ) -> Result { + let mut state = self.state.lock().unwrap(); + if !state.buckets.contains(bucket) { + return Err(S3ClientError::NotFound); + } + state.put_count += 1; + if let Some(fail_after) = state.fail_put_after { + if state.put_count > fail_after { + return Err(S3ClientError::Internal("forced put failure".to_string())); + } + } + + let entry_key = (bucket.to_string(), key.to_string()); + let existing = state.objects.get(&entry_key).cloned(); + + if let Some("*") = if_none_match { + if existing.is_some() { + return Err(S3ClientError::PreconditionFailed); + } + } + + if let Some(expected) = if_match { + let Some(existing) = existing.as_ref() else { + return Err(S3ClientError::PreconditionFailed); + }; + if existing.etag != expected { + return Err(S3ClientError::PreconditionFailed); + } + } + + state.etag_counter += 1; + let etag = format!("etag-{}", state.etag_counter); + let object = MockObject { + body, + etag: etag.clone(), + last_modified: Utc::now(), + }; + state.objects.insert(entry_key, object); + + Ok(ObjectMetadata { + etag: Some(etag), + last_modified: Some(Utc::now()), + size: 0, + }) + } + + async fn 
delete_object(&self, bucket: &str, key: &str) -> Result<(), S3ClientError> { + let mut state = self.state.lock().unwrap(); + if state.fail_deletes { + return Err(S3ClientError::Internal("forced delete failure".to_string())); + } + state.objects.remove(&(bucket.to_string(), key.to_string())); + Ok(()) + } + + async fn list_objects( + &self, + bucket: &str, + prefix: &str, + continuation: Option<&str>, + max_keys: Option, + ) -> Result { + let state = self.state.lock().unwrap(); + let mut keys = state + .objects + .iter() + .filter(|((b, key), _)| b == bucket && key.starts_with(prefix)) + .map(|((_, key), value)| ListObjectItem { + key: key.clone(), + etag: Some(value.etag.clone()), + last_modified: Some(value.last_modified), + size: value.body.len() as i64, + }) + .collect::>(); + + keys.sort_by(|a, b| a.key.cmp(&b.key)); + + let start = continuation + .and_then(|token| token.parse::().ok()) + .unwrap_or(0) + .min(keys.len()); + let max = max_keys.unwrap_or(1000).max(1) as usize; + let end = start.saturating_add(max).min(keys.len()); + + let items = keys[start..end].to_vec(); + let next_continuation_token = if end < keys.len() { + Some(end.to_string()) + } else { + None + }; + + Ok(ListObjectsResult { + items, + next_continuation_token, + }) + } +} + +fn make_prefix_backend(mock: Arc) -> S3Backend { + let config = S3BackendConfig { + tenancy_mode: S3TenancyMode::PrefixPerTenant { + bucket: "test-bucket".to_string(), + }, + validate_buckets_on_startup: false, + ..Default::default() + }; + + S3Backend::with_client(config, mock).expect("backend") +} + +fn make_bucket_backend(mock: Arc) -> S3Backend { + let mut tenant_bucket_map = HashMap::new(); + tenant_bucket_map.insert("tenant-a".to_string(), "bucket-a".to_string()); + tenant_bucket_map.insert("tenant-b".to_string(), "bucket-b".to_string()); + + let config = S3BackendConfig { + tenancy_mode: S3TenancyMode::BucketPerTenant { + tenant_bucket_map, + default_system_bucket: Some("system-bucket".to_string()), + }, + 
validate_buckets_on_startup: false, + ..Default::default() + }; + + S3Backend::with_client(config, mock).expect("backend") +} + +fn tenant(id: &str) -> TenantContext { + TenantContext::new(TenantId::new(id), TenantPermissions::full_access()) +} + +#[tokio::test] +async fn crud_happy_path_and_count() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let created = backend + .create( + &tenant, + "Patient", + json!({"resourceType":"Patient","id":"p1","active":true}), + FhirVersion::default(), + ) + .await + .unwrap(); + + let read = backend + .read(&tenant, "Patient", "p1") + .await + .unwrap() + .unwrap(); + assert_eq!(read.id(), created.id()); + + let updated = backend + .update( + &tenant, + &read, + json!({"resourceType":"Patient","id":"p1","active":false}), + ) + .await + .unwrap(); + assert_eq!(updated.version_id(), "2"); + + let count_before_delete = backend.count(&tenant, Some("Patient")).await.unwrap(); + assert_eq!(count_before_delete, 1); + + backend.delete(&tenant, "Patient", "p1").await.unwrap(); + + let count_after_delete = backend.count(&tenant, Some("Patient")).await.unwrap(); + assert_eq!(count_after_delete, 0); +} + +#[tokio::test] +async fn crud_duplicate_create_and_missing_read() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + backend + .create( + &tenant, + "Patient", + json!({"resourceType":"Patient","id":"dup"}), + FhirVersion::default(), + ) + .await + .unwrap(); + + let duplicate = backend + .create( + &tenant, + "Patient", + json!({"resourceType":"Patient","id":"dup"}), + FhirVersion::default(), + ) + .await; + + assert!(matches!( + duplicate, + Err(StorageError::Resource(ResourceError::AlreadyExists { .. 
})) + )); + + let missing = backend.read(&tenant, "Patient", "missing").await.unwrap(); + assert!(missing.is_none()); +} + +#[tokio::test] +async fn crud_concurrent_create_race() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let b1 = backend.clone(); + let b2 = backend.clone(); + let t1 = tenant.clone(); + let t2 = tenant.clone(); + + let fut1 = tokio::spawn(async move { + b1.create( + &t1, + "Patient", + json!({"resourceType":"Patient","id":"race"}), + FhirVersion::default(), + ) + .await + }); + let fut2 = tokio::spawn(async move { + b2.create( + &t2, + "Patient", + json!({"resourceType":"Patient","id":"race"}), + FhirVersion::default(), + ) + .await + }); + + let r1 = fut1.await.unwrap(); + let r2 = fut2.await.unwrap(); + + let success_count = [r1.is_ok(), r2.is_ok()].into_iter().filter(|v| *v).count(); + let exists_count = [r1, r2] + .into_iter() + .filter(|r| { + matches!( + r, + Err(StorageError::Resource(ResourceError::AlreadyExists { .. 
})) + ) + }) + .count(); + + assert_eq!(success_count, 1); + assert_eq!(exists_count, 1); +} + +#[tokio::test] +async fn versioning_vread_and_conflict() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let created = backend + .create( + &tenant, + "Patient", + json!({"resourceType":"Patient","id":"v1","active":true}), + FhirVersion::default(), + ) + .await + .unwrap(); + + let updated = backend + .update_with_match( + &tenant, + "Patient", + "v1", + created.version_id(), + json!({"resourceType":"Patient","id":"v1","active":false}), + ) + .await + .unwrap(); + + assert_eq!(updated.version_id(), "2"); + + let versions = backend + .list_versions(&tenant, "Patient", "v1") + .await + .unwrap(); + assert_eq!(versions, vec!["1".to_string(), "2".to_string()]); + + let first = backend + .vread(&tenant, "Patient", "v1", "1") + .await + .unwrap() + .unwrap(); + assert_eq!(first.version_id(), "1"); + + let stale = backend + .update_with_match( + &tenant, + "Patient", + "v1", + "1", + json!({"resourceType":"Patient","id":"v1","active":true}), + ) + .await; + + assert!(matches!( + stale, + Err(StorageError::Concurrency( + ConcurrencyError::VersionConflict { .. 
} + )) + )); +} + +#[tokio::test] +async fn versioning_parallel_updates_one_conflicts() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let current = backend + .create( + &tenant, + "Patient", + json!({"resourceType":"Patient","id":"parallel"}), + FhirVersion::default(), + ) + .await + .unwrap(); + + let expected = current.version_id().to_string(); + + let b1 = backend.clone(); + let b2 = backend.clone(); + let t1 = tenant.clone(); + let t2 = tenant.clone(); + + let f1 = tokio::spawn(async move { + b1.update_with_match( + &t1, + "Patient", + "parallel", + &expected, + json!({"resourceType":"Patient","id":"parallel","a":1}), + ) + .await + }); + + let f2 = tokio::spawn(async move { + b2.update_with_match( + &t2, + "Patient", + "parallel", + "1", + json!({"resourceType":"Patient","id":"parallel","b":2}), + ) + .await + }); + + let r1 = f1.await.unwrap(); + let r2 = f2.await.unwrap(); + + let successes = [r1.is_ok(), r2.is_ok()].into_iter().filter(|v| *v).count(); + assert_eq!(successes, 1); +} + +#[tokio::test] +async fn history_instance_type_system_and_invalid_cursor() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let created = backend + .create( + &tenant, + "Patient", + json!({"resourceType":"Patient","id":"h1"}), + FhirVersion::default(), + ) + .await + .unwrap(); + + let updated = backend + .update( + &tenant, + &created, + json!({"resourceType":"Patient","id":"h1","active":true}), + ) + .await + .unwrap(); + + backend.delete(&tenant, "Patient", "h1").await.unwrap(); + + let history = backend + .history_instance( + &tenant, + "Patient", + "h1", + &HistoryParams::new().include_deleted(true), + ) + .await + .unwrap(); + + assert_eq!(history.items.len(), 3); + assert_eq!(history.items[0].resource.version_id(), "3"); + 
assert_eq!(history.items[1].resource.version_id(), updated.version_id()); + + let type_history = backend + .history_type( + &tenant, + "Patient", + &HistoryParams::new().include_deleted(true), + ) + .await + .unwrap(); + assert!(type_history.items.len() >= 3); + + let system_history = backend + .history_system(&tenant, &HistoryParams::new().include_deleted(true)) + .await + .unwrap(); + assert!(system_history.items.len() >= 3); + + let bad_cursor = PageCursor::new(vec![CursorValue::String("bad".to_string())], "oops").encode(); + let params = HistoryParams { + pagination: Pagination { + count: 10, + mode: PaginationMode::Cursor(Some(PageCursor::decode(&bad_cursor).unwrap())), + }, + ..HistoryParams::new() + }; + + let invalid = backend + .history_instance(&tenant, "Patient", "h1", ¶ms) + .await; + + assert!(matches!( + invalid, + Err(StorageError::Search(SearchError::InvalidCursor { .. })) + )); +} + +#[tokio::test] +async fn bundle_batch_mixed_results() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let entries = vec![ + BundleEntry { + method: BundleMethod::Post, + url: "Patient".to_string(), + resource: Some(json!({"resourceType":"Patient","id":"b1"})), + ..Default::default() + }, + BundleEntry { + method: BundleMethod::Get, + url: "Patient/missing".to_string(), + ..Default::default() + }, + ]; + + let result = backend.process_batch(&tenant, entries).await.unwrap(); + assert_eq!(result.entries.len(), 2); + assert_eq!(result.entries[0].status, 201); + assert_eq!(result.entries[1].status, 404); +} + +#[tokio::test] +async fn bundle_transaction_success_and_reference_resolution() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let entries = vec![ + BundleEntry { + method: BundleMethod::Post, + full_url: Some("urn:uuid:patient-1".to_string()), + url: 
"Patient".to_string(), + resource: Some(json!({"resourceType":"Patient","id":"tx-p1"})), + ..Default::default() + }, + BundleEntry { + method: BundleMethod::Post, + url: "Observation".to_string(), + resource: Some(json!({ + "resourceType":"Observation", + "id":"obs-1", + "subject": {"reference": "urn:uuid:patient-1"} + })), + ..Default::default() + }, + ]; + + let result = backend.process_transaction(&tenant, entries).await.unwrap(); + assert_eq!(result.entries.len(), 2); + + let obs = backend + .read(&tenant, "Observation", "obs-1") + .await + .unwrap() + .unwrap(); + let reference = obs + .content() + .pointer("/subject/reference") + .and_then(|v| v.as_str()) + .unwrap(); + + assert_eq!(reference, "Patient/tx-p1"); +} + +#[tokio::test] +async fn bundle_transaction_failure_rolls_back() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let entries = vec![ + BundleEntry { + method: BundleMethod::Post, + url: "Patient".to_string(), + resource: Some(json!({"resourceType":"Patient","id":"rollback-me"})), + ..Default::default() + }, + BundleEntry { + method: BundleMethod::Post, + url: "Patient".to_string(), + resource: Some(json!({"id":"missing-resource-type"})), + ..Default::default() + }, + ]; + + let result = backend.process_transaction(&tenant, entries).await; + assert!(matches!(result, Err(TransactionError::BundleError { .. }))); + + let read = backend.read(&tenant, "Patient", "rollback-me").await; + assert!(matches!( + read, + Err(StorageError::Resource(ResourceError::Gone { .. })) + )); +} + +#[tokio::test] +async fn bundle_transaction_reports_rollback_failure() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + // First create writes 4 objects (current + history + type index + system index). + // Start failing puts after that so compensation during rollback fails. 
+ mock.set_fail_put_after(4); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let entries = vec![ + BundleEntry { + method: BundleMethod::Post, + url: "Patient".to_string(), + resource: Some(json!({"resourceType":"Patient","id":"rollback-failure"})), + ..Default::default() + }, + BundleEntry { + method: BundleMethod::Post, + url: "Patient".to_string(), + resource: Some(json!({"id":"invalid"})), + ..Default::default() + }, + ]; + + let result = backend.process_transaction(&tenant, entries).await; + match result { + Err(TransactionError::BundleError { message, .. }) => { + assert!(message.contains("rollback failed")); + } + other => panic!("expected rollback failure bundle error, got {other:?}"), + } +} + +#[tokio::test] +async fn bulk_export_start_manifest_and_delete() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + backend + .create( + &tenant, + "Patient", + json!({"resourceType":"Patient","id":"e1"}), + FhirVersion::default(), + ) + .await + .unwrap(); + + let request = ExportRequest::system().with_types(vec!["Patient".to_string()]); + let job_id = backend.start_export(&tenant, request).await.unwrap(); + + let progress = backend.get_export_status(&tenant, &job_id).await.unwrap(); + assert_eq!( + progress.status, + crate::core::bulk_export::ExportStatus::Complete + ); + + let manifest = backend.get_export_manifest(&tenant, &job_id).await.unwrap(); + assert!(!manifest.output.is_empty()); + assert!(manifest.output[0].url.starts_with("s3://")); + + backend.delete_export(&tenant, &job_id).await.unwrap(); + let deleted = backend.get_export_status(&tenant, &job_id).await; + assert!(matches!( + deleted, + Err(StorageError::BulkExport( + BulkExportError::JobNotFound { .. 
} + )) + )); +} + +#[tokio::test] +async fn bulk_export_invalid_format_and_fetch_batch_cursor() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + for i in 0..3 { + backend + .create( + &tenant, + "Patient", + json!({"resourceType":"Patient","id":format!("p{}", i)}), + FhirVersion::default(), + ) + .await + .unwrap(); + } + + let invalid = backend + .start_export( + &tenant, + ExportRequest { + output_format: "application/json".to_string(), + ..ExportRequest::system() + }, + ) + .await; + assert!(matches!( + invalid, + Err(StorageError::BulkExport( + BulkExportError::UnsupportedFormat { .. } + )) + )); + + let request = ExportRequest::system(); + let batch1 = backend + .fetch_export_batch(&tenant, &request, "Patient", None, 2) + .await + .unwrap(); + assert_eq!(batch1.lines.len(), 2); + assert!(!batch1.is_last); + + let batch2 = backend + .fetch_export_batch( + &tenant, + &request, + "Patient", + batch1.next_cursor.as_deref(), + 2, + ) + .await + .unwrap(); + assert_eq!(batch2.lines.len(), 1); + assert!(batch2.is_last); +} + +#[tokio::test] +async fn bulk_submit_lifecycle_and_processing() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let submission_id = SubmissionId::new("client-a", "sub-1"); + let summary = backend + .create_submission(&tenant, &submission_id, None) + .await + .unwrap(); + assert_eq!(summary.status, SubmissionStatus::InProgress); + + let manifest = backend + .add_manifest(&tenant, &submission_id, None, None) + .await + .unwrap(); + + let entries = vec![ + NdjsonEntry::new(1, "Patient", json!({"resourceType":"Patient","id":"bs1"})), + NdjsonEntry::new(2, "Patient", json!({"resourceType":"Patient","id":"bs2"})), + ]; + + let results = backend + .process_entries( + &tenant, + &submission_id, + &manifest.manifest_id, + entries, + 
&BulkProcessingOptions::new(), + ) + .await + .unwrap(); + assert_eq!(results.len(), 2); + assert!(results.iter().all(|r| r.is_success())); + + let counts = backend + .get_entry_counts(&tenant, &submission_id, &manifest.manifest_id) + .await + .unwrap(); + assert_eq!(counts.total, 2); + assert_eq!(counts.success, 2); + + let completed = backend + .complete_submission(&tenant, &submission_id) + .await + .unwrap(); + assert_eq!(completed.status, SubmissionStatus::Complete); +} + +#[tokio::test] +async fn bulk_submit_duplicate_abort_and_rollback() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let submission_id = SubmissionId::new("client-a", "sub-dup"); + backend + .create_submission(&tenant, &submission_id, None) + .await + .unwrap(); + + let duplicate = backend + .create_submission(&tenant, &submission_id, None) + .await; + assert!(matches!( + duplicate, + Err(StorageError::BulkSubmit( + BulkSubmitError::DuplicateSubmission { .. } + )) + )); + + let manifest = backend + .add_manifest(&tenant, &submission_id, None, None) + .await + .unwrap(); + + let entries = vec![NdjsonEntry::new( + 1, + "Patient", + json!({"resourceType":"Patient","id":"rollback-submit"}), + )]; + backend + .process_entries( + &tenant, + &submission_id, + &manifest.manifest_id, + entries, + &BulkProcessingOptions::new(), + ) + .await + .unwrap(); + + let changes = backend + .list_changes(&tenant, &submission_id, 10, 0) + .await + .unwrap(); + assert_eq!(changes.len(), 1); + let rolled_back = backend + .rollback_change(&tenant, &submission_id, &changes[0]) + .await + .unwrap(); + assert!(rolled_back); + + // Keep one manifest pending so abort reports a cancellation count. 
+ backend + .add_manifest(&tenant, &submission_id, None, None) + .await + .unwrap(); + + let cancelled = backend + .abort_submission(&tenant, &submission_id, "test abort") + .await + .unwrap(); + assert_eq!(cancelled, 1); +} + +#[tokio::test] +async fn bulk_submit_stream_and_parallel_manifests_max_errors() { + let mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let backend = make_prefix_backend(mock); + let tenant = tenant("tenant-a"); + + let submission_id = SubmissionId::new("client-stream", "sub-stream"); + backend + .create_submission(&tenant, &submission_id, None) + .await + .unwrap(); + + let m1 = backend + .add_manifest(&tenant, &submission_id, None, None) + .await + .unwrap(); + let m2 = backend + .add_manifest(&tenant, &submission_id, None, None) + .await + .unwrap(); + + let ndjson = "{\"resourceType\":\"Patient\",\"id\":\"stream-1\"}\n"; + let reader = Box::new(BufReader::new(Cursor::new(ndjson.as_bytes().to_vec()))); + let stream_result = backend + .process_ndjson_stream( + &tenant, + &submission_id, + &m1.manifest_id, + "Patient", + reader, + &BulkProcessingOptions::new(), + ) + .await + .unwrap(); + assert_eq!(stream_result.counts.success, 1); + + let strict = BulkProcessingOptions::new() + .with_max_errors(1) + .with_continue_on_error(false); + + let b1 = backend.clone(); + let b2 = backend.clone(); + let t1 = tenant.clone(); + let t2 = tenant.clone(); + let sub1 = submission_id.clone(); + let sub2 = submission_id.clone(); + let m1_id = m1.manifest_id.clone(); + let m2_id = m2.manifest_id.clone(); + + let f1 = tokio::spawn(async move { + b1.process_entries( + &t1, + &sub1, + &m1_id, + vec![ + NdjsonEntry::new( + 1, + "Patient", + json!({"resourceType":"Observation","id":"x1"}), + ), + NdjsonEntry::new(2, "Patient", json!({"resourceType":"Patient","id":"x1"})), + NdjsonEntry::new(3, "Patient", json!({"resourceType":"Patient","id":"x2"})), + ], + &strict, + ) + .await + }); + + let f2 = tokio::spawn(async move { + b2.process_entries( + 
&t2, + &sub2, + &m2_id, + vec![NdjsonEntry::new( + 1, + "Patient", + json!({"resourceType":"Patient","id":"parallel-ok"}), + )], + &BulkProcessingOptions::new(), + ) + .await + }); + + let r1 = f1.await.unwrap(); + let r2 = f2.await.unwrap(); + + assert!(matches!( + r1, + Err(StorageError::BulkSubmit( + BulkSubmitError::MaxErrorsExceeded { .. } + )) + )); + assert!(r2.is_ok()); +} + +#[tokio::test] +async fn tenancy_prefix_and_bucket_modes() { + let prefix_mock = Arc::new(MockS3Client::with_buckets(&["test-bucket"])); + let prefix_backend = make_prefix_backend(prefix_mock); + + let ta = tenant("tenant-a"); + let tb = tenant("tenant-b"); + + prefix_backend + .create( + &ta, + "Patient", + json!({"resourceType":"Patient","id":"same","a":1}), + FhirVersion::default(), + ) + .await + .unwrap(); + prefix_backend + .create( + &tb, + "Patient", + json!({"resourceType":"Patient","id":"same","b":2}), + FhirVersion::default(), + ) + .await + .unwrap(); + + let ra = prefix_backend + .read(&ta, "Patient", "same") + .await + .unwrap() + .unwrap(); + let rb = prefix_backend + .read(&tb, "Patient", "same") + .await + .unwrap() + .unwrap(); + + assert_eq!(ra.content()["a"], 1); + assert_eq!(rb.content()["b"], 2); + + let bucket_mock = Arc::new(MockS3Client::with_buckets(&[ + "bucket-a", + "bucket-b", + "system-bucket", + ])); + let bucket_backend = make_bucket_backend(bucket_mock.clone()); + + bucket_backend + .create( + &ta, + "Patient", + json!({"resourceType":"Patient","id":"same"}), + FhirVersion::default(), + ) + .await + .unwrap(); + bucket_backend + .create( + &tb, + "Patient", + json!({"resourceType":"Patient","id":"same"}), + FhirVersion::default(), + ) + .await + .unwrap(); + + assert!(bucket_mock.bucket_object_count("bucket-a") > 0); + assert!(bucket_mock.bucket_object_count("bucket-b") > 0); + + let missing_tenant = tenant("tenant-c"); + let missing = bucket_backend + .create( + &missing_tenant, + "Patient", + json!({"resourceType":"Patient","id":"x"}), + 
FhirVersion::default(), + ) + .await; + + assert!(matches!( + missing, + Err(StorageError::Tenant(TenantError::InvalidTenant { .. })) + )); +} diff --git a/crates/persistence/tests/common/capabilities.rs b/crates/persistence/tests/common/capabilities.rs index 717b4637..90bd10ad 100644 --- a/crates/persistence/tests/common/capabilities.rs +++ b/crates/persistence/tests/common/capabilities.rs @@ -278,11 +278,11 @@ impl CapabilityMatrix { matrix.set_backend_capabilities( BackendKind::S3, vec![ - (BackendCapability::Crud, SupportLevel::Planned), + (BackendCapability::Crud, SupportLevel::Implemented), (BackendCapability::Versioning, SupportLevel::Implemented), (BackendCapability::InstanceHistory, SupportLevel::Implemented), - (BackendCapability::TypeHistory, SupportLevel::NotPlanned), - (BackendCapability::SystemHistory, SupportLevel::NotPlanned), + (BackendCapability::TypeHistory, SupportLevel::Implemented), + (BackendCapability::SystemHistory, SupportLevel::Implemented), (BackendCapability::BasicSearch, SupportLevel::NotPlanned), (BackendCapability::DateSearch, SupportLevel::NotPlanned), (BackendCapability::ReferenceSearch, SupportLevel::NotPlanned), @@ -294,13 +294,14 @@ impl CapabilityMatrix { (BackendCapability::TerminologySearch, SupportLevel::NotPlanned), (BackendCapability::Transactions, SupportLevel::NotPlanned), (BackendCapability::OptimisticLocking, SupportLevel::Implemented), - (BackendCapability::CursorPagination, SupportLevel::Partial), + (BackendCapability::CursorPagination, SupportLevel::Implemented), (BackendCapability::OffsetPagination, SupportLevel::NotPlanned), (BackendCapability::Sorting, SupportLevel::NotPlanned), (BackendCapability::BulkExport, SupportLevel::Implemented), - (BackendCapability::SharedSchema, SupportLevel::Planned), + (BackendCapability::BulkImport, SupportLevel::Implemented), + (BackendCapability::SharedSchema, SupportLevel::Implemented), (BackendCapability::SchemaPerTenant, SupportLevel::NotPlanned), - 
(BackendCapability::DatabasePerTenant, SupportLevel::Planned), + (BackendCapability::DatabasePerTenant, SupportLevel::Implemented), ], ); diff --git a/crates/persistence/tests/s3_tests.rs b/crates/persistence/tests/s3_tests.rs new file mode 100644 index 00000000..cdff998a --- /dev/null +++ b/crates/persistence/tests/s3_tests.rs @@ -0,0 +1,280 @@ +//! S3 backend tests. +//! +//! - Fast local tests live under `src/backends/s3/tests.rs` with a mock S3 client. +//! - Real AWS tests in this file are opt-in via `RUN_AWS_S3_TESTS=1`. + +#![cfg(feature = "s3")] + +use std::collections::HashMap; + +use helios_fhir::FhirVersion; +use helios_persistence::backends::s3::{S3Backend, S3BackendConfig, S3TenancyMode}; +use helios_persistence::core::bulk_export::{BulkExportStorage, ExportRequest}; +use helios_persistence::core::bulk_submit::{ + BulkProcessingOptions, BulkSubmitProvider, NdjsonEntry, SubmissionId, +}; +use helios_persistence::core::history::{HistoryParams, InstanceHistoryProvider}; +use helios_persistence::core::transaction::{BundleEntry, BundleMethod, BundleProvider}; +use helios_persistence::core::{ + Backend, BackendCapability, BackendKind, ResourceStorage, VersionedStorage, +}; +use helios_persistence::error::{ResourceError, StorageError}; +use helios_persistence::tenant::{TenantContext, TenantId, TenantPermissions}; +use serde_json::json; +use uuid::Uuid; + +fn run_aws_tests() -> bool { + std::env::var("RUN_AWS_S3_TESTS").ok().as_deref() == Some("1") +} + +fn tenant(id: &str) -> TenantContext { + TenantContext::new(TenantId::new(id), TenantPermissions::full_access()) +} + +fn make_prefix_backend(prefix: String) -> S3Backend { + let bucket = std::env::var("HFS_S3_TEST_BUCKET") + .expect("HFS_S3_TEST_BUCKET must be set when RUN_AWS_S3_TESTS=1"); + + let config = S3BackendConfig { + tenancy_mode: S3TenancyMode::PrefixPerTenant { bucket }, + prefix: Some(prefix), + region: std::env::var("AWS_REGION").ok(), + validate_buckets_on_startup: true, + ..Default::default() 
+    };
+
+    S3Backend::from_env(config).expect("create S3 backend")
+}
+
+fn make_bucket_per_tenant_backend(prefix: String) -> Option<S3Backend> {
+    let bucket_a = std::env::var("HFS_S3_TEST_BUCKET_TENANT_A").ok()?;
+    let bucket_b = std::env::var("HFS_S3_TEST_BUCKET_TENANT_B").ok()?;
+
+    let mut tenant_bucket_map = HashMap::new();
+    tenant_bucket_map.insert("tenant-a".to_string(), bucket_a);
+    tenant_bucket_map.insert("tenant-b".to_string(), bucket_b);
+
+    let config = S3BackendConfig {
+        tenancy_mode: S3TenancyMode::BucketPerTenant {
+            tenant_bucket_map,
+            default_system_bucket: None,
+        },
+        prefix: Some(prefix),
+        region: std::env::var("AWS_REGION").ok(),
+        validate_buckets_on_startup: true,
+        ..Default::default()
+    };
+
+    Some(S3Backend::from_env(config).expect("create bucket-per-tenant S3 backend"))
+}
+
+#[test]
+fn test_s3_capabilities_declared() {
+    let config = S3BackendConfig {
+        tenancy_mode: S3TenancyMode::PrefixPerTenant {
+            bucket: "dummy".to_string(),
+        },
+        validate_buckets_on_startup: false,
+        ..Default::default()
+    };
+
+    // Build with provider chain config load, no AWS calls because validation is disabled.
+ let backend = S3Backend::from_env(config).expect("backend creation"); + + assert_eq!(backend.kind(), BackendKind::S3); + assert!(backend.supports(BackendCapability::Crud)); + assert!(backend.supports(BackendCapability::Versioning)); + assert!(backend.supports(BackendCapability::InstanceHistory)); + assert!(backend.supports(BackendCapability::TypeHistory)); + assert!(backend.supports(BackendCapability::SystemHistory)); + assert!(backend.supports(BackendCapability::BulkExport)); + assert!(backend.supports(BackendCapability::BulkImport)); + assert!(!backend.supports(BackendCapability::BasicSearch)); + assert!(!backend.supports(BackendCapability::Transactions)); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_aws_crud_versioning_history() { + if !run_aws_tests() { + eprintln!("skipping AWS test (set RUN_AWS_S3_TESTS=1)"); + return; + } + + let backend = make_prefix_backend(format!("integration/{}/crud", Uuid::new_v4())); + let tenant = tenant("aws-tenant-a"); + + let id = format!("p-{}", Uuid::new_v4()); + let created = backend + .create( + &tenant, + "Patient", + json!({"resourceType":"Patient","id":id,"active":true}), + FhirVersion::default(), + ) + .await + .unwrap(); + + let updated = backend + .update_with_match( + &tenant, + "Patient", + created.id(), + created.version_id(), + json!({"resourceType":"Patient","id":created.id(),"active":false}), + ) + .await + .unwrap(); + + let first = backend + .vread(&tenant, "Patient", created.id(), "1") + .await + .unwrap(); + assert!(first.is_some()); + + let history = backend + .history_instance( + &tenant, + "Patient", + created.id(), + &HistoryParams::new().include_deleted(true), + ) + .await + .unwrap(); + assert!(history.items.len() >= 2); + + let stale = backend + .update_with_match( + &tenant, + "Patient", + updated.id(), + "1", + json!({"resourceType":"Patient","id":updated.id()}), + ) + .await; + assert!(stale.is_err()); + + backend + .delete(&tenant, "Patient", created.id()) + 
.await + .unwrap(); + + let gone = backend.read(&tenant, "Patient", created.id()).await; + assert!(matches!( + gone, + Err(StorageError::Resource(ResourceError::Gone { .. })) + )); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_aws_bundle_bulk_export_and_submit() { + if !run_aws_tests() { + eprintln!("skipping AWS test (set RUN_AWS_S3_TESTS=1)"); + return; + } + + let backend = make_prefix_backend(format!("integration/{}/bulk", Uuid::new_v4())); + let tenant = tenant("aws-tenant-b"); + + let entries = vec![BundleEntry { + method: BundleMethod::Post, + url: "Patient".to_string(), + resource: Some(json!({"resourceType":"Patient","id":format!("b-{}", Uuid::new_v4())})), + ..Default::default() + }]; + let bundle = backend.process_batch(&tenant, entries).await.unwrap(); + assert_eq!(bundle.entries.len(), 1); + assert_eq!(bundle.entries[0].status, 201); + + let job_id = backend + .start_export( + &tenant, + ExportRequest::system().with_types(vec!["Patient".to_string()]), + ) + .await + .unwrap(); + let manifest = backend.get_export_manifest(&tenant, &job_id).await.unwrap(); + assert!(!manifest.output.is_empty()); + + let submission_id = SubmissionId::new("aws-client", format!("sub-{}", Uuid::new_v4())); + backend + .create_submission(&tenant, &submission_id, None) + .await + .unwrap(); + let manifest_state = backend + .add_manifest(&tenant, &submission_id, None, None) + .await + .unwrap(); + + let results = backend + .process_entries( + &tenant, + &submission_id, + &manifest_state.manifest_id, + vec![NdjsonEntry::new( + 1, + "Patient", + json!({"resourceType":"Patient","id":format!("s-{}", Uuid::new_v4())}), + )], + &BulkProcessingOptions::new(), + ) + .await + .unwrap(); + + assert_eq!(results.len(), 1); + assert!(results[0].is_success()); +} + +#[tokio::test] +async fn test_aws_bucket_per_tenant_mode_if_configured() { + if !run_aws_tests() { + eprintln!("skipping AWS test (set RUN_AWS_S3_TESTS=1)"); + return; + } + + let Some(backend) 
= + make_bucket_per_tenant_backend(format!("integration/{}/tenancy", Uuid::new_v4())) + else { + eprintln!( + "skipping bucket-per-tenant AWS test (set HFS_S3_TEST_BUCKET_TENANT_A and HFS_S3_TEST_BUCKET_TENANT_B)" + ); + return; + }; + + let tenant_a = tenant("tenant-a"); + let tenant_b = tenant("tenant-b"); + let id = format!("tenant-same-{}", Uuid::new_v4()); + + backend + .create( + &tenant_a, + "Patient", + json!({"resourceType":"Patient","id":id,"flag":"a"}), + FhirVersion::default(), + ) + .await + .unwrap(); + + backend + .create( + &tenant_b, + "Patient", + json!({"resourceType":"Patient","id":id,"flag":"b"}), + FhirVersion::default(), + ) + .await + .unwrap(); + + let ra = backend + .read(&tenant_a, "Patient", &id) + .await + .unwrap() + .unwrap(); + let rb = backend + .read(&tenant_b, "Patient", &id) + .await + .unwrap() + .unwrap(); + + assert_eq!(ra.content()["flag"], "a"); + assert_eq!(rb.content()["flag"], "b"); +} From 06a76478341a1b027b9d472b1d979a835496794b Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Wed, 25 Feb 2026 15:59:52 -0500 Subject: [PATCH 2/7] deleted discussion file --- .../backends/s3/docs/discussion_roadmap.md | 985 ------------------ 1 file changed, 985 deletions(-) delete mode 100644 crates/persistence/src/backends/s3/docs/discussion_roadmap.md diff --git a/crates/persistence/src/backends/s3/docs/discussion_roadmap.md b/crates/persistence/src/backends/s3/docs/discussion_roadmap.md deleted file mode 100644 index f1a59209..00000000 --- a/crates/persistence/src/backends/s3/docs/discussion_roadmap.md +++ /dev/null @@ -1,985 +0,0 @@ -Introduction -As I write this in early 2026, I don't think it is an understatement to say that the opportunities and impact that are upon us with AI in healthcare feels like a Cambrian Explosion moment. Healthcare professionals, administrators, and patients alike will be increasingly chatting with, talking directly to, and collaborating with artificial intelligence software systems in entirely new ways. 
This will need to be done safely and carefully. -What worked five years ago, or even two years ago, is increasingly inadequate for the demands of clinical AI, population health analytics, and real-time decision support. For technical architects navigating this shift, the challenge isn't just scaling storage; it's rethinking the entire data architecture. -This discussion document shares my thoughts about an approach to persistence for the Helios FHIR Server. -This document is an architecture strategy document. In other words, it describes the main motivating direction, building blocks, and key technology ingredients that will makeup the persistence design for the Helios FHIR Server. It is not intended to be a comprehensive set of requirements and design, but instead contains enough of a starting point such that readers can understand our approach to persistence, and understand why we decided to make the decisions that we did. -Who should read this? -The Helios FHIR Server is open source software, and is being developed in the open. If you have some interest in persistence design for healthcare software - this document is for you! -My hope is that you will think about the contents of this document, comment and provide feedback! -AI Is Driving New Requirements on Data -AI workloads have upended traditional assumptions about data access patterns. Training models demand sustained high-throughput reads across massive datasets, while inference requires low-latency access to distributed data sources. In healthcare, this is compounded by the explosive growth of unstructured data. Radiology images, pathology slides, genomic sequences, clinical notes, and waveform data from monitoring devices to name a few. Structured EHR data, once the center of gravity, is increasingly extracted from the EMR and compared with other external data sources. 
Architectures optimized for transactional workloads simply cannot deliver the performance AI pipelines require, and retrofitting them is often a losing battle. -Separation of Storage and Compute -Decoupling storage from compute has moved from a cloud-native best practice to an architectural necessity, yet many FHIR server implementations haven't caught up. While cloud-based analytics platforms routinely embrace this separation, transactional FHIR servers often remain tightly coupled to their persistence layers, treating database and application as an inseparable unit. This creates painful trade-offs: over-provisioning compute to get adequate storage, or vice versa. A modern FHIR server must separate these concerns as a core architectural principle, allowing the API layer to scale horizontally for request throughput while the persistence layer scales independently for capacity and query performance. In healthcare AI workloads, this separation is especially critical. Spin up GPU clusters for model training without provisioning redundant storage, or expand storage for imaging archives without paying for idle compute. The persistence layer becomes a service with its own scaling characteristics rather than a monolithic dependency. This separation is now expected as a defining characteristic of production-ready FHIR infrastructure. -Medallion Architecture Within FHIR Persistence -We have seen our largest petabyte-scale customers transition to a Medallion Architecture strategy for their FHIR data. The bronze layer represents resources as received, preserving original payloads, source system identifiers, and ingestion metadata for auditability and replay. The silver layer applies normalization: terminology mapping, reference resolution, deduplication of resources that represent the same clinical entity, and enforcement of business rules that go beyond FHIR validation. 
The gold layer materializes optimized views for specific consumers, denormalized patient summaries for clinical applications, flattened tabular projections for analytics, or pre-computed feature sets for ML pipelines. -Hybrid and Multi-Cloud Architectures -The reality for most health IT systems is a hybrid footprint: on-premises data centers housing legacy systems and sensitive workloads, cloud platforms providing elastic compute for AI and analytics, and edge infrastructure at clinical sites. Multi-cloud strategies add another dimension, whether driven by M&A activity, best-of-breed vendor selection, or risk diversification. -Security-First and Zero-Trust Patterns in FHIR Persistence -The persistence layer is where FHIR data lives at rest, making it the most critical surface for security enforcement. Zero-trust principles must be embedded in the persistence design itself, not just the API layer above it. This means encryption at rest as a baseline, but also fine-grained access control at the resource, compartment or even finer-grained levels - ensuring that database-level access cannot bypass FHIR authorization semantics. Audit logging must capture all persistence operations with sufficient detail for HIPAA accounting-of-disclosures requirements. This typically means persisting AuditEvent resources to a separately controlled store. Consent enforcement, particularly for sensitive resource types like mental health or substance abuse records under 42 CFR Part 2, often requires persistence-layer support through segmentation, tagging, or dynamic filtering. Treating security as an API-layer concern while leaving the persistence layer permissive creates unacceptable risk. -Data Retention, Tiering, and Cost Optimization -FHIR persistence layers accumulate data over years and decades. Version history, provenance records, and audit logs all create significant cost pressure. 
Intelligent tiering within the persistence layer moves older resource versions and infrequently accessed resources to lower-cost storage classes while keeping current data on performant storage. The architectural challenge is maintaining query semantics across tiers: a search that spans active and archived resources should work transparently, even if archived retrieval is slower. Retention policies must account for regulatory requirements that vary by resource type. Imaging studies may have different retention mandates than clinical notes. A well-designed persistence layer makes tiering a configuration concern rather than an architectural constraint. -Different Data Technologies for Different Problems -A FHIR persistence layer that commits to a single storage technology is making a bet that one tool can serve all masters. This is a bet that rarely pays off as requirements evolve. The reality is that different access patterns, query types, and workloads have fundamentally different performance characteristics, and no single database technology optimizes for all of them. A patient lookup by identifier, a population-level cohort query, a graph traversal of care team relationships, and a semantic similarity search for clinical trial matching across different terminology code systems are all legitimate operations against FHIR data, yet each performs best on a different underlying technology. -Modern FHIR persistence architectures increasingly embrace polyglot persistence, which means routing data to the storage technology best suited for how that data will be accessed, while maintaining a unified FHIR API layer above. -* Relational Databases remain the workhorse for transactional FHIR operations, offering ACID guarantees, mature tooling, and well-understood query optimization for structured data with predictable access patterns. 
-* NoSQL Databases - particularly document stores - align naturally with FHIR's resource model, persisting resources as complete documents without the impedance mismatch of relational decomposition, and scaling horizontally for high-throughput ingestion. Additionally, Cassandra has been exceptional at handling web-scale data requirements without breaking the bank. -* Data Lakes provide cost-effective, schema-flexible storage for raw FHIR resources and bulk exports, serving as the foundation for large-scale analytics and ML training pipelines that need to process millions of resources. -* Data Warehouses deliver optimized analytical query performance over structured, transformed FHIR data, enabling population health analytics, quality measure computation, and business intelligence workloads that would overwhelm transactional systems. -* Graph Databases excel at traversing relationships. Patient to provider to organization to care team is an example relationship pathway that are represented as references in FHIR but are expensive to navigate through recursive joins in relational systems. -* Vector Databases enable semantic search and similarity matching over embedded representations of clinical text, supporting AI use cases like similar-patient retrieval, terminology matching, and contextual search that go beyond keyword-based FHIR queries. -* Block Storage provides the high-performance, low-latency foundation for database engines themselves, while also serving large binary attachments, imaging data, scanned documents, and waveforms that are referenced by FHIR resources but impractical to store within the resource payload. -The architectural discipline is not choosing one technology but designing the abstraction layer that routes FHIR operations to the appropriate backend while maintaining consistency, security, and a coherent developer experience. 
-Positioning the Helios FHIR Server in the FHIR Server Landscape - -The FHIR server landscape can be understood along two architectural dimensions: how tightly the implementation is coupled to its storage technology, and whether the system supports multiple specialized data stores or requires a single backend. -The vertical axis distinguishes between servers with tightly-coupled persistence where the implementation is deeply intertwined with a specific database technology, and those offering an extensible interface layer that abstracts storage concerns behind well-defined interfaces. A FHIR Server built directly on JPA (Java Persistence API) is such an example, meaning its data access patterns, query capabilities, and performance characteristics are fundamentally shaped by relational database assumptions. In contrast, an extensible interface layer defines traits or interfaces that can be implemented for any storage technology, allowing the same FHIR API to sit atop different backends without rewriting core logic. -The horizontal axis captures the difference between single storage backend architectures and polyglot persistence. Polyglot persistence is an architectural pattern where different types of data are routed to the storage technologies best suited for how that data will be accessed. For example, a polyglot system might store clinical documents in an object store optimized for large binary content, maintain patient relationships in a graph database for efficient traversal, and keep structured observations in a columnar store for fast analytical queries all while presenting a unified FHIR API to consuming applications. Most existing FHIR servers force all resources into a single database, sacrificing performance and flexibility for implementation simplicity. -The Helios FHIR Server occupies the upper-right quadrant: it combines a trait-based, open-source interface layer built in Rust with native support for polyglot persistence. 
This architecture allows organizations to optimize storage decisions for their specific access patterns while maintaining full FHIR compliance at the API layer. -Decomposing the FHIR Specification: Separation of Concerns in Persistence Design -The FHIR specification is vast. It defines resource structures, REST interactions, search semantics, terminology operations, versioning behavior, and much more. A monolithic interface, or trait that attempts to capture all of this becomes unwieldy, difficult to implement, and impossible to optimize for specific storage technologies. The Helios FHIR Server persistence design takes a different approach: decompose the specification into cohesive concerns, express each as a focused trait, and compose them to build complete storage backends. -Learning from Diesel: Type-Safe Database Abstractions -Before diving into our trait design, it's worth examining what we can learn from Diesel, Rust's most mature database abstraction layer. Diesel has solved many of the problems we face - multi-backend support, compile-time query validation, extensibility, and its design choices offer valuable lessons. -Backend Abstraction via Traits, Not Enums: Diesel defines a Backend trait that captures the differences between database systems (PostgreSQL, MySQL, SQLite) without coupling to specific implementations. The Backend trait specifies how SQL is generated, how bind parameters are collected, and how types are mapped. This allows new backends to be added without modifying core code. This is exactly what we need for polyglot FHIR persistence. -QueryFragment for Composable SQL Generation: Diesel's QueryFragment trait represents any piece of SQL that can be rendered. A WHERE clause, a JOIN, an entire SELECT statement all implement QueryFragment. This composability lets complex queries be built from simple pieces. 
For FHIR search, we can adopt a similar pattern: each search parameter modifier becomes a fragment that can be composed into complete queries. -Type-Level Query Validation: Diesel catches many errors at compile time by encoding schema information in the type system. While we can't achieve the same level of compile-time validation for dynamic FHIR queries, we can use Rust's type system to ensure that storage backends only claim to support operations they actually implement. -MultiConnection for Runtime Backend Selection: Diesel's #[derive(MultiConnection)] generates an enum that wraps multiple connection types, dispatching operations to the appropriate backend at runtime. This pattern directly applies to polyglot persistence. We can route FHIR operations to different backends based on query characteristics. -Extensibility via sql_function! and Custom Types: Diesel makes it trivial to add custom SQL functions and types. For FHIR, this translates to extensibility for custom search parameters, terminology operations, and backend-specific optimizations. -The Core Resource Storage Trait -At the foundation is the ResourceStorage trait, which handles the fundamental persistence of FHIR resources. This trait intentionally knows nothing about search, nothing about REST semantics, nothing about transactions. It simply stores and retrieves resources by type and identifier. -Multitenancy is not optional in this design. Every operation requires a TenantContext, making it impossible at the type level to accidentally execute a query without tenant scoping. There is no "escape hatch" that bypasses tenant isolation. -use async_trait::async_trait; -use serde_json::Value; - -/// Represents a stored FHIR resource with metadata. -pub struct StoredResource { - pub resource_type: String, - pub id: String, - pub version_id: String, - pub last_updated: chrono::DateTime, - pub tenant_id: TenantId, - pub resource: Value, -} - -/// Core trait for resource storage operations. 
-/// -/// All operations are tenant-scoped. There is no non-tenant code path - -/// the type system enforces that tenant context is always provided. -#[async_trait] -pub trait ResourceStorage: Send + Sync { - /// Creates a new resource within a tenant's scope, assigning an ID if not provided. - async fn create( - &self, - tenant: &TenantContext, - resource: &Value, - ) -> Result; - - /// Reads the current version of a resource within a tenant's scope. - /// Returns NotFound if the resource exists but belongs to a different tenant. - async fn read( - &self, - tenant: &TenantContext, - resource_type: &str, - id: &str, - ) -> Result, StorageError>; - - /// Updates a resource within a tenant's scope, returning the new version. - async fn update( - &self, - tenant: &TenantContext, - resource: &Value, - ) -> Result; - - /// Deletes a resource within a tenant's scope (soft delete preserving history where supported). - async fn delete( - &self, - tenant: &TenantContext, - resource_type: &str, - id: &str, - ) -> Result<(), StorageError>; - - /// Returns the storage backend identifier for logging and diagnostics. - fn backend_name(&self) -> &'static str; -} - -Notice what's absent: there's no if_match parameter for optimistic concurrency, no version-specific reads, no history. Those capabilities belong to separate traits that extend the base functionality. A storage backend that doesn't support versioning simply doesn't implement the versioning trait. -Multitenancy: A Cross-Cutting Concern -Multitenancy has downstream implications for every layer of a FHIR server, from indexing strategy to reference validation to search semantics. By requiring tenant context at the lowest storage layer, we ensure that isolation guarantees propagate upward through the entire system. -Isolation Strategies -There are three fundamental approaches to tenant isolation, each with different trade-offs: -* Database-per-tenant: Strongest isolation, simplest security model, easier compliance story. 
The downside is operational overhead that grows linearly with tenants. Connection pool management becomes complex, and schema migrations are painful at scale. -* Schema-per-tenant: Good isolation within a single database instance, allows tenant-specific indexing. PostgreSQL handles this well. Still has schema migration coordination challenges. -* Shared schema with tenant discriminator: Most operationally efficient at scale, single migration path. The risk is that every query must include tenant filtering. One missed WHERE clause and you have a data breach. -For SQL-backed FHIR persistence, the shared schema approach with a tenant_id discriminator is pragmatic, but the enforcement layer must be airtight - you literally cannot construct a storage operation without providing tenant context. -Tenant Context as a Type-Level Guarantee -Borrowing from Diesel's approach to type safety, we can make tenant context explicit in the type system. Rather than passing tenant IDs as strings that might be forgotten, we create a wrapper type that must be present for any storage operation: -/// A validated tenant context. Operations that access tenant data -/// require this type, making it impossible to forget tenant filtering. -#[derive(Debug, Clone)] -pub struct TenantContext { - tenant_id: TenantId, - /// Permissions determine what operations are allowed - permissions: TenantPermissions, - /// Whether this context can access shared/system resources - can_access_shared: bool, -} - -/// The system tenant for shared resources (terminology, conformance) -pub const SYSTEM_TENANT: TenantId = TenantId::system(); - -/// Marker trait for operations that are tenant-scoped -pub trait TenantScoped { - fn tenant(&self) -> &TenantContext; -} - -Shared Resources and the System Tenant -CodeSystems, ValueSets, StructureDefinitions, and other conformance resources are typically shared across tenants. 
We designate a "system" tenant that holds these shared resources: -/// Determines whether a resource type should be tenant-specific or shared. -pub trait ResourceTenancy { - /// Returns the tenancy model for a resource type. - fn tenancy_model(&self, resource_type: &str) -> TenancyModel; -} - -pub enum TenancyModel { - /// Resource is always tenant-specific (e.g., Patient, Observation) - TenantScoped, - /// Resource is always shared (e.g., CodeSystem, ValueSet) - Shared, - /// Resource can be either, determined by business rules - Configurable, -} - -Index Design for Multitenancy -Search performance in a multitenant system depends critically on index design. The tenant_id must be the leading column in composite indexes: --- Good: tenant_id leads, enabling efficient tenant-scoped queries -CREATE INDEX idx_patient_identifier ON patient (tenant_id, identifier_system, identifier_value); - --- Bad: tenant_id not leading, will scan all tenants -CREATE INDEX idx_patient_identifier ON patient (identifier_system, identifier_value, tenant_id); - -Versioning as a Separate Concern -FHIR's versioning model is sophisticated: every update creates a new version, version IDs are opaque strings, and the vread interaction retrieves historical versions. Not all storage backends can efficiently support this. An append-only data lake handles versioning naturally; a key-value store might not. -/// Adds version-aware operations to base storage. -#[async_trait] -pub trait VersionedStorage: ResourceStorage { - /// Reads a specific version of a resource within a tenant's scope. - async fn vread( - &self, - tenant: &TenantContext, - resource_type: &str, - id: &str, - version_id: &str, - ) -> Result, StorageError>; - - /// Updates with optimistic concurrency control. - /// Fails with VersionConflict if current version doesn't match expected. 
- async fn update_with_match( - &self, - tenant: &TenantContext, - resource: &Value, - expected_version: &str, - ) -> Result; -} - -History: Building on Versioning -History access naturally extends versioning. If a backend can read specific versions, it can also enumerate them: -/// Provides access to resource history. -#[async_trait] -pub trait HistoryProvider: VersionedStorage { - /// Returns the history of a specific resource within a tenant's scope. - async fn history_instance( - &self, - tenant: &TenantContext, - resource_type: &str, - id: &str, - params: &HistoryParams, - ) -> Result; - - /// Returns the history of all resources of a type within a tenant's scope. - async fn history_type( - &self, - tenant: &TenantContext, - resource_type: &str, - params: &HistoryParams, - ) -> Result; - - /// Returns the history of all resources within a tenant's scope. - async fn history_system( - &self, - tenant: &TenantContext, - params: &HistoryParams, - ) -> Result; -} - -/// Parameters for history queries, matching FHIR's _since, _at, _count parameters. -pub struct HistoryParams { - pub since: Option>, - pub at: Option>, - pub count: Option, -} - -The trait hierarchy HistoryProvider: VersionedStorage: ResourceStorage means that any storage backend supporting history automatically supports versioned reads and basic CRUD - all within tenant boundaries. The type system enforces this relationship. -The Search Abstraction: Decomposing FHIR's Query Model -Search is where the FHIR specification becomes genuinely complex. 
There are nine search parameter types (number, date, string, token, reference, quantity, uri, composite, special), sixteen modifiers (:exact, :contains, :not, :missing, :above, :below, :in, :not-in, :of-type, :identifier, :text, :code-text, :text-advanced, :iterate, plus resource type modifiers on references), nine comparison prefixes (eq, ne, lt, le, gt, ge, sa, eb, ap), chained parameters, reverse chaining (_has), _include and _revinclude directives, and advanced filtering via _filter. A single search query can combine all of these, all while respecting tenant boundaries. -Modeling search as a single trait would be a mistake. Instead, we decompose it into layers - and here, Diesel's QueryFragment pattern proves invaluable. -The SearchFragment Pattern (Inspired by Diesel's QueryFragment) -Diesel's QueryFragment trait allows any piece of SQL to be composable. We adapt this pattern for FHIR search, creating fragments that can be combined into complete search queries: -/// A fragment of a FHIR search that can be rendered to a backend-specific query. -/// Inspired by Diesel's QueryFragment pattern. -pub trait SearchFragment { - /// Renders this fragment to the backend's query representation. - fn apply(&self, builder: &mut B::QueryBuilder) -> Result<(), SearchError>; - - /// Whether this fragment can be efficiently evaluated by the backend. - /// Returns false if the backend would need to do post-filtering. - fn is_native(&self, backend: &B) -> bool; - - /// Estimated cost of evaluating this fragment (for query planning). - fn estimated_cost(&self, backend: &B) -> QueryCost; -} - -/// A search backend that can evaluate SearchFragments. - pub trait SearchBackend: Send + Sync { - type QueryBuilder; - type QueryResult; - - /// Creates a new query builder for this backend. - fn query_builder(&self, resource_type: &str) -> Self::QueryBuilder; - - /// Executes a built query. 
- async fn execute(&self, query: Self::QueryBuilder) -> Result; -} - -Each search modifier becomes a fragment that knows how to render itself: -/// Fragment for the :exact modifier on string parameters. -pub struct ExactStringMatch { - pub parameter: String, - pub path: FhirPath, - pub value: String, -} - -impl SearchFragment for ExactStringMatch -where - B: SupportsExactMatch, -{ - fn apply(&self, builder: &mut B::QueryBuilder) -> Result<(), SearchError> { - builder.add_exact_string_match(&self.path, &self.value) - } - - fn is_native(&self, _backend: &B) -> bool { - true // Most backends support exact string matching natively - } - - fn estimated_cost(&self, backend: &B) -> QueryCost { - backend.cost_for_exact_match(&self.path) - } -} - -/// Fragment for the :above modifier on token parameters (terminology subsumption). -pub struct SubsumesMatch { - pub parameter: String, - pub path: FhirPath, - pub system: String, - pub code: String, -} - -impl SearchFragment for SubsumesMatch -where - B: SupportsTerminologySearch, -{ - fn apply(&self, builder: &mut B::QueryBuilder) -> Result<(), SearchError> { - builder.add_subsumes_match(&self.path, &self.system, &self.code) - } - - fn is_native(&self, backend: &B) -> bool { - // Only native if the backend has integrated terminology support - backend.has_native_terminology() - } - - fn estimated_cost(&self, backend: &B) -> QueryCost { - if self.is_native(backend) { - backend.cost_for_subsumption(&self.path) - } else { - QueryCost::RequiresExpansion // Will need to expand the code set first - } - } -} - -Search Parameter Types -First, we model the search parameter types and their associated matching logic: -/// The type of a search parameter, determining matching semantics. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum SearchParamType { - Number, - Date, - String, - Token, - Reference, - Quantity, - Uri, - Composite, - Special, -} - -/// Comparison prefixes for ordered types (number, date, quantity). 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] -pub enum SearchPrefix { - #[default] - Eq, // equals (default) - Ne, // not equals - Lt, // less than - Le, // less than or equals - Gt, // greater than - Ge, // greater than or equals - Sa, // starts after - Eb, // ends before - Ap, // approximately -} - -/// Modifiers that alter search behavior. -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum SearchModifier { - Exact, // String: case-sensitive, full match - Contains, // String: substring match - Text, // Token/Reference: search display text - TextAdvanced, // Token/Reference: advanced text search - CodeText, // Token: search code text - Not, // Token: negation - Missing(bool), // All: test for presence/absence - Above, // Token/Reference/Uri: hierarchical above - Below, // Token/Reference/Uri: hierarchical below - In, // Token: value set membership - NotIn, // Token: value set non-membership - OfType, // Token (Identifier): type-qualified search - Identifier, // Reference: search by identifier - Type(String), // Reference: restrict to resource type - Iterate, // _include/_revinclude: recursive inclusion -} - -The Core Search Trait -The base search trait handles fundamental query execution without advanced features: -/// A parsed search parameter with its value and modifiers. -#[derive(Debug, Clone)] -pub struct SearchParameter { - pub name: String, - pub param_type: SearchParamType, - pub modifier: Option, - pub prefix: Option, - pub values: Vec, // Multiple values = OR -} - -/// A complete search query with all parameters. -#[derive(Debug, Clone, Default)] -pub struct SearchQuery { - /// Filter parameters (AND-joined) - pub parameters: Vec, - /// Sort specifications - pub sort: Vec, - /// Pagination - pub count: Option, - pub offset: Option, - /// Result modifiers - pub summary: Option, - pub elements: Option>, - /// Include directives - pub include: Vec, - pub revinclude: Vec, -} - -/// Base search capability for a storage backend. 
-#[async_trait] -pub trait SearchProvider: ResourceStorage { - /// Executes a search query against a resource type within a tenant's scope. - async fn search( - &self, - tenant: &TenantContext, - resource_type: &str, - query: &SearchQuery, - ) -> Result; - - -} - -Advanced Search Capabilities as Extension Traits -Not every storage backend can support every search feature. A relational database might handle token searches efficiently but struggle with subsumption queries that require terminology reasoning. A vector database might excel at text search but lack native support for date range queries. We model these variations as extension traits. -Chained Search Provider: -/// Adds support for chained parameter searches. -/// -/// Chaining allows searching by properties of referenced resources, -/// e.g., `Observation?patient.name=Smith`. This typically requires -/// join operations or graph traversal and must respect tenant boundaries -/// when following references. -#[async_trait] -pub trait ChainedSearchProvider: SearchProvider { - /// Executes a search with chained parameters within a tenant's scope. - /// - /// The implementation must ensure that chained references do not - /// cross tenant boundaries except for shared resources. - async fn search_with_chain( - &self, - tenant: &TenantContext, - resource_type: &str, - chain: &ChainedParameter, - terminal_condition: &SearchCondition, - ) -> Result; - - /// Executes a reverse chain (_has) search within a tenant's scope. - /// - /// Finds resources that are referenced by other resources matching - /// the given criteria, respecting tenant isolation. - async fn search_reverse_chain( - &self, - tenant: &TenantContext, - resource_type: &str, - has_param: &HasParameter, - ) -> Result; - - /// Returns the maximum chain depth supported by this backend. 
- fn max_chain_depth(&self) -> usize { - 4 // Reasonable default; deep chains are expensive - } -} - -/// Represents a chained parameter like `patient.organization.name` -pub struct ChainedParameter { - /// The chain of reference parameters to follow - pub chain: Vec, -} - -pub struct ChainLink { - /// The search parameter name (must be a reference type) - pub parameter: String, - /// Optional type restriction for polymorphic references - pub target_type: Option, -} - -/// Represents a _has parameter for reverse chaining -pub struct HasParameter { - /// The resource type that references us - pub referencing_type: String, - /// The reference parameter on that type pointing to us - pub reference_param: String, - /// The condition to apply to the referencing resource - pub condition: SearchCondition, - /// Nested _has for multi-level reverse chains - pub nested: Option>, -} - -Terminology Search Provider: -/// Adds terminology-aware search capabilities. -/// -/// Supports the `:above`, `:below`, `:in`, and `:not-in` modifiers -/// which require understanding of code system hierarchies and value set -/// membership. Terminology resources are typically shared across tenants, -/// but the search itself is tenant-scoped. -#[async_trait] -pub trait TerminologySearchProvider: SearchProvider { - /// Expands a code using `:below` semantics (descendants) within tenant scope. - /// - /// Returns all codes subsumed by the given code. These codes are then - /// used to filter resources belonging to the specified tenant. - async fn expand_below( - &self, - tenant: &TenantContext, - system: &str, - code: &str, - ) -> Result, StorageError>; - - /// Expands a code using `:above` semantics (ancestors) within tenant scope. - async fn expand_above( - &self, - tenant: &TenantContext, - system: &str, - code: &str, - ) -> Result, StorageError>; - - /// Checks value set membership for `:in` modifier within tenant scope. 
- /// - /// The value set itself may be shared or tenant-specific; the implementation - /// must resolve the correct value set based on tenant context. - async fn check_membership( - &self, - tenant: &TenantContext, - valueset_url: &str, - system: &str, - code: &str, - ) -> Result; - - /// Expands a value set to all member codes within tenant scope. - /// - /// Used for `:in` searches when the backend can efficiently filter - /// by an expanded code list. - async fn expand_valueset( - &self, - tenant: &TenantContext, - valueset_url: &str, - ) -> Result, StorageError>; -} - -pub struct ExpandedCode { - pub system: String, - pub code: String, - pub display: Option, -} - -Text Search Provider: -/// Adds full-text search capabilities. -/// -/// Supports `_text` (narrative search) and `_content` (full resource search) -/// parameters, as well as the `:text` modifier on string parameters. -/// All searches are scoped to the specified tenant. -#[async_trait] -pub trait TextSearchProvider: SearchProvider { - /// Searches resource narratives within a tenant's scope. - /// - /// Matches against the XHTML content in `Resource.text.div`. - async fn search_text( - &self, - tenant: &TenantContext, - resource_type: &str, - text_query: &str, - additional_params: &SearchQuery, - ) -> Result; - - /// Searches full resource content within a tenant's scope. - /// - /// Matches against all string content in the resource JSON. - async fn search_content( - &self, - tenant: &TenantContext, - resource_type: &str, - content_query: &str, - additional_params: &SearchQuery, - ) -> Result; - - /// Executes a text search on a specific parameter within tenant scope. - /// - /// Used for the `:text` modifier on string and token parameters, - /// e.g., `Condition?code:text=heart attack`. - async fn search_parameter_text( - &self, - tenant: &TenantContext, - resource_type: &str, - parameter: &str, - text_query: &str, - ) -> Result; -} - -This decomposition has practical consequences. 
When configuring a polyglot persistence layer, we can route terminology-aware searches to a backend that integrates with a terminology server (perhaps backed by a graph database), while directing simple token matches to a faster document store. The trait system makes these routing decisions explicit and type-safe. -Transactions: When Atomicity Matters -FHIR defines batch and transaction bundles. A batch processes entries independently; a transaction either succeeds completely or fails entirely with no partial effects. This all-or-nothing semantics requires database-level transaction support - something not all storage technologies provide natively. -/// Provides ACID transaction support. -/// -/// Transactions group multiple operations into an atomic unit. All -/// operations within a transaction are tenant-scoped; a single transaction -/// cannot span multiple tenants. -#[async_trait] -pub trait TransactionProvider: ResourceStorage { - /// Begins a new transaction within tenant scope. - /// - /// All operations on the returned Transaction object are scoped - /// to the specified tenant and will be committed or rolled back - /// as a unit. - async fn begin_transaction( - &self, - tenant: &TenantContext, - ) -> Result, StorageError>; -} - -/// An active transaction. -/// -/// Operations within a transaction see their own uncommitted changes -/// but are isolated from concurrent transactions. -#[async_trait] -pub trait Transaction: Send + Sync { - /// Returns the tenant context for this transaction. - fn tenant(&self) -> &TenantContext; - - /// Creates a resource within this transaction. - async fn create(&mut self, resource: &Value) -> Result; - - /// Reads a resource within this transaction (sees uncommitted changes). - async fn read( - &self, - resource_type: &str, - id: &str, - ) -> Result, StorageError>; - - /// Updates a resource within this transaction. 
- async fn update(&mut self, resource: &Value) -> Result; - - /// Deletes a resource within this transaction. - async fn delete(&mut self, resource_type: &str, id: &str) -> Result<(), StorageError>; - - /// Commits all operations in this transaction atomically. - async fn commit(self: Box) -> Result<(), StorageError>; - - /// Rolls back all operations in this transaction. - async fn rollback(self: Box) -> Result<(), StorageError>; -} - -#[derive(Debug, Clone, Copy, Default)] -pub enum IsolationLevel { - #[default] - ReadCommitted, - RepeatableRead, - Serializable, -} - -A storage backend that doesn't support transactions can still handle batch operations. It simply processes each entry independently, accepting that failures may leave partial results. The trait separation makes this distinction clear: code that requires atomicity takes &dyn TransactionProvider, while code that can tolerate partial failures takes &dyn ResourceStorage. -Audit Events: A Separated Persistence Store -AuditEvent resources should ideally be stored separately from clinical data. This isn't just a security concern; it's also an architectural one. Audit logs have different access patterns (append-heavy, rarely queried except during investigations), different retention requirements (often longer than clinical data), and different security constraints (must be tamper-evident, may require separate access controls). -/// Specialized storage for audit events. -/// -/// Audit storage is intentionally separate from clinical data storage. -/// It typically has different characteristics: -/// - Append-only or append-heavy workload -/// - Different retention policies -/// - Tamper-evident storage requirements -/// - Separate access control -#[async_trait] -pub trait AuditStorage: Send + Sync { - /// Records an audit event. This operation should be highly available - /// and should not fail clinical operations if audit storage is degraded. 
- async fn record(&self, tenant: &TenantContext, event: &AuditEvent) -> Result; - - /// Queries audit events within a time range. - async fn query( - &self, - criteria: &AuditQuery, - ) -> Result, AuditError>; - - /// Retrieves audit events for a specific resource (accounting of disclosures). - async fn disclosures_for_resource( - &self, - resource_type: &str, - resource_id: &str, - period: &DateRange, - ) -> Result, AuditError>; -} - -/// Audit query criteria supporting HIPAA accounting requirements. -#[derive(Debug, Clone, Default)] -pub struct AuditQuery { - pub patient_id: Option, - pub agent_id: Option, - pub action: Option, - pub period: Option, - pub resource_type: Option, - pub outcome: Option, -} - -The separation of AuditStorage from ResourceStorage enables critical architectural flexibility. Audit events can flow to a dedicated time-series database optimized for append-only writes, or to an immutable ledger for tamper evidence, or to a separate cloud account for security isolation. -The REST Layer: Mapping HTTP to Storage -The FHIR REST API defines interactions (read, vread, update, create, delete, search, etc.) that map HTTP verbs and URL patterns to operations. This mapping is a separate concern from storage. The same storage backend might be accessed via REST, GraphQL, messaging, or bulk export. -/// Interaction types defined by the FHIR REST specification. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Interaction { - Read, - Vread, - Update, - Patch, - Delete, - History, - Create, - Search, - Capabilities, - Batch, - Transaction, -} - -/// Scope at which an interaction operates. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum InteractionScope { - Instance, // Operations on a specific resource instance - Type, // Operations on a resource type - System, // System-wide operations -} - -/// Result of a REST interaction, capturing both outcome and metadata. 
-pub struct InteractionResult { - pub resource: Option, - pub status: HttpStatus, - pub etag: Option, - pub last_modified: Option>, - pub location: Option, - pub outcome: Option, -} - -/// Orchestrates REST interactions by coordinating storage traits. -#[async_trait] -pub trait RestHandler: Send + Sync { - /// Processes a FHIR REST interaction. - async fn handle( - &self, - interaction: Interaction, - scope: InteractionScope, - context: &InteractionContext, - ) -> Result; -} - -The RestHandler is a coordination layer that combines multiple storage traits to implement FHIR REST semantics. A read interaction needs only ResourceStorage. A vread needs VersionedStorage. A search with _include needs both SearchProvider and ResourceStorage. The REST handler composes these capabilities based on what the request requires and what the storage backend provides. -Capability Statements: Documenting What Storage Supports -The FHIR specification requires servers to publish a CapabilityStatement declaring which interactions, resources, and search parameters they support. When storage backends have different capabilities, this statement must accurately reflect the union of what's available and identify gaps. -Diesel solves a similar problem with its type system. Operations that aren't supported simply don't compile. For FHIR, we need runtime capability discovery because queries are dynamic. We model storage capabilities as a queryable trait that can generate CapabilityStatement fragments: -/// Declares the capabilities of a storage backend. -/// Inspired by Diesel's approach to backend-specific features. -pub trait StorageCapabilities { - /// Returns supported interactions for a resource type. - fn supported_interactions(&self, resource_type: &str) -> Vec; - - /// Returns supported search parameters for a resource type. - fn supported_search_params(&self, resource_type: &str) -> Vec; - - /// Returns supported search modifiers for a parameter type. 
- fn supported_modifiers(&self, param_type: SearchParamType) -> Vec; - - /// Returns whether chained search is supported. - fn supports_chaining(&self) -> bool; - - /// Returns whether reverse chaining (_has) is supported. - fn supports_reverse_chaining(&self) -> bool; - - /// Returns whether _include is supported. - fn supports_include(&self) -> bool; - - /// Returns whether _revinclude is supported. - fn supports_revinclude(&self) -> bool; - - /// Returns supported transaction isolation levels. - fn supported_isolation_levels(&self) -> Vec; - - /// Generates a FHIR CapabilityStatement fragment for this backend. - fn to_capability_statement(&self) -> Value; -} - -/// Describes support for a specific search parameter. -#[derive(Debug, Clone)] -pub struct SearchParamCapability { - pub name: String, - pub param_type: SearchParamType, - pub modifiers: Vec, - pub prefixes: Vec, - pub documentation: Option, -} - -/// Marker traits for optional capabilities, enabling compile-time -/// capability checking where possible (similar to Diesel's backend features). -pub trait SupportsExactMatch: SearchBackend {} -pub trait SupportsContainsMatch: SearchBackend {} -pub trait SupportsTerminologySearch: SearchBackend {} -pub trait SupportsFullTextSearch: SearchBackend {} -pub trait SupportsChainedSearch: SearchBackend {} -pub trait SupportsReverseChaining: SearchBackend {} - -Dynamic Capability Checking -For operations that can't be checked at compile time, we provide runtime capability checking that fails fast with clear error messages: -/// Validates that a search query can be executed by this backend. -pub trait QueryValidator: StorageCapabilities { - /// Checks if all features required by the query are supported. - fn validate_query(&self, query: &SearchQuery) -> Result<(), UnsupportedFeature>; - - /// Returns which parts of a query would need post-processing. 
- fn requires_post_processing(&self, query: &SearchQuery) -> Vec; -} - -#[derive(Debug)] -pub struct UnsupportedFeature { - pub feature: String, - pub parameter: Option, - pub suggestion: Option, -} - -impl std::fmt::Display for UnsupportedFeature { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "Unsupported feature: {}", self.feature)?; - if let Some(ref param) = self.parameter { - write!(f, " on parameter '{}'", param)?; - } - if let Some(ref suggestion) = self.suggestion { - write!(f, ". Suggestion: {}", suggestion)?; - } - Ok(()) - } -} - -The Feature Support Matrix -Different storage technologies have different strengths. A key deliverable of the Helios FHIR Server's persistence design is a clear feature support matrix that documents what each storage backend provides. This (example, work-in-progress) matrix drives both the CapabilityStatement generation and helps operators choose the right backend for their workload. -Feature PostgreSQL MongoDB Cassandra Neo4j Elasticsearch S3/Parquet -Basic CRUD ✓ ✓ ✓ ✓ ✓ Read-only -Versioning ✓ ✓ ✓ ✓ ✓ ✓ -History ✓ ✓ Limited ✓ ✓ ✓ -Transactions ✓ ✓ Limited ✓ ✗ ✗ -Multitenancy RLS + App Collection/DB Keyspace Labels Index per tenant Prefix-based -Search: String ✓ ✓ Limited ✓ ✓ ✓ -Search: Token ✓ ✓ ✓ ✓ ✓ ✓ -Search: Reference ✓ ✓ Limited ✓ ✓ ✓ -Search: Date Range ✓ ✓ ✓ ✓ ✓ ✓ -Search: Quantity ✓ ✓ Limited Limited ✓ ✓ -Modifier: :exact ✓ ✓ ✓ ✓ ✓ ✓ -Modifier: :contains ✓ ✓ ✗ ✓ ✓ ✗ -Modifier: :not ✓ ✓ ✗ ✓ ✓ ✓ -Modifier: :missing ✓ ✓ Limited ✓ ✓ ✓ -Modifier: :above/:below With terminology With terminology ✗ ✓ With terminology ✗ -Modifier: :in/:not-in With terminology With terminology ✗ ✓ With terminology ✗ -Chained Parameters ✓ ✓ ✗ ✓ Limited ✗ -Reverse Chaining (_has) ✓ ✓ ✗ ✓ ✗ ✗ -_include ✓ ✓ ✗ ✓ ✓ ✗ -_revinclude ✓ ✓ ✗ ✓ ✓ ✗ -Full-text Search ✓ ✓ ✗ ✗ ✓ ✗ -Bulk Export ✓ ✓ ✓ ✓ ✓ ✓ -This matrix isn't static. It's generated from the StorageCapabilities implementations. 
When a new storage backend is added or an existing one gains features, the matrix updates automatically. -Composing Storage Backends (Inspired by Diesel's MultiConnection) -Diesel's MultiConnection derive macro generates an enum that wraps multiple connection types, dispatching to the appropriate backend at runtime. We adapt this pattern for polyglot FHIR persistence, but with intelligent routing based on query characteristics: -/// Routes operations to appropriate storage backends based on capabilities -/// and query characteristics. Similar to Diesel's MultiConnection but with -/// query-aware routing. -pub struct CompositeStorage { - /// Primary transactional store for CRUD operations - primary: Arc, - - /// Search-optimized store (may be the same as primary) - search: Arc, - - /// Terminology service for subsumption queries - terminology: Arc, - - /// Graph store for relationship traversal - graph: Option>, - - /// Full-text search engine - text: Option>, - - /// Audit log store (always separate) - audit: Arc, - - /// Bulk export store - bulk: Arc, - - /// Query cost estimator for routing decisions - cost_estimator: Arc, -} - -The routing logic becomes explicit policy that considers both capabilities and cost: -impl CompositeStorage { - async fn route_search( - &self, - tenant: &TenantContext, - query: &SearchQuery, - ) -> Result { - // If query contains _text or _content, route to text search - if query.has_text_search() { - if let Some(ref text) = self.text { - return text.search(tenant, &query.resource_type, query).await; - } - return Err(StorageError::UnsupportedFeature(UnsupportedFeature { - feature: "full-text search".into(), - parameter: query.text_param_name(), - suggestion: Some("Remove _text/_content parameters or enable Elasticsearch backend".into()), - })); - } - - // If query contains :above or :below modifiers, involve terminology - if query.has_terminology_modifiers() { - return self.search_with_terminology(tenant, query).await; - } - - // If 
query contains chained parameters, prefer graph store - if query.has_chaining() { - if let Some(ref graph) = self.graph { - let graph_cost = self.cost_estimator.estimate_cost(query, graph.as_ref()); - let primary_cost = self.cost_estimator.estimate_cost(query, self.search.as_ref()); - - if graph_cost < primary_cost { - return graph.search(tenant, &query.resource_type, query).await; - } - } - } - - // Default to primary search - self.search.search(tenant, &query.resource_type, query).await - } - - /// Ensures _include and _revinclude respect tenant boundaries. - async fn apply_includes( - &self, - tenant: &TenantContext, - matches: Vec, - query: &SearchQuery, - ) -> Result { - let mut included = Vec::new(); - - // Process _include directives - for include in &query.include { - let resolved = self.search.resolve_includes(tenant, &matches, &[include.clone()]).await?; - included.extend(resolved); - } - - // Process _revinclude directives - for revinclude in &query.revinclude { - let resolved = self.search.resolve_revincludes(tenant, &matches, &[revinclude.clone()]).await?; - included.extend(resolved); - } - - Ok(SearchBundle { - matches, - included, - total: None, - }) - } -} - -/// Estimates the cost of executing a query on different backends. -pub trait QueryCostEstimator: Send + Sync { - fn estimate_cost( - &self, - query: &SearchQuery, - backend: &dyn StorageCapabilities, - ) -> QueryCost; -} - -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub enum QueryCost { - /// Query can be executed efficiently - Optimal(u64), - /// Query requires some post-processing - Acceptable(u64), - /// Query will be slow, consider alternatives - Expensive(u64), - /// Query requires expanding a code set first - RequiresExpansion, - /// Backend cannot execute this query - Unsupported, -} - -The Path Forward -This trait-based decomposition provides a foundation for building a FHIR persistence layer that can evolve with requirements. 
When AI workloads demand vector similarity search, we add a VectorSearchProvider trait and plug in a vector database. When regulatory requirements demand immutable audit trails, we implement AuditStorage against an append-only ledger. When performance analysis reveals that graph traversals are bottlenecking population health queries, we route those operations to a dedicated graph database. -Extensibility Following Diesel's Model: Just as Diesel's sql_function! macro makes it trivial to add custom SQL functions, our design should make it easy to add custom search parameters and modifiers. A healthcare organization might need a custom :phonetic modifier for patient name matching, or a :geo-near modifier for location-based searches. The SearchFragment pattern enables this: -// Adding a custom phonetic search modifier is straightforward -pub struct PhoneticMatch { - pub parameter: String, - pub path: FhirPath, - pub value: String, - pub algorithm: PhoneticAlgorithm, -} - -impl SearchFragment for PhoneticMatch -where - B: SupportsPhoneticSearch, // Custom capability marker -{ - fn apply(&self, builder: &mut B::QueryBuilder) -> Result<(), SearchError> { - builder.add_phonetic_match(&self.path, &self.value, &self.algorithm) - } - - fn is_native(&self, backend: &B) -> bool { - backend.has_phonetic_support(&self.algorithm) - } - - fn estimated_cost(&self, backend: &B) -> QueryCost { - backend.cost_for_phonetic(&self.path) - } -} - -This is what it means to build FHIR persistence for the AI era: not a monolithic database adapter, but a composable system of specialized capabilities that can be assembled to meet the specific needs of each deployment with tenant isolation, search routing, and extensibility built into the architecture from the start. -Thank you! -I very much look forward to your thoughts on these ideas and to the discussions that follow. -Sincerely,
-Steve From cd92540910444a3c0cdb52de7ac6afab9b6fd189 Mon Sep 17 00:00:00 2001 From: aacruzgon Date: Wed, 25 Feb 2026 16:25:15 -0500 Subject: [PATCH 3/7] Added s3 to the inferno test matrix --- .github/workflows/inferno.yml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/inferno.yml b/.github/workflows/inferno.yml index de691d0c..7cabcbeb 100644 --- a/.github/workflows/inferno.yml +++ b/.github/workflows/inferno.yml @@ -1,7 +1,7 @@ name: Inferno US Core Test Suite on: - workflow_dispatch: # Manual trigger only for initial implementation + workflow_dispatch: # Manual trigger only for initial implementation env: CARGO_TERM_COLOR: always @@ -52,8 +52,16 @@ jobs: fail-fast: false max-parallel: 3 matrix: - suite_id: [us_core_v311, us_core_v400, us_core_v501, us_core_v610, us_core_v700, us_core_v800] - backend: [sqlite, sqlite-elasticsearch, postgres] + suite_id: + [ + us_core_v311, + us_core_v400, + us_core_v501, + us_core_v610, + us_core_v700, + us_core_v800, + ] + backend: [sqlite, sqlite-elasticsearch, postgres, s3] include: - { suite_id: us_core_v311, version_label: "v3.1.1" } - { suite_id: us_core_v400, version_label: "v4.0.0" } From 5e3143a495d6198b1de666ca88bff93a36f95a5f Mon Sep 17 00:00:00 2001 From: smunini Date: Wed, 25 Feb 2026 16:52:34 -0500 Subject: [PATCH 4/7] docs(persistence): consolidate S3 backend docs into main README [skip ci] --- crates/persistence/README.md | 137 ++++++++++++++++-- .../src/backends/s3/docs/README.md | 61 -------- 2 files changed, 124 insertions(+), 74 deletions(-) delete mode 100644 crates/persistence/src/backends/s3/docs/README.md diff --git a/crates/persistence/README.md b/crates/persistence/README.md index 3ec05f93..0fcb65e8 100644 --- a/crates/persistence/README.md +++ b/crates/persistence/README.md @@ -128,18 +128,29 @@ helios-persistence/ │ │ │ └── search/ # Search query building │ │ │ ├── query_builder.rs # SQL with $N params, ILIKE, TIMESTAMPTZ │ │ │ └── writer.rs # Search 
index writer -│ │ └── elasticsearch/ # Search-optimized secondary backend -│ │ ├── backend.rs # ElasticsearchBackend with config -│ │ ├── storage.rs # ResourceStorage for sync support -│ │ ├── schema.rs # Index mappings and templates -│ │ ├── search_impl.rs # SearchProvider, TextSearchProvider -│ │ └── search/ # ES Query DSL translation -│ │ ├── query_builder.rs # FHIR SearchQuery → ES Query DSL -│ │ ├── fts.rs # Full-text search queries -│ │ ├── modifier_handlers.rs # :missing and other modifiers -│ │ └── parameter_handlers/ # Type-specific handlers -│ │ ├── string.rs, token.rs, date.rs, number.rs -│ │ ├── quantity.rs, reference.rs, uri.rs, composite.rs +│ │ ├── elasticsearch/ # Search-optimized secondary backend +│ │ │ ├── backend.rs # ElasticsearchBackend with config +│ │ │ ├── storage.rs # ResourceStorage for sync support +│ │ │ ├── schema.rs # Index mappings and templates +│ │ │ ├── search_impl.rs # SearchProvider, TextSearchProvider +│ │ │ └── search/ # ES Query DSL translation +│ │ │ ├── query_builder.rs # FHIR SearchQuery → ES Query DSL +│ │ │ ├── fts.rs # Full-text search queries +│ │ │ ├── modifier_handlers.rs # :missing and other modifiers +│ │ │ └── parameter_handlers/ # Type-specific handlers +│ │ │ ├── string.rs, token.rs, date.rs, number.rs +│ │ │ ├── quantity.rs, reference.rs, uri.rs, composite.rs +│ │ └── s3/ # AWS S3 object-storage backend +│ │ ├── backend.rs # S3Backend with connection management +│ │ ├── config.rs # S3BackendConfig, S3TenancyMode +│ │ ├── client.rs # S3Api trait and AwsS3Client implementation +│ │ ├── keyspace.rs # S3Keyspace key-path generation +│ │ ├── models.rs # HistoryIndexEvent, ExportJobState, SubmissionState +│ │ ├── storage.rs # ResourceStorage implementation +│ │ ├── bundle.rs # Batch/transaction bundle processing +│ │ ├── bulk_export.rs # BulkExportStorage implementation +│ │ ├── bulk_submit.rs # BulkSubmitProvider implementation +│ │ └── tests.rs # Integration tests │ ├── composite/ # Multi-backend coordination │ │ 
├── config.rs # CompositeConfig and builder │ │ ├── analyzer.rs # Query feature detection @@ -618,6 +629,98 @@ let composite = CompositeStorage::new(config, backends)? .with_full_primary(sqlite); ``` +## S3 Backend + +The S3 backend is a storage-focused persistence backend using AWS S3 object storage. It handles CRUD, versioning/history, and bulk workflows but is intentionally not a FHIR search engine. For query-heavy deployments, compose S3 with a DB/search backend as the primary query engine. + +### Scope + +**Primary responsibilities:** +- CRUD persistence of resources +- Versioning (`vread`, `list_versions`, optimistic conflict checks) +- Instance/type/system history via immutable history objects plus history index events +- Batch bundles and best-effort transaction bundles (non-atomic with compensating rollback) +- Bulk export (NDJSON objects + manifest/progress state in S3) +- Bulk submit (ingest + raw artifact persistence + rollback change log) +- Tenant isolation (`PrefixPerTenant` or `BucketPerTenant`) + +**Explicit non-goals:** Advanced FHIR search semantics (date/number/quantity comparisons, chained query planning, `_has`, include/revinclude fanout, cursor keyset queries). 
+
+### Configuration
+
+```rust
+use helios_persistence::backends::s3::{S3BackendConfig, S3TenancyMode};
+
+let config = S3BackendConfig {
+    tenancy_mode: S3TenancyMode::PrefixPerTenant {
+        bucket: "hfs".to_string(),
+    },
+    prefix: None,
+    region: None,
+    validate_buckets_on_startup: true,
+    bulk_export_part_size: 10_000,
+    bulk_submit_batch_size: 100,
+};
+```
+
+| Option | Default | Description |
+|--------|---------|-------------|
+| `tenancy_mode` | `PrefixPerTenant { bucket: "hfs" }` | Tenant-to-bucket mapping strategy |
+| `prefix` | `None` | Optional global key prefix applied before backend keys |
+| `region` | `None` | AWS region override (falls back to provider chain) |
+| `validate_buckets_on_startup` | `true` | Validate configured buckets with `HeadBucket` on startup |
+| `bulk_export_part_size` | `10000` | Max NDJSON lines per export output part |
+| `bulk_submit_batch_size` | `100` | Default ingestion batch size for bulk submit processing |
+
+### Tenancy Modes
+
+| Mode | Description |
+|------|-------------|
+| **PrefixPerTenant** | All tenants share one bucket with tenant-specific key prefixes |
+| **BucketPerTenant** | Each tenant maps to a specific bucket via an explicit tenant→bucket map |
+
+### Object Model
+
+Resource objects:
+
+| Object | Key Pattern |
+|--------|-------------|
+| Current pointer | `.../resources/{type}/{id}/current.json` |
+| Immutable history version | `.../resources/{type}/{id}/_history/{version}.json` |
+| Type history event | `.../history/type/{type}/{ts}_{id}_{version}_{suffix}.json` |
+| System history event | `.../history/system/{ts}_{type}_{id}_{version}_{suffix}.json` |
+
+Bulk export objects:
+
+| Object | Key Pattern |
+|--------|-------------|
+| Job state | `.../bulk/export/jobs/{job_id}/state.json` |
+| Progress | `.../bulk/export/jobs/{job_id}/progress/{type}.json` |
+| Output | `.../bulk/export/jobs/{job_id}/output/{type}/part-{n}.ndjson` |
+| Manifest | `.../bulk/export/jobs/{job_id}/manifest.json` |
+
+Bulk submit objects:
+ +| Object | Key Pattern | +|--------|-------------| +| Submission state | `.../bulk/submit/{submitter}/{submission_id}/state.json` | +| Manifest | `.../bulk/submit/{submitter}/{submission_id}/manifests/{manifest_id}.json` | +| Raw input | `.../bulk/submit/{submitter}/{submission_id}/raw/{manifest_id}/line-{line}.ndjson` | +| Results | `.../bulk/submit/{submitter}/{submission_id}/results/{manifest_id}/line-{line}.json` | +| Change log | `.../bulk/submit/{submitter}/{submission_id}/changes/{change_id}.json` | + +### Consistency and Transaction Notes + +- The backend never creates buckets — startup/runtime bucket checks use `HeadBucket` only. +- Optimistic locking relies on version checks plus S3 preconditions (`If-Match`, `If-None-Match`) where applicable. +- Transaction bundle behavior is best-effort: entries are applied sequentially, rollback is attempted in reverse order on failure, but rollback is not guaranteed under concurrent writes or partial failures. + +### AWS Credentials and Region + +Uses the AWS SDK for Rust ([`aws_sdk_s3`](https://docs.rs/aws-sdk-s3/latest/aws_sdk_s3/)) with standard provider chain: +- Region may be provided in config or via `AWS_REGION` +- Environment credentials (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, optional `AWS_SESSION_TOKEN`) are supported by provider chain behavior + ## Implementation Status ### Phase 1: Core Types ✓ @@ -774,11 +877,19 @@ The SQLite backend includes a complete FHIR search implementation using pre-comp - [x] Search offloading support - [x] ReindexableStorage implementation +### Phase 5c: S3 Backend ✓ +- [x] S3BackendConfig with PrefixPerTenant and BucketPerTenant tenancy modes +- [x] ResourceStorage implementation (CRUD via S3 objects) +- [x] VersionedStorage implementation (vread, optimistic locking) +- [x] History providers (instance, type, system via immutable history objects) +- [x] Batch and best-effort transaction bundles +- [x] BulkExportStorage implementation (NDJSON parts + manifest in S3) +- 
[x] BulkSubmitProvider implementation (ingest, raw artifacts, rollback change log) + ### Phase 5+: Additional Backends (Planned) - [ ] Cassandra backend (wide-column, partition keys) - [ ] MongoDB backend (document storage, aggregation) - [ ] Neo4j backend (graph queries, Cypher) -- [ ] S3 backend (bulk export, object storage) ### Phase 6: Composite Storage ✓ - [x] Query analysis and feature detection diff --git a/crates/persistence/src/backends/s3/docs/README.md b/crates/persistence/src/backends/s3/docs/README.md deleted file mode 100644 index e4b29cfa..00000000 --- a/crates/persistence/src/backends/s3/docs/README.md +++ /dev/null @@ -1,61 +0,0 @@ -# S3 Backend (`aws_sdk_s3`) Guarantees and Limits - -This backend is an object-storage persistence backend for Helios. It is intentionally focused on storage, versioning/history, and bulk workflows, not advanced FHIR query execution. - -## Scope and Role - -- Primary responsibilities: - - CRUD persistence of resources - - Versioning (`vread`, `list_versions`, optimistic conflict checks) - - Instance/type/system history via immutable history objects plus history index events - - Batch bundles and best-effort transaction bundles (non-atomic with compensating rollback attempts) - - Bulk export (NDJSON objects + manifest/progress state in S3) - - Bulk submit (ingest + raw artifact persistence + rollback change log) - - Tenant isolation through: - - `PrefixPerTenant` - - `BucketPerTenant` (explicit tenant→bucket map) - -- Explicit non-goals for this backend: - - Advanced FHIR search semantics as the primary query engine (`date/number/quantity` comparison semantics, full chained query planning, `_has`, include/revinclude fanout planning, full cursor keyset query engine) - -For query-heavy production deployments, run a DB/search backend as primary query engine and use S3 for bulk/history/archive responsibilities. 
- -## Object Model - -Resource objects: - -- Current pointer: `.../resources/{type}/{id}/current.json` -- Immutable history version: `.../resources/{type}/{id}/_history/{version}.json` -- Type history event: `.../history/type/{type}/{ts}_{id}_{version}_{suffix}.json` -- System history event: `.../history/system/{ts}_{type}_{id}_{version}_{suffix}.json` - -Bulk export: - -- `.../bulk/export/jobs/{job_id}/state.json` -- `.../bulk/export/jobs/{job_id}/progress/{type}.json` -- `.../bulk/export/jobs/{job_id}/output/{type}/part-{n}.ndjson` -- `.../bulk/export/jobs/{job_id}/manifest.json` - -Bulk submit: - -- `.../bulk/submit/{submitter}/{submission_id}/state.json` -- `.../bulk/submit/{submitter}/{submission_id}/manifests/{manifest_id}.json` -- `.../bulk/submit/{submitter}/{submission_id}/raw/{manifest_id}/line-{line}.ndjson` -- `.../bulk/submit/{submitter}/{submission_id}/results/{manifest_id}/line-{line}.json` -- `.../bulk/submit/{submitter}/{submission_id}/changes/{change_id}.json` - -## Consistency and Transaction Notes - -- The backend never creates buckets. -- Startup/runtime bucket checks use `HeadBucket` only. -- Optimistic locking relies on version checks plus S3 preconditions (`If-Match`, `If-None-Match`) where applicable. -- Transaction bundle behavior is best-effort: - - Entries are applied sequentially. - - On failure, rollback is attempted in reverse order. - - Rollback is not guaranteed under concurrent writes or partial failures. - -## AWS Credentials and Region - -- Uses AWS SDK for Rust (`aws_sdk_s3`) with standard provider chain. -- Region may be provided in config or via `AWS_REGION`. -- Environment credentials (`AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, optional `AWS_SESSION_TOKEN`) are supported by provider chain behavior. 
From 07643622a6e67b2ee5e8bdd3e17dc581f3052955 Mon Sep 17 00:00:00 2001 From: Steve Munini Date: Tue, 3 Mar 2026 20:58:08 -0500 Subject: [PATCH 5/7] Added comments --- crates/persistence/src/backends/s3/backend.rs | 52 ++++++++++++++++ .../src/backends/s3/bulk_export.rs | 29 +++++++++ .../src/backends/s3/bulk_submit.rs | 28 +++++++++ crates/persistence/src/backends/s3/bundle.rs | 38 ++++++++++++ crates/persistence/src/backends/s3/client.rs | 62 +++++++++++++++++++ crates/persistence/src/backends/s3/config.rs | 2 + .../persistence/src/backends/s3/keyspace.rs | 59 ++++++++++++++++++ crates/persistence/src/backends/s3/models.rs | 32 ++++++++++ crates/persistence/src/backends/s3/storage.rs | 61 ++++++++++++++++++ crates/persistence/src/backends/s3/tests.rs | 32 ++++++++++ 10 files changed, 395 insertions(+) diff --git a/crates/persistence/src/backends/s3/backend.rs b/crates/persistence/src/backends/s3/backend.rs index 7ea9919f..954ec55c 100644 --- a/crates/persistence/src/backends/s3/backend.rs +++ b/crates/persistence/src/backends/s3/backend.rs @@ -1,3 +1,6 @@ +//! AWS S3 backend — struct definition, capability matrix, and Backend trait +//! implementation. + use std::future::Future; use std::sync::Arc; @@ -26,12 +29,24 @@ impl std::fmt::Debug for S3Backend { } } +/// Opaque connection handle for the S3 backend. +/// +/// S3 is stateless from the client's perspective — there is no persistent TCP +/// connection to acquire per-request. This marker type satisfies the `Backend` +/// trait's associated `Connection` type without holding any resources. #[derive(Debug)] pub struct S3Connection; +/// Resolved bucket name and key hierarchy for a single tenant. +/// +/// Computed once per storage operation from the `TenantContext` and the +/// backend configuration, then passed through the call stack within that +/// operation. #[derive(Debug, Clone)] pub(crate) struct TenantLocation { + /// S3 bucket that holds this tenant's data. 
pub bucket: String, + /// Keyspace builder scoped to this tenant's prefix hierarchy. pub keyspace: S3Keyspace, } @@ -42,6 +57,13 @@ impl S3Backend { } /// Creates a new S3 backend using environment/provider chain credentials. + /// + /// The region is resolved in priority order: `config.region`, then the + /// `AWS_REGION` environment variable, then the standard AWS SDK provider + /// chain (shared config file, EC2 instance metadata, etc.). + /// + /// If `validate_buckets_on_startup` is set, every configured bucket is + /// verified with a `HeadBucket` call before this function returns. pub fn from_env(mut config: S3BackendConfig) -> StorageResult { config.validate()?; @@ -61,6 +83,10 @@ impl S3Backend { Ok(backend) } + /// Creates a backend with an injected `S3Api` implementation. + /// + /// Intended exclusively for unit tests that supply a mock client. + /// Not compiled into non-test builds. #[cfg(test)] pub(crate) fn with_client( config: S3BackendConfig, @@ -70,6 +96,11 @@ impl S3Backend { Ok(Self { config, client }) } + /// Verifies that every bucket referenced in the configuration exists and + /// is accessible to the current credentials. + /// + /// Issues a `HeadBucket` request for each distinct bucket. Returns the + /// first error encountered; does not attempt to create missing buckets. pub(crate) async fn validate_buckets(&self) -> StorageResult<()> { for bucket in self.config.configured_buckets() { self.client @@ -80,6 +111,14 @@ impl S3Backend { Ok(()) } + /// Resolves the bucket and keyspace for the given tenant. + /// + /// In `PrefixPerTenant` mode all tenants share one bucket and are separated + /// by a key prefix derived from the tenant ID. In `BucketPerTenant` mode + /// each tenant maps to a dedicated bucket looked up from the config map. + /// + /// Returns a `TenantError` if the tenant has no bucket assignment in the + /// `BucketPerTenant` mapping. 
pub(crate) fn tenant_location(&self, tenant: &TenantContext) -> StorageResult { let global_prefix = self .config @@ -123,6 +162,11 @@ impl S3Backend { } } + /// Maps a low-level `S3ClientError` to the shared `StorageError` taxonomy. + /// + /// This is the error boundary between the S3 SDK layer and the storage + /// trait layer. Keeping the translation here ensures all storage operations + /// return consistent error variants regardless of the underlying transport. pub(crate) fn map_client_error(&self, error: S3ClientError) -> StorageError { match error { S3ClientError::NotFound => StorageError::Backend(BackendError::Unavailable { @@ -229,6 +273,14 @@ impl Backend for S3Backend { } } +/// Drives an async future to completion on the current thread. +/// +/// If a Tokio runtime is already active the future is driven via +/// `block_in_place` to avoid nesting runtimes. Otherwise a temporary +/// single-threaded runtime is created for the duration of the call. +/// +/// Used during synchronous backend construction (`from_env`) where async SDK +/// config loading must complete before the constructor can return. fn block_on(future: F) -> StorageResult where F: Future, diff --git a/crates/persistence/src/backends/s3/bulk_export.rs b/crates/persistence/src/backends/s3/bulk_export.rs index 2234444c..947f5dae 100644 --- a/crates/persistence/src/backends/s3/bulk_export.rs +++ b/crates/persistence/src/backends/s3/bulk_export.rs @@ -1,3 +1,9 @@ +//! Bulk export implementation for the S3 backend. +//! +//! Implements `BulkExportStorage` and `ExportDataProvider`. Export jobs are +//! persisted as a small JSON state object in S3 and run synchronously within +//! the `start_export` call, writing NDJSON output parts directly to S3. + use std::collections::BTreeSet; use async_trait::async_trait; @@ -296,6 +302,11 @@ impl ExportDataProvider for S3Backend { } impl S3Backend { + /// Drives a bulk export job to completion. 
+ /// + /// Iterates over all matching resource types, fetches them in batches, and + /// writes NDJSON output parts to S3. Updates the job state object after + /// each type completes and writes the final manifest on success. async fn run_export_job( &self, tenant: &TenantContext, @@ -414,6 +425,8 @@ impl S3Backend { self.save_export_state(tenant, job_id, &state).await } + /// Writes a single NDJSON output part to S3 and returns an + /// `ExportOutputFile` describing the S3 location and line count. async fn write_export_part( &self, location: &TenantLocation, @@ -443,6 +456,7 @@ impl S3Backend { ) } + /// Returns `true` if the job state object exists in S3. async fn export_job_exists( &self, location: &TenantLocation, @@ -457,6 +471,9 @@ impl S3Backend { .is_some()) } + /// Loads and deserialises the export job state from S3. + /// + /// Returns `JobNotFound` if the state object does not exist. async fn load_export_state( &self, tenant: &TenantContext, @@ -474,6 +491,7 @@ impl S3Backend { }) } + /// Serialises and writes the export job state to S3. async fn save_export_state( &self, tenant: &TenantContext, @@ -488,6 +506,8 @@ impl S3Backend { Ok(()) } + /// Transitions the export job to the `Error` state, recording the failure + /// message in the state object. async fn mark_export_failed( &self, tenant: &TenantContext, @@ -502,6 +522,8 @@ impl S3Backend { self.save_export_state(tenant, job_id, &state).await } + /// Writes per-type export progress to S3 so that partial completion can be + /// inspected before the job finishes. async fn save_export_type_progress( &self, location: &TenantLocation, @@ -518,6 +540,9 @@ impl S3Backend { } } +/// Parses the numeric offset encoded in an export batch cursor. +/// +/// A `None` cursor is treated as offset `0` (start of the result set). 
 fn parse_export_cursor(cursor: Option<&str>) -> StorageResult { match cursor { None => Ok(0), @@ -529,6 +554,10 @@ fn parse_export_cursor(cursor: Option<&str>) -> StorageResult { } }
+/// Extracts the resource type from a `current.json` object key.
+///
+/// Keys follow the pattern `…/resources/{type}/{id}/current.json`; the
+/// segment immediately after `resources` is the resource type.
 fn parse_resource_type_from_current_key(key: &str) -> Option { let parts: Vec<&str> = key.split('/').collect(); let resources_idx = parts.iter().position(|segment| *segment == "resources")?;
diff --git a/crates/persistence/src/backends/s3/bulk_submit.rs b/crates/persistence/src/backends/s3/bulk_submit.rs
index dd87180e..faaa9257 100644
--- a/crates/persistence/src/backends/s3/bulk_submit.rs
+++ b/crates/persistence/src/backends/s3/bulk_submit.rs
@@ -1,3 +1,10 @@
+//! Bulk submission implementation for the S3 backend.
+//!
+//! Implements `BulkSubmitProvider`, `StreamingBulkSubmitProvider`, and
+//! `BulkSubmitRollbackProvider`. All submission state — including manifests,
+//! raw NDJSON lines, entry results, and change records — is persisted as
+//! individual S3 objects keyed under the submission's prefix.
+
 use async_trait::async_trait;
 use chrono::Utc;
 use helios_fhir::FhirVersion;
@@ -618,6 +625,12 @@ impl BulkSubmitRollbackProvider for S3Backend { }
 impl S3Backend {
+    /// Processes a single NDJSON entry: validates it, upserts the resource,
+    /// and records a change log entry for rollback.
+    ///
+    /// Returns a `BulkEntryResult` describing the outcome. Storage errors are
+    /// promoted to entry-level processing errors rather than aborting the whole
+    /// batch.
     async fn process_single_entry( &self, tenant: &TenantContext, @@ -732,6 +745,10 @@ impl S3Backend { } }
+    /// Archives the raw NDJSON payload for a single entry to S3.
+    ///
+    /// Stored under `raw/{manifest_id}/line-{line}.ndjson` so that the original data
+    /// is preserved for auditing after ingestion.
async fn persist_raw_entry( &self, location: &TenantLocation, @@ -765,6 +782,7 @@ impl S3Backend { Ok(()) } + /// Persists the processing result for a single entry to S3. async fn persist_entry_result( &self, location: &TenantLocation, @@ -784,6 +802,7 @@ impl S3Backend { Ok(()) } + /// Loads all entry results for a manifest from S3. async fn load_entry_results( &self, location: &TenantLocation, @@ -815,6 +834,7 @@ impl S3Backend { Ok(results) } + /// Loads all change log records for a submission from S3. async fn load_changes( &self, location: &TenantLocation, @@ -844,6 +864,7 @@ impl S3Backend { Ok(changes) } + /// Loads the submission state, returning `SubmissionNotFound` if absent. async fn load_submission_state( &self, location: &TenantLocation, @@ -859,6 +880,7 @@ impl S3Backend { }) } + /// Loads the submission state, returning `None` if it does not exist. async fn load_submission_state_optional( &self, location: &TenantLocation, @@ -873,6 +895,7 @@ impl S3Backend { .map(|(state, _)| state)) } + /// Serialises and writes the submission state to S3. async fn save_submission_state( &self, location: &TenantLocation, @@ -888,6 +911,7 @@ impl S3Backend { Ok(()) } + /// Loads a manifest state from S3, returning `None` if it does not exist. async fn load_manifest_state_optional( &self, location: &TenantLocation, @@ -906,6 +930,7 @@ impl S3Backend { .map(|(state, _)| state)) } + /// Serialises and writes a manifest state to S3. async fn save_manifest_state( &self, location: &TenantLocation, @@ -924,6 +949,7 @@ impl S3Backend { Ok(()) } + /// Lists all manifest state objects for a submission. async fn list_manifest_states( &self, location: &TenantLocation, @@ -953,6 +979,8 @@ impl S3Backend { Ok(manifests) } + /// Builds a minimal OperationOutcome from a storage error for use in + /// per-entry failure records. 
fn bulk_submit_operation_outcome(err: &StorageError) -> serde_json::Value { let code = match err { StorageError::Validation(_) => "invalid", diff --git a/crates/persistence/src/backends/s3/bundle.rs b/crates/persistence/src/backends/s3/bundle.rs index 44351615..f2542381 100644 --- a/crates/persistence/src/backends/s3/bundle.rs +++ b/crates/persistence/src/backends/s3/bundle.rs @@ -1,3 +1,10 @@ +//! Bundle processing (batch and transaction) for the S3 backend. +//! +//! Transactions are implemented with a best-effort compensation log: each +//! successful operation records a [`CompensationAction`] that is applied in +//! reverse if a later operation fails. S3 does not provide atomic multi-object +//! operations, so the rollback is advisory rather than strictly atomic. + use std::collections::HashMap; use async_trait::async_trait; @@ -14,9 +21,16 @@ use crate::types::StoredResource; use super::backend::S3Backend; +/// An undo operation recorded for each successful step in a transaction. +/// +/// Applied in reverse order if a later step fails, approximating an atomic +/// transaction rollback against an eventually-consistent object store. #[derive(Debug, Clone)] enum CompensationAction { + /// Delete a newly-created resource to undo a POST entry. Delete { resource_type: String, id: String }, + /// Overwrite the current version with a captured snapshot to undo a PUT + /// or DELETE entry. Restore { snapshot: StoredResource }, } @@ -113,6 +127,9 @@ impl BundleProvider for S3Backend { } impl S3Backend { + /// Executes a single batch entry and converts any error into a 5xx + /// `BundleEntryResult` rather than propagating it, preserving best-effort + /// batch semantics. async fn process_batch_entry( &self, tenant: &TenantContext, @@ -124,6 +141,8 @@ impl S3Backend { } } + /// Executes a single bundle entry and returns the result together with an + /// optional compensation action for rollback. 
async fn execute_bundle_entry( &self, tenant: &TenantContext, @@ -283,6 +302,10 @@ impl S3Backend { } } + /// Applies compensation actions in reverse order to undo completed steps. + /// + /// Individual rollback failures are collected and returned as a joined + /// error string rather than stopping the rollback mid-way. async fn rollback_compensations( &self, tenant: &TenantContext, @@ -303,6 +326,10 @@ impl S3Backend { } } + /// Applies a single compensation action. + /// + /// `NotFound` and `Gone` errors on delete compensations are treated as + /// success since the intended post-rollback state is already achieved. async fn apply_compensation( &self, tenant: &TenantContext, @@ -325,6 +352,8 @@ impl S3Backend { } } + /// Converts a storage error into a bundle entry result with an appropriate + /// HTTP status and a minimal OperationOutcome body. fn bundle_error_result(err: &StorageError) -> BundleEntryResult { BundleEntryResult::error( Self::storage_error_status(err), @@ -332,6 +361,7 @@ impl S3Backend { ) } + /// Maps a `StorageError` to an HTTP status code suitable for a bundle entry. fn storage_error_status(err: &StorageError) -> u16 { match err { StorageError::Validation(_) | StorageError::Search(_) => 400, @@ -348,6 +378,7 @@ impl S3Backend { } } + /// Builds a minimal OperationOutcome `Value` from a `StorageError`. fn operation_outcome(err: &StorageError) -> Value { let code = match err { StorageError::Validation(_) => "invalid", @@ -371,6 +402,11 @@ impl S3Backend { }) } + /// Parses a bundle entry URL into `(resource_type, id)`. + /// + /// Both absolute URLs (`https://base/Patient/123`) and relative paths + /// (`Patient/123`) are accepted. Returns a validation error if the URL + /// does not contain at least two path segments. 
fn parse_url(&self, url: &str) -> crate::error::StorageResult<(String, String)> { let path = url .strip_prefix("http://") @@ -395,6 +431,8 @@ impl S3Backend { } } +/// Recursively rewrites `urn:uuid:…` references in a resource JSON value +/// using the full URL map built from earlier POST entries in the bundle. fn resolve_bundle_references(value: &mut Value, reference_map: &HashMap<String, String>) { match value { Value::Object(map) => { diff --git a/crates/persistence/src/backends/s3/client.rs b/crates/persistence/src/backends/s3/client.rs index bd4f83b0..ad5e58b1 100644 --- a/crates/persistence/src/backends/s3/client.rs +++ b/crates/persistence/src/backends/s3/client.rs @@ -1,3 +1,6 @@ +//! S3 API abstraction — trait definition, request/response types, AWS SDK +//! client implementation, and SDK error mapping. + use async_trait::async_trait; use aws_config::{BehaviorVersion, Region, SdkConfig}; use aws_sdk_s3::Client; @@ -5,61 +8,104 @@ use aws_sdk_s3::error::ProvideErrorMetadata; use aws_sdk_s3::primitives::ByteStream; use chrono::{DateTime, Utc}; +/// Metadata returned from S3 object head and put operations. #[derive(Debug, Clone)] #[allow(dead_code)] pub struct ObjectMetadata { + /// ETag returned by S3; used as an optimistic concurrency token for + /// conditional writes. pub etag: Option<String>, + /// Wall-clock time of the last write, if returned by the operation. pub last_modified: Option<DateTime<Utc>>, + /// Object size in bytes. pub size: i64, } +/// Full S3 object body together with its metadata. #[derive(Debug, Clone)] pub struct ObjectData { + /// Raw object bytes. pub bytes: Vec<u8>, + /// Metadata associated with the object at the time it was fetched. pub metadata: ObjectMetadata, } +/// A single entry returned by a `ListObjects` call. #[derive(Debug, Clone)] #[allow(dead_code)] pub struct ListObjectItem { + /// Full S3 object key. pub key: String, + /// ETag of the object at the time of listing. pub etag: Option<String>, + /// Last-modified timestamp of the object.
pub last_modified: Option<DateTime<Utc>>, + /// Object size in bytes. pub size: i64, } +/// Paginated result set from a `ListObjects` call. #[derive(Debug, Clone)] pub struct ListObjectsResult { + /// Objects matching the requested prefix in this page. pub items: Vec<ListObjectItem>, + /// Continuation token to retrieve the next page, or `None` if this is the + /// last page. pub next_continuation_token: Option<String>, } +/// Normalised error variants returned by the S3 API abstraction. +/// +/// These are mapped from SDK-specific errors so that callers do not need to +/// depend on the AWS SDK error types directly. #[derive(Debug, Clone)] pub enum S3ClientError { + /// The requested bucket or object does not exist. NotFound, + /// A conditional write failed because the ETag or existence precondition + /// was not satisfied (`If-Match` or `If-None-Match: *`). PreconditionFailed, + /// The request was rate-limited by S3. Throttled(String), + /// The service was unreachable (timeout, dispatch failure, etc.). Unavailable(String), + /// The request was rejected due to invalid input (bad bucket name, etc.). InvalidInput(String), + /// An unexpected error occurred inside the SDK or service. Internal(String), } +/// Abstraction over the AWS S3 API surface used by this backend. +/// +/// Implemented by `AwsS3Client` in production and by a `MockS3Client` in +/// tests, allowing the backend logic to be exercised without a real AWS +/// account. #[async_trait] pub trait S3Api: Send + Sync { + /// Checks that `bucket` exists and is accessible to the current + /// credentials. async fn head_bucket(&self, bucket: &str) -> Result<(), S3ClientError>; + /// Returns object metadata if the key exists, or `None` if not found. async fn head_object( &self, bucket: &str, key: &str, ) -> Result<Option<ObjectMetadata>, S3ClientError>; + /// Downloads the full object body, returning `None` if the key does not + /// exist. async fn get_object( &self, bucket: &str, key: &str, ) -> Result<Option<ObjectData>, S3ClientError>; + /// Uploads `body` to the given key.
+ /// + /// `if_match` enforces that the existing ETag matches before overwriting. + /// `if_none_match = Some("*")` prevents overwriting an existing object. + /// Both conditions return `PreconditionFailed` on mismatch. async fn put_object( &self, bucket: &str, @@ -70,8 +116,12 @@ pub trait S3Api: Send + Sync { if_none_match: Option<&str>, ) -> Result<ObjectMetadata, S3ClientError>; + /// Deletes the object at the given key. Succeeds even if the key does not + /// exist. async fn delete_object(&self, bucket: &str, key: &str) -> Result<(), S3ClientError>; + /// Lists objects whose keys start with `prefix`, with optional + /// cursor-based pagination via `continuation`. async fn list_objects( &self, bucket: &str, @@ -81,18 +131,26 @@ ) -> Result<ListObjectsResult, S3ClientError>; } +/// Production `S3Api` implementation backed by the AWS SDK. #[derive(Debug, Clone)] pub struct AwsS3Client { + /// Underlying AWS SDK S3 client. client: Client, } impl AwsS3Client { + /// Constructs a client from a pre-loaded AWS SDK configuration. pub fn from_sdk_config(config: &SdkConfig) -> Self { Self { client: Client::new(config), } } + /// Loads the AWS SDK configuration from the environment. + /// + /// If `region` is `Some`, it overrides the region from the environment; + /// otherwise the standard provider chain is used (shared config file, + /// environment variables, EC2 instance metadata, etc.). pub async fn load_sdk_config(region: Option<&str>) -> SdkConfig { let mut loader = aws_config::defaults(BehaviorVersion::latest()); if let Some(region) = region { @@ -270,6 +328,10 @@ impl S3Api for AwsS3Client { } } +/// Maps an AWS SDK error to the normalised `S3ClientError` taxonomy. +/// +/// Known service error codes are matched to specific variants; everything +/// else falls through to `Internal`.
fn map_sdk_error(err: aws_sdk_s3::error::SdkError) -> S3ClientError where E: ProvideErrorMetadata + std::fmt::Debug, diff --git a/crates/persistence/src/backends/s3/config.rs b/crates/persistence/src/backends/s3/config.rs index 5b44a4be..2d0ea081 100644 --- a/crates/persistence/src/backends/s3/config.rs +++ b/crates/persistence/src/backends/s3/config.rs @@ -1,3 +1,5 @@ +//! Configuration types for the S3 backend. + use std::collections::{HashMap, HashSet}; use serde::{Deserialize, Serialize}; diff --git a/crates/persistence/src/backends/s3/keyspace.rs b/crates/persistence/src/backends/s3/keyspace.rs index 2cb3ba63..5b2089bc 100644 --- a/crates/persistence/src/backends/s3/keyspace.rs +++ b/crates/persistence/src/backends/s3/keyspace.rs @@ -1,12 +1,27 @@ +//! S3 key construction for all FHIR storage namespaces. +//! +//! Keys are structured as hierarchical paths that encode the tenant prefix, +//! resource type, resource ID, version, and operation type. [`S3Keyspace`] +//! derives every key shape used by the backend from a common base prefix. + use chrono::{DateTime, Utc}; /// Keyspace builder for S3 object paths. +/// +/// Holds an optional base prefix that is prepended to every generated key. +/// All key-building methods ensure segments are joined with `/` and that the +/// prefix never has leading or trailing slashes. #[derive(Debug, Clone)] pub struct S3Keyspace { + /// Optional prefix prepended to all keys, with surrounding slashes stripped. base_prefix: Option, } impl S3Keyspace { + /// Creates a new keyspace with an optional base prefix. + /// + /// Leading and trailing slashes in `base_prefix` are stripped. An empty + /// string is treated as no prefix. pub fn new(base_prefix: Option) -> Self { let base_prefix = base_prefix .map(|p| p.trim_matches('/').to_string()) @@ -14,6 +29,10 @@ impl S3Keyspace { Self { base_prefix } } + /// Returns a new keyspace with `tenant_id` appended to the base prefix. 
+ /// + /// Used in `PrefixPerTenant` mode to scope all keys under a per-tenant + /// directory segment without changing the bucket. pub fn with_tenant_prefix(&self, tenant_id: &str) -> Self { let tenant = tenant_id.trim_matches('/'); let merged = match &self.base_prefix { @@ -23,10 +42,14 @@ impl S3Keyspace { Self::new(Some(merged)) } + /// Key for the mutable "current" pointer of a resource. + /// + /// This object is overwritten on every create, update, and delete. pub fn current_resource_key(&self, resource_type: &str, id: &str) -> String { self.join(&["resources", resource_type, id, "current.json"]) } + /// Immutable key for a specific historical version of a resource. pub fn history_version_key(&self, resource_type: &str, id: &str, version_id: &str) -> String { self.join(&[ "resources", @@ -37,18 +60,26 @@ impl S3Keyspace { ]) } + /// Prefix covering all history version objects for a resource. pub fn history_versions_prefix(&self, resource_type: &str, id: &str) -> String { self.join(&["resources", resource_type, id, "_history/"]) } + /// Prefix covering all current resource objects across all types. pub fn resources_prefix(&self) -> String { self.join(&["resources/"]) } + /// Prefix covering all current objects of a specific resource type. pub fn resource_type_prefix(&self, resource_type: &str) -> String { self.join(&["resources", resource_type, "/"]) } + /// Key for a type-level history index event. + /// + /// The filename encodes the event timestamp in milliseconds, resource ID, + /// version ID, and a random suffix to prevent key collisions during + /// concurrent writes to the same resource. pub fn history_type_event_key( &self, resource_type: &str, @@ -71,6 +102,10 @@ impl S3Keyspace { ]) } + /// Key for a system-level history index event. + /// + /// Analogous to `history_type_event_key` but stored under the system + /// history prefix so that cross-type queries scan a single directory. 
pub fn history_system_event_key( &self, resource_type: &str, @@ -93,18 +128,22 @@ impl S3Keyspace { ]) } + /// Prefix covering all type-level history index events for a resource type. pub fn history_type_prefix(&self, resource_type: &str) -> String { self.join(&["history", "type", resource_type, "/"]) } + /// Prefix covering all system-level history index events. pub fn history_system_prefix(&self) -> String { self.join(&["history", "system/"]) } + /// Key for the JSON state object of a bulk export job. pub fn export_job_state_key(&self, job_id: &str) -> String { self.join(&["bulk", "export", "jobs", job_id, "state.json"]) } + /// Key for per-type export progress within a job. pub fn export_job_progress_key(&self, job_id: &str, resource_type: &str) -> String { self.join(&[ "bulk", @@ -116,10 +155,12 @@ impl S3Keyspace { ]) } + /// Key for the completed export manifest of a job. pub fn export_job_manifest_key(&self, job_id: &str) -> String { self.join(&["bulk", "export", "jobs", job_id, "manifest.json"]) } + /// Key for a single NDJSON output part within an export job. pub fn export_job_output_key(&self, job_id: &str, resource_type: &str, part: u32) -> String { self.join(&[ "bulk", @@ -132,18 +173,22 @@ impl S3Keyspace { ]) } + /// Prefix covering all export job objects. pub fn export_jobs_prefix(&self) -> String { self.join(&["bulk", "export", "jobs/"]) } + /// Prefix covering all objects belonging to a single export job. pub fn export_job_prefix(&self, job_id: &str) -> String { self.join(&["bulk", "export", "jobs", job_id, "/"]) } + /// Key for the JSON state object of a bulk submission. pub fn submit_state_key(&self, submitter: &str, submission_id: &str) -> String { self.join(&["bulk", "submit", submitter, submission_id, "state.json"]) } + /// Key for a manifest within a bulk submission. pub fn submit_manifest_key( &self, submitter: &str, @@ -160,6 +205,7 @@ impl S3Keyspace { ]) } + /// Key for a single raw NDJSON line within a submission manifest. 
pub fn submit_raw_line_key( &self, submitter: &str, @@ -178,6 +224,7 @@ impl S3Keyspace { ]) } + /// Key for the processing result of a single NDJSON line. pub fn submit_result_line_key( &self, submitter: &str, @@ -196,6 +243,7 @@ impl S3Keyspace { ]) } + /// Key for a recorded change (create or update) within a submission. pub fn submit_change_key( &self, submitter: &str, @@ -212,14 +260,20 @@ impl S3Keyspace { ]) } + /// Prefix covering all objects belonging to a single submission. pub fn submit_prefix(&self, submitter: &str, submission_id: &str) -> String { self.join(&["bulk", "submit", submitter, submission_id, "/"]) } + /// Prefix covering all bulk-submit objects across all submissions. pub fn submit_root_prefix(&self) -> String { self.join(&["bulk", "submit/"]) } + /// Joins `parts` with `/`, prepending the base prefix when set. + /// + /// Trailing slashes are preserved only when the final part itself ends with + /// `/` (used to produce consistent list prefixes for S3 pagination). fn join(&self, parts: &[&str]) -> String { let mut segs: Vec = Vec::new(); if let Some(prefix) = &self.base_prefix { @@ -242,6 +296,11 @@ impl S3Keyspace { } } +/// Replaces characters that are unsafe in S3 key path segments. +/// +/// Slashes, backslashes, and spaces are replaced with underscores so that +/// resource IDs and type names can be embedded in key paths without +/// accidentally splitting path segments. fn sanitize(value: &str) -> String { value .chars() diff --git a/crates/persistence/src/backends/s3/models.rs b/crates/persistence/src/backends/s3/models.rs index b71dc429..02c632ee 100644 --- a/crates/persistence/src/backends/s3/models.rs +++ b/crates/persistence/src/backends/s3/models.rs @@ -1,3 +1,9 @@ +//! S3-specific persistence models for history indexing, bulk export job +//! state, and bulk submission state. +//! +//! These types are serialised as JSON objects in S3 and are never exposed +//! outside the `s3` backend module. 
+ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; @@ -5,30 +11,56 @@ use crate::core::bulk_export::{ExportManifest, ExportProgress, ExportRequest}; use crate::core::bulk_submit::{SubmissionManifest, SubmissionSummary}; use crate::core::history::HistoryMethod; +/// A small index record written to S3 for each resource mutation. +/// +/// One event is stored under the type-level history prefix and another under +/// the system-level prefix. They are later scanned to reconstruct history +/// without loading the full resource bodies. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct HistoryIndexEvent { + /// FHIR resource type of the mutated resource. pub resource_type: String, + /// Logical resource ID. pub id: String, + /// Version ID assigned to this mutation. pub version_id: String, + /// Wall-clock time of the mutation. pub timestamp: DateTime, + /// HTTP method that produced this version. pub method: HistoryMethod, + /// True if this mutation is a logical delete (tombstone). pub deleted: bool, } +/// Durable state of a bulk export job stored in S3. +/// +/// Written to `bulk/export/jobs//state.json` and updated as the job +/// transitions through `accepted → in-progress → complete/error/cancelled`. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ExportJobState { + /// The original export request parameters. pub request: ExportRequest, + /// Current progress, including status and per-type counts. pub progress: ExportProgress, + /// The completed manifest, populated once the job reaches `Complete`. pub manifest: Option, } +/// Durable state of a bulk submission stored in S3. +/// +/// Written to `bulk/submit///state.json` when a submission is +/// created and updated on every lifecycle transition. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SubmissionState { + /// Submission summary including status and aggregate counts. 
pub summary: SubmissionSummary, + /// Human-readable reason recorded when the submission is aborted. pub abort_reason: Option, } +/// Wrapper persisted to S3 for each manifest within a bulk submission. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SubmissionManifestState { + /// The manifest metadata and current processing status. pub manifest: SubmissionManifest, } diff --git a/crates/persistence/src/backends/s3/storage.rs b/crates/persistence/src/backends/s3/storage.rs index 842ece85..08e3cfda 100644 --- a/crates/persistence/src/backends/s3/storage.rs +++ b/crates/persistence/src/backends/s3/storage.rs @@ -1,3 +1,7 @@ +//! ResourceStorage, VersionedStorage, and history provider implementations +//! for the S3 backend, plus shared helper methods for JSON serialization, +//! object I/O, and history index maintenance. + use async_trait::async_trait; use helios_fhir::FhirVersion; use serde::Serialize; @@ -23,13 +27,20 @@ use super::backend::{S3Backend, TenantLocation}; use super::client::{ListObjectItem, ObjectMetadata}; use super::models::HistoryIndexEvent; +/// A loaded current resource together with its S3 ETag. +/// +/// The ETag is used as the optimistic concurrency token for subsequent +/// conditional writes (`If-Match` on update, `If-None-Match: *` on create). #[derive(Debug, Clone)] pub(crate) struct CurrentResourceWithMeta { + /// The stored resource content and metadata. pub resource: StoredResource, + /// S3 ETag of the object at the time it was fetched. pub etag: Option, } impl S3Backend { + /// Serialises `value` to a JSON byte vector. pub(crate) fn serialize_json(&self, value: &T) -> StorageResult> { serde_json::to_vec(value).map_err(|e| { StorageError::Backend(BackendError::SerializationError { @@ -38,6 +49,7 @@ impl S3Backend { }) } + /// Deserialises a JSON byte slice into `T`. 
pub(crate) fn deserialize_json(&self, bytes: &[u8]) -> StorageResult { serde_json::from_slice(bytes).map_err(|e| { StorageError::Backend(BackendError::SerializationError { @@ -46,6 +58,11 @@ impl S3Backend { }) } + /// Writes a JSON byte payload to `key` with optional ETag preconditions. + /// + /// - `if_match`: the object must exist with exactly this ETag. + /// - `if_none_match`: typically `"*"` to prevent overwriting an existing + /// object. pub(crate) async fn put_json_object( &self, bucket: &str, @@ -67,6 +84,10 @@ impl S3Backend { .map_err(|e| self.map_client_error(e)) } + /// Writes raw bytes to `key` with the given content type. + /// + /// No conditional preconditions are applied; used for bulk export NDJSON + /// output parts and raw NDJSON archival. pub(crate) async fn put_bytes_object( &self, bucket: &str, @@ -80,6 +101,7 @@ impl S3Backend { .map_err(|e| self.map_client_error(e)) } + /// Deletes the object at `key`. Succeeds silently if the key does not exist. pub(crate) async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()> { self.client .delete_object(bucket, key) @@ -87,6 +109,7 @@ impl S3Backend { .map_err(|e| self.map_client_error(e)) } + /// Downloads and deserialises a JSON object, returning `None` if not found. pub(crate) async fn get_json_object( &self, bucket: &str, @@ -102,6 +125,8 @@ impl S3Backend { } } + /// Exhaustively lists all objects under `prefix`, auto-paginating through + /// S3 continuation tokens until the full result set is collected. pub(crate) async fn list_objects_all( &self, bucket: &str, @@ -126,6 +151,11 @@ impl S3Backend { Ok(out) } + /// Loads the current resource pointer together with its S3 ETag. + /// + /// Returns `None` if the resource has never been created. Does not check + /// whether the resource is logically deleted — callers must check + /// `StoredResource::is_deleted()` themselves. 
pub(crate) async fn load_current_with_meta( &self, tenant: &TenantContext, @@ -145,6 +175,13 @@ impl S3Backend { })) } + /// Writes the versioned history snapshot and both history index event keys + /// for a resource mutation. + /// + /// Three objects are written per mutation: + /// - The immutable history snapshot under `_history/.json`. + /// - A type-level history index event under `history/type//…`. + /// - A system-level history index event under `history/system/…`. pub(crate) async fn put_history_and_indexes( &self, location: &TenantLocation, @@ -194,6 +231,8 @@ impl S3Backend { Ok(()) } + /// Derives the `HistoryMethod` for a stored resource from its own method + /// field, falling back to `Delete` or `Put` based on the deletion flag. pub(crate) fn history_method_for(resource: &StoredResource) -> HistoryMethod { match resource.method() { Some(ResourceMethod::Post) => HistoryMethod::Post, @@ -210,6 +249,11 @@ impl S3Backend { } } + /// Sorts entries by timestamp descending and returns a cursor-paginated page. + /// + /// The cursor encodes a simple offset into the sorted list; both forward + /// and backward cursors are generated so callers can navigate in either + /// direction. pub(crate) fn page_history( &self, mut entries: Vec, @@ -256,6 +300,8 @@ impl S3Backend { )) } + /// Returns all keys ending with `/current.json` under the given resource + /// type prefix (or the entire resource tree if `resource_type` is `None`). pub(crate) async fn list_current_keys( &self, location: &TenantLocation, @@ -278,6 +324,11 @@ impl S3Backend { Ok(keys) } + /// Loads history entries by scanning all index event objects under `prefix`. + /// + /// For each event key found, the corresponding versioned history snapshot is + /// fetched and assembled into a `HistoryEntry`. Objects that fail to parse + /// are silently skipped. 
pub(crate) async fn load_history_event_entries( &self, location: &TenantLocation, @@ -315,6 +366,8 @@ impl S3Backend { Ok(entries) } + /// Ensures the resource JSON contains the correct `resourceType` and `id` + /// fields, inserting them if they are absent or incorrect. pub(crate) fn ensure_resource_shape( &self, resource_type: &str, @@ -908,6 +961,10 @@ impl SystemHistoryProvider for S3Backend { } } +/// Extracts the numeric version string from a history key filename. +/// +/// History keys have the form `…/_history/.json`; the version is the +/// filename stem. Returns `None` for empty stems or non-`.json` extensions. fn parse_version_from_history_key(key: &str) -> Option { if !key.ends_with(".json") { return None; @@ -921,6 +978,10 @@ fn parse_version_from_history_key(key: &str) -> Option { } } +/// Decodes the numeric offset from a history pagination struct. +/// +/// Handles both explicit `Offset` mode and `Cursor` mode, where the cursor +/// encodes the offset as a `CursorValue::Number`. fn decode_pagination_offset(pagination: &Pagination) -> StorageResult { match &pagination.mode { PaginationMode::Offset(offset) => Ok(*offset as usize), diff --git a/crates/persistence/src/backends/s3/tests.rs b/crates/persistence/src/backends/s3/tests.rs index 8bb2a248..08b0b385 100644 --- a/crates/persistence/src/backends/s3/tests.rs +++ b/crates/persistence/src/backends/s3/tests.rs @@ -1,3 +1,9 @@ +//! Unit tests for the S3 backend using an in-process mock S3 client. +//! +//! All tests run without AWS credentials. [`MockS3Client`] provides a +//! thread-safe in-memory S3 implementation with optional fault injection +//! for concurrency and rollback scenarios. 
+ use std::collections::{HashMap, HashSet}; use std::io::Cursor; use std::sync::{Arc, Mutex}; @@ -30,29 +36,47 @@ use crate::error::{ use crate::tenant::{TenantContext, TenantId, TenantPermissions}; use crate::types::{CursorValue, PageCursor, Pagination, PaginationMode}; +/// An in-memory representation of a single S3 object. #[derive(Debug, Clone)] struct MockObject { + /// Raw object body. body: Vec, + /// Monotonically assigned ETag string for conditional write testing. etag: String, + /// Simulated last-modified timestamp. last_modified: DateTime, } +/// Shared mutable state backing `MockS3Client`. #[derive(Debug, Default)] struct MockState { + /// Set of buckets that exist in the mock store. buckets: HashSet, + /// Stored objects keyed by `(bucket, key)`. objects: HashMap<(String, String), MockObject>, + /// Monotonic counter used to generate unique ETags. etag_counter: u64, + /// Total number of `put_object` calls received. put_count: u64, + /// When set, puts fail once this call count is exceeded (fault injection). fail_put_after: Option, + /// When true, all `delete_object` calls return an internal error. fail_deletes: bool, } +/// An in-process S3 mock implementing `S3Api`. +/// +/// Designed for deterministic unit tests that exercise the backend logic +/// without an AWS account. Supports optional fault injection to simulate +/// concurrent write conflicts and network errors. #[derive(Debug, Clone, Default)] struct MockS3Client { + /// Shared state, cloneable across multiple backend instances in a test. state: Arc>, } impl MockS3Client { + /// Creates a mock client with the specified buckets pre-seeded. fn with_buckets(buckets: &[&str]) -> Self { let mut state = MockState::default(); state.buckets = buckets.iter().map(|b| (*b).to_string()).collect(); @@ -61,11 +85,15 @@ impl MockS3Client { } } + /// Configures the mock to fail all `put_object` calls once `put_count` + /// successful puts have been observed. 
Used to simulate partial-write + /// failures during rollback testing. fn set_fail_put_after(&self, put_count: u64) { let mut state = self.state.lock().unwrap(); state.fail_put_after = Some(put_count); } + /// Returns the number of objects currently stored in `bucket`. fn bucket_object_count(&self, bucket: &str) -> usize { let state = self.state.lock().unwrap(); state.objects.keys().filter(|(b, _)| b == bucket).count() @@ -224,6 +252,7 @@ impl S3Api for MockS3Client { } } +/// Constructs a `PrefixPerTenant` backend backed by the given mock client. fn make_prefix_backend(mock: Arc) -> S3Backend { let config = S3BackendConfig { tenancy_mode: S3TenancyMode::PrefixPerTenant { @@ -236,6 +265,8 @@ fn make_prefix_backend(mock: Arc) -> S3Backend { S3Backend::with_client(config, mock).expect("backend") } +/// Constructs a `BucketPerTenant` backend backed by the given mock client +/// with `tenant-a → bucket-a`, `tenant-b → bucket-b`, and a system bucket. fn make_bucket_backend(mock: Arc) -> S3Backend { let mut tenant_bucket_map = HashMap::new(); tenant_bucket_map.insert("tenant-a".to_string(), "bucket-a".to_string()); @@ -253,6 +284,7 @@ fn make_bucket_backend(mock: Arc) -> S3Backend { S3Backend::with_client(config, mock).expect("backend") } +/// Creates a full-access `TenantContext` for the given tenant ID string. 
fn tenant(id: &str) -> TenantContext { TenantContext::new(TenantId::new(id), TenantPermissions::full_access()) } From 31f9e65171a5596c316dec168611c0c111e2dc79 Mon Sep 17 00:00:00 2001 From: smunini Date: Wed, 4 Mar 2026 13:02:51 -0500 Subject: [PATCH 6/7] fix(rest): improve batch/transaction response FHIR spec compliance - Fix outcome placement: error OperationOutcome now goes in response.outcome instead of being incorrectly placed at entry.resource - Add fullUrl to response entries using HFS_BASE_URL - Honor Prefer header (return=minimal, return=representation, return=OperationOutcome) in batch and transaction responses - Add lastModified, etag, and location to batch response entries by unifying batch and transaction paths through BundleEntryResult - Add 22 new integration tests in batch_conformance.rs covering all fixes --- crates/rest/src/handlers/batch.rs | 183 +++--- crates/rest/tests/batch_conformance.rs | 811 +++++++++++++++++++++++++ 2 files changed, 924 insertions(+), 70 deletions(-) create mode 100644 crates/rest/tests/batch_conformance.rs diff --git a/crates/rest/src/handlers/batch.rs b/crates/rest/src/handlers/batch.rs index 578cdd33..77bee9b3 100644 --- a/crates/rest/src/handlers/batch.rs +++ b/crates/rest/src/handlers/batch.rs @@ -19,6 +19,7 @@ use tracing::{debug, error, warn}; use crate::error::{RestError, RestResult}; use crate::extractors::TenantExtractor; +use crate::middleware::prefer::PreferHeader; use crate::state::AppState; /// Handler for batch/transaction processing. 
@@ -46,6 +47,7 @@ use crate::state::AppState; pub async fn batch_handler( State(state): State>, tenant: TenantExtractor, + prefer: PreferHeader, Json(bundle): Json, ) -> RestResult where @@ -75,8 +77,8 @@ where })?; match bundle_type { - "batch" => process_batch(&state, tenant, &bundle).await, - "transaction" => process_transaction(&state, tenant, &bundle).await, + "batch" => process_batch(&state, tenant, &prefer, &bundle).await, + "transaction" => process_transaction(&state, tenant, &prefer, &bundle).await, _ => Err(RestError::BadRequest { message: format!( "Bundle type must be 'batch' or 'transaction', got '{}'", @@ -90,6 +92,7 @@ where async fn process_batch( state: &AppState, tenant: TenantExtractor, + prefer: &PreferHeader, bundle: &Value, ) -> RestResult where @@ -106,11 +109,12 @@ where .cloned() .unwrap_or_default(); + let base_url = state.base_url(); let mut response_entries = Vec::with_capacity(entries.len()); for (index, entry) in entries.iter().enumerate() { let result = process_batch_entry(state, &tenant, entry, index).await; - response_entries.push(result); + response_entries.push(bundle_entry_result_to_json(&result, base_url, prefer)); } let response_bundle = serde_json::json!({ @@ -138,6 +142,7 @@ where async fn process_transaction( state: &AppState, tenant: TenantExtractor, + prefer: &PreferHeader, bundle: &Value, ) -> RestResult where @@ -201,9 +206,10 @@ where .collect(); ordered_results.sort_by_key(|(idx, _)| *idx); + let base_url = state.base_url(); let response_entries: Vec = ordered_results .into_iter() - .map(|(_, result)| bundle_entry_result_to_json(result)) + .map(|(_, result)| bundle_entry_result_to_json(result, base_url, prefer)) .collect(); let response_bundle = serde_json::json!({ @@ -226,20 +232,20 @@ where } } -/// Processes a single batch entry. +/// Processes a single batch entry, returning a structured BundleEntryResult. 
async fn process_batch_entry( state: &AppState, tenant: &TenantExtractor, entry: &Value, index: usize, -) -> Value +) -> BundleEntryResult where S: ResourceStorage + Send + Sync, { let request = match entry.get("request") { Some(r) => r, None => { - return create_error_entry("400", &format!("Entry {} missing request", index)); + return create_error_result(400, &format!("Entry {} missing request", index)); } }; @@ -250,7 +256,7 @@ where let (resource_type, id) = match parse_request_url(url) { Ok(parsed) => parsed, Err(e) => { - return create_error_entry("400", &e); + return create_error_result(400, &e); } }; @@ -262,17 +268,9 @@ where .read(tenant.context(), &resource_type, &id) .await { - Ok(Some(stored)) => { - serde_json::json!({ - "resource": stored.content(), - "response": { - "status": "200 OK", - "etag": format!("W/\"{}\"", stored.version_id()) - } - }) - } - Ok(None) => create_error_entry("404", "Resource not found"), - Err(e) => create_error_entry("500", &e.to_string()), + Ok(Some(stored)) => BundleEntryResult::ok(stored), + Ok(None) => create_error_result(404, "Resource not found"), + Err(e) => create_error_result(500, &e.to_string()), } } "POST" => { @@ -280,7 +278,7 @@ where let resource = match entry.get("resource") { Some(r) => r.clone(), None => { - return create_error_entry("400", "POST entry missing resource"); + return create_error_result(400, "POST entry missing resource"); } }; @@ -295,17 +293,8 @@ where ) .await { - Ok(stored) => { - serde_json::json!({ - "resource": stored.content(), - "response": { - "status": "201 Created", - "location": format!("{}/{}", resource_type, stored.id()), - "etag": format!("W/\"{}\"", stored.version_id()) - } - }) - } - Err(e) => create_error_entry("400", &e.to_string()), + Ok(stored) => BundleEntryResult::created(stored), + Err(e) => create_error_result(400, &e.to_string()), } } "PUT" => { @@ -313,7 +302,7 @@ where let resource = match entry.get("resource") { Some(r) => r.clone(), None => { - return 
create_error_entry("400", "PUT entry missing resource"); + return create_error_result(400, "PUT entry missing resource"); } }; @@ -330,16 +319,16 @@ where .await { Ok((stored, created)) => { - let status = if created { "201 Created" } else { "200 OK" }; - serde_json::json!({ - "resource": stored.content(), - "response": { - "status": status, - "etag": format!("W/\"{}\"", stored.version_id()) - } - }) + if created { + BundleEntryResult::created(stored) + } else { + // For updates, include location with versioned URL + let mut result = BundleEntryResult::ok(stored); + result.location = Some(format!("{}/{}", resource_type, id)); + result + } } - Err(e) => create_error_entry("400", &e.to_string()), + Err(e) => create_error_result(400, &e.to_string()), } } "DELETE" => { @@ -349,19 +338,13 @@ where .delete(tenant.context(), &resource_type, &id) .await { - Ok(()) => { - serde_json::json!({ - "response": { - "status": "204 No Content" - } - }) - } - Err(e) => create_error_entry("404", &e.to_string()), + Ok(()) => BundleEntryResult::deleted(), + Err(e) => create_error_result(404, &e.to_string()), } } _ => { warn!(method = method, "Unsupported batch method"); - create_error_entry("405", &format!("Unsupported method: {}", method)) + create_error_result(405, &format!("Unsupported method: {}", method)) } } } @@ -381,23 +364,19 @@ fn parse_request_url(url: &str) -> Result<(String, String), String> { } } -/// Creates an error response entry. -fn create_error_entry(status: &str, message: &str) -> Value { - serde_json::json!({ - "response": { - "status": format!("{} {}", status, status_text(status)), - "outcome": { - "resourceType": "OperationOutcome", - "issue": [{ - "severity": "error", - "code": "processing", - "details": { - "text": message - } - }] +/// Creates an error BundleEntryResult. 
+fn create_error_result(status: u16, message: &str) -> BundleEntryResult { + let outcome = serde_json::json!({ + "resourceType": "OperationOutcome", + "issue": [{ + "severity": "error", + "code": "processing", + "details": { + "text": message } - } - }) + }] + }); + BundleEntryResult::error(status, outcome) } /// Returns HTTP status text for a status code. @@ -491,7 +470,11 @@ fn method_processing_order(method: &BundleMethod) -> u8 { } /// Converts a BundleEntryResult to JSON for the response bundle. -fn bundle_entry_result_to_json(result: &BundleEntryResult) -> Value { +fn bundle_entry_result_to_json( + result: &BundleEntryResult, + base_url: &str, + prefer: &PreferHeader, +) -> Value { let mut response = serde_json::Map::new(); let status_code = result.status.to_string(); @@ -513,14 +496,43 @@ fn bundle_entry_result_to_json(result: &BundleEntryResult) -> Value { ); } + // Place outcome in response.outcome (not entry.resource) + if let Some(ref outcome) = result.outcome { + response.insert("outcome".to_string(), outcome.clone()); + } + let mut entry = serde_json::Map::new(); + // Include resource based on Prefer header if let Some(ref resource) = result.resource { - entry.insert("resource".to_string(), resource.clone()); + match prefer.return_preference() { + Some("minimal") => { + // Omit resource body + } + Some("OperationOutcome") => { + // Return an OperationOutcome instead of the resource + let outcome = serde_json::json!({ + "resourceType": "OperationOutcome", + "issue": [{ + "severity": "information", + "code": "informational", + "details": { + "text": format!("Operation completed with status {}", result.status) + } + }] + }); + entry.insert("resource".to_string(), outcome); + } + _ => { + // Default: return=representation — include the resource + entry.insert("resource".to_string(), resource.clone()); + } + } } - if let Some(ref outcome) = result.outcome { - entry.insert("resource".to_string(), outcome.clone()); + // Build fullUrl from location or resource 
content + if let Some(full_url) = build_full_url(result, base_url) { + entry.insert("fullUrl".to_string(), Value::String(full_url)); } entry.insert("response".to_string(), Value::Object(response)); @@ -528,6 +540,37 @@ fn bundle_entry_result_to_json(result: &BundleEntryResult) -> Value { Value::Object(entry) } +/// Builds the fullUrl for a response entry. +/// +/// Uses the location (stripping the _history suffix) or falls back to +/// extracting resourceType/id from the resource content. +fn build_full_url(result: &BundleEntryResult, base_url: &str) -> Option { + // Try to derive from location (e.g., "Patient/123/_history/1" -> base_url/Patient/123) + if let Some(ref location) = result.location { + let resource_url = if let Some(idx) = location.find("/_history/") { + &location[..idx] + } else { + location.as_str() + }; + return Some(format!( + "{}/{}", + base_url.trim_end_matches('/'), + resource_url + )); + } + + // Fall back to resource content + if let Some(ref resource) = result.resource { + let resource_type = resource.get("resourceType").and_then(|v| v.as_str()); + let id = resource.get("id").and_then(|v| v.as_str()); + if let (Some(rt), Some(id)) = (resource_type, id) { + return Some(format!("{}/{}/{}", base_url.trim_end_matches('/'), rt, id)); + } + } + + None +} + /// Converts a TransactionError to an HTTP response with OperationOutcome. fn transaction_error_to_response(err: TransactionError) -> RestResult { let (status_code, issue_code, message) = match &err { diff --git a/crates/rest/tests/batch_conformance.rs b/crates/rest/tests/batch_conformance.rs new file mode 100644 index 00000000..b4096431 --- /dev/null +++ b/crates/rest/tests/batch_conformance.rs @@ -0,0 +1,811 @@ +//! Batch and transaction response conformance tests. +//! +//! Tests FHIR spec compliance for batch/transaction responses: +//! - Response Bundle type (batch-response / transaction-response) +//! - fullUrl on response entries +//! 
- Prefer header handling (return=minimal, return=representation, return=OperationOutcome) +//! - Error outcome placement (response.outcome, not entry.resource) +//! - lastModified and location on response entries +//! - Entry count matches request + +mod common; + +use std::path::PathBuf; +use std::sync::Arc; + +use axum::http::{HeaderName, HeaderValue, StatusCode}; +use axum_test::TestServer; +use helios_fhir::FhirVersion; +use helios_persistence::backends::sqlite::{SqliteBackend, SqliteBackendConfig}; +use helios_persistence::core::ResourceStorage; +use helios_persistence::tenant::{TenantContext, TenantId, TenantPermissions}; +use helios_rest::ServerConfig; +use helios_rest::config::{MultitenancyConfig, TenantRoutingMode}; +use serde_json::{Value, json}; + +const X_TENANT_ID: HeaderName = HeaderName::from_static("x-tenant-id"); +const CONTENT_TYPE: HeaderName = HeaderName::from_static("content-type"); +const PREFER: HeaderName = HeaderName::from_static("prefer"); + +/// Creates a test server with a known base URL. 
+async fn create_test_server() -> (TestServer, Arc) { + let data_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .and_then(|p| p.parent()) + .map(|p| p.join("data")) + .unwrap_or_else(|| PathBuf::from("data")); + + let backend_config = SqliteBackendConfig { + data_dir: Some(data_dir), + ..Default::default() + }; + let backend = SqliteBackend::with_config(":memory:", backend_config) + .expect("Failed to create SQLite backend"); + backend.init_schema().expect("Failed to init schema"); + let backend = Arc::new(backend); + + let config = ServerConfig { + multitenancy: MultitenancyConfig { + routing_mode: TenantRoutingMode::HeaderOnly, + ..Default::default() + }, + base_url: "http://localhost:8080".to_string(), + default_tenant: "test-tenant".to_string(), + ..ServerConfig::for_testing() + }; + + let state = helios_rest::AppState::new(Arc::clone(&backend), config); + let app = helios_rest::routing::fhir_routes::create_routes(state); + let server = TestServer::new(app).expect("Failed to create test server"); + + (server, backend) +} + +fn test_tenant() -> TenantContext { + TenantContext::new( + TenantId::new("test-tenant"), + TenantPermissions::full_access(), + ) +} + +async fn seed_patient(backend: &SqliteBackend, id: &str, family: &str) { + let tenant = test_tenant(); + let patient = json!({ + "resourceType": "Patient", + "id": id, + "name": [{"family": family}], + "active": true + }); + backend + .create(&tenant, "Patient", patient, FhirVersion::R4) + .await + .expect("Failed to seed patient"); +} + +/// Helper: post a batch bundle and return the parsed response body. 
+async fn post_batch(server: &TestServer, bundle: Value) -> Value { + let response = server + .post("/") + .add_header(X_TENANT_ID, HeaderValue::from_static("test-tenant")) + .add_header( + CONTENT_TYPE, + HeaderValue::from_static("application/fhir+json"), + ) + .json(&bundle) + .await; + response.assert_status_ok(); + response.json() +} + +/// Helper: post a batch bundle with a Prefer header. +async fn post_batch_with_prefer(server: &TestServer, bundle: Value, prefer: &str) -> Value { + let response = server + .post("/") + .add_header(X_TENANT_ID, HeaderValue::from_static("test-tenant")) + .add_header( + CONTENT_TYPE, + HeaderValue::from_static("application/fhir+json"), + ) + .add_header(PREFER, HeaderValue::from_str(prefer).unwrap()) + .json(&bundle) + .await; + response.assert_status_ok(); + response.json() +} + +// ============================================================================= +// Bundle Type Tests +// ============================================================================= + +mod bundle_type { + use super::*; + + #[tokio::test] + async fn test_batch_returns_batch_response_type() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "Test"}] } + }] + }); + + let body = post_batch(&server, bundle).await; + assert_eq!(body["resourceType"], "Bundle"); + assert_eq!(body["type"], "batch-response"); + } + + #[tokio::test] + async fn test_transaction_returns_transaction_response_type() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "transaction", + "entry": [{ + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "Test"}] } + }] + }); + + let body = post_batch(&server, bundle).await; + 
assert_eq!(body["resourceType"], "Bundle"); + assert_eq!(body["type"], "transaction-response"); + } +} + +// ============================================================================= +// Response Entry Count Tests +// ============================================================================= + +mod entry_count { + use super::*; + + #[tokio::test] + async fn test_batch_response_has_one_entry_per_request() { + let (server, backend) = create_test_server().await; + seed_patient(&backend, "p1", "Smith").await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [ + { + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "New"}] } + }, + { + "request": { "method": "GET", "url": "Patient/p1" } + }, + { + "request": { "method": "DELETE", "url": "Patient/p1" } + } + ] + }); + + let body = post_batch(&server, bundle).await; + let entries = body["entry"].as_array().expect("entry should be an array"); + assert_eq!( + entries.len(), + 3, + "Response should have one entry per request" + ); + } +} + +// ============================================================================= +// fullUrl Tests +// ============================================================================= + +mod full_url { + use super::*; + + #[tokio::test] + async fn test_batch_create_response_has_full_url() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "Test"}] } + }] + }); + + let body = post_batch(&server, bundle).await; + let entry = &body["entry"][0]; + + let full_url = entry["fullUrl"] + .as_str() + .expect("fullUrl should be present"); + assert!( + full_url.starts_with("http://localhost:8080/Patient/"), + "fullUrl should start with base URL + resource type: {}", + full_url + ); + } + + 
#[tokio::test] + async fn test_batch_read_response_has_full_url() { + let (server, backend) = create_test_server().await; + seed_patient(&backend, "p1", "Smith").await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "GET", "url": "Patient/p1" } + }] + }); + + let body = post_batch(&server, bundle).await; + let entry = &body["entry"][0]; + + let full_url = entry["fullUrl"] + .as_str() + .expect("fullUrl should be present"); + assert_eq!( + full_url, "http://localhost:8080/Patient/p1", + "fullUrl should be base URL + resource path" + ); + } + + #[tokio::test] + async fn test_batch_delete_response_has_no_full_url() { + let (server, backend) = create_test_server().await; + seed_patient(&backend, "p1", "Smith").await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "DELETE", "url": "Patient/p1" } + }] + }); + + let body = post_batch(&server, bundle).await; + let entry = &body["entry"][0]; + + // DELETE returns no resource and no location, so no fullUrl + assert!( + entry.get("fullUrl").is_none() || entry["fullUrl"].is_null(), + "DELETE response should not have fullUrl" + ); + } + + #[tokio::test] + async fn test_transaction_create_response_has_full_url() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "transaction", + "entry": [{ + "fullUrl": "urn:uuid:test-1", + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "TxTest"}] } + }] + }); + + let body = post_batch(&server, bundle).await; + let entry = &body["entry"][0]; + + let full_url = entry["fullUrl"] + .as_str() + .expect("fullUrl should be present"); + assert!( + full_url.starts_with("http://localhost:8080/Patient/"), + "fullUrl should start with base URL: {}", + full_url + ); + } +} + +// 
============================================================================= +// Response Fields Tests (status, etag, lastModified, location) +// ============================================================================= + +mod response_fields { + use super::*; + + #[tokio::test] + async fn test_batch_create_has_status_location_etag_last_modified() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "Test"}] } + }] + }); + + let body = post_batch(&server, bundle).await; + let response = &body["entry"][0]["response"]; + + assert_eq!( + response["status"].as_str().unwrap(), + "201 Created", + "Create should return 201" + ); + + assert!( + response["location"].as_str().is_some(), + "Create response should have location" + ); + + let etag = response["etag"].as_str().expect("Create should have etag"); + assert!(etag.starts_with("W/\""), "ETag should be weak: {}", etag); + + assert!( + response["lastModified"].as_str().is_some(), + "Create response should have lastModified" + ); + } + + #[tokio::test] + async fn test_batch_read_has_etag_and_last_modified() { + let (server, backend) = create_test_server().await; + seed_patient(&backend, "p1", "Smith").await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "GET", "url": "Patient/p1" } + }] + }); + + let body = post_batch(&server, bundle).await; + let response = &body["entry"][0]["response"]; + + assert_eq!(response["status"].as_str().unwrap(), "200 OK"); + + assert!( + response["etag"].as_str().is_some(), + "Read response should have etag" + ); + + assert!( + response["lastModified"].as_str().is_some(), + "Read response should have lastModified" + ); + } + + #[tokio::test] + async fn test_batch_update_has_location() { + let (server, backend) = 
create_test_server().await; + seed_patient(&backend, "p1", "Smith").await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "PUT", "url": "Patient/p1" }, + "resource": { + "resourceType": "Patient", + "id": "p1", + "name": [{"family": "Updated"}] + } + }] + }); + + let body = post_batch(&server, bundle).await; + let response = &body["entry"][0]["response"]; + + assert_eq!(response["status"].as_str().unwrap(), "200 OK"); + + assert!( + response["etag"].as_str().is_some(), + "Update response should have etag" + ); + + assert!( + response["lastModified"].as_str().is_some(), + "Update response should have lastModified" + ); + } + + #[tokio::test] + async fn test_batch_delete_has_status_204() { + let (server, backend) = create_test_server().await; + seed_patient(&backend, "p1", "Smith").await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "DELETE", "url": "Patient/p1" } + }] + }); + + let body = post_batch(&server, bundle).await; + let response = &body["entry"][0]["response"]; + + assert_eq!( + response["status"].as_str().unwrap(), + "204 No Content", + "Delete should return 204" + ); + } + + #[tokio::test] + async fn test_batch_upsert_create_returns_201() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "PUT", "url": "Patient/new-upsert" }, + "resource": { + "resourceType": "Patient", + "id": "new-upsert", + "name": [{"family": "Upserted"}] + } + }] + }); + + let body = post_batch(&server, bundle).await; + let response = &body["entry"][0]["response"]; + + assert_eq!( + response["status"].as_str().unwrap(), + "201 Created", + "Upsert of new resource should return 201" + ); + + assert!( + response["location"].as_str().is_some(), + "Upsert create should have location" + ); + } +} + +// 
============================================================================= +// Error Outcome Placement Tests +// ============================================================================= + +mod error_outcome { + use super::*; + + #[tokio::test] + async fn test_batch_error_outcome_in_response_not_resource() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "GET", "url": "Patient/nonexistent" } + }] + }); + + let body = post_batch(&server, bundle).await; + let entry = &body["entry"][0]; + + // outcome should be in response.outcome + let outcome = &entry["response"]["outcome"]; + assert_eq!( + outcome["resourceType"].as_str().unwrap(), + "OperationOutcome", + "Error outcome should be in response.outcome" + ); + + // resource should NOT be set + assert!( + entry.get("resource").is_none() || entry["resource"].is_null(), + "Error entry should not have a resource field" + ); + } + + #[tokio::test] + async fn test_batch_error_has_status_and_outcome() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "POST", "url": "Patient" } + // Missing resource — should produce an error + }] + }); + + let body = post_batch(&server, bundle).await; + let entry = &body["entry"][0]; + + let status = entry["response"]["status"].as_str().unwrap(); + assert!( + status.starts_with("400"), + "Missing resource should return 400: {}", + status + ); + + let outcome = &entry["response"]["outcome"]; + assert_eq!( + outcome["resourceType"].as_str().unwrap(), + "OperationOutcome" + ); + } + + #[tokio::test] + async fn test_batch_mixed_success_and_error() { + let (server, backend) = create_test_server().await; + seed_patient(&backend, "exists", "Smith").await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [ + { + "request": { 
"method": "GET", "url": "Patient/exists" } + }, + { + "request": { "method": "GET", "url": "Patient/does-not-exist" } + } + ] + }); + + let body = post_batch(&server, bundle).await; + let entries = body["entry"].as_array().unwrap(); + + // First entry: success + assert_eq!(entries[0]["response"]["status"].as_str().unwrap(), "200 OK"); + assert!(entries[0].get("resource").is_some()); + + // Second entry: error + let status = entries[1]["response"]["status"].as_str().unwrap(); + assert!(status.starts_with("404"), "Not found should return 404"); + assert!(entries[1]["response"]["outcome"]["resourceType"] == "OperationOutcome"); + } +} + +// ============================================================================= +// Prefer Header Tests +// ============================================================================= + +mod prefer_header { + use super::*; + + #[tokio::test] + async fn test_prefer_representation_includes_resource() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "Test"}] } + }] + }); + + let body = post_batch_with_prefer(&server, bundle, "return=representation").await; + let entry = &body["entry"][0]; + + assert!( + entry.get("resource").is_some() && !entry["resource"].is_null(), + "return=representation should include resource in response" + ); + + assert_eq!( + entry["resource"]["resourceType"].as_str().unwrap(), + "Patient", + "Resource should be the created Patient" + ); + } + + #[tokio::test] + async fn test_prefer_minimal_omits_resource() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "Test"}] } + }] + }); + + let body = 
post_batch_with_prefer(&server, bundle, "return=minimal").await; + let entry = &body["entry"][0]; + + assert!( + entry.get("resource").is_none() || entry["resource"].is_null(), + "return=minimal should NOT include resource in response" + ); + + // Response metadata should still be present + assert!( + entry["response"]["status"].as_str().is_some(), + "Status should still be present" + ); + assert!( + entry["response"]["etag"].as_str().is_some(), + "ETag should still be present even with minimal" + ); + } + + #[tokio::test] + async fn test_prefer_operation_outcome_returns_outcome() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "Test"}] } + }] + }); + + let body = post_batch_with_prefer(&server, bundle, "return=OperationOutcome").await; + let entry = &body["entry"][0]; + + assert!( + entry.get("resource").is_some() && !entry["resource"].is_null(), + "return=OperationOutcome should include a resource (the OperationOutcome)" + ); + + assert_eq!( + entry["resource"]["resourceType"].as_str().unwrap(), + "OperationOutcome", + "Resource should be an OperationOutcome when return=OperationOutcome" + ); + } + + #[tokio::test] + async fn test_default_prefer_includes_resource() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "Test"}] } + }] + }); + + // No Prefer header — should default to representation + let body = post_batch(&server, bundle).await; + let entry = &body["entry"][0]; + + assert!( + entry.get("resource").is_some() && !entry["resource"].is_null(), + "Default (no Prefer) should include resource in response" + ); + + assert_eq!( + 
entry["resource"]["resourceType"].as_str().unwrap(), + "Patient" + ); + } + + #[tokio::test] + async fn test_prefer_minimal_on_transaction() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "transaction", + "entry": [{ + "fullUrl": "urn:uuid:tx-1", + "request": { "method": "POST", "url": "Patient" }, + "resource": { "resourceType": "Patient", "name": [{"family": "TxMinimal"}] } + }] + }); + + let body = post_batch_with_prefer(&server, bundle, "return=minimal").await; + assert_eq!(body["type"], "transaction-response"); + + let entry = &body["entry"][0]; + assert!( + entry.get("resource").is_none() || entry["resource"].is_null(), + "return=minimal on transaction should omit resource" + ); + + // Metadata should still be present + assert_eq!(entry["response"]["status"].as_str().unwrap(), "201 Created"); + } + + #[tokio::test] + async fn test_prefer_minimal_read_omits_resource() { + let (server, backend) = create_test_server().await; + seed_patient(&backend, "p1", "Smith").await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "batch", + "entry": [{ + "request": { "method": "GET", "url": "Patient/p1" } + }] + }); + + let body = post_batch_with_prefer(&server, bundle, "return=minimal").await; + let entry = &body["entry"][0]; + + assert!( + entry.get("resource").is_none() || entry["resource"].is_null(), + "return=minimal should omit resource even for reads" + ); + } +} + +// ============================================================================= +// Transaction Error Response Tests +// ============================================================================= + +mod transaction_errors { + use super::*; + + #[tokio::test] + async fn test_failed_transaction_returns_operation_outcome() { + let (server, _backend) = create_test_server().await; + + // Transaction with a bad entry (missing resource for POST) + let bundle = json!({ + "resourceType": "Bundle", + "type": "transaction", + 
"entry": [{ + "request": { "method": "POST", "url": "Patient" } + // Missing resource + }] + }); + + let response = server + .post("/") + .add_header(X_TENANT_ID, HeaderValue::from_static("test-tenant")) + .add_header( + CONTENT_TYPE, + HeaderValue::from_static("application/fhir+json"), + ) + .json(&bundle) + .await; + + // Failed transaction should return 4xx/5xx with OperationOutcome, not a Bundle + let status = response.status_code(); + assert!( + status.is_client_error() || status.is_server_error(), + "Failed transaction should return error status: {}", + status + ); + + let body: Value = response.json(); + assert_eq!( + body["resourceType"].as_str().unwrap(), + "OperationOutcome", + "Failed transaction should return OperationOutcome, not a Bundle" + ); + } + + #[tokio::test] + async fn test_invalid_bundle_type_returns_400() { + let (server, _backend) = create_test_server().await; + + let bundle = json!({ + "resourceType": "Bundle", + "type": "collection", + "entry": [] + }); + + let response = server + .post("/") + .add_header(X_TENANT_ID, HeaderValue::from_static("test-tenant")) + .add_header( + CONTENT_TYPE, + HeaderValue::from_static("application/fhir+json"), + ) + .json(&bundle) + .await; + + response.assert_status(StatusCode::BAD_REQUEST); + } +} From e7aaf06fb8df4fe8c452a3aefaa25b8625c61b27 Mon Sep 17 00:00:00 2001 From: smunini Date: Wed, 4 Mar 2026 13:03:50 -0500 Subject: [PATCH 7/7] feat(s3): integrate S3 backend into HFS server and CI - Add S3 storage backend mode to ServerConfig and StorageBackendMode - Wire S3Backend into HFS main binary with HFS_S3_BUCKET, HFS_S3_REGION, and HFS_S3_VALIDATE_BUCKETS env vars - Add s3 feature flag to helios-hfs and helios-rest Cargo.toml - Implement stub SearchProvider and ConditionalStorage traits for S3 (returns UnsupportedCapability errors) - Improve S3 client error mapping for access denied, timeout, and dispatch failures - Add S3 backend to Inferno CI test matrix with conditional execution - Update README with 
S3 backend docs and env vars --- .github/workflows/inferno.yml | 11 ++- README.md | 16 +++- crates/hfs/Cargo.toml | 1 + crates/hfs/src/main.rs | 62 ++++++++++++++- crates/persistence/src/backends/s3/client.rs | 35 ++++++-- crates/persistence/src/backends/s3/storage.rs | 79 +++++++++++++++++++ crates/rest/Cargo.toml | 1 + crates/rest/src/config.rs | 22 +++++- 8 files changed, 213 insertions(+), 14 deletions(-) diff --git a/.github/workflows/inferno.yml b/.github/workflows/inferno.yml index 7cabcbeb..c6603535 100644 --- a/.github/workflows/inferno.yml +++ b/.github/workflows/inferno.yml @@ -12,6 +12,8 @@ env: # Remote Docker host (set via GitHub repository secrets or variables; leave unset for local Docker) DOCKER_HOST: ${{ secrets.DOCKER_HOST }} DOCKER_HOST_IP: ${{ secrets.DOCKER_HOST_IP }} + # S3 backend configuration (optional; S3 tests are skipped when not set) + HFS_S3_BUCKET: ${{ secrets.HFS_S3_BUCKET || vars.HFS_S3_BUCKET }} jobs: build: @@ -35,7 +37,7 @@ jobs: echo 'rustflags = ["-C", "link-arg=-fuse-ld=lld", "-C", "link-arg=-Wl,-zstack-size=8388608"]' >> ~/.cargo/config.toml - name: Build HFS - run: cargo build -p helios-hfs --features R4,sqlite,elasticsearch,postgres + run: cargo build -p helios-hfs --features R4,sqlite,elasticsearch,postgres,s3 - name: Upload HFS binary uses: actions/upload-artifact@v4 @@ -196,6 +198,7 @@ jobs: exit 1 - name: Start HFS server + if: matrix.backend != 's3' || env.HFS_S3_BUCKET != '' run: | HFS_LOG="/tmp/hfs-${{ matrix.suite_id }}-${{ matrix.backend }}.log" echo "HFS_LOG=$HFS_LOG" >> $GITHUB_ENV @@ -212,6 +215,12 @@ jobs: HFS_PG_USER=helios \ HFS_PG_PASSWORD=helios \ ./target/debug/hfs --log-level info --port $HFS_PORT --host 0.0.0.0 > "$HFS_LOG" 2>&1 & + elif [ "${{ matrix.backend }}" = "s3" ]; then + HFS_STORAGE_BACKEND=s3 \ + HFS_S3_BUCKET=${{ secrets.HFS_S3_BUCKET || vars.HFS_S3_BUCKET }} \ + HFS_S3_VALIDATE_BUCKETS=false \ + AWS_REGION=${{ secrets.AWS_REGION || vars.AWS_REGION || 'us-east-1' }} \ + ./target/debug/hfs 
--log-level info --port $HFS_PORT --host 0.0.0.0 > "$HFS_LOG" 2>&1 & else ./target/debug/hfs --database-url :memory: --log-level info --port $HFS_PORT --host 0.0.0.0 > "$HFS_LOG" 2>&1 & fi diff --git a/README.md b/README.md index 3648dc9c..7a337a79 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ The Helios FHIR Server includes several components: See [Core Components](#core-components) for details on each. -The server supports SQLite, PostgreSQL, and Elasticsearch in various configurations — see [Storage Backends](#storage-backends) for setup options. +The server supports SQLite, PostgreSQL, Elasticsearch, and S3 in various configurations — see [Storage Backends](#storage-backends) for setup options. ## Using Release Binaries @@ -171,6 +171,7 @@ The Helios FHIR Server supports multiple storage backend configurations. Choose | **SQLite + Elasticsearch** | Elasticsearch-powered search with relevance scoring | Production deployments needing robust search | | **PostgreSQL** | Built-in full-text search (tsvector/tsquery) | Production OLTP deployments | | **PostgreSQL + Elasticsearch** | Elasticsearch-powered search with PostgreSQL CRUD | Production deployments needing RDBMS + robust search | +| **S3** | Object storage for CRUD, versioning, history, and bulk operations (no search) | Archival, bulk analytics, cost-effective storage | ### Running the Server @@ -193,13 +194,21 @@ HFS_STORAGE_BACKEND=postgres-elasticsearch \ HFS_DATABASE_URL="postgresql://user:pass@localhost:5432/fhir" \ HFS_ELASTICSEARCH_NODES=http://localhost:9200 \ ./hfs + +# S3 (requires AWS credentials via standard provider chain: +# https://docs.aws.amazon.com/sdkref/latest/guide/standardized-credentials.html) +HFS_STORAGE_BACKEND=s3 \ +HFS_S3_BUCKET=my-fhir-bucket \ +AWS_PROFILE=your-aws-profile \ +AWS_REGION=us-east-1 \ + ./hfs ``` ### Environment Variables | Variable | Default | Description | |---|---|---| -| `HFS_STORAGE_BACKEND` | `sqlite` | Backend mode: `sqlite`, `sqlite-elasticsearch`, 
`postgres`, or `postgres-elasticsearch` | +| `HFS_STORAGE_BACKEND` | `sqlite` | Backend mode: `sqlite`, `sqlite-elasticsearch`, `postgres`, `postgres-elasticsearch`, or `s3` | | `HFS_SERVER_PORT` | `8080` | Server port | | `HFS_SERVER_HOST` | `127.0.0.1` | Host to bind | | `HFS_DATABASE_URL` | `fhir.db` | Database URL (SQLite path or PostgreSQL connection string) | @@ -209,6 +218,9 @@ HFS_ELASTICSEARCH_NODES=http://localhost:9200 \ | `HFS_ELASTICSEARCH_INDEX_PREFIX` | `hfs` | ES index name prefix | | `HFS_ELASTICSEARCH_USERNAME` | *(none)* | ES basic auth username | | `HFS_ELASTICSEARCH_PASSWORD` | *(none)* | ES basic auth password | +| `HFS_S3_BUCKET` | `hfs` | S3 bucket name (prefix-per-tenant mode) | +| `HFS_S3_REGION` | *(AWS provider chain)* | AWS region override | +| `HFS_S3_VALIDATE_BUCKETS` | `true` | Validate bucket access on startup | For detailed backend setup instructions (building from source, Docker commands, and search offloading architecture), see the [persistence crate documentation](crates/persistence/README.md#building--running-storage-backends). diff --git a/crates/hfs/Cargo.toml b/crates/hfs/Cargo.toml index f8c695c9..c77bc477 100644 --- a/crates/hfs/Cargo.toml +++ b/crates/hfs/Cargo.toml @@ -28,6 +28,7 @@ sqlite = ["helios-rest/sqlite"] postgres = ["helios-rest/postgres"] mongodb = ["helios-rest/mongodb"] elasticsearch = ["helios-rest/elasticsearch"] +s3 = ["helios-rest/s3"] [build-dependencies] reqwest = { version = "0.12", features = ["blocking"] } diff --git a/crates/hfs/src/main.rs b/crates/hfs/src/main.rs index c5f3ee0c..3ad07723 100644 --- a/crates/hfs/src/main.rs +++ b/crates/hfs/src/main.rs @@ -10,8 +10,9 @@ //! | SQLite + Elasticsearch | `sqlite,elasticsearch` | SQLite for CRUD, Elasticsearch for search | //! | PostgreSQL | `postgres` | Full-featured RDBMS with JSONB storage and tsvector search | //! | PostgreSQL + Elasticsearch | `postgres,elasticsearch` | PostgreSQL for CRUD, Elasticsearch for search | +//! 
| S3 | `s3` | AWS S3 object storage for CRUD, versioning, history, and bulk ops (no search) | //! -//! Set `HFS_STORAGE_BACKEND` to `sqlite`, `sqlite-elasticsearch`, `postgres`, or `postgres-elasticsearch`. +//! Set `HFS_STORAGE_BACKEND` to `sqlite`, `sqlite-elasticsearch`, `postgres`, `postgres-elasticsearch`, or `s3`. use clap::Parser; use helios_rest::{ServerConfig, StorageBackendMode, create_app_with_config, init_logging}; @@ -88,6 +89,9 @@ async fn main() -> anyhow::Result<()> { StorageBackendMode::PostgresElasticsearch => { start_postgres_elasticsearch(config).await?; } + StorageBackendMode::S3 => { + start_s3(config).await?; + } } Ok(()) @@ -370,5 +374,59 @@ async fn start_postgres_elasticsearch(_config: ServerConfig) -> anyhow::Result<( ) } -#[cfg(not(any(feature = "sqlite", feature = "postgres", feature = "mongodb")))] +/// Starts the server with AWS S3 backend. +#[cfg(feature = "s3")] +async fn start_s3(config: ServerConfig) -> anyhow::Result<()> { + use helios_persistence::backends::s3::{S3Backend, S3BackendConfig, S3TenancyMode}; + + let bucket = std::env::var("HFS_S3_BUCKET").unwrap_or_else(|_| "hfs".to_string()); + let region = std::env::var("HFS_S3_REGION").ok(); + let validate_buckets = std::env::var("HFS_S3_VALIDATE_BUCKETS") + .map(|s| s.to_lowercase() != "false" && s != "0") + .unwrap_or(true); + + info!( + bucket = %bucket, + region = ?region, + validate_buckets = validate_buckets, + "Initializing S3 backend" + ); + + let s3_config = S3BackendConfig { + tenancy_mode: S3TenancyMode::PrefixPerTenant { + bucket: bucket.clone(), + }, + region, + validate_buckets_on_startup: validate_buckets, + ..Default::default() + }; + + let backend = S3Backend::new(s3_config).map_err(|e| { + anyhow::anyhow!( + "Failed to initialize S3 backend (bucket={}, region={:?}): {}", + bucket, + std::env::var("AWS_REGION").ok(), + e + ) + })?; + + let app = create_app_with_config(backend, config.clone()); + serve(app, &config).await +} + +/// Fallback when s3 feature is not 
enabled.
+#[cfg(not(feature = "s3"))]
+async fn start_s3(_config: ServerConfig) -> anyhow::Result<()> {
+    anyhow::bail!(
+        "The s3 backend requires the 's3' feature. \
+         Build with: cargo build -p helios-hfs --features s3"
+    )
+}
+
+#[cfg(not(any(
+    feature = "sqlite",
+    feature = "postgres",
+    feature = "mongodb",
+    feature = "s3"
+)))]
 compile_error!("At least one database backend feature must be enabled");
diff --git a/crates/persistence/src/backends/s3/client.rs b/crates/persistence/src/backends/s3/client.rs
index ad5e58b1..93971755 100644
--- a/crates/persistence/src/backends/s3/client.rs
+++ b/crates/persistence/src/backends/s3/client.rs
@@ -336,8 +336,6 @@ fn map_sdk_error<E>(err: aws_sdk_s3::error::SdkError<E>) -> S3ClientError
 where
     E: ProvideErrorMetadata + std::fmt::Debug,
 {
-    let fallback = format!("{err:?}");
-
     match err {
         aws_sdk_s3::error::SdkError::ServiceError(service_err) => {
             let code = service_err.err().code().unwrap_or("Unknown");
@@ -345,7 +343,7 @@ where
                 .err()
                 .message()
                 .map(str::to_string)
-                .unwrap_or_else(|| fallback.clone());
+                .unwrap_or_default();
             match code {
                 "NoSuchKey" | "NotFound" | "NoSuchBucket" => S3ClientError::NotFound,
                 "PreconditionFailed" => S3ClientError::PreconditionFailed,
@@ -353,11 +351,34 @@ where
                     S3ClientError::Throttled(message)
                 }
                 "InvalidBucketName" | "InvalidArgument" => S3ClientError::InvalidInput(message),
-                _ => S3ClientError::Internal(message),
+                "AccessDenied"
+                | "InvalidAccessKeyId"
+                | "SignatureDoesNotMatch"
+                | "ExpiredToken" => S3ClientError::Unavailable(format!("access denied: {code}")),
+                _ => {
+                    // When S3 returns no error code (e.g. HeadBucket 403),
+                    // fall back to the HTTP status for a cleaner message.
+                    let status = service_err.raw().status().as_u16();
+                    match status {
+                        403 => S3ClientError::Unavailable(
+                            "access denied (HTTP 403) — check AWS credentials and bucket policy"
+                                .to_string(),
+                        ),
+                        404 => S3ClientError::NotFound,
+                        _ if message.is_empty() => S3ClientError::Internal(format!(
+                            "S3 error (HTTP {status}, code={code})"
+                        )),
+                        _ => S3ClientError::Internal(message),
+                    }
+                }
             }
         }
-        aws_sdk_s3::error::SdkError::TimeoutError(_) => S3ClientError::Unavailable(fallback),
-        aws_sdk_s3::error::SdkError::DispatchFailure(_) => S3ClientError::Unavailable(fallback),
-        _ => S3ClientError::Internal(fallback),
+        aws_sdk_s3::error::SdkError::TimeoutError(_) => {
+            S3ClientError::Unavailable("request timed out".to_string())
+        }
+        aws_sdk_s3::error::SdkError::DispatchFailure(err) => {
+            S3ClientError::Unavailable(format!("connection failed: {err:?}"))
+        }
+        _ => S3ClientError::Internal(format!("{err}")),
     }
 }
diff --git a/crates/persistence/src/backends/s3/storage.rs b/crates/persistence/src/backends/s3/storage.rs
index 08e3cfda..d0ba7e52 100644
--- a/crates/persistence/src/backends/s3/storage.rs
+++ b/crates/persistence/src/backends/s3/storage.rs
@@ -1002,6 +1002,85 @@ fn decode_pagination_offset(pagination: &Pagination) -> StorageResult<usize> {
     }
 }
 
+// ---------------------------------------------------------------------------
+// Stub trait impls: S3 does not support search or conditional operations
+// ---------------------------------------------------------------------------
+
+use crate::core::search::{SearchProvider, SearchResult};
+use crate::core::storage::{
+    ConditionalCreateResult, ConditionalDeleteResult, ConditionalStorage, ConditionalUpdateResult,
+};
+use crate::types::SearchQuery;
+
+#[async_trait]
+impl SearchProvider for S3Backend {
+    async fn search(
+        &self,
+        _tenant: &TenantContext,
+        _query: &SearchQuery,
+    ) -> StorageResult<SearchResult> {
+        Err(StorageError::Backend(BackendError::UnsupportedCapability {
+            backend_name: "S3".to_string(),
+            capability: 
"search".to_string(),
+        }))
+    }
+
+    async fn search_count(
+        &self,
+        _tenant: &TenantContext,
+        _query: &SearchQuery,
+    ) -> StorageResult<usize> {
+        Err(StorageError::Backend(BackendError::UnsupportedCapability {
+            backend_name: "S3".to_string(),
+            capability: "search_count".to_string(),
+        }))
+    }
+}
+
+#[async_trait]
+impl ConditionalStorage for S3Backend {
+    async fn conditional_create(
+        &self,
+        _tenant: &TenantContext,
+        _resource_type: &str,
+        _resource: Value,
+        _search_params: &str,
+        _fhir_version: FhirVersion,
+    ) -> StorageResult<ConditionalCreateResult> {
+        Err(StorageError::Backend(BackendError::UnsupportedCapability {
+            backend_name: "S3".to_string(),
+            capability: "conditional_create".to_string(),
+        }))
+    }
+
+    async fn conditional_update(
+        &self,
+        _tenant: &TenantContext,
+        _resource_type: &str,
+        _resource: Value,
+        _search_params: &str,
+        _upsert: bool,
+        _fhir_version: FhirVersion,
+    ) -> StorageResult<ConditionalUpdateResult> {
+        Err(StorageError::Backend(BackendError::UnsupportedCapability {
+            backend_name: "S3".to_string(),
+            capability: "conditional_update".to_string(),
+        }))
+    }
+
+    async fn conditional_delete(
+        &self,
+        _tenant: &TenantContext,
+        _resource_type: &str,
+        _search_params: &str,
+    ) -> StorageResult<ConditionalDeleteResult> {
+        Err(StorageError::Backend(BackendError::UnsupportedCapability {
+            backend_name: "S3".to_string(),
+            capability: "conditional_delete".to_string(),
+        }))
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/rest/Cargo.toml b/crates/rest/Cargo.toml
index 511aa632..23f6c050 100644
--- a/crates/rest/Cargo.toml
+++ b/crates/rest/Cargo.toml
@@ -28,6 +28,7 @@ sqlite = ["helios-persistence/sqlite"]
 postgres = ["helios-persistence/postgres"]
 mongodb = ["helios-persistence/mongodb"]
 elasticsearch = ["helios-persistence/elasticsearch"]
+s3 = ["helios-persistence/s3"]
 
 [dependencies]
 # Core dependencies
diff --git a/crates/rest/src/config.rs b/crates/rest/src/config.rs
index 928f9c04..2bc84d9a 100644
--- a/crates/rest/src/config.rs
+++ b/crates/rest/src/config.rs
@@ -63,6 +9
@@ pub enum StorageBackendMode { /// PostgreSQL for CRUD + Elasticsearch for search. /// Requires running PostgreSQL and Elasticsearch instances. PostgresElasticsearch, + /// AWS S3 object storage for CRUD, versioning, history, and bulk operations. + /// Requires AWS credentials via the standard provider chain. No search support. + S3, } impl fmt::Display for StorageBackendMode { @@ -74,6 +77,7 @@ impl fmt::Display for StorageBackendMode { StorageBackendMode::PostgresElasticsearch => { write!(f, "postgres-elasticsearch") } + StorageBackendMode::S3 => write!(f, "s3"), } } } @@ -89,8 +93,9 @@ impl FromStr for StorageBackendMode { "postgres-elasticsearch" | "postgres-es" | "pg-elasticsearch" | "pg-es" => { Ok(StorageBackendMode::PostgresElasticsearch) } + "s3" | "objectstore" => Ok(StorageBackendMode::S3), _ => Err(format!( - "Invalid storage backend '{}'. Valid values: sqlite, sqlite-elasticsearch, postgres, postgres-elasticsearch", + "Invalid storage backend '{}'. Valid values: sqlite, sqlite-elasticsearch, postgres, postgres-elasticsearch, s3", s )), } @@ -299,7 +304,7 @@ pub struct ServerConfig { #[arg(long, env = "HFS_MAX_PAGE_SIZE", default_value = "1000")] pub max_page_size: usize, - /// Storage backend mode: sqlite (default), sqlite-elasticsearch, postgres, or postgres-elasticsearch. + /// Storage backend mode: sqlite (default), sqlite-elasticsearch, postgres, postgres-elasticsearch, or s3. 
#[arg(long, env = "HFS_STORAGE_BACKEND", default_value = "sqlite")]
     pub storage_backend: String,
 
@@ -624,6 +629,18 @@ mod tests {
             .unwrap(),
             StorageBackendMode::PostgresElasticsearch
         );
+        assert_eq!(
+            "s3".parse::<StorageBackendMode>().unwrap(),
+            StorageBackendMode::S3
+        );
+        assert_eq!(
+            "objectstore".parse::<StorageBackendMode>().unwrap(),
+            StorageBackendMode::S3
+        );
+        assert_eq!(
+            "S3".parse::<StorageBackendMode>().unwrap(),
+            StorageBackendMode::S3
+        );
         assert!("invalid".parse::<StorageBackendMode>().is_err());
     }
 
@@ -639,6 +656,7 @@ mod tests {
             StorageBackendMode::PostgresElasticsearch.to_string(),
             "postgres-elasticsearch"
         );
+        assert_eq!(StorageBackendMode::S3.to_string(), "s3");
     }
 
     #[test]