From 25c7b328e78c2ad50cbb5fe08e41cf6d5c0a063a Mon Sep 17 00:00:00 2001 From: Tim Diekmann Date: Fri, 6 Mar 2026 14:21:08 +0100 Subject: [PATCH] BE-452: Enforce configurable limit for ontology query endpoints Add `query_ontology_limit` to `ApiConfig` (default 1000, env `HASH_GRAPH_QUERY_ONTOLOGY_LIMIT`) and enforce it in all six ontology query handlers: data types, property types, entity types, and their subgraph variants. Extract `resolve_limit` and `LimitExceededError` into `rest::mod` for reuse across entity and ontology handlers. The entity query limit validation in `EntityQueryOptions::into_params` now delegates to the shared `resolve_limit` function. --- libs/@local/graph/api/src/rest/data_type.rs | 44 ++++++++++++------- .../api/src/rest/entity_query_request.rs | 24 +++------- libs/@local/graph/api/src/rest/entity_type.rs | 44 ++++++++++++------- libs/@local/graph/api/src/rest/mod.rs | 43 +++++++++++++++++- .../graph/api/src/rest/property_type.rs | 44 ++++++++++++------- .../manual_queries/entity_queries/mod.rs | 1 + 6 files changed, 136 insertions(+), 64 deletions(-) diff --git a/libs/@local/graph/api/src/rest/data_type.rs b/libs/@local/graph/api/src/rest/data_type.rs index ab0f81ab6df..a041b8e8968 100644 --- a/libs/@local/graph/api/src/rest/data_type.rs +++ b/libs/@local/graph/api/src/rest/data_type.rs @@ -47,8 +47,9 @@ use utoipa::{OpenApi, ToSchema}; use super::status::BoxedResponse; use crate::rest::{ - AuthenticatedUserHeader, OpenApiQuery, QueryLogger, RestApiStore, + ApiConfig, AuthenticatedUserHeader, OpenApiQuery, QueryLogger, RestApiStore, json::Json, + resolve_limit, status::{report_to_response, status_to_response}, utoipa_typedef::{ListOrValue, MaybeListOfDataType, subgraph::Subgraph}, }; @@ -341,6 +342,7 @@ async fn query_data_types( AuthenticatedUserHeader(actor_id): AuthenticatedUserHeader, store_pool: Extension>, temporal_client: Extension>>, + Extension(api_config): Extension, mut query_logger: Option>, Json(request): Json, ) -> Result, BoxedResponse> @@ -351,20 +353,25 @@ where query_logger.capture(actor_id, OpenApiQuery::GetDataTypes(&request)); } + // Manually deserialize the query from a JSON value to allow borrowed deserialization + // and better error reporting. + let mut params = QueryDataTypesParams::deserialize(&request) + .map_err(Report::from) + .map_err(report_to_response)?; + + params.limit = Some( + resolve_limit(params.limit, api_config.query_ontology_limit) + .attach(hash_status::StatusCode::InvalidArgument) + .map_err(report_to_response)?, + ); + let store = store_pool .acquire(temporal_client.0) .await .map_err(report_to_response)?; let response = store - .query_data_types( - actor_id, - // Manually deserialize the query from a JSON value to allow borrowed deserialization - // and better error reporting. - QueryDataTypesParams::deserialize(&request) - .map_err(Report::from) - .map_err(report_to_response)?, - ) + .query_data_types(actor_id, params) .await .map_err(report_to_response) .map(Json); @@ -405,6 +412,7 @@ async fn query_data_type_subgraph( AuthenticatedUserHeader(actor_id): AuthenticatedUserHeader, store_pool: Extension>, temporal_client: Extension>>, + Extension(api_config): Extension, mut query_logger: Option>, Json(request): Json, ) -> Result, BoxedResponse> @@ -415,14 +423,9 @@ where query_logger.capture(actor_id, OpenApiQuery::GetDataTypeSubgraph(&request)); } - let store = store_pool - .acquire(temporal_client.0) - .await - .map_err(report_to_response)?; - // Manually deserialize the query from a JSON value to allow borrowed deserialization // and better error reporting. - let params = QueryDataTypeSubgraphParams::deserialize(&request) + let mut params = QueryDataTypeSubgraphParams::deserialize(&request) .map_err(Report::from) .map_err(report_to_response)?; params @@ -430,6 +433,17 @@ where .map_err(Report::new) .map_err(report_to_response)?; + params.request_mut().limit = Some( + resolve_limit(params.request().limit, api_config.query_ontology_limit) + .attach(hash_status::StatusCode::InvalidArgument) + .map_err(report_to_response)?, + ); + + let store = store_pool + .acquire(temporal_client.0) + .await + .map_err(report_to_response)?; + let response = store .query_data_type_subgraph(actor_id, params) .await diff --git a/libs/@local/graph/api/src/rest/entity_query_request.rs b/libs/@local/graph/api/src/rest/entity_query_request.rs index 44b3086df0e..63b025672c7 100644 --- a/libs/@local/graph/api/src/rest/entity_query_request.rs +++ b/libs/@local/graph/api/src/rest/entity_query_request.rs @@ -66,7 +66,7 @@ use serde_json::value::RawValue as RawJsonValue; use type_system::knowledge::Entity; use utoipa::ToSchema; -use super::{ApiConfig, status::BoxedResponse}; +use super::{ApiConfig, LimitExceededError, resolve_limit, status::BoxedResponse}; #[tracing::instrument(level = "info", skip_all)] fn generate_sorting_paths( @@ -485,8 +485,6 @@ pub enum EntityQueryOptionsError { instead." )] InvalidFieldForEntityOptions { field: &'static str }, - #[display("The requested limit ({requested}) exceeds the maximum allowed limit ({max}).")] - LimitExceeded { requested: usize, max: usize }, } impl core::error::Error for EntityQueryOptionsError {} @@ -591,27 +589,17 @@ impl<'q, 's, 'p> TryFrom> for EntityQue impl<'p> EntityQueryOptions<'_, 'p> { /// # Errors /// - /// Returns `LimitExceeded` if the requested limit exceeds the configured maximum in + /// Returns [`LimitExceededError`] if the requested limit exceeds the configured maximum in /// [`ApiConfig::query_entity_limit`]. pub fn into_params<'f>( self, filter: Filter<'f, Entity>, config: ApiConfig, - ) -> Result, Report> + ) -> Result, Report> where 'p: 'f, { - let max = config.query_entity_limit; - let limit = match self.limit { - Some(requested) if requested > max => { - return Err(Report::new(EntityQueryOptionsError::LimitExceeded { - requested, - max, - })); - } - Some(limit) => limit, - None => max, - }; + let limit = resolve_limit(self.limit, config.query_entity_limit)?; Ok(QueryEntitiesParams { filter, @@ -636,14 +624,14 @@ impl<'p> EntityQueryOptions<'_, 'p> { /// # Errors /// - /// Returns `LimitExceeded` if the requested limit exceeds the configured maximum in + /// Returns [`LimitExceededError`] if the requested limit exceeds the configured maximum in /// [`ApiConfig::query_entity_limit`]. pub fn into_traversal_params<'q>( self, filter: Filter<'q, Entity>, traversal: SubgraphTraversalParams, config: ApiConfig, - ) -> Result, Report> + ) -> Result, Report> where 'p: 'q, { diff --git a/libs/@local/graph/api/src/rest/entity_type.rs b/libs/@local/graph/api/src/rest/entity_type.rs index 1a6e26f6654..b6bfa71f0fe 100644 --- a/libs/@local/graph/api/src/rest/entity_type.rs +++ b/libs/@local/graph/api/src/rest/entity_type.rs @@ -46,8 +46,9 @@ use utoipa::{OpenApi, ToSchema}; use super::status::BoxedResponse; use crate::rest::{ - AuthenticatedUserHeader, OpenApiQuery, QueryLogger, RestApiStore, + ApiConfig, AuthenticatedUserHeader, OpenApiQuery, QueryLogger, RestApiStore, json::Json, + resolve_limit, status::{report_to_response, status_to_response}, utoipa_typedef::{ListOrValue, MaybeListOfEntityType, subgraph::Subgraph}, }; @@ -470,6 +471,7 @@ async fn query_entity_types( AuthenticatedUserHeader(actor_id): AuthenticatedUserHeader, store_pool: Extension>, temporal_client: Extension>>, + Extension(api_config): Extension, mut query_logger: Option>, Json(request): Json, ) -> Result, BoxedResponse> @@ -480,20 +482,25 @@ where query_logger.capture(actor_id, OpenApiQuery::GetEntityTypes(&request)); } + // Manually deserialize the query from a JSON value to allow borrowed deserialization + // and better error reporting. + let mut params = QueryEntityTypesParams::deserialize(&request) + .map_err(Report::from) + .map_err(report_to_response)?; + + params.request.limit = Some( + resolve_limit(params.request.limit, api_config.query_ontology_limit) + .attach(hash_status::StatusCode::InvalidArgument) + .map_err(report_to_response)?, + ); + let store = store_pool .acquire(temporal_client.0) .await .map_err(report_to_response)?; let response = store - .query_entity_types( - actor_id, - // Manually deserialize the query from a JSON value to allow borrowed deserialization - // and better error reporting. - QueryEntityTypesParams::deserialize(&request) - .map_err(Report::from) - .map_err(report_to_response)?, - ) + .query_entity_types(actor_id, params) .await .map_err(report_to_response) .map(Json); @@ -605,6 +612,7 @@ async fn query_entity_type_subgraph( AuthenticatedUserHeader(actor_id): AuthenticatedUserHeader, store_pool: Extension>, temporal_client: Extension>>, + Extension(api_config): Extension, mut query_logger: Option>, Json(request): Json, ) -> Result, BoxedResponse> @@ -615,12 +623,7 @@ where query_logger.capture(actor_id, OpenApiQuery::GetEntityTypeSubgraph(&request)); } - let store = store_pool - .acquire(temporal_client.0) - .await - .map_err(report_to_response)?; - - let params = QueryEntityTypeSubgraphParams::deserialize(&request) + let mut params = QueryEntityTypeSubgraphParams::deserialize(&request) .map_err(Report::from) .map_err(report_to_response)?; params @@ -628,6 +631,17 @@ where .map_err(Report::new) .map_err(report_to_response)?; + params.request_mut().limit = Some( + resolve_limit(params.request().limit, api_config.query_ontology_limit) + .attach(hash_status::StatusCode::InvalidArgument) + .map_err(report_to_response)?, + ); + + let store = store_pool + .acquire(temporal_client.0) + .await + .map_err(report_to_response)?; + let response = store .query_entity_type_subgraph(actor_id, params) .await diff --git a/libs/@local/graph/api/src/rest/mod.rs b/libs/@local/graph/api/src/rest/mod.rs index 815b128ce4a..eeb74c869dd 100644 --- a/libs/@local/graph/api/src/rest/mod.rs +++ b/libs/@local/graph/api/src/rest/mod.rs @@ -20,7 +20,7 @@ mod entity_query_request; mod json; mod utoipa_typedef; use alloc::{borrow::Cow, sync::Arc}; -use core::str::FromStr as _; +use core::{error::Error, str::FromStr as _}; use std::{ fs, io::{self, Write as _}, @@ -323,6 +323,37 @@ pub enum OpenApiQuery<'a> { DiffEntity(&'a DiffEntityParams), } +/// The requested limit exceeds the configured maximum. +#[derive(Debug, Copy, Clone, PartialEq, Eq, derive_more::Display)] +#[display("The requested limit ({requested}) exceeds the maximum allowed limit ({max}).")] +pub struct LimitExceededError { + pub requested: usize, + pub max: usize, +} + +impl Error for LimitExceededError {} + +/// Resolves an optional request limit against a configured maximum. +/// +/// Returns the configured maximum when no limit is requested. Returns the requested limit if it +/// does not exceed the maximum. +/// +/// # Errors +/// +/// Returns [`LimitExceededError`] if `requested` exceeds `max`. +pub(crate) fn resolve_limit( + requested: Option, + max: usize, +) -> Result> { + match requested { + Some(requested) if requested > max => { + Err(Report::new(LimitExceededError { requested, max })) + } + Some(limit) => Ok(limit), + None => Ok(max), + } +} + /// Server-side configuration for the REST API, shared across handlers via an [`Extension`]. #[derive(Debug, Clone, Copy)] #[cfg_attr(feature = "clap", derive(clap::Parser))] @@ -336,6 +367,16 @@ pub struct ApiConfig { clap(long, default_value_t = 1000, env = "HASH_GRAPH_QUERY_ENTITY_LIMIT") )] pub query_entity_limit: usize, + + /// The default and maximum number of ontology types returned by a single query. + /// + /// When a request omits `limit`, this value is used. Requests that specify a `limit` larger + /// than this value are rejected. + #[cfg_attr( + feature = "clap", + clap(long, default_value_t = 1000, env = "HASH_GRAPH_QUERY_ONTOLOGY_LIMIT") + )] + pub query_ontology_limit: usize, } pub struct RestRouterDependencies diff --git a/libs/@local/graph/api/src/rest/property_type.rs b/libs/@local/graph/api/src/rest/property_type.rs index 28ca67f3a63..0987e42e4d2 100644 --- a/libs/@local/graph/api/src/rest/property_type.rs +++ b/libs/@local/graph/api/src/rest/property_type.rs @@ -43,8 +43,9 @@ use utoipa::{OpenApi, ToSchema}; use super::status::BoxedResponse; use crate::rest::{ - AuthenticatedUserHeader, OpenApiQuery, QueryLogger, RestApiStore, + ApiConfig, AuthenticatedUserHeader, OpenApiQuery, QueryLogger, RestApiStore, json::Json, + resolve_limit, status::{report_to_response, status_to_response}, utoipa_typedef::{ListOrValue, MaybeListOfPropertyType, subgraph::Subgraph}, }; @@ -315,6 +316,7 @@ async fn query_property_types( AuthenticatedUserHeader(actor_id): AuthenticatedUserHeader, store_pool: Extension>, temporal_client: Extension>>, + Extension(api_config): Extension, mut query_logger: Option>, Json(request): Json, ) -> Result, BoxedResponse> @@ -325,20 +327,25 @@ where query_logger.capture(actor_id, OpenApiQuery::GetPropertyTypes(&request)); } + // Manually deserialize the query from a JSON value to allow borrowed deserialization + // and better error reporting. + let mut params = QueryPropertyTypesParams::deserialize(&request) + .map_err(Report::from) + .map_err(report_to_response)?; + + params.limit = Some( + resolve_limit(params.limit, api_config.query_ontology_limit) + .attach(hash_status::StatusCode::InvalidArgument) + .map_err(report_to_response)?, + ); + let store = store_pool .acquire(temporal_client.0) .await .map_err(report_to_response)?; let response = store - .query_property_types( - actor_id, - // Manually deserialize the query from a JSON value to allow borrowed deserialization - // and better error reporting. - QueryPropertyTypesParams::deserialize(&request) - .map_err(Report::from) - .map_err(report_to_response)?, - ) + .query_property_types(actor_id, params) .await .map_err(report_to_response) .map(Json); @@ -383,6 +390,7 @@ async fn query_property_type_subgraph( AuthenticatedUserHeader(actor_id): AuthenticatedUserHeader, store_pool: Extension>, temporal_client: Extension>>, + Extension(api_config): Extension, mut query_logger: Option>, Json(request): Json, ) -> Result, BoxedResponse> @@ -393,12 +401,7 @@ where query_logger.capture(actor_id, OpenApiQuery::GetPropertyTypeSubgraph(&request)); } - let store = store_pool - .acquire(temporal_client.0) - .await - .map_err(report_to_response)?; - - let params = QueryPropertyTypeSubgraphParams::deserialize(&request) + let mut params = QueryPropertyTypeSubgraphParams::deserialize(&request) .map_err(Report::from) .map_err(report_to_response)?; params @@ -406,6 +409,17 @@ where .map_err(Report::new) .map_err(report_to_response)?; + params.request_mut().limit = Some( + resolve_limit(params.request().limit, api_config.query_ontology_limit) + .attach(hash_status::StatusCode::InvalidArgument) + .map_err(report_to_response)?, + ); + + let store = store_pool + .acquire(temporal_client.0) + .await + .map_err(report_to_response)?; + let response = store .query_property_type_subgraph(actor_id, params) .await diff --git a/tests/graph/benches/manual_queries/entity_queries/mod.rs b/tests/graph/benches/manual_queries/entity_queries/mod.rs index e4fc613cca1..ff0e4d76963 100644 --- a/tests/graph/benches/manual_queries/entity_queries/mod.rs +++ b/tests/graph/benches/manual_queries/entity_queries/mod.rs @@ -337,6 +337,7 @@ where { let config = ApiConfig { query_entity_limit: 1000, + query_ontology_limit: 1000, }; match request {