From 335397750084e56d2d12575889dd06187b141825 Mon Sep 17 00:00:00 2001 From: Christophe Date: Thu, 29 May 2025 13:06:29 -0400 Subject: [PATCH 01/13] feat: semantic search query optimization and entity filtering --- api/src/schema/query.rs | 9 +- grc20-core/src/mapping/entity/find_many.rs | 1 + grc20-core/src/mapping/entity/find_one.rs | 1 + grc20-core/src/mapping/entity/mod.rs | 6 + .../src/mapping/entity/semantic_search.rs | 218 ++++++++++++++++++ grc20-core/src/mapping/entity/utils.rs | 25 +- .../mapping/query_utils/attributes_filter.rs | 62 ++++- .../src/mapping/query_utils/prop_filter.rs | 58 +++-- .../src/mapping/query_utils/query_builder.rs | 2 +- .../src/mapping/query_utils/version_filter.rs | 2 +- .../src/mapping/relation/find_many_to.rs | 1 + .../src/mapping/relation/find_one_to.rs | 1 + grc20-core/src/mapping/triple.rs | 58 ++--- grc20-sdk/src/models/base_entity.rs | 4 +- sink/src/bootstrap/boostrap_indexer.rs | 2 - 15 files changed, 381 insertions(+), 69 deletions(-) create mode 100644 grc20-core/src/mapping/entity/semantic_search.rs diff --git a/api/src/schema/query.rs b/api/src/schema/query.rs index 791ebeb..0e49f5f 100644 --- a/api/src/schema/query.rs +++ b/api/src/schema/query.rs @@ -318,12 +318,12 @@ impl RootQuery { .map(|triple| Triple::new(triple, space_id, version_index))) } - async fn search<'a, S: ScalarValue>( + async fn search_triples<'a, S: ScalarValue>( &'a self, executor: &'a Executor<'_, '_, KnowledgeGraph, S>, query: String, #[graphql(default = 100)] first: i32, - // #[graphql(default = 0)] skip: i32, + #[graphql(default = 0)] skip: i32, ) -> FieldResult> { let embedding = executor .context() @@ -336,8 +336,9 @@ impl RootQuery { .map(|v| v as f64) .collect::>(); - let query = mapping::triple::semantic_search(&executor.context().neo4j, embedding) - .limit(first as usize); + let query = mapping::triple::search(&executor.context().neo4j, embedding) + .limit(first as usize) + .skip(skip as usize); Ok(query .send() diff --git 
a/grc20-core/src/mapping/entity/find_many.rs b/grc20-core/src/mapping/entity/find_many.rs index 1a2f620..ff3704d 100644 --- a/grc20-core/src/mapping/entity/find_many.rs +++ b/grc20-core/src/mapping/entity/find_many.rs @@ -157,6 +157,7 @@ impl QueryStream> for FindManyQuery> { "e", "attrs", "types", + None, "RETURN e{.*, attrs: attrs, types: types}", ), ); diff --git a/grc20-core/src/mapping/entity/find_one.rs b/grc20-core/src/mapping/entity/find_one.rs index 451b286..f0ebe9e 100644 --- a/grc20-core/src/mapping/entity/find_one.rs +++ b/grc20-core/src/mapping/entity/find_one.rs @@ -88,6 +88,7 @@ impl Query>> for FindOneQuery> { "e", "attrs", "types", + None, "RETURN e{.*, attrs: attrs, types: types}", ), ) diff --git a/grc20-core/src/mapping/entity/mod.rs b/grc20-core/src/mapping/entity/mod.rs index be732f9..96f48c4 100644 --- a/grc20-core/src/mapping/entity/mod.rs +++ b/grc20-core/src/mapping/entity/mod.rs @@ -5,6 +5,7 @@ pub mod find_one; pub mod insert_many; pub mod insert_one; pub mod models; +pub mod semantic_search; pub mod utils; pub use delete_one::DeleteOneQuery; @@ -12,6 +13,7 @@ pub use find_many::FindManyQuery; pub use find_one::FindOneQuery; pub use insert_one::InsertOneQuery; pub use models::{Entity, EntityNode, EntityNodeRef, SystemProperties}; +pub use semantic_search::SemanticSearchQuery; pub use utils::{EntityFilter, EntityRelationFilter}; use crate::block::BlockMetadata; @@ -40,6 +42,10 @@ pub fn find_many(neo4j: &neo4rs::Graph) -> FindManyQuery { FindManyQuery::new(neo4j) } +pub fn search(neo4j: &neo4rs::Graph, vector: Vec) -> SemanticSearchQuery { + SemanticSearchQuery::new(neo4j, vector) +} + pub fn insert_one( neo4j: &neo4rs::Graph, block: &BlockMetadata, diff --git a/grc20-core/src/mapping/entity/semantic_search.rs b/grc20-core/src/mapping/entity/semantic_search.rs new file mode 100644 index 0000000..f8607f9 --- /dev/null +++ b/grc20-core/src/mapping/entity/semantic_search.rs @@ -0,0 +1,218 @@ +use futures::{Stream, StreamExt, TryStreamExt}; + 
+use crate::{ + entity::utils::MatchEntity, + error::DatabaseError, + mapping::{ + query_utils::VersionFilter, AttributeNode, FromAttributes, PropFilter, QueryBuilder, + QueryStream, Subquery, + }, +}; + +use super::{Entity, EntityFilter, EntityNode}; + +pub struct SemanticSearchQuery { + neo4j: neo4rs::Graph, + vector: Vec, + filter: EntityFilter, + space_id: Option>, + version: VersionFilter, + limit: usize, + skip: Option, + + _marker: std::marker::PhantomData, +} + +impl SemanticSearchQuery { + pub fn new(neo4j: &neo4rs::Graph, vector: Vec) -> Self { + Self { + neo4j: neo4j.clone(), + vector, + filter: EntityFilter::default(), + space_id: None, + version: VersionFilter::default(), + limit: 100, + skip: None, + + _marker: std::marker::PhantomData, + } + } + + pub fn filter(mut self, filter: EntityFilter) -> Self { + self.filter = filter; + self + } + + pub fn space_id(mut self, filter: PropFilter) -> Self { + self.space_id = Some(filter); + self + } + + pub fn version(mut self, version: impl Into) -> Self { + self.version.version_mut(version.into()); + self + } + + pub fn limit(mut self, limit: usize) -> Self { + self.limit = limit; + self + } + + pub fn limit_opt(mut self, limit: Option) -> Self { + if let Some(limit) = limit { + self.limit = limit; + } + self + } + + pub fn skip(mut self, skip: usize) -> Self { + self.skip = Some(skip); + self + } + + pub fn skip_opt(mut self, skip: Option) -> Self { + self.skip = skip; + self + } + + fn subquery(&self) -> QueryBuilder { + const QUERY: &str = r#" + CALL db.index.vector.queryNodes('vector_index', $limit * $effective_search_ratio, $vector) + YIELD node AS n, score AS score + MATCH (e:Entity) -[r:ATTRIBUTE]-> (n) + "#; + + // Exact neighbor search using vector index (very expensive but allows prefiltering) + // const QUERY: &str = const_format::formatcp!( + // r#" + // MATCH (e:Entity) -[r:ATTRIBUTE]-> (a:Attribute:Indexed) + // WHERE r.max_version IS null + // AND a.embedding IS NOT NULL + // WITH e, a, r, 
vector.similarity.cosine(a.embedding, $vector) AS score + // ORDER BY score DESC + // WHERE score IS NOT null + // LIMIT $limit + // RETURN a{{.*, entity: e.id, space_version: r.min_version, space_id: r.space_id, score: score}} + // "#, + // ); + + QueryBuilder::default() + .subquery(QUERY) + .subquery(self.filter.subquery("e")) + .limit(self.limit) + .skip_opt(self.skip) + .params("vector", self.vector.clone()) + .params("effective_search_ratio", EFFECTIVE_SEARCH_RATIO) + .params("limit", self.limit as i64) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct SemanticSearchResult { + pub entity: T, + pub score: f64, +} + +const EFFECTIVE_SEARCH_RATIO: f64 = 10000.0; // Adjust this ratio based on your needs + +impl QueryStream> for SemanticSearchQuery { + async fn send( + self, + ) -> Result< + impl Stream, DatabaseError>>, + DatabaseError, + > { + let query = self.subquery().r#return("DISTINCT e, score"); + + if cfg!(debug_assertions) || cfg!(test) { + tracing::info!( + "entity_node::FindManyQuery:::\n{}\nparams:{:?}", + query.compile(), + query.params() + ); + }; + + #[derive(Debug, serde::Deserialize)] + struct RowResult { + e: EntityNode, + score: f64, + } + + Ok(self + .neo4j + .execute(query.build()) + .await? 
+ .into_stream_as::() + .map_err(DatabaseError::from) + .and_then(|row| async move { + Ok(SemanticSearchResult { + entity: row.e, + score: row.score, + }) + })) + } +} + +impl QueryStream>> + for SemanticSearchQuery> +{ + async fn send( + self, + ) -> Result< + impl Stream>, DatabaseError>>, + DatabaseError, + > { + let match_entity = MatchEntity::new(&self.space_id, &self.version); + + let query = self.subquery().with( + vec!["e".to_string(), "score".to_string()], + match_entity.chain( + "e", + "attrs", + "types", + Some(vec!["score".to_string()]), + "RETURN e{.*, attrs: attrs, types: types, score: score}", + ), + ); + + if cfg!(debug_assertions) || cfg!(test) { + tracing::info!( + "entity_node::FindManyQuery::>:\n{}\nparams:{:?}", + query.compile(), + query.params + ); + }; + + #[derive(Debug, serde::Deserialize)] + struct RowResult { + #[serde(flatten)] + node: EntityNode, + attrs: Vec, + types: Vec, + score: f64, + } + + let stream = self + .neo4j + .execute(query.build()) + .await? + .into_stream_as::() + .map_err(DatabaseError::from) + .map(|row_result| { + row_result.and_then(|row| { + T::from_attributes(row.attrs.into()) + .map(|data| SemanticSearchResult { + entity: Entity { + node: row.node, + attributes: data, + types: row.types.into_iter().map(|t| t.id).collect(), + }, + score: row.score, + }) + .map_err(DatabaseError::from) + }) + }); + + Ok(stream) + } +} diff --git a/grc20-core/src/mapping/entity/utils.rs b/grc20-core/src/mapping/entity/utils.rs index fc05811..3fd6117 100644 --- a/grc20-core/src/mapping/entity/utils.rs +++ b/grc20-core/src/mapping/entity/utils.rs @@ -16,6 +16,8 @@ pub struct EntityFilter { pub(crate) id: Option>, pub(crate) attributes: Vec, pub(crate) relations: Option, + /// Used to check if the entity exists in the space (i.e.: the entity + /// has at least one attribute in the space). 
pub(crate) space_id: Option>, } @@ -284,6 +286,7 @@ impl<'a> MatchEntity<'a> { node_var: impl Into, attributes_node_var: impl Into, types_node_var: impl Into, + extra_vars: Option>, next: impl Subquery, ) -> QueryBuilder { let node_var = node_var.into(); @@ -294,21 +297,23 @@ impl<'a> MatchEntity<'a> { // let attributes_node_var = format!("{entity_node_var}_attributes"); // let types_node_var = format!("{entity_node_var}_types"); + let with_vars = vec![ + node_var.clone(), + format!("COLLECT(DISTINCT {attributes_node_var}{{.*}}) AS {attributes_node_var}"), + format!("COLLECT(DISTINCT {types_node_var}{{.*}}) AS {types_node_var}"), + ]; + let with_vars = if let Some(extra_vars) = extra_vars { + with_vars.into_iter().chain(extra_vars).collect() + } else { + with_vars + }; + QueryBuilder::default() .subquery( self.match_attributes .subquery(&node_var, &attributes_node_var), ) .subquery(self.match_types.subquery(&node_var, &types_node_var)) - .with( - vec![ - node_var, - format!( - "COLLECT(DISTINCT {attributes_node_var}{{.*}}) AS {attributes_node_var}" - ), - format!("COLLECT(DISTINCT {types_node_var}{{.*}}) AS {types_node_var}"), - ], - next, - ) + .with(with_vars, next) } } diff --git a/grc20-core/src/mapping/query_utils/attributes_filter.rs b/grc20-core/src/mapping/query_utils/attributes_filter.rs index f23be78..7860342 100644 --- a/grc20-core/src/mapping/query_utils/attributes_filter.rs +++ b/grc20-core/src/mapping/query_utils/attributes_filter.rs @@ -1,5 +1,19 @@ use super::{prop_filter::PropFilter, query_builder::MatchQuery, version_filter::VersionFilter}; +/// Struct representing an attribute filter subquery for an entity's attributes. +/// +/// IMPORTANT: This filter subquery is designed to be used to filter an entity by its +/// attributes (and not filter a list of attributes!) +/// +/// The struct follows the builder pattern to set the filter parameters. 
+/// ```rust +/// let filter = AttributeFilter::new(system_ids::NAME_ATTRIBUTE) +/// .value(["Bob", "Alice"]) +/// .value_type("TEXT") +/// .space_id("25omwWh6HYgeRQKCaSpVpa"); +/// +/// let subquery = filter.subquery("e"); +/// ``` #[derive(Clone, Debug)] pub struct AttributeFilter { attribute: String, @@ -10,6 +24,9 @@ pub struct AttributeFilter { } impl AttributeFilter { + /// Create a new filter subquery for the provided `attribute`. By default, if no other + /// parameters are set, this filter subquery will filter entities for which the `attribute` + /// exists in the current version of the knowledge graph. pub fn new(attribute: &str) -> Self { Self { attribute: attribute.to_owned(), @@ -20,23 +37,23 @@ impl AttributeFilter { } } - pub fn space_id(mut self, space_id: PropFilter) -> Self { - self.space_id = Some(space_id); + pub fn space_id(mut self, space_id: impl Into>) -> Self { + self.space_id = Some(space_id.into()); self } - pub fn space_id_mut(&mut self, space_id: PropFilter) -> &mut Self { - self.space_id = Some(space_id); + pub fn space_id_mut(&mut self, space_id: impl Into>) -> &mut Self { + self.space_id = Some(space_id.into()); self } - pub fn value(mut self, value: PropFilter) -> Self { - self.value = Some(value); + pub fn value(mut self, value: impl Into>) -> Self { + self.value = Some(value.into()); self } - pub fn value_type(mut self, value_type: PropFilter) -> Self { - self.value_type = Some(value_type); + pub fn value_type(mut self, value_type: impl Into>) -> Self { + self.value_type = Some(value_type.into()); self } @@ -55,6 +72,35 @@ impl AttributeFilter { self } + /// Compiles the attribute filter into a Neo4j subquery that will filter the nodes + /// identified by `node_var` according to the provided parameters. 
+ /// + /// The subquery will have the following form: + /// ```cypher + /// MATCH ({node_var}) -[r_{node_var}_attribute:ATTRIBUTE]-> ({node_var}_attribute:Attribute {id: $attribute}) + /// WHERE {VERSION_FILTER} + /// AND {SPACE_ID_FITLER} + /// AND {VALUE_FILTER} + /// AND {VALUE_TYPE_FILTER} + /// ``` + /// + /// For example, if: + /// - the attribute to filter on is `LuBWqZAu6pz54eiJS5mLv8` + /// - the nodes to filter are bound to the variable `e` + /// - the version filter is set to filter the current version + /// - the value filter is set to `["foo", "bar"]` + /// - the value type filter set to `TEXT` + /// - the space id filter set to `25omwWh6HYgeRQKCaSpVpa` + /// + /// the subquery will be: + /// ```cypher + /// MATCH (e) -[r_e_attribute:ATTRIBUTE]-> (e_attribute:Attribute {id: $attribute}) + /// WHERE r_e_attribute.max_version IS NULL + /// AND r_e_attribute.space_id = "25omwWh6HYgeRQKCaSpVpa" + /// AND e_attribute.value IN ["foo", "bar"] + /// AND e_attribute.value_type = "TEXT" + /// ``` + /// Note: the `$attribute` query parameter will contain the value `LuBWqZAu6pz54eiJS5mLv8` pub fn subquery(&self, node_var: &str) -> MatchQuery { let attr_rel_var = format!("r_{node_var}_{}", self.attribute); let attr_node_var = format!("{node_var}_{}", self.attribute); diff --git a/grc20-core/src/mapping/query_utils/prop_filter.rs b/grc20-core/src/mapping/query_utils/prop_filter.rs index 15d2e0a..9c7d7d5 100644 --- a/grc20-core/src/mapping/query_utils/prop_filter.rs +++ b/grc20-core/src/mapping/query_utils/prop_filter.rs @@ -158,13 +158,41 @@ impl PropFilter { } impl> PropFilter { - /// Converts the filter into a query part. - /// The `node_var` is the variable name of the node in the query. - /// The `key` is the property key of the node. - /// The `expr` is an optional expression to use instead of the property key. - /// If `expr` is `None`, the node_var and key will be used as the expression to - /// filter, e.g. 
`{node_var}.{key} = $value` - pub(crate) fn subquery(&self, node_var: &str, key: &str, expr: Option<&str>) -> WhereClause { + /// Compiles the attribute filter into a [WhereClause] Neo4j subquery that will apply + /// a filter on the `key` field of the `node_var` node(s) (i.e.: `{node_var}.{key}`). + /// + /// If `expr` is set, then it will used as the filter target instead of the above. + /// + /// For example, given the following [PropFilter] (which creates a property filter) + /// ```rust,no-run + /// # fn main() { + /// # use std::collections::HashMap; + /// # use grc20_core::mapping::{PropFilter, Subquery}; + /// let prop_filter = PropFilter::default() + /// .value_not("Bob") + /// .value_lt("Gary"); + /// + /// let query = prop_filter.subquery("e", "name", None); + /// assert_eq!( + /// query.compile(), + /// "WHERE e.name <> $e_name_value_not\nAND e.name < $e_name_value_lt" + /// ) + /// assert_eq!( + /// query.params(), + /// HashMap::from([ + /// ("e_name_value_not", "Bob"), + /// ("e_name_value_lt", "Gary") + /// ]) + /// ) + /// + /// let query = prop_filter.subquery("e", "name", Some("my_expr")); + /// assert_eq!( + /// query.compile(), + /// "WHERE my_expr <> $e_name_value_not\nAND my_expr < $e_name_value_lt" + /// ) + /// # } + /// ``` + pub fn subquery(&self, node_var: &str, key: &str, expr: Option<&str>) -> WhereClause { let mut where_clause = WhereClause::default(); let expr = expr @@ -175,56 +203,56 @@ impl> PropFilter { let param_key = format!("{node_var}_{key}_value"); where_clause = where_clause .clause(format!("{expr} = ${param_key}")) - .params(param_key, value.clone()); + .set_param(param_key, value.clone()); } if let Some(value_gt) = &self.value_gt { let param_key = format!("{node_var}_{key}_value_gt"); where_clause = where_clause .clause(format!("{expr} > ${param_key}")) - .params(param_key, value_gt.clone()); + .set_param(param_key, value_gt.clone()); } if let Some(value_gte) = &self.value_gte { let param_key = 
format!("{node_var}_{key}_value_gte"); where_clause = where_clause .clause(format!("{expr} >= ${param_key}")) - .params(param_key, value_gte.clone()); + .set_param(param_key, value_gte.clone()); } if let Some(value_lt) = &self.value_lt { let param_key = format!("{node_var}_{key}_value_lt"); where_clause = where_clause .clause(format!("{expr} < ${param_key}")) - .params(param_key, value_lt.clone()); + .set_param(param_key, value_lt.clone()); } if let Some(value_lte) = &self.value_lte { let param_key = format!("{node_var}_{key}_value_lte"); where_clause = where_clause .clause(format!("{expr} <= ${param_key}")) - .params(param_key, value_lte.clone()); + .set_param(param_key, value_lte.clone()); } if let Some(value_not) = &self.value_not { let param_key = format!("{node_var}_{key}_value_not"); where_clause = where_clause .clause(format!("{expr} <> ${param_key}")) - .params(param_key, value_not.clone()); + .set_param(param_key, value_not.clone()); } if let Some(value_in) = &self.value_in { let param_key = format!("{node_var}_{key}_value_in"); where_clause = where_clause .clause(format!("{expr} IN ${param_key}")) - .params(param_key, value_in.clone()); + .set_param(param_key, value_in.clone()); } if let Some(value_not_in) = &self.value_not_in { let param_key = format!("{node_var}_{key}_value_not_in"); where_clause = where_clause .clause(format!("{expr} NOT IN ${param_key}")) - .params(param_key, value_not_in.clone()); + .set_param(param_key, value_not_in.clone()); } where_clause diff --git a/grc20-core/src/mapping/query_utils/query_builder.rs b/grc20-core/src/mapping/query_utils/query_builder.rs index 3562a32..16d8564 100644 --- a/grc20-core/src/mapping/query_utils/query_builder.rs +++ b/grc20-core/src/mapping/query_utils/query_builder.rs @@ -236,7 +236,7 @@ impl WhereClause { self } - pub fn params(mut self, key: impl Into, value: impl Into) -> Self { + pub fn set_param(mut self, key: impl Into, value: impl Into) -> Self { self.params.insert(key.into(), value.into()); 
self } diff --git a/grc20-core/src/mapping/query_utils/version_filter.rs b/grc20-core/src/mapping/query_utils/version_filter.rs index 62e0351..615c066 100644 --- a/grc20-core/src/mapping/query_utils/version_filter.rs +++ b/grc20-core/src/mapping/query_utils/version_filter.rs @@ -28,7 +28,7 @@ impl VersionFilter { let param_key = format!("{}_version", var); WhereClause::new(format!("{var}.min_version <= ${param_key} AND ({var}.max_version IS NULL OR {var}.max_version > ${param_key})")) - .params(param_key, version.clone()) + .set_param(param_key, version.clone()) } else { WhereClause::new(format!("{var}.max_version IS NULL")) } diff --git a/grc20-core/src/mapping/relation/find_many_to.rs b/grc20-core/src/mapping/relation/find_many_to.rs index 5c73963..d5b6086 100644 --- a/grc20-core/src/mapping/relation/find_many_to.rs +++ b/grc20-core/src/mapping/relation/find_many_to.rs @@ -149,6 +149,7 @@ impl QueryStream> for FindManyToQuery> { "to", "attrs", "types", + None, "RETURN to{.*, attrs: attrs, types: types}", ), ); diff --git a/grc20-core/src/mapping/relation/find_one_to.rs b/grc20-core/src/mapping/relation/find_one_to.rs index 6c88c2d..bf9ec1a 100644 --- a/grc20-core/src/mapping/relation/find_one_to.rs +++ b/grc20-core/src/mapping/relation/find_one_to.rs @@ -84,6 +84,7 @@ impl Query>> for FindOneToQuery> { "to", "attrs", "types", + None, "RETURN to{.*, attrs: attrs, types: types}", ), ); diff --git a/grc20-core/src/mapping/triple.rs b/grc20-core/src/mapping/triple.rs index e5c1e0c..61d504b 100644 --- a/grc20-core/src/mapping/triple.rs +++ b/grc20-core/src/mapping/triple.rs @@ -136,7 +136,7 @@ pub fn find_many(neo4j: &neo4rs::Graph) -> FindManyQuery { FindManyQuery::new(neo4j) } -pub fn semantic_search(neo4j: &neo4rs::Graph, vector: Vec) -> SemanticSearchQuery { +pub fn search(neo4j: &neo4rs::Graph, vector: Vec) -> SemanticSearchQuery { SemanticSearchQuery::new(neo4j, vector) } @@ -621,7 +621,7 @@ pub struct SemanticSearchQuery { // space_id: Option>, // 
space_version: VersionFilter, limit: usize, - // skip: Option, + skip: Option, } impl SemanticSearchQuery { @@ -632,7 +632,7 @@ impl SemanticSearchQuery { // space_id: None, // space_version: VersionFilter::default(), limit: 100, - // skip: None, + skip: None, } } @@ -658,15 +658,15 @@ impl SemanticSearchQuery { self } - // pub fn skip(mut self, skip: usize) -> Self { - // self.skip = Some(skip); - // self - // } + pub fn skip(mut self, skip: usize) -> Self { + self.skip = Some(skip); + self + } - // pub fn skip_opt(mut self, skip: Option) -> Self { - // self.skip = skip; - // self - // } + pub fn skip_opt(mut self, skip: Option) -> Self { + self.skip = skip; + self + } } #[derive(Clone, Debug, Default, Deserialize, PartialEq)] @@ -678,34 +678,40 @@ pub struct SemanticSearchResult { pub space_version: String, } +const EFFECTIVE_SEARCH_RATIO: f64 = 10000.0; // Adjust this ratio based on your needs + impl QueryStream for SemanticSearchQuery { async fn send( self, ) -> Result>, DatabaseError> { - // const QUERY: &str = const_format::formatcp!( - // r#" - // CALL db.index.vector.queryNodes('vector_index', $limit, $vector) - // YIELD node AS n, score AS score - // MATCH (e:Entity) -[r:ATTRIBUTE]-> (n) - // RETURN n{{.*, entity: e.id, space_version: r.min_version, space_id: r.space_id, score: score}} - // "# - // ); const QUERY: &str = const_format::formatcp!( r#" - MATCH (e:Entity) -[r:ATTRIBUTE]-> (a:Attribute:Indexed) - WHERE r.max_version IS null - WITH e, a, r, vector.similarity.cosine(a.embedding, $vector) AS score + CALL db.index.vector.queryNodes('vector_index', $limit * $effective_search_ratio, $vector) + YIELD node AS n, score AS score ORDER BY score DESC - WHERE score IS NOT null LIMIT $limit - RETURN a{{.*, entity: e.id, space_version: r.min_version, space_id: r.space_id, score: score}} - "#, + MATCH (e:Entity) -[r:ATTRIBUTE]-> (n) + RETURN n{{.*, entity: e.id, space_version: r.min_version, space_id: r.space_id, score: score}} + "# ); + // const QUERY: &str = 
const_format::formatcp!( + // r#" + // MATCH (e:Entity) -[r:ATTRIBUTE]-> (a:Attribute:Indexed) + // WHERE r.max_version IS null + // AND a.embedding IS NOT NULL + // WITH e, a, r, vector.similarity.cosine(a.embedding, $vector) AS score + // ORDER BY score DESC + // WHERE score IS NOT null + // LIMIT $limit + // RETURN a{{.*, entity: e.id, space_version: r.min_version, space_id: r.space_id, score: score}} + // "#, + // ); let query = neo4rs::query(QUERY) .param("vector", self.vector) - .param("limit", self.limit as i64); + .param("limit", self.limit as i64) + .param("effective_search_ratio", EFFECTIVE_SEARCH_RATIO); Ok(self .neo4j diff --git a/grc20-sdk/src/models/base_entity.rs b/grc20-sdk/src/models/base_entity.rs index 6924f51..76983c7 100644 --- a/grc20-sdk/src/models/base_entity.rs +++ b/grc20-sdk/src/models/base_entity.rs @@ -11,10 +11,10 @@ use grc20_core::{ #[grc20_core::entity] pub struct BaseEntity { #[grc20(attribute = system_ids::NAME_ATTRIBUTE)] - name: Option, + pub name: Option, #[grc20(attribute = system_ids::DESCRIPTION_ATTRIBUTE)] - description: Option, + pub description: Option, } pub async fn blocks( diff --git a/sink/src/bootstrap/boostrap_indexer.rs b/sink/src/bootstrap/boostrap_indexer.rs index 257424c..a2a10aa 100644 --- a/sink/src/bootstrap/boostrap_indexer.rs +++ b/sink/src/bootstrap/boostrap_indexer.rs @@ -54,8 +54,6 @@ pub fn triples() -> Vec { system_ids::NAME_ATTRIBUTE, "Space Kind", ), - // Triple::new(indexer_ids::SPACE_VERSION_COUNTER, system_ids::NAME_ATTRIBUTE, "Space Version Counter"), - // Member and Editor relations Triple::new( indexer_ids::MEMBER_RELATION, From 663c5cc65a299f5d83dc158835461dd62f9a8b3e Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 10:40:43 -0400 Subject: [PATCH 02/13] feat: Add mcp server --- Cargo.lock | 249 ++++++++++-- Cargo.toml | 2 +- README.md | 8 + mcp-server/Cargo.toml | 21 + .../resources/get_properties_description.md | 0 mcp-server/resources/instructions.md | 13 + 
.../resources/search_entity_description.md | 0 .../search_relation_type_description.md | 0 .../resources/search_type_description.md | 0 mcp-server/src/main.rs | 381 ++++++++++++++++++ 10 files changed, 642 insertions(+), 32 deletions(-) create mode 100644 mcp-server/Cargo.toml create mode 100644 mcp-server/resources/get_properties_description.md create mode 100644 mcp-server/resources/instructions.md create mode 100644 mcp-server/resources/search_entity_description.md create mode 100644 mcp-server/resources/search_relation_type_description.md create mode 100644 mcp-server/resources/search_type_description.md create mode 100644 mcp-server/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 43f3095..1bafabf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -99,16 +99,16 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.95" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "api" version = "0.1.0" dependencies = [ "anyhow", - "axum", + "axum 0.7.9", "cache", "chrono", "clap", @@ -417,7 +417,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ "async-trait", - "axum-core", + "axum-core 0.4.5", "bytes", "futures-util", "http 1.2.0", @@ -426,7 +426,41 @@ dependencies = [ "hyper 1.6.0", "hyper-util", "itoa", - "matchit", + "matchit 0.7.3", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper 1.0.2", + "tokio", + "tower 0.5.2", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" +dependencies = [ + "axum-core 0.5.2", + "bytes", + "form_urlencoded", + "futures-util", + "http 1.2.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.6.0", + "hyper-util", + "itoa", + "matchit 0.8.4", "memchr", "mime", "percent-encoding", @@ -465,6 +499,26 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum-core" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" +dependencies = [ + "bytes", + "futures-core", + "http 1.2.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.2", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "backoff" version = "0.4.0" @@ -475,7 +529,7 @@ dependencies = [ "getrandom 0.2.15", "instant", "pin-project-lite", - "rand", + "rand 0.8.5", "tokio", ] @@ -756,9 +810,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.27" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796" +checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" dependencies = [ "clap_builder", "clap_derive", @@ -766,9 +820,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.27" +version = "4.5.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7" +checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" dependencies = [ "anstream", "anstyle", @@ -778,9 +832,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.24" +version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +checksum = 
"09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" dependencies = [ "heck", "proc-macro2", @@ -1170,6 +1224,12 @@ dependencies = [ "serde_json", ] +[[package]] +name = "dyn-clone" +version = "1.0.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" + [[package]] name = "either" version = "1.13.0" @@ -1597,7 +1657,7 @@ dependencies = [ "neo4rs", "pretty_assertions", "prost", - "rand", + "rand 0.8.5", "serde", "serde_json", "serde_with", @@ -1737,7 +1797,7 @@ dependencies = [ "libc", "log", "native-tls", - "rand", + "rand 0.8.5", "reqwest 0.12.12", "serde", "serde_json", @@ -2397,7 +2457,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed3b21b9af313a2967572c8d4b8875c53fc8062e10768470de4748c16ce7b992" dependencies = [ - "axum", + "axum 0.7.9", "bytes", "juniper", "juniper_graphql_ws", @@ -2626,6 +2686,12 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "matrixmultiply" version = "0.3.9" @@ -2646,6 +2712,27 @@ dependencies = [ "rayon", ] +[[package]] +name = "mcp-server" +version = "0.1.0" +dependencies = [ + "anyhow", + "axum 0.8.4", + "clap", + "fastembed", + "futures", + "grc20-core", + "grc20-sdk", + "rmcp", + "schemars", + "serde", + "serde_json", + "tokio", + "tokio-util", + "tracing", + "tracing-subscriber", +] + [[package]] name = "md-5" version = "0.10.6" @@ -2666,7 +2753,7 @@ dependencies = [ "enum_dispatch", "openssl", "r2d2", - "rand", + "rand 0.8.5", "url", ] @@ -3141,7 +3228,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -3421,8 +3508,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.3", ] [[package]] @@ -3432,7 +3529,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", ] [[package]] @@ -3444,6 +3551,15 @@ dependencies = [ "getrandom 0.2.15", ] +[[package]] +name = "rand_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +dependencies = [ + "getrandom 0.3.1", +] + [[package]] name = "rav1e" version = "0.7.1" @@ -3470,8 +3586,8 @@ dependencies = [ "once_cell", "paste", "profiling", - "rand", - "rand_chacha", + "rand 0.8.5", + "rand_chacha 0.3.1", "simd_helpers", "system-deps", "thiserror 1.0.69", @@ -3717,6 +3833,41 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmcp" +version = "0.1.5" +source = "git+https://github.com/modelcontextprotocol/rust-sdk?branch=main#d5a72e43c17d688086738030387af1cd39a9ce38" +dependencies = [ + "axum 0.8.4", + "base64 
0.22.1", + "chrono", + "futures", + "paste", + "pin-project-lite", + "rand 0.9.1", + "rmcp-macros", + "schemars", + "serde", + "serde_json", + "thiserror 2.0.11", + "tokio", + "tokio-stream", + "tokio-util", + "tracing", + "uuid", +] + +[[package]] +name = "rmcp-macros" +version = "0.1.5" +source = "git+https://github.com/modelcontextprotocol/rust-sdk?branch=main#d5a72e43c17d688086738030387af1cd39a9ce38" +dependencies = [ + "proc-macro2", + "quote", + "serde_json", + "syn 2.0.96", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -3850,6 +4001,31 @@ dependencies = [ "parking_lot", ] +[[package]] +name = "schemars" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" +dependencies = [ + "chrono", + "dyn-clone", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "0.8.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.96", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -3912,6 +4088,17 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.96", +] + [[package]] name = "serde_json" version = "1.0.140" @@ -4090,7 +4277,7 @@ name = "sink" version = "0.1.0" dependencies = [ "anyhow", - "axum", + "axum 0.7.9", "cache", "chrono", "clap", @@ -4612,7 +4799,7 @@ dependencies = [ "monostate", "onig", "paste", - "rand", + "rand 0.8.5", "rayon", "rayon-cond", "regex", @@ -4628,9 +4815,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.44.1" +version = "1.45.1" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" +checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779" dependencies = [ "backtrace", "bytes", @@ -4672,7 +4859,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f" dependencies = [ "pin-project", - "rand", + "rand 0.8.5", "tokio", ] @@ -4715,9 +4902,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.13" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" +checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df" dependencies = [ "bytes", "futures-core", @@ -4768,7 +4955,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ "async-stream", "async-trait", - "axum", + "axum 0.7.9", "base64 0.22.1", "bytes", "flate2", @@ -4805,7 +4992,7 @@ dependencies = [ "indexmap 1.9.3", "pin-project", "pin-project-lite", - "rand", + "rand 0.8.5", "slab", "tokio", "tokio-util", diff --git a/Cargo.toml b/Cargo.toml index c34c1b8..29e4a9a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,5 +8,5 @@ members = [ "web3-utils", "grc20-core", "grc20-macros", - "grc20-sdk", + "grc20-sdk", "mcp-server", ] diff --git a/README.md b/README.md index 977910a..83b3218 100644 --- a/README.md +++ b/README.md @@ -43,5 +43,13 @@ Schema introspection npx get-graphql-schema http://127.0.0.1:8080/graphql > api/schema.graphql ``` +## MCP Server +```bash +CFLAGS='-std=gnu17' cargo run --bin mcp-server -- \ + --neo4j-uri neo4j://localhost:7687 \ + --neo4j-user neo4j \ + --neo4j-pass neo4j +``` + ## GRC20 CLI Coming soon™️ \ No newline at end of file diff --git a/mcp-server/Cargo.toml b/mcp-server/Cargo.toml new file mode 100644 index 0000000..388ddbc --- 
/dev/null +++ b/mcp-server/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "mcp-server" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1.0.98" +axum = "0.8.4" +clap = { version = "4.5.39", features = ["derive", "env"] } +fastembed = "4.8.0" +futures = "0.3.31" +grc20-core = { version = "0.1.0", path = "../grc20-core" } +grc20-sdk = { version = "0.1.0", path = "../grc20-sdk" } +rmcp = { git = "https://github.com/modelcontextprotocol/rust-sdk", branch = "main", features = ["server", "transport-sse-server"] } +schemars = "0.8.22" +serde = { version = "1.0.219", features = ["derive"] } +serde_json = "1.0.140" +tokio = { version = "1.45.1", features = ["rt-multi-thread", "macros", "signal"] } +tokio-util = "0.7.15" +tracing = "0.1.41" +tracing-subscriber = { version = "0.3.19", features = ["env-filter"] } diff --git a/mcp-server/resources/get_properties_description.md b/mcp-server/resources/get_properties_description.md new file mode 100644 index 0000000..e69de29 diff --git a/mcp-server/resources/instructions.md b/mcp-server/resources/instructions.md new file mode 100644 index 0000000..e251cd8 --- /dev/null +++ b/mcp-server/resources/instructions.md @@ -0,0 +1,13 @@ +This server provides tools to query the Knowledge Graph (KG), a database of wide-ranging structured information (similar to wikidata). The KG organizes information using entities and relations. Entities can have 0, 1 or many types, while relations have exactly one relation type. Both entities and relations can have properties. + +Importantly, types, relation types and properties are themselves entities that can be queried. In other words, the KG contains both the property graph of the data as well as the data itself! + +The tools defined in the MCP server are made to be used in combination with each other. All except the most trivial user requests will require the use of multiple tools. + +Here is an example: +User> What are the properties of the Person type? 
+ +ToolCall> search_type("person") +ToolResult> +``` +``` \ No newline at end of file diff --git a/mcp-server/resources/search_entity_description.md b/mcp-server/resources/search_entity_description.md new file mode 100644 index 0000000..e69de29 diff --git a/mcp-server/resources/search_relation_type_description.md b/mcp-server/resources/search_relation_type_description.md new file mode 100644 index 0000000..e69de29 diff --git a/mcp-server/resources/search_type_description.md b/mcp-server/resources/search_type_description.md new file mode 100644 index 0000000..e69de29 diff --git a/mcp-server/src/main.rs b/mcp-server/src/main.rs new file mode 100644 index 0000000..b0e72a0 --- /dev/null +++ b/mcp-server/src/main.rs @@ -0,0 +1,381 @@ +use clap::{Args, Parser}; +use fastembed::{EmbeddingModel, InitOptions, TextEmbedding}; +use futures::TryStreamExt; +use grc20_core::{ + entity::{self, Entity, EntityRelationFilter}, + mapping::{query_utils::TypesFilter, Query, QueryStream}, + neo4rs, system_ids, +}; +use grc20_sdk::models::BaseEntity; +use rmcp::{ + Error as McpError, RoleServer, ServerHandler, + model::*, + service::RequestContext, + tool, + transport::sse_server::{SseServer, SseServerConfig}, +}; +use serde_json::json; +use std::sync::Arc; +use tracing_subscriber::{ + layer::SubscriberExt, + util::SubscriberInitExt, + {self}, +}; + +const BIND_ADDRESS: &str = "127.0.0.1:8000"; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + tracing_subscriber::registry() + .with( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "debug".to_string().into()), + ) + .with(tracing_subscriber::fmt::layer()) + .init(); + + let args = AppArgs::parse(); + + let neo4j = neo4rs::Graph::new( + &args.neo4j_args.neo4j_uri, + &args.neo4j_args.neo4j_user, + &args.neo4j_args.neo4j_pass, + ) + .await?; + + let config = SseServerConfig { + bind: BIND_ADDRESS.parse()?, + sse_path: "/sse".to_string(), + post_path: "/message".to_string(), + ct: 
tokio_util::sync::CancellationToken::new(), + sse_keep_alive: None, + }; + + let (sse_server, router) = SseServer::new(config); + + // Do something with the router, e.g., add routes or middleware + + let listener = tokio::net::TcpListener::bind(sse_server.config.bind).await?; + + let ct = sse_server.config.ct.child_token(); + + let server = axum::serve(listener, router).with_graceful_shutdown(async move { + ct.cancelled().await; + tracing::info!("sse server cancelled"); + }); + + tokio::spawn(async move { + if let Err(e) = server.await { + tracing::error!(error = %e, "sse server shutdown with error"); + } + }); + + let ct = sse_server.with_service(move || KnowledgeGraph::new(neo4j.clone())); + + tokio::signal::ctrl_c().await?; + ct.cancel(); + Ok(()) +} + +#[derive(Debug, serde::Deserialize, schemars::JsonSchema)] +pub struct StructRequest { + pub a: i32, + pub b: i32, +} + +const EMBEDDING_MODEL: EmbeddingModel = EmbeddingModel::AllMiniLML6V2; + +#[derive(Clone)] +pub struct KnowledgeGraph { + neo4j: neo4rs::Graph, + pub embedding_model: Arc, +} + +#[tool(tool_box)] +impl KnowledgeGraph { + #[allow(dead_code)] + pub fn new(neo4j: neo4rs::Graph) -> Self { + Self { + neo4j, + embedding_model: Arc::new( + TextEmbedding::try_new( + InitOptions::new(EMBEDDING_MODEL).with_show_download_progress(true), + ) + .expect("Failed to initialize embedding model"), + ), + } + } + + fn _create_resource_text(&self, uri: &str, name: &str) -> Resource { + RawResource::new(uri, name.to_string()).no_annotation() + } + + #[tool(description = "Search Types")] + async fn search_types( + &self, + #[tool(param)] + #[schemars(description = "The query string to search for types")] + query: String, + ) -> Result { + let embedding = self + .embedding_model + .embed(vec![&query], None) + .expect("Failed to get embedding") + .pop() + .expect("Embedding is empty") + .into_iter() + .map(|v| v as f64) + .collect::>(); + + let results = entity::search::>(&self.neo4j, embedding) + .filter( + 
entity::EntityFilter::default() + .relations(TypesFilter::default().r#type(system_ids::SCHEMA_TYPE)), + ) + .limit(8) + .send() + .await + .map_err(|e| { + McpError::internal_error( + "search_types_failed", + Some(json!({ "error": e.to_string() })), + ) + })? + .try_collect::>() + .await + .map_err(|e| { + McpError::internal_error( + "search_types_failed", + Some(json!({ "error": e.to_string() })), + ) + })?; + + tracing::info!("Found {} results for query '{}'", results.len(), query); + + Ok(CallToolResult::success( + results + .into_iter() + .map(|result| { + Content::json(json!({ + "id": result.entity.id(), + "name": result.entity.attributes.name, + "description": result.entity.attributes.description, + "types": result.entity.types, + })) + .expect("Failed to create JSON content") + }) + .collect(), + )) + } + + #[tool(description = "Search Relation Types")] + async fn search_relation_types( + &self, + #[tool(param)] + #[schemars(description = "The query string to search for relation types")] + query: String, + ) -> Result { + let embedding = self + .embedding_model + .embed(vec![&query], None) + .expect("Failed to get embedding") + .pop() + .expect("Embedding is empty") + .into_iter() + .map(|v| v as f64) + .collect::>(); + + let results = entity::search::>(&self.neo4j, embedding) + .filter( + entity::EntityFilter::default().relations( + EntityRelationFilter::default() + .relation_type(system_ids::VALUE_TYPE_ATTRIBUTE) + .to_id(system_ids::RELATION), + ), + ) + .limit(8) + .send() + .await + .map_err(|e| { + McpError::internal_error( + "search_relation_types", + Some(json!({ "error": e.to_string() })), + ) + })? 
+ .try_collect::>() + .await + .map_err(|e| { + McpError::internal_error( + "search_relation_types", + Some(json!({ "error": e.to_string() })), + ) + })?; + + tracing::info!("Found {} results for query '{}'", results.len(), query); + + Ok(CallToolResult::success( + results + .into_iter() + .map(|result| { + Content::json(json!({ + "id": result.entity.id(), + "name": result.entity.attributes.name, + "description": result.entity.attributes.description, + "types": result.entity.types, + })) + .expect("Failed to create JSON content") + }) + .collect(), + )) + } + + #[tool(description = "Search Properties")] + async fn search_properties( + &self, + #[tool(param)] + #[schemars(description = "The query string to search for properties")] + query: String, + ) -> Result { + let embedding = self + .embedding_model + .embed(vec![&query], None) + .expect("Failed to get embedding") + .pop() + .expect("Embedding is empty") + .into_iter() + .map(|v| v as f64) + .collect::>(); + + let results = entity::search::>(&self.neo4j, embedding) + .filter( + entity::EntityFilter::default() + .relations(TypesFilter::default().r#type(system_ids::ATTRIBUTE)), + ) + .limit(8) + .send() + .await + .map_err(|e| { + McpError::internal_error( + "search_properties", + Some(json!({ "error": e.to_string() })), + ) + })? 
+ .try_collect::>() + .await + .map_err(|e| { + McpError::internal_error( + "search_properties", + Some(json!({ "error": e.to_string() })), + ) + })?; + + tracing::info!("Found {} results for query '{}'", results.len(), query); + + Ok(CallToolResult::success( + results + .into_iter() + .map(|result| { + Content::json(json!({ + "id": result.entity.id(), + "name": result.entity.attributes.name, + "description": result.entity.attributes.description, + "types": result.entity.types, + })) + .expect("Failed to create JSON content") + }) + .collect(), + )) + } + + // #[tool(description = "Search Properties")] + // async fn get_entities( + // &self, + // #[tool(param)] + // #[schemars(description = "The query string to search for properties")] + // query: String, + // ) + + #[tool(description = "Get entity by ID")] + async fn get_entity( + &self, + #[tool(param)] + #[schemars(description = "Return an entity by its ID along with its attributes (name, description, etc.) and types")] + id: String, + ) -> Result { + let entity = entity::find_one::>(&self.neo4j, &id) + .send() + .await + .map_err(|e| { + McpError::internal_error( + "get_entity", + Some(json!({ "error": e.to_string() })), + ) + })? 
+ .ok_or_else(|| { + McpError::internal_error("entity_not_found", Some(json!({ "id": id }))) + })?; + + tracing::info!("Found entity with ID '{}'", id); + + Ok(CallToolResult::success(vec![Content::json( + json!({ + "id": entity.id(), + "name": entity.attributes.name, + "description": entity.attributes.description, + "types": entity.types, + }), + ) + .expect("Failed to create JSON content")])) + } +} + +#[tool(tool_box)] +impl ServerHandler for KnowledgeGraph { + fn get_info(&self) -> ServerInfo { + ServerInfo { + protocol_version: ProtocolVersion::V_2024_11_05, + capabilities: ServerCapabilities::builder() + .enable_prompts() + .enable_resources() + .enable_tools() + .build(), + server_info: Implementation::from_build_env(), + instructions: Some(include_str!("../resources/instructions.md").to_string()), + } + } + + async fn initialize( + &self, + _request: InitializeRequestParam, + context: RequestContext, + ) -> Result { + if let Some(http_request_part) = context.extensions.get::() { + let initialize_headers = &http_request_part.headers; + let initialize_uri = &http_request_part.uri; + tracing::info!(?initialize_headers, %initialize_uri, "initialize from http server"); + } + Ok(self.get_info()) + } +} + +#[derive(Debug, Parser)] +#[command(name = "stdout", version, about, arg_required_else_help = true)] +struct AppArgs { + #[clap(flatten)] + neo4j_args: Neo4jArgs, +} + +#[derive(Debug, Args)] +struct Neo4jArgs { + /// Neo4j database host + #[arg(long)] + neo4j_uri: String, + + /// Neo4j database user name + #[arg(long)] + neo4j_user: String, + + /// Neo4j database user password + #[arg(long)] + neo4j_pass: String, +} From 9ec2c2f10f000840555d0f09a5e079ec70228467 Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 10:41:39 -0400 Subject: [PATCH 03/13] style: fmt --- mcp-server/src/main.rs | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/mcp-server/src/main.rs b/mcp-server/src/main.rs index b0e72a0..caf58cf 
100644 --- a/mcp-server/src/main.rs +++ b/mcp-server/src/main.rs @@ -3,7 +3,7 @@ use fastembed::{EmbeddingModel, InitOptions, TextEmbedding}; use futures::TryStreamExt; use grc20_core::{ entity::{self, Entity, EntityRelationFilter}, - mapping::{query_utils::TypesFilter, Query, QueryStream}, + mapping::{Query, QueryStream, query_utils::TypesFilter}, neo4rs, system_ids, }; use grc20_sdk::models::BaseEntity; @@ -272,7 +272,7 @@ impl KnowledgeGraph { tracing::info!("Found {} results for query '{}'", results.len(), query); Ok(CallToolResult::success( - results + results .into_iter() .map(|result| { Content::json(json!({ @@ -299,17 +299,16 @@ impl KnowledgeGraph { async fn get_entity( &self, #[tool(param)] - #[schemars(description = "Return an entity by its ID along with its attributes (name, description, etc.) and types")] + #[schemars( + description = "Return an entity by its ID along with its attributes (name, description, etc.) and types" + )] id: String, ) -> Result { let entity = entity::find_one::>(&self.neo4j, &id) .send() .await .map_err(|e| { - McpError::internal_error( - "get_entity", - Some(json!({ "error": e.to_string() })), - ) + McpError::internal_error("get_entity", Some(json!({ "error": e.to_string() }))) })? 
.ok_or_else(|| { McpError::internal_error("entity_not_found", Some(json!({ "id": id }))) @@ -317,15 +316,15 @@ impl KnowledgeGraph { tracing::info!("Found entity with ID '{}'", id); - Ok(CallToolResult::success(vec![Content::json( - json!({ + Ok(CallToolResult::success(vec![ + Content::json(json!({ "id": entity.id(), "name": entity.attributes.name, "description": entity.attributes.description, "types": entity.types, - }), - ) - .expect("Failed to create JSON content")])) + })) + .expect("Failed to create JSON content"), + ])) } } From 1917103cdc4eaf8a53566180dd3c31019d9776d9 Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 11:54:47 -0400 Subject: [PATCH 04/13] fix: docstring --- grc20-core/src/mapping/query_utils/prop_filter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grc20-core/src/mapping/query_utils/prop_filter.rs b/grc20-core/src/mapping/query_utils/prop_filter.rs index 9c7d7d5..9eb816f 100644 --- a/grc20-core/src/mapping/query_utils/prop_filter.rs +++ b/grc20-core/src/mapping/query_utils/prop_filter.rs @@ -164,7 +164,7 @@ impl> PropFilter { /// If `expr` is set, then it will used as the filter target instead of the above. 
/// /// For example, given the following [PropFilter] (which creates a property filter) - /// ```rust,no-run + /// ```rust /// # fn main() { /// # use std::collections::HashMap; /// # use grc20_core::mapping::{PropFilter, Subquery}; From 575e7cee101fadf6e46db48d41bca9b9fea7411a Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 14:50:20 -0400 Subject: [PATCH 05/13] misc: Add sample data to help local testing --- grc20-core/src/ids/system_ids.rs | 2 +- mcp-server/src/main.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/grc20-core/src/ids/system_ids.rs b/grc20-core/src/ids/system_ids.rs index ddbce65..a3d2e9f 100644 --- a/grc20-core/src/ids/system_ids.rs +++ b/grc20-core/src/ids/system_ids.rs @@ -39,7 +39,7 @@ pub const IMAGE: &str = "X8KB1uF84RYppghBSVvhqr"; /// Relation type. This is the entity representing the Join between the /// the Collection and the Entity -pub const RELATION: &str = "AKDxovGvZaPSWnmKnSoZJY"; +pub const RELATION_SCHEMA_TYPE: &str = "AKDxovGvZaPSWnmKnSoZJY"; pub const SPACE_TYPE: &str = "7gzF671tq5JTZ13naG4tnr"; diff --git a/mcp-server/src/main.rs b/mcp-server/src/main.rs index caf58cf..c5b0bc1 100644 --- a/mcp-server/src/main.rs +++ b/mcp-server/src/main.rs @@ -190,7 +190,7 @@ impl KnowledgeGraph { entity::EntityFilter::default().relations( EntityRelationFilter::default() .relation_type(system_ids::VALUE_TYPE_ATTRIBUTE) - .to_id(system_ids::RELATION), + .to_id(system_ids::RELATION_SCHEMA_TYPE), ), ) .limit(8) From bfd0ffd4f306d86fd45cb6f760753e526c301ab2 Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 14:50:42 -0400 Subject: [PATCH 06/13] misc: Add actual sample data script --- sink/examples/seed_data.rs | 723 +++++++++++++++++++++++++++++++++++++ 1 file changed, 723 insertions(+) create mode 100644 sink/examples/seed_data.rs diff --git a/sink/examples/seed_data.rs b/sink/examples/seed_data.rs new file mode 100644 index 0000000..59ff86d --- /dev/null +++ b/sink/examples/seed_data.rs @@ -0,0 
+1,723 @@ +use grc20_core::{ + block::BlockMetadata, + entity::EntityNodeRef, + ids, + mapping::{triple, Query, RelationEdge, Triple}, + neo4rs, relation, system_ids, +}; + +const NEO4J_URL: &str = "bolt://localhost:7687"; +const NEO4J_USER: &str = "neo4j"; +const NEO4J_PASSWORD: &str = "password"; + +const DEFAULT_VERSION: &str = "0"; + +const EVENT_TYPE: &str = "LmVu35JFfyGW2B4TCkRq5r"; +const CITY_TYPE: &str = "7iULQxoxfxMXxhccYmWJVZ"; +const EVENT_LOCATION_PROP: &str = "5hJcLH7zd6auNs8br859UJ"; +const SPEAKERS_PROP: &str = "6jVaNgq31A8eAHQ6iBm6aG"; +const RUSTCONF_2023: &str = "WNaUUp4WdPJtdnchrSxQYA"; +const JSCONF_2024: &str = "L6rgWLHrUxgME5ZTi3WWVx"; +const ALICE_ID: &str = "QGGFVgMWJGQCPLpme8iCdZ"; +const BOB_ID: &str = "SQmjDM5WrfPNafdpFPFtno"; +const CAROL_ID: &str = "BsiZXi6G9QpyZ47Eq87iSE"; +const DAVE_ID: &str = "8a2MNSg4myMVXXpXnE2Yti"; +const SAN_FRANCISCO_ID: &str = "2tvbXLHW1GCkE1LvgQFMLF"; +const NEW_YORK_ID: &str = "FEiviAcKw5jkNH75vBoJ44"; +const SIDE_EVENTS: &str = "As4CaMsDuGLqpRCVyjuYAN"; +const RUST_ASYNC_WORKSHOP_SIDEEVENT: &str = "QPZnckrRUebWjdwQZTR7Ka"; +const RUST_HACKATHON_SIDEEVENT: &str = "ReJ5RRMqTer9qfr87Yjexp"; +const JOE_ID: &str = "MpR7wuVWyXV988F5NWZ21r"; +const CHRIS_ID: &str = "ScHYh4PpRpyuvY2Ab4Znf5"; +const _: &str = "Mu7ddiBnwZH1LvpDTpKcvq"; +const _: &str = "DVurPdLUZi7Ajfv9BC3ADm"; +const _: &str = "MPxRvh35rnDeRJNEJLU1YF"; +const _: &str = "JjoWPp8LiCKVZiWtE5iZaJ"; +const _: &str = "8bCuTuWqL3dxALLff1Awdb"; +const _: &str = "9Bj46RXQzHQq25WNPY4Lw"; +const _: &str = "RkTkM28NSx3WZuW33vZUjx"; +const _: &str = "Lc9L7StPfXMFGWw45utaTY"; +const _: &str = "G49gECRJmW6BwqHaENF5nS"; +const _: &str = "GfugZRvoWmQhkjMcFJHg49"; +const _: &str = "5bwj7yNukCHoJnW8ksgZY"; +const _: &str = "GKXfCXBAJ2oAufgETPcFK7"; +const _: &str = "X6q73SFySo5u2BuQrYUxR5"; +const _: &str = "S2etHTe7W92QbXz32QWimW"; +const _: &str = "UV2buTZhfviv7CYTR41APA"; +const _: &str = "2ASGaR78dDZAiXM1oeLgDp"; +const _: &str = "9EKE5gNaCCb1sMF8BZoGvU"; +const 
_: &str = "TTbAuVjFb9TLsvMjtRJpKi"; +const _: &str = "HJDgxUcnjzvWhjX9r3zNua"; +const _: &str = "2FySkRW5LnWaf2dN4i214o"; +const _: &str = "Em2QUUXS7HDaCGtQ2h5YVc"; +const _: &str = "CdPyBWaMAmCUmyutWoVStQ"; +const _: &str = "L3xF6a8gbxxVRoCyBs373N"; +const _: &str = "WE4GbaJ1eHtQZaG516Pb9j"; +const _: &str = "J7ocdxruhsZHBjVGZbPbZJ"; +const _: &str = "3QCECHDBpVjd3ZSNYVRUsW"; +const _: &str = "CWesNo9yeRdNaKKk8LGoxr"; +const _: &str = "DeWmJcSYrxKQ794BgphfmS"; +const _: &str = "JCf7JGmhXog1swmX7JVV"; +const _: &str = "NmGh6yGqFuHw3F885SHeJj"; +const _: &str = "8EjgLrZYP9pzhpzqf82T99"; +const _: &str = "7df1NGiRjFtVGVwaDZTPPC"; +const _: &str = "YyATjD7HyDrVq4SKkQGBu"; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let neo4j = neo4rs::Graph::new(NEO4J_URL, NEO4J_USER, NEO4J_PASSWORD) + .await + .expect("Failed to connect to Neo4j"); + + // Bootstrap the database + bootstrap(&neo4j).await?; + + // Create some common types + create_type( + &neo4j, + "Person", + [], + [ + system_ids::NAME_ATTRIBUTE, + system_ids::DESCRIPTION_ATTRIBUTE, + ], + Some(system_ids::PERSON_TYPE), + ) + .await?; + + create_type( + &neo4j, + "Event", + [], + [ + system_ids::NAME_ATTRIBUTE, + system_ids::DESCRIPTION_ATTRIBUTE, + ], + Some(EVENT_TYPE), + ) + .await?; + + create_type( + &neo4j, + "City", + [], + [ + system_ids::NAME_ATTRIBUTE, + system_ids::DESCRIPTION_ATTRIBUTE, + ], + Some(CITY_TYPE), + ) + .await?; + + create_property( + &neo4j, + "Event location", + system_ids::RELATION_SCHEMA_TYPE, + Some(CITY_TYPE), + Some(EVENT_LOCATION_PROP), + ) + .await?; + + create_property( + &neo4j, + "Speakers", + system_ids::RELATION_SCHEMA_TYPE, + Some(system_ids::PERSON_TYPE), + Some(SPEAKERS_PROP), + ) + .await?; + + create_property( + &neo4j, + "Side events", + system_ids::RELATION_SCHEMA_TYPE, + Some(EVENT_TYPE), + Some(SIDE_EVENTS), + ) + .await?; + + // Create person entities + create_entity( + &neo4j, + "Alice", + None, + [system_ids::PERSON_TYPE], + [ + 
(system_ids::NAME_ATTRIBUTE, "Alice"), + ( + system_ids::DESCRIPTION_ATTRIBUTE, + "Speaker at Rust Conference 2023", + ), + ], + [], + Some(ALICE_ID), + ) + .await?; + + create_entity( + &neo4j, + "Bob", + None, + [system_ids::PERSON_TYPE], + [ + (system_ids::NAME_ATTRIBUTE, "Bob"), + ( + system_ids::DESCRIPTION_ATTRIBUTE, + "Speaker at Rust Conference 2023", + ), + ], + [], + Some(BOB_ID), + ) + .await?; + + create_entity( + &neo4j, + "Carol", + None, + [system_ids::PERSON_TYPE], + [ + (system_ids::NAME_ATTRIBUTE, "Carol"), + ( + system_ids::DESCRIPTION_ATTRIBUTE, + "Speaker at JavaScript Summit 2024", + ), + ], + [], + Some(CAROL_ID), + ) + .await?; + + create_entity( + &neo4j, + "Dave", + None, + [system_ids::PERSON_TYPE], + [ + (system_ids::NAME_ATTRIBUTE, "Dave"), + ( + system_ids::DESCRIPTION_ATTRIBUTE, + "Speaker at JavaScript Summit 2024", + ), + ], + [], + Some(DAVE_ID), + ) + .await?; + + create_entity( + &neo4j, + "Joe", + None, + [system_ids::PERSON_TYPE], + [ + (system_ids::NAME_ATTRIBUTE, "Joe"), + ( + system_ids::DESCRIPTION_ATTRIBUTE, + "Speaker at Rust Async Workshop", + ), + ], + [], + Some(JOE_ID), + ) + .await?; + + create_entity( + &neo4j, + "Chris", + None, + [system_ids::PERSON_TYPE], + [ + (system_ids::NAME_ATTRIBUTE, "Chris"), + ( + system_ids::DESCRIPTION_ATTRIBUTE, + "Speaker at RustConf Hackathon", + ), + ], + [], + Some(CHRIS_ID), + ) + .await?; + + // Create city entities + create_entity( + &neo4j, + "San Francisco", + Some("City in California"), + [CITY_TYPE], + [ + (system_ids::NAME_ATTRIBUTE, "San Francisco"), + (system_ids::DESCRIPTION_ATTRIBUTE, "City in California"), + ], + [], + Some(SAN_FRANCISCO_ID), + ) + .await?; + + create_entity( + &neo4j, + "New York", + Some("City in New York State"), + [CITY_TYPE], + [ + (system_ids::NAME_ATTRIBUTE, "New York"), + (system_ids::DESCRIPTION_ATTRIBUTE, "City in New York State"), + ], + [], + Some(NEW_YORK_ID), + ) + .await?; + + // Create events entities + // Create side event entities for 
RustConf 2023 + create_entity( + &neo4j, + "Rust Async Workshop", + Some("A hands-on workshop about async programming in Rust"), + [EVENT_TYPE], + [], + [ + (EVENT_LOCATION_PROP, SAN_FRANCISCO_ID), + (SPEAKERS_PROP, JOE_ID), + ], + Some(RUST_ASYNC_WORKSHOP_SIDEEVENT), + ) + .await?; + + create_entity( + &neo4j, + "RustConf Hackathon", + Some("A hackathon for RustConf 2023 attendees"), + [EVENT_TYPE], + [], + [ + (EVENT_LOCATION_PROP, SAN_FRANCISCO_ID), + (SPEAKERS_PROP, CHRIS_ID), + ], + Some(RUST_HACKATHON_SIDEEVENT), + ) + .await?; + + create_entity( + &neo4j, + "Rust Conference 2023", + Some("A conference about Rust programming language"), + [EVENT_TYPE], + [], + [ + (SPEAKERS_PROP, ALICE_ID), // Alice + (SPEAKERS_PROP, BOB_ID), // Bob + (EVENT_LOCATION_PROP, SAN_FRANCISCO_ID), // San Francisco + (SIDE_EVENTS, RUST_ASYNC_WORKSHOP_SIDEEVENT), // Rust Async Workshop + (SIDE_EVENTS, RUST_HACKATHON_SIDEEVENT), // RustConf Hackathon + ], + Some(RUSTCONF_2023), + ) + .await?; + + create_entity( + &neo4j, + "JavaScript Summit 2024", + Some("A summit for JavaScript enthusiasts and professionals"), + [EVENT_TYPE], + [], + [ + (SPEAKERS_PROP, CAROL_ID), // Carol + (SPEAKERS_PROP, DAVE_ID), // Dave + (EVENT_LOCATION_PROP, NEW_YORK_ID), // New York + ], + Some(JSCONF_2024), + ) + .await?; + + Ok(()) +} + +pub async fn bootstrap(neo4j: &neo4rs::Graph) -> anyhow::Result<()> { + triple::insert_many( + &neo4j, + &BlockMetadata::default(), + system_ids::ROOT_SPACE_ID, + DEFAULT_VERSION, + ) + .triples(vec![ + // Value types + Triple::new(system_ids::CHECKBOX, "name", "Checkbox"), + Triple::new(system_ids::TIME, "name", "Time"), + Triple::new(system_ids::TEXT, "name", "Text"), + Triple::new(system_ids::URL, "name", "Url"), + Triple::new(system_ids::NUMBER, "name", "Number"), + Triple::new(system_ids::POINT, "name", "Point"), + Triple::new(system_ids::IMAGE, "name", "Image"), + // System types + Triple::new(system_ids::ATTRIBUTE, "name", "Attribute"), + 
Triple::new(system_ids::SCHEMA_TYPE, "name", "Type"), + Triple::new( + system_ids::RELATION_SCHEMA_TYPE, + "name", + "Relation schema type", + ), + Triple::new(system_ids::RELATION_TYPE, "name", "Relation instance type"), + // Properties + Triple::new(system_ids::PROPERTIES, "name", "Properties"), + Triple::new(system_ids::TYPES_ATTRIBUTE, "name", "Types"), + Triple::new(system_ids::VALUE_TYPE_ATTRIBUTE, "name", "Value Type"), + Triple::new( + system_ids::RELATION_TYPE_ATTRIBUTE, + "name", + "Relation type attribute", + ), + Triple::new(system_ids::RELATION_INDEX, "name", "Relation index"), + Triple::new( + system_ids::RELATION_VALUE_RELATIONSHIP_TYPE, + "name", + "Relation value type", + ), + Triple::new(system_ids::NAME_ATTRIBUTE, "name", "Name"), + Triple::new(system_ids::DESCRIPTION_ATTRIBUTE, "name", "Description"), + ]) + .send() + .await + .expect("Failed to insert triples"); + + // Create properties + create_property( + neo4j, + "Properties", + system_ids::RELATION_SCHEMA_TYPE, + Some(system_ids::ATTRIBUTE), + Some(system_ids::PROPERTIES), + ) + .await?; + + create_property( + neo4j, + "Types", + system_ids::RELATION_SCHEMA_TYPE, + Some(system_ids::SCHEMA_TYPE), + Some(system_ids::TYPES_ATTRIBUTE), + ) + .await?; + + create_property( + neo4j, + "Value Type", + system_ids::RELATION_SCHEMA_TYPE, + None::<&str>, + Some(system_ids::VALUE_TYPE_ATTRIBUTE), + ) + .await?; + + create_property( + neo4j, + "Relation type attribute", + system_ids::RELATION_SCHEMA_TYPE, + None::<&str>, + Some(system_ids::RELATION_TYPE_ATTRIBUTE), + ) + .await?; + + create_property( + neo4j, + "Relation index", + system_ids::TEXT, + None::<&str>, + Some(system_ids::RELATION_INDEX), + ) + .await?; + + create_property( + neo4j, + "Relation value type", + system_ids::RELATION_SCHEMA_TYPE, + Some(system_ids::SCHEMA_TYPE), + Some(system_ids::RELATION_TYPE_ATTRIBUTE), + ) + .await?; + + create_property( + neo4j, + "Name", + system_ids::TEXT, + None::<&str>, + Some(system_ids::NAME_ATTRIBUTE), 
+ ) + .await?; + + create_property( + neo4j, + "Description", + system_ids::TEXT, + None::<&str>, + Some(system_ids::DESCRIPTION_ATTRIBUTE), + ) + .await?; + + // Create types + create_type( + neo4j, + "Type", + [system_ids::SCHEMA_TYPE], + [ + system_ids::TYPES_ATTRIBUTE, + system_ids::PROPERTIES, + system_ids::NAME_ATTRIBUTE, + system_ids::DESCRIPTION_ATTRIBUTE, + ], + Some(system_ids::SCHEMA_TYPE), + ) + .await?; + + create_type( + neo4j, + "Relation schema type", + [system_ids::RELATION_SCHEMA_TYPE], + [system_ids::RELATION_VALUE_RELATIONSHIP_TYPE], + Some(system_ids::RELATION_SCHEMA_TYPE), + ) + .await?; + + create_type( + neo4j, + "Attribute", + [system_ids::SCHEMA_TYPE], + [ + system_ids::VALUE_TYPE_ATTRIBUTE, + system_ids::NAME_ATTRIBUTE, + system_ids::DESCRIPTION_ATTRIBUTE, + ], + Some(system_ids::ATTRIBUTE), + ) + .await?; + + create_type( + neo4j, + "Relation instance type", + [system_ids::RELATION_TYPE], + [ + system_ids::RELATION_TYPE_ATTRIBUTE, + system_ids::RELATION_INDEX, + ], + Some(system_ids::RELATION_TYPE), + ) + .await?; + + Ok(()) +} + +pub async fn create_entity( + neo4j: &neo4rs::Graph, + name: impl Into, + description: Option<&str>, + types: impl IntoIterator, + properties: impl IntoIterator, + relations: impl IntoIterator, + id: Option<&str>, +) -> anyhow::Result { + let block = BlockMetadata::default(); + let entity_id = id.map(Into::into).unwrap_or_else(|| ids::create_geo_id()); + let name = name.into(); + + // Set: Entity.name + triple::insert_many(neo4j, &block, system_ids::ROOT_SPACE_ID, DEFAULT_VERSION) + .triples(vec![Triple::new( + &entity_id, + system_ids::NAME_ATTRIBUTE, + name, + )]) + .send() + .await?; + + // Set: Entity.description + if let Some(description) = description { + triple::insert_many(neo4j, &block, system_ids::ROOT_SPACE_ID, DEFAULT_VERSION) + .triples(vec![Triple::new( + &entity_id, + system_ids::DESCRIPTION_ATTRIBUTE, + description, + )]) + .send() + .await?; + } + + // Set: Entity > TYPES_ATTRIBUTE > Type[] + 
set_types(neo4j, &entity_id, types).await?; + + // Set: Entity.* + triple::insert_many(neo4j, &block, system_ids::ROOT_SPACE_ID, DEFAULT_VERSION) + .triples( + properties + .into_iter() + .map(|(property_id, value)| Triple::new(&entity_id, property_id, value)), + ) + .send() + .await?; + + // Set: Entity > RELATIONS > Relation[] + relation::insert_many::>( + neo4j, + &block, + system_ids::ROOT_SPACE_ID, + DEFAULT_VERSION, + ) + .relations(relations.into_iter().map(|(relation_type, target_id)| { + RelationEdge::new( + ids::create_geo_id(), + &entity_id, + target_id, + relation_type, + "0", + ) + })) + .send() + .await?; + + Ok(entity_id) +} + +/// Creates a type with the given name, types, and properties. +pub async fn create_type( + neo4j: &neo4rs::Graph, + name: impl Into, + types: impl IntoIterator, + properties: impl IntoIterator, + id: Option<&str>, +) -> anyhow::Result { + let block = BlockMetadata::default(); + let type_id = id.map(Into::into).unwrap_or_else(|| ids::create_geo_id()); + let name = name.into(); + + let mut types_vec: Vec<&str> = types.into_iter().collect(); + if !types_vec.contains(&system_ids::SCHEMA_TYPE) { + types_vec.push(system_ids::SCHEMA_TYPE); + } + + // Set: Type.name + triple::insert_many(neo4j, &block, system_ids::ROOT_SPACE_TYPE, DEFAULT_VERSION) + .triples(vec![Triple::new( + &type_id, + system_ids::NAME_ATTRIBUTE, + name, + )]) + .send() + .await?; + + // Set: Type > TYPES_ATTRIBUTE > Type[] + set_types(neo4j, &type_id, types_vec).await?; + + // Set: Type > PROPERTIES > Property[] + relation::insert_many::>( + neo4j, + &block, + system_ids::ROOT_SPACE_ID, + DEFAULT_VERSION, + ) + .relations(properties.into_iter().map(|property_id| { + RelationEdge::new( + ids::create_geo_id(), + &type_id, + system_ids::PROPERTIES, + property_id, + "0", + ) + })) + .send() + .await?; + + Ok(type_id) +} + +/// Creates a property with the given name and value type. 
+/// If `relation_value_type` is provided, it will be set as the relation value type ( +/// Note: if that is the case, then `value_type` should be the system_ids::RELATION_SCHEMA_TYPE type). +pub async fn create_property( + neo4j: &neo4rs::Graph, + name: impl Into, + value_type: impl Into, + relation_value_type: Option>, + id: Option>, +) -> anyhow::Result { + let block = BlockMetadata::default(); + + let property_id = id.map(Into::into).unwrap_or_else(|| ids::create_geo_id()); + + // Set: Property.name + triple::insert_many(neo4j, &block, system_ids::ROOT_SPACE_ID, DEFAULT_VERSION) + .triples(vec![Triple::new( + &property_id, + system_ids::NAME_ATTRIBUTE, + name.into(), + )]) + .send() + .await?; + + // Set: Property > VALUE_TYPE > ValueType + relation::insert_one::>( + neo4j, + &block, + system_ids::ROOT_SPACE_ID, + DEFAULT_VERSION, + RelationEdge::new( + ids::create_geo_id(), + property_id.clone(), + system_ids::VALUE_TYPE_ATTRIBUTE, + value_type.into(), + "0", + ), + ) + .send() + .await?; + + if let Some(relation_value_type) = relation_value_type { + // Set: Property > RELATION_VALUE_RELATIONSHIP_TYPE > RelationValueType + relation::insert_one::>( + neo4j, + &block, + system_ids::ROOT_SPACE_ID, + DEFAULT_VERSION, + RelationEdge::new( + ids::create_geo_id(), + property_id.clone(), + system_ids::RELATION_VALUE_RELATIONSHIP_TYPE, + relation_value_type.into(), + "0", + ), + ) + .send() + .await?; + } + + set_types(neo4j, &property_id, [system_ids::ATTRIBUTE]).await?; + + Ok(property_id) +} + +pub async fn set_types( + neo4j: &neo4rs::Graph, + entity_id: impl Into, + types: impl IntoIterator, +) -> anyhow::Result<()> { + let block = BlockMetadata::default(); + let entity_id = entity_id.into(); + + // Set: Entity > TYPES_ATTRIBUTE > Type[] + relation::insert_many::>( + neo4j, + &block, + system_ids::ROOT_SPACE_ID, + DEFAULT_VERSION, + ) + .relations(types.into_iter().map(|type_id| { + RelationEdge::new( + ids::create_geo_id(), + &entity_id, + type_id, + 
system_ids::TYPES_ATTRIBUTE, + "0", + ) + })) + .send() + .await?; + + Ok(()) +} From 2760140b55381ce69bb2378bdfc6b0c76a93b88a Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 15:10:15 -0400 Subject: [PATCH 07/13] misc: docs + small refactoring --- grc20-core/src/mapping/entity/mod.rs | 83 ++++++++++++++++++- grc20-core/src/mapping/entity/utils.rs | 36 ++++++++ grc20-core/src/mapping/query_utils/mod.rs | 2 - .../src/mapping/query_utils/types_filter.rs | 36 -------- grc20-macros/src/entity.rs | 2 +- .../src/models/space/space_types_query.rs | 4 +- 6 files changed, 121 insertions(+), 42 deletions(-) delete mode 100644 grc20-core/src/mapping/query_utils/types_filter.rs diff --git a/grc20-core/src/mapping/entity/mod.rs b/grc20-core/src/mapping/entity/mod.rs index 96f48c4..893c864 100644 --- a/grc20-core/src/mapping/entity/mod.rs +++ b/grc20-core/src/mapping/entity/mod.rs @@ -14,7 +14,7 @@ pub use find_one::FindOneQuery; pub use insert_one::InsertOneQuery; pub use models::{Entity, EntityNode, EntityNodeRef, SystemProperties}; pub use semantic_search::SemanticSearchQuery; -pub use utils::{EntityFilter, EntityRelationFilter}; +pub use utils::{EntityFilter, EntityRelationFilter, TypesFilter}; use crate::block::BlockMetadata; @@ -34,14 +34,95 @@ pub fn delete_one( ) } +/// Creates a query to find a single entity by its ID if it exists. Supports optional +/// filtering by space ID and version. +/// ```rust +/// use grc20_core::mapping::entity; +/// +/// // Get current entity +/// let maybe_entity = entity::find_one::(&neo4j, "entity_id") +/// .send() +/// .await?; +/// +/// // Get entity in a specific space and version +/// let maybe_entity = entity::find_one::(&neo4j, "entity_id") +/// .space_id("space_id") +/// .space_version("space_version") +/// .send() +/// .await?; +/// ``` pub fn find_one(neo4j: &neo4rs::Graph, id: impl Into) -> FindOneQuery { FindOneQuery::new(neo4j, id.into()) } +/// Creates a query to find multiple entities. 
Supports filtering by relations and +/// properties as well as ordering and pagination. See [`EntityFilter`](EntityFilter) +/// for more details on filtering options. +/// ```rust +/// use grc20_core::mapping::entity; +/// use grc20_core::mapping::query_utils::order_by; +/// +/// // Find entities with a specific attribute, order them by a property and +/// // return the first 10. +/// let entities = entity::find_many::(&neo4j) +/// .filter(entity::EntityFilter::default() +/// // Filter by "SOME_ATTRIBUTE" attribute with value "some_value" +/// .attribute(AttributeFilter::new("SOME_ATTRIBUTE").value("some_value"))) +/// .order_by(order_by::asc("some_property")) +/// .limit(10) +/// .send() +/// .await?; +/// +/// // Find entities with a specific relation, in this case entities that have a +/// // `Parent` relation to an entity with ID "Alice". +/// let entities = entity::find_many::(&neo4j) +/// .filter(entity::EntityFilter::default() +/// // Filter by relations +/// .relations(entity::EntityRelationFilter::default() +/// // Filter by `Parent` relation to entity with ID "Alice" +/// .relation_type("Parent".to_string()) +/// .to_id("Alice".to_string()))) +/// .send() +/// .await?; +/// +/// // Find entities with a specific type (note: `TypesFilter` is a shorthand +/// // for `EntityRelationFilter`. It is converted to a relation filter internally). +/// let entities = entity::find_many::(&neo4j) +/// .filter(entity::EntityFilter::default() +/// // Filter by `Types` relations pointing to `EntityType` +/// .relations(TypesFilter::default().r#type("EntityType".to_string()))) +/// .send() +/// .await?; +/// ``` pub fn find_many(neo4j: &neo4rs::Graph) -> FindManyQuery { FindManyQuery::new(neo4j) } +/// Create a query to search for entities using semantic search based on a vector. The query +/// supports the same filtering options as `find_many`, allowing you to filter results by +/// attributes, relations, and other properties. 
+/// +/// Important: The search uses *approximate* nearest neighbor search, which means that +/// the results with filtering applied after the search, which may lead to some results +/// that contain fewer than the desired quantity `limit`. +/// ```rust +/// use grc20_core::mapping::entity; +/// +/// let search_vector = embedding::embed("my search query"); +/// +/// // Search for entities similar to the provided vector. +/// let results = entity::search::(&neo4j, search_vector) +/// .send() +/// +/// // Search for types (i.e.: entities that have `Types`` relation to `SchemaType``) of +/// // entities similar to the provided vector. +/// let results = entity::search::(&neo4j, search_vector) +/// .filter(entity::EntityFilter::default() +/// // Filter by `Types` relations pointing to `SchemaType` +/// .relations(entity::TypesFilter::default().r#type(system_ids::SCHEMA_TYPE))) +/// .send() +/// .await?; +/// ``` pub fn search(neo4j: &neo4rs::Graph, vector: Vec) -> SemanticSearchQuery { SemanticSearchQuery::new(neo4j, vector) } diff --git a/grc20-core/src/mapping/entity/utils.rs b/grc20-core/src/mapping/entity/utils.rs index 3fd6117..f57bfb8 100644 --- a/grc20-core/src/mapping/entity/utils.rs +++ b/grc20-core/src/mapping/entity/utils.rs @@ -162,6 +162,42 @@ impl EntityRelationFilter { } } +#[derive(Clone, Debug, Default)] +pub struct TypesFilter { + types_contains: Vec, +} + +impl TypesFilter { + pub fn r#type(mut self, r#type: impl Into) -> Self { + self.types_contains.push(r#type.into()); + self + } + + pub fn types(mut self, mut types: Vec) -> Self { + self.types_contains.append(&mut types); + self + } +} + +impl From for EntityRelationFilter { + fn from(types_filter: TypesFilter) -> Self { + let mut filter = EntityRelationFilter::default(); + + if !types_filter.types_contains.is_empty() { + filter = filter.relation_type(system_ids::TYPES_ATTRIBUTE); + + if let [r#type] = &types_filter.types_contains[..] 
{ + filter = filter.to_id(r#type.to_string()); + } else { + filter = filter.to_id(types_filter.types_contains); + } + } + + filter + } +} + + #[derive(Clone, Debug)] pub struct MatchEntityAttributes<'a> { space_id: &'a Option>, diff --git a/grc20-core/src/mapping/query_utils/mod.rs b/grc20-core/src/mapping/query_utils/mod.rs index 916719d..af743b5 100644 --- a/grc20-core/src/mapping/query_utils/mod.rs +++ b/grc20-core/src/mapping/query_utils/mod.rs @@ -6,14 +6,12 @@ pub mod order_by; pub mod prop_filter; pub mod query_builder; pub mod query_part; -pub mod types_filter; pub mod version_filter; pub use attributes_filter::AttributeFilter; pub use order_by::{FieldOrderBy, OrderDirection}; pub use prop_filter::PropFilter; pub use query_part::QueryPart; -pub use types_filter::TypesFilter; pub use version_filter::VersionFilter; pub trait Query: Sized { diff --git a/grc20-core/src/mapping/query_utils/types_filter.rs b/grc20-core/src/mapping/query_utils/types_filter.rs deleted file mode 100644 index b91da7b..0000000 --- a/grc20-core/src/mapping/query_utils/types_filter.rs +++ /dev/null @@ -1,36 +0,0 @@ -use crate::{mapping::EntityRelationFilter, system_ids}; - -#[derive(Clone, Debug, Default)] -pub struct TypesFilter { - types_contains: Vec, -} - -impl TypesFilter { - pub fn r#type(mut self, r#type: impl Into) -> Self { - self.types_contains.push(r#type.into()); - self - } - - pub fn types(mut self, mut types: Vec) -> Self { - self.types_contains.append(&mut types); - self - } -} - -impl From for EntityRelationFilter { - fn from(types_filter: TypesFilter) -> Self { - let mut filter = EntityRelationFilter::default(); - - if !types_filter.types_contains.is_empty() { - filter = filter.relation_type(system_ids::TYPES_ATTRIBUTE); - - if let [r#type] = &types_filter.types_contains[..] 
{ - filter = filter.to_id(r#type.to_string()); - } else { - filter = filter.to_id(types_filter.types_contains); - } - } - - filter - } -} diff --git a/grc20-macros/src/entity.rs b/grc20-macros/src/entity.rs index 604bb0b..2bddf3e 100644 --- a/grc20-macros/src/entity.rs +++ b/grc20-macros/src/entity.rs @@ -408,7 +408,7 @@ pub(crate) fn generate_query_impls(opts: &EntityOpts) -> TokenStream2 { let schema_type = opts.schema_type.as_ref().map(|s| quote!(#s)); let type_filter = if let Some(schema_type) = schema_type { quote! { - .relations(grc20_core::mapping::query_utils::TypesFilter::default().r#type(#schema_type.to_string())) + .relations(grc20_core::mapping::entity::TypesFilter::default().r#type(#schema_type.to_string())) } } else { quote! {} diff --git a/grc20-sdk/src/models/space/space_types_query.rs b/grc20-sdk/src/models/space/space_types_query.rs index 701e395..57ac798 100644 --- a/grc20-sdk/src/models/space/space_types_query.rs +++ b/grc20-sdk/src/models/space/space_types_query.rs @@ -1,11 +1,11 @@ use futures::{Stream, TryStreamExt}; use grc20_core::{ - entity, + entity::{self, TypesFilter}, error::DatabaseError, mapping::{ prop_filter, - query_utils::{QueryStream, TypesFilter}, + query_utils::QueryStream, EntityFilter, EntityNode, PropFilter, Query, }, neo4rs, system_ids, From c5fbf9a8d2cb1e7d0eebe97735e21c3d8c2071e0 Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 15:21:32 -0400 Subject: [PATCH 08/13] docs: Add more docs to relation queries --- grc20-core/src/mapping/entity/utils.rs | 2 +- grc20-core/src/mapping/relation/mod.rs | 49 ++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/grc20-core/src/mapping/entity/utils.rs b/grc20-core/src/mapping/entity/utils.rs index f57bfb8..5dc450e 100644 --- a/grc20-core/src/mapping/entity/utils.rs +++ b/grc20-core/src/mapping/entity/utils.rs @@ -11,6 +11,7 @@ use crate::{ system_ids, }; +/// Filter used to find entities in the knowledge graph. 
#[derive(Clone, Debug, Default)] pub struct EntityFilter { pub(crate) id: Option>, @@ -197,7 +198,6 @@ impl From for EntityRelationFilter { } } - #[derive(Clone, Debug)] pub struct MatchEntityAttributes<'a> { space_id: &'a Option>, diff --git a/grc20-core/src/mapping/relation/mod.rs b/grc20-core/src/mapping/relation/mod.rs index ce8c746..9df9a56 100644 --- a/grc20-core/src/mapping/relation/mod.rs +++ b/grc20-core/src/mapping/relation/mod.rs @@ -47,6 +47,22 @@ pub fn delete_one( ) } +/// Creates a query to find a single relation by its ID and space ID if it exists. Supports optional +/// filtering by version. +/// +/// ```rust +/// use grc20_core::mapping::relation; +/// +/// // Get current relation +/// let maybe_relation = relation::find_one::(&neo4j, "relation_id", "space_id", None) +/// .send() +/// .await?; +/// +/// // Get relation in a specific space and version +/// let maybe_relation = relation::find_one::(&neo4j, "relation_id", "space_id", Some("space_version".to_string())) +/// .send() +/// .await?; +/// ``` pub fn find_one( neo4j: &neo4rs::Graph, relation_id: impl Into, @@ -56,10 +72,40 @@ pub fn find_one( FindOneQuery::new(neo4j, relation_id.into(), space_id.into(), space_version) } +/// Creates a query to find multiple relations. Supports filtering by relation_type and its to/from entities. +/// The results are ordered by relation index. +/// +/// See [`RelationFilter`](RelationFilter) for more details on filtering options. +/// +/// ```rust +/// use grc20_core::mapping::relation; +/// use grc20_core::mapping::query_utils::order_by; +/// +/// // Find relations of a specific type (e.g.: "Parent"). 
+/// let relations = relation::find_many::(&neo4j) +/// .filter(relation::RelationFilter::default() +/// // Filter by relation type "Parent" (we provide an entity filter with the ID "Parent") +/// .relation_type(entity::EntityFilter::default().id("Parent"))) +/// .limit(10) +/// .send() +/// .await?; +/// +/// // Find relations with a specific from entity, in this case relations that have a +/// // any type of relation between "Alice" and "Bob". +/// let relations = relation::find_many::(&neo4j) +/// .filter(relation::RelationFilter::default() +/// // Filter by from entity with ID "Alice" +/// .from_(entity::EntityFilter::default().id("Alice")) +/// .to_(entity::EntityFilter::default().id("Bob"))) +/// .send() +/// .await?; +/// ``` pub fn find_many(neo4j: &neo4rs::Graph) -> FindManyQuery { FindManyQuery::new(neo4j) } +/// Same as `find_one`, but it returns the `to` entity of the relation instead of the +/// relation itself. pub fn find_one_to( neo4j: &neo4rs::Graph, relation_id: impl Into, @@ -69,6 +115,9 @@ pub fn find_one_to( FindOneToQuery::new(neo4j, relation_id.into(), space_id.into(), space_version) } +/// Same as `find_many`, but it returns the `to` entities of the relations instead of the +/// relations themselves. This is useful when you want to retrieve the target entities of +/// a set of relations without fetching the relations themselves. 
pub fn find_many_to(neo4j: &neo4rs::Graph) -> FindManyToQuery { FindManyToQuery::new(neo4j) } From 48f18745cb097323d4eebcdc3a25f20dc24a25d9 Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 15:22:39 -0400 Subject: [PATCH 09/13] style: clippy + fmt --- grc20-core/src/mapping/entity/mod.rs | 32 +++++++++---------- grc20-core/src/mapping/relation/mod.rs | 12 +++---- .../src/models/space/space_types_query.rs | 6 +--- mcp-server/src/main.rs | 4 +-- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/grc20-core/src/mapping/entity/mod.rs b/grc20-core/src/mapping/entity/mod.rs index 893c864..275f9e2 100644 --- a/grc20-core/src/mapping/entity/mod.rs +++ b/grc20-core/src/mapping/entity/mod.rs @@ -38,12 +38,12 @@ pub fn delete_one( /// filtering by space ID and version. /// ```rust /// use grc20_core::mapping::entity; -/// +/// /// // Get current entity /// let maybe_entity = entity::find_one::(&neo4j, "entity_id") /// .send() /// .await?; -/// +/// /// // Get entity in a specific space and version /// let maybe_entity = entity::find_one::(&neo4j, "entity_id") /// .space_id("space_id") @@ -55,14 +55,14 @@ pub fn find_one(neo4j: &neo4rs::Graph, id: impl Into) -> FindOneQuery FindOneQuery::new(neo4j, id.into()) } -/// Creates a query to find multiple entities. Supports filtering by relations and -/// properties as well as ordering and pagination. See [`EntityFilter`](EntityFilter) +/// Creates a query to find multiple entities. Supports filtering by relations and +/// properties as well as ordering and pagination. See [`EntityFilter`](EntityFilter) /// for more details on filtering options. /// ```rust /// use grc20_core::mapping::entity; /// use grc20_core::mapping::query_utils::order_by; -/// -/// // Find entities with a specific attribute, order them by a property and +/// +/// // Find entities with a specific attribute, order them by a property and /// // return the first 10. 
/// let entities = entity::find_many::(&neo4j) /// .filter(entity::EntityFilter::default() @@ -72,7 +72,7 @@ pub fn find_one(neo4j: &neo4rs::Graph, id: impl Into) -> FindOneQuery /// .limit(10) /// .send() /// .await?; -/// +/// /// // Find entities with a specific relation, in this case entities that have a /// // `Parent` relation to an entity with ID "Alice". /// let entities = entity::find_many::(&neo4j) @@ -85,7 +85,7 @@ pub fn find_one(neo4j: &neo4rs::Graph, id: impl Into) -> FindOneQuery /// .send() /// .await?; /// -/// // Find entities with a specific type (note: `TypesFilter` is a shorthand +/// // Find entities with a specific type (note: `TypesFilter` is a shorthand /// // for `EntityRelationFilter`. It is converted to a relation filter internally). /// let entities = entity::find_many::(&neo4j) /// .filter(entity::EntityFilter::default() @@ -100,21 +100,21 @@ pub fn find_many(neo4j: &neo4rs::Graph) -> FindManyQuery { /// Create a query to search for entities using semantic search based on a vector. The query /// supports the same filtering options as `find_many`, allowing you to filter results by -/// attributes, relations, and other properties. -/// -/// Important: The search uses *approximate* nearest neighbor search, which means that -/// the results with filtering applied after the search, which may lead to some results +/// attributes, relations, and other properties. +/// +/// Important: The search uses *approximate* nearest neighbor search, which means that +/// filtering is applied after the search, which may lead to some results /// that contain fewer than the desired quantity `limit`. /// ```rust /// use grc20_core::mapping::entity; -/// +/// /// let search_vector = embedding::embed("my search query"); -/// +/// /// // Search for entities similar to the provided vector.
/// let results = entity::search::(&neo4j, search_vector) /// .send() -/// -/// // Search for types (i.e.: entities that have `Types`` relation to `SchemaType``) of +/// +/// // Search for types (i.e.: entities that have `Types` relation to `SchemaType`) of /// // entities similar to the provided vector. /// let results = entity::search::(&neo4j, search_vector) /// .filter(entity::EntityFilter::default() diff --git a/grc20-core/src/mapping/relation/mod.rs b/grc20-core/src/mapping/relation/mod.rs index 9df9a56..afb721a 100644 --- a/grc20-core/src/mapping/relation/mod.rs +++ b/grc20-core/src/mapping/relation/mod.rs @@ -73,14 +73,14 @@ pub fn find_one( } /// Creates a query to find multiple relations. Supports filtering by relation_type and its to/from entities. -/// The results are ordered by relation index. -/// +/// The results are ordered by relation index. +/// /// See [`RelationFilter`](RelationFilter) for more details on filtering options. -/// +/// /// ```rust /// use grc20_core::mapping::relation; /// use grc20_core::mapping::query_utils::order_by; -/// +/// /// // Find relations of a specific type (e.g.: "Parent"). /// let relations = relation::find_many::(&neo4j) /// .filter(relation::RelationFilter::default() @@ -89,7 +89,7 @@ pub fn find_one( /// .limit(10) /// .send() /// .await?; -/// +/// /// // Find relations with a specific from entity, in this case relations that have a /// // any type of relation between "Alice" and "Bob". /// let relations = relation::find_many::(&neo4j) @@ -104,7 +104,7 @@ pub fn find_many(neo4j: &neo4rs::Graph) -> FindManyQuery { FindManyQuery::new(neo4j) } -/// Same as `find_one`, but it returns the `to` entity of the relation instead of the +/// Same as `find_one`, but it returns the `to` entity of the relation instead of the /// relation itself.
pub fn find_one_to( neo4j: &neo4rs::Graph, diff --git a/grc20-sdk/src/models/space/space_types_query.rs b/grc20-sdk/src/models/space/space_types_query.rs index 57ac798..59bb555 100644 --- a/grc20-sdk/src/models/space/space_types_query.rs +++ b/grc20-sdk/src/models/space/space_types_query.rs @@ -3,11 +3,7 @@ use futures::{Stream, TryStreamExt}; use grc20_core::{ entity::{self, TypesFilter}, error::DatabaseError, - mapping::{ - prop_filter, - query_utils::QueryStream, - EntityFilter, EntityNode, PropFilter, Query, - }, + mapping::{prop_filter, query_utils::QueryStream, EntityFilter, EntityNode, PropFilter, Query}, neo4rs, system_ids, }; diff --git a/mcp-server/src/main.rs b/mcp-server/src/main.rs index c5b0bc1..7e55c45 100644 --- a/mcp-server/src/main.rs +++ b/mcp-server/src/main.rs @@ -2,8 +2,8 @@ use clap::{Args, Parser}; use fastembed::{EmbeddingModel, InitOptions, TextEmbedding}; use futures::TryStreamExt; use grc20_core::{ - entity::{self, Entity, EntityRelationFilter}, - mapping::{Query, QueryStream, query_utils::TypesFilter}, + entity::{self, Entity, EntityRelationFilter, TypesFilter}, + mapping::{Query, QueryStream}, neo4rs, system_ids, }; use grc20_sdk::models::BaseEntity; From 80e3a445057e848bc7beba173ec5de8a41d0a632 Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 15:24:32 -0400 Subject: [PATCH 10/13] docs: Update readme sample data --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 83b3218..277985a 100644 --- a/README.md +++ b/README.md @@ -51,5 +51,13 @@ CFLAGS='-std=gnu17' cargo run --bin mcp-server -- \ --neo4j-pass neo4j ``` +### Local testing with sample data +Start the neo4j database and run the following command: +```bash +CFLAGS='-std=gnu17' cargo run --example seed_data +``` + +The IDs of the sample data can be found in `sink/examples/seed_data.rs`. 
+ ## GRC20 CLI Coming soon™️ \ No newline at end of file From ee6753d93127201eba99d4716552a62ab6d9e20e Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 15:27:15 -0400 Subject: [PATCH 11/13] fix: conflict message --- mcp-server/src/main.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mcp-server/src/main.rs b/mcp-server/src/main.rs index 4a8e8d9..7e55c45 100644 --- a/mcp-server/src/main.rs +++ b/mcp-server/src/main.rs @@ -190,11 +190,7 @@ impl KnowledgeGraph { entity::EntityFilter::default().relations( EntityRelationFilter::default() .relation_type(system_ids::VALUE_TYPE_ATTRIBUTE) -<<<<<<< HEAD .to_id(system_ids::RELATION_SCHEMA_TYPE), -======= - .to_id(system_ids::RELATION), ->>>>>>> main ), ) .limit(8) From a871a1aeea56e78e29eb44026d6440085b109a08 Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 15:33:07 -0400 Subject: [PATCH 12/13] docs: Fix references --- grc20-core/src/mapping/entity/mod.rs | 5 +++-- grc20-core/src/mapping/relation/mod.rs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/grc20-core/src/mapping/entity/mod.rs b/grc20-core/src/mapping/entity/mod.rs index 275f9e2..03ba50e 100644 --- a/grc20-core/src/mapping/entity/mod.rs +++ b/grc20-core/src/mapping/entity/mod.rs @@ -56,8 +56,9 @@ pub fn find_one(neo4j: &neo4rs::Graph, id: impl Into) -> FindOneQuery } /// Creates a query to find multiple entities. Supports filtering by relations and -/// properties as well as ordering and pagination. See [`EntityFilter`](EntityFilter) -/// for more details on filtering options. +/// properties as well as ordering and pagination. See [`EntityFilter`] for more details +/// on filtering options. 
+/// /// ```rust /// use grc20_core::mapping::entity; /// use grc20_core::mapping::query_utils::order_by; diff --git a/grc20-core/src/mapping/relation/mod.rs b/grc20-core/src/mapping/relation/mod.rs index afb721a..6e1f3bf 100644 --- a/grc20-core/src/mapping/relation/mod.rs +++ b/grc20-core/src/mapping/relation/mod.rs @@ -75,7 +75,7 @@ pub fn find_one( /// Creates a query to find multiple relations. Supports filtering by relation_type and its to/from entities. /// The results are ordered by relation index. /// -/// See [`RelationFilter`](RelationFilter) for more details on filtering options. +/// See [`RelationFilter`] for more details on filtering options. /// /// ```rust /// use grc20_core::mapping::relation; From 9a6e2d4772d7f210112d2fb407000877d8f124c6 Mon Sep 17 00:00:00 2001 From: Christophe Date: Tue, 3 Jun 2025 15:35:33 -0400 Subject: [PATCH 13/13] style: fmt --- grc20-core/src/mapping/entity/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/grc20-core/src/mapping/entity/mod.rs b/grc20-core/src/mapping/entity/mod.rs index 03ba50e..5f8192b 100644 --- a/grc20-core/src/mapping/entity/mod.rs +++ b/grc20-core/src/mapping/entity/mod.rs @@ -56,9 +56,9 @@ pub fn find_one(neo4j: &neo4rs::Graph, id: impl Into) -> FindOneQuery } /// Creates a query to find multiple entities. Supports filtering by relations and -/// properties as well as ordering and pagination. See [`EntityFilter`] for more details +/// properties as well as ordering and pagination. See [`EntityFilter`] for more details /// on filtering options. -/// +/// /// ```rust /// use grc20_core::mapping::entity; /// use grc20_core::mapping::query_utils::order_by;