Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions grc20-core/src/mapping/entity/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ pub mod find_path;
pub mod insert_many;
pub mod insert_one;
pub mod models;
pub mod prefiltered_semantic_search;
pub mod search_with_traversals;
pub mod semantic_search;
pub mod utils;
Expand All @@ -16,6 +17,7 @@ pub use find_one::FindOneQuery;
pub use find_path::FindPathQuery;
pub use insert_one::InsertOneQuery;
pub use models::{Entity, EntityNode, EntityNodeRef, SystemProperties};
pub use prefiltered_semantic_search::PrefilteredSemanticSearchQuery;
pub use search_with_traversals::SearchWithTraversals;
pub use semantic_search::SemanticSearchQuery;
pub use utils::{EntityFilter, EntityRelationFilter, TypesFilter};
Expand Down Expand Up @@ -132,6 +134,13 @@ pub fn search<T>(neo4j: &neo4rs::Graph, vector: Vec<f64>) -> SemanticSearchQuery
SemanticSearchQuery::new(neo4j, vector)
}

/// Starts a [`PrefilteredSemanticSearchQuery`] for the given embedding `vector`.
///
/// This is a convenience wrapper around [`PrefilteredSemanticSearchQuery::new`];
/// the returned builder can be refined further before it is sent.
pub fn prefiltered_search<T>(
    neo4j: &neo4rs::Graph,
    vector: Vec<f64>,
) -> PrefilteredSemanticSearchQuery<T> {
    PrefilteredSemanticSearchQuery::<T>::new(neo4j, vector)
}

pub fn search_from_restictions<T>(
neo4j: &neo4rs::Graph,
vector: Vec<f64>,
Expand Down
212 changes: 212 additions & 0 deletions grc20-core/src/mapping/entity/prefiltered_semantic_search.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
use futures::{Stream, StreamExt, TryStreamExt};

use crate::{
entity::utils::MatchEntity,
error::DatabaseError,
mapping::{
query_utils::VersionFilter, AttributeNode, FromAttributes, PropFilter, QueryBuilder,
QueryStream, Subquery,
},
};

use super::{Entity, EntityFilter, EntityNode};

/// Builder for a semantic search that applies an [`EntityFilter`] before
/// scoring attributes by cosine similarity against a query vector.
///
/// `T` selects the shape of the results (see the `QueryStream` impls for
/// `EntityNode` and `Entity<T>`).
pub struct PrefilteredSemanticSearchQuery<T> {
/// Graph handle the query is executed on (cloned from the one given to `new`).
neo4j: neo4rs::Graph,
/// Query embedding, bound to the `$vector` Cypher parameter.
vector: Vec<f64>,
/// Entity filter whose subquery (on variable `e`) is emitted before the similarity match.
filter: EntityFilter,
/// Optional space filter; handed to `MatchEntity` when producing `Entity<T>` results.
space_id: Option<PropFilter<String>>,
/// Version filter; handed to `MatchEntity` when producing `Entity<T>` results.
version: VersionFilter,
/// Bound to `$threshold`; only rows with `score > $threshold` are kept (0.75 by default).
threshold: f64,
/// Passed to the query builder's `limit` and bound to `$limit` (100 by default).
limit: usize,
/// Optional skip, passed to the query builder's `skip_opt`.
skip: Option<usize>,

/// Zero-sized marker tying the builder to its result type `T`.
_marker: std::marker::PhantomData<T>,
}

impl<T> PrefilteredSemanticSearchQuery<T> {
    /// Creates a query for `vector` with a default entity filter and version
    /// filter, no space filter, a threshold of `0.75`, a limit of `100` and
    /// no skip.
    pub fn new(neo4j: &neo4rs::Graph, vector: Vec<f64>) -> Self {
        Self {
            neo4j: neo4j.clone(),
            vector,
            filter: EntityFilter::default(),
            space_id: None,
            version: VersionFilter::default(),
            threshold: 0.75,
            limit: 100,
            skip: None,

            _marker: std::marker::PhantomData,
        }
    }

    /// Replaces the entity filter applied before the similarity match.
    pub fn filter(self, filter: EntityFilter) -> Self {
        Self { filter, ..self }
    }

    /// Sets the space filter.
    pub fn space_id(self, filter: PropFilter<String>) -> Self {
        Self {
            space_id: Some(filter),
            ..self
        }
    }

    /// Sets the version on the underlying version filter.
    pub fn version(mut self, version: impl Into<String>) -> Self {
        let version = version.into();
        self.version.version_mut(version);
        self
    }

    /// Sets the result limit.
    pub fn limit(self, limit: usize) -> Self {
        Self { limit, ..self }
    }

    /// Sets the result limit when `limit` is `Some`; otherwise keeps the
    /// current limit.
    pub fn limit_opt(self, limit: Option<usize>) -> Self {
        match limit {
            Some(limit) => self.limit(limit),
            None => self,
        }
    }

    /// Sets the score threshold; rows must score strictly above it.
    pub fn threshold(self, threshold: f64) -> Self {
        Self { threshold, ..self }
    }

    /// Sets the number of rows to skip.
    pub fn skip(self, skip: usize) -> Self {
        self.skip_opt(Some(skip))
    }

    /// Sets or clears (`None`) the number of rows to skip.
    pub fn skip_opt(self, skip: Option<usize>) -> Self {
        Self { skip, ..self }
    }

    /// Builds the shared part of the query: the entity filter on `e`,
    /// followed by the cosine-similarity match over indexed attributes, with
    /// limit/skip and the `$vector`, `$limit` and `$threshold` parameters.
    fn subquery(&self) -> QueryBuilder {
        // Scores every `Attribute:Indexed` node of `e` that has an embedding
        // and whose `ATTRIBUTE` relation has a null `max_version`.
        const QUERY: &str = r#"
MATCH (e:Entity) -[r:ATTRIBUTE]-> (a:Attribute:Indexed)
WHERE r.max_version IS null
AND a.embedding IS NOT NULL
WITH e, a, r, vector.similarity.cosine(a.embedding, $vector) AS score
ORDER BY score DESC
WHERE score > $threshold
"#;

        let prefilter = self.filter.subquery("e");
        let vector_param = self.vector.clone();
        let limit_param = self.limit as i64;

        QueryBuilder::default()
            .subquery(prefilter)
            .subquery(QUERY)
            .limit(self.limit)
            .skip_opt(self.skip)
            .params("vector", vector_param)
            .params("limit", limit_param)
            .params("threshold", self.threshold)
    }
}

/// One hit of a prefiltered semantic search: the matched entity together
/// with the similarity score the query returned for it.
#[derive(Clone, Debug, PartialEq)]
pub struct SemanticSearchResult<T> {
/// The matched entity, in the representation selected by `T`.
pub entity: T,
/// Similarity score returned by the query for this hit.
pub score: f64,
}
/// Streams bare [`EntityNode`]s together with their similarity score.
impl QueryStream<SemanticSearchResult<EntityNode>> for PrefilteredSemanticSearchQuery<EntityNode> {
    /// Executes the query and yields one [`SemanticSearchResult`] per
    /// returned `(e, score)` row.
    async fn send(
        self,
    ) -> Result<
        impl Stream<Item = Result<SemanticSearchResult<EntityNode>, DatabaseError>>,
        DatabaseError,
    > {
        // Mirrors the columns of the `RETURN` clause below.
        #[derive(Debug, serde::Deserialize)]
        struct RowResult {
            e: EntityNode,
            score: f64,
        }

        let query = self.subquery().r#return("DISTINCT e, score");

        // The compiled Cypher is only logged in debug or test builds.
        if cfg!(any(debug_assertions, test)) {
            tracing::info!(
                "entity_node::PrefilteredSemanticSearch::<EntityNode>:\n{}",
                query.compile()
            );
        }

        let results = self
            .neo4j
            .execute(query.build())
            .await?
            .into_stream_as::<RowResult>()
            .map_err(DatabaseError::from)
            .map_ok(|RowResult { e, score }| SemanticSearchResult { entity: e, score });

        Ok(results)
    }
}

/// Streams fully hydrated [`Entity<T>`] values together with their
/// similarity score.
impl<T: FromAttributes> QueryStream<SemanticSearchResult<Entity<T>>>
    for PrefilteredSemanticSearchQuery<Entity<T>>
{
    /// Executes the query, then converts each row's attributes into `T` via
    /// [`FromAttributes`]; a failed conversion surfaces as a `DatabaseError`
    /// item in the stream.
    async fn send(
        self,
    ) -> Result<
        impl Stream<Item = Result<SemanticSearchResult<Entity<T>>, DatabaseError>>,
        DatabaseError,
    > {
        // The entity's own properties are flattened into `node`; the other
        // fields are the extra keys of the returned map projection.
        #[derive(Debug, serde::Deserialize)]
        struct RowResult {
            #[serde(flatten)]
            node: EntityNode,
            attrs: Vec<AttributeNode>,
            types: Vec<EntityNode>,
            score: f64,
        }

        let match_entity = MatchEntity::new(&self.space_id, &self.version);

        // Carry `e` and `score` forward, then let `MatchEntity` collect the
        // attributes and types of each matched entity.
        let carried = vec![String::from("e"), String::from("score")];
        let query = self.subquery().with(
            carried,
            match_entity.chain(
                "e",
                "attrs",
                "types",
                Some(vec![String::from("score")]),
                "RETURN e{.*, attrs: attrs, types: types, score: score}",
            ),
        );

        // The compiled Cypher and parameters are only logged in debug or test builds.
        if cfg!(any(debug_assertions, test)) {
            tracing::info!(
                "entity_node::PrefilteredSemanticSearch::<Entity<T>>:\n{}\nparams:{:?}",
                query.compile(),
                query.params
            );
        }

        let stream = self
            .neo4j
            .execute(query.build())
            .await?
            .into_stream_as::<RowResult>()
            .map_err(DatabaseError::from)
            .map(
                |row| -> Result<SemanticSearchResult<Entity<T>>, DatabaseError> {
                    let RowResult {
                        node,
                        attrs,
                        types,
                        score,
                    } = row?;

                    let attributes = T::from_attributes(attrs.into())?;
                    let types = types.into_iter().map(|ty| ty.id).collect();

                    Ok(SemanticSearchResult {
                        entity: Entity {
                            node,
                            attributes,
                            types,
                        },
                        score,
                    })
                },
            );

        Ok(stream)
    }
}
2 changes: 1 addition & 1 deletion grc20-core/src/mapping/entity/search_with_traversals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ impl<T> SearchWithTraversals<T> {

fn subquery(&self) -> QueryBuilder {
const QUERY: &str = r#"
CALL db.index.vector.queryNodes('vector_index', $limit * $effective_search_ratio, $vector)
CALL db.index.vector.queryNodes('vector_index', $effective_search_ratio, $vector)
YIELD node AS n, score AS score
WHERE score > $threshold
MATCH (e:Entity) -[r:ATTRIBUTE]-> (n)
Expand Down
7 changes: 3 additions & 4 deletions grc20-core/src/mapping/entity/semantic_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ impl<T> SemanticSearchQuery<T> {

fn subquery(&self) -> QueryBuilder {
const QUERY: &str = r#"
CALL db.index.vector.queryNodes('vector_index', $limit * $effective_search_ratio, $vector)
CALL db.index.vector.queryNodes('vector_index', $effective_search_ratio, $vector)
YIELD node AS n, score AS score
MATCH (e:Entity) -[r:ATTRIBUTE]-> (n)
"#;
Expand Down Expand Up @@ -123,9 +123,8 @@ impl QueryStream<SemanticSearchResult<EntityNode>> for SemanticSearchQuery<Entit

if cfg!(debug_assertions) || cfg!(test) {
tracing::info!(
"entity_node::FindManyQuery::<EntityNode>:\n{}\nparams:{:?}",
query.compile(),
query.params()
"entity_node::FindManyQuery::<EntityNode>:\n{}",
query.compile()
);
};

Expand Down
2 changes: 1 addition & 1 deletion grc20-core/src/mapping/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ pub use value::{Options, Value, ValueType};

use crate::{error::DatabaseError, indexer_ids};

pub const EFFECTIVE_SEARCH_RATIO: f64 = 100000.0;
pub const EFFECTIVE_SEARCH_RATIO: f64 = 1000000.0;

pub fn new_version_index(block_number: u64, idx: usize) -> String {
format!("{block_number:016}:{idx:04}")
Expand Down
2 changes: 1 addition & 1 deletion grc20-core/src/mapping/relation/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ impl RelationFilter {
.map(|from_filter| from_filter.subquery(from)),
)
.subquery_opt(self.to_.as_ref().map(|to_filter| to_filter.subquery(to)))
.subquery(MatchQuery::new(format!(
.subquery(MatchQuery::new_optional(format!(
"(rt:Entity {{id: {edge_var}.relation_type}})"
)))
.subquery_opt(self.relation_type.as_ref().map(|rt| rt.subquery("rt")))
Expand Down
2 changes: 1 addition & 1 deletion grc20-core/src/mapping/triple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ impl QueryStream<SemanticSearchResult> for SemanticSearchQuery {
{
const QUERY: &str = const_format::formatcp!(
r#"
CALL db.index.vector.queryNodes('vector_index', $limit * $effective_search_ratio, $vector)
CALL db.index.vector.queryNodes('vector_index', $effective_search_ratio, $vector)
YIELD node AS n, score AS score
ORDER BY score DESC
LIMIT $limit
Expand Down
76 changes: 8 additions & 68 deletions mcp-server/resources/get_entity_info_description.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,87 +35,27 @@ ToolResult>
"name": "Product Engineer at Geo",
"relation_id": "KPTqdNpCusxfM37KbKPX8w",
"relation_type": "Related spaces"
},
{
"id": "NcQ3h9jeJSavVd8iFsUxvD",
"name": "Senior Civil Engineer @ Golden Gate Bridge, Highway & Transportation District",
"relation_id": "AqpNtJ3XxaY4fqRCyoXbdt",
"relation_type": "Cities"
},
{
"id": "4ojV4dS1pV2tRnzXTpcMKJ",
"name": "Senior Plan Check Engineer (FT - Hybrid) @ CSG Consultants, Inc.",
"relation_id": "3AX4j43nywT5eBRV3s6AXi",
"relation_type": "Cities"
},
{
"id": "QoakYWCuv85FVuYdSmonxr",
"name": "Senior Civil Engineer - Land Development (FT - Hybrid) @ CSG Consultants, Inc.",
"relation_id": "8GEF1i3LK4Z56THjE8dVku",
"relation_type": "Cities"
},
{
"id": "JuV7jLoypebzLhkma6oZoU",
"name": "Lead Django Backend Engineer @ Textme Inc",
"relation_id": "46aBsQyBq15DimJ2i1DX4a",
"relation_type": "Cities"
},
{
"id": "RTmcYhLVmmfgUn9L3D1J3y",
"name": "Chief Engineer @ Wyndham Hotels & Resorts",
"relation_id": "8uYxjzkkdjskDQAeTQomvc",
"relation_type": "Cities"
}
}, ...
],
"outbound_relations": [
{
"id": "CUoEazCD7EmzXPTFFY8gGY",
"name": "No name",
"relation_id": "5WeSkkE1XXvGJGmXj9VUQ8",
"relation_type": "Cover"
},
{
"id": "7gzF671tq5JTZ13naG4tnr",
"name": "Space",
"relation_id": "WUZCXE1UGRtxdNQpGug8Tf",
"relation_type": "Types"
},
{
"id": "CUoEazCD7EmzXPTFFY8gGY",
"name": "No name",
"relation_id": "5WeSkkE1XXvGJGmXj9VUQ8",
"relation_type": "Cover"
},
{
"id": "D6Wy4bdtdoUrG3PDZceHr",
"name": "City",
"relation_id": "ARMj8fjJtdCwbtZa1f3jwe",
"relation_type": "Types"
},
{
"id": "AhidiWYnQ8fAbHqfzdU74k",
"name": "Upcoming events",
"relation_id": "V1ikGW9riu7dAP8rMgZq3u",
"relation_type": "Blocks"
},
{
"id": "T6iKbwZ17iv4dRdR9Qw7qV",
"name": "Trending restaurants",
"relation_id": "CvGXCmGXE7ofsgZeWad28p",
"relation_type": "Blocks"
},
{
"id": "X18WRE36mjwQ7gu3LKaLJS",
"name": "Neighborhoods",
"relation_id": "Uxpsee9LoTgJqMFfAQyJP6",
"relation_type": "Blocks"
},
{
"id": "HeC2pygci2tnvjTt5aEnBV",
"name": "Top goals",
"relation_id": "5WMTAzCnZH9Bsevou9GQ3K",
"relation_type": "Blocks"
},
{
"id": "5YtYFsnWq1jupvh5AjM2ni",
"name": "Culture",
"relation_id": "5TmxfepRr1THMRkGWenj5G",
"relation_type": "Tabs"
}
}, ...
]
}
```
Expand Down
Loading
Loading