Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
264 changes: 260 additions & 4 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,12 @@ async-stream = { version = "0.3", features = [] }
async-trait = { version = "0.1", features = [] }

# HTTP client
reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] }
reqwest = { version = "0.13", default-features = false, features = ["rustls"] }

# HTTP middleware
reqwest-middleware = { version = "0.5", features = ["json", "multipart"] }
reqwest-retry = { version = "0.9", features = [] }
reqwest-tracing = { version = "0.7", features = [] }

# HTTP server
axum = { version = "0.8", features = [] }
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
[![Build](https://img.shields.io/github/actions/workflow/status/nvisycom/server/build.yml?branch=main&label=build%20%26%20test&style=flat-square)](https://github.com/nvisycom/server/actions/workflows/build.yml)

Open-source multimodal redaction API. Detect and redact PII and sensitive data
across documents, images, audio, and video.
across documents, images, and audio.

## Features

- **Multimodal Redaction:** Detect and remove sensitive data across PDFs, images, audio, and video
- **Multimodal Redaction:** Detect and remove sensitive data across PDFs, images, and audio
- **AI-Powered Detection:** LLM-driven PII and entity recognition with configurable redaction policies
- **Workspace Isolation:** Multi-tenant workspaces with HKDF-derived credential encryption
- **Real-Time Collaboration:** WebSocket and NATS pub/sub for live document editing
Expand Down
10 changes: 8 additions & 2 deletions crates/nvisy-nats/src/client/nats_client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ use crate::kv::{
ApiToken, ApiTokensBucket, ChatHistoryBucket, KvBucket, KvKey, KvStore, SessionKey, TokenKey,
};
use crate::object::{
AccountKey, AvatarsBucket, FileKey, FilesBucket, IntermediatesBucket, ObjectBucket, ObjectKey,
ObjectStore, ThumbnailsBucket,
AccountKey, AvatarsBucket, ContextFilesBucket, ContextKey, FileKey, FilesBucket,
IntermediatesBucket, ObjectBucket, ObjectKey, ObjectStore, ThumbnailsBucket,
};
use crate::stream::{EventPublisher, EventStream, EventSubscriber, FileStream, WebhookStream};
use crate::{Error, Result, TRACING_TARGET_CLIENT, TRACING_TARGET_CONNECTION};
Expand Down Expand Up @@ -255,6 +255,12 @@ impl NatsClient {
pub async fn avatar_store(&self) -> Result<ObjectStore<AvatarsBucket, AccountKey>> {
self.object_store().await
}

/// Get or create a context file store for encrypted workspace contexts.
///
/// Objects are keyed by `ContextKey` (workspace ID + context ID) and are
/// retained indefinitely in the `ContextFilesBucket` (no expiration).
#[tracing::instrument(skip(self), target = TRACING_TARGET_CLIENT)]
pub async fn context_file_store(&self) -> Result<ObjectStore<ContextFilesBucket, ContextKey>> {
self.object_store().await
}
}

// Stream getters
Expand Down
5 changes: 3 additions & 2 deletions crates/nvisy-nats/src/object/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ mod object_key;
mod object_store;

pub use object_bucket::{
AvatarsBucket, FilesBucket, IntermediatesBucket, ObjectBucket, ThumbnailsBucket,
AvatarsBucket, ContextFilesBucket, FilesBucket, IntermediatesBucket, ObjectBucket,
ThumbnailsBucket,
};
pub use object_data::{GetResult, PutResult};
pub use object_key::{AccountKey, FileKey, ObjectKey};
pub use object_key::{AccountKey, ContextKey, FileKey, ObjectKey};
pub use object_store::ObjectStore;
13 changes: 13 additions & 0 deletions crates/nvisy-nats/src/object/object_bucket.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,17 @@ impl ObjectBucket for AvatarsBucket {
const NAME: &'static str = "ACCOUNT_AVATARS";
}

/// Storage for encrypted workspace context files.
///
/// No expiration; context files are retained indefinitely
/// (`MAX_AGE` is `None`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct ContextFilesBucket;

impl ObjectBucket for ContextFilesBucket {
// Never expire stored objects.
const MAX_AGE: Option<Duration> = None;
// NATS object store bucket name.
const NAME: &'static str = "CONTEXT_FILES";
}

#[cfg(test)]
mod tests {
use super::*;
Expand All @@ -69,6 +80,7 @@ mod tests {
assert_eq!(IntermediatesBucket::NAME, "DOCUMENT_INTERMEDIATES");
assert_eq!(ThumbnailsBucket::NAME, "DOCUMENT_THUMBNAILS");
assert_eq!(AvatarsBucket::NAME, "ACCOUNT_AVATARS");
assert_eq!(ContextFilesBucket::NAME, "CONTEXT_FILES");
}

#[test]
Expand All @@ -80,5 +92,6 @@ mod tests {
);
assert_eq!(ThumbnailsBucket::MAX_AGE, None);
assert_eq!(AvatarsBucket::MAX_AGE, None);
assert_eq!(ContextFilesBucket::MAX_AGE, None);
}
}
122 changes: 122 additions & 0 deletions crates/nvisy-nats/src/object/object_key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,81 @@ impl From<Uuid> for AccountKey {
}
}

/// A validated key for context file objects in NATS object storage.
///
/// The key is encoded as a `ctx_` prefix followed by URL-safe, unpadded
/// base64 of the concatenated workspace ID and context ID. Two UUIDs make
/// a 32-byte payload, which encodes to 43 base64 characters, so a full key
/// looks like `ctx_ABC123...` and is 47 characters long.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ContextKey {
/// Workspace that owns the context file.
pub workspace_id: Uuid,
/// Context identifier within that workspace.
pub context_id: Uuid,
}

impl ObjectKey for ContextKey {
// All context keys are namespaced under this prefix in the bucket.
const PREFIX: &'static str = "ctx_";
}

impl ContextKey {
/// Creates a new context key from workspace and context IDs.
pub fn new(workspace_id: Uuid, context_id: Uuid) -> Self {
Self {
workspace_id,
context_id,
}
}

/// Encodes the key payload as URL-safe base64.
fn encode_payload(&self) -> String {
let mut bytes = [0u8; 32];
bytes[..16].copy_from_slice(self.workspace_id.as_bytes());
bytes[16..].copy_from_slice(self.context_id.as_bytes());
BASE64_URL_SAFE_NO_PAD.encode(bytes)
}

/// Decodes a key payload from URL-safe base64.
fn decode_payload(s: &str) -> Result<Self> {
let bytes = BASE64_URL_SAFE_NO_PAD.decode(s).map_err(|e| {
Error::operation("parse_key", format!("Invalid base64 encoding: {}", e))
})?;

if bytes.len() != 32 {
return Err(Error::operation(
"parse_key",
format!("Invalid key length: expected 32 bytes, got {}", bytes.len()),
));
}

let workspace_id = Uuid::from_slice(&bytes[..16])
.map_err(|e| Error::operation("parse_key", format!("Invalid workspace UUID: {}", e)))?;

let context_id = Uuid::from_slice(&bytes[16..])
.map_err(|e| Error::operation("parse_key", format!("Invalid context UUID: {}", e)))?;

Ok(Self::new(workspace_id, context_id))
}
}

impl fmt::Display for ContextKey {
    /// Renders the key as the `ctx_` prefix followed by the base64 payload.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(Self::PREFIX)?;
        f.write_str(&self.encode_payload())
    }
}

impl FromStr for ContextKey {
type Err = Error;

fn from_str(s: &str) -> Result<Self> {
let payload = s.strip_prefix(Self::PREFIX).ok_or_else(|| {
Error::operation(
"parse_key",
format!("Invalid key prefix: expected '{}'", Self::PREFIX),
)
})?;
Self::decode_payload(payload)
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -279,4 +354,51 @@ mod tests {
assert!(AccountKey::from_str("account_not-a-uuid").is_err());
}
}

mod context_key {
    use super::*;

    #[test]
    fn test_prefix() {
        assert_eq!(ContextKey::PREFIX, "ctx_");
    }

    #[test]
    fn test_new() {
        let ws = Uuid::new_v4();
        let ctx = Uuid::new_v4();
        let key = ContextKey::new(ws, ctx);
        assert_eq!((key.workspace_id, key.context_id), (ws, ctx));
    }

    #[test]
    fn test_display_has_prefix() {
        let encoded = ContextKey::new(Uuid::new_v4(), Uuid::new_v4()).to_string();
        assert!(encoded.starts_with("ctx_"));
        // 4-char prefix + 43 chars of unpadded base64 over 32 bytes = 47.
        assert_eq!(encoded.len(), 47);
    }

    #[test]
    fn test_roundtrip() {
        let ws = Uuid::new_v4();
        let ctx = Uuid::new_v4();
        let key = ContextKey::new(ws, ctx);

        let decoded: ContextKey = key.to_string().parse().unwrap();

        // Field-level and whole-key equality both hold after a roundtrip.
        assert_eq!(decoded.workspace_id, ws);
        assert_eq!(decoded.context_id, ctx);
        assert_eq!(decoded, key);
    }

    #[test]
    fn test_from_str_invalid_prefix() {
        for bad in ["file_abc", "abc"] {
            assert!(ContextKey::from_str(bad).is_err());
        }
    }
}
}
2 changes: 1 addition & 1 deletion crates/nvisy-object/src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ impl ObjectStoreClient {

/// Verify that the backing store is reachable.
///
/// Issues a HEAD for a probe key a not-found response is treated as
/// Issues a HEAD for a probe key: a not-found response is treated as
/// success (the bucket/container exists), any other error is propagated.
#[tracing::instrument(name = "object.verify", skip(self))]
pub async fn verify_reachable(&self) -> Result<(), Error> {
Expand Down
2 changes: 2 additions & 0 deletions crates/nvisy-postgres/src/model/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ mod account_notification;
mod workspace;
mod workspace_activity;
mod workspace_connection;
mod workspace_context;
mod workspace_file;
mod workspace_file_annotation;
mod workspace_file_chunk;
Expand All @@ -35,6 +36,7 @@ pub use workspace_activity::{NewWorkspaceActivity, WorkspaceActivity};
pub use workspace_connection::{
NewWorkspaceConnection, UpdateWorkspaceConnection, WorkspaceConnection,
};
pub use workspace_context::{NewWorkspaceContext, UpdateWorkspaceContext, WorkspaceContext};
pub use workspace_file::{NewWorkspaceFile, UpdateWorkspaceFile, WorkspaceFile};
// File models
pub use workspace_file_annotation::{
Expand Down
118 changes: 118 additions & 0 deletions crates/nvisy-postgres/src/model/workspace_context.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//! Workspace context model for PostgreSQL database operations.

use diesel::prelude::*;
use jiff_diesel::Timestamp;
use serde_json::Value as JsonValue;
use uuid::Uuid;

use crate::schema::workspace_contexts;
use crate::types::{HasCreatedAt, HasDeletedAt, HasUpdatedAt};

/// Workspace context model representing metadata for encrypted context files.
///
/// The actual encrypted content is stored in NATS object storage under
/// `storage_key`; this record holds only the metadata and the storage
/// reference. Field order must match the `workspace_contexts` table for
/// diesel's `Queryable` derive.
#[derive(Debug, Clone, PartialEq, Queryable, Selectable)]
#[diesel(table_name = workspace_contexts)]
#[diesel(check_for_backend(diesel::pg::Pg))]
pub struct WorkspaceContext {
/// Unique context identifier.
pub id: Uuid,
/// Reference to the workspace this context belongs to.
pub workspace_id: Uuid,
/// Reference to the account that created this context.
pub account_id: Uuid,
/// Human-readable context name.
pub name: String,
/// Optional context description.
pub description: Option<String>,
/// Content MIME type of the stored payload.
pub mime_type: String,
/// NATS object store key for the encrypted content.
pub storage_key: String,
/// Size of the encrypted content in bytes.
pub content_size: i64,
/// SHA-256 hash of the encrypted content.
pub content_hash: Vec<u8>,
/// Non-encrypted metadata for filtering/display.
pub metadata: JsonValue,
/// Timestamp when the context was created.
pub created_at: Timestamp,
/// Timestamp when the context was last updated.
pub updated_at: Timestamp,
/// Timestamp when the context was soft-deleted; `None` while live.
pub deleted_at: Option<Timestamp>,
}

/// Data for creating a new workspace context.
///
/// Omits DB-generated columns (`id`, timestamps); the encrypted payload
/// itself goes to NATS object storage, referenced via `storage_key`.
#[derive(Debug, Clone, Insertable)]
#[diesel(table_name = workspace_contexts)]
#[diesel(check_for_backend(diesel::pg::Pg))]
pub struct NewWorkspaceContext {
/// Workspace ID (required).
pub workspace_id: Uuid,
/// Account ID (required).
pub account_id: Uuid,
/// Context name.
pub name: String,
/// Optional context description.
pub description: Option<String>,
/// Content MIME type.
pub mime_type: String,
/// NATS object store key.
pub storage_key: String,
/// Size of the encrypted content in bytes.
pub content_size: i64,
/// SHA-256 hash of the encrypted content.
pub content_hash: Vec<u8>,
/// Non-encrypted metadata; `None` defers to the column default
/// (presumably an empty JSON object — confirm against the schema).
pub metadata: Option<JsonValue>,
}

/// Data for updating a workspace context.
///
/// Every field follows diesel `AsChangeset` semantics: an outer `None`
/// leaves the column untouched. For nullable columns (`Option<Option<T>>`),
/// `Some(None)` explicitly clears the column to NULL.
#[derive(Debug, Clone, Default, AsChangeset)]
#[diesel(table_name = workspace_contexts)]
#[diesel(check_for_backend(diesel::pg::Pg))]
pub struct UpdateWorkspaceContext {
/// New context name, if changing.
pub name: Option<String>,
/// New description; `Some(None)` clears it.
pub description: Option<Option<String>>,
/// New content MIME type, if changing.
pub mime_type: Option<String>,
/// NATS object store key (updated on content replacement).
pub storage_key: Option<String>,
/// Size of the encrypted content in bytes.
pub content_size: Option<i64>,
/// SHA-256 hash of the encrypted content.
pub content_hash: Option<Vec<u8>>,
/// Non-encrypted metadata for filtering/display.
pub metadata: Option<JsonValue>,
/// Soft delete marker; `Some(Some(ts))` deletes, `Some(None)` restores.
pub deleted_at: Option<Option<Timestamp>>,
}

impl WorkspaceContext {
/// Returns whether the context is soft-deleted (`deleted_at` is set).
pub fn is_deleted(&self) -> bool {
self.deleted_at.is_some()
}
}

// Bridge the jiff-diesel `Timestamp` wrappers to plain `jiff::Timestamp`
// for the shared time-based traits.
impl HasCreatedAt for WorkspaceContext {
fn created_at(&self) -> jiff::Timestamp {
self.created_at.into()
}
}

impl HasUpdatedAt for WorkspaceContext {
fn updated_at(&self) -> jiff::Timestamp {
self.updated_at.into()
}
}

impl HasDeletedAt for WorkspaceContext {
fn deleted_at(&self) -> Option<jiff::Timestamp> {
self.deleted_at.map(Into::into)
}
}
2 changes: 2 additions & 0 deletions crates/nvisy-postgres/src/query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ mod account_notification;
mod workspace;
mod workspace_activity;
mod workspace_connection;
mod workspace_context;
mod workspace_file;
mod workspace_file_annotation;
mod workspace_file_chunk;
Expand All @@ -37,6 +38,7 @@ pub use account_notification::AccountNotificationRepository;
pub use workspace::WorkspaceRepository;
pub use workspace_activity::WorkspaceActivityRepository;
pub use workspace_connection::WorkspaceConnectionRepository;
pub use workspace_context::WorkspaceContextRepository;
pub use workspace_file::WorkspaceFileRepository;
pub use workspace_file_annotation::WorkspaceFileAnnotationRepository;
pub use workspace_file_chunk::WorkspaceFileChunkRepository;
Expand Down
Loading
Loading