diff --git a/CLAUDE.md b/CLAUDE.md index 5f6a54e..50c70e8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -51,7 +51,7 @@ pnpm release ## Architecture ### Monorepo Structure -- Uses pnpm workspaces (v9.0.0 strictly enforced) and Turbo for task orchestration +- Uses pnpm workspaces (v10.15.0 strictly enforced) and Turbo for task orchestration - Packages in `packages/*` directory - Node.js 20+ required - TypeScript 5.8+ with CommonJS module system for CLI package @@ -104,4 +104,4 @@ export MXBAI_API_KEY="your-api-key" - File upload with processing strategies (high_quality, fast, auto) - Git-based and hash-based sync capabilities - Manifest-based bulk uploads via YAML configuration -- Support for aliases and configuration management \ No newline at end of file +- Support for aliases and configuration management diff --git a/packages/cli/package.json b/packages/cli/package.json index 1c09932..27f88cb 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -55,6 +55,7 @@ "cli-table3": "^0.6.5", "commander": "^12.0.0", "dotenv": "^16.4.5", + "fast-deep-equal": "^3.1.3", "glob": "^10.4.5", "inquirer": "^9.2.23", "mime-types": "^3.0.1", diff --git a/packages/cli/src/commands/store/sync.ts b/packages/cli/src/commands/store/sync.ts index 63dfd7f..ee5e51c 100644 --- a/packages/cli/src/commands/store/sync.ts +++ b/packages/cli/src/commands/store/sync.ts @@ -13,6 +13,7 @@ import { parseOptions, } from "../../utils/global-options"; import { validateMetadata } from "../../utils/metadata"; +import { loadMetadataMapping } from "../../utils/metadata-file"; import { formatBytes, formatCountWithSuffix } from "../../utils/output"; import { resolveStore } from "../../utils/store"; import { @@ -37,6 +38,7 @@ const SyncStoreSchema = extendGlobalOptions({ yes: z.boolean().optional(), force: z.boolean().optional(), metadata: z.string().optional(), + metadataFile: z.string().optional(), parallel: z.coerce .number({ error: '"parallel" must be a number' }) .int({ error: '"parallel" must be an integer' }) @@ -54,6 +56,7 @@ interface SyncOptions extends GlobalOptions { yes?: boolean; force?: boolean; metadata?: string; + metadataFile?: string; parallel?: number; } @@ -79,6 +82,7 @@ export function createSyncCommand(): Command { "Force re-upload all files, ignoring change detection" ) .option("--metadata ", "Additional metadata for files") + .option("--metadata-file ", "Per-file metadata mapping (JSON/YAML)") .option("--parallel ", "Number of concurrent operations (1-200)") ); @@ -106,7 +110,23 @@ export function createSyncCommand(): Command { // Parse metadata if provided const additionalMetadata = validateMetadata(parsedOptions.metadata); - // Get git info + let metadataMap: Map> | undefined; + if (parsedOptions.metadataFile) { + try { + metadataMap = loadMetadataMapping(parsedOptions.metadataFile); + console.log( + chalk.green("✓"), + `Loaded metadata for ${metadataMap.size} file${metadataMap.size === 1 ? "" : "s"} from ${parsedOptions.metadataFile}` + ); + } catch (error) { + console.error( + chalk.red("✗"), + `Failed to load metadata file: ${error instanceof Error ? error.message : "Unknown error"}` + ); + process.exit(1); + } + } + const gitInfo = await getGitInfo(); const spinner = ora("Loading existing files from store...").start(); @@ -154,6 +174,7 @@ export function createSyncCommand(): Command { gitInfo, fromGit, forceUpload: parsedOptions.force, + metadataMap, }); analyzeSpinner.succeed("Change analysis complete"); @@ -221,6 +242,7 @@ export function createSyncCommand(): Command { strategy: parsedOptions.strategy, contextualization: parsedOptions.contextualization, metadata: additionalMetadata, + metadataMap, gitInfo: gitInfo.isRepo ? gitInfo : undefined, parallel: parsedOptions.parallel, } diff --git a/packages/cli/src/commands/store/upload.ts b/packages/cli/src/commands/store/upload.ts index 1e675bb..120905b 100644 --- a/packages/cli/src/commands/store/upload.ts +++ b/packages/cli/src/commands/store/upload.ts @@ -16,6 +16,10 @@ import { } from "../../utils/global-options"; import { uploadFromManifest } from "../../utils/manifest"; import { validateMetadata } from "../../utils/metadata"; +import { + loadMetadataMapping, + normalizePathForMetadata, +} from "../../utils/metadata-file"; import { formatBytes, formatCountWithSuffix } from "../../utils/output"; import { getStoreFiles, resolveStore } from "../../utils/store"; import { type FileToUpload, uploadFilesInBatch } from "../../utils/upload"; @@ -32,6 +36,7 @@ const UploadStoreSchema = extendGlobalOptions({ .boolean({ error: '"contextualization" must be a boolean' }) .optional(), metadata: z.string().optional(), + metadataFile: z.string().optional(), dryRun: z.boolean().optional(), parallel: z.coerce .number({ error: '"parallel" must be a number' }) @@ -47,6 +52,7 @@ export interface UploadOptions extends GlobalOptions { strategy?: FileCreateParams.Experimental["parsing_strategy"]; contextualization?: boolean; metadata?: string; + metadataFile?: string; dryRun?: boolean; parallel?: number; unique?: boolean; @@ -65,6 +71,7 @@ export function createUploadCommand(): Command { .option("--strategy ", "Processing strategy") .option("--contextualization", "Enable context preservation") .option("--metadata ", "Additional metadata as JSON string") + .option("--metadata-file ", "Per-file metadata mapping (JSON/YAML)") .option("--dry-run", "Preview what would be uploaded", false) .option("--parallel ", "Number of concurrent uploads (1-200)") .option( @@ -93,6 +100,15 @@ export function createUploadCommand(): Command { spinner.succeed("Upload initialized"); + // Validate mutually exclusive options + if (parsedOptions.manifest && parsedOptions.metadataFile) { + console.error( + chalk.red("✗"), + "Cannot use both --manifest and --metadata-file. Use --manifest with per-file metadata entries instead." + ); + process.exit(1); + } + // Handle manifest file upload if (parsedOptions.manifest) { return await uploadFromManifest( @@ -123,6 +139,23 @@ export function createUploadCommand(): Command { const metadata = validateMetadata(parsedOptions.metadata); + let metadataMap: Map> | undefined; + if (parsedOptions.metadataFile) { + try { + metadataMap = loadMetadataMapping(parsedOptions.metadataFile); + console.log( + chalk.green("✓"), + `Loaded metadata for ${metadataMap.size} file${metadataMap.size === 1 ? "" : "s"} from ${parsedOptions.metadataFile}` + ); + } catch (error) { + console.error( + chalk.red("✗"), + `Failed to load metadata file: ${error instanceof Error ? error.message : "Unknown error"}` + ); + process.exit(1); + } + } + // Collect all files matching patterns const files: string[] = []; for (const pattern of parsedOptions.patterns) { @@ -150,6 +183,7 @@ export function createUploadCommand(): Command { }, 0); console.log( + chalk.green("✓"), `Found ${formatCountWithSuffix(uniqueFiles.length, "file")} matching the ${ patterns.length > 1 ? "patterns" : "pattern" } (${formatBytes(totalSize)})` @@ -198,7 +232,7 @@ export function createUploadCommand(): Command { ]) ); spinner.succeed( - `Found ${formatCountWithSuffix(existingFiles.size, "existing file")}` + `Found ${formatCountWithSuffix(existingFiles.size, "existing file")} in store` ); } catch (error) { spinner.fail("Failed to check existing files"); @@ -207,12 +241,20 @@ export function createUploadCommand(): Command { } // Transform files to shared format - const filesToUpload: FileToUpload[] = uniqueFiles.map((filePath) => ({ - path: filePath, - strategy, - contextualization, - metadata, - })); + const filesToUpload: FileToUpload[] = uniqueFiles.map((filePath) => { + const normalizedPath = normalizePathForMetadata(filePath); + const perFileMetadata = metadataMap?.get(normalizedPath) || {}; + + return { + path: filePath, + strategy, + contextualization, + metadata: { + ...metadata, // CLI --metadata (base for all files) + ...perFileMetadata, // Per-file from mapping (overrides) + }, + }; + }); // Upload files with progress tracking await uploadFilesInBatch(client, store.id, filesToUpload, { diff --git a/packages/cli/src/utils/hash.ts b/packages/cli/src/utils/hash.ts index a4381e5..641b962 100644 --- a/packages/cli/src/utils/hash.ts +++ b/packages/cli/src/utils/hash.ts @@ -14,15 +14,6 @@ export async function calculateFileHash(filePath: string): Promise { return `sha256:${hash.digest("hex")}`; } -/** - * Calculate SHA-256 hash of a string or buffer - */ -export function calculateHash(content: string | Buffer): string { - const hash = createHash("sha256"); - hash.update(content); - return `sha256:${hash.digest("hex")}`; -} - /** * Compare two hashes */ diff --git a/packages/cli/src/utils/metadata-file.ts b/packages/cli/src/utils/metadata-file.ts new file mode 100644 index 0000000..0e6417d --- /dev/null +++ b/packages/cli/src/utils/metadata-file.ts @@ -0,0 +1,106 @@ +import { readFileSync } from "node:fs"; +import { normalize, relative } from "node:path"; +import equal from "fast-deep-equal"; +import { parse } from "yaml"; +import type { FileSyncMetadata } from "./sync-state"; + +type SyncMetadataFields = keyof FileSyncMetadata; + +const SYNC_METADATA_FIELDS = new Set([ + "file_path", + "file_hash", + "git_commit", + "git_branch", + "uploaded_at", + "synced", +]); + +/** + * Normalize a file path for consistent metadata map lookups across platforms + * Converts absolute path to relative-to-CWD and removes leading ./ + */ +export function normalizePathForMetadata(filePath: string): string { + const relativePath = relative(process.cwd(), filePath); + return normalize(relativePath).replace(/^\.[\\/]/, ""); +} + +/** + * Load metadata mapping from JSON/YAML file + * Returns a Map with paths normalized relative to CWD + * + * Paths in the metadata file should be relative to CWD. + * They will be normalized to ensure consistent lookups across platforms. + */ +export function loadMetadataMapping( + filePath: string +): Map> { + const content = readFileSync(filePath, "utf-8"); + + // Try JSON first, then YAML + let data: Record>; + try { + data = JSON.parse(content); + } catch { + try { + data = parse(content); + } catch { + throw new Error( + "Metadata file must be valid JSON or YAML and contain an object" + ); + } + } + + if (typeof data !== "object" || data === null || Array.isArray(data)) { + throw new Error( + "Metadata file must contain an object mapping paths to metadata" + ); + } + + const map = new Map>(); + for (const [key, value] of Object.entries(data)) { + if (typeof value !== "object" || value === null || Array.isArray(value)) { + throw new Error( + `Metadata for "${key}" must be an object, got ${typeof value}` + ); + } + + // Normalize path and remove leading ./ + const normalizedKey = normalize(key).replace(/^\.[\\/]/, ""); + + map.set(normalizedKey, value); + } + + return map; +} + +/** + * Extract user-provided metadata by removing sync-specific fields + */ +export function extractUserMetadata( + metadata: Record +): Record { + const userMetadata: Record = {}; + + for (const [key, value] of Object.entries(metadata)) { + if (!SYNC_METADATA_FIELDS.has(key as SyncMetadataFields)) { + userMetadata[key] = value; + } + } + + return userMetadata; +} + +/** + * Deep equality check for metadata objects + * Uses fast-deep-equal for reliable comparison including edge cases like: + * - Date objects + * - undefined values + * - NaN, Infinity + * - Nested objects and arrays + */ +export function metadataEquals( + a: Record, + b: Record +): boolean { + return equal(a, b); +} diff --git a/packages/cli/src/utils/sync-state.ts b/packages/cli/src/utils/sync-state.ts index fa248b4..c1c4cc8 100644 --- a/packages/cli/src/utils/sync-state.ts +++ b/packages/cli/src/utils/sync-state.ts @@ -8,6 +8,7 @@ export interface FileSyncMetadata { git_branch?: string; uploaded_at: string; synced: boolean; + [key: string]: unknown; } /** diff --git a/packages/cli/src/utils/sync.ts b/packages/cli/src/utils/sync.ts index 6246e59..9410c33 100644 --- a/packages/cli/src/utils/sync.ts +++ b/packages/cli/src/utils/sync.ts @@ -9,6 +9,11 @@ import ora from "ora"; import pLimit from "p-limit"; import { getChangedFiles, normalizeGitPatterns } from "./git"; import { calculateFileHash, hashesMatch } from "./hash"; +import { + extractUserMetadata, + metadataEquals, + normalizePathForMetadata, +} from "./metadata-file"; import { formatBytes, formatCountWithSuffix } from "./output"; import { buildFileSyncMetadata, type FileSyncMetadata } from "./sync-state"; import { uploadFile } from "./upload"; @@ -20,6 +25,8 @@ interface FileChange { localHash?: string; remoteHash?: string; fileId?: string; + contentChanged?: boolean; + metadataChanged?: boolean; } interface SyncAnalysis { @@ -55,12 +62,14 @@ interface AnalyzeChangesParams { gitInfo: { commit: string; branch: string; isRepo: boolean }; fromGit?: string; forceUpload?: boolean; + metadataMap?: Map>; } export async function analyzeChanges( params: AnalyzeChangesParams ): Promise { - const { patterns, syncedFiles, gitInfo, fromGit, forceUpload } = params; + const { patterns, syncedFiles, gitInfo, fromGit, forceUpload, metadataMap } = + params; const analysis: SyncAnalysis = { added: [], @@ -122,6 +131,9 @@ export async function analyzeChanges( let isModified = false; let localHash: string | undefined; + let contentChanged = false; + let metadataChanged = false; + if (forceUpload) { // When --force-upload is set, treat all existing files as modified isModified = true; @@ -133,7 +145,26 @@ export async function analyzeChanges( } else { // Default behavior: use hash comparison localHash = await calculateFileHash(filePath); - isModified = !hashesMatch(localHash, syncedFile.metadata.file_hash); + contentChanged = !hashesMatch(localHash, syncedFile.metadata.file_hash); + + // Check metadata changes if --metadata-file provided + if (metadataMap) { + const normalizedPath = normalizePathForMetadata(filePath); + const newMetadata = metadataMap.get(normalizedPath); + + if (newMetadata !== undefined) { + // Extract user metadata from existing file (exclude sync fields) + const existingUserMetadata = extractUserMetadata( + syncedFile.metadata as unknown as Record + ); + metadataChanged = !metadataEquals( + newMetadata, + existingUserMetadata + ); + } + } + + isModified = contentChanged || metadataChanged; } if (isModified) { @@ -144,6 +175,8 @@ export async function analyzeChanges( localHash, remoteHash: syncedFile.metadata.file_hash, fileId: syncedFile.fileId, + contentChanged, + metadataChanged, }); analysis.totalSize += stats.size; } else { @@ -194,8 +227,19 @@ export function formatChangeSummary(analysis: SyncAnalysis): string { ` ${chalk.yellow("Updated:")} (${formatCountWithSuffix(analysis.modified.length, "file")})` ); analysis.modified.forEach((file) => { + const relativePath = path.relative(process.cwd(), file.path); + + let indicator = ""; + if (file.contentChanged && file.metadataChanged) { + indicator = chalk.cyan(" [content + metadata updated]"); + } else if (file.contentChanged) { + indicator = chalk.blue(" [content updated]"); + } else if (file.metadataChanged) { + indicator = chalk.magenta(" [metadata updated]"); + } + const size = file.size ? ` (${formatBytes(file.size)})` : ""; - lines.push(` • ${path.relative(process.cwd(), file.path)}${size}`); + lines.push(` • ${relativePath}${indicator}${size}`); }); lines.push(""); } @@ -248,6 +292,7 @@ export async function executeSyncChanges( strategy?: FileCreateParams.Experimental["parsing_strategy"]; contextualization?: boolean; metadata?: Record; + metadataMap?: Map>; gitInfo?: { commit: string; branch: string }; parallel?: number; } @@ -347,10 +392,16 @@ export async function executeSyncChanges( options.gitInfo ); - // Merge with user-provided metadata + // Get per-file metadata from mapping + const normalizedPath = normalizePathForMetadata(file.path); + const perFileMetadata = + options.metadataMap?.get(normalizedPath) || {}; + + // Merge metadata with precedence: sync fields > per-file > CLI --metadata const finalMetadata = { - ...options.metadata, - ...syncMetadata, + ...options.metadata, // CLI --metadata (base for all files) + ...perFileMetadata, // Per-file from mapping + ...syncMetadata, // Sync fields }; // Check if file is empty diff --git a/packages/cli/tests/commands/store/sync.test.ts b/packages/cli/tests/commands/store/sync.test.ts index 772c237..98b16b4 100644 --- a/packages/cli/tests/commands/store/sync.test.ts +++ b/packages/cli/tests/commands/store/sync.test.ts @@ -181,4 +181,124 @@ describe("Store Sync Command", () => { ); }); }); + + describe("Per-file metadata", () => { + beforeEach(() => { + mockFs({ + "file1.txt": "content1", + "file2.txt": "content2", + "metadata.json": JSON.stringify({ + "file1.txt": { title: "File One", priority: 1 }, + "file2.txt": { title: "File Two", priority: 2 }, + }), + }); + }); + + it("should pass metadata file to analyzeChanges", async () => { + await command.parseAsync([ + "node", + "sync", + "test-store", + "*.txt", + "--metadata-file", + "metadata.json", + "-y", + ]); + + expect(mockAnalyzeChanges).toHaveBeenCalledWith( + expect.objectContaining({ + metadataMap: expect.any(Map), + }) + ); + + // Verify the metadata map was loaded + const call = mockAnalyzeChanges.mock.calls[0][0]; + expect(call.metadataMap?.size).toBe(2); + expect(call.metadataMap?.get("file1.txt")).toEqual({ + title: "File One", + priority: 1, + }); + }); + + it("should pass metadata file to executeSyncChanges", async () => { + mockAnalyzeChanges.mockResolvedValue({ + added: [ + { + path: "file1.txt", + type: "added", + size: 100, + }, + ], + modified: [], + deleted: [], + unchanged: 0, + totalFiles: 1, + totalSize: 100, + }); + + await command.parseAsync([ + "node", + "sync", + "test-store", + "*.txt", + "--metadata-file", + "metadata.json", + "-y", + ]); + + expect(mockExecuteSyncChanges).toHaveBeenCalledWith( + expect.any(Object), + "550e8400-e29b-41d4-a716-446655440040", + expect.any(Object), + expect.objectContaining({ + metadataMap: expect.any(Map), + }) + ); + }); + + it("should handle invalid metadata file", async () => { + mockFs({ + "file1.txt": "content1", + "metadata.json": "invalid json {{{", + }); + + await command.parseAsync([ + "node", + "sync", + "test-store", + "*.txt", + "--metadata-file", + "metadata.json", + "-y", + ]); + + expect(console.error).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining("Failed to load metadata file") + ); + expect(process.exit).toHaveBeenCalledWith(1); + }); + + it("should handle non-existent metadata file", async () => { + mockFs({ + "file1.txt": "content1", + }); + + await command.parseAsync([ + "node", + "sync", + "test-store", + "*.txt", + "--metadata-file", + "nonexistent.json", + "-y", + ]); + + expect(console.error).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining("Failed to load metadata file") + ); + expect(process.exit).toHaveBeenCalledWith(1); + }); + }); }); diff --git a/packages/cli/tests/commands/store/upload.test.ts b/packages/cli/tests/commands/store/upload.test.ts index dcf2e53..8607f72 100644 --- a/packages/cli/tests/commands/store/upload.test.ts +++ b/packages/cli/tests/commands/store/upload.test.ts @@ -133,6 +133,7 @@ describe("Store Upload Command", () => { ]); expect(console.log).toHaveBeenCalledWith( + expect.any(String), expect.stringContaining("Found 3 files matching the pattern") ); expect(mockUploadFilesInBatch).toHaveBeenCalled(); @@ -161,6 +162,7 @@ describe("Store Upload Command", () => { ]); expect(console.log).toHaveBeenCalledWith( + expect.any(String), expect.stringContaining("Found 3 files matching the patterns") ); expect(mockUploadFilesInBatch).toHaveBeenCalled(); @@ -751,4 +753,255 @@ describe("Store Upload Command", () => { ); }); }); + + describe("Per-file metadata", () => { + it("should load and apply per-file metadata from JSON file", async () => { + mockFs({ + "file1.txt": "content1", + "file2.txt": "content2", + "metadata.json": JSON.stringify({ + "file1.txt": { title: "File One", priority: 1 }, + "file2.txt": { title: "File Two", priority: 2 }, + }), + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + "file2.txt", + ]); + + await command.parseAsync([ + "node", + "upload", + "test-store", + "*.txt", + "--metadata-file", + "metadata.json", + ]); + + expect(mockUploadFilesInBatch).toHaveBeenCalledWith( + expect.any(Object), + "550e8400-e29b-41d4-a716-446655440130", + expect.arrayContaining([ + expect.objectContaining({ + path: "file1.txt", + metadata: expect.objectContaining({ + title: "File One", + priority: 1, + }), + }), + expect.objectContaining({ + path: "file2.txt", + metadata: expect.objectContaining({ + title: "File Two", + priority: 2, + }), + }), + ]), + expect.any(Object) + ); + }); + + it("should load and apply per-file metadata from YAML file", async () => { + mockFs({ + "file1.txt": "content1", + "metadata.yaml": ` +file1.txt: + title: File One + priority: 1 +`, + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + await command.parseAsync([ + "node", + "upload", + "test-store", + "*.txt", + "--metadata-file", + "metadata.yaml", + ]); + + expect(mockUploadFilesInBatch).toHaveBeenCalledWith( + expect.any(Object), + "550e8400-e29b-41d4-a716-446655440130", + expect.arrayContaining([ + expect.objectContaining({ + path: "file1.txt", + metadata: expect.objectContaining({ + title: "File One", + priority: 1, + }), + }), + ]), + expect.any(Object) + ); + }); + + it("should override CLI metadata with per-file metadata", async () => { + mockFs({ + "file1.txt": "content1", + "metadata.json": JSON.stringify({ + "file1.txt": { author: "Jane", version: "2.0" }, + }), + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + await command.parseAsync([ + "node", + "upload", + "test-store", + "*.txt", + "--metadata", + '{"author":"John","project":"test"}', + "--metadata-file", + "metadata.json", + ]); + + expect(mockUploadFilesInBatch).toHaveBeenCalledWith( + expect.any(Object), + "550e8400-e29b-41d4-a716-446655440130", + expect.arrayContaining([ + expect.objectContaining({ + path: "file1.txt", + metadata: expect.objectContaining({ + author: "Jane", // Overridden from per-file + version: "2.0", // From per-file + project: "test", // From CLI metadata + }), + }), + ]), + expect.any(Object) + ); + }); + + it("should use CLI metadata for files not in metadata file", async () => { + mockFs({ + "file1.txt": "content1", + "file2.txt": "content2", + "metadata.json": JSON.stringify({ + "file1.txt": { title: "File One" }, + }), + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + "file2.txt", + ]); + + await command.parseAsync([ + "node", + "upload", + "test-store", + "*.txt", + "--metadata", + '{"author":"John"}', + "--metadata-file", + "metadata.json", + ]); + + expect(mockUploadFilesInBatch).toHaveBeenCalledWith( + expect.any(Object), + "550e8400-e29b-41d4-a716-446655440130", + expect.arrayContaining([ + expect.objectContaining({ + path: "file1.txt", + metadata: expect.objectContaining({ + title: "File One", + author: "John", + }), + }), + expect.objectContaining({ + path: "file2.txt", + metadata: expect.objectContaining({ + author: "John", + }), + }), + ]), + expect.any(Object) + ); + }); + + it("should reject both --manifest and --metadata-file", async () => { + mockFs({ + "manifest.yaml": "files: []", + "metadata.json": "{}", + }); + + await command.parseAsync([ + "node", + "upload", + "test-store", + "--manifest", + "manifest.yaml", + "--metadata-file", + "metadata.json", + ]); + + expect(console.error).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining( + "Cannot use both --manifest and --metadata-file" + ) + ); + expect(process.exit).toHaveBeenCalledWith(1); + }); + + it("should handle invalid metadata file", async () => { + mockFs({ + "file1.txt": "content1", + "metadata.json": "invalid json {{{", + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + await command.parseAsync([ + "node", + "upload", + "test-store", + "*.txt", + "--metadata-file", + "metadata.json", + ]); + + expect(console.error).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining("Failed to load metadata file") + ); + expect(process.exit).toHaveBeenCalledWith(1); + }); + + it("should handle non-existent metadata file", async () => { + mockFs({ + "file1.txt": "content1", + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + await command.parseAsync([ + "node", + "upload", + "test-store", + "*.txt", + "--metadata-file", + "nonexistent.json", + ]); + + expect(console.error).toHaveBeenCalledWith( + expect.any(String), + expect.stringContaining("Failed to load metadata file") + ); + expect(process.exit).toHaveBeenCalledWith(1); + }); + }); }); diff --git a/packages/cli/tests/utils/metadata-file.test.ts b/packages/cli/tests/utils/metadata-file.test.ts new file mode 100644 index 0000000..9df5073 --- /dev/null +++ b/packages/cli/tests/utils/metadata-file.test.ts @@ -0,0 +1,339 @@ +import { mkdtempSync, writeFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { beforeEach, describe, expect, it, jest } from "@jest/globals"; +import { + extractUserMetadata, + loadMetadataMapping, + metadataEquals, + normalizePathForMetadata, +} from "../../src/utils/metadata-file"; + +describe("Metadata File Utils", () => { + let tempDir: string; + + beforeEach(() => { + jest.clearAllMocks(); + // Create a temporary directory for test files + tempDir = mkdtempSync(join(tmpdir(), "metadata-file-test-")); + }); + + describe("loadMetadataMapping", () => { + it("should load JSON metadata file", () => { + const metadataPath = join(tempDir, "metadata.json"); + const metadataContent = { + "file1.txt": { key1: "value1", key2: 123 }, + "file2.txt": { key1: "value2", key2: 456 }, + }; + + writeFileSync(metadataPath, JSON.stringify(metadataContent)); + + const result = loadMetadataMapping(metadataPath); + + expect(result.size).toBe(2); + expect(result.get("file1.txt")).toEqual({ key1: "value1", key2: 123 }); + expect(result.get("file2.txt")).toEqual({ key1: "value2", key2: 456 }); + }); + + it("should load YAML metadata file", () => { + const metadataPath = join(tempDir, "metadata.yaml"); + const yamlContent = ` +file1.txt: + key1: value1 + key2: 123 +file2.txt: + key1: value2 + key2: 456 +`; + + writeFileSync(metadataPath, yamlContent); + + const result = loadMetadataMapping(metadataPath); + + expect(result.size).toBe(2); + expect(result.get("file1.txt")).toEqual({ key1: "value1", key2: 123 }); + expect(result.get("file2.txt")).toEqual({ key1: "value2", key2: 456 }); + }); + + it("should normalize paths by removing leading ./", () => { + const metadataPath = join(tempDir, "metadata.json"); + const metadataContent = { + "./file1.txt": { key1: "value1" }, + "file2.txt": { key1: "value2" }, + ".\\file3.txt": { key1: "value3" }, + }; + + writeFileSync(metadataPath, JSON.stringify(metadataContent)); + + const result = loadMetadataMapping(metadataPath); + + expect(result.size).toBe(3); + expect(result.get("file1.txt")).toEqual({ key1: "value1" }); + expect(result.get("file2.txt")).toEqual({ key1: "value2" }); + expect(result.get("file3.txt")).toEqual({ key1: "value3" }); + }); + + it("should throw error for invalid JSON/YAML", () => { + const metadataPath = join(tempDir, "metadata.json"); + writeFileSync(metadataPath, "not valid json or yaml {{{"); + + expect(() => loadMetadataMapping(metadataPath)).toThrow(); + }); + + it("should throw error if file content is not an object", () => { + const metadataPath = join(tempDir, "metadata.json"); + writeFileSync(metadataPath, JSON.stringify(["array", "not", "object"])); + + expect(() => loadMetadataMapping(metadataPath)).toThrow( + "Metadata file must contain an object mapping paths to metadata" + ); + }); + + it("should throw error if metadata value is not an object", () => { + const metadataPath = join(tempDir, "metadata.json"); + const metadataContent = { + "file1.txt": { key1: "value1" }, + "file2.txt": "not an object", + }; + + writeFileSync(metadataPath, JSON.stringify(metadataContent)); + + expect(() => loadMetadataMapping(metadataPath)).toThrow( + 'Metadata for "file2.txt" must be an object' + ); + }); + + it("should throw error if metadata value is an array", () => { + const metadataPath = join(tempDir, "metadata.json"); + const metadataContent = { + "file1.txt": { key1: "value1" }, + "file2.txt": ["array", "value"], + }; + + writeFileSync(metadataPath, JSON.stringify(metadataContent)); + + expect(() => loadMetadataMapping(metadataPath)).toThrow( + 'Metadata for "file2.txt" must be an object' + ); + }); + }); + + describe("extractUserMetadata", () => { + it("should extract user metadata excluding sync fields", () => { + const metadata = { + file_path: "path/to/file.txt", + file_hash: "abc123", + git_commit: "def456", + git_branch: "main", + uploaded_at: "2024-01-01T00:00:00Z", + synced: true, + custom_key1: "custom_value1", + custom_key2: 123, + }; + + const result = extractUserMetadata(metadata); + + expect(result).toEqual({ + custom_key1: "custom_value1", + custom_key2: 123, + }); + }); + + it("should return empty object when only sync fields present", () => { + const metadata = { + file_path: "path/to/file.txt", + file_hash: "abc123", + synced: true, + }; + + const result = extractUserMetadata(metadata); + + expect(result).toEqual({}); + }); + + it("should return all metadata when no sync fields present", () => { + const metadata = { + custom_key1: "custom_value1", + custom_key2: 123, + custom_key3: { nested: "value" }, + }; + + const result = extractUserMetadata(metadata); + + expect(result).toEqual({ + custom_key1: "custom_value1", + custom_key2: 123, + custom_key3: { nested: "value" }, + }); + }); + + it("should handle empty object", () => { + const metadata = {}; + + const result = extractUserMetadata(metadata); + + expect(result).toEqual({}); + }); + }); + + describe("metadataEquals", () => { + it("should return true for identical objects", () => { + const a = { key1: "value1", key2: 123, key3: true }; + const b = { key1: "value1", key2: 123, key3: true }; + + expect(metadataEquals(a, b)).toBe(true); + }); + + it("should return true for objects with keys in different order", () => { + const a = { key1: "value1", key2: 123, key3: true }; + const b = { key3: true, key1: "value1", key2: 123 }; + + expect(metadataEquals(a, b)).toBe(true); + }); + + it("should return false for objects with different values", () => { + const a = { key1: "value1", key2: 123 }; + const b = { key1: "value1", key2: 456 }; + + expect(metadataEquals(a, b)).toBe(false); + }); + + it("should return false for objects with different keys", () => { + const a = { key1: "value1", key2: 123 }; + const b = { key1: "value1", key3: 123 }; + + expect(metadataEquals(a, b)).toBe(false); + }); + + it("should return false when one has extra keys", () => { + const a = { key1: "value1", key2: 123 }; + const b = { key1: "value1", key2: 123, key3: "extra" }; + + expect(metadataEquals(a, b)).toBe(false); + }); + + it("should handle nested objects", () => { + const a = { key1: "value1", nested: { deep: { value: 123 } } }; + const b = { key1: "value1", nested: { deep: { value: 123 } } }; + + expect(metadataEquals(a, b)).toBe(true); + }); + + it("should detect differences in nested objects", () => { + const a = { key1: "value1", nested: { deep: { value: 123 } } }; + const b = { key1: "value1", nested: { deep: { value: 456 } } }; + + expect(metadataEquals(a, b)).toBe(false); + }); + + it("should handle arrays in metadata", () => { + const a = { key1: "value1", arr: [1, 2, 3] }; + const b = { key1: "value1", arr: [1, 2, 3] }; + + expect(metadataEquals(a, b)).toBe(true); + }); + + it("should detect array differences", () => { + const a = { key1: "value1", arr: [1, 2, 3] }; + const b = { key1: "value1", arr: [1, 2, 4] }; + + expect(metadataEquals(a, b)).toBe(false); + }); + + it("should return true for empty objects", () => { + const a = {}; + const b = {}; + + expect(metadataEquals(a, b)).toBe(true); + }); + + it("should handle null values", () => { + const a = { key1: "value1", key2: null }; + const b = { key1: "value1", key2: null }; + + expect(metadataEquals(a, b)).toBe(true); + }); + + it("should detect null vs undefined differences", () => { + const a = { key1: "value1", key2: null }; + const b = { key1: "value1", key2: undefined }; + + expect(metadataEquals(a, b)).toBe(false); + }); + + it("should handle Date objects", () => { + const date = new Date("2024-01-01T00:00:00.000Z"); + const a = { key1: "value1", date }; + const b = { key1: "value1", date: new Date("2024-01-01T00:00:00.000Z") }; + + expect(metadataEquals(a, b)).toBe(true); + }); + + it("should detect different Date objects", () => { + const a = { key1: "value1", date: new Date("2024-01-01T00:00:00.000Z") }; + const b = { key1: "value1", date: new Date("2024-01-02T00:00:00.000Z") }; + + expect(metadataEquals(a, b)).toBe(false); + }); + + it("should handle mixed Date and string (ISO)", () => { + const a = { key1: "value1", date: new Date("2024-01-01T00:00:00.000Z") }; + const b = { key1: "value1", date: "2024-01-01T00:00:00.000Z" }; + + // Date object vs ISO string are different types + expect(metadataEquals(a, b)).toBe(false); + }); + + it("should handle undefined values correctly", () => { + const a = { key1: "value1", key2: undefined }; + const b = { key1: "value1" }; + + // fast-deep-equal treats missing key differently from undefined + expect(metadataEquals(a, b)).toBe(false); + }); + }); + + describe("normalizePathForMetadata", () => { + it("should convert absolute path to relative-to-CWD", () => { + const absolutePath = join(process.cwd(), "docs", "file.txt"); + const result = normalizePathForMetadata(absolutePath); + + expect(result).toBe(join("docs", "file.txt")); + }); + + it("should remove leading ./ from paths", () => { + const pathWithDot = join(process.cwd(), "./docs/file.txt"); + const result = normalizePathForMetadata(pathWithDot); + + expect(result).toBe(join("docs", "file.txt")); + }); + + it("should handle files in current directory", () => { + const filePath = join(process.cwd(), "file.txt"); + const result = normalizePathForMetadata(filePath); + + expect(result).toBe("file.txt"); + }); + + it("should handle nested directory paths", () => { + const filePath = join( + process.cwd(), + "docs", + "api", + "v1", + "endpoints.txt" + ); + const result = normalizePathForMetadata(filePath); + + expect(result).toBe(join("docs", "api", "v1", "endpoints.txt")); + }); + + it("should handle parent directory references", () => { + const filePath = join(process.cwd(), "..", "sibling", "file.txt"); + const result = normalizePathForMetadata(filePath); + + // Should preserve ../ in the relative path + expect(result).toBe(join("..", "sibling", "file.txt")); + }); + }); +}); diff --git a/packages/cli/tests/utils/sync.test.ts b/packages/cli/tests/utils/sync.test.ts index b280a38..7031bba 100644 --- a/packages/cli/tests/utils/sync.test.ts +++ b/packages/cli/tests/utils/sync.test.ts @@ -173,7 +173,7 @@ describe("Sync Utils", () => { { fileId: string; metadata: FileSyncMetadata } >([ [ - require("path").resolve("modified.txt"), + require("node:path").resolve("modified.txt"), { fileId: "modified-file-id", metadata: { @@ -185,7 +185,7 @@ describe("Sync Utils", () => { }, ], [ - require("path").resolve("unchanged.txt"), + require("node:path").resolve("unchanged.txt"), { fileId: "unchanged-file-id", metadata: { @@ -243,7 +243,7 @@ describe("Sync Utils", () => { { fileId: string; metadata: FileSyncMetadata } >([ [ - require("path").resolve("modified.txt"), + require("node:path").resolve("modified.txt"), { fileId: "modified-file-id", metadata: { @@ -299,14 +299,14 @@ describe("Sync Utils", () => { const analysis = { added: [ { - path: require("path").resolve("new.txt"), + path: require("node:path").resolve("new.txt"), type: "added" as const, size: 11, }, ], modified: [ { - path: require("path").resolve("modified.txt"), + path: require("node:path").resolve("modified.txt"), type: "modified" as const, size: 16, fileId: "modified-file-id", @@ -352,12 +352,12 @@ describe("Sync Utils", () => { const analysis = { added: [ { - path: require("path").resolve("content.txt"), + path: require("node:path").resolve("content.txt"), type: "added" as const, size: 12, }, { - path: require("path").resolve("empty.txt"), + path: require("node:path").resolve("empty.txt"), type: "added" as const, size: 0, }, @@ -404,19 +404,19 @@ describe("Sync Utils", () => { const analysis = { added: [ { - path: require("path").resolve("success.txt"), + path: require("node:path").resolve("success.txt"), type: "added" as const, size: 15, }, { - path: require("path").resolve("empty.txt"), + path: require("node:path").resolve("empty.txt"), type: "added" as const, size: 0, }, ], modified: [ { - path: require("path").resolve("modified.txt"), + path: require("node:path").resolve("modified.txt"), type: "modified" as const, size: 16, fileId: "modified-file-id", @@ -454,6 +454,313 @@ describe("Sync Utils", () => { }); }); + describe("Per-file metadata change detection", () => { + it("should detect metadata-only changes", async () => { + mockFs({ + "file1.txt": "Content 1", + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + const syncedFiles = new Map< + string, + { fileId: string; metadata: FileSyncMetadata } + >([ + [ + require("node:path").resolve("file1.txt"), + { + fileId: "file1-id", + metadata: { + file_path: "file1.txt", + file_hash: "hash123", + uploaded_at: "2023-01-01T00:00:00.000Z", + synced: true, + title: "Old Title", // User metadata + }, + }, + ], + ]); + + // Mock hash comparison to indicate content unchanged + mockCalculateFileHash.mockResolvedValue("hash123"); + mockHashesMatch.mockReturnValue(true); // Content unchanged + + // Create metadata map with different metadata + const metadataMap = new Map>([ + ["file1.txt", { title: "New Title", priority: 1 }], + ]); + + const gitInfo = { commit: "abc123", branch: "main", isRepo: true }; + + const analysis = await analyzeChanges({ + patterns: ["*.txt"], + syncedFiles, + gitInfo, + metadataMap, + }); + + expect(analysis.modified).toHaveLength(1); + expect(analysis.modified[0]).toMatchObject({ + path: expect.stringContaining("file1.txt"), + contentChanged: false, + metadataChanged: true, + }); + }); + + it("should detect content-only changes", async () => { + mockFs({ + "file1.txt": "New Content", + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + const syncedFiles = new Map< + string, + { fileId: string; metadata: FileSyncMetadata } + >([ + [ + require("node:path").resolve("file1.txt"), + { + fileId: "file1-id", + metadata: { + file_path: "file1.txt", + file_hash: "old-hash", + uploaded_at: "2023-01-01T00:00:00.000Z", + synced: true, + title: "Title", // User metadata + }, + }, + ], + ]); + + // Mock hash comparison to indicate content changed + mockCalculateFileHash.mockResolvedValue("new-hash"); + mockHashesMatch.mockReturnValue(false); // Content changed + + // Create metadata map with same metadata + const metadataMap = new Map>([ + ["file1.txt", { title: "Title" }], + ]); + + const gitInfo = { commit: "abc123", branch: "main", isRepo: true }; + + const analysis = await analyzeChanges({ + patterns: ["*.txt"], + syncedFiles, + gitInfo, + metadataMap, + }); + + expect(analysis.modified).toHaveLength(1); + expect(analysis.modified[0]).toMatchObject({ + path: expect.stringContaining("file1.txt"), + contentChanged: true, + metadataChanged: false, + }); + }); + + it("should detect both content and metadata changes", async () => { + mockFs({ + "file1.txt": "New Content", + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + const syncedFiles = new Map< + string, + { fileId: string; metadata: FileSyncMetadata } + >([ + [ + require("node:path").resolve("file1.txt"), + { + fileId: "file1-id", + metadata: { + file_path: "file1.txt", + file_hash: "old-hash", + uploaded_at: "2023-01-01T00:00:00.000Z", + synced: true, + title: "Old Title", + }, + }, + ], + ]); + + // Mock hash comparison to indicate content changed + mockCalculateFileHash.mockResolvedValue("new-hash"); + mockHashesMatch.mockReturnValue(false); // Content changed + + // Create metadata map with different metadata + const metadataMap = new Map>([ + ["file1.txt", { title: "New Title", priority: 1 }], + ]); + + const gitInfo = { commit: "abc123", branch: "main", isRepo: true }; + + const analysis = await analyzeChanges({ + patterns: ["*.txt"], + syncedFiles, + gitInfo, + metadataMap, + }); + + expect(analysis.modified).toHaveLength(1); + expect(analysis.modified[0]).toMatchObject({ + path: expect.stringContaining("file1.txt"), + contentChanged: true, + metadataChanged: true, + }); + }); + + it("should not flag as modified when neither content nor metadata changed", async () => { + mockFs({ + "file1.txt": "Content 1", + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + const syncedFiles = new Map< + string, + { fileId: string; metadata: FileSyncMetadata } + >([ + [ + require("node:path").resolve("file1.txt"), + { + fileId: "file1-id", + metadata: { + file_path: "file1.txt", + file_hash: "hash123", + uploaded_at: "2023-01-01T00:00:00.000Z", + synced: true, + title: "Title", + }, + }, + ], + ]); + + // Mock hash comparison to indicate content unchanged + mockCalculateFileHash.mockResolvedValue("hash123"); + mockHashesMatch.mockReturnValue(true); // Content unchanged + + // Create metadata map with same metadata + const metadataMap = new Map>([ + ["file1.txt", { title: "Title" }], + ]); + + const gitInfo = { commit: "abc123", branch: "main", isRepo: true }; + + const analysis = await analyzeChanges({ + patterns: ["*.txt"], + syncedFiles, + gitInfo, + metadataMap, + }); + + expect(analysis.modified).toHaveLength(0); + expect(analysis.unchanged).toBe(1); + }); + + it("should not track change types for git-based detection", async () => { + mockFs({ + "file1.txt": "Content 1", + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + const syncedFiles = new Map< + string, + { fileId: string; metadata: FileSyncMetadata } + >([ + [ + require("node:path").resolve("file1.txt"), + { + fileId: "file1-id", + metadata: { + file_path: "file1.txt", + file_hash: "old-hash", + uploaded_at: "2023-01-01T00:00:00.000Z", + synced: true, + }, + }, + ], + ]); + + // Mock git changes + mockNormalizeGitPatterns.mockResolvedValue(["*.txt"]); + mockGetChangedFiles.mockResolvedValue([ + { path: "file1.txt", status: "modified" }, + ]); + + const gitInfo = { commit: "abc123", branch: "main", isRepo: true }; + + const analysis = await analyzeChanges({ + patterns: ["*.txt"], + syncedFiles, + gitInfo, + fromGit: "HEAD~1", + }); + + expect(analysis.modified).toHaveLength(1); + // Change types should not be tracked for git-based detection + expect(analysis.modified[0].contentChanged).toBe(false); + expect(analysis.modified[0].metadataChanged).toBe(false); + }); + + it("should not track change types for force upload", async () => { + mockFs({ + "file1.txt": "Content 1", + }); + + (glob as unknown as jest.MockedFunction).mockResolvedValue([ + "file1.txt", + ]); + + const syncedFiles = new Map< + string, + { fileId: string; metadata: FileSyncMetadata } + >([ + [ + require("node:path").resolve("file1.txt"), + { + fileId: "file1-id", + metadata: { + file_path: "file1.txt", + file_hash: "hash123", + uploaded_at: "2023-01-01T00:00:00.000Z", + synced: true, + }, + }, + ], + ]); + + mockCalculateFileHash.mockResolvedValue("hash456"); + mockHashesMatch.mockReturnValue(false); + + const gitInfo = { commit: "abc123", branch: "main", isRepo: true }; + + const analysis = await analyzeChanges({ + patterns: ["*.txt"], + syncedFiles, + gitInfo, + forceUpload: true, + }); + + expect(analysis.modified).toHaveLength(1); + // Change types should not be tracked for force upload + expect(analysis.modified[0].contentChanged).toBe(false); + expect(analysis.modified[0].metadataChanged).toBe(false); + }); + }); + afterAll(() => { // Restore original console.warn console.warn = originalConsoleWarn; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 464e27f..226c34d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -44,6 +44,9 @@ importers: dotenv: specifier: ^16.4.5 version: 16.6.1 + fast-deep-equal: + specifier: ^3.1.3 + version: 3.1.3 glob: specifier: ^10.4.5 version: 10.4.5 @@ -1777,6 +1780,9 @@ packages: resolution: {integrity: sha512-hMQ4CX1p1izmuLYyZqLMO/qGNw10wSv9QDCPfzXfyFrOaCSSoRfqE1Kf1s5an66J5JZC62NewG+mK49jOCtQew==} engines: {node: '>=4'} + fast-deep-equal@3.1.3: + resolution: {integrity: sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==} + fast-glob@3.3.3: resolution: {integrity: sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==} engines: {node: '>=8.6.0'} @@ -5207,6 +5213,8 @@ snapshots: iconv-lite: 0.4.24 tmp: 0.2.5 + fast-deep-equal@3.1.3: {} + fast-glob@3.3.3: dependencies: '@nodelib/fs.stat': 2.0.5