diff --git a/.changeset/every-rockets-tie.md b/.changeset/every-rockets-tie.md new file mode 100644 index 0000000..ab7ea3e --- /dev/null +++ b/.changeset/every-rockets-tie.md @@ -0,0 +1,5 @@ +--- +"@mixedbread/cli": minor +--- + +Support multipart uploads and use it by default for larger files diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 0691c90..0351b88 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -4,9 +4,9 @@ on: workflow_dispatch: inputs: tag: - description: 'Prerelease tag (e.g., beta, alpha, rc)' + description: "Prerelease tag (e.g., beta, alpha, rc)" required: true - default: 'beta' + default: "beta" type: choice options: - beta @@ -37,8 +37,8 @@ jobs: uses: actions/setup-node@v4 with: node-version: 24 - cache: 'pnpm' - registry-url: 'https://registry.npmjs.org' + cache: "pnpm" + registry-url: "https://registry.npmjs.org" - name: Install dependencies run: pnpm install --frozen-lockfile @@ -60,7 +60,7 @@ jobs: git commit -m "chore: version packages for ${{ inputs.tag }} release" || echo "No changes to commit" - name: Publish prerelease packages - run: pnpm changeset publish --tag ${{ inputs.tag }} + run: pnpm changeset publish env: NPM_CONFIG_PROVENANCE: true @@ -72,4 +72,5 @@ jobs: pnpm changeset pre exit || echo "Not in pre mode" git add . git commit -m "chore: exit prerelease mode" || echo "No changes to commit" - git push || echo "No changes to push" \ No newline at end of file + git push || echo "No changes to push" + diff --git a/packages/cli/package.json b/packages/cli/package.json index 3b220b3..492ebc2 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -50,7 +50,7 @@ }, "dependencies": { "@clack/prompts": "^1.0.1", - "@mixedbread/sdk": "^0.51.0", + "@mixedbread/sdk": "^0.57.0", "@pnpm/tabtab": "^0.5.4", "chalk": "^5.6.2", "cli-table3": "^0.6.5", diff --git a/packages/cli/src/commands/store/sync.ts b/packages/cli/src/commands/store/sync.ts index 1375c75..7caa0a7 100644 --- a/packages/cli/src/commands/store/sync.ts +++ b/packages/cli/src/commands/store/sync.ts @@ -5,6 +5,7 @@ import { z } from "zod"; import { createClient } from "../../utils/client"; import { warnContextualizationDeprecated } from "../../utils/deprecation"; import { getGitInfo } from "../../utils/git"; +import type { MultipartUploadOptions } from "../../utils/upload"; import { addGlobalOptions, extendGlobalOptions, @@ -43,6 +44,19 @@ const SyncStoreSchema = extendGlobalOptions({ .max(200, { error: '"parallel" must be less than or equal to 200' }) .optional() .default(100), + multipartThreshold: z.coerce + .number({ error: '"multipart-threshold" must be a number' }) + .min(5, { error: '"multipart-threshold" must be at least 5 MB' }) + .optional(), + multipartPartSize: z.coerce + .number({ error: '"multipart-part-size" must be a number' }) + .min(5, { error: '"multipart-part-size" must be at least 5 MB' }) + .optional(), + multipartConcurrency: z.coerce + .number({ error: '"multipart-concurrency" must be a number' }) + .int({ error: '"multipart-concurrency" must be an integer' }) + .min(1, { error: '"multipart-concurrency" must be at least 1' }) + .optional(), }); export function createSyncCommand(): Command { @@ -71,6 +85,18 @@ export function createSyncCommand(): Command { ) .option("--metadata ", "Additional metadata for files") .option("--parallel ", "Number of concurrent operations (1-200)") + .option( + "--multipart-threshold ", + "File size threshold in MB to trigger multipart upload", + ) + .option( + "--multipart-part-size ", + "Size of each part in MB for multipart upload", + ) + .option( + "--multipart-concurrency ", + "Number of concurrent part uploads for multipart upload", + ) ); command.action(async (nameOrId: string, patterns: string[]) => { @@ -188,12 +214,27 @@ export function createSyncCommand(): Command { log.success("Auto-proceeding with --yes flag"); } + // Build multipart upload options + const MB = 1024 * 1024; + const multipartUpload: MultipartUploadOptions = { + ...(parsedOptions.multipartThreshold != null && { + threshold: parsedOptions.multipartThreshold * MB, + }), + ...(parsedOptions.multipartPartSize != null && { + partSize: parsedOptions.multipartPartSize * MB, + }), + ...(parsedOptions.multipartConcurrency != null && { + concurrency: parsedOptions.multipartConcurrency, + }), + }; + // Execute changes const syncResults = await executeSyncChanges(client, store.id, analysis, { strategy: parsedOptions.strategy, metadata: additionalMetadata, gitInfo: gitInfo.isRepo ? gitInfo : undefined, parallel: parsedOptions.parallel, + multipartUpload, }); // Display summary diff --git a/packages/cli/src/commands/store/upload.ts b/packages/cli/src/commands/store/upload.ts index b9d0daf..70af091 100644 --- a/packages/cli/src/commands/store/upload.ts +++ b/packages/cli/src/commands/store/upload.ts @@ -18,7 +18,11 @@ import { uploadFromManifest } from "../../utils/manifest"; import { validateMetadata } from "../../utils/metadata"; import { formatBytes, formatCountWithSuffix } from "../../utils/output"; import { checkExistingFiles, resolveStore } from "../../utils/store"; -import { type FileToUpload, uploadFilesInBatch } from "../../utils/upload"; +import { + type FileToUpload, + type MultipartUploadOptions, + uploadFilesInBatch, +} from "../../utils/upload"; const UploadStoreSchema = extendGlobalOptions({ nameOrId: z.string().min(1, { error: '"name-or-id" is required' }), @@ -41,6 +45,19 @@ const UploadStoreSchema = extendGlobalOptions({ .optional(), unique: z.boolean().optional(), manifest: z.string().optional(), + multipartThreshold: z.coerce + .number({ error: '"multipart-threshold" must be a number' }) + .min(5, { error: '"multipart-threshold" must be at least 5 MB' }) + .optional(), + multipartPartSize: z.coerce + .number({ error: '"multipart-part-size" must be a number' }) + .min(5, { error: '"multipart-part-size" must be at least 5 MB' }) + .optional(), + multipartConcurrency: z.coerce + .number({ error: '"multipart-concurrency" must be a number' }) + .int({ error: '"multipart-concurrency" must be an integer' }) + .min(1, { error: '"multipart-concurrency" must be at least 1' }) + .optional(), }); export interface UploadOptions extends GlobalOptions { @@ -51,6 +68,9 @@ export interface UploadOptions extends GlobalOptions { parallel?: number; unique?: boolean; manifest?: string; + multipartThreshold?: number; + multipartPartSize?: number; + multipartConcurrency?: number; } export function createUploadCommand(): Command { @@ -76,6 +96,18 @@ export function createUploadCommand(): Command { false ) .option("--manifest ", "Upload using manifest file") + .option( + "--multipart-threshold ", + "File size threshold in MB to trigger multipart upload", + ) + .option( + "--multipart-part-size ", + "Size of each part in MB for multipart upload", + ) + .option( + "--multipart-concurrency ", + "Number of concurrent part uploads for multipart upload", + ) ); command.action(async (nameOrId: string, patterns: string[]) => { @@ -102,13 +134,27 @@ export function createUploadCommand(): Command { activeSpinner.stop("Upload initialized"); activeSpinner = null; + const MB = 1024 * 1024; + const multipartUpload: MultipartUploadOptions = { + ...(parsedOptions.multipartThreshold != null && { + threshold: parsedOptions.multipartThreshold * MB, + }), + ...(parsedOptions.multipartPartSize != null && { + partSize: parsedOptions.multipartPartSize * MB, + }), + ...(parsedOptions.multipartConcurrency != null && { + concurrency: parsedOptions.multipartConcurrency, + }), + }; + // Handle manifest file upload if (parsedOptions.manifest) { return await uploadFromManifest( client, store.id, parsedOptions.manifest, - parsedOptions + parsedOptions, + multipartUpload ); } @@ -212,6 +258,7 @@ export function createUploadCommand(): Command { unique: parsedOptions.unique || false, existingFiles, parallel, + multipartUpload, }); } catch (error) { activeSpinner?.stop(); diff --git a/packages/cli/src/utils/manifest.ts b/packages/cli/src/utils/manifest.ts index c29c931..472e5e5 100644 --- a/packages/cli/src/utils/manifest.ts +++ b/packages/cli/src/utils/manifest.ts @@ -12,7 +12,11 @@ import { log, spinner } from "./logger"; import { validateMetadata } from "./metadata"; import { formatBytes, formatCountWithSuffix } from "./output"; import { checkExistingFiles } from "./store"; -import { type FileToUpload, uploadFilesInBatch } from "./upload"; +import { + type FileToUpload, + type MultipartUploadOptions, + uploadFilesInBatch, +} from "./upload"; // Manifest file schema const ManifestFileEntrySchema = z.object({ @@ -42,7 +46,8 @@ export async function uploadFromManifest( client: Mixedbread, storeIdentifier: string, manifestPath: string, - options: UploadOptions + options: UploadOptions, + multipartUpload?: MultipartUploadOptions ) { console.log(chalk.bold(`Loading manifest from: ${manifestPath}`)); @@ -199,6 +204,7 @@ export async function uploadFromManifest( existingFiles, parallel: options.parallel ?? config.defaults.upload.parallel ?? 100, showStrategyPerFile: true, + multipartUpload, }); } catch (error) { if (error instanceof z.ZodError) { diff --git a/packages/cli/src/utils/sync.ts b/packages/cli/src/utils/sync.ts index a25e2c8..8916ed2 100644 --- a/packages/cli/src/utils/sync.ts +++ b/packages/cli/src/utils/sync.ts @@ -7,10 +7,14 @@ import { glob } from "glob"; import pLimit from "p-limit"; import { getChangedFiles, normalizeGitPatterns } from "./git"; import { calculateFileHash, hashesMatch } from "./hash"; -import { log } from "./logger"; +import { log, spinner } from "./logger"; import { formatBytes, formatCountWithSuffix } from "./output"; import { buildFileSyncMetadata, type SyncedFileByPath } from "./sync-state"; -import { uploadFile } from "./upload"; +import { + type MultipartUploadOptions, + type UploadProgress, + uploadFile, +} from "./upload"; interface FileChange { path: string; @@ -264,13 +268,12 @@ export async function executeSyncChanges( metadata?: Record; gitInfo?: { commit: string; branch: string }; parallel?: number; + multipartUpload?: MultipartUploadOptions; } ): Promise { const parallel = options.parallel ?? 100; const limit = pLimit(parallel); const { filesToUpload, filesToDelete } = buildSyncPlan(analysis); - const totalOperations = filesToUpload.length + filesToDelete.length; - let completed = 0; console.log(chalk.bold("\nSyncing changes...")); @@ -281,10 +284,11 @@ export async function executeSyncChanges( // Delete legacy modified files and removed files if (filesToDelete.length > 0) { - console.log( - chalk.yellow( - `\nDeleting ${formatCountWithSuffix(filesToDelete.length, "file")}...` - ) + const deleteTotal = filesToDelete.length; + let deleteCompleted = 0; + const deleteSpinner = spinner(); + deleteSpinner.start( + `Deleting 0/${formatCountWithSuffix(deleteTotal, "file")}...` ); const deletePromises: Promise[] = filesToDelete.map((file) => @@ -293,15 +297,18 @@ export async function executeSyncChanges( await client.stores.files.delete(file.fileId!, { store_identifier: storeIdentifier, }); - completed++; - log.success( - `[${completed}/${totalOperations}] Deleted ${path.relative(process.cwd(), file.path)}` + deleteCompleted++; + deleteSpinner.message( + `Deleting ${deleteCompleted}/${formatCountWithSuffix(deleteTotal, "file")}...` ); return { file, success: true }; } catch (error) { - completed++; + deleteCompleted++; + deleteSpinner.message( + `Deleting ${deleteCompleted}/${formatCountWithSuffix(deleteTotal, "file")}...` + ); log.error( - `[${completed}/${totalOperations}] Failed to delete ${path.relative(process.cwd(), file.path)}: ${error instanceof Error ? error.message : "Unknown error"}` + `Failed to delete ${path.relative(process.cwd(), file.path)}: ${error instanceof Error ? error.message : "Unknown error"}` ); return { file, @@ -313,6 +320,7 @@ export async function executeSyncChanges( ); const deleteResults = await Promise.allSettled(deletePromises); + deleteResults.forEach((result) => { if (result.status === "fulfilled") { const syncResult = result.value; @@ -323,18 +331,28 @@ export async function executeSyncChanges( } } }); + + const deletedOk = results.deletions.successful.length; + deleteSpinner.stop( + deletedOk === deleteTotal + ? `Deleted ${formatCountWithSuffix(deleteTotal, "file")}` + : `Deleted ${deletedOk}/${formatCountWithSuffix(deleteTotal, "file")} (${results.deletions.failed.length} failed)` + ); } // Upload new and modified files if (filesToUpload.length > 0) { - console.log( - chalk.blue( - `\nUploading ${formatCountWithSuffix(filesToUpload.length, "file")}...` - ) + const uploadTotal = filesToUpload.length; + let uploadCompleted = 0; + const uploadSpinner = spinner(); + uploadSpinner.start( + `Uploading 0/${formatCountWithSuffix(uploadTotal, "file")}...` ); const uploadPromises: Promise[] = filesToUpload.map((file) => limit(async () => { + const relativePath = path.relative(process.cwd(), file.path); + try { // Calculate hash if not already done const fileHash = @@ -356,9 +374,9 @@ export async function executeSyncChanges( // Check if file is empty const stats = await fs.stat(file.path); if (stats.size === 0) { - completed++; - log.warn( - `[${completed}/${totalOperations}] Skipped empty file ${path.relative(process.cwd(), file.path)}` + uploadCompleted++; + uploadSpinner.message( + `Uploading ${uploadCompleted}/${formatCountWithSuffix(uploadTotal, "file")}...` ); return { file, success: false, skipped: true }; } @@ -368,17 +386,26 @@ export async function executeSyncChanges( metadata: finalMetadata, strategy: options.strategy, externalId: file.path, + multipartUpload: options.multipartUpload, + onProgress: (progress: UploadProgress) => { + uploadSpinner.message( + `Uploading ${uploadCompleted}/${formatCountWithSuffix(uploadTotal, "file")}... (${progress.fileName}: part ${progress.partsCompleted}/${progress.totalParts}, ${formatBytes(progress.uploadedBytes)}/${formatBytes(progress.totalBytes)})` + ); + }, }); - completed++; - log.success( - `[${completed}/${totalOperations}] Uploaded ${path.relative(process.cwd(), file.path)}` + uploadCompleted++; + uploadSpinner.message( + `Uploading ${uploadCompleted}/${formatCountWithSuffix(uploadTotal, "file")}...` ); return { file, success: true }; } catch (error) { - completed++; + uploadCompleted++; + uploadSpinner.message( + `Uploading ${uploadCompleted}/${formatCountWithSuffix(uploadTotal, "file")}...` + ); log.error( - `[${completed}/${totalOperations}] Failed to upload ${path.relative(process.cwd(), file.path)}: ${error instanceof Error ? error.message : "Unknown error"}` + `Failed to upload ${relativePath}: ${error instanceof Error ? error.message : "Unknown error"}` ); return { file, @@ -390,6 +417,7 @@ export async function executeSyncChanges( ); const uploadResults = await Promise.allSettled(uploadPromises); + uploadResults.forEach((result) => { if (result.status === "fulfilled") { const syncResult = result.value; @@ -400,6 +428,21 @@ export async function executeSyncChanges( } } }); + + const uploadedOk = results.uploads.successful.length; + const skippedCount = results.uploads.failed.filter((r) => r.skipped).length; + const failedCount = results.uploads.failed.length - skippedCount; + + if (uploadedOk === uploadTotal) { + uploadSpinner.stop(`Uploaded ${formatCountWithSuffix(uploadTotal, "file")}`); + } else { + const parts: string[] = []; + if (failedCount > 0) parts.push(`${failedCount} failed`); + if (skippedCount > 0) parts.push(`${skippedCount} skipped`); + uploadSpinner.stop( + `Uploaded ${uploadedOk}/${formatCountWithSuffix(uploadTotal, "file")} (${parts.join(", ")})` + ); + } } return results; diff --git a/packages/cli/src/utils/upload.ts b/packages/cli/src/utils/upload.ts index 4c0270a..01dbe5d 100644 --- a/packages/cli/src/utils/upload.ts +++ b/packages/cli/src/utils/upload.ts @@ -1,19 +1,89 @@ import { readFile, stat } from "node:fs/promises"; import { basename, relative } from "node:path"; +import { cpus, freemem } from "node:os"; import type Mixedbread from "@mixedbread/sdk"; import type { FileCreateParams } from "@mixedbread/sdk/resources/stores"; import chalk from "chalk"; import { lookup } from "mime-types"; import pLimit from "p-limit"; -import { log } from "./logger"; +import { log, spinner } from "./logger"; import { formatBytes, formatCountWithSuffix } from "./output"; export const UPLOAD_TIMEOUT = 1000 * 60 * 10; // 10 minutes +const MB = 1024 * 1024; +const MIN_PART_SIZE = 5 * MB; +const MAX_PARTS = 10_000; + +export interface MultipartUploadOptions { + threshold?: number; + partSize?: number; + concurrency?: number; +} + +/** + * Compute multipart config based on file size and system resources. + * User-provided overrides take precedence. + */ +export function resolveMultipartConfig( + fileSize: number, + overrides?: MultipartUploadOptions +): { threshold: number; partSize: number; concurrency: number } { + // Part size: smaller parts = more granular progress + let partSize: number; + if (fileSize < 200 * MB) { + partSize = 10 * MB; + } else if (fileSize < 1024 * MB) { + partSize = 20 * MB; + } else if (fileSize < 5 * 1024 * MB) { + partSize = 50 * MB; + } else { + partSize = 100 * MB; + } + + // Ensure we don't exceed the 10,000 parts limit + if (Math.ceil(fileSize / partSize) > MAX_PARTS) { + partSize = Math.ceil(fileSize / MAX_PARTS); + } + + partSize = Math.max(partSize, MIN_PART_SIZE); + + // Apply user override, then re-enforce the MAX_PARTS guard so a small + // user-specified part size can't produce more parts than the backend allows. + let finalPartSize = overrides?.partSize ?? partSize; + if (fileSize > 0 && Math.ceil(fileSize / finalPartSize) > MAX_PARTS) { + finalPartSize = Math.ceil(fileSize / MAX_PARTS); + } + finalPartSize = Math.max(finalPartSize, MIN_PART_SIZE); + + // Concurrency: bounded by CPU cores and available memory + const cores = cpus().length; + // Reserve 25% of free memory for other work; each concurrent part holds ~partSize in memory + const memoryBudget = Math.floor(freemem() * 0.75); + const maxByMemory = Math.max(1, Math.floor(memoryBudget / finalPartSize)); + const concurrency = Math.min(cores, maxByMemory, 10); + + return { + threshold: overrides?.threshold ?? 50 * MB, + partSize: finalPartSize, + concurrency: overrides?.concurrency ?? Math.max(concurrency, 2), + }; +} + +export interface UploadProgress { + fileName: string; + partsCompleted: number; + totalParts: number; + uploadedBytes: number; + totalBytes: number; +} + export interface UploadFileOptions { metadata?: Record; strategy?: FileCreateParams.Config["parsing_strategy"]; externalId?: string; + multipartUpload?: MultipartUploadOptions; + onProgress?: (progress: UploadProgress) => void; } export interface FileToUpload { @@ -70,7 +140,8 @@ export async function uploadFile( filePath: string, options: UploadFileOptions = {} ): Promise { - const { metadata = {}, strategy, externalId } = options; + const { metadata = {}, strategy, externalId, multipartUpload, onProgress } = + options; // Read file content const fileContent = await readFile(filePath); @@ -80,18 +151,60 @@ export async function uploadFile( new File([fileContent], fileName, { type: mimeType }) ); - await client.stores.files.upload( + const mpConfig = resolveMultipartConfig(fileContent.length, multipartUpload); + const totalFileBytes = fileContent.length; + + if (totalFileBytes >= mpConfig.threshold) { + const expectedParts = Math.ceil(totalFileBytes / mpConfig.partSize); + const zeroProgress: UploadProgress = { + fileName, + partsCompleted: 0, + totalParts: expectedParts, + uploadedBytes: 0, + totalBytes: totalFileBytes, + }; + if (onProgress) { + onProgress(zeroProgress); + } else { + log.info( + `${fileName}: 0/${expectedParts} parts — ${formatBytes(0)}/${formatBytes(totalFileBytes)}` + ); + } + } + + let partsCompleted = 0; + await client.stores.files.upload({ storeIdentifier, file, - { + body: { metadata, config: { parsing_strategy: strategy, }, ...(externalId ? { external_id: externalId } : {}), }, - { timeout: UPLOAD_TIMEOUT } - ); + options: { timeout: UPLOAD_TIMEOUT }, + multipartUpload: { + ...mpConfig, + onPartUpload: (event) => { + partsCompleted++; + const progress: UploadProgress = { + fileName, + partsCompleted, + totalParts: event.totalParts, + uploadedBytes: event.uploadedBytes, + totalBytes: event.totalBytes, + }; + if (onProgress) { + onProgress(progress); + } else { + log.info( + `${fileName}: part ${partsCompleted}/${event.totalParts} — ${formatBytes(event.uploadedBytes)}/${formatBytes(event.totalBytes)}` + ); + } + }, + }, + }); } /** @@ -106,6 +219,7 @@ export async function uploadFilesInBatch( existingFiles: Map; parallel: number; showStrategyPerFile?: boolean; + multipartUpload?: MultipartUploadOptions; } ): Promise { const { @@ -113,6 +227,7 @@ export async function uploadFilesInBatch( existingFiles, parallel, showStrategyPerFile = false, + multipartUpload, } = options; console.log( @@ -127,9 +242,23 @@ export async function uploadFilesInBatch( successfulSize: 0, }; - console.log(chalk.gray(`Processing with concurrency ${parallel}...`)); + const configParts = [`${formatCountWithSuffix(parallel, "file")} at a time`]; + const threshold = multipartUpload?.threshold ?? 50 * MB; + configParts.push(`multipart above ${formatBytes(threshold)}`); + if (multipartUpload?.partSize) { + configParts.push(`part size ${formatBytes(multipartUpload.partSize)}`); + } + if (multipartUpload?.concurrency) { + configParts.push(`${multipartUpload.concurrency} concurrent part uploads`); + } + console.log(chalk.gray(`Processing ${configParts.join(", ")}...`)); // Process files with sliding-window concurrency + const total = files.length; + let completed = 0; + const uploadSpinner = spinner(); + uploadSpinner.start(`Uploading 0/${formatCountWithSuffix(total, "file")}...`); + const limit = pLimit(parallel); await Promise.allSettled( files.map((file) => @@ -154,7 +283,8 @@ export async function uploadFilesInBatch( // Check if file is empty const stats = await stat(file.path); if (stats.size === 0) { - log.warn(`${relativePath} - Empty file skipped`); + completed++; + uploadSpinner.message(`Uploading ${completed}/${formatCountWithSuffix(total, "file")}...`); results.skipped++; return; } @@ -168,17 +298,40 @@ export async function uploadFilesInBatch( }) ); - await client.stores.files.upload( + const mpConfig = resolveMultipartConfig( + fileContent.length, + multipartUpload + ); + const totalFileBytes = fileContent.length; + + if (totalFileBytes >= mpConfig.threshold) { + const expectedParts = Math.ceil(totalFileBytes / mpConfig.partSize); + uploadSpinner.message( + `Uploading ${completed}/${formatCountWithSuffix(total, "file")}... (${fileName}: 0/${expectedParts} parts, ${formatBytes(0)}/${formatBytes(totalFileBytes)})` + ); + } + + let partsCompleted = 0; + await client.stores.files.upload({ storeIdentifier, - fileToUpload, - { + file: fileToUpload, + body: { metadata: fileMetadata, config: { parsing_strategy: file.strategy, }, }, - { timeout: UPLOAD_TIMEOUT } - ); + options: { timeout: UPLOAD_TIMEOUT }, + multipartUpload: { + ...mpConfig, + onPartUpload: (event) => { + partsCompleted++; + uploadSpinner.message( + `Uploading ${completed}/${formatCountWithSuffix(total, "file")}... (${fileName}: part ${partsCompleted}/${event.totalParts}, ${formatBytes(event.uploadedBytes)}/${formatBytes(event.totalBytes)})` + ); + }, + }, + }); if (unique && existingFiles.has(relativePath)) { results.updated++; @@ -187,16 +340,12 @@ export async function uploadFilesInBatch( } results.successfulSize += stats.size; - - let successMessage = `${relativePath} (${formatBytes(stats.size)})`; - - if (showStrategyPerFile) { - successMessage += ` [${file.strategy}]`; - } - - log.success(successMessage); + completed++; + uploadSpinner.message(`Uploading ${completed}/${formatCountWithSuffix(total, "file")}...`); } catch (error) { results.failed++; + completed++; + uploadSpinner.message(`Uploading ${completed}/${formatCountWithSuffix(total, "file")}...`); const errorMsg = error instanceof Error ? error.message : "Unknown error"; log.error(`${relativePath} - ${errorMsg}`); @@ -205,6 +354,13 @@ export async function uploadFilesInBatch( ) ); + const successCount = results.uploaded + results.updated; + uploadSpinner.stop( + successCount === total + ? `Uploaded ${formatCountWithSuffix(total, "file")}` + : `Uploaded ${successCount}/${formatCountWithSuffix(total, "file")} (${results.failed} failed, ${results.skipped} skipped)` + ); + // Summary console.log(`\n${chalk.bold("Upload Summary:")}`); if (results.uploaded > 0) { @@ -231,7 +387,17 @@ export async function uploadFilesInBatch( ); } - if (!showStrategyPerFile && files.length > 0) { + if (showStrategyPerFile && files.length > 0) { + const strategyCounts = new Map(); + for (const file of files) { + const s = file.strategy ?? "default"; + strategyCounts.set(s, (strategyCounts.get(s) ?? 0) + 1); + } + const parts = Array.from(strategyCounts.entries()).map( + ([s, count]) => `${s} (${count})` + ); + console.log(chalk.gray(`Strategies: ${parts.join(", ")}`)); + } else if (files.length > 0) { const firstFile = files[0]; console.log(chalk.gray(`Strategy: ${firstFile.strategy}`)); } diff --git a/packages/cli/tests/utils/upload.test.ts b/packages/cli/tests/utils/upload.test.ts index 36a1ad8..c2d890c 100644 --- a/packages/cli/tests/utils/upload.test.ts +++ b/packages/cli/tests/utils/upload.test.ts @@ -58,12 +58,17 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", - expect.any(File), expect.objectContaining({ - config: { parsing_strategy: undefined }, - }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.any(File), + body: expect.objectContaining({ + config: { parsing_strategy: undefined }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + multipartUpload: expect.objectContaining({ + onPartUpload: expect.any(Function), + }), + }) ); expect(mockConsoleWarn).not.toHaveBeenCalled(); }); @@ -81,12 +86,14 @@ describe("Upload Utils", () => { // uploadFile doesn't have empty file checking, so it will upload expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", - expect.any(File), expect.objectContaining({ - config: { parsing_strategy: undefined }, - }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.any(File), + body: expect.objectContaining({ + config: { parsing_strategy: undefined }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); expect(mockConsoleWarn).not.toHaveBeenCalled(); }); @@ -109,13 +116,15 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", - expect.any(File), expect.objectContaining({ - metadata: { author: "test" }, - config: { parsing_strategy: "high_quality" }, - }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.any(File), + body: expect.objectContaining({ + metadata: { author: "test" }, + config: { parsing_strategy: "high_quality" }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); expect(mockConsoleWarn).not.toHaveBeenCalled(); }); @@ -134,13 +143,13 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "test.ts", - type: "text/typescript", - }), - expect.any(Object), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "test.ts", + type: "text/typescript", + }), + }) ); }); @@ -158,13 +167,13 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "script.py", - type: "text/x-python", - }), - expect.any(Object), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "script.py", + type: "text/x-python", + }), + }) ); }); @@ -182,13 +191,13 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "content.mdx", - type: "text/mdx", - }), - expect.any(Object), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "content.mdx", + type: "text/mdx", + }), + }) ); }); }); @@ -444,46 +453,62 @@ describe("Upload Utils", () => { // Verify TypeScript file mime type was fixed expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "app.ts", - type: "text/typescript", - }), - expect.objectContaining({ config: { parsing_strategy: "fast" } }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "app.ts", + type: "text/typescript", + }), + body: expect.objectContaining({ + config: { parsing_strategy: "fast" }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); // Verify Python file mime type was fixed expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "utils.py", - type: "text/x-python", - }), - expect.objectContaining({ config: { parsing_strategy: "fast" } }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "utils.py", + type: "text/x-python", + }), + body: expect.objectContaining({ + config: { parsing_strategy: "fast" }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); // Verify MDX file mime type was fixed expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "page.mdx", - type: "text/mdx", - }), - expect.objectContaining({ config: { parsing_strategy: "fast" } }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "page.mdx", + type: "text/mdx", + }), + body: expect.objectContaining({ + config: { parsing_strategy: "fast" }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); // Verify regular markdown file kept its original mime type expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "readme.md", - type: "text/markdown", - }), - expect.objectContaining({ config: { parsing_strategy: "fast" } }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "readme.md", + type: "text/markdown", + }), + body: expect.objectContaining({ + config: { parsing_strategy: "fast" }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); }); }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7d8435d..5f9bbac 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -30,8 +30,8 @@ importers: specifier: ^1.0.1 version: 1.0.1 '@mixedbread/sdk': - specifier: ^0.51.0 - version: 0.51.0 + specifier: ^0.57.0 + version: 0.57.0 '@pnpm/tabtab': specifier: ^0.5.4 version: 0.5.4 @@ -759,8 +759,8 @@ packages: resolution: {integrity: sha512-B0Un0thpz4UKN69QVmgdVsCIgiz+jR4V+nxuCOgdm/P0Iie1NctqY092VQ/iobY/FSMzhHJofXij086y/9E5rg==} hasBin: true - '@mixedbread/sdk@0.51.0': - resolution: {integrity: sha512-hkM4x4SecFGM54uIqev4jU7Uzx1mDqMlJM83mdVBFRWO3aiVNeb9cq1hKEIKTVh3Hju/58tLnWE7ETRHYIZIxQ==} + '@mixedbread/sdk@0.57.0': + resolution: {integrity: sha512-twoZYKSuSHsR1yKDaiEDB+TRGSPUYC9gfOTMC4duSbSdXYN12mPWiihMXuDfdZT/JuWGh2cnQ73rle8FoG0+rw==} hasBin: true '@napi-rs/wasm-runtime@0.2.12': @@ -4104,7 +4104,7 @@ snapshots: '@mixedbread/sdk@0.26.0': {} - '@mixedbread/sdk@0.51.0': {} + '@mixedbread/sdk@0.57.0': {} '@napi-rs/wasm-runtime@0.2.12': dependencies: