From e69c4078155d9be587eefac883890ef49db1127c Mon Sep 17 00:00:00 2001 From: aavash Date: Tue, 3 Mar 2026 18:03:48 +0100 Subject: [PATCH 01/13] feat: use multipart uploads for larger files --- packages/cli/src/commands/store/sync.ts | 41 +++++++ packages/cli/src/commands/store/upload.ts | 48 +++++++- packages/cli/src/utils/sync.ts | 4 +- packages/cli/src/utils/upload.ts | 102 ++++++++++++---- packages/cli/tests/utils/upload.test.ts | 141 +++++++++++++--------- 5 files changed, 255 insertions(+), 81 deletions(-) diff --git a/packages/cli/src/commands/store/sync.ts b/packages/cli/src/commands/store/sync.ts index 1375c75..7caa0a7 100644 --- a/packages/cli/src/commands/store/sync.ts +++ b/packages/cli/src/commands/store/sync.ts @@ -5,6 +5,7 @@ import { z } from "zod"; import { createClient } from "../../utils/client"; import { warnContextualizationDeprecated } from "../../utils/deprecation"; import { getGitInfo } from "../../utils/git"; +import type { MultipartUploadOptions } from "../../utils/upload"; import { addGlobalOptions, extendGlobalOptions, @@ -43,6 +44,19 @@ const SyncStoreSchema = extendGlobalOptions({ .max(200, { error: '"parallel" must be less than or equal to 200' }) .optional() .default(100), + multipartThreshold: z.coerce + .number({ error: '"multipart-threshold" must be a number' }) + .min(5, { error: '"multipart-threshold" must be at least 5 MB' }) + .optional(), + multipartPartSize: z.coerce + .number({ error: '"multipart-part-size" must be a number' }) + .min(5, { error: '"multipart-part-size" must be at least 5 MB' }) + .optional(), + multipartConcurrency: z.coerce + .number({ error: '"multipart-concurrency" must be a number' }) + .int({ error: '"multipart-concurrency" must be an integer' }) + .min(1, { error: '"multipart-concurrency" must be at least 1' }) + .optional(), }); export function createSyncCommand(): Command { @@ -71,6 +85,18 @@ export function createSyncCommand(): Command { ) .option("--metadata ", "Additional metadata for files") 
.option("--parallel ", "Number of concurrent operations (1-200)") + .option( + "--multipart-threshold ", + "File size threshold in MB to trigger multipart upload", + ) + .option( + "--multipart-part-size ", + "Size of each part in MB for multipart upload", + ) + .option( + "--multipart-concurrency ", + "Number of concurrent part uploads for multipart upload", + ) ); command.action(async (nameOrId: string, patterns: string[]) => { @@ -188,12 +214,27 @@ export function createSyncCommand(): Command { log.success("Auto-proceeding with --yes flag"); } + // Build multipart upload options + const MB = 1024 * 1024; + const multipartUpload: MultipartUploadOptions = { + ...(parsedOptions.multipartThreshold != null && { + threshold: parsedOptions.multipartThreshold * MB, + }), + ...(parsedOptions.multipartPartSize != null && { + partSize: parsedOptions.multipartPartSize * MB, + }), + ...(parsedOptions.multipartConcurrency != null && { + concurrency: parsedOptions.multipartConcurrency, + }), + }; + // Execute changes const syncResults = await executeSyncChanges(client, store.id, analysis, { strategy: parsedOptions.strategy, metadata: additionalMetadata, gitInfo: gitInfo.isRepo ? 
gitInfo : undefined, parallel: parsedOptions.parallel, + multipartUpload, }); // Display summary diff --git a/packages/cli/src/commands/store/upload.ts b/packages/cli/src/commands/store/upload.ts index b9d0daf..5d56f30 100644 --- a/packages/cli/src/commands/store/upload.ts +++ b/packages/cli/src/commands/store/upload.ts @@ -18,7 +18,11 @@ import { uploadFromManifest } from "../../utils/manifest"; import { validateMetadata } from "../../utils/metadata"; import { formatBytes, formatCountWithSuffix } from "../../utils/output"; import { checkExistingFiles, resolveStore } from "../../utils/store"; -import { type FileToUpload, uploadFilesInBatch } from "../../utils/upload"; +import { + type FileToUpload, + type MultipartUploadOptions, + uploadFilesInBatch, +} from "../../utils/upload"; const UploadStoreSchema = extendGlobalOptions({ nameOrId: z.string().min(1, { error: '"name-or-id" is required' }), @@ -41,6 +45,19 @@ const UploadStoreSchema = extendGlobalOptions({ .optional(), unique: z.boolean().optional(), manifest: z.string().optional(), + multipartThreshold: z.coerce + .number({ error: '"multipart-threshold" must be a number' }) + .min(5, { error: '"multipart-threshold" must be at least 5 MB' }) + .optional(), + multipartPartSize: z.coerce + .number({ error: '"multipart-part-size" must be a number' }) + .min(5, { error: '"multipart-part-size" must be at least 5 MB' }) + .optional(), + multipartConcurrency: z.coerce + .number({ error: '"multipart-concurrency" must be a number' }) + .int({ error: '"multipart-concurrency" must be an integer' }) + .min(1, { error: '"multipart-concurrency" must be at least 1' }) + .optional(), }); export interface UploadOptions extends GlobalOptions { @@ -51,6 +68,9 @@ export interface UploadOptions extends GlobalOptions { parallel?: number; unique?: boolean; manifest?: string; + multipartThreshold?: number; + multipartPartSize?: number; + multipartConcurrency?: number; } export function createUploadCommand(): Command { @@ -76,6 +96,18 
@@ export function createUploadCommand(): Command { false ) .option("--manifest ", "Upload using manifest file") + .option( + "--multipart-threshold ", + "File size threshold in MB to trigger multipart upload", + ) + .option( + "--multipart-part-size ", + "Size of each part in MB for multipart upload", + ) + .option( + "--multipart-concurrency ", + "Number of concurrent part uploads for multipart upload", + ) ); command.action(async (nameOrId: string, patterns: string[]) => { @@ -125,6 +157,19 @@ export function createUploadCommand(): Command { const parallel = parsedOptions.parallel ?? config.defaults?.upload?.parallel ?? 100; + const MB = 1024 * 1024; + const multipartUpload: MultipartUploadOptions = { + ...(parsedOptions.multipartThreshold != null && { + threshold: parsedOptions.multipartThreshold * MB, + }), + ...(parsedOptions.multipartPartSize != null && { + partSize: parsedOptions.multipartPartSize * MB, + }), + ...(parsedOptions.multipartConcurrency != null && { + concurrency: parsedOptions.multipartConcurrency, + }), + }; + const metadata = validateMetadata(parsedOptions.metadata); // Collect all files matching patterns @@ -212,6 +257,7 @@ export function createUploadCommand(): Command { unique: parsedOptions.unique || false, existingFiles, parallel, + multipartUpload, }); } catch (error) { activeSpinner?.stop(); diff --git a/packages/cli/src/utils/sync.ts b/packages/cli/src/utils/sync.ts index a25e2c8..688637e 100644 --- a/packages/cli/src/utils/sync.ts +++ b/packages/cli/src/utils/sync.ts @@ -10,7 +10,7 @@ import { calculateFileHash, hashesMatch } from "./hash"; import { log } from "./logger"; import { formatBytes, formatCountWithSuffix } from "./output"; import { buildFileSyncMetadata, type SyncedFileByPath } from "./sync-state"; -import { uploadFile } from "./upload"; +import { type MultipartUploadOptions, uploadFile } from "./upload"; interface FileChange { path: string; @@ -264,6 +264,7 @@ export async function executeSyncChanges( metadata?: Record; 
gitInfo?: { commit: string; branch: string }; parallel?: number; + multipartUpload?: MultipartUploadOptions; } ): Promise { const parallel = options.parallel ?? 100; @@ -368,6 +369,7 @@ export async function executeSyncChanges( metadata: finalMetadata, strategy: options.strategy, externalId: file.path, + multipartUpload: options.multipartUpload, }); completed++; diff --git a/packages/cli/src/utils/upload.ts b/packages/cli/src/utils/upload.ts index 4c0270a..842b0f4 100644 --- a/packages/cli/src/utils/upload.ts +++ b/packages/cli/src/utils/upload.ts @@ -5,15 +5,27 @@ import type { FileCreateParams } from "@mixedbread/sdk/resources/stores"; import chalk from "chalk"; import { lookup } from "mime-types"; import pLimit from "p-limit"; -import { log } from "./logger"; +import { log, spinner } from "./logger"; import { formatBytes, formatCountWithSuffix } from "./output"; export const UPLOAD_TIMEOUT = 1000 * 60 * 10; // 10 minutes +const MB = 1024 * 1024; +export const DEFAULT_MULTIPART_THRESHOLD = 50 * MB; // 50 MB +export const DEFAULT_MULTIPART_PART_SIZE = 20 * MB; // 20 MB +export const DEFAULT_MULTIPART_CONCURRENCY = 5; + +export interface MultipartUploadOptions { + threshold?: number; + partSize?: number; + concurrency?: number; +} + export interface UploadFileOptions { metadata?: Record; strategy?: FileCreateParams.Config["parsing_strategy"]; externalId?: string; + multipartUpload?: MultipartUploadOptions; } export interface FileToUpload { @@ -70,7 +82,7 @@ export async function uploadFile( filePath: string, options: UploadFileOptions = {} ): Promise { - const { metadata = {}, strategy, externalId } = options; + const { metadata = {}, strategy, externalId, multipartUpload } = options; // Read file content const fileContent = await readFile(filePath); @@ -80,18 +92,34 @@ export async function uploadFile( new File([fileContent], fileName, { type: mimeType }) ); - await client.stores.files.upload( + let partsCompleted = 0; + await client.stores.files.upload({ 
storeIdentifier, file, - { + body: { metadata, config: { parsing_strategy: strategy, }, ...(externalId ? { external_id: externalId } : {}), }, - { timeout: UPLOAD_TIMEOUT } - ); + options: { timeout: UPLOAD_TIMEOUT }, + multipartUpload: { + threshold: DEFAULT_MULTIPART_THRESHOLD, + partSize: DEFAULT_MULTIPART_PART_SIZE, + concurrency: DEFAULT_MULTIPART_CONCURRENCY, + ...multipartUpload, + onPartUpload: (event) => { + partsCompleted++; + const pct = Math.round( + (partsCompleted / event.totalParts) * 100 + ); + log.info( + `${fileName}: Uploaded part ${partsCompleted}/${event.totalParts} (${pct}%)` + ); + }, + }, + }); } /** @@ -106,6 +134,7 @@ export async function uploadFilesInBatch( existingFiles: Map; parallel: number; showStrategyPerFile?: boolean; + multipartUpload?: MultipartUploadOptions; } ): Promise { const { @@ -113,6 +142,7 @@ export async function uploadFilesInBatch( existingFiles, parallel, showStrategyPerFile = false, + multipartUpload, } = options; console.log( @@ -127,9 +157,24 @@ export async function uploadFilesInBatch( successfulSize: 0, }; - console.log(chalk.gray(`Processing with concurrency ${parallel}...`)); + const configParts = [`${parallel} files at a time`]; + if (multipartUpload?.threshold) { + configParts.push(`multipart threshold ${formatBytes(multipartUpload.threshold)}`); + } + if (multipartUpload?.partSize) { + configParts.push(`part size ${formatBytes(multipartUpload.partSize)}`); + } + if (multipartUpload?.concurrency) { + configParts.push(`${multipartUpload.concurrency} concurrent part uploads`); + } + console.log(chalk.gray(`Processing ${configParts.join(", ")}...`)); // Process files with sliding-window concurrency + const total = files.length; + let completed = 0; + const uploadSpinner = spinner(); + uploadSpinner.start(`Uploading 0/${total} files...`); + const limit = pLimit(parallel); await Promise.allSettled( files.map((file) => @@ -154,7 +199,8 @@ export async function uploadFilesInBatch( // Check if file is empty const 
stats = await stat(file.path); if (stats.size === 0) { - log.warn(`${relativePath} - Empty file skipped`); + completed++; + uploadSpinner.message(`Uploading ${completed}/${total} files...`); results.skipped++; return; } @@ -168,17 +214,33 @@ export async function uploadFilesInBatch( }) ); - await client.stores.files.upload( + let partsCompleted = 0; + await client.stores.files.upload({ storeIdentifier, - fileToUpload, - { + file: fileToUpload, + body: { metadata: fileMetadata, config: { parsing_strategy: file.strategy, }, }, - { timeout: UPLOAD_TIMEOUT } - ); + options: { timeout: UPLOAD_TIMEOUT }, + multipartUpload: { + threshold: DEFAULT_MULTIPART_THRESHOLD, + partSize: DEFAULT_MULTIPART_PART_SIZE, + concurrency: DEFAULT_MULTIPART_CONCURRENCY, + ...multipartUpload, + onPartUpload: (event) => { + partsCompleted++; + const pct = Math.round( + (partsCompleted / event.totalParts) * 100 + ); + uploadSpinner.message( + `Uploading ${completed}/${total} files... (${fileName}: part ${partsCompleted}/${event.totalParts}, ${pct}%)` + ); + }, + }, + }); if (unique && existingFiles.has(relativePath)) { results.updated++; @@ -187,16 +249,12 @@ export async function uploadFilesInBatch( } results.successfulSize += stats.size; - - let successMessage = `${relativePath} (${formatBytes(stats.size)})`; - - if (showStrategyPerFile) { - successMessage += ` [${file.strategy}]`; - } - - log.success(successMessage); + completed++; + uploadSpinner.message(`Uploading ${completed}/${total} files...`); } catch (error) { results.failed++; + completed++; + uploadSpinner.message(`Uploading ${completed}/${total} files...`); const errorMsg = error instanceof Error ? 
error.message : "Unknown error"; log.error(`${relativePath} - ${errorMsg}`); @@ -205,6 +263,8 @@ export async function uploadFilesInBatch( ) ); + uploadSpinner.stop(`Uploaded ${total} files`); + // Summary console.log(`\n${chalk.bold("Upload Summary:")}`); if (results.uploaded > 0) { diff --git a/packages/cli/tests/utils/upload.test.ts b/packages/cli/tests/utils/upload.test.ts index 36a1ad8..c2d890c 100644 --- a/packages/cli/tests/utils/upload.test.ts +++ b/packages/cli/tests/utils/upload.test.ts @@ -58,12 +58,17 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", - expect.any(File), expect.objectContaining({ - config: { parsing_strategy: undefined }, - }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.any(File), + body: expect.objectContaining({ + config: { parsing_strategy: undefined }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + multipartUpload: expect.objectContaining({ + onPartUpload: expect.any(Function), + }), + }) ); expect(mockConsoleWarn).not.toHaveBeenCalled(); }); @@ -81,12 +86,14 @@ describe("Upload Utils", () => { // uploadFile doesn't have empty file checking, so it will upload expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", - expect.any(File), expect.objectContaining({ - config: { parsing_strategy: undefined }, - }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.any(File), + body: expect.objectContaining({ + config: { parsing_strategy: undefined }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); expect(mockConsoleWarn).not.toHaveBeenCalled(); }); @@ -109,13 +116,15 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", - expect.any(File), expect.objectContaining({ - metadata: { author: "test" }, - config: { parsing_strategy: "high_quality" }, - }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: 
expect.any(File), + body: expect.objectContaining({ + metadata: { author: "test" }, + config: { parsing_strategy: "high_quality" }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); expect(mockConsoleWarn).not.toHaveBeenCalled(); }); @@ -134,13 +143,13 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "test.ts", - type: "text/typescript", - }), - expect.any(Object), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "test.ts", + type: "text/typescript", + }), + }) ); }); @@ -158,13 +167,13 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "script.py", - type: "text/x-python", - }), - expect.any(Object), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "script.py", + type: "text/x-python", + }), + }) ); }); @@ -182,13 +191,13 @@ describe("Upload Utils", () => { ); expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "content.mdx", - type: "text/mdx", - }), - expect.any(Object), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "content.mdx", + type: "text/mdx", + }), + }) ); }); }); @@ -444,46 +453,62 @@ describe("Upload Utils", () => { // Verify TypeScript file mime type was fixed expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "app.ts", - type: "text/typescript", - }), - expect.objectContaining({ config: { parsing_strategy: "fast" } }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "app.ts", + type: "text/typescript", + }), + body: expect.objectContaining({ + config: { parsing_strategy: "fast" }, + }), + options: { timeout: 
UPLOAD_TIMEOUT }, + }) ); // Verify Python file mime type was fixed expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "utils.py", - type: "text/x-python", - }), - expect.objectContaining({ config: { parsing_strategy: "fast" } }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "utils.py", + type: "text/x-python", + }), + body: expect.objectContaining({ + config: { parsing_strategy: "fast" }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); // Verify MDX file mime type was fixed expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "page.mdx", - type: "text/mdx", - }), - expect.objectContaining({ config: { parsing_strategy: "fast" } }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "page.mdx", + type: "text/mdx", + }), + body: expect.objectContaining({ + config: { parsing_strategy: "fast" }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); // Verify regular markdown file kept its original mime type expect(mockClient.stores.files.upload).toHaveBeenCalledWith( - "test-store", expect.objectContaining({ - name: "readme.md", - type: "text/markdown", - }), - expect.objectContaining({ config: { parsing_strategy: "fast" } }), - { timeout: UPLOAD_TIMEOUT } + storeIdentifier: "test-store", + file: expect.objectContaining({ + name: "readme.md", + type: "text/markdown", + }), + body: expect.objectContaining({ + config: { parsing_strategy: "fast" }, + }), + options: { timeout: UPLOAD_TIMEOUT }, + }) ); }); }); From be4c7fa579efb93a168ea3b362c8c38100cb328d Mon Sep 17 00:00:00 2001 From: aavash Date: Tue, 3 Mar 2026 18:35:14 +0100 Subject: [PATCH 02/13] fix: allow using manifest and multipart params --- packages/cli/src/commands/store/upload.ts | 29 ++++++++++++----------- packages/cli/src/utils/manifest.ts | 10 ++++++-- 2 files changed, 23
insertions(+), 16 deletions(-) diff --git a/packages/cli/src/commands/store/upload.ts b/packages/cli/src/commands/store/upload.ts index 5d56f30..70af091 100644 --- a/packages/cli/src/commands/store/upload.ts +++ b/packages/cli/src/commands/store/upload.ts @@ -134,13 +134,27 @@ export function createUploadCommand(): Command { activeSpinner.stop("Upload initialized"); activeSpinner = null; + const MB = 1024 * 1024; + const multipartUpload: MultipartUploadOptions = { + ...(parsedOptions.multipartThreshold != null && { + threshold: parsedOptions.multipartThreshold * MB, + }), + ...(parsedOptions.multipartPartSize != null && { + partSize: parsedOptions.multipartPartSize * MB, + }), + ...(parsedOptions.multipartConcurrency != null && { + concurrency: parsedOptions.multipartConcurrency, + }), + }; + // Handle manifest file upload if (parsedOptions.manifest) { return await uploadFromManifest( client, store.id, parsedOptions.manifest, - parsedOptions + parsedOptions, + multipartUpload ); } @@ -157,19 +171,6 @@ export function createUploadCommand(): Command { const parallel = parsedOptions.parallel ?? config.defaults?.upload?.parallel ?? 
100; - const MB = 1024 * 1024; - const multipartUpload: MultipartUploadOptions = { - ...(parsedOptions.multipartThreshold != null && { - threshold: parsedOptions.multipartThreshold * MB, - }), - ...(parsedOptions.multipartPartSize != null && { - partSize: parsedOptions.multipartPartSize * MB, - }), - ...(parsedOptions.multipartConcurrency != null && { - concurrency: parsedOptions.multipartConcurrency, - }), - }; - const metadata = validateMetadata(parsedOptions.metadata); // Collect all files matching patterns diff --git a/packages/cli/src/utils/manifest.ts b/packages/cli/src/utils/manifest.ts index c29c931..472e5e5 100644 --- a/packages/cli/src/utils/manifest.ts +++ b/packages/cli/src/utils/manifest.ts @@ -12,7 +12,11 @@ import { log, spinner } from "./logger"; import { validateMetadata } from "./metadata"; import { formatBytes, formatCountWithSuffix } from "./output"; import { checkExistingFiles } from "./store"; -import { type FileToUpload, uploadFilesInBatch } from "./upload"; +import { + type FileToUpload, + type MultipartUploadOptions, + uploadFilesInBatch, +} from "./upload"; // Manifest file schema const ManifestFileEntrySchema = z.object({ @@ -42,7 +46,8 @@ export async function uploadFromManifest( client: Mixedbread, storeIdentifier: string, manifestPath: string, - options: UploadOptions + options: UploadOptions, + multipartUpload?: MultipartUploadOptions ) { console.log(chalk.bold(`Loading manifest from: ${manifestPath}`)); @@ -199,6 +204,7 @@ export async function uploadFromManifest( existingFiles, parallel: options.parallel ?? config.defaults.upload.parallel ?? 
100, showStrategyPerFile: true, + multipartUpload, }); } catch (error) { if (error instanceof z.ZodError) { From 127f8c0d2854b69ba4f9658b5f280ff7db7b6778 Mon Sep 17 00:00:00 2001 From: aavash Date: Tue, 3 Mar 2026 18:35:49 +0100 Subject: [PATCH 03/13] chore(deps-tmp): use tarball pre-release sdk --- packages/cli/package.json | 2 +- pnpm-lock.yaml | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/cli/package.json b/packages/cli/package.json index 3b220b3..a328158 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -50,7 +50,7 @@ }, "dependencies": { "@clack/prompts": "^1.0.1", - "@mixedbread/sdk": "^0.51.0", + "@mixedbread/sdk": "https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316", "@pnpm/tabtab": "^0.5.4", "chalk": "^5.6.2", "cli-table3": "^0.6.5", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7d8435d..b109862 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -30,8 +30,8 @@ importers: specifier: ^1.0.1 version: 1.0.1 '@mixedbread/sdk': - specifier: ^0.51.0 - version: 0.51.0 + specifier: https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316 + version: https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316 '@pnpm/tabtab': specifier: ^0.5.4 version: 0.5.4 @@ -759,8 +759,9 @@ packages: resolution: {integrity: sha512-B0Un0thpz4UKN69QVmgdVsCIgiz+jR4V+nxuCOgdm/P0Iie1NctqY092VQ/iobY/FSMzhHJofXij086y/9E5rg==} hasBin: true - '@mixedbread/sdk@0.51.0': - resolution: {integrity: sha512-hkM4x4SecFGM54uIqev4jU7Uzx1mDqMlJM83mdVBFRWO3aiVNeb9cq1hKEIKTVh3Hju/58tLnWE7ETRHYIZIxQ==} + '@mixedbread/sdk@https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316': + resolution: {tarball: https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316} + version: 0.56.0 hasBin: true '@napi-rs/wasm-runtime@0.2.12': @@ -4104,7 +4105,7 @@ snapshots: 
'@mixedbread/sdk@0.26.0': {} - '@mixedbread/sdk@0.51.0': {} + '@mixedbread/sdk@https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316': {} '@napi-rs/wasm-runtime@0.2.12': dependencies: From 1f1e8f9e5260b0cd90d2c34116f0aba681b2f3c4 Mon Sep 17 00:00:00 2001 From: aavash Date: Wed, 4 Mar 2026 13:27:21 +0100 Subject: [PATCH 04/13] chore: auto configure multipart upload settings based on system stats and file size and improve logs --- packages/cli/package.json | 2 +- packages/cli/src/utils/upload.ts | 113 +++++++++++++++++++++++-------- pnpm-lock.yaml | 10 +-- 3 files changed, 90 insertions(+), 35 deletions(-) diff --git a/packages/cli/package.json b/packages/cli/package.json index a328158..cbcb81b 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -50,7 +50,7 @@ }, "dependencies": { "@clack/prompts": "^1.0.1", - "@mixedbread/sdk": "https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316", + "@mixedbread/sdk": "https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz", "@pnpm/tabtab": "^0.5.4", "chalk": "^5.6.2", "cli-table3": "^0.6.5", diff --git a/packages/cli/src/utils/upload.ts b/packages/cli/src/utils/upload.ts index 842b0f4..63d1bd8 100644 --- a/packages/cli/src/utils/upload.ts +++ b/packages/cli/src/utils/upload.ts @@ -1,5 +1,6 @@ import { readFile, stat } from "node:fs/promises"; import { basename, relative } from "node:path"; +import { cpus, freemem } from "node:os"; import type Mixedbread from "@mixedbread/sdk"; import type { FileCreateParams } from "@mixedbread/sdk/resources/stores"; import chalk from "chalk"; @@ -11,9 +12,8 @@ import { formatBytes, formatCountWithSuffix } from "./output"; export const UPLOAD_TIMEOUT = 1000 * 60 * 10; // 10 minutes const MB = 1024 * 1024; -export const DEFAULT_MULTIPART_THRESHOLD = 50 * MB; // 50 MB -export const DEFAULT_MULTIPART_PART_SIZE = 20 * MB; // 20 MB -export const 
DEFAULT_MULTIPART_CONCURRENCY = 5; +const MIN_PART_SIZE = 5 * MB; +const MAX_PARTS = 10_000; export interface MultipartUploadOptions { threshold?: number; @@ -21,6 +21,47 @@ export interface MultipartUploadOptions { concurrency?: number; } +/** + * Compute multipart config based on file size and system resources. + * User-provided overrides take precedence. + */ +export function resolveMultipartConfig( + fileSize: number, + overrides?: MultipartUploadOptions +): { threshold: number; partSize: number; concurrency: number } { + // Part size: smaller parts = more granular progress + let partSize: number; + if (fileSize < 200 * MB) { + partSize = 10 * MB; + } else if (fileSize < 1024 * MB) { + partSize = 20 * MB; + } else if (fileSize < 5 * 1024 * MB) { + partSize = 50 * MB; + } else { + partSize = 100 * MB; + } + + // Ensure we don't exceed the 10,000 parts limit + if (Math.ceil(fileSize / partSize) > MAX_PARTS) { + partSize = Math.ceil(fileSize / MAX_PARTS); + } + + partSize = Math.max(partSize, MIN_PART_SIZE); + + // Concurrency: bounded by CPU cores and available memory + const cores = cpus().length; + // Reserve 25% of free memory for other work; each concurrent part holds ~partSize in memory + const memoryBudget = Math.floor(freemem() * 0.75); + const maxByMemory = Math.max(1, Math.floor(memoryBudget / partSize)); + const concurrency = Math.min(cores, maxByMemory, 10); + + return { + threshold: overrides?.threshold ?? 50 * MB, + partSize: overrides?.partSize ?? partSize, + concurrency: overrides?.concurrency ?? 
Math.max(concurrency, 2), + }; +} + export interface UploadFileOptions { metadata?: Record; strategy?: FileCreateParams.Config["parsing_strategy"]; @@ -92,6 +133,16 @@ export async function uploadFile( new File([fileContent], fileName, { type: mimeType }) ); + const mpConfig = resolveMultipartConfig(fileContent.length, multipartUpload); + const totalFileBytes = fileContent.length; + + if (totalFileBytes >= mpConfig.threshold) { + const expectedParts = Math.ceil(totalFileBytes / mpConfig.partSize); + log.info( + `${fileName}: 0/${expectedParts} parts — ${formatBytes(0)}/${formatBytes(totalFileBytes)}` + ); + } + let partsCompleted = 0; await client.stores.files.upload({ storeIdentifier, @@ -105,17 +156,11 @@ export async function uploadFile( }, options: { timeout: UPLOAD_TIMEOUT }, multipartUpload: { - threshold: DEFAULT_MULTIPART_THRESHOLD, - partSize: DEFAULT_MULTIPART_PART_SIZE, - concurrency: DEFAULT_MULTIPART_CONCURRENCY, - ...multipartUpload, + ...mpConfig, onPartUpload: (event) => { partsCompleted++; - const pct = Math.round( - (partsCompleted / event.totalParts) * 100 - ); log.info( - `${fileName}: Uploaded part ${partsCompleted}/${event.totalParts} (${pct}%)` + `${fileName}: part ${partsCompleted}/${event.totalParts} — ${formatBytes(event.uploadedBytes)}/${formatBytes(event.totalBytes)}` ); }, }, @@ -157,17 +202,20 @@ export async function uploadFilesInBatch( successfulSize: 0, }; - const configParts = [`${parallel} files at a time`]; - if (multipartUpload?.threshold) { - configParts.push(`multipart threshold ${formatBytes(multipartUpload.threshold)}`); - } - if (multipartUpload?.partSize) { - configParts.push(`part size ${formatBytes(multipartUpload.partSize)}`); - } - if (multipartUpload?.concurrency) { - configParts.push(`${multipartUpload.concurrency} concurrent part uploads`); - } + // Show a preview of multipart config using a representative file size (first file) + const previewConfig = resolveMultipartConfig(0, multipartUpload); + const configParts 
= [ + `${parallel} files at a time`, + `multipart threshold ${formatBytes(previewConfig.threshold)}`, + `part size ${formatBytes(previewConfig.partSize)}`, + `${previewConfig.concurrency} concurrent part uploads`, + ]; console.log(chalk.gray(`Processing ${configParts.join(", ")}...`)); + if (!multipartUpload?.partSize || !multipartUpload?.concurrency) { + console.log( + chalk.gray("(part size and concurrency auto-tune per file size)") + ); + } // Process files with sliding-window concurrency const total = files.length; @@ -214,6 +262,19 @@ export async function uploadFilesInBatch( }) ); + const mpConfig = resolveMultipartConfig( + fileContent.length, + multipartUpload + ); + const totalFileBytes = fileContent.length; + + if (totalFileBytes >= mpConfig.threshold) { + const expectedParts = Math.ceil(totalFileBytes / mpConfig.partSize); + uploadSpinner.message( + `Uploading ${completed}/${total} files... (${fileName}: 0/${expectedParts} parts, ${formatBytes(0)}/${formatBytes(totalFileBytes)})` + ); + } + let partsCompleted = 0; await client.stores.files.upload({ storeIdentifier, @@ -226,17 +287,11 @@ export async function uploadFilesInBatch( }, options: { timeout: UPLOAD_TIMEOUT }, multipartUpload: { - threshold: DEFAULT_MULTIPART_THRESHOLD, - partSize: DEFAULT_MULTIPART_PART_SIZE, - concurrency: DEFAULT_MULTIPART_CONCURRENCY, - ...multipartUpload, + ...mpConfig, onPartUpload: (event) => { partsCompleted++; - const pct = Math.round( - (partsCompleted / event.totalParts) * 100 - ); uploadSpinner.message( - `Uploading ${completed}/${total} files... (${fileName}: part ${partsCompleted}/${event.totalParts}, ${pct}%)` + `Uploading ${completed}/${total} files... 
(${fileName}: part ${partsCompleted}/${event.totalParts}, ${formatBytes(event.uploadedBytes)}/${formatBytes(event.totalBytes)})` ); }, }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b109862..75c84a7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -30,8 +30,8 @@ importers: specifier: ^1.0.1 version: 1.0.1 '@mixedbread/sdk': - specifier: https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316 - version: https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316 + specifier: https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz + version: https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz '@pnpm/tabtab': specifier: ^0.5.4 version: 0.5.4 @@ -759,8 +759,8 @@ packages: resolution: {integrity: sha512-B0Un0thpz4UKN69QVmgdVsCIgiz+jR4V+nxuCOgdm/P0Iie1NctqY092VQ/iobY/FSMzhHJofXij086y/9E5rg==} hasBin: true - '@mixedbread/sdk@https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316': - resolution: {tarball: https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316} + '@mixedbread/sdk@https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz': + resolution: {tarball: https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz} version: 0.56.0 hasBin: true @@ -4105,7 +4105,7 @@ snapshots: '@mixedbread/sdk@0.26.0': {} - '@mixedbread/sdk@https://pkg.stainless.com/s/mixedbread-typescript/8f4a498c448b4beeb4d609bcc7b296bd7c3e0316': {} + '@mixedbread/sdk@https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz': {} '@napi-rs/wasm-runtime@0.2.12': dependencies: From 211011c83eecee2f4f64f9687c0af489df3b7140 Mon Sep 17 00:00:00 2001 From: aavash Date: Wed, 4 Mar 2026 13:43:12 +0100 Subject: [PATCH 05/13] chore: make 
logging consistent for file uploads and deletions --- packages/cli/src/utils/sync.ts | 64 +++++++++++++++++++++++--------- packages/cli/src/utils/upload.ts | 46 +++++++++++++++++++---- 2 files changed, 85 insertions(+), 25 deletions(-) diff --git a/packages/cli/src/utils/sync.ts b/packages/cli/src/utils/sync.ts index 688637e..a1cd579 100644 --- a/packages/cli/src/utils/sync.ts +++ b/packages/cli/src/utils/sync.ts @@ -7,10 +7,14 @@ import { glob } from "glob"; import pLimit from "p-limit"; import { getChangedFiles, normalizeGitPatterns } from "./git"; import { calculateFileHash, hashesMatch } from "./hash"; -import { log } from "./logger"; +import { log, spinner } from "./logger"; import { formatBytes, formatCountWithSuffix } from "./output"; import { buildFileSyncMetadata, type SyncedFileByPath } from "./sync-state"; -import { type MultipartUploadOptions, uploadFile } from "./upload"; +import { + type MultipartUploadOptions, + type UploadProgress, + uploadFile, +} from "./upload"; interface FileChange { path: string; @@ -282,10 +286,11 @@ export async function executeSyncChanges( // Delete legacy modified files and removed files if (filesToDelete.length > 0) { - console.log( - chalk.yellow( - `\nDeleting ${formatCountWithSuffix(filesToDelete.length, "file")}...` - ) + const deleteTotal = filesToDelete.length; + let deleteCompleted = 0; + const deleteSpinner = spinner(); + deleteSpinner.start( + `Deleting 0/${deleteTotal} files...` ); const deletePromises: Promise[] = filesToDelete.map((file) => @@ -295,14 +300,19 @@ export async function executeSyncChanges( store_identifier: storeIdentifier, }); completed++; - log.success( - `[${completed}/${totalOperations}] Deleted ${path.relative(process.cwd(), file.path)}` + deleteCompleted++; + deleteSpinner.message( + `Deleting ${deleteCompleted}/${deleteTotal} files...` ); return { file, success: true }; } catch (error) { completed++; + deleteCompleted++; + deleteSpinner.message( + `Deleting ${deleteCompleted}/${deleteTotal} 
files...` + ); log.error( - `[${completed}/${totalOperations}] Failed to delete ${path.relative(process.cwd(), file.path)}: ${error instanceof Error ? error.message : "Unknown error"}` + `Failed to delete ${path.relative(process.cwd(), file.path)}: ${error instanceof Error ? error.message : "Unknown error"}` ); return { file, @@ -314,6 +324,8 @@ export async function executeSyncChanges( ); const deleteResults = await Promise.allSettled(deletePromises); + deleteSpinner.stop(`Deleted ${deleteTotal} files`); + deleteResults.forEach((result) => { if (result.status === "fulfilled") { const syncResult = result.value; @@ -328,14 +340,17 @@ export async function executeSyncChanges( // Upload new and modified files if (filesToUpload.length > 0) { - console.log( - chalk.blue( - `\nUploading ${formatCountWithSuffix(filesToUpload.length, "file")}...` - ) + const uploadTotal = filesToUpload.length; + let uploadCompleted = 0; + const uploadSpinner = spinner(); + uploadSpinner.start( + `Uploading 0/${uploadTotal} files...` ); const uploadPromises: Promise[] = filesToUpload.map((file) => limit(async () => { + const relativePath = path.relative(process.cwd(), file.path); + try { // Calculate hash if not already done const fileHash = @@ -358,8 +373,9 @@ export async function executeSyncChanges( const stats = await fs.stat(file.path); if (stats.size === 0) { completed++; - log.warn( - `[${completed}/${totalOperations}] Skipped empty file ${path.relative(process.cwd(), file.path)}` + uploadCompleted++; + uploadSpinner.message( + `Uploading ${uploadCompleted}/${uploadTotal} files...` ); return { file, success: false, skipped: true }; } @@ -370,17 +386,27 @@ export async function executeSyncChanges( strategy: options.strategy, externalId: file.path, multipartUpload: options.multipartUpload, + onProgress: (progress: UploadProgress) => { + uploadSpinner.message( + `Uploading ${uploadCompleted}/${uploadTotal} files... 
(${progress.fileName}: part ${progress.partsCompleted}/${progress.totalParts}, ${formatBytes(progress.uploadedBytes)}/${formatBytes(progress.totalBytes)})` + ); + }, }); completed++; - log.success( - `[${completed}/${totalOperations}] Uploaded ${path.relative(process.cwd(), file.path)}` + uploadCompleted++; + uploadSpinner.message( + `Uploading ${uploadCompleted}/${uploadTotal} files...` ); return { file, success: true }; } catch (error) { completed++; + uploadCompleted++; + uploadSpinner.message( + `Uploading ${uploadCompleted}/${uploadTotal} files...` + ); log.error( - `[${completed}/${totalOperations}] Failed to upload ${path.relative(process.cwd(), file.path)}: ${error instanceof Error ? error.message : "Unknown error"}` + `Failed to upload ${relativePath}: ${error instanceof Error ? error.message : "Unknown error"}` ); return { file, @@ -392,6 +418,8 @@ export async function executeSyncChanges( ); const uploadResults = await Promise.allSettled(uploadPromises); + uploadSpinner.stop(`Uploaded ${uploadTotal} files`); + uploadResults.forEach((result) => { if (result.status === "fulfilled") { const syncResult = result.value; diff --git a/packages/cli/src/utils/upload.ts b/packages/cli/src/utils/upload.ts index 63d1bd8..2ef45b7 100644 --- a/packages/cli/src/utils/upload.ts +++ b/packages/cli/src/utils/upload.ts @@ -62,11 +62,20 @@ export function resolveMultipartConfig( }; } +export interface UploadProgress { + fileName: string; + partsCompleted: number; + totalParts: number; + uploadedBytes: number; + totalBytes: number; +} + export interface UploadFileOptions { metadata?: Record; strategy?: FileCreateParams.Config["parsing_strategy"]; externalId?: string; multipartUpload?: MultipartUploadOptions; + onProgress?: (progress: UploadProgress) => void; } export interface FileToUpload { @@ -123,7 +132,8 @@ export async function uploadFile( filePath: string, options: UploadFileOptions = {} ): Promise { - const { metadata = {}, strategy, externalId, multipartUpload } = 
options; + const { metadata = {}, strategy, externalId, multipartUpload, onProgress } = + options; // Read file content const fileContent = await readFile(filePath); @@ -138,9 +148,20 @@ export async function uploadFile( if (totalFileBytes >= mpConfig.threshold) { const expectedParts = Math.ceil(totalFileBytes / mpConfig.partSize); - log.info( - `${fileName}: 0/${expectedParts} parts — ${formatBytes(0)}/${formatBytes(totalFileBytes)}` - ); + const zeroProgress: UploadProgress = { + fileName, + partsCompleted: 0, + totalParts: expectedParts, + uploadedBytes: 0, + totalBytes: totalFileBytes, + }; + if (onProgress) { + onProgress(zeroProgress); + } else { + log.info( + `${fileName}: 0/${expectedParts} parts — ${formatBytes(0)}/${formatBytes(totalFileBytes)}` + ); + } } let partsCompleted = 0; @@ -159,9 +180,20 @@ export async function uploadFile( ...mpConfig, onPartUpload: (event) => { partsCompleted++; - log.info( - `${fileName}: part ${partsCompleted}/${event.totalParts} — ${formatBytes(event.uploadedBytes)}/${formatBytes(event.totalBytes)}` - ); + const progress: UploadProgress = { + fileName, + partsCompleted, + totalParts: event.totalParts, + uploadedBytes: event.uploadedBytes, + totalBytes: event.totalBytes, + }; + if (onProgress) { + onProgress(progress); + } else { + log.info( + `${fileName}: part ${partsCompleted}/${event.totalParts} — ${formatBytes(event.uploadedBytes)}/${formatBytes(event.totalBytes)}` + ); + } }, }, }); From 043abdaed81a16ebc624c18094ab4296e12679ed Mon Sep 17 00:00:00 2001 From: aavash Date: Wed, 4 Mar 2026 13:49:11 +0100 Subject: [PATCH 06/13] chore: respect part size before custom multipart calc and fix show strategy count per manifest --- packages/cli/src/utils/upload.ts | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/packages/cli/src/utils/upload.ts b/packages/cli/src/utils/upload.ts index 2ef45b7..3c99c58 100644 --- a/packages/cli/src/utils/upload.ts +++ b/packages/cli/src/utils/upload.ts @@
-48,16 +48,20 @@ export function resolveMultipartConfig( partSize = Math.max(partSize, MIN_PART_SIZE); + // Apply user override before computing concurrency so the memory budget + // accounts for the actual part size that will be used. + const finalPartSize = overrides?.partSize ?? partSize; + // Concurrency: bounded by CPU cores and available memory const cores = cpus().length; // Reserve 25% of free memory for other work; each concurrent part holds ~partSize in memory const memoryBudget = Math.floor(freemem() * 0.75); - const maxByMemory = Math.max(1, Math.floor(memoryBudget / partSize)); + const maxByMemory = Math.max(1, Math.floor(memoryBudget / finalPartSize)); const concurrency = Math.min(cores, maxByMemory, 10); return { threshold: overrides?.threshold ?? 50 * MB, - partSize: overrides?.partSize ?? partSize, + partSize: finalPartSize, concurrency: overrides?.concurrency ?? Math.max(concurrency, 2), }; } @@ -378,7 +382,17 @@ export async function uploadFilesInBatch( ); } - if (!showStrategyPerFile && files.length > 0) { + if (showStrategyPerFile && files.length > 0) { + const strategyCounts = new Map(); + for (const file of files) { + const s = file.strategy ?? "default"; + strategyCounts.set(s, (strategyCounts.get(s) ?? 
0) + 1); + } + const parts = Array.from(strategyCounts.entries()).map( + ([s, count]) => `${s} (${count})` + ); + console.log(chalk.gray(`Strategies: ${parts.join(", ")}`)); + } else if (files.length > 0) { const firstFile = files[0]; console.log(chalk.gray(`Strategy: ${firstFile.strategy}`)); } From 7ec6c0de88a6424c604842bf3299866bd7822bfd Mon Sep 17 00:00:00 2001 From: aavash Date: Wed, 4 Mar 2026 14:01:06 +0100 Subject: [PATCH 07/13] chore: remove unused vars and fix deleted logs --- packages/cli/src/utils/sync.ts | 23 ++++++++++++++--------- packages/cli/src/utils/upload.ts | 7 ++++++- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/packages/cli/src/utils/sync.ts b/packages/cli/src/utils/sync.ts index a1cd579..56a82b1 100644 --- a/packages/cli/src/utils/sync.ts +++ b/packages/cli/src/utils/sync.ts @@ -274,8 +274,6 @@ export async function executeSyncChanges( const parallel = options.parallel ?? 100; const limit = pLimit(parallel); const { filesToUpload, filesToDelete } = buildSyncPlan(analysis); - const totalOperations = filesToUpload.length + filesToDelete.length; - let completed = 0; console.log(chalk.bold("\nSyncing changes...")); @@ -299,14 +297,12 @@ export async function executeSyncChanges( await client.stores.files.delete(file.fileId!, { store_identifier: storeIdentifier, }); - completed++; deleteCompleted++; deleteSpinner.message( `Deleting ${deleteCompleted}/${deleteTotal} files...` ); return { file, success: true }; } catch (error) { - completed++; deleteCompleted++; deleteSpinner.message( `Deleting ${deleteCompleted}/${deleteTotal} files...` @@ -324,7 +320,6 @@ export async function executeSyncChanges( ); const deleteResults = await Promise.allSettled(deletePromises); - deleteSpinner.stop(`Deleted ${deleteTotal} files`); deleteResults.forEach((result) => { if (result.status === "fulfilled") { @@ -336,6 +331,13 @@ export async function executeSyncChanges( } } }); + + const deletedOk = results.deletions.successful.length; + 
deleteSpinner.stop( + deletedOk === deleteTotal + ? `Deleted ${deleteTotal} files` + : `Deleted ${deletedOk}/${deleteTotal} files (${results.deletions.failed.length} failed)` + ); } // Upload new and modified files @@ -372,7 +374,6 @@ export async function executeSyncChanges( // Check if file is empty const stats = await fs.stat(file.path); if (stats.size === 0) { - completed++; uploadCompleted++; uploadSpinner.message( `Uploading ${uploadCompleted}/${uploadTotal} files...` @@ -393,14 +394,12 @@ export async function executeSyncChanges( }, }); - completed++; uploadCompleted++; uploadSpinner.message( `Uploading ${uploadCompleted}/${uploadTotal} files...` ); return { file, success: true }; } catch (error) { - completed++; uploadCompleted++; uploadSpinner.message( `Uploading ${uploadCompleted}/${uploadTotal} files...` @@ -418,7 +417,6 @@ export async function executeSyncChanges( ); const uploadResults = await Promise.allSettled(uploadPromises); - uploadSpinner.stop(`Uploaded ${uploadTotal} files`); uploadResults.forEach((result) => { if (result.status === "fulfilled") { @@ -430,6 +428,13 @@ export async function executeSyncChanges( } } }); + + const uploadedOk = results.uploads.successful.length; + uploadSpinner.stop( + uploadedOk === uploadTotal + ? `Uploaded ${uploadTotal} files` + : `Uploaded ${uploadedOk}/${uploadTotal} files (${results.uploads.failed.length} failed)` + ); } return results; diff --git a/packages/cli/src/utils/upload.ts b/packages/cli/src/utils/upload.ts index 3c99c58..8e944da 100644 --- a/packages/cli/src/utils/upload.ts +++ b/packages/cli/src/utils/upload.ts @@ -354,7 +354,12 @@ export async function uploadFilesInBatch( ) ); - uploadSpinner.stop(`Uploaded ${total} files`); + const successCount = results.uploaded + results.updated; + uploadSpinner.stop( + successCount === total + ? 
`Uploaded ${total} files` + : `Uploaded ${successCount}/${total} files (${results.failed} failed, ${results.skipped} skipped)` + ); // Summary console.log(`\n${chalk.bold("Upload Summary:")}`); From 788d7578489c17afb0ab8c2e76fd7c854f15b4df Mon Sep 17 00:00:00 2001 From: aavash Date: Wed, 4 Mar 2026 14:12:04 +0100 Subject: [PATCH 08/13] chore: add changeset for minor bump --- .changeset/every-rockets-tie.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/every-rockets-tie.md diff --git a/.changeset/every-rockets-tie.md b/.changeset/every-rockets-tie.md new file mode 100644 index 0000000..ab7ea3e --- /dev/null +++ b/.changeset/every-rockets-tie.md @@ -0,0 +1,5 @@ +--- +"@mixedbread/cli": minor +--- + +Support multipart uploads and use it by default for larger files From cbfec10e7298e12f243acfe9d3813ef97880ab2e Mon Sep 17 00:00:00 2001 From: aavash Date: Wed, 4 Mar 2026 14:15:09 +0100 Subject: [PATCH 09/13] fix: max parts check guard and updated spinner messages for skipped vs failures --- packages/cli/src/utils/sync.ts | 18 +++++++++++++----- packages/cli/src/utils/upload.ts | 10 +++++++--- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/packages/cli/src/utils/sync.ts b/packages/cli/src/utils/sync.ts index 56a82b1..befd7f1 100644 --- a/packages/cli/src/utils/sync.ts +++ b/packages/cli/src/utils/sync.ts @@ -430,11 +430,19 @@ export async function executeSyncChanges( }); const uploadedOk = results.uploads.successful.length; - uploadSpinner.stop( - uploadedOk === uploadTotal - ? 
`Uploaded ${uploadTotal} files` - : `Uploaded ${uploadedOk}/${uploadTotal} files (${results.uploads.failed.length} failed)` - ); + const skippedCount = results.uploads.failed.filter((r) => r.skipped).length; + const failedCount = results.uploads.failed.length - skippedCount; + + if (uploadedOk === uploadTotal) { + uploadSpinner.stop(`Uploaded ${uploadTotal} files`); + } else { + const parts: string[] = []; + if (failedCount > 0) parts.push(`${failedCount} failed`); + if (skippedCount > 0) parts.push(`${skippedCount} skipped`); + uploadSpinner.stop( + `Uploaded ${uploadedOk}/${uploadTotal} files (${parts.join(", ")})` + ); + } } return results; diff --git a/packages/cli/src/utils/upload.ts b/packages/cli/src/utils/upload.ts index 8e944da..43df5cd 100644 --- a/packages/cli/src/utils/upload.ts +++ b/packages/cli/src/utils/upload.ts @@ -48,9 +48,13 @@ export function resolveMultipartConfig( partSize = Math.max(partSize, MIN_PART_SIZE); - // Apply user override before computing concurrency so the memory budget - // accounts for the actual part size that will be used. - const finalPartSize = overrides?.partSize ?? partSize; + // Apply user override, then re-enforce the MAX_PARTS guard so a small + // user-specified part size can't produce more parts than the backend allows. + let finalPartSize = overrides?.partSize ?? 
partSize; + if (fileSize > 0 && Math.ceil(fileSize / finalPartSize) > MAX_PARTS) { + finalPartSize = Math.ceil(fileSize / MAX_PARTS); + } + finalPartSize = Math.max(finalPartSize, MIN_PART_SIZE); // Concurrency: bounded by CPU cores and available memory const cores = cpus().length; From 76efaf0bacc79485da6600d3f6a7279d85b0ea37 Mon Sep 17 00:00:00 2001 From: aavash Date: Wed, 4 Mar 2026 14:31:57 +0100 Subject: [PATCH 10/13] fix(workflows): fix pnpm changeset publish command for pre-release workflow --- .github/workflows/prerelease.yml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/prerelease.yml b/.github/workflows/prerelease.yml index 0691c90..0351b88 100644 --- a/.github/workflows/prerelease.yml +++ b/.github/workflows/prerelease.yml @@ -4,9 +4,9 @@ on: workflow_dispatch: inputs: tag: - description: 'Prerelease tag (e.g., beta, alpha, rc)' + description: "Prerelease tag (e.g., beta, alpha, rc)" required: true - default: 'beta' + default: "beta" type: choice options: - beta @@ -37,8 +37,8 @@ jobs: uses: actions/setup-node@v4 with: node-version: 24 - cache: 'pnpm' - registry-url: 'https://registry.npmjs.org' + cache: "pnpm" + registry-url: "https://registry.npmjs.org" - name: Install dependencies run: pnpm install --frozen-lockfile @@ -60,7 +60,7 @@ jobs: git commit -m "chore: version packages for ${{ inputs.tag }} release" || echo "No changes to commit" - name: Publish prerelease packages - run: pnpm changeset publish --tag ${{ inputs.tag }} + run: pnpm changeset publish env: NPM_CONFIG_PROVENANCE: true @@ -72,4 +72,5 @@ jobs: pnpm changeset pre exit || echo "Not in pre mode" git add . 
git commit -m "chore: exit prerelease mode" || echo "No changes to commit" - git push || echo "No changes to push" \ No newline at end of file + git push || echo "No changes to push" + From 7968949aa2e4307cc479ebbe4600ff7a5216fff6 Mon Sep 17 00:00:00 2001 From: aavash Date: Wed, 4 Mar 2026 16:36:38 +0100 Subject: [PATCH 11/13] fix: do not show auto tuned multipart upload config preview --- packages/cli/src/utils/upload.ts | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/packages/cli/src/utils/upload.ts b/packages/cli/src/utils/upload.ts index 43df5cd..f282c4a 100644 --- a/packages/cli/src/utils/upload.ts +++ b/packages/cli/src/utils/upload.ts @@ -242,20 +242,16 @@ export async function uploadFilesInBatch( successfulSize: 0, }; - // Show a preview of multipart config using a representative file size (first file) - const previewConfig = resolveMultipartConfig(0, multipartUpload); - const configParts = [ - `${parallel} files at a time`, - `multipart threshold ${formatBytes(previewConfig.threshold)}`, - `part size ${formatBytes(previewConfig.partSize)}`, - `${previewConfig.concurrency} concurrent part uploads`, - ]; - console.log(chalk.gray(`Processing ${configParts.join(", ")}...`)); - if (!multipartUpload?.partSize || !multipartUpload?.concurrency) { - console.log( - chalk.gray("(part size and concurrency auto-tune per file size)") - ); + const configParts = [`${parallel} files at a time`]; + const threshold = multipartUpload?.threshold ??
50 * MB; + configParts.push(`multipart above ${formatBytes(threshold)}`); + if (multipartUpload?.partSize) { + configParts.push(`part size ${formatBytes(multipartUpload.partSize)}`); } + if (multipartUpload?.concurrency) { + configParts.push(`${multipartUpload.concurrency} concurrent part uploads`); + } + console.log(chalk.gray(`Processing ${configParts.join(", ")}...`)); // Process files with sliding-window concurrency const total = files.length; From 8c56700fd26f9e71916841505bcc73d1b8d8d965 Mon Sep 17 00:00:00 2001 From: aavash Date: Fri, 6 Mar 2026 11:07:00 +0100 Subject: [PATCH 12/13] chore(deps): install mainline sdk --- packages/cli/package.json | 2 +- pnpm-lock.yaml | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/cli/package.json b/packages/cli/package.json index cbcb81b..492ebc2 100644 --- a/packages/cli/package.json +++ b/packages/cli/package.json @@ -50,7 +50,7 @@ }, "dependencies": { "@clack/prompts": "^1.0.1", - "@mixedbread/sdk": "https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz", + "@mixedbread/sdk": "^0.57.0", "@pnpm/tabtab": "^0.5.4", "chalk": "^5.6.2", "cli-table3": "^0.6.5", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 75c84a7..5f9bbac 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -30,8 +30,8 @@ importers: specifier: ^1.0.1 version: 1.0.1 '@mixedbread/sdk': - specifier: https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz - version: https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz + specifier: ^0.57.0 + version: 0.57.0 '@pnpm/tabtab': specifier: ^0.5.4 version: 0.5.4 @@ -759,9 +759,8 @@ packages: resolution: {integrity: sha512-B0Un0thpz4UKN69QVmgdVsCIgiz+jR4V+nxuCOgdm/P0Iie1NctqY092VQ/iobY/FSMzhHJofXij086y/9E5rg==} hasBin: true - 
'@mixedbread/sdk@https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz': - resolution: {tarball: https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz} - version: 0.56.0 + '@mixedbread/sdk@0.57.0': + resolution: {integrity: sha512-twoZYKSuSHsR1yKDaiEDB+TRGSPUYC9gfOTMC4duSbSdXYN12mPWiihMXuDfdZT/JuWGh2cnQ73rle8FoG0+rw==} hasBin: true '@napi-rs/wasm-runtime@0.2.12': @@ -4105,7 +4104,7 @@ snapshots: '@mixedbread/sdk@0.26.0': {} - '@mixedbread/sdk@https://pkg.stainless.com/s/mixedbread-typescript/6c522ab9642c9efb265a6469a925ad8401a6581e/dist.tar.gz': {} + '@mixedbread/sdk@0.57.0': {} '@napi-rs/wasm-runtime@0.2.12': dependencies: From f1d76d053ee1f98ac905db648338caddffa57b3f Mon Sep 17 00:00:00 2001 From: aavash Date: Fri, 6 Mar 2026 12:31:13 +0100 Subject: [PATCH 13/13] chore: use helper function for spinner logs --- packages/cli/src/utils/sync.ts | 24 ++++++++++++------------ packages/cli/src/utils/upload.ts | 18 +++++++++--------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/packages/cli/src/utils/sync.ts b/packages/cli/src/utils/sync.ts index befd7f1..8916ed2 100644 --- a/packages/cli/src/utils/sync.ts +++ b/packages/cli/src/utils/sync.ts @@ -288,7 +288,7 @@ export async function executeSyncChanges( let deleteCompleted = 0; const deleteSpinner = spinner(); deleteSpinner.start( - `Deleting 0/${deleteTotal} files...` + `Deleting 0/${formatCountWithSuffix(deleteTotal, "file")}...` ); const deletePromises: Promise[] = filesToDelete.map((file) => @@ -299,13 +299,13 @@ export async function executeSyncChanges( }); deleteCompleted++; deleteSpinner.message( - `Deleting ${deleteCompleted}/${deleteTotal} files...` + `Deleting ${deleteCompleted}/${formatCountWithSuffix(deleteTotal, "file")}...` ); return { file, success: true }; } catch (error) { deleteCompleted++; deleteSpinner.message( - `Deleting ${deleteCompleted}/${deleteTotal} files...` + `Deleting 
${deleteCompleted}/${formatCountWithSuffix(deleteTotal, "file")}...` ); log.error( `Failed to delete ${path.relative(process.cwd(), file.path)}: ${error instanceof Error ? error.message : "Unknown error"}` @@ -335,8 +335,8 @@ export async function executeSyncChanges( const deletedOk = results.deletions.successful.length; deleteSpinner.stop( deletedOk === deleteTotal - ? `Deleted ${deleteTotal} files` - : `Deleted ${deletedOk}/${deleteTotal} files (${results.deletions.failed.length} failed)` + ? `Deleted ${formatCountWithSuffix(deleteTotal, "file")}` + : `Deleted ${deletedOk}/${formatCountWithSuffix(deleteTotal, "file")} (${results.deletions.failed.length} failed)` ); } @@ -346,7 +346,7 @@ export async function executeSyncChanges( let uploadCompleted = 0; const uploadSpinner = spinner(); uploadSpinner.start( - `Uploading 0/${uploadTotal} files...` + `Uploading 0/${formatCountWithSuffix(uploadTotal, "file")}...` ); const uploadPromises: Promise[] = filesToUpload.map((file) => @@ -376,7 +376,7 @@ export async function executeSyncChanges( if (stats.size === 0) { uploadCompleted++; uploadSpinner.message( - `Uploading ${uploadCompleted}/${uploadTotal} files...` + `Uploading ${uploadCompleted}/${formatCountWithSuffix(uploadTotal, "file")}...` ); return { file, success: false, skipped: true }; } @@ -389,20 +389,20 @@ export async function executeSyncChanges( multipartUpload: options.multipartUpload, onProgress: (progress: UploadProgress) => { uploadSpinner.message( - `Uploading ${uploadCompleted}/${uploadTotal} files... (${progress.fileName}: part ${progress.partsCompleted}/${progress.totalParts}, ${formatBytes(progress.uploadedBytes)}/${formatBytes(progress.totalBytes)})` + `Uploading ${uploadCompleted}/${formatCountWithSuffix(uploadTotal, "file")}... 
(${progress.fileName}: part ${progress.partsCompleted}/${progress.totalParts}, ${formatBytes(progress.uploadedBytes)}/${formatBytes(progress.totalBytes)})` ); }, }); uploadCompleted++; uploadSpinner.message( - `Uploading ${uploadCompleted}/${uploadTotal} files...` + `Uploading ${uploadCompleted}/${formatCountWithSuffix(uploadTotal, "file")}...` ); return { file, success: true }; } catch (error) { uploadCompleted++; uploadSpinner.message( - `Uploading ${uploadCompleted}/${uploadTotal} files...` + `Uploading ${uploadCompleted}/${formatCountWithSuffix(uploadTotal, "file")}...` ); log.error( `Failed to upload ${relativePath}: ${error instanceof Error ? error.message : "Unknown error"}` @@ -434,13 +434,13 @@ export async function executeSyncChanges( const failedCount = results.uploads.failed.length - skippedCount; if (uploadedOk === uploadTotal) { - uploadSpinner.stop(`Uploaded ${uploadTotal} files`); + uploadSpinner.stop(`Uploaded ${formatCountWithSuffix(uploadTotal, "file")}`); } else { const parts: string[] = []; if (failedCount > 0) parts.push(`${failedCount} failed`); if (skippedCount > 0) parts.push(`${skippedCount} skipped`); uploadSpinner.stop( - `Uploaded ${uploadedOk}/${uploadTotal} files (${parts.join(", ")})` + `Uploaded ${uploadedOk}/${formatCountWithSuffix(uploadTotal, "file")} (${parts.join(", ")})` ); } } diff --git a/packages/cli/src/utils/upload.ts b/packages/cli/src/utils/upload.ts index f282c4a..01dbe5d 100644 --- a/packages/cli/src/utils/upload.ts +++ b/packages/cli/src/utils/upload.ts @@ -242,7 +242,7 @@ export async function uploadFilesInBatch( successfulSize: 0, }; - const configParts = [`${parallel} files at a time`]; + const configParts = [`${formatCountWithSuffix(parallel, "file")} at a time`]; const threshold = multipartUpload?.threshold ?? 
50 * MB; configParts.push(`multipart above ${formatBytes(threshold)}`); if (multipartUpload?.partSize) { @@ -257,7 +257,7 @@ export async function uploadFilesInBatch( const total = files.length; let completed = 0; const uploadSpinner = spinner(); - uploadSpinner.start(`Uploading 0/${total} files...`); + uploadSpinner.start(`Uploading 0/${formatCountWithSuffix(total, "file")}...`); const limit = pLimit(parallel); await Promise.allSettled( @@ -284,7 +284,7 @@ export async function uploadFilesInBatch( const stats = await stat(file.path); if (stats.size === 0) { completed++; - uploadSpinner.message(`Uploading ${completed}/${total} files...`); + uploadSpinner.message(`Uploading ${completed}/${formatCountWithSuffix(total, "file")}...`); results.skipped++; return; } @@ -307,7 +307,7 @@ export async function uploadFilesInBatch( if (totalFileBytes >= mpConfig.threshold) { const expectedParts = Math.ceil(totalFileBytes / mpConfig.partSize); uploadSpinner.message( - `Uploading ${completed}/${total} files... (${fileName}: 0/${expectedParts} parts, ${formatBytes(0)}/${formatBytes(totalFileBytes)})` + `Uploading ${completed}/${formatCountWithSuffix(total, "file")}... (${fileName}: 0/${expectedParts} parts, ${formatBytes(0)}/${formatBytes(totalFileBytes)})` ); } @@ -327,7 +327,7 @@ export async function uploadFilesInBatch( onPartUpload: (event) => { partsCompleted++; uploadSpinner.message( - `Uploading ${completed}/${total} files... (${fileName}: part ${partsCompleted}/${event.totalParts}, ${formatBytes(event.uploadedBytes)}/${formatBytes(event.totalBytes)})` + `Uploading ${completed}/${formatCountWithSuffix(total, "file")}... 
(${fileName}: part ${partsCompleted}/${event.totalParts}, ${formatBytes(event.uploadedBytes)}/${formatBytes(event.totalBytes)})` ); }, }, @@ -341,11 +341,11 @@ export async function uploadFilesInBatch( results.successfulSize += stats.size; completed++; - uploadSpinner.message(`Uploading ${completed}/${total} files...`); + uploadSpinner.message(`Uploading ${completed}/${formatCountWithSuffix(total, "file")}...`); } catch (error) { results.failed++; completed++; - uploadSpinner.message(`Uploading ${completed}/${total} files...`); + uploadSpinner.message(`Uploading ${completed}/${formatCountWithSuffix(total, "file")}...`); const errorMsg = error instanceof Error ? error.message : "Unknown error"; log.error(`${relativePath} - ${errorMsg}`); @@ -357,8 +357,8 @@ export async function uploadFilesInBatch( const successCount = results.uploaded + results.updated; uploadSpinner.stop( successCount === total - ? `Uploaded ${total} files` - : `Uploaded ${successCount}/${total} files (${results.failed} failed, ${results.skipped} skipped)` + ? `Uploaded ${formatCountWithSuffix(total, "file")}` + : `Uploaded ${successCount}/${formatCountWithSuffix(total, "file")} (${results.failed} failed, ${results.skipped} skipped)` ); // Summary