From 941c5c824c96b3cad5dd1800888bb3a40ef46b5e Mon Sep 17 00:00:00 2001 From: Andrew Gazelka Date: Wed, 26 Nov 2025 13:17:03 -0800 Subject: [PATCH] feat: add stdin piping support for search command - Add tryReadStdin() to detect and read piped stdin data - Add uploadBuffer() to upload stdin content to store - Auto-detect stdin mode and sync content before searching - Use SHA-256 hash for stdin caching (__stdin__/<hash>) - Display <stdin> in search results for stdin-originated content - Add tests for stdin piping functionality --- src/commands/search.ts | 62 ++++++++++++++++++++++++++++---- src/utils.ts | 80 ++++++++++++++++++++++++++++++++++++++++++ test/test.bats | 33 +++++++++++++++++ 3 files changed, 168 insertions(+), 7 deletions(-) diff --git a/src/commands/search.ts b/src/commands/search.ts index b3fa86a..1c9b50a 100644 --- a/src/commands/search.ts +++ b/src/commands/search.ts @@ -1,6 +1,7 @@ import { join, normalize } from "node:path"; import type { Command } from "commander"; import { Command as CommanderCommand } from "commander"; +import ora from "ora"; import { createFileSystem, createStore } from "../lib/context"; import type { AskResponse, @@ -12,7 +13,12 @@ import { createIndexingSpinner, formatDryRunSummary, } from "../lib/sync-helpers"; -import { initialSync } from "../utils"; +import { + computeBufferHash, + initialSync, + tryReadStdin, + uploadBuffer, +} from "../utils"; function extractSources(response: AskResponse): { [key: number]: ChunkType } { const sources: { [key: number]: ChunkType } = {}; @@ -70,8 +76,13 @@ function formatSearchResponse(response: SearchResponse, show_content: boolean) { function formatChunk(chunk: ChunkType, show_content: boolean) { const pwd = process.cwd(); - const path = - (chunk.metadata as FileMetadata)?.path?.replace(pwd, "") ?? "Unknown path"; + const rawPath = + (chunk.metadata as FileMetadata)?.path ?? 
"Unknown path"; + + // Display <stdin> for stdin paths, otherwise show relative path + const path = rawPath.startsWith("__stdin__/") + ? "<stdin>" + : rawPath.replace(pwd, ""); let line_range = ""; let content = ""; switch (chunk.type) { @@ -165,7 +176,40 @@ export const search: Command = new CommanderCommand("search") const store = await createStore(); const root = process.cwd(); - if (options.sync) { + // Try to read stdin (returns null if no data available) + const stdinBuffer = await tryReadStdin(); + const isStdinMode = stdinBuffer !== null; + let stdinPath: string | null = null; + + if (isStdinMode) { + if (stdinBuffer.length === 0) { + console.error("Stdin is empty"); + process.exitCode = 1; + return; + } + + // Use hash-based path for caching + const hash = computeBufferHash(stdinBuffer); + stdinPath = `__stdin__/${hash.substring(0, 12)}`; + + // Upload stdin content + const spinner = ora("Uploading stdin content...").start(); + await uploadBuffer(store, options.store, stdinBuffer, stdinPath); + + // Wait for indexing + spinner.text = "Indexing stdin content..."; + while (true) { + const info = await store.getInfo(options.store); + if (info.counts.pending === 0 && info.counts.in_progress === 0) { + break; + } + await new Promise((resolve) => setTimeout(resolve, 500)); + } + spinner.succeed("Stdin content indexed"); + } + + // Skip normal --sync when in stdin mode (stdin already synced) + if (options.sync && !isStdinMode) { const fileSystem = createFileSystem({ ignorePatterns: [ "*.lock", @@ -203,9 +247,13 @@ export const search: Command = new CommanderCommand("search") } } - const search_path = exec_path?.startsWith("/") - ? exec_path - : normalize(join(root, exec_path ?? "")); + // Use stdinPath when in stdin mode, otherwise use exec_path + const search_path = + isStdinMode && stdinPath + ? stdinPath + : exec_path?.startsWith("/") + ? exec_path + : normalize(join(root, exec_path ?? 
"")); let response: string; if (!options.answer) { diff --git a/src/utils.ts b/src/utils.ts index 3270fb9..76cd6b0 100644 --- a/src/utils.ts +++ b/src/utils.ts @@ -16,6 +16,86 @@ import { getStoredToken } from "./token"; export const isTest = process.env.MGREP_IS_TEST === "1"; +/** + * Reads stdin if data is available (non-blocking check + read) + * Returns null if stdin is a TTY or no data is available + */ +export async function tryReadStdin(): Promise<Buffer | null> { + // If stdin is a TTY (interactive terminal), no piped data + if (process.stdin.isTTY) { + return null; + } + + const chunks: Buffer[] = []; + + return new Promise((resolve) => { + // Set a short timeout to check if data is available + // If no data arrives quickly, assume no stdin data + const timeout = setTimeout(() => { + process.stdin.removeAllListeners(); + process.stdin.pause(); + if (chunks.length === 0) { + resolve(null); + } else { + resolve(Buffer.concat(chunks)); + } + }, 100); + + process.stdin.on("data", (chunk) => { + clearTimeout(timeout); + chunks.push(Buffer.from(chunk)); + }); + + process.stdin.on("end", () => { + clearTimeout(timeout); + if (chunks.length === 0) { + resolve(null); + } else { + resolve(Buffer.concat(chunks)); + } + }); + + process.stdin.on("error", () => { + clearTimeout(timeout); + resolve(null); + }); + + // Start reading + process.stdin.resume(); + }); +} + +/** + * Uploads a buffer directly to the store + */ +export async function uploadBuffer( + store: Store, + storeId: string, + buffer: Buffer, + externalId: string, +): Promise<boolean> { + if (buffer.length === 0) { + return false; + } + + const hash = computeBufferHash(buffer); + const options = { + external_id: externalId, + overwrite: true, + metadata: { + path: externalId, + hash, + }, + }; + + await store.uploadFile( + storeId, + new File([new Uint8Array(buffer)], "stdin.txt", { type: "text/plain" }), + options, + ); + return true; +} + export function computeBufferHash(buffer: Buffer): string { return 
createHash("sha256").update(buffer).digest("hex"); } diff --git a/test/test.bats b/test/test.bats index 9e3a0e2..d3ac46d 100755 --- a/test/test.bats +++ b/test/test.bats @@ -238,3 +238,36 @@ teardown() { refute_output --partial 'model.safetensors' refute_output --partial 'binaryfile.bin' } + +@test "Search with stdin pipe" { + run bash -c 'echo "Hello stdin content for testing" | mgrep search "stdin"' + + assert_success + assert_output --partial '<stdin>' +} + +@test "Search stdin with answer mode" { + run bash -c 'echo "Hello stdin content" | mgrep search -a "what is this?"' + + assert_success + assert_output --partial 'Stdin content indexed' + assert_output --partial 'mock answer' +} + +@test "Search stdin with content mode" { + run bash -c 'echo "Hello stdin unique content" | mgrep search -c "unique"' + + assert_success + assert_output --partial '<stdin>' + assert_output --partial 'Hello stdin unique content' +} + +@test "Empty stdin is treated as no stdin" { + # Empty stdin (echo -n "") is treated as no stdin input + # so the command proceeds with normal file search + run bash -c 'echo -n "" | mgrep search "test"' + + assert_success + # Should search existing files, not stdin + assert_output --partial 'test.txt' +}