diff --git a/README.md b/README.md index b651584..20a1e50 100644 --- a/README.md +++ b/README.md @@ -12,16 +12,13 @@ ## Why mgrep? - Natural-language search that feels as immediate as `grep`. - Semantic, multilingual & multimodal (audio, video support coming soon!) -- Smooth background indexing via `mgrep watch`, designed to detect and keep up-to-date everything that matters inside any git repository. +- Automatic background indexing that starts lazily on first search, keeping everything up-to-date inside any git repository. - Friendly device-login flow and first-class coding agent integrations. - Built for agents and humans alike, and **designed to be a helpful tool**, not a restrictive harness: quiet output, thoughtful defaults, and escape hatches everywhere. - Reduces the token usage of your agent by 2x while maintaining superior performance ```bash -# index once -mgrep watch - -# then ask your repo things in natural language +# just ask your repo things in natural language mgrep "where do we set up auth?" ``` @@ -45,28 +42,22 @@ mgrep "where do we set up auth?" ``` This bypasses the browser login flow entirely. -3. **Index a project** +3. **Search anything** ```bash cd path/to/repo - mgrep watch - ``` - `watch` performs an initial sync, respects `.gitignore`, then keeps the Mixedbread store updated as files change. - -4. **Search anything** - ```bash mgrep "where do we set up auth?" src/lib mgrep -m 25 "store schema" ``` - Searches default to the current working directory unless you pass a path. + On first search, mgrep automatically starts a background daemon that indexes your files and keeps them in sync. Searches default to the current working directory unless you pass a path. **Today, `mgrep` works great on:** code, text, PDFs, images. **Coming soon:** audio & video. ## Using it with Coding Agents -- **Claude Code (today)** - 1. Run `mgrep install-claude-code`. 
The command signs you in (if needed), adds the Mixedbread mgrep plugin to the marketplace, and installs it. - 2. Open Claude Code, enable the plugin, and point your agent at the repo you are indexing with `mgrep watch`. +- **Claude Code (today)** + 1. Run `mgrep install-claude-code`. The command signs you in (if needed), adds the Mixedbread mgrep plugin to the marketplace, and installs it. + 2. Open Claude Code, enable the plugin, and point your agent at your repo. 3. Ask Claude something just like you do locally; results stream straight into the chat with file paths and line hints. - More agents (Codex, Cursor, Windsurf, etc.) are on the way—this section will grow as soon as each integration lands. @@ -110,8 +101,10 @@ We designed `mgrep` to complement `grep`, not replace it. The best code search c | Command | Purpose | | --- | --- | -| `mgrep` / `mgrep search [path]` | Natural-language search with many `grep`-style flags (`-i`, `-r`, `-m`...). | -| `mgrep watch` | Index current repo and keep the Mixedbread store in sync via file watchers. | +| `mgrep` / `mgrep search [path]` | Natural-language search with many `grep`-style flags (`-i`, `-r`, `-m`...). Automatically starts background indexing on first use. | +| `mgrep watch` | Manually start file watching (usually not needed - starts automatically on search). | +| `mgrep daemon:list` | List all running background daemons. | +| `mgrep daemon:stop [dir]` | Stop the daemon for a directory (defaults to current directory). | | `mgrep login` & `mgrep logout` | Manage device-based authentication with Mixedbread. | | `mgrep install-claude-code` | Log in, add the Mixedbread mgrep plugin to Claude Code, and install it for you. | @@ -142,16 +135,22 @@ mgrep -a "What code parsers are available?" # generate an answer to the questio ### mgrep watch -`mgrep watch` is used to index the current repository and keep the Mixedbread -store in sync via file watchers. +`mgrep watch` manually starts the file watcher daemon. 
This is usually not +needed since `mgrep search` automatically starts a daemon on first use. -It respects the current `.gitignore`, as well as a `.mgrepignore` file in the -root of the repository. The `.mgrepignore` file follows the same syntax as the -[`.gitignore`](https://git-scm.com/docs/gitignore) file. +The daemon respects `.gitignore` as well as `.mgrepignore` files (same syntax as +[`.gitignore`](https://git-scm.com/docs/gitignore)). + +### Daemon Management + +mgrep automatically manages background daemons with hierarchy awareness: +- Running `mgrep` in a subdirectory reuses the parent directory's daemon if one exists +- Running `mgrep` in a parent directory consolidates child daemons into one -**Examples:** ```bash -mgrep watch # index the current repository and keep the Mixedbread store in sync via file watchers +mgrep daemon:list # show all running daemons +mgrep daemon:stop # stop daemon for current directory +mgrep daemon:stop /path/to/dir # stop daemon for specific directory ``` ## Mixedbread under the hood @@ -167,7 +166,7 @@ mgrep watch # index the current repository and keep the Mixedbread store in syn - `--store ` lets you isolate workspaces (per repo, per team, per experiment). Stores are created on demand if they do not exist yet. - Ignore rules come straight from git, so temp files, build outputs, and vendored deps stay out of your embeddings. -- `watch` reports progress (`processed / uploaded`) as it scans; leave it running in a terminal tab to keep your store fresh. +- The background daemon reports progress as it scans and keeps your store fresh automatically. - `search` accepts most `grep`-style switches, and politely ignores anything it cannot support, so existing muscle memory still works. ## Environment Variables @@ -236,7 +235,7 @@ The tests are written using [bats](https://bats-core.readthedocs.io/en/stable/). ## Troubleshooting - **Login keeps reopening**: run `mgrep logout` to clear cached tokens, then try `mgrep login` again. 
-- **Watcher feels noisy**: set `MXBAI_STORE` or pass `--store` to separate experiments, or pause the watcher and restart after large refactors. +- **Daemon feels noisy**: use `mgrep daemon:stop` to stop it, or set `MXBAI_STORE` / `--store` to separate experiments. - **Need a fresh store**: delete it from the Mixedbread dashboard, then run `mgrep watch`. It will auto-create a new one. ## License diff --git a/src/commands/search.ts b/src/commands/search.ts index f028f98..6ee202b 100644 --- a/src/commands/search.ts +++ b/src/commands/search.ts @@ -2,6 +2,7 @@ import { join, normalize } from "node:path"; import type { Command } from "commander"; import { Command as CommanderCommand } from "commander"; import { createFileSystem, createStore } from "../lib/context"; +import { ensureDaemon } from "../lib/daemon"; import type { AskResponse, ChunkType, @@ -165,6 +166,10 @@ export const search: Command = new CommanderCommand("search") const store = await createStore(); const root = process.cwd(); + if (!options.sync) { + ensureDaemon(options.store, root); + } + if (options.sync) { const fileSystem = createFileSystem({ ignorePatterns: ["*.lock", "*.bin", "*.ipynb", "*.pyc"], diff --git a/src/commands/watch.ts b/src/commands/watch.ts index 0ccaa31..f7e2b61 100644 --- a/src/commands/watch.ts +++ b/src/commands/watch.ts @@ -61,6 +61,11 @@ export const watch = new Command("watch") throw e; } + process.on("SIGTERM", () => { + console.log("Daemon received SIGTERM, shutting down"); + process.exit(0); + }); + console.log("Watching for file changes in", watchRoot); fileSystem.loadMgrepignore(watchRoot); fs.watch(watchRoot, { recursive: true }, (eventType, rawFilename) => { diff --git a/src/index.ts b/src/index.ts index 8de3e15..be06e9e 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,12 +1,13 @@ #!/usr/bin/env node import * as fs from "node:fs"; import * as path from "node:path"; -import { program } from "commander"; +import { Command, program } from "commander"; import { login } 
from "./commands/login"; import { logout } from "./commands/logout"; import { search } from "./commands/search"; import { watch } from "./commands/watch"; import { installClaudeCode, uninstallClaudeCode } from "./install/claude-code"; +import { listDaemons, stopDaemon } from "./lib/daemon"; import { setupLogger } from "./lib/logger"; setupLogger(); @@ -32,4 +33,32 @@ program.addCommand(uninstallClaudeCode); program.addCommand(login); program.addCommand(logout); +/* `daemon:list`: prints one "PID <pid>: <dir>" line per daemon returned by listDaemons(), or a notice when there are none. */ +const daemonList = new Command("daemon:list") + .description("List all running daemons") + .action(() => { + const daemons = listDaemons(); + if (daemons.length === 0) { + console.log("No daemons running"); + } else { + for (const d of daemons) { + console.log(`PID ${d.pid}: ${d.dir}`); + } + } + }); + +/* `daemon:stop [dir]`: asks stopDaemon() to stop the daemon recorded for the given directory, falling back to the current working directory when no argument is passed, and reports the outcome. */ +const daemonStop = new Command("daemon:stop") + .description("Stop daemon for directory (default: cwd)") + .argument("[dir]", "Directory to stop daemon for") + .action((dir) => { + const target = dir || process.cwd(); + if (stopDaemon(target)) { + console.log("Daemon stopped"); + } else { + console.log(`No daemon was running for ${target}`); + } + }); + +program.addCommand(daemonList); +program.addCommand(daemonStop); + program.parse(); diff --git a/src/lib/daemon.ts b/src/lib/daemon.ts new file mode 100644 index 0000000..3be39d9 --- /dev/null +++ b/src/lib/daemon.ts @@ -0,0 +1,164 @@ +import { spawn } from "node:child_process"; +import { createHash } from "node:crypto"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import { getLogDir } from "./logger"; + +/** Record describing one daemon: its process id and the directory it was started for (serialized to JSON by ensureDaemon). */ +interface DaemonInfo { + pid: number; + dir: string; +} + +/** State directory: the parent of the directory returned by getLogDir("mgrep"). */ +function getStateDir(): string { + return path.dirname(getLogDir("mgrep")); +} + +/** Directory holding the per-daemon record and lock files: "daemons" under the state directory. */ +function getDaemonsDir(): string { + return path.join(getStateDir(), "daemons"); +} + +/** Short key for a directory: the first 16 hex characters of the SHA-256 digest of the path string. */ +function hashPath(dir: string): string { + return createHash("sha256").update(dir).digest("hex").slice(0, 16); +} + +/** Path of the JSON record for `dir`, named after its hashed path. */ +function getDaemonFile(dir: string): string { + return path.join(getDaemonsDir(), 
`${hashPath(dir)}.json`);
+}
+
+/** Returns the path of the start-up lock file for `dir` inside the daemons directory. */
+function getLockFile(dir: string): string {
+  return path.join(getDaemonsDir(), `${hashPath(dir)}.lock`);
+}
+
+/**
+ * Check if process is alive by sending signal 0.
+ * Signal 0 doesn't actually send anything - it just checks if we have
+ * permission to send signals to the process (i.e., it exists and we own it).
+ * Any failure (no such process, no permission, invalid pid) is reported as
+ * "not alive".
+ */
+function isProcessAlive(pid: number): boolean {
+  try {
+    process.kill(pid, 0);
+    return true;
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Reads and validates a daemon record.
+ *
+ * @returns the record, or null when the file is missing, unreadable, not
+ * valid JSON, or does not have a positive integer `pid` and a non-empty
+ * string `dir`.
+ */
+function readDaemonInfo(file: string): DaemonInfo | null {
+  try {
+    const parsed: unknown = JSON.parse(fs.readFileSync(file, "utf-8"));
+    if (typeof parsed !== "object" || parsed === null) return null;
+    const { pid, dir } = parsed as Record<string, unknown>;
+    // A pid of 0 or a negative pid would make process.kill() target a whole
+    // process group, so a corrupt record must never reach it.
+    if (typeof pid !== "number" || !Number.isInteger(pid) || pid <= 0) {
+      return null;
+    }
+    if (typeof dir !== "string" || dir.length === 0) return null;
+    return { pid, dir };
+  } catch {
+    return null;
+  }
+}
+
+/** Returns every valid record in the daemons directory whose process is still alive. */
+function getAllDaemons(): DaemonInfo[] {
+  const dir = getDaemonsDir();
+  if (!fs.existsSync(dir)) return [];
+  return fs
+    .readdirSync(dir)
+    .filter((f) => f.endsWith(".json"))
+    .map((f) => readDaemonInfo(path.join(dir, f)))
+    .filter((d): d is DaemonInfo => d !== null && isProcessAlive(d.pid));
+}
+
+/**
+ * True when `child` lies strictly inside `parent` (false for equal paths).
+ * Uses path.relative instead of a string-prefix test so that a parent that
+ * already ends in a separator (the filesystem root) is handled correctly.
+ */
+function isInsideDir(parent: string, child: string): boolean {
+  const rel = path.relative(parent, child);
+  return (
+    rel !== "" &&
+    rel !== ".." &&
+    !rel.startsWith(".." + path.sep) &&
+    !path.isAbsolute(rel)
+  );
+}
+
+/** True when a live daemon already watches `targetDir` itself or one of its ancestors. */
+function isParentWatching(targetDir: string): boolean {
+  const daemons = getAllDaemons();
+  const target = path.resolve(targetDir);
+  return daemons.some((d) => d.dir === target || isInsideDir(d.dir, target));
+}
+
+/** Returns the live daemons whose directory is strictly below `targetDir`. */
+function getChildDaemons(targetDir: string): DaemonInfo[] {
+  const daemons = getAllDaemons();
+  const target = path.resolve(targetDir);
+  return daemons.filter((d) => isInsideDir(target, d.dir));
+}
+
+/**
+ * Sends SIGTERM to the recorded process and removes its record.
+ * Both steps are best-effort and independent: the record is removed even
+ * when the process is already gone, so stale records do not pile up.
+ */
+function killDaemonProcess(info: DaemonInfo): void {
+  try {
+    process.kill(info.pid, "SIGTERM");
+  } catch {
+    // Process already exited or cannot be signalled; still drop the record.
+  }
+  try {
+    fs.unlinkSync(getDaemonFile(info.dir));
+  } catch {
+    // Record already removed.
+  }
+}
+
+/**
+ * Ensures a daemon is running for the given directory.
+ * Implements hierarchy awareness: parent daemons cover children,
+ * and starting a parent daemon kills child daemons.
+ *
+ * @param storeId store name passed to the spawned `watch` command via `--store`
+ * @param watchRoot directory to watch; resolved to an absolute path
+ */
+export function ensureDaemon(storeId: string, watchRoot: string): void {
+  const targetDir = path.resolve(watchRoot);
+
+  // Fast path: this directory or one of its ancestors is already covered.
+  if (isParentWatching(targetDir)) return;
+
+  fs.mkdirSync(getDaemonsDir(), { recursive: true });
+
+  // Only one process at a time may start a daemon for this directory.
+  const lockFile = getLockFile(targetDir);
+  const lockFd = acquireStartLock(lockFile);
+  if (lockFd === null) return;
+
+  try {
+    // Re-check under the lock: another process may have started a daemon
+    // between the fast-path check and acquiring the lock.
+    if (isParentWatching(targetDir)) return;
+
+    for (const child of getChildDaemons(targetDir)) {
+      killDaemonProcess(child);
+    }
+
+    const logFile = path.join(getStateDir(), "daemon.log");
+    const logFd = fs.openSync(logFile, "a");
+    try {
+      // Spawn daemon as detached process with stdio redirected to log file.
+      // detached: true creates a new process group so daemon survives parent exit.
+      // unref() allows the parent to exit without waiting for the child.
+      const child = spawn(
+        process.execPath,
+        [path.join(__dirname, "../index.js"), "watch", "--store", storeId],
+        { cwd: targetDir, detached: true, stdio: ["ignore", logFd, logFd] },
+      );
+
+      // Without a listener a failed spawn emits an unhandled 'error' event
+      // and would crash the calling command. Starting the daemon is
+      // best-effort, so the failure is ignored; no record is written
+      // because child.pid is undefined in that case.
+      child.on("error", () => {});
+
+      if (child.pid) {
+        const info: DaemonInfo = { pid: child.pid, dir: targetDir };
+        fs.writeFileSync(getDaemonFile(targetDir), JSON.stringify(info));
+        child.unref();
+      }
+    } finally {
+      // The child holds its own copy of the descriptor; always release ours.
+      fs.closeSync(logFd);
+    }
+  } finally {
+    fs.closeSync(lockFd);
+    try {
+      fs.unlinkSync(lockFile);
+    } catch {}
+  }
+}
+
+/**
+ * Creates the start-up lock atomically (O_EXCL: creation fails if the file
+ * already exists) and returns its descriptor.
+ *
+ * Returns null when another process holds a fresh lock or the lock cannot be
+ * created. A lock older than 30 seconds is assumed to be left over from a
+ * process that died before releasing it; it is removed and creation is
+ * retried once. Two processes reclaiming the same stale lock at the same
+ * instant can both succeed; the re-check done under the lock keeps that
+ * window small.
+ */
+function acquireStartLock(lockFile: string): number | null {
+  const staleLockMs = 30000;
+  const flags =
+    fs.constants.O_CREAT | fs.constants.O_EXCL | fs.constants.O_RDWR;
+
+  for (let attempt = 0; attempt < 2; attempt++) {
+    try {
+      return fs.openSync(lockFile, flags);
+    } catch {
+      try {
+        const ageMs = Date.now() - fs.statSync(lockFile).mtimeMs;
+        if (ageMs < staleLockMs) return null;
+        fs.unlinkSync(lockFile);
+      } catch {
+        // Lock vanished, or the failure was not "already exists": give up.
+        return null;
+      }
+    }
+  }
+  return null;
+}
+
+/**
+ * Stops the daemon watching the specified directory.
+ *
+ * @param dir directory whose daemon record should be looked up; resolved to
+ * an absolute path before hashing
+ * @returns true when a live daemon was found and signalled; false when no
+ * record exists or the recorded process is no longer running (the stale
+ * record is removed in that case)
+ */
+export function stopDaemon(dir: string): boolean {
+  const daemonFile = getDaemonFile(path.resolve(dir));
+  const info = readDaemonInfo(daemonFile);
+  if (!info) return false;
+
+  // A record can outlive its process (crash, reboot). Reporting "stopped"
+  // for it would be wrong, so clean it up and report that nothing ran.
+  if (!isProcessAlive(info.pid)) {
+    try {
+      fs.unlinkSync(daemonFile);
+    } catch {}
+    return false;
+  }
+
+  killDaemonProcess(info);
+  return true;
+}
+
+/**
+ * Lists all running daemons.
+ *
+ * @returns one record per daemon whose process is still alive
+ */
+export function listDaemons(): DaemonInfo[] {
+  return getAllDaemons();
+}