From 7ba83f63540b050d3726655d1c2b9fa92aec7806 Mon Sep 17 00:00:00 2001 From: Ming Jia Date: Mon, 20 Oct 2025 19:31:20 -0700 Subject: [PATCH 1/3] feat: add network monitoring capabilities - Add get_network_logs tool to monitor network requests and responses - Monitor HTTP/HTTPS requests from browser's network tab - View response headers, status codes, and timing information - Debug network issues and API calls during automation - Works alongside existing console monitoring for comprehensive debugging --- README.md | 18 ++++++++++++++++++ src/index.ts | 2 +- src/tools/custom.ts | 28 ++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a85af20..dbf90b8 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,24 @@ Browser MCP is an MCP server + Chrome extension that allows you to automate your - 🔒 Private: Since automation happens locally, your browser activity stays on your device and isn't sent to remote servers. - 👤 Logged In: Uses your existing browser profile, keeping you logged into all your services. - 🥷🏼 Stealth: Avoids basic bot detection and CAPTCHAs by using your real browser fingerprint. +- 📊 Network Monitoring: Monitor network requests and responses from the browser's network tab in addition to console logs. 
+ +## Network Monitoring + +The Browser MCP server now includes network monitoring capabilities that allow you to: + +- **Monitor Network Requests**: Track all HTTP/HTTPS requests made by the browser +- **View Response Details**: Access response headers, status codes, and timing information +- **Debug Network Issues**: Identify failed requests, slow responses, and network errors +- **API Testing**: Monitor API calls and their responses during automation + +### Available Tools + +- `get_network_logs`: Retrieves network request and response data from the browser's network tab +- `get_console_logs`: Retrieves console logs (existing functionality) +- `screenshot`: Takes screenshots of the current page + +The network monitoring feature works alongside the existing console monitoring, providing comprehensive debugging capabilities for web automation tasks. ## Contributing diff --git a/src/index.ts b/src/index.ts index 2278a75..2750799 100644 --- a/src/index.ts +++ b/src/index.ts @@ -24,7 +24,7 @@ function setupExitWatchdog(server: Server) { const commonTools: Tool[] = [common.pressKey, common.wait]; -const customTools: Tool[] = [custom.getConsoleLogs, custom.screenshot]; +const customTools: Tool[] = [custom.getConsoleLogs, custom.getNetworkLogs, custom.screenshot]; const snapshotTools: Tool[] = [ common.navigate(true), diff --git a/src/tools/custom.ts b/src/tools/custom.ts index afaffe4..303affd 100644 --- a/src/tools/custom.ts +++ b/src/tools/custom.ts @@ -1,9 +1,17 @@ import { zodToJsonSchema } from "zod-to-json-schema"; +import { z } from "zod"; import { GetConsoleLogsTool, ScreenshotTool } from "@repo/types/mcp/tool"; import { Tool } from "./tool"; +// Network monitoring tool schema +const GetNetworkLogsTool = z.object({ + name: z.literal("get_network_logs"), + description: z.literal("Get network requests and responses from the browser's network tab"), + arguments: z.object({}).optional(), +}); + export const getConsoleLogs: Tool = { schema: { name: 
GetConsoleLogsTool.shape.name.value, @@ -24,6 +32,26 @@ export const getConsoleLogs: Tool = { }, }; +export const getNetworkLogs: Tool = { + schema: { + name: GetNetworkLogsTool.shape.name.value, + description: GetNetworkLogsTool.shape.description.value, + inputSchema: zodToJsonSchema(GetNetworkLogsTool.shape.arguments), + }, + handle: async (context, _params) => { + const networkLogs = await context.sendSocketMessage( + "browser_get_network_logs", + {}, + ); + const text: string = networkLogs + .map((log: any) => JSON.stringify(log, null, 2)) + .join("\n\n"); + return { + content: [{ type: "text", text }], + }; + }, +}; + export const screenshot: Tool = { schema: { name: ScreenshotTool.shape.name.value, From 3f4dfd43926cc53576777fde876ab6bdd759b512 Mon Sep 17 00:00:00 2001 From: Pedram Amini Date: Fri, 7 Nov 2025 16:12:55 -0600 Subject: [PATCH 2/3] Add browser_new_page tool for opening URLs in new tabs - Add newPage tool factory in src/tools/common.ts - Register tool in snapshotTools array in src/index.ts - Update documentation in CLAUDE.md - Include extension-changes.patch for maintainers This enables AI applications to open URLs in new browser tabs, enhancing multi-tab workflows. The feature requires corresponding Chrome extension changes documented in extension-changes.patch. --- CLAUDE.md | 125 ++++++++++++++++++++++++++++++++++++++++ extension-changes.patch | 95 ++++++++++++++++++++++++++++++ src/index.ts | 1 + src/tools/common.ts | 28 +++++++++ 4 files changed, 249 insertions(+) create mode 100644 CLAUDE.md create mode 100644 extension-changes.patch diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..fe8fcce --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,125 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Browser MCP is an MCP (Model Context Protocol) server that enables AI applications to automate browser interactions. 
It connects to a Chrome extension via WebSocket to control the user's actual browser (not headless instances), allowing automation while preserving logged-in sessions and avoiding bot detection. + +**Key distinction**: Unlike Playwright MCP which creates new browser instances, Browser MCP controls the user's existing browser profile through a Chrome extension connection. + +## Build & Development Commands + +```bash +# Type checking +npm run typecheck + +# Build the project +npm run build + +# Watch mode for development +npm run watch + +# Run MCP inspector for debugging +npm run inspector +``` + +## Architecture + +### Core Communication Flow + +1. **MCP Server** (src/index.ts, src/server.ts) - Implements MCP protocol using `@modelcontextprotocol/sdk` +2. **WebSocket Bridge** (src/ws.ts, src/context.ts) - Maintains WebSocket connection to Chrome extension +3. **Tool Handlers** (src/tools/) - Convert MCP tool calls to WebSocket messages sent to the extension +4. **Chrome Extension** (not in this repo) - Executes browser automation commands in the actual browser + +### Context Management + +The `Context` class (src/context.ts) is the central communication hub: +- Manages WebSocket connection to the browser extension +- Provides `sendSocketMessage()` method used by all tools to communicate with the extension +- Throws helpful error message if extension is not connected: "No connection to browser extension..." 
+- Connection is established when a user clicks "Connect" in the Browser MCP extension + +### Tool Architecture + +Tools are defined in three categories in src/tools/: + +**common.ts** - Navigation and basic interactions: +- `navigate`, `goBack`, `goForward` - Page navigation +- `newPage` - Open URL in new browser tab (see browser_new_page below) +- `wait`, `pressKey` - Timing and keyboard input +- Tools can optionally capture ARIA snapshots after execution (controlled by factory parameter) + +**snapshot.ts** - Snapshot-based interactions (always capture ARIA snapshot after execution): +- `snapshot` - Capture current page state as ARIA tree +- `click`, `hover`, `type`, `selectOption` - Element interactions +- These tools require element references from previous snapshots + +**custom.ts** - Specialized tools: +- `getConsoleLogs` - Retrieve browser console output +- `screenshot` - Capture page screenshot as base64 PNG + +### Tool Implementation Pattern + +Each tool follows this structure: +1. Define schema using Zod types from `@repo/types/mcp/tool` (workspace dependency) +2. Convert Zod schema to JSON Schema using `zod-to-json-schema` +3. Implement `handle()` function that: + - Validates params with Zod schema + - Calls `context.sendSocketMessage()` with appropriate message type + - Returns `ToolResult` with text/image content + +Example message types used: `browser_navigate`, `browser_click`, `browser_snapshot`, `browser_screenshot`, etc. 
+ +### ARIA Snapshot Mechanism + +The `captureAriaSnapshot()` function (src/utils/aria-snapshot.ts) is critical: +- Requests page snapshot from extension via `browser_snapshot` message +- Returns YAML-formatted ARIA tree showing page structure +- Used by snapshot-based tools to provide page context after actions +- Includes page URL and title for reference + +### WebSocket Server Setup + +The WebSocket server (src/ws.ts): +- Defaults to port from `mcpConfig.defaultWsPort` +- Kills any process using the port before starting +- Waits until port is free before creating WebSocketServer +- Only maintains one active connection at a time (new connections close previous ones) + +## Workspace Dependencies + +This repository depends on workspace packages (not yet published): +- `@repo/types` - Shared TypeScript types (e.g., message types, tool schemas) +- `@repo/config` - Configuration objects (app.config, mcp.config) +- `@repo/messaging` - WebSocket messaging utilities +- `@repo/utils` - Shared utility functions +- `@r2r/messaging` - Socket message sender utilities + +**Important**: These dependencies prevent standalone builds outside the monorepo environment. + +## Path Aliases + +TypeScript is configured with path alias `@/*` → `./src/*` + +## New Feature: browser_new_page + +The `browser_new_page` tool allows opening URLs in new browser tabs. 
+ +**Implementation** (src/tools/common.ts:127-148): +- `newPage` tool factory that sends `browser_new_page` WebSocket message +- Registered in `snapshotTools` array (src/index.ts:31) +- Uses inline Zod schema (`NewPageSchema`) to validate that `url` is a string (no URL-format check) +- Captures ARIA snapshot of new page after opening + +**Chrome Extension Support**: +- Requires corresponding handler in Chrome extension for `browser_new_page` message +- Extension should use `chrome.tabs.create({url})` API to open new tab +- Feature is backward compatible: gracefully fails if extension doesn't support the message + +## Key Constraints + +- Tools cannot execute browser automation without an active WebSocket connection +- Connection requires user to manually connect a tab via the Browser MCP Chrome extension +- All browser commands have a 30-second timeout by default (configurable in `sendSocketMessage`) diff --git a/extension-changes.patch b/extension-changes.patch new file mode 100644 index 0000000..1638362 --- /dev/null +++ b/extension-changes.patch @@ -0,0 +1,95 @@ +# Chrome Extension Changes for browser_new_page + +This patch describes the changes needed in the Browser MCP Chrome extension +to support the `browser_new_page` tool. + +NOTE: The extension uses minified/compiled JavaScript. The changes below show +the conceptual modifications. Line numbers are approximate based on the +compiled output structure. + +## File: background.js (compiled/minified) + +### 1. Add Tool Schema Definition + +Location: After other tool schemas (after `pq`, around line 587) + +```javascript +pq2=H.object({ + name:H.literal("browser_new_page"), + description:H.literal("Open a URL in a new browser tab"), + arguments:H.object({ + url:H.string().describe("The URL to open in a new tab") + }) +}) +``` + +### 2. Add to Discriminated Union + +Location: In the `Iq` discriminated union definition (around line 587) + +```javascript +Iq=H.discriminatedUnion("name",[ + pq, + pq2, // <-- ADD THIS + gq, + mq, + // ... 
rest of tools +]) +``` + +### 3. Add Message Handler + +Location: In the `Lq` message handler object (around line 587) + +```javascript +const Lq={ + browser_navigate:async({url:t},e)=>{ + const{tabId:n}=e; + await GH(t,n) + }, + browser_new_page:async({url:t},e)=>{ // <-- ADD THIS + await chrome.tabs.create({url:t}) + }, + browser_go_back:async(t,e)=>{ + // ... rest of handlers + }, + // ... +} +``` + +## Summary of Changes + +1. **Schema Definition**: Added `pq2` variable with Zod schema for `browser_new_page` tool +2. **Union Registration**: Added `pq2` to the `Iq` discriminated union +3. **Handler**: Added `browser_new_page` handler that calls `chrome.tabs.create({url})` + +## Testing + +After applying these changes: +1. Load the extension as unpacked in Chrome +2. Connect to a tab +3. Use MCP client to call `browser_new_page` with a URL +4. Verify a new tab opens with the specified URL + +## Build Notes + +If the extension has a build process (TypeScript/bundler), these changes +should be made in the source files before compilation. The schema definition +should use proper Zod imports and the handler should follow the existing +pattern for other tools. + +If modifying the compiled output directly (as was done for testing): +- Back up the original background.js +- Add the three sections shown above to the minified code +- Test thoroughly as minified code is fragile + +## Version Tested + +- Browser MCP Extension: v1.3.4 +- Modification Date: 2025-11-07 +- Tested with: Chrome 131.x + +## Related + +This extension change works with the corresponding MCP server changes that +add the `browser_new_page` tool to the server's tool list. 
diff --git a/src/index.ts b/src/index.ts index 2278a75..560767a 100644 --- a/src/index.ts +++ b/src/index.ts @@ -28,6 +28,7 @@ const customTools: Tool[] = [custom.getConsoleLogs, custom.screenshot]; const snapshotTools: Tool[] = [ common.navigate(true), + common.newPage(true), common.goBack(true), common.goForward(true), snapshot.snapshot, diff --git a/src/tools/common.ts b/src/tools/common.ts index 3dae834..ce46e79 100644 --- a/src/tools/common.ts +++ b/src/tools/common.ts @@ -1,3 +1,4 @@ +import { z } from "zod"; import { zodToJsonSchema } from "zod-to-json-schema"; import { @@ -118,3 +119,30 @@ export const pressKey: Tool = { }; }, }; + +const NewPageSchema = z.object({ + url: z.string().describe("The URL to open in a new tab"), +}); + +export const newPage: ToolFactory = (snapshot) => ({ + schema: { + name: "browser_new_page", + description: "Open a URL in a new browser tab", + inputSchema: zodToJsonSchema(NewPageSchema), + }, + handle: async (context, params) => { + const { url } = NewPageSchema.parse(params); + await context.sendSocketMessage("browser_new_page", { url }); + if (snapshot) { + return captureAriaSnapshot(context); + } + return { + content: [ + { + type: "text", + text: `Opened ${url} in a new tab`, + }, + ], + }; + }, +}); From a8c24c1607971da9e4fd6b6d02d650ff3dbfd0de Mon Sep 17 00:00:00 2001 From: Pedram Amini Date: Tue, 11 Nov 2025 18:11:32 -0600 Subject: [PATCH 3/3] Update README with merged features section Added merged pull request links for new features. --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index dbf90b8..9a706ed 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,11 @@ Docs

+## My Version + +- Merged [feat: add network monitoring capabilities](https://github.com/BrowserMCP/mcp/pull/132) +- Merged [Add browser_new_page tool for opening URLs in new tabs](https://github.com/BrowserMCP/mcp/pull/136) + ## About Browser MCP is an MCP server + Chrome extension that allows you to automate your browser using AI applications like VS Code, Claude, Cursor, and Windsurf.