From 15e4fac1de4e47faa93a31b72a9e14a477fcf059 Mon Sep 17 00:00:00 2001 From: Nizar Alrifai Date: Thu, 5 Mar 2026 12:23:09 -0800 Subject: [PATCH] Remove single-pass review flow, always use two-pass validator The single-pass review flow was initially created because we were unsure about the performance of the two-pass validator flow. The two-pass flow has since become the default and proven itself, and the single-pass flow is no longer used. This removes the review_use_validator toggle and all single-pass review code. Co-authored-by: factory-droid[bot] <138933559+factory-droid[bot]@users.noreply.github.com> --- action.yml | 16 +- review/action.yml | 10 +- src/create-prompt/templates/review-prompt.ts | 390 ------------------ src/entrypoints/generate-review-prompt.ts | 47 +-- src/entrypoints/prepare-validator.ts | 7 - src/tag/commands/review.ts | 41 +- .../templates/review-prompt.test.ts | 199 --------- test/entrypoints/prepare-validator.test.ts | 7 +- test/modes/tag/review-command.test.ts | 85 +--- 9 files changed, 46 insertions(+), 756 deletions(-) delete mode 100644 src/create-prompt/templates/review-prompt.ts delete mode 100644 test/create-prompt/templates/review-prompt.test.ts diff --git a/action.yml b/action.yml index e419f98..ef1d568 100644 --- a/action.yml +++ b/action.yml @@ -99,16 +99,12 @@ inputs: description: "Override reasoning effort for review flows (passed to Droid Exec as --reasoning-effort). If empty and review_model is also empty, the action defaults internally to gpt-5.2 at high reasoning." required: false default: "high" - review_use_validator: - description: "Enable two-pass review: generate candidate comments to JSON, then validate and post only approved ones." - required: false - default: "true" review_candidates_path: - description: "Path to write review candidates JSON (run #1 when review_use_validator=true)." + description: "Path to write review candidates JSON (pass 1 of the two-pass review)." required: false default: "${{ runner.temp }}/droid-prompts/review_candidates.json" review_validated_path: - description: "Path to write review validated JSON (run #2 when review_use_validator=true)." + description: "Path to write review validated JSON (pass 2 of the two-pass review)." required: false default: "${{ runner.temp }}/droid-prompts/review_validated.json" fill_model: @@ -189,7 +185,6 @@ runs: SECURITY_SCAN_DAYS: ${{ inputs.security_scan_days }} REVIEW_MODEL: ${{ inputs.review_model }} REASONING_EFFORT: ${{ inputs.reasoning_effort }} - REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }} FILL_MODEL: ${{ inputs.fill_model }} @@ -336,13 +331,12 @@ runs: - name: Prepare validator id: prepare_validator - if: steps.prepare.outputs.contains_trigger == 'true' && inputs.review_use_validator == 'true' && steps.prepare.outputs.run_code_review == 'true' + if: steps.prepare.outputs.contains_trigger == 'true' && steps.prepare.outputs.run_code_review == 'true' shell: bash run: | bun run ${GITHUB_ACTION_PATH}/src/entrypoints/prepare-validator.ts env: GITHUB_TOKEN: ${{ steps.prepare.outputs.github_token }} - REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }} REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} DROID_COMMENT_ID: ${{ steps.prepare.outputs.droid_comment_id }} @@ -351,7 +345,7 @@ runs: - name: Run Droid Exec (validator) id: droid_validator - if: steps.prepare.outputs.contains_trigger == 'true' && inputs.review_use_validator == 'true' && steps.prepare.outputs.run_code_review == 'true' + if: steps.prepare.outputs.contains_trigger == 'true' && steps.prepare.outputs.run_code_review == 'true' shell: bash run: | @@ -389,7 +383,7 @@ runs: GITHUB_EVENT_NAME: ${{ github.event_name }} TRIGGER_COMMENT_ID: ${{ github.event.comment.id }} IS_PR: ${{ github.event.issue.pull_request != null || github.event_name == 'pull_request_target' || github.event_name == 'pull_request_review_comment' }} - DROID_SUCCESS: ${{ (inputs.review_use_validator == 'true' && steps.prepare.outputs.run_code_review == 'true' && steps.droid_validator.outputs.conclusion == 'success') || (!(inputs.review_use_validator == 'true' && steps.prepare.outputs.run_code_review == 'true') && steps.droid.outputs.conclusion == 'success') }} + DROID_SUCCESS: ${{ (steps.prepare.outputs.run_code_review == 'true' && steps.droid_validator.outputs.conclusion == 'success') || (steps.prepare.outputs.run_code_review != 'true' && steps.droid.outputs.conclusion == 'success') }} TRIGGER_USERNAME: ${{ github.event.comment.user.login || github.event.issue.user.login || github.event.pull_request.user.login || github.event.sender.login || github.triggering_actor || github.actor || '' }} PREPARE_SUCCESS: ${{ steps.prepare.outcome == 'success' }} PREPARE_ERROR: ${{ steps.prepare.outputs.prepare_error || '' }} diff --git a/review/action.yml b/review/action.yml index 6d8e620..6242fb7 100644 --- a/review/action.yml +++ b/review/action.yml @@ -24,10 +24,6 @@ inputs: description: "Path to write review results JSON" required: false default: "" - review_use_validator: - description: "Enable two-pass review: generate candidate comments, then validate and post only approved ones." - required: false - default: "true" review_candidates_path: description: "Path to write review candidates JSON (pass 1)." required: false @@ -84,7 +80,6 @@ runs: DROID_COMMENT_ID: ${{ inputs.tracking_comment_id }} REVIEW_MODEL: ${{ inputs.review_model }} REVIEW_TYPE: "code" - REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} REASONING_EFFORT: ${{ inputs.reasoning_effort }} DROID_OUTPUT_FILE: ${{ inputs.output_file }} @@ -113,13 +108,11 @@ runs: - name: Prepare Validator id: prepare_validator - if: steps.prompt.outputs.review_use_validator == 'true' shell: bash run: | bun run ${{ github.action_path }}/../src/entrypoints/prepare-validator.ts env: GITHUB_TOKEN: ${{ steps.token.outputs.github_token }} - REVIEW_USE_VALIDATOR: ${{ inputs.review_use_validator }} REVIEW_VALIDATED_PATH: ${{ inputs.review_validated_path }} REVIEW_CANDIDATES_PATH: ${{ inputs.review_candidates_path }} REVIEW_MODEL: ${{ inputs.review_model }} @@ -128,7 +121,6 @@ runs: - name: Run Validator id: validator - if: steps.prompt.outputs.review_use_validator == 'true' shell: bash run: | bun run ${{ github.action_path }}/../base-action/src/index.ts @@ -153,4 +145,4 @@ runs: OUTPUT_FILE: ${{ inputs.output_file }} TRIGGER_USERNAME: ${{ github.event.pull_request.user.login || github.event.sender.login || github.triggering_actor || github.actor || '' }} PREPARE_SUCCESS: "true" - DROID_SUCCESS: ${{ (steps.prompt.outputs.review_use_validator == 'true' && steps.validator.outcome == 'success') || (steps.prompt.outputs.review_use_validator != 'true' && steps.review.outcome == 'success') }} + DROID_SUCCESS: ${{ steps.validator.outcome == 'success' }} diff --git a/src/create-prompt/templates/review-prompt.ts b/src/create-prompt/templates/review-prompt.ts deleted file mode 100644 index 3216ce9..0000000 --- a/src/create-prompt/templates/review-prompt.ts +++ /dev/null @@ -1,390 +0,0 @@ -import { formatGuidelinesSection } from "../../utils/review-guidelines"; -import type { PreparedContext } from "../types"; - -export function generateReviewPrompt(context: PreparedContext): string { - const prNumber = context.eventData.isPR - ? context.eventData.prNumber - : context.githubContext && "entityNumber" in context.githubContext - ? String(context.githubContext.entityNumber) - : "unknown"; - - const repoFullName = context.repository; - const headRefName = context.prBranchData?.headRefName ?? "unknown"; - const headSha = context.prBranchData?.headRefOid ?? "unknown"; - const baseRefName = context.eventData.baseBranch ?? "unknown"; - - const diffPath = - context.reviewArtifacts?.diffPath ?? "$RUNNER_TEMP/droid-prompts/pr.diff"; - const commentsPath = - context.reviewArtifacts?.commentsPath ?? - "$RUNNER_TEMP/droid-prompts/existing_comments.json"; - const descriptionPath = - context.reviewArtifacts?.descriptionPath ?? - "$RUNNER_TEMP/droid-prompts/pr_description.txt"; - - return `You are performing an automated code review for PR #${prNumber} in ${repoFullName}. -The gh CLI is installed and authenticated via GH_TOKEN. -${formatGuidelinesSection(context.reviewGuidelines)} -### Context - -* Repo: ${repoFullName} -* PR Number: ${prNumber} -* PR Head Ref: ${headRefName} -* PR Head SHA: ${headSha} -* PR Base Ref: ${baseRefName} -* The PR branch has already been checked out. You have full access to read any file in the codebase, not just the diff output. - -### Pre-computed Review Artifacts - -The following files have been pre-computed and contain the COMPLETE data for this PR: - -* **PR Description**: \`${descriptionPath}\` - Contains the PR title and description (body) explaining the intent and scope of the changes -* **Full PR Diff**: \`${diffPath}\` - Contains the COMPLETE diff of ALL changed files (already computed via \`git merge-base\` and \`git diff\`) -* **Existing Comments**: \`${commentsPath}\` - Contains all existing PR comments and reviews in JSON format - -**IMPORTANT**: Use these pre-computed files instead of running git or gh commands to fetch diff/comments. This ensures you have access to the COMPLETE data without truncation. - ---- - -## CRITICAL INSTRUCTION - -**DO NOT STOP UNTIL YOU HAVE REVIEWED EVERY SINGLE CHANGED FILE IN THE DIFF.** - -You MUST: -1. Read the ENTIRE \`${diffPath}\` file first (use offset/limit if needed for large files) -2. Create a mental checklist of ALL files that were changed -3. Review EACH file systematically - do not skip any file -4. Only submit your review AFTER you have analyzed every single changed file - -If the diff is large, work through it methodically. Do not rush. Do not skip files. The quality of your review depends on thoroughness. - ---- - -## Objectives - -1. Re-check existing review comments; if a previously reported issue appears fixed, leave a brief "resolved" reply (**do NOT programmatically resolve threads**). -2. Review the PR diff and identify **high-confidence, actionable bugs** introduced by this PR. -3. Leave concise **inline comments (1-2 sentences)** for qualifying bugs. You may comment on unchanged lines *only* if the PR clearly triggers the issue—explain the trigger path. - ---- - -## Procedure - -Follow these phases **in order**. Do not submit findings until Phase 1 and Phase 2 are complete. - ---- - -## Phase 1: Context Gathering (REQUIRED — do not report bugs yet) - -1. **Read the PR description** to understand the intent and scope: - \`Read ${descriptionPath}\` - -2. **Read existing comments** from the pre-computed file: - \`Read ${commentsPath}\` - -3. **Read the COMPLETE diff** from the pre-computed file: - \`Read ${diffPath}\` - - If the file is large (>2400 lines), read it in chunks using offset/limit parameters. - **DO NOT proceed until you have read the ENTIRE diff.** - -4. **List all changed files** - After reading the diff, explicitly list every file that was changed. This is your checklist. - -5. For **each file in the diff**, gather context: - - * New imports → Grep to confirm the symbol exists - * New/modified functions → Grep for callers to understand usage - * Data-processing code → Read surrounding code to infer expected types - -6. Do **not** identify or report bugs yet. This phase is for understanding only. - ---- - -## Phase 2: Issue Identification (ONLY after Phase 1) - -Review **every changed line**. You must complete the review even if you find issues early. - -### Analysis discipline - -* Verify with Grep/Read before flagging (no speculation) -* Trace data flow to confirm a **real trigger path** -* Check whether the pattern exists elsewhere (may be intentional) - -### Cross-reference checks - -* When reviewing tests, search for related constants, configs, or environment variables -* Verify test assumptions match production behavior - *Example:* if a test sets an env var, Grep where it is consumed to confirm behavior matches prod - -### Import verification - -* Any import referencing a non-existent symbol is a bug (runtime ImportError) - ---- - -## Systematic Analysis Patterns - -Apply these patterns during your review. These target common bug classes: - -### Logic & Variable Usage - -* When reviewing conditionals, verify the correct variable is referenced in each clause -* Check for AND vs OR confusion in permission/validation logic (especially security-critical paths) -* Verify return statements return the intended value: - - Not wrapper objects when data is expected (e.g., \`safeParse()\` result vs \`.data\`) - - Not intermediate variables when final result is expected - - Not wrong object properties (e.g., \`obj.original\` vs \`obj.modified\`) -* In loops or transformations, confirm variable names match semantic purpose -* Flag operations where variable names suggest type mismatches (e.g., \`*Time\` used where \`*Date\` expected) - -### Null/Undefined Safety - -* For each property access chain (\`a.b.c\`), verify none of the intermediate values can be null/undefined/None: - - Check API responses that might return null - - Check database queries that might return no results - - Check array operations like \`array[0]\` or \`array.find()\` -* When Optional types are unwrapped (\`.get()\`, \`!\`, non-null assertions), verify presence is checked first -* In conditional branches, verify null handling exists for all code paths -* Pay special attention to: - - Authentication/authorization contexts (user might not be authenticated) - - Optional relationships (foreign keys, joined data) - - Map/dictionary lookups - - Configuration values that might not be set - -### Type Compatibility & Data Flow - -* Trace what types flow into math operations: - - \`floor\`, \`ceil\`, \`round\`, modulo on datetime/string inputs cause errors - - Arithmetic operations on mixed types -* Verify comparison operators match types: - - Object reference equality (\`===\`, \`==\`) on different instances always false - - Comparing object types when value comparison intended (e.g., dayjs objects) -* Check function parameters receive expected types after transformations: - - Serialization/deserialization boundary crossings - - Database field type conversions - - API request/response parsing -* Flag string operations on numbers or vice versa without explicit conversion -* When data flows through multiple functions, verify type consistency at each step - -### Async/Await Patterns (JavaScript/TypeScript) - -* **CRITICAL**: Flag \`forEach\`/\`map\`/\`filter\` with async callbacks - these don't await: - \`\`\`javascript - // BUG: async callbacks are fire-and-forget - items.forEach(async (item) => await process(item)); - - // CORRECT: use for...of or Promise.all - for (const item of items) await process(item); - \`\`\` -* Verify all async function calls are awaited when their result or side-effect is needed: - - Database writes that must complete before continuing - - API calls whose responses are used - - File operations that affect subsequent code -* Check promise chains have proper error handling (\`.catch()\` or try/catch) -* Verify parallel operations use \`Promise.all\`/\`Promise.allSettled\` appropriately - -### Security Vulnerabilities - -* **SSRF (Server-Side Request Forgery)**: - - Flag unvalidated URL fetching: \`open(url)\`, \`fetch(user_input)\`, \`curl\` with user input - - Verify URL allowlists exist for external requests -* **XSS (Cross-Site Scripting)**: - - Check for unescaped user input in HTML/template contexts - - Verify template engines auto-escape by default -* **Authentication & Session State**: - - OAuth state/nonce must be per-request random, not static or reused - - CSRF tokens must be unpredictable and verified - - Session data must not leak between requests -* **Input Validation Bypasses**: - - \`indexOf()\`/\`startsWith()\` for origin validation can be bypassed (use exact allowlist matching) - - Case sensitivity in security checks (email blacklists, domain checks) -* **Timing Attacks**: - - Secret/token comparison should use constant-time comparison functions -* **Cache Poisoning**: - - Verify security decisions (permissions, auth) aren't cached asymmetrically - - If grants are cached, denials must also be cached (or neither) - -### Concurrency Issues (when applicable) - -* Check if shared state is modified without synchronization: - - Global variables or class fields accessed by multiple threads/goroutines - - Database records that can be modified concurrently -* Verify double-checked locking re-checks condition after acquiring lock: - \`\`\`go - // BUG: doesn't re-check after lock - if !initialized { - lock.Lock() - initialize() - lock.Unlock() - } - - // CORRECT: re-check after acquiring lock - if !initialized { - lock.Lock() - if !initialized { - initialize() - } - lock.Unlock() - } - \`\`\` -* Flag non-atomic operations on shared counters: - - \`count = count + 1\` in concurrent code (use atomic operations) - - Read-modify-write without locks -* Check that resources accessed by multiple threads have proper synchronization - -### API Contract & Breaking Changes - -* When serializers/validators/response formatters change: - - Use Grep to find API consumers and test files - - Verify response structure remains compatible (field names, types, nesting) - - Check for removed fields, changed types, or new required fields -* When database models/schemas change: - - Verify migrations include data backfill for existing records - - Check that existing queries still work with new schema -* When function signatures change: - - Grep for all callers to verify compatibility - - Check if return type changes break caller assumptions -* Review test files for expected API shapes and verify changes match tests - ---- - -## **Reporting Gate (CRITICAL)** - -Only report findings that meet **at least one** of the following: - -### Reportable bugs - -* **Definite runtime failures** (TypeError, KeyError, AttributeError, ImportError) -* **Incorrect logic** with a clear trigger path and observable wrong result -* **Security vulnerabilities** with a realistic exploit path -* **Data corruption or loss** -* **Breaking contract changes** (API / response / schema / validator behavior) where the contract is discoverable in code, tests, or docs - -### Do NOT report - -* Test code hygiene (unused vars, setup patterns) unless it causes test failure -* Defensive "what-if" scenarios without a realistic trigger -* Cosmetic issues (message text, naming, formatting) -* Suggestions to "add guards," "add try/catch," or "be safer" without a concrete failure - -### Confidence rule - -* Prefer **DEFINITE** bugs over **POSSIBLE** bugs -* Report POSSIBLE bugs **only** if you can identify a realistic execution path - ---- - -## Targeted semantic passes (apply when relevant) - -* **API / validator / serializer changes** - Explicitly check for response-format or contract breakage - *(e.g., changed error response structure, removed or renamed fields, different status codes, altered required keys)* - -* **Auth / OAuth / session / state changes** - Check null-state handling, per-request randomness (state/nonce), and failure paths - ---- - -## Deduplication - -* Never open a new finding for an issue previously reported by this bot on this PR -* If an issue appears fixed, reply "resolved" in the existing thread - ---- - -## Priority Levels - -* [P0] Blocking / crash / exploit -* [P1] Urgent correctness or security issue -* [P2] Real bug with limited impact -* [P3] Minor but real bug - ---- - -## Comment format - -Each inline comment must be: - -**[P0-P3] Clear imperative title (≤80 chars)** - - -(blank line) - -One short paragraph explaining *why* this is a bug and *how* it manifests. - -* Max 1 paragraph -* Code snippets ≤3 lines, Markdown fenced -* Matter-of-fact, non-accusatory tone - ---- - -## Phase 3: Submit Review - -### When NOT to submit - -* PR is formatting-only -* You cannot anchor a high-confidence issue to a specific changed line -* All findings are low-severity (P2/P3) -* All findings fail the Reporting Gate above - -### Tools & mechanics - -* Use \`github_inline_comment___create_inline_comment\` - * Anchor using **path + side + line** - * RIGHT = new/modified code, LEFT = removed code - * Line numbers must correspond to the chosen side -* Use \`github_pr___submit_review\` for the summary -* Use \`github_pr___delete_comment\` or \`github_pr___minimize_comment\` for outdated "no issues" comments -* Use \`github_pr___reply_to_comment\` to acknowledge resolved issues -* **Do NOT call** \`github_pr___resolve_review_thread\` -* Do **not** approve or request changes - -### "No issues" handling - -* If no issues and a prior "no issues" comment exists → skip -* If no issues and no prior comment exists → post a brief summary -* If issues exist and a prior "no issues" comment exists → delete/minimize it -* **Do NOT delete** comment ID ${context.droidCommentId} - ---- - -## Review summary - -In the submitted review body: - -* State whether the changes are correct or incorrect -* Provide a 1-3 sentence overall assessment -${ - context.outputFilePath - ? ` ---- - -## Output File (REQUIRED) - -After completing your review, you MUST write your findings to \`${context.outputFilePath}\` as a JSON file with this structure: - -\`\`\`json -{ - "type": "code-review", - "findings": [ - { - "id": "CR-001", - "severity": "P0|P1|P2|P3", - "file": "path/to/file.ts", - "line": 55, - "side": "RIGHT", - "description": "Brief description of the issue", - "suggestion": "Optional suggested fix" - } - ], - "summary": "Brief overall summary of the review" -} -\`\`\` - -If no issues were found, write: \`{"type": "code-review", "findings": [], "summary": "No issues found"}\` - -This file is required for downstream processing of review results. -` - : "" -}`; -} diff --git a/src/entrypoints/generate-review-prompt.ts b/src/entrypoints/generate-review-prompt.ts index 83dbf5c..9163e16 100644 --- a/src/entrypoints/generate-review-prompt.ts +++ b/src/entrypoints/generate-review-prompt.ts @@ -12,7 +12,6 @@ import { fetchPRBranchData } from "../github/data/pr-fetcher"; import { computeReviewArtifacts } from "../github/data/review-artifacts"; import { createPrompt } from "../create-prompt"; import { prepareMcpTools } from "../mcp/install-mcp-server"; -import { generateReviewPrompt } from "../create-prompt/templates/review-prompt"; import { generateReviewCandidatesPrompt } from "../create-prompt/templates/review-candidates-prompt"; import { generateSecurityReviewPrompt } from "../create-prompt/templates/security-review-prompt"; import { normalizeDroidArgs, parseAllowedTools } from "../utils/parse-tools"; @@ -22,9 +21,6 @@ async function run() { try { const githubToken = process.env.GITHUB_TOKEN!; const reviewType = process.env.REVIEW_TYPE || "code"; - const reviewUseValidator = - reviewType === "code" && - (process.env.REVIEW_USE_VALIDATOR ?? "true").trim() !== "false"; const commentId = parseInt(process.env.DROID_COMMENT_ID || "0"); if (!commentId) { @@ -92,13 +88,11 @@ async function run() { githubToken, }); - // Select prompt generator based on review type and validator mode + // Select prompt generator based on review type const generatePrompt = reviewType === "security" ? generateSecurityReviewPrompt - : reviewUseValidator - ? generateReviewCandidatesPrompt - : generateReviewPrompt; + : generateReviewCandidatesPrompt; // Pass the output file path so the prompt can instruct the Droid // to write structured findings for the combine step @@ -146,30 +140,23 @@ async function run() { // Task tool is needed for parallel subagent reviews in candidate generation phase. // FetchUrl is needed to fetch linked tickets from the PR description. - const candidateGenerationTools = reviewUseValidator - ? ["Task", "FetchUrl"] - : []; - - // When validator is enabled, the candidate generation phase should NOT - // have access to PR mutation tools. When disabled, allow them. - const reviewTools = reviewUseValidator - ? [] - : ["github_pr___list_review_comments"]; - - const safeUserAllowedMCPTools = reviewUseValidator - ? userAllowedMCPTools.filter( - (tool) => - tool === "github_comment___update_droid_comment" || - (!tool.startsWith("github_pr___") && - tool !== "github_inline_comment___create_inline_comment"), - ) - : userAllowedMCPTools; + const candidateGenerationTools = + reviewType === "code" ? ["Task", "FetchUrl"] : []; + + const safeUserAllowedMCPTools = + reviewType === "code" + ? userAllowedMCPTools.filter( + (tool) => + tool === "github_comment___update_droid_comment" || + (!tool.startsWith("github_pr___") && + tool !== "github_inline_comment___create_inline_comment"), + ) + : userAllowedMCPTools; const allowedTools = Array.from( new Set([ ...baseTools, ...candidateGenerationTools, - ...reviewTools, ...safeUserAllowedMCPTools, ]), ); @@ -208,11 +195,9 @@ async function run() { // Output for next step - use core.setOutput which handles GITHUB_OUTPUT internally core.setOutput("droid_args", droidArgParts.join(" ").trim()); core.setOutput("mcp_tools", mcpTools); - core.setOutput("review_use_validator", reviewUseValidator.toString()); + core.setOutput("review_use_validator", (reviewType === "code").toString()); - console.log( - `Generated ${reviewType} review prompt (validator=${reviewUseValidator})`, - ); + console.log(`Generated ${reviewType} review prompt`); } catch (error) { const errorMessage = error instanceof Error ? error.message : String(error); core.setFailed(`Generate prompt failed: ${errorMessage}`); diff --git a/src/entrypoints/prepare-validator.ts b/src/entrypoints/prepare-validator.ts index b27d3a7..1382f76 100644 --- a/src/entrypoints/prepare-validator.ts +++ b/src/entrypoints/prepare-validator.ts @@ -14,13 +14,6 @@ async function run() { throw new Error("prepare-validator requires a pull request context"); } - // This entrypoint only makes sense when the workflow input is enabled. - if ((process.env.REVIEW_USE_VALIDATOR ?? "true").trim() === "false") { - throw new Error( - "reviewUseValidator must be true to run prepare-validator", - ); - } - const githubToken = await setupGitHubToken(); const octokit = createOctokit(githubToken); diff --git a/src/tag/commands/review.ts b/src/tag/commands/review.ts index 4fabb33..f391d59 100644 --- a/src/tag/commands/review.ts +++ b/src/tag/commands/review.ts @@ -8,7 +8,6 @@ import { prepareMcpTools } from "../../mcp/install-mcp-server"; import { createInitialComment } from "../../github/operations/comments/create-initial"; import { normalizeDroidArgs, parseAllowedTools } from "../../utils/parse-tools"; import { isEntityContext } from "../../github/context"; -import { generateReviewPrompt } from "../../create-prompt/templates/review-prompt"; import { generateReviewCandidatesPrompt } from "../../create-prompt/templates/review-candidates-prompt"; import type { Octokits } from "../../github/api/client"; import type { PrepareResult } from "../../prepare/types"; @@ -85,9 +84,6 @@ export async function prepareReviewMode({ githubToken, }); - const reviewUseValidator = - (process.env.REVIEW_USE_VALIDATOR ?? "true").trim() !== "false"; - await createPrompt({ githubContext: context, commentId, @@ -97,9 +93,7 @@ export async function prepareReviewMode({ headRefName: prData.headRefName, headRefOid: prData.headRefOid, }, - generatePrompt: reviewUseValidator - ? generateReviewCandidatesPrompt - : generateReviewPrompt, + generatePrompt: generateReviewCandidatesPrompt, reviewArtifacts, }); core.exportVariable("DROID_EXEC_RUN_TYPE", "droid-review"); @@ -124,36 +118,19 @@ export async function prepareReviewMode({ // Task tool is needed for parallel subagent reviews in candidate generation phase. // FetchUrl is needed to fetch linked tickets from the PR description. - const candidateGenerationTools = reviewUseValidator - ? ["Task", "FetchUrl"] - : []; - - const reviewTools = reviewUseValidator - ? [] - : [ - "github_inline_comment___create_inline_comment", - "github_pr___list_review_comments", - "github_pr___submit_review", - "github_pr___delete_comment", - "github_pr___minimize_comment", - "github_pr___reply_to_comment", - "github_pr___resolve_review_thread", - ]; - - const safeUserAllowedMCPTools = reviewUseValidator - ? userAllowedMCPTools.filter( - (tool) => - tool === "github_comment___update_droid_comment" || - (!tool.startsWith("github_pr___") && - tool !== "github_inline_comment___create_inline_comment"), - ) - : userAllowedMCPTools; + const candidateGenerationTools = ["Task", "FetchUrl"]; + + const safeUserAllowedMCPTools = userAllowedMCPTools.filter( + (tool) => + tool === "github_comment___update_droid_comment" || + (!tool.startsWith("github_pr___") && + tool !== "github_inline_comment___create_inline_comment"), + ); const allowedTools = Array.from( new Set([ ...baseTools, ...candidateGenerationTools, - ...reviewTools, ...safeUserAllowedMCPTools, ]), ); diff --git a/test/create-prompt/templates/review-prompt.test.ts b/test/create-prompt/templates/review-prompt.test.ts deleted file mode 100644 index a0b747a..0000000 --- a/test/create-prompt/templates/review-prompt.test.ts +++ /dev/null @@ -1,199 +0,0 @@ -import { describe, expect, it } from "bun:test"; -import { generateReviewPrompt } from "../../../src/create-prompt/templates/review-prompt"; -import type { PreparedContext } from "../../../src/create-prompt/types"; - -function createBaseContext( - overrides: Partial = {}, -): PreparedContext { - return { - repository: "test-owner/test-repo", - droidCommentId: "123", - triggerPhrase: "@droid", - eventData: { - eventName: "issue_comment", - commentId: "456", - prNumber: "42", - isPR: true, - commentBody: "@droid review", - }, - githubContext: undefined, - ...overrides, - } as PreparedContext; -} - -describe("generateReviewPrompt", () => { - it("includes objectives and procedure steps", () => { - const context = createBaseContext(); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("## Objectives"); - expect(prompt).toContain("Re-check existing review comments"); - expect(prompt).toContain("github_inline_comment___create_inline_comment"); - expect(prompt).toContain( - "**Do NOT call** `github_pr___resolve_review_thread`", - ); - }); - - it("includes pre-computed artifact references when provided", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/test/pr.diff", - commentsPath: "/tmp/test/existing_comments.json", - descriptionPath: "/tmp/test/pr_description.txt", - }, - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("### Pre-computed Review Artifacts"); - expect(prompt).toContain("/tmp/test/pr.diff"); - expect(prompt).toContain("/tmp/test/existing_comments.json"); - expect(prompt).toContain("COMPLETE diff"); - }); - - it("includes critical instruction to review all files", () => { - const context = createBaseContext(); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("## CRITICAL INSTRUCTION"); - expect(prompt).toContain( - "DO NOT STOP UNTIL YOU HAVE REVIEWED EVERY SINGLE CHANGED FILE", - ); - expect(prompt).toContain("Review EACH file systematically"); - }); - - it("instructs to read from pre-computed files in Phase 1", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/droid/pr.diff", - commentsPath: "/tmp/droid/comments.json", - descriptionPath: "/tmp/droid/pr_description.txt", - }, - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain( - "Read existing comments** from the pre-computed file", - ); - expect(prompt).toContain("Read /tmp/droid/comments.json"); - expect(prompt).toContain( - "Read the COMPLETE diff** from the pre-computed file", - ); - expect(prompt).toContain("Read /tmp/droid/pr.diff"); - }); - - it("emphasizes accuracy gates and bug detection guidelines", () => { - const prompt = generateReviewPrompt(createBaseContext()); - - expect(prompt).toContain("## Priority Levels"); - expect(prompt).toContain("[P0]"); - expect(prompt).toContain( - "Never open a new finding for an issue previously reported by this bot", - ); - }); - - it("describes submission guidance", () => { - const prompt = generateReviewPrompt(createBaseContext()); - - expect(prompt).toContain( - "Use `github_inline_comment___create_inline_comment`", - ); - expect(prompt).toContain("Do **not** approve or request changes"); - expect(prompt).toContain("github_pr___submit_review"); - expect(prompt).toContain("### When NOT to submit"); - expect(prompt).toContain("All findings are low-severity (P2/P3)"); - }); - - it("references PR description artifact in pre-computed files", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/test/pr.diff", - commentsPath: "/tmp/test/existing_comments.json", - descriptionPath: "/tmp/test/pr_description.txt", - }, - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("/tmp/test/pr_description.txt"); - expect(prompt).toContain("PR Description"); - expect(prompt).toContain( - "Contains the PR title and description (body) explaining the intent and scope", - ); - }); - - it("instructs reading PR description first in Phase 1", () => { - const context = createBaseContext({ - reviewArtifacts: { - diffPath: "/tmp/droid/pr.diff", - commentsPath: "/tmp/droid/comments.json", - descriptionPath: "/tmp/droid/pr_description.txt", - }, - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain( - "Read the PR description** to understand the intent and scope", - ); - expect(prompt).toContain("Read /tmp/droid/pr_description.txt"); - // Verify description is read before comments and diff - const descIdx = prompt.indexOf("Read the PR description"); - const commentsIdx = prompt.indexOf("Read existing comments"); - const diffIdx = prompt.indexOf("Read the COMPLETE diff"); - expect(descIdx).toBeLessThan(commentsIdx); - expect(commentsIdx).toBeLessThan(diffIdx); - }); - - it("uses fallback description path when artifacts are not provided", () => { - const context = createBaseContext(); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("pr_description.txt"); - }); - - it("includes output file instructions when outputFilePath is set", () => { - const context = createBaseContext({ - outputFilePath: "/tmp/results/code-review-results.json", - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain("## Output File (REQUIRED)"); - expect(prompt).toContain("/tmp/results/code-review-results.json"); - expect(prompt).toContain('"type": "code-review"'); - expect(prompt).toContain("downstream processing of review results"); - }); - - it("does not include output file section when outputFilePath is not set", () => { - const context = createBaseContext(); - - const prompt = generateReviewPrompt(context); - - expect(prompt).not.toContain("## Output File (REQUIRED)"); - }); - - it("includes review guidelines when provided", () => { - const context = createBaseContext({ - reviewGuidelines: "- Always check error handling\n- No magic numbers", - }); - - const prompt = generateReviewPrompt(context); - - expect(prompt).toContain(""); - expect(prompt).toContain("- Always check error handling"); - expect(prompt).toContain("- No magic numbers"); - }); - - it("does not include review guidelines section when not provided", () => { - const context = createBaseContext(); - - const prompt = generateReviewPrompt(context); - - expect(prompt).not.toContain(""); - }); -}); diff --git a/test/entrypoints/prepare-validator.test.ts b/test/entrypoints/prepare-validator.test.ts index 28f9d96..4f56d89 100644 --- a/test/entrypoints/prepare-validator.test.ts +++ b/test/entrypoints/prepare-validator.test.ts @@ -6,7 +6,7 @@ import * as contextMod from "../../src/github/context"; import * as validator from "../../src/tag/commands/review-validator"; describe("prepare-validator entrypoint", () => { - it("fails when reviewUseValidator is false", async () => { + it("fails when DROID_COMMENT_ID is missing", async () => { const setFailedSpy = spyOn(core, "setFailed").mockImplementation(() => {}); const setOutputSpy = spyOn(core, "setOutput").mockImplementation(() => {}); const exitSpy = spyOn(process, "exit").mockImplementation((() => { @@ -33,8 +33,7 @@ describe("prepare-validator entrypoint", () => { isPR: true, } as any); - process.env.REVIEW_USE_VALIDATOR = "false"; - process.env.DROID_COMMENT_ID = "123"; + delete process.env.DROID_COMMENT_ID; spyOn(token, "setupGitHubToken").mockResolvedValue("token"); spyOn(client, "createOctokit").mockReturnValue({} as any); @@ -53,7 +52,7 @@ describe("prepare-validator entrypoint", () => { expect(setFailedSpy).toHaveBeenCalled(); expect(setOutputSpy).toHaveBeenCalledWith( "prepare_error", - expect.stringContaining("reviewUseValidator"), + expect.stringContaining("DROID_COMMENT_ID"), ); expect(exitSpy).toHaveBeenCalledWith(1); diff --git a/test/modes/tag/review-command.test.ts b/test/modes/tag/review-command.test.ts index 5b8b8af..1f637c2 100644 --- a/test/modes/tag/review-command.test.ts +++ b/test/modes/tag/review-command.test.ts @@ -159,10 +159,8 @@ describe("prepareReviewMode", () => { allowedTools: expect.arrayContaining([ "Execute", "github_comment___update_droid_comment", - "github_inline_comment___create_inline_comment", - "github_pr___list_review_comments", - "github_pr___submit_review", - "github_pr___resolve_review_thread", + "Task", + "FetchUrl", ]), }), ); @@ -175,18 +173,16 @@ describe("prepareReviewMode", () => { (call: unknown[]) => call[0] === "droid_args", ) as [string, string] | undefined; expect(droidArgsCall?.[1]).toContain("Execute"); - [ + expect(droidArgsCall?.[1]).toContain("Task"); + expect(droidArgsCall?.[1]).toContain("FetchUrl"); + expect(droidArgsCall?.[1]).toContain( "github_comment___update_droid_comment", + ); + // Candidate generation phase should NOT have PR mutation tools + expect(droidArgsCall?.[1]).not.toContain( "github_inline_comment___create_inline_comment", - "github_pr___list_review_comments", - "github_pr___submit_review", - "github_pr___delete_comment", - "github_pr___minimize_comment", - "github_pr___reply_to_comment", - "github_pr___resolve_review_thread", - ].forEach((tool) => { - expect(droidArgsCall?.[1]).toContain(tool); - }); + ); + expect(droidArgsCall?.[1]).not.toContain("github_pr___submit_review"); expect(exportVariableSpy).toHaveBeenCalledWith( "DROID_EXEC_RUN_TYPE", "droid-review", @@ -558,9 +554,7 @@ describe("prepareReviewMode", () => { ); }); - it("includes FetchUrl in allowed tools when validator mode is enabled", async () => { - process.env.REVIEW_USE_VALIDATOR = "true"; - + it("always includes Task and FetchUrl in allowed tools for candidate generation", async () => { const context = createMockContext({ eventName: "issue_comment", isPR: true, @@ -608,61 +602,6 @@ describe("prepareReviewMode", () => { (call: unknown[]) => call[0] === "droid_args", ) as [string, string] | undefined; expect(droidArgsCall?.[1]).toContain("FetchUrl"); - - delete process.env.REVIEW_USE_VALIDATOR; - }); - - it("excludes FetchUrl from allowed tools when validator mode is disabled", async () => { - process.env.REVIEW_USE_VALIDATOR = "false"; - - const context = createMockContext({ - eventName: "issue_comment", - isPR: true, - payload: { - comment: { - id: 109, - body: "@droid review", - }, - } as any, - entityNumber: 32, - }); - - const octokit = { - rest: { - issues: { - listComments: () => Promise.resolve({ data: [] }), - }, - pulls: { - listReviewComments: () => Promise.resolve({ data: [] }), - }, - }, - graphql: () => Promise.resolve({}), - } as any; - - graphqlSpy = spyOn(octokit, "graphql").mockResolvedValue({ - repository: { - pullRequest: { - baseRefName: MOCK_PR_DATA.baseRefName, - headRefName: MOCK_PR_DATA.headRefName, - headRefOid: MOCK_PR_DATA.headRefOid, - title: MOCK_PR_DATA.title, - body: MOCK_PR_DATA.body, - }, - }, - }); - - await prepareReviewMode({ - context, - octokit, - githubToken: "token", - trackingCommentId: 562, - }); - - const droidArgsCall = setOutputSpy.mock.calls.find( - (call: unknown[]) => call[0] === "droid_args", - ) as [string, string] | undefined; - expect(droidArgsCall?.[1]).not.toContain("FetchUrl"); - - delete process.env.REVIEW_USE_VALIDATOR; + expect(droidArgsCall?.[1]).toContain("Task"); }); });