From bd178fcaf0b6e3f29b0796a373cd7b62ca9aeb4e Mon Sep 17 00:00:00 2001
From: "Marc J. Schmidt" <marc@marcjschmidt.de>
Date: Thu, 22 Jan 2026 06:22:59 +0100
Subject: [PATCH] Initial release: MCP server enforcing Worker-Reviewer loop

Diligence prevents AI agents from shipping quick fixes that break things
by enforcing a research-propose-verify loop before any code changes.

Key features:
- Worker sub-agent researches and proposes with file:line citations
- Reviewer sub-agent independently verifies claims by searching codebase
- Iterates until approved (max 5 rounds)
- Loads project-specific context from .claude/CODEBASE_CONTEXT.md
- State persisted across sessions

Validated on production codebase: caught architectural mistake (broker
subscriptions on client-side code) that naive agent would have shipped.
---
 .gitignore                                    |   2 +
 README.md                                     | 274 +++++++++
 REVIEWER_PROMPT.md                            | 161 ++++++
 TESTING.md                                    | 243 ++++++++
 WORKER_PROMPT.md                              | 135 +++++
 index.mjs                                     | 547 ++++++++++++++++++
 package-lock.json                             | 162 ++++++
 package.json                                  |  21 +
 test/compare-approaches.mjs                   | 255 ++++++++
 test/dry-run.mjs                              | 305 ++++++++++
 test/fixture/.claude/CODEBASE_CONTEXT.md      | 150 +++++
 test/fixture/src/broker/events.ts             |  79 +++
 .../src/controllers/roles.controller.ts       |  99 ++++
 test/fixture/src/services/chat.service.ts     | 122 ++++
 test/fixture/src/services/team.service.ts     | 102 ++++
 .../src/services/user-block.service.ts        | 117 ++++
 .../src/services/voice-channel.service.ts     | 220 +++++++
 test/mcp-client.mjs                           | 229 ++++++++
 test/results/2026-01-22-comparison-report.md  | 183 ++++++
 test/run-tests.mjs                            | 415 +++++++++++++
 test/scenarios/blocking-voice.json            |  78 +++
 test/scenarios/index.json                     |  21 +
 test/scenarios/permission-cache.json          |  81 +++
 23 files changed, 4001 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 REVIEWER_PROMPT.md
 create mode 100644 TESTING.md
 create mode 100644 WORKER_PROMPT.md
 create mode 100644 index.mjs
 create mode 100644 package-lock.json
 create mode 100644 package.json
 create mode 100644 test/compare-approaches.mjs
 create mode 100644 test/dry-run.mjs
 create mode 100644 test/fixture/.claude/CODEBASE_CONTEXT.md
 create mode 100644 test/fixture/src/broker/events.ts
 create mode 100644 test/fixture/src/controllers/roles.controller.ts
 create mode 100644 test/fixture/src/services/chat.service.ts
 create mode 100644 test/fixture/src/services/team.service.ts
 create mode 100644 test/fixture/src/services/user-block.service.ts
 create mode 100644 test/fixture/src/services/voice-channel.service.ts
 create mode 100644 test/mcp-client.mjs
 create mode 100644 test/results/2026-01-22-comparison-report.md
 create mode 100644 test/run-tests.mjs
 create mode 100644 test/scenarios/blocking-voice.json
 create mode 100644 test/scenarios/index.json
 create mode 100644 test/scenarios/permission-cache.json

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3c536f6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+node_modules/
+.claude/.diligence-state.json
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e6e1d0b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,274 @@
+# @strikt/diligence
+
+An MCP server that enforces a Worker-Reviewer loop before code changes, preventing quick fixes that break things.
+
+## The Problem
+
+AI coding agents are too eager. Given a bug like "permission cache doesn't invalidate," they'll add `.clear()` somewhere and call it done. But real codebases have:
+
+- Broker events that need subscribing
+- Caches in multiple locations
+- Patterns that must be followed
+- Client/server architecture boundaries
+- Edge cases that cause production fires
+
+**A naive agent doesn't know what it doesn't know.** It proposes a fix, you approve it, and three days later you're debugging why permissions are broken in a completely different feature.
+
+## The Solution
+
+**Diligence** forces a research-verify loop before any code is written:
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│  1. WORKER (sub-agent)                                      │
+│     - Searches codebase thoroughly                          │
+│     - Traces data flow from origin to all consumers         │
+│     - Finds existing patterns for similar features          │
+│     - Proposes fix with file:line citations                 │
+└─────────────────────────────────────────────────────────────┘
+                            │
+                            ▼
+┌─────────────────────────────────────────────────────────────┐
+│  2. REVIEWER (separate sub-agent)                           │
+│     - Gets fresh context (no Worker bias)                   │
+│     - Verifies every claim by searching codebase            │
+│     - Checks against architecture patterns                  │
+│     - Returns NEEDS_WORK with specific feedback             │
+└─────────────────────────────────────────────────────────────┘
+                            │
+                    ┌───────┴───────┐
+                    ▼               ▼
+              NEEDS_WORK        APPROVED
+              (loop back)           │
+                                    ▼
+┌─────────────────────────────────────────────────────────────┐
+│  3. IMPLEMENTATION                                          │
+│     - Only now can code be changed                          │
+│     - Follows the verified proposal                         │
+└─────────────────────────────────────────────────────────────┘
+```
+
+## Real-World Validation
+
+Tested on a production codebase with a P0 bug: "Permission cache doesn't invalidate on role changes."
+
+| Round | Worker Proposed | Reviewer Found |
+|-------|-----------------|----------------|
+| 1 | Add broker event subscriptions to TeamService | **Wrong** - broker is server-side only, TeamService is client-side |
+| 2 | Revised RPC-based approach | Missing error handling, incomplete pattern match |
+| 3 | Simpler solution using ProjectService | Still gaps in implementation |
+| 4 | Complete RPC stream solution | **Approved** |
+
+**A naive agent would have shipped Round 1** - adding broker subscriptions to client-side code that can't access the broker. That fix would have done nothing and wasted hours of debugging.
+
+## Installation
+
+```bash
+npm install @strikt/diligence
+```
+
+Or clone directly:
+
+```bash
+git clone https://github.com/strikt/diligence ~/tools/diligence
+cd ~/tools/diligence && npm install
+```
+
+## Setup
+
+### 1. Add MCP Server
+
+In your project's `.mcp.json` or `.claude/settings.json`:
+
+```json
+{
+  "mcpServers": {
+    "diligence": {
+      "command": "node",
+      "args": ["/path/to/diligence/index.mjs"]
+    }
+  }
+}
+```
+
+### 2. Create Codebase Context
+
+Create `.claude/CODEBASE_CONTEXT.md` in your project:
+
+```markdown
+# Architecture
+
+[Describe your system architecture]
+
+## Key Patterns
+
+[Document patterns agents MUST follow]
+
+## Common Pitfalls
+
+[List things that break if done wrong]
+
+## Events/Hooks
+
+[List broker events, hooks, subscriptions that exist]
+```
+
+This context is loaded into both Worker and Reviewer briefs.
+
+### 3. Add to CLAUDE.md (Recommended)
+
+Add to your project's `CLAUDE.md`:
+
+```markdown
+## Code Changes
+
+For any code changes, use the diligence workflow:
+
+1. Call `mcp__diligence__start` with the task
+2. Spawn a Worker sub-agent with `get_worker_brief` to research and propose
+3. Spawn a Reviewer sub-agent with `get_reviewer_brief` to verify
+4. Loop until APPROVED
+5. Call `implement` and make changes
+```
+
+This ensures Claude picks up diligence automatically without explicit instructions.
+
+## How It Works
+
+### Workflow Phases
+
+```
+conversation → researching → approved → implementing → conversation
+                   ↑              │
+                   └──────────────┘
+                    (NEEDS_WORK, max 5 rounds)
+```
+
+### MCP Tools
+
+| Tool | Description |
+|------|-------------|
+| `start` | Begin workflow with a task description |
+| `get_worker_brief` | Get full context for Worker sub-agent |
+| `propose` | Worker submits proposal with citations |
+| `get_reviewer_brief` | Get full context for Reviewer sub-agent |
+| `review` | Reviewer submits APPROVED or NEEDS_WORK |
+| `implement` | Begin implementation (requires approval) |
+| `complete` | Mark done, reset to conversation |
+| `status` | Check current workflow state |
+| `abort` | Cancel and reset |
+
+### Sub-Agent Pattern
+
+The key insight: **Worker and Reviewer are separate sub-agents** with fresh context.
+
+```
+Main Claude Session
+    │
+    ├─► Worker Sub-Agent (Explore)
+    │   - Receives: task + codebase context + previous feedback
+    │   - Does: searches, reads, analyzes
+    │   - Returns: proposal with file:line citations
+    │
+    └─► Reviewer Sub-Agent (Explore)
+        - Receives: proposal + codebase context (NOT Worker's searches)
+        - Does: independently verifies every claim
+        - Returns: APPROVED or NEEDS_WORK with specifics
+```
+
+The Reviewer doesn't see the Worker's search results. It must verify claims by actually searching the codebase. This prevents rubber-stamping.
+
+## Project Structure
+
+```
+.claude/
+├── CODEBASE_CONTEXT.md      # Required - architecture & patterns
+├── WORKER_CONTEXT.md        # Optional - project-specific worker guidance
+├── REVIEWER_CONTEXT.md      # Optional - project-specific reviewer guidance
+├── context/                 # Optional - additional context files
+│   ├── voice.md
+│   └── permissions.md
+└── .diligence-state.json    # Auto-generated workflow state
+```
+
+## Example Session
+
+```
+User: Fix the permission cache bug from the todo list
+
+Claude: [reads todo, understands task]
+Claude: [calls diligence.start]
+
+Claude: [spawns Worker with Task tool]
+Worker: [searches for permission cache, finds memoizedPermissions]
+Worker: [searches for role events, finds BusTeamRoleChange]
+Worker: [searches for patterns, finds how chat.service handles this]
+Worker: [calls diligence.propose with full analysis]
+
+Claude: [spawns Reviewer with Task tool]
+Reviewer: [verifies claim about memoizedPermissions - confirmed at team.service.ts:45]
+Reviewer: [verifies claim about events - finds Worker missed BusTeamMemberRoleChange]
+Reviewer: [calls diligence.review with NEEDS_WORK]
+
+Claude: [spawns new Worker with feedback]
+Worker: [addresses feedback, adds missing event]
+Worker: [calls diligence.propose]
+
+Claude: [spawns Reviewer]
+Reviewer: [all claims verified]
+Reviewer: [calls diligence.review with APPROVED]
+
+Claude: [calls diligence.implement]
+Claude: [makes code changes following approved proposal]
+Claude: [calls diligence.complete]
+```
+
+## Why This Works
+
+1. **Enforcement** - Can't write code without approval
+2. **Fresh Context** - Reviewer has no bias from Worker's reasoning
+3. **Verification** - Claims are checked against actual codebase
+4. **Iteration** - Multiple rounds refine understanding
+5. **Architecture Awareness** - Both agents loaded with project context
+6. **Audit Trail** - Full history of proposals and feedback
+
+## Configuration
+
+### Environment Variables
+
+| Variable | Description |
+|----------|-------------|
+| None required | State stored in `.claude/.diligence-state.json` |
+
+### Constants (in index.mjs)
+
+| Constant | Default | Description |
+|----------|---------|-------------|
+| `MAX_ROUNDS` | 5 | Maximum Worker-Reviewer iterations before reset |
+
+## Testing
+
+```bash
+# Run workflow mechanics tests
+npm run test:workflow
+
+# Run mock scenario tests
+npm run test:mock
+
+# Dry-run against a real project
+node test/dry-run.mjs --project=/path/to/project --scenario=blocking-voice
+```
+
+## Limitations
+
+- Requires Claude Code with MCP support
+- Sub-agents use context, so complex codebases may need focused CODEBASE_CONTEXT.md
+- Not a replacement for human code review - complements it
+
+## License
+
+MIT
+
+## Contributing
+
+Issues and PRs welcome at https://github.com/strikt/diligence
diff --git a/REVIEWER_PROMPT.md b/REVIEWER_PROMPT.md
new file mode 100644
index 0000000..100685b
--- /dev/null
+++ b/REVIEWER_PROMPT.md
@@ -0,0 +1,161 @@
+# Reviewer Instructions
+
+You are the Reviewer in a diligence workflow. Your job is to **verify claims by searching the codebase** and ensure the proposal is complete before implementation begins.
+
+## Your Mindset
+
+You are a skeptical senior engineer who:
+- Doesn't trust claims without evidence
+- Searches the codebase to verify
+- Knows the patterns (from codebase context)
+- Catches missing cases and edge conditions
+- Approves only when genuinely confident
+
+**Your value comes from catching what the Worker missed.** If you rubber-stamp proposals, bugs ship.
+
+---
+
+## Review Process
+
+### 1. For Each Claim, Search to Verify
+
+The Worker says "there are no events for X" → Search for events
+The Worker says "this is the only place" → Search for other places
+The Worker says "following the pattern in Y" → Read Y and compare
+
+**Do not trust. Verify.**
+
+### 2. Check Against Codebase Context
+
+The codebase context describes:
+- Architecture patterns
+- Common pitfalls
+- Key events/hooks
+- Validation checklists
+
+Cross-reference the proposal against these. Did the Worker address them?
+
+### 3. Look for What's Missing
+
+Common gaps:
+- Events/hooks that should be subscribed to
+- Caches that need invalidation
+- Related code that assumes old behavior
+- Error handling and edge cases
+- Cleanup on failure/disconnect
+
+### 4. Verify the Pattern Match
+
+If Worker claims to follow an existing pattern:
+- Read the cited pattern code
+- Compare to proposed implementation
+- Flag any deviations
+
+---
+
+## Verification Format
+
+For each major claim, document:
+
+```markdown
+### Claim: "[quote from proposal]"
+
+**Searched:** [what you searched for]
+**Found:** [what you actually found]
+**Verdict:** ✅ Verified / ❌ Incorrect / ⚠️ Incomplete
+```
+
+---
+
+## Decision Criteria
+
+### APPROVED
+
+Use APPROVED when:
+- All claims verified by searching codebase
+- Proposal follows existing patterns correctly
+- All scenarios from codebase context addressed
+- No obvious gaps in the implementation plan
+- You are confident this will work
+
+### NEEDS_WORK
+
+Use NEEDS_WORK when:
+- Claims don't match what you found in codebase
+- Missing events/hooks/subscriptions
+- Doesn't follow existing patterns
+- Scenarios or edge cases not addressed
+- The implementation plan has gaps
+
+---
+
+## Feedback Format (NEEDS_WORK)
+
+```markdown
+## Verification Results
+
+### Claim: "[claim]"
+**Searched:** `grep pattern`
+**Found:** [results]
+**Verdict:** ❌ Incorrect - [explanation]
+
+### Claim: "[claim]"
+**Searched:** `grep pattern`
+**Found:** [results]
+**Verdict:** ⚠️ Incomplete - [what's missing]
+
+## Missing Items
+
+1. **[Item]** - Found `EventName` at file:line that isn't addressed
+2. **[Item]** - The pattern in `other-file.ts` does X but proposal doesn't
+3. **[Item]** - Scenario Y from codebase context not covered
+
+## Required Changes
+
+To approve, the Worker must:
+1. [Specific actionable item]
+2. [Specific actionable item]
+3. [Specific actionable item]
+```
+
+---
+
+## Common Things to Check
+
+| Category | What to Search | Why |
+|----------|---------------|-----|
+| Events | `Bus*`, `emit`, `subscribe` | State changes need propagation |
+| Caches | `cache`, `memoize`, `Map<` | May need invalidation |
+| Patterns | Similar feature names | Should follow conventions |
+| Cleanup | `destroy`, `disconnect`, `leave` | Resources need cleanup |
+| Errors | `throw`, `catch`, `UserError` | Error handling patterns |
+
+---
+
+## Using Tools
+
+You have full access to search the codebase:
+- **Grep** - Essential for verification
+- **Glob** - Find related files
+- **Read** - Check full context
+
+**A review without searches is not a review.** You must demonstrate you looked.
+
+---
+
+## Red Flags
+
+Reject immediately if:
+- Worker provides no file:line citations
+- Claims contradict what you find in codebase
+- Proposal ignores items from codebase context
+- "I'll figure it out during implementation"
+- Vague or hand-wavy implementation steps
+
+---
+
+## Remember
+
+Your job is not to be difficult. Your job is to **catch problems before they become bugs in production.**
+
+If the proposal is solid, approve it. If it has gaps, send it back with specific, actionable feedback.
diff --git a/TESTING.md b/TESTING.md
new file mode 100644
index 0000000..dd8f54d
--- /dev/null
+++ b/TESTING.md
@@ -0,0 +1,243 @@
+# Testing the Diligence MCP Server
+
+## Test Suite Overview
+
+The diligence project includes a comprehensive test suite that validates:
+
+1. **Workflow mechanics** - State transitions, round limits, phase enforcement
+2. **Mock scenarios** - Predefined Worker-Reviewer interactions with expected outcomes
+3. **Dry-run mode** - Test against real projects without making changes
+
+## Quick Start
+
+```bash
+# Run all workflow tests
+node test/run-tests.mjs --workflow
+
+# Run mock scenario tests
+node test/run-tests.mjs --mock
+
+# Run a specific scenario
+node test/run-tests.mjs --mock --scenario=blocking-voice
+
+# Dry run against nexus (no changes)
+node test/dry-run.mjs --project=~/bude/codecharm/nexus --scenario=blocking-voice
+```
+
+## Test Structure
+
+```
+test/
+├── mcp-client.mjs      # Programmatic MCP client
+├── run-tests.mjs       # Main test runner
+├── dry-run.mjs         # Dry-run against real projects
+├── fixture/            # Mock codebase for testing
+│   ├── src/
+│   │   ├── broker/events.ts
+│   │   ├── services/
+│   │   └── controllers/
+│   └── .claude/
+│       └── CODEBASE_CONTEXT.md
+└── scenarios/          # Test scenarios
+    ├── index.json
+    ├── blocking-voice.json
+    └── permission-cache.json
+```
+
+## Test Modes
+
+### 1. Workflow Tests (`--workflow`)
+
+Tests the MCP server mechanics without AI:
+
+- Phase transitions (conversation → researching → approved → implementing)
+- Round increment on NEEDS_WORK
+- Max rounds enforcement (resets after 5 rounds)
+- Feedback accumulation
+- Abort functionality
+
+```bash
+node test/run-tests.mjs --workflow
+```
+
+### 2. Mock Tests (`--mock`)
+
+Tests complete Worker-Reviewer scenarios with predefined responses:
+
+1. Scenario defines a task and expected naive/correct fixes
+2. Test simulates Worker submitting naive proposal
+3. Reviewer catches issues, sends NEEDS_WORK
+4. Worker submits revised proposal with all fixes
+5. Reviewer approves
+6. Validates that proposal mentions all required elements
+
+```bash
+# All scenarios
+node test/run-tests.mjs --mock
+
+# Single scenario
+node test/run-tests.mjs --mock --scenario=permission-cache
+```
+
+### 3. Dry Run (`--project`)
+
+Connects to MCP server in a real project directory:
+
+```bash
+# With predefined scenario
+node test/dry-run.mjs --project=~/bude/codecharm/nexus --scenario=blocking-voice
+
+# With custom task
+node test/dry-run.mjs --project=/path/to/project --task="Fix the caching bug"
+```
+
+This:
+- Starts the workflow with the task
+- Shows the full Worker Brief (including real CODEBASE_CONTEXT.md)
+- Does NOT make any code changes
+- Aborts the workflow on exit
+
+## Test Fixture
+
+The fixture (`test/fixture/`) is a mini codebase that mirrors real-world patterns:
+
+### Files
+
+| File | Purpose | Known Bugs (for testing) |
+|------|---------|-------------------------|
+| `broker/events.ts` | Event bus definitions | Reference implementation |
+| `services/user-block.service.ts` | Blocking logic | Missing voice cleanup |
+| `services/voice-channel.service.ts` | Voice/DM calls | Missing blocking check on answerDmCall |
+| `services/team.service.ts` | Permission cache | Doesn't subscribe to role events |
+| `services/chat.service.ts` | **Correct pattern** | Shows permission vs action separation |
+| `controllers/roles.controller.ts` | Role CRUD | Missing broker events on create/delete |
+
+### Patterns Tested
+
+1. **Broker event emission** - Every state change should emit events
+2. **Cache invalidation** - Caches must subscribe to relevant events
+3. **Permission vs Action** - Permissions control visibility, actions have separate checks
+4. **Multi-location fixes** - If one place has a check, similar places need it too
+
+## Test Scenarios
+
+### blocking-voice
+
+**Task:** Fix blocked users can still answer DM voice calls
+
+**Naive fix:** Add blocking check to answerDmCall only
+
+**Correct fix:**
+- Add blocking check to answerDmCall
+- Add blocking check to declineDmCall
+- Filter notifications for blocked users
+- Add voice cleanup to blockUser()
+- Subscribe to BusUserBlockChange for mid-call kicks
+
+### permission-cache
+
+**Task:** Fix permission cache doesn't invalidate when roles change
+
+**Naive fix:** Add .clear() somewhere
+
+**Correct fix:**
+- Subscribe to BusTeamRoleChange in team.service
+- Subscribe to BusTeamMemberRoleChange in team.service
+- Add broker event to createRole()
+- Add broker event to deleteRole()
+
+## Adding New Scenarios
+
+Create a new JSON file in `test/scenarios/`:
+
+```json
+{
+  "id": "my-scenario",
+  "name": "Human-readable name",
+  "description": "Brief description",
+
+  "task": "The task description given to the Worker",
+
+  "naive_fix": {
+    "description": "What a quick-fix agent would do",
+    "changes": [
+      { "file": "path/to/file.ts", "change": "Quick fix description" }
+    ],
+    "issues": [
+      "What the naive fix misses #1",
+      "What the naive fix misses #2"
+    ]
+  },
+
+  "correct_fix": {
+    "description": "Complete fix description",
+    "required_changes": [
+      { "file": "path.ts", "function": "funcName", "change": "What to change" }
+    ],
+    "required_broker_subscriptions": [
+      { "service": "x.service.ts", "event": "BusEventName", "action": "What to do" }
+    ]
+  },
+
+  "validation_criteria": {
+    "must_mention": ["keyword1", "keyword2"],
+    "should_reference_pattern": "reference-file.ts"
+  }
+}
+```
+
+Add to `test/scenarios/index.json`:
+
+```json
+{
+  "scenarios": [
+    { "id": "my-scenario", "file": "my-scenario.json" }
+  ]
+}
+```
+
+## Real-World Testing
+
+For true validation, test with real Claude sub-agents:
+
+```
+Root Agent:
+  1. Call mcp__diligence__start with task
+  2. Spawn Worker agent (Task tool) with get_worker_brief
+  3. Worker researches, submits proposal via mcp__diligence__propose
+  4. Spawn Reviewer agent (Task tool) with get_reviewer_brief
+  5. Reviewer verifies claims, submits via mcp__diligence__review
+  6. If NEEDS_WORK, spawn new Worker with updated brief
+  7. If APPROVED, proceed to implementation
+```
+
+**Why separate agents matter:**
+- Fresh context = no bias from previous reasoning
+- Reviewer doesn't know Worker's search results
+- Forces genuine verification, not rubber-stamping
+
+## Success Criteria
+
+1. **Reviewer catches issues** that Worker initially misses
+2. **Multiple rounds** occur before approval
+3. **Final proposal** is more complete than naive approach
+4. **Validation criteria** are all met
+
+## Environment Variables
+
+| Variable | Purpose |
+|----------|---------|
+| `DEBUG=1` | Show MCP server stderr output |
+| `ANTHROPIC_API_KEY` | Required for `--live` mode (future) |
+
+## CI Integration
+
+```bash
+# Run in CI
+npm test
+
+# Or directly
+node test/run-tests.mjs --workflow && node test/run-tests.mjs --mock
+```
+
+Exit codes: 0 = pass, 1 = fail
diff --git a/WORKER_PROMPT.md b/WORKER_PROMPT.md
new file mode 100644
index 0000000..39c677a
--- /dev/null
+++ b/WORKER_PROMPT.md
@@ -0,0 +1,135 @@
+# Worker Instructions
+
+You are the Worker in a diligence workflow. Your job is to **thoroughly research** and **propose a complete solution** that will be verified by a Reviewer.
+
+## Your Mindset
+
+You are a diligent engineer who:
+- Researches before proposing
+- Traces data flow end-to-end
+- Finds and follows existing patterns
+- Identifies ALL affected code
+- Considers edge cases and failure modes
+- Cites specific file:line locations for every claim
+
+**The Reviewer will verify your claims by searching the codebase.** If you lie, guess, or miss something, you will be caught and sent back to revise.
+
+---
+
+## Before Proposing
+
+### 1. Trace the Data Flow
+
+Answer these questions:
+- **Origin:** Where does the relevant data come from?
+- **Flow:** What path does it take through the system?
+- **Consumers:** What code uses this data?
+- **Side effects:** What gets triggered when this changes?
+
+### 2. Find Existing Patterns
+
+Search for similar functionality:
+- How does the codebase handle similar cases?
+- What patterns are used?
+- Are there helper functions you should use?
+- What events/hooks exist?
+
+### 3. Identify All Touchpoints
+
+List every file that needs changes:
+- Don't just fix the symptom - trace to root cause
+- Check for caches, events, subscriptions
+- Look for related code that assumes current behavior
+
+### 4. Consider Scenarios
+
+What situations must your fix handle?
+- Happy path
+- Error cases
+- Race conditions
+- Edge cases mentioned in codebase context
+
+---
+
+## Proposal Format
+
+```markdown
+## Summary
+
+[1-2 sentences: what you're fixing and how]
+
+## Data Flow Analysis
+
+### Origin
+[Where the data comes from - cite file:line]
+
+### Flow
+[How it moves through the system - cite each step]
+
+### Consumers
+[What uses this data and what they expect]
+
+## Existing Pattern
+
+[How similar features work in this codebase - cite examples]
+
+I will follow this pattern by: [explain]
+
+## Files Affected
+
+| File | Line(s) | Change |
+|------|---------|--------|
+| path/to/file.ts | 123-130 | [what changes] |
+| path/to/other.ts | 45 | [what changes] |
+
+## Implementation
+
+1. [First change - be specific]
+2. [Second change - be specific]
+3. [etc.]
+
+## Scenarios Covered
+
+- [x] Scenario 1: [description] - handled by [which part of fix]
+- [x] Scenario 2: [description] - handled by [which part of fix]
+- [x] Edge case: [description] - handled by [which part of fix]
+
+## Not Covered / Out of Scope
+
+[Anything you're explicitly NOT fixing and why]
+```
+
+---
+
+## Common Mistakes to Avoid
+
+| Mistake | Why It's Wrong | Do This Instead |
+|---------|----------------|-----------------|
+| "I'll add a check here" | Doesn't explain why it's missing | Trace why the check doesn't exist |
+| "This should fix it" | No verification | Cite code proving it will work |
+| "Only one file needs changes" | Probably missed something | Search for all usages |
+| Guessing at patterns | Will be caught by Reviewer | Search and cite actual code |
+| Ignoring codebase context | Missing critical architecture | Re-read the context section |
+
+---
+
+## Using Tools
+
+You have full access to search and read the codebase:
+- **Grep** - Search for patterns, function names, events
+- **Glob** - Find files by path pattern
+- **Read** - Read specific files
+
+**Use them extensively.** A proposal without searches is a proposal that will be rejected.
+
+---
+
+## If You Get Feedback
+
+When the Reviewer sends NEEDS_WORK:
+1. Read the feedback carefully
+2. Do additional searches to address gaps
+3. Revise your proposal to cover missing items
+4. Don't just add - verify your additions are correct
+
+You have limited rounds. Make each one count.
diff --git a/index.mjs b/index.mjs
new file mode 100644
index 0000000..234a1bb
--- /dev/null
+++ b/index.mjs
@@ -0,0 +1,547 @@
+#!/usr/bin/env node
+/**
+ * Diligence MCP Server
+ *
+ * Enforces diligent code changes through a worker-reviewer loop.
+ * Prevents quick fixes that miss patterns, events, and edge cases.
+ *
+ * Workflow:
+ * 1. User describes task → conversation phase
+ * 2. Start work → research phase begins
+ * 3. Worker proposes → Reviewer challenges → repeat until approved
+ * 4. Approved → implementation phase
+ * 5. Complete → back to conversation
+ *
+ * Project-specific context is loaded from .claude/CODEBASE_CONTEXT.md
+ * Generic prompts are bundled with this server.
+ */
+
+import { Server } from '@modelcontextprotocol/sdk/server/index.js';
+import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types.js';
+import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync } from 'fs';
+import { join, dirname, basename } from 'path';
+import { fileURLToPath } from 'url';
+import { execSync } from 'child_process';
+
+// =============================================================================
+// Configuration
+// =============================================================================
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const MAX_ROUNDS = 5;
+
+/**
+ * Find the project root by looking for .git directory.
+ * Falls back to current working directory.
+ */
+function findProjectRoot() {
+  try {
+    const gitRoot = execSync('git rev-parse --show-toplevel', {
+      encoding: 'utf-8',
+      stdio: ['pipe', 'pipe', 'pipe']
+    }).trim();
+    return gitRoot;
+  } catch {
+    return process.cwd();
+  }
+}
+
+const PROJECT_ROOT = findProjectRoot();
+const STATE_DIR = join(PROJECT_ROOT, '.claude');
+const STATE_FILE = join(STATE_DIR, '.diligence-state.json');
+const CODEBASE_CONTEXT_FILE = join(STATE_DIR, 'CODEBASE_CONTEXT.md');
+
+// Bundled prompts (shipped with this package)
+const WORKER_PROMPT_FILE = join(__dirname, 'WORKER_PROMPT.md');
+const REVIEWER_PROMPT_FILE = join(__dirname, 'REVIEWER_PROMPT.md');
+
+// =============================================================================
+// State Management
+// =============================================================================
+
+/**
+ * @typedef {Object} WorkflowState
+ * @property {'conversation' | 'researching' | 'approved' | 'implementing'} phase
+ * @property {string | null} task
+ * @property {number} round
+ * @property {string | null} proposal
+ * @property {Array<{round: number, feedback: string}>} feedback
+ * @property {Array<string>} userContext
+ * @property {string | null} approvalReason
+ * @property {string | null} startedAt
+ */
+
+/** @type {WorkflowState} */
+let state = {
+  phase: 'conversation',
+  task: null,
+  round: 0,
+  proposal: null,
+  feedback: [],
+  userContext: [],
+  approvalReason: null,
+  startedAt: null,
+};
+
+function loadState() {
+  try {
+    if (existsSync(STATE_FILE)) {
+      const saved = JSON.parse(readFileSync(STATE_FILE, 'utf-8'));
+      state = { ...state, ...saved };
+    }
+  } catch {
+    // Use default state
+  }
+}
+
+function saveState() {
+  try {
+    if (!existsSync(STATE_DIR)) {
+      mkdirSync(STATE_DIR, { recursive: true });
+    }
+    writeFileSync(STATE_FILE, JSON.stringify(state, null, 2));
+  } catch {
+    // Ignore write errors
+  }
+}
+
+function resetState() {
+  state = {
+    phase: 'conversation',
+    task: null,
+    round: 0,
+    proposal: null,
+    feedback: [],
+    userContext: [],
+    approvalReason: null,
+    startedAt: null,
+  };
+  saveState();
+}
+
+// =============================================================================
+// Context Loading
+// =============================================================================
+
+// Project-specific context files
+const PROJECT_WORKER_CONTEXT = join(STATE_DIR, 'WORKER_CONTEXT.md');
+const PROJECT_REVIEWER_CONTEXT = join(STATE_DIR, 'REVIEWER_CONTEXT.md');
+const PROJECT_CONTEXT_DIR = join(STATE_DIR, 'context');
+
+function loadFile(filePath) {
+  try {
+    if (existsSync(filePath)) {
+      return readFileSync(filePath, 'utf-8');
+    }
+  } catch {
+    // Ignore
+  }
+  return '';
+}
+
+/**
+ * Load all .md files from a directory.
+ * @param {string} dirPath
+ * @returns {Array<{name: string, content: string}>}
+ */
+function loadContextDir(dirPath) {
+  const files = [];
+  try {
+    if (existsSync(dirPath)) {
+      const entries = readdirSync(dirPath);
+      for (const entry of entries) {
+        if (entry.endsWith('.md')) {
+          const content = loadFile(join(dirPath, entry));
+          if (content) {
+            files.push({ name: basename(entry, '.md'), content });
+          }
+        }
+      }
+    }
+  } catch {
+    // Ignore
+  }
+  return files;
+}
+
+function getCodebaseContext() {
+  let context = '';
+
+  // Main codebase context
+  const mainContent = loadFile(CODEBASE_CONTEXT_FILE);
+  if (mainContent) {
+    context += mainContent;
+  } else {
+    context += `(No CODEBASE_CONTEXT.md found at ${CODEBASE_CONTEXT_FILE})\n\nCreate this file to provide project-specific architecture and patterns.`;
+  }
+
+  // Additional context files from .claude/context/
+  const extraFiles = loadContextDir(PROJECT_CONTEXT_DIR);
+  if (extraFiles.length > 0) {
+    context += '\n\n---\n\n# Additional Context\n\n';
+    for (const file of extraFiles) {
+      context += `## ${file.name}\n\n${file.content}\n\n`;
+    }
+  }
+
+  return context;
+}
+
+function getWorkerPrompt() {
+  // Bundled generic prompt
+  let prompt = loadFile(WORKER_PROMPT_FILE);
+
+  // Project-specific additions
+  const projectWorker = loadFile(PROJECT_WORKER_CONTEXT);
+  if (projectWorker) {
+    prompt += '\n\n---\n\n# Project-Specific Worker Guidance\n\n';
+    prompt += projectWorker;
+  }
+
+  return prompt;
+}
+
+function getReviewerPrompt() {
+  // Bundled generic prompt
+  let prompt = loadFile(REVIEWER_PROMPT_FILE);
+
+  // Project-specific additions
+  const projectReviewer = loadFile(PROJECT_REVIEWER_CONTEXT);
+  if (projectReviewer) {
+    prompt += '\n\n---\n\n# Project-Specific Reviewer Guidance\n\n';
+    prompt += projectReviewer;
+  }
+
+  return prompt;
+}
+
+// =============================================================================
+// MCP Server
+// =============================================================================
+
+const server = new Server({
+  name: 'diligence',
+  version: '0.1.0',
+}, {
+  capabilities: { tools: {} }
+});
+
+// =============================================================================
+// Tool Definitions
+// =============================================================================
+
+server.setRequestHandler(ListToolsRequestSchema, async () => ({
+  tools: [
+    {
+      name: 'status',
+      description: 'Get current diligence workflow state (phase, round, task). Check this before taking actions.',
+      inputSchema: { type: 'object', properties: {} }
+    },
+    {
+      name: 'start',
+      description: 'Start the diligence workflow for a task. Worker will research and propose, Reviewer will verify.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          task: { type: 'string', description: 'Description of the task/bug to fix' }
+        },
+        required: ['task']
+      }
+    },
+    {
+      name: 'propose',
+      description: 'Worker submits a proposal with code citations, data flow analysis, and implementation plan.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          proposal: { type: 'string', description: 'Full proposal in markdown with file:line citations' }
+        },
+        required: ['proposal']
+      }
+    },
+    {
+      name: 'review',
+      description: 'Reviewer submits APPROVED or NEEDS_WORK after verifying claims by searching the codebase.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          decision: { type: 'string', enum: ['APPROVED', 'NEEDS_WORK'], description: 'Review decision' },
+          reasoning: { type: 'string', description: 'Verification results and feedback' }
+        },
+        required: ['decision', 'reasoning']
+      }
+    },
+    {
+      name: 'add_context',
+      description: 'Add user feedback to incorporate in the next round.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          context: { type: 'string', description: 'Additional context from user' }
+        },
+        required: ['context']
+      }
+    },
+    {
+      name: 'get_worker_brief',
+      description: 'Get the full brief for the Worker: task, codebase context, previous feedback, instructions.',
+      inputSchema: { type: 'object', properties: {} }
+    },
+    {
+      name: 'get_reviewer_brief',
+      description: 'Get the full brief for the Reviewer: proposal to verify, codebase context, instructions.',
+      inputSchema: { type: 'object', properties: {} }
+    },
+    {
+      name: 'approve',
+      description: 'User manually approves, bypassing further review. Use when proposal is clearly correct.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          reason: { type: 'string', description: 'Why approving manually' }
+        },
+        required: ['reason']
+      }
+    },
+    {
+      name: 'implement',
+      description: 'Begin implementation phase. Only allowed after approval.',
+      inputSchema: { type: 'object', properties: {} }
+    },
+    {
+      name: 'complete',
+      description: 'Mark implementation complete. Returns to conversation phase.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          summary: { type: 'string', description: 'Summary of changes made' }
+        },
+        required: ['summary']
+      }
+    },
+    {
+      name: 'abort',
+      description: 'Cancel workflow and reset to conversation.',
+      inputSchema: {
+        type: 'object',
+        properties: {
+          reason: { type: 'string', description: 'Why aborting' }
+        }
+      }
+    }
+  ]
+}));
+
+// =============================================================================
+// Tool Handlers
+// =============================================================================
+
+server.setRequestHandler(CallToolRequestSchema, async (request) => {
+  const { name, arguments: args } = request.params;
+
+  try {
+    switch (name) {
+      case 'status': {
+        const lines = [
+          `Phase: ${state.phase}`,
+          `Task: ${state.task || '(none)'}`,
+          `Round: ${state.round}/${MAX_ROUNDS}`,
+          `Proposal: ${state.proposal ? `${state.proposal.length} chars` : '(none)'}`,
+          `Feedback rounds: ${state.feedback.length}`,
+          state.userContext.length ? `User context: ${state.userContext.length} items` : null,
+          state.approvalReason ? `Approved: ${state.approvalReason.slice(0, 80)}...` : null,
+          `Project: ${PROJECT_ROOT}`,
+        ].filter(Boolean);
+        return { content: [{ type: 'text', text: lines.join('\n') }] };
+      }
+
+      case 'start': {
+        if (state.phase !== 'conversation') {
+          return { content: [{ type: 'text', text: `Already in "${state.phase}" phase. Use abort first.` }], isError: true };
+        }
+        state.phase = 'researching';
+        state.task = args.task;
+        state.round = 1;
+        state.proposal = null;
+        state.feedback = [];
+        state.userContext = [];
+        state.approvalReason = null;
+        state.startedAt = new Date().toISOString();
+        saveState();
+        return { content: [{ type: 'text', text: `Started: "${args.task}"\n\nPhase: researching\nRound: 1/${MAX_ROUNDS}\n\nWorker should use get_worker_brief for context, then research and propose.` }] };
+      }
+
+      case 'propose': {
+        if (state.phase !== 'researching') {
+          return { content: [{ type: 'text', text: `Not in researching phase (current: ${state.phase})` }], isError: true };
+        }
+        state.proposal = args.proposal;
+        saveState();
+        return { content: [{ type: 'text', text: `Proposal submitted (${args.proposal.length} chars).\n\nRound: ${state.round}/${MAX_ROUNDS}\n\nReviewer should use get_reviewer_brief, verify claims by searching codebase, then review.` }] };
+      }
+
+      case 'review': {
+        if (state.phase !== 'researching') {
+          return { content: [{ type: 'text', text: `Not in researching phase (current: ${state.phase})` }], isError: true };
+        }
+        if (!state.proposal) {
+          return { content: [{ type: 'text', text: 'No proposal to review yet' }], isError: true };
+        }
+
+        const { decision, reasoning } = args;
+
+        if (decision === 'APPROVED') {
+          state.phase = 'approved';
+          state.approvalReason = reasoning;
+          saveState();
+          return { content: [{ type: 'text', text: `APPROVED after ${state.round} round(s).\n\n${reasoning.slice(0, 500)}\n\nUse implement to begin making changes.` }] };
+        }
+
+        if (decision === 'NEEDS_WORK') {
+          state.feedback.push({ round: state.round, feedback: reasoning });
+          state.round++;
+          state.userContext = [];
+
+          if (state.round > MAX_ROUNDS) {
+            const lastFeedback = reasoning;
+            resetState();
+            return { content: [{ type: 'text', text: `MAX ROUNDS (${MAX_ROUNDS}) reached. Workflow reset.\n\nLast feedback: ${lastFeedback.slice(0, 500)}\n\nBreak down the task or discuss with user.` }] };
+          }
+
+          saveState();
+          return { content: [{ type: 'text', text: `NEEDS_WORK - Round ${state.round}/${MAX_ROUNDS}\n\n${reasoning}\n\nWorker should revise addressing this feedback.` }] };
+        }
+
+        return { content: [{ type: 'text', text: `Invalid decision: ${decision}` }], isError: true };
+      }
+
+      case 'add_context': {
+        if (state.phase !== 'researching') {
+          return { content: [{ type: 'text', text: `Not in researching phase` }], isError: true };
+        }
+        state.userContext.push(args.context);
+        saveState();
+        return { content: [{ type: 'text', text: `Context added. ${state.userContext.length} item(s) pending.` }] };
+      }
+
+      case 'get_worker_brief': {
+        let brief = '# Worker Brief\n\n';
+        brief += `## Task\n\n${state.task || '(No task - use start first)'}\n\n`;
+        brief += `## Round\n\n${state.round}/${MAX_ROUNDS}\n\n`;
+
+        if (state.feedback.length > 0) {
+          brief += '## Previous Feedback (Address ALL)\n\n';
+          for (const f of state.feedback) {
+            brief += `### Round ${f.round}\n\n${f.feedback}\n\n`;
+          }
+        }
+
+        if (state.userContext.length > 0) {
+          brief += '## User Context\n\n';
+          state.userContext.forEach(c => brief += `- ${c}\n`);
+          brief += '\n';
+        }
+
+        brief += '## Codebase Context\n\n';
+        brief += getCodebaseContext();
+        brief += '\n\n';
+
+        const workerPrompt = getWorkerPrompt();
+        if (workerPrompt) {
+          brief += '## Instructions\n\n';
+          brief += workerPrompt;
+        }
+
+        return { content: [{ type: 'text', text: brief }] };
+      }
+
+      case 'get_reviewer_brief': {
+        let brief = '# Reviewer Brief\n\n';
+        brief += `## Task\n\n${state.task || '(No task)'}\n\n`;
+        brief += `## Round\n\n${state.round}/${MAX_ROUNDS}\n\n`;
+
+        if (state.proposal) {
+          brief += '## Proposal to Review\n\n';
+          brief += state.proposal;
+          brief += '\n\n';
+        } else {
+          brief += '## Proposal\n\n(No proposal yet)\n\n';
+        }
+
+        if (state.feedback.length > 0) {
+          brief += '## Previous Feedback\n\n';
+          for (const f of state.feedback) {
+            brief += `### Round ${f.round}\n\n${f.feedback}\n\n`;
+          }
+        }
+
+        brief += '## Codebase Context\n\n';
+        brief += getCodebaseContext();
+        brief += '\n\n';
+
+        const reviewerPrompt = getReviewerPrompt();
+        if (reviewerPrompt) {
+          brief += '## Instructions\n\n';
+          brief += reviewerPrompt;
+        }
+
+        return { content: [{ type: 'text', text: brief }] };
+      }
+
+      case 'approve': {
+        if (state.phase !== 'researching') {
+          return { content: [{ type: 'text', text: `Not in researching phase` }], isError: true };
+        }
+        if (!state.proposal) {
+          return { content: [{ type: 'text', text: 'No proposal to approve' }], isError: true };
+        }
+        state.phase = 'approved';
+        state.approvalReason = `USER: ${args.reason}`;
+        saveState();
+        return { content: [{ type: 'text', text: `Manually approved.\n\nUse implement to begin.` }] };
+      }
+
+      case 'implement': {
+        if (state.phase !== 'approved') {
+          return { content: [{ type: 'text', text: `Not approved yet (current: ${state.phase})` }], isError: true };
+        }
+        state.phase = 'implementing';
+        saveState();
+        return { content: [{ type: 'text', text: `Implementation phase.\n\nTask: ${state.task}\n\nMake changes following the approved proposal.` }] };
+      }
+
+      case 'complete': {
+        if (state.phase !== 'implementing') {
+          return { content: [{ type: 'text', text: `Not in implementing phase` }], isError: true };
+        }
+        const task = state.task;
+        const rounds = state.round;
+        resetState();
+        return { content: [{ type: 'text', text: `Complete!\n\nTask: ${task}\nRounds: ${rounds}\nSummary: ${args.summary}\n\nReset to conversation.` }] };
+      }
+
+      case 'abort': {
+        const prev = { phase: state.phase, task: state.task };
+        resetState();
+        return { content: [{ type: 'text', text: `Aborted.\n\nWas: ${prev.phase}, "${prev.task || ''}"\nReason: ${args.reason || '(none)'}\n\nReset to conversation.` }] };
+      }
+
+      default:
+        throw new Error(`Unknown tool: ${name}`);
+    }
+  } catch (e) {
+    return { content: [{ type: 'text', text: `Error: ${e.message}` }], isError: true };
+  }
+});
+
+// =============================================================================
+// Lifecycle
+// =============================================================================
+
+process.on('SIGTERM', () => { saveState(); process.exit(0); });
+process.on('SIGINT', () => { saveState(); process.exit(0); });
+
+loadState();
+const transport = new StdioServerTransport();
+await server.connect(transport);
diff --git a/package-lock.json b/package-lock.json
new file mode 100644
index 0000000..57254d3
--- /dev/null
+++ b/package-lock.json
@@ -0,0 +1,162 @@
+{
+  "name": "@strikt/diligence",
+  "version": "0.1.0",
+  "lockfileVersion": 3,
+  "requires": true,
+  "packages": {
+    "": {
+      "name": "@strikt/diligence",
+      "version": "0.1.0",
+      "license": "MIT",
+      "dependencies": {
+        "@modelcontextprotocol/sdk": "^0.5.0"
+      },
+      "bin": {
+        "diligence": "index.mjs"
+      }
+    },
+    "node_modules/@modelcontextprotocol/sdk": {
+      "version": "0.5.0",
+      "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-0.5.0.tgz",
+      "integrity": "sha512-RXgulUX6ewvxjAG0kOpLMEdXXWkzWgaoCGaA2CwNW7cQCIphjpJhjpHSiaPdVCnisjRF/0Cm9KWHUuIoeiAblQ==",
+      "license": "MIT",
+      "dependencies": {
+        "content-type": "^1.0.5",
+        "raw-body": "^3.0.0",
+        "zod": "^3.23.8"
+      }
+    },
+    "node_modules/bytes": {
+      "version": "3.1.2",
+      "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
+      "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/content-type": {
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz",
+      "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.6"
+      }
+    },
+    "node_modules/depd": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz",
+      "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/http-errors": {
+      "version": "2.0.1",
+      "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz",
+      "integrity": "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==",
+      "license": "MIT",
+      "dependencies": {
+        "depd": "~2.0.0",
+        "inherits": "~2.0.4",
+        "setprototypeof": "~1.2.0",
+        "statuses": "~2.0.2",
+        "toidentifier": "~1.0.1"
+      },
+      "engines": {
+        "node": ">= 0.8"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/iconv-lite": {
+      "version": "0.7.2",
+      "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz",
+      "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==",
+      "license": "MIT",
+      "dependencies": {
+        "safer-buffer": ">= 2.1.2 < 3.0.0"
+      },
+      "engines": {
+        "node": ">=0.10.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/express"
+      }
+    },
+    "node_modules/inherits": {
+      "version": "2.0.4",
+      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
+      "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==",
+      "license": "ISC"
+    },
+    "node_modules/raw-body": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.2.tgz",
+      "integrity": "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==",
+      "license": "MIT",
+      "dependencies": {
+        "bytes": "~3.1.2",
+        "http-errors": "~2.0.1",
+        "iconv-lite": "~0.7.0",
+        "unpipe": "~1.0.0"
+      },
+      "engines": {
+        "node": ">= 0.10"
+      }
+    },
+    "node_modules/safer-buffer": {
+      "version": "2.1.2",
+      "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz",
+      "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==",
+      "license": "MIT"
+    },
+    "node_modules/setprototypeof": {
+      "version": "1.2.0",
+      "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz",
+      "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==",
+      "license": "ISC"
+    },
+    "node_modules/statuses": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz",
+      "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/toidentifier": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz",
+      "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==",
+      "license": "MIT",
+      "engines": {
+        "node": ">=0.6"
+      }
+    },
+    "node_modules/unpipe": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz",
+      "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==",
+      "license": "MIT",
+      "engines": {
+        "node": ">= 0.8"
+      }
+    },
+    "node_modules/zod": {
+      "version": "3.25.76",
+      "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz",
+      "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==",
+      "license": "MIT",
+      "funding": {
+        "url": "https://github.com/sponsors/colinhacks"
+      }
+    }
+  }
+}
diff --git a/package.json b/package.json
new file mode 100644
index 0000000..5344225
--- /dev/null
+++ b/package.json
@@ -0,0 +1,21 @@
+{
+  "name": "@strikt/diligence",
+  "version": "0.1.0",
+  "description": "MCP server enforcing diligent code changes through worker-reviewer loop",
+  "type": "module",
+  "main": "index.mjs",
+  "bin": {
+    "diligence": "./index.mjs"
+  },
+  "scripts": {
+    "test": "node test/run-tests.mjs --workflow && node test/run-tests.mjs --mock",
+    "test:workflow": "node test/run-tests.mjs --workflow",
+    "test:mock": "node test/run-tests.mjs --mock",
+    "test:dry-run": "node test/dry-run.mjs"
+  },
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^0.5.0"
+  },
+  "keywords": ["mcp", "claude", "code-review", "diligence"],
+  "license": "MIT"
+}
diff --git a/test/compare-approaches.mjs b/test/compare-approaches.mjs
new file mode 100644
index 0000000..df54d4c
--- /dev/null
+++ b/test/compare-approaches.mjs
@@ -0,0 +1,255 @@
+#!/usr/bin/env node
+/**
+ * Comparison Test: Naive vs Diligence Approach
+ *
+ * This script coordinates testing of both approaches:
+ * 1. Naive: A single agent analyzes and proposes a fix
+ * 2. Diligence: Worker-Reviewer loop with separate agents
+ *
+ * The test uses a real bug from the nexus codebase.
+ *
+ * Usage:
+ *   node test/compare-approaches.mjs
+ */
+
+import { writeFileSync, mkdirSync, existsSync } from 'fs';
+import { dirname, join } from 'path';
+import { fileURLToPath } from 'url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const RESULTS_DIR = join(__dirname, 'results');
+
+// Ensure results directory exists
+if (!existsSync(RESULTS_DIR)) {
+  mkdirSync(RESULTS_DIR, { recursive: true });
+}
+
+const TEST_BUG = {
+  id: 'B1',
+  name: 'Blocked users can join/answer DM voice calls',
+  task: `Fix bug B1: Blocked users can join DM voice calls.
+
+When user A blocks user B, user B should NOT be able to:
+1. Answer incoming DM calls from user A
+2. Start new calls to user A (already works)
+3. Join DM voice channel with user A (already works in joinVoiceChannel)
+
+The bug is that answerDmCall() has no blocking check.
+
+Analyze the codebase and propose a COMPLETE fix.`,
+
+  // What naive agents typically miss
+  naive_misses: [
+    'declineDmCall() also needs blocking check for consistency',
+    'notifyDmCall() should filter blocked users from notifications',
+    'blockUser() should clean up existing voice calls',
+    'Need to subscribe to BusUserBlockChange for mid-call kick',
+    'Should follow the pattern from chat.service.ts where permission=visibility, actions have separate checks',
+  ],
+
+  // Required elements for a complete fix
+  required_elements: [
+    'answerDmCall blocking check',
+    'declineDmCall blocking check',
+    'notification filtering',
+    'voice cleanup in blockUser()',
+    'BusUserBlockChange subscription',
+    'chat.service.ts pattern reference',
+  ],
+};
+
+// Prompts for the test
+const NAIVE_PROMPT = `You are analyzing a bug in the nexus codebase.
+
+BUG: ${TEST_BUG.task}
+
+Your job is to:
+1. Search the codebase to understand the current implementation
+2. Identify all files that need changes
+3. Propose a complete fix
+
+DO NOT use any diligence MCP tools. Just analyze and propose.
+
+Be thorough - check for:
+- Similar patterns in the codebase
+- Broker events that might be relevant
+- All places where blocking should be enforced
+- Edge cases (what if block happens mid-call?)
+
+Output your analysis and proposed fix.`;
+
+const WORKER_PROMPT = `You are a Worker agent in the diligence workflow.
+
+Your brief has been loaded with:
+- The task description
+- Codebase context (architecture, patterns)
+- Any previous feedback
+
+Your job:
+1. Research the codebase thoroughly
+2. Trace data flow from origin to all consumers
+3. Find existing patterns for similar functionality
+4. Identify ALL files that need changes
+5. Propose a fix with file:line citations for every claim
+
+IMPORTANT:
+- Cite specific file:line for every claim
+- Search for similar patterns (how does chat handle blocking?)
+- Don't miss broker events
+- Consider edge cases (mid-call blocking)
+
+Submit your proposal via mcp__diligence__propose when ready.`;
+
+const REVIEWER_PROMPT = `You are a Reviewer agent in the diligence workflow.
+
+Your brief has been loaded with:
+- The Worker's proposal
+- The task description
+- Codebase context
+
+Your job:
+1. VERIFY every claim by searching the codebase yourself
+2. Check if the proposal follows existing patterns
+3. Look for missing broker events or edge cases
+4. Do NOT trust the Worker's citations - verify them
+
+For each claim in the proposal:
+- Search for the file/line cited
+- Verify it says what the Worker claims
+- Check if there are related issues the Worker missed
+
+Submit your review via mcp__diligence__review:
+- APPROVED if all checks pass
+- NEEDS_WORK with specific issues if not
+
+Be strict - missing one broker event subscription can cause production bugs.`;
+
+function log(msg) {
+  const timestamp = new Date().toISOString().slice(11, 19);
+  console.log(`[${timestamp}] ${msg}`);
+}
+
+function saveResult(name, content) {
+  const timestamp = new Date().toISOString().slice(0, 10);
+  const filename = `${timestamp}-${name}.md`;
+  const path = join(RESULTS_DIR, filename);
+  writeFileSync(path, content);
+  log(`Saved: ${path}`);
+  return path;
+}
+
+// Generate the test instructions
+function generateTestInstructions() {
+  const instructions = `# Diligence Comparison Test
+
+## Test Bug
+**ID:** ${TEST_BUG.id}
+**Name:** ${TEST_BUG.name}
+
+## Task
+${TEST_BUG.task}
+
+---
+
+## Phase 1: Naive Approach (WITHOUT Diligence)
+
+In a Claude Code session, paste this prompt:
+
+\`\`\`
+${NAIVE_PROMPT}
+\`\`\`
+
+Save the output as the "naive proposal".
+
+---
+
+## Phase 2: Diligence Approach (WITH Worker-Reviewer Loop)
+
+### Step 1: Start the workflow
+\`\`\`
+mcp__diligence__start with task: "${TEST_BUG.task.split('\n')[0]}"
+\`\`\`
+
+### Step 2: Spawn Worker Agent
+\`\`\`
+1. Call mcp__diligence__get_worker_brief
+2. Use Task tool with subagent_type="Explore" and this prompt:
+   "${WORKER_PROMPT.replace(/\n/g, ' ').slice(0, 200)}..."
+3. Worker should research and call mcp__diligence__propose
+\`\`\`
+
+### Step 3: Spawn Reviewer Agent
+\`\`\`
+1. Call mcp__diligence__get_reviewer_brief
+2. Use Task tool with subagent_type="Explore" and this prompt:
+   "${REVIEWER_PROMPT.replace(/\n/g, ' ').slice(0, 200)}..."
+3. Reviewer should verify and call mcp__diligence__review
+\`\`\`
+
+### Step 4: Loop or Complete
+- If NEEDS_WORK: spawn new Worker with updated brief
+- If APPROVED: call mcp__diligence__implement
+
+Save the final approved proposal as the "diligence proposal".
+
+---
+
+## Phase 3: Compare Results
+
+### Checklist - What Naive Typically Misses
+${TEST_BUG.naive_misses.map(m => `- [ ] ${m}`).join('\n')}
+
+### Required Elements for Complete Fix
+${TEST_BUG.required_elements.map(e => `- [ ] ${e}`).join('\n')}
+
+### Scoring
+- Naive proposal: Count how many required elements it includes
+- Diligence proposal: Count how many required elements it includes
+- Did diligence catch issues that naive missed?
+
+---
+
+## Expected Outcome
+
+The naive approach will likely:
+- Add blocking check to answerDmCall() only
+- Miss the other 5 required elements
+
+The diligence approach should:
+- Catch missing elements during review
+- Iterate until all elements are addressed
+- Produce a more complete proposal
+
+`;
+
+  return instructions;
+}
+
+// Main
+async function main() {
+  log('Generating comparison test instructions...');
+
+  const instructions = generateTestInstructions();
+  const path = saveResult('comparison-test-instructions', instructions);
+
+  console.log('\n' + '='.repeat(60));
+  console.log('COMPARISON TEST READY');
+  console.log('='.repeat(60));
+  console.log(`\nInstructions saved to: ${path}`);
+  console.log('\nTo run the test:');
+  console.log('1. Open the instructions file');
+  console.log('2. Start a Claude Code session in ~/bude/codecharm/nexus');
+  console.log('3. Run Phase 1 (naive) and save the output');
+  console.log('4. Run Phase 2 (diligence) and save the output');
+  console.log('5. Compare using the checklist in Phase 3');
+  console.log('\n');
+
+  // Also print the naive prompt for immediate use
+  console.log('='.repeat(60));
+  console.log('NAIVE PROMPT (for quick testing):');
+  console.log('='.repeat(60));
+  console.log(NAIVE_PROMPT);
+  console.log('\n');
+}
+
+main().catch(console.error);
diff --git a/test/dry-run.mjs b/test/dry-run.mjs
new file mode 100644
index 0000000..f2e6664
--- /dev/null
+++ b/test/dry-run.mjs
@@ -0,0 +1,305 @@
+#!/usr/bin/env node
+/**
+ * Dry Run Test Against Real Project
+ *
+ * Runs the diligence MCP server against a real project (e.g., nexus) in dry-run mode.
+ * This tests the full workflow without making any code changes.
+ *
+ * Usage:
+ *   node test/dry-run.mjs --project=/path/to/nexus --task="Fix permission cache"
+ *   node test/dry-run.mjs --project=~/bude/codecharm/nexus --scenario=blocking-voice
+ *
+ * Options:
+ *   --project=PATH    Path to the project to test against
+ *   --task=TEXT       Task description to start the workflow with
+ *   --scenario=ID     Use a predefined scenario from test/scenarios/
+ *   --interactive     Run in interactive mode (prompts for input)
+ */
+
+import { spawn } from 'child_process';
+import { createInterface } from 'readline';
+import { dirname, join, resolve } from 'path';
+import { fileURLToPath } from 'url';
+import { existsSync, readFileSync } from 'fs';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+// Parse CLI args
+const args = process.argv.slice(2);
+const projectArg = args.find(a => a.startsWith('--project='));
+const taskArg = args.find(a => a.startsWith('--task='));
+const scenarioArg = args.find(a => a.startsWith('--scenario='));
+const interactive = args.includes('--interactive') || args.includes('-i');
+
+// Resolve project path
+let projectPath = projectArg ? projectArg.split('=')[1] : null;
+if (projectPath) {
+  projectPath = projectPath.replace(/^~/, process.env.HOME);
+  projectPath = resolve(projectPath);
+}
+
+// Colors
+const colors = {
+  reset: '\x1b[0m',
+  green: '\x1b[32m',
+  red: '\x1b[31m',
+  yellow: '\x1b[33m',
+  blue: '\x1b[34m',
+  cyan: '\x1b[36m',
+  dim: '\x1b[2m',
+  bold: '\x1b[1m',
+};
+
+function log(msg, color = 'reset') {
+  console.log(`${colors[color]}${msg}${colors.reset}`);
+}
+
+function logSection(title) {
+  console.log(`\n${colors.cyan}${colors.bold}═══ ${title} ═══${colors.reset}\n`);
+}
+
+// Load scenario
+function loadScenario(id) {
+  const path = join(__dirname, 'scenarios', `${id}.json`);
+  if (!existsSync(path)) {
+    throw new Error(`Scenario not found: ${id}`);
+  }
+  return JSON.parse(readFileSync(path, 'utf-8'));
+}
+
+// Simple MCP client for dry run
+class DryRunClient {
+  constructor(projectPath) {
+    this.projectPath = projectPath;
+    this.serverPath = join(__dirname, '..', 'index.mjs');
+    this.process = null;
+    this.requestId = 0;
+    this.pendingRequests = new Map();
+    this.readline = null;
+  }
+
+  async connect() {
+    return new Promise((resolve, reject) => {
+      this.process = spawn('node', [this.serverPath], {
+        stdio: ['pipe', 'pipe', 'pipe'],
+        cwd: this.projectPath,
+      });
+
+      this.readline = createInterface({
+        input: this.process.stdout,
+        crlfDelay: Infinity,
+      });
+
+      this.readline.on('line', (line) => {
+        try {
+          const message = JSON.parse(line);
+          if (message.id !== undefined && this.pendingRequests.has(message.id)) {
+            const { resolve, reject } = this.pendingRequests.get(message.id);
+            this.pendingRequests.delete(message.id);
+            if (message.error) {
+              reject(new Error(message.error.message || JSON.stringify(message.error)));
+            } else {
+              resolve(message.result);
+            }
+          }
+        } catch (e) {
+          // Ignore non-JSON lines
+        }
+      });
+
+      this.process.stderr.on('data', (data) => {
+        // Show server stderr in debug mode
+        if (process.env.DEBUG) {
+          console.error(colors.dim + '[server] ' + data.toString() + colors.reset);
+        }
+      });
+
+      this.process.on('error', reject);
+
+      // Initialize
+      this._send({
+        jsonrpc: '2.0',
+        id: this.requestId++,
+        method: 'initialize',
+        params: {
+          protocolVersion: '0.1.0',
+          clientInfo: { name: 'dry-run-client', version: '1.0.0' },
+          capabilities: {},
+        },
+      }).then(() => {
+        this._sendNotification('notifications/initialized', {});
+        resolve();
+      }).catch(reject);
+    });
+  }
+
+  async disconnect() {
+    if (this.process) {
+      this.process.kill('SIGTERM');
+      this.process = null;
+    }
+  }
+
+  _send(message) {
+    return new Promise((resolve, reject) => {
+      this.pendingRequests.set(message.id, { resolve, reject });
+      this.process.stdin.write(JSON.stringify(message) + '\n');
+      setTimeout(() => {
+        if (this.pendingRequests.has(message.id)) {
+          this.pendingRequests.delete(message.id);
+          reject(new Error('Request timeout'));
+        }
+      }, 30000);
+    });
+  }
+
+  _sendNotification(method, params) {
+    this.process.stdin.write(JSON.stringify({ jsonrpc: '2.0', method, params }) + '\n');
+  }
+
+  async callTool(name, args = {}) {
+    const result = await this._send({
+      jsonrpc: '2.0',
+      id: this.requestId++,
+      method: 'tools/call',
+      params: { name, arguments: args },
+    });
+    if (result.content?.[0]?.text) {
+      return { text: result.content[0].text, isError: result.isError || false };
+    }
+    return result;
+  }
+}
+
+// Interactive prompt
+function prompt(question) {
+  const rl = createInterface({
+    input: process.stdin,
+    output: process.stdout,
+  });
+  return new Promise(resolve => {
+    rl.question(question, answer => {
+      rl.close();
+      resolve(answer);
+    });
+  });
+}
+
+async function main() {
+  log('\n🔍 Diligence Dry Run\n', 'cyan');
+
+  // Validate project path
+  if (!projectPath) {
+    log('Error: --project=PATH required', 'red');
+    log('\nUsage:', 'dim');
+    log('  node test/dry-run.mjs --project=~/bude/codecharm/nexus --task="Fix bug"', 'dim');
+    process.exit(1);
+  }
+
+  if (!existsSync(projectPath)) {
+    log(`Error: Project path not found: ${projectPath}`, 'red');
+    process.exit(1);
+  }
+
+  // Check for CODEBASE_CONTEXT.md
+  const contextPath = join(projectPath, '.claude', 'CODEBASE_CONTEXT.md');
+  if (!existsSync(contextPath)) {
+    log(`Warning: No .claude/CODEBASE_CONTEXT.md found in ${projectPath}`, 'yellow');
+    log('The Worker and Reviewer will have limited context.', 'dim');
+  } else {
+    log(`Found: ${contextPath}`, 'green');
+  }
+
+  // Determine task
+  let task;
+  if (scenarioArg) {
+    const scenarioId = scenarioArg.split('=')[1];
+    const scenario = loadScenario(scenarioId);
+    task = scenario.task;
+    log(`Using scenario: ${scenario.name}`, 'blue');
+  } else if (taskArg) {
+    task = taskArg.split('=')[1];
+  } else if (interactive) {
+    task = await prompt('Enter task: ');
+  } else {
+    log('Error: Either --task=TEXT or --scenario=ID required', 'red');
+    process.exit(1);
+  }
+
+  log(`\nProject: ${projectPath}`, 'dim');
+  log(`Task: ${task}\n`, 'dim');
+
+  // Connect to MCP server
+  const client = new DryRunClient(projectPath);
+
+  try {
+    log('Connecting to MCP server...', 'dim');
+    await client.connect();
+    log('Connected!', 'green');
+
+    // Check initial status
+    logSection('Status');
+    const status = await client.callTool('status');
+    log(status.text, 'dim');
+
+    // Start the workflow
+    logSection('Starting Workflow');
+    const startResult = await client.callTool('start', { task });
+    log(startResult.text, startResult.isError ? 'red' : 'green');
+
+    if (startResult.isError) {
+      // Try to abort and restart
+      log('\nAborting existing workflow...', 'yellow');
+      await client.callTool('abort', { reason: 'Dry run restart' });
+      const retryResult = await client.callTool('start', { task });
+      log(retryResult.text, retryResult.isError ? 'red' : 'green');
+    }
+
+    // Get worker brief
+    logSection('Worker Brief');
+    const workerBrief = await client.callTool('get_worker_brief');
+
+    // Show truncated brief
+    const briefLines = workerBrief.text.split('\n');
+    const truncatedBrief = briefLines.slice(0, 50).join('\n');
+    log(truncatedBrief, 'dim');
+    if (briefLines.length > 50) {
+      log(`\n... (${briefLines.length - 50} more lines)`, 'dim');
+    }
+
+    logSection('Dry Run Complete');
+
+    log(`
+${colors.yellow}What happens next in a real session:${colors.reset}
+
+1. ${colors.bold}Worker Agent${colors.reset} (fresh sub-agent) receives the brief above
+   - Researches the codebase using Glob, Grep, Read tools
+   - Proposes a fix with file:line citations
+   - Submits via ${colors.cyan}diligence.propose${colors.reset}
+
+2. ${colors.bold}Reviewer Agent${colors.reset} (fresh sub-agent) verifies the proposal
+   - Searches codebase to verify Worker's claims
+   - Checks against patterns in CODEBASE_CONTEXT.md
+   - Submits ${colors.green}APPROVED${colors.reset} or ${colors.yellow}NEEDS_WORK${colors.reset} via ${colors.cyan}diligence.review${colors.reset}
+
+3. If ${colors.yellow}NEEDS_WORK${colors.reset}: Worker revises, Reviewer re-checks (up to 5 rounds)
+
+4. If ${colors.green}APPROVED${colors.reset}: ${colors.cyan}diligence.implement${colors.reset} → code changes → ${colors.cyan}diligence.complete${colors.reset}
+
+${colors.dim}This was a dry run - no code changes were made.${colors.reset}
+`, 'reset');
+
+    // Cleanup - abort the workflow
+    await client.callTool('abort', { reason: 'Dry run completed' });
+    log('Workflow aborted (dry run cleanup)', 'dim');
+
+  } finally {
+    await client.disconnect();
+    log('\nDisconnected from MCP server.', 'dim');
+  }
+}
+
+main().catch(err => {
+  console.error('Error:', err.message);
+  process.exit(1);
+});
diff --git a/test/fixture/.claude/CODEBASE_CONTEXT.md b/test/fixture/.claude/CODEBASE_CONTEXT.md
new file mode 100644
index 0000000..46de116
--- /dev/null
+++ b/test/fixture/.claude/CODEBASE_CONTEXT.md
@@ -0,0 +1,150 @@
+# Codebase Context: Test Fixture
+
+This is a simplified test codebase that mirrors real-world patterns. Use this context to understand the architecture before making changes.
+
+## Architecture Overview
+
+```
+src/
+├── broker/
+│   └── events.ts          # Broker event bus (Subject-based pub/sub)
+├── services/
+│   ├── user-block.service.ts   # Blocking logic
+│   ├── voice-channel.service.ts # Voice channels and DM calls
+│   ├── chat.service.ts         # Chat channels and messages
+│   └── team.service.ts         # Team state and permission caching
+└── controllers/
+    └── roles.controller.ts     # REST API for roles
+```
+
+## Critical Pattern: Broker Events
+
+**All state changes that affect multiple services MUST emit broker events.**
+
+### Available Events
+
+| Event | Emitted When | Expected Subscribers |
+|-------|--------------|---------------------|
+| `BusUserBlockChange` | User blocks/unblocks another | Voice services, DM services |
+| `BusTeamRoleChange` | Role created/updated/deleted | Permission caches |
+| `BusTeamMemberRoleChange` | User role assigned/removed | Permission caches |
+| `BusVoiceParticipant` | User joins/leaves voice | Voice UI components |
+| `BusDmCall` | DM call state changes | Call observers |
+
+### Pattern Example
+
+```typescript
+// CORRECT: Emit event after state change
+async updateRole(teamId, roleId, updates) {
+  const role = await db.update(roleId, updates);
+
+  BusTeamRoleChange.next({  // ← MUST emit event
+    teamId,
+    roleId,
+    action: 'updated',
+    timestamp: new Date(),
+  });
+
+  return role;
+}
+```
+
+## Critical Pattern: Permission vs Action Checks
+
+**Permission = Visibility. Action = Separate Check.**
+
+### Why This Matters
+
+For DM channels, blocking creates a `'read'` permission, NOT `'denied'`. The user can still SEE the DM channel, but cannot SEND messages.
+
+```typescript
+// Permission check (for visibility)
+if (isBlocked) {
+  return { permission: 'read', reason: 'blocked' };  // ← 'read', not 'denied'
+}
+
+// Action check (separate from permission)
+async sendMessage(userId, channel, content) {
+  if (await isBlockingEitherWay(userA, userB)) {
+    throw new Error('Cannot send messages');  // ← Separate check
+  }
+}
+```
+
+### Voice Permission Pattern
+
+For DM channels, voice permissions are **always true**:
+
+```typescript
+return {
+  permission: 'read',
+  voiceListen: true,   // Always true for DM
+  voiceTalk: true,     // Blocking checked on JOIN, not here
+  voiceWebcam: true,
+  voiceScreenshare: true,
+};
+```
+
+Blocking is enforced by **action checks** in:
+- `joinVoiceChannel()` - line 33
+- `startDmCall()` - line 56
+
+## Critical Pattern: Cache Invalidation
+
+**Caches MUST subscribe to relevant broker events.**
+
+### Current Bug Pattern
+
+```typescript
+// BAD: Only clears on team switch
+constructor() {
+  teamChange$.subscribe(() => {
+    this.memoizedPermissions.clear();
+  });
+}
+
+// GOOD: Also clear on role changes
+constructor() {
+  teamChange$.subscribe(() => this.clearCache());
+  BusTeamRoleChange.subscribe(() => this.clearCache());      // ← ADD THIS
+  BusTeamMemberRoleChange.subscribe(() => this.clearCache()); // ← AND THIS
+}
+```
+
+## Checklist: Before Making Changes
+
+### For ANY state change:
+
+1. [ ] Does this change affect other services?
+2. [ ] Is there a broker event for this? If not, should there be?
+3. [ ] Are all relevant services subscribed to the event?
+
+### For blocking-related changes:
+
+1. [ ] Is blocking checked on all relevant ACTIONS (not just permissions)?
+2. [ ] What happens if block is created DURING an action (e.g., mid-call)?
+3. [ ] Are broker events emitted for blocking changes?
+4. [ ] Do voice services subscribe to `BusUserBlockChange`?
+
+### For permission/cache changes:
+
+1. [ ] What events should invalidate this cache?
+2. [ ] Is the cache subscribed to all relevant broker events?
+3. [ ] What's the TTL? Is stale data acceptable?
+
+## Files Quick Reference
+
+| File | Key Functions | Known Issues |
+|------|---------------|--------------|
+| `user-block.service.ts` | `blockUser()`, `unblockUser()` | Missing voice cleanup on block |
+| `voice-channel.service.ts` | `answerDmCall()`, `startDmCall()` | Missing blocking check on answer |
+| `team.service.ts` | `getPermission()`, `clearCache()` | Cache doesn't subscribe to role events |
+| `roles.controller.ts` | `createRole()`, `deleteRole()` | Missing broker events |
+| `chat.service.ts` | `getChannelPermission()` | **Reference implementation** (correct) |
+
+## Anti-Patterns to Avoid
+
+1. **Fixing in ONE place** - If blocking is checked in `startDmCall()`, it should also be in `answerDmCall()`
+2. **Changing permissions** - Don't change `voiceListen: true` to `voiceListen: !isBlocked`. Use action checks instead.
+3. **Forgetting broker events** - Every CRUD operation on roles/permissions should emit an event
+4. **Assuming cache is fresh** - If an operation can change state, subscribe to its event
diff --git a/test/fixture/src/broker/events.ts b/test/fixture/src/broker/events.ts
new file mode 100644
index 0000000..7123f29
--- /dev/null
+++ b/test/fixture/src/broker/events.ts
@@ -0,0 +1,79 @@
+/**
+ * Broker Event Bus System
+ *
+ * All state changes that affect multiple services should emit broker events.
+ * Services subscribe to these events to maintain consistency.
+ */
+
+import { Subject } from 'rxjs';
+
+// ============================================================================
+// User Events
+// ============================================================================
+
+export interface UserBlockEvent {
+  sourceUserId: string;
+  targetUserId: string;
+  blocked: boolean;  // true = blocked, false = unblocked
+  timestamp: Date;
+}
+
+export const BusUserBlockChange = new Subject<UserBlockEvent>();
+
+// ============================================================================
+// Team Events
+// ============================================================================
+
+export interface TeamRoleEvent {
+  teamId: string;
+  roleId: string;
+  action: 'created' | 'updated' | 'deleted';
+  flags?: number;
+  timestamp: Date;
+}
+
+export interface TeamMemberRoleEvent {
+  teamId: string;
+  userId: string;
+  roleId: string;
+  action: 'assigned' | 'removed';
+  timestamp: Date;
+}
+
+export const BusTeamRoleChange = new Subject<TeamRoleEvent>();
+export const BusTeamMemberRoleChange = new Subject<TeamMemberRoleEvent>();
+
+// ============================================================================
+// Voice Events
+// ============================================================================
+
+export interface VoiceParticipantEvent {
+  channelId: string;
+  userId: string;
+  action: 'joined' | 'left' | 'kicked';
+  timestamp: Date;
+}
+
+export interface DmCallEvent {
+  callId: string;
+  callerId: string;
+  calleeId: string;
+  state: 'ringing' | 'active' | 'ended' | 'declined';
+  timestamp: Date;
+}
+
+export const BusVoiceParticipant = new Subject<VoiceParticipantEvent>();
+export const BusDmCall = new Subject<DmCallEvent>();
+
+// ============================================================================
+// Channel Events
+// ============================================================================
+
+export interface ChannelMemberEvent {
+  channelId: string;
+  userId: string;
+  hidden: boolean;
+  timestamp: Date;
+}
+
+export const BusChannelMember = new Subject<ChannelMemberEvent>();
diff --git a/test/fixture/src/controllers/roles.controller.ts b/test/fixture/src/controllers/roles.controller.ts
new file mode 100644
index 0000000..fbd36d8
--- /dev/null
+++ b/test/fixture/src/controllers/roles.controller.ts
@@ -0,0 +1,99 @@
+/**
+ * Roles Controller
+ *
+ * REST API for managing team roles.
+ */
+
+import { BusTeamRoleChange } from '../broker/events';
+
+interface TeamRole {
+  id: string;
+  teamId: string;
+  name: string;
+  flags: number;
+}
+
+// In-memory store
+const roles: TeamRole[] = [];
+
+export class RolesController {
+  /**
+   * Create a new role.
+   *
+   * BUG: Doesn't emit BusTeamRoleChange event!
+   * Clients won't know a new role was created.
+   */
+  async createRole(teamId: string, name: string, flags: number): Promise<TeamRole> {
+    const role: TeamRole = {
+      id: `role_${Date.now()}`,
+      teamId,
+      name,
+      flags,
+    };
+    roles.push(role);
+
+    // BUG: Missing broker event!
+    // Should emit:
+    // BusTeamRoleChange.next({
+    //   teamId,
+    //   roleId: role.id,
+    //   action: 'created',
+    //   flags,
+    //   timestamp: new Date(),
+    // });
+
+    return role;
+  }
+
+  /**
+   * Update an existing role.
+   *
+   * Emits broker event correctly (this one is fine).
+   */
+  async updateRole(teamId: string, roleId: string, updates: Partial<TeamRole>): Promise<TeamRole> {
+    const role = roles.find(r => r.id === roleId && r.teamId === teamId);
+    if (!role) throw new Error('Role not found');
+
+    Object.assign(role, updates);
+
+    // This one correctly emits the event
+    BusTeamRoleChange.next({
+      teamId,
+      roleId: role.id,
+      action: 'updated',
+      flags: role.flags,
+      timestamp: new Date(),
+    });
+
+    return role;
+  }
+
+  /**
+   * Delete a role.
+   *
+   * BUG: Doesn't emit BusTeamRoleChange event!
+   * Clients won't know the role was deleted, will have stale data.
+   */
+  async deleteRole(teamId: string, roleId: string): Promise<void> {
+    const index = roles.findIndex(r => r.id === roleId && r.teamId === teamId);
+    if (index === -1) throw new Error('Role not found');
+
+    roles.splice(index, 1);
+
+    // BUG: Missing broker event!
+    // Should emit:
+    // BusTeamRoleChange.next({
+    //   teamId,
+    //   roleId,
+    //   action: 'deleted',
+    //   timestamp: new Date(),
+    // });
+  }
+
+  /**
+   * Get all roles for a team.
+   */
+  async getRoles(teamId: string): Promise<TeamRole[]> {
+    return roles.filter(r => r.teamId === teamId);
+  }
+}
diff --git a/test/fixture/src/services/chat.service.ts b/test/fixture/src/services/chat.service.ts
new file mode 100644
index 0000000..ec3b18a
--- /dev/null
+++ b/test/fixture/src/services/chat.service.ts
@@ -0,0 +1,122 @@
+/**
+ * Chat Service
+ *
+ * Manages chat channels and permissions.
+ * This shows the CORRECT pattern for handling blocking in chat.
+ */
+
+import { UserBlockService } from './user-block.service';
+
+interface ChannelPermission {
+  permission: 'read' | 'write' | 'admin' | 'denied';
+  voiceListen: boolean;
+  voiceTalk: boolean;
+  voiceWebcam: boolean;
+  voiceScreenshare: boolean;
+  reason?: string;
+}
+
+interface Channel {
+  id: string;
+  type: 'dm' | 'project';
+  userA?: string;  // For DM channels
+  userB?: string;  // For DM channels
+  projectId?: string;  // For project channels
+}
+
+export class ChatService {
+  constructor(private userBlockService: UserBlockService) {}
+
+  /**
+   * Get permission for a channel.
+   *
+   * For DM channels:
+   * - Blocking returns 'read' permission (can see messages, can't send)
+   * - This is the CORRECT pattern: permission = visibility, not action validation
+   *
+   * For voice permissions in DM:
+   * - voiceListen, voiceTalk, etc. are always TRUE for DM channels
+   * - This is INTENTIONAL: voice blocking is handled by action checks, not permissions
+   *
+   * Pattern: Permission controls VISIBILITY; Actions have SEPARATE blocking checks.
+   * See chat.sendMessage() for how blocking is enforced on actions.
+   */
+  async getChannelPermission(
+    userId: string,
+    channel: Channel
+  ): Promise<ChannelPermission> {
+    if (channel.type === 'dm' && channel.userA && channel.userB) {
+      const otherUser = channel.userA === userId ? channel.userB : channel.userA;
+
+      // Check blocking status
+      const isBlockingOut = await this.userBlockService.isBlocking(userId, otherUser);
+      const isBlockingInc = await this.userBlockService.isBlocking(otherUser, userId);
+
+      // Return 'read' permission for blocked DMs (can see, can't send)
+      // This is the correct pattern - permission controls visibility
+      if (isBlockingOut) {
+        return {
+          permission: 'read',
+          reason: 'block-user',
+          // Voice permissions are always true for DM - blocking is checked on actions
+          voiceListen: true,
+          voiceTalk: true,
+          voiceWebcam: true,
+          voiceScreenshare: true,
+        };
+      }
+
+      if (isBlockingInc) {
+        return {
+          permission: 'read',
+          reason: 'blocked-by-user',
+          voiceListen: true,
+          voiceTalk: true,
+          voiceWebcam: true,
+          voiceScreenshare: true,
+        };
+      }
+
+      // Normal DM permission
+      return {
+        permission: 'write',
+        voiceListen: true,
+        voiceTalk: true,
+        voiceWebcam: true,
+        voiceScreenshare: true,
+      };
+    }
+
+    // Project channel - normal permission flow
+    return {
+      permission: 'write',
+      voiceListen: true,
+      voiceTalk: true,
+      voiceWebcam: true,
+      voiceScreenshare: true,
+    };
+  }
+
+  /**
+   * Send a message to a channel.
+   *
+   * This is the CORRECT pattern for blocking enforcement:
+   * - Check blocking SEPARATELY from permission
+   * - Permission controls visibility; this check controls action
+   */
+  async sendMessage(userId: string, channel: Channel, content: string): Promise<void> {
+    // Separate blocking check for the ACTION (not permission)
+    if (channel.type === 'dm' && channel.userA && channel.userB) {
+      const isBlocked = await this.userBlockService.isBlockingEitherWay(
+        channel.userA,
+        channel.userB
+      );
+      if (isBlocked) {
+        throw new Error('You cannot send messages to this user');
+      }
+    }
+
+    // Send the message...
+    console.log(`[chat] ${userId} -> ${channel.id}: ${content}`);
+  }
+}
diff --git a/test/fixture/src/services/team.service.ts b/test/fixture/src/services/team.service.ts
new file mode 100644
index 0000000..004cabd
--- /dev/null
+++ b/test/fixture/src/services/team.service.ts
@@ -0,0 +1,102 @@
+/**
+ * Team Service (Client-side)
+ *
+ * Manages team state including permissions and role caching.
+ */
+
+import { BusTeamRoleChange, BusTeamMemberRoleChange } from '../broker/events';
+
+interface Permission {
+  permission: 'read' | 'write' | 'admin' | 'denied';
+  voiceListen: boolean;
+  voiceTalk: boolean;
+  voiceWebcam: boolean;
+  voiceScreenshare: boolean;
+}
+
+interface Team {
+  id: string;
+  name: string;
+}
+
+/**
+ * Permission cache for computed permissions.
+ *
+ * BUG: This cache only clears on team switch, not on role changes!
+ * When a user's roles change, their cached permissions become stale.
+ */
+const memoizedPermissions = new Map<string, Permission>();
+
+export class TeamService {
+  private currentTeam: Team | null = null;
+
+  constructor() {
+    // Subscribe to team changes to clear cache
+    // BUG: Only clears on team SWITCH, not on role updates!
+    this.setupTeamChangeSubscription();
+
+    // BUG: Missing subscription to role changes!
+    // Should subscribe to BusTeamRoleChange and BusTeamMemberRoleChange
+    // and clear the cache when roles change.
+  }
+
+  /**
+   * Get cached permission for a project.
+   */
+  getPermission(projectId: string): Permission | undefined {
+    return memoizedPermissions.get(projectId);
+  }
+
+  /**
+   * Set cached permission for a project.
+   */
+  setPermission(projectId: string, permission: Permission): void {
+    memoizedPermissions.set(projectId, permission);
+  }
+
+  /**
+   * Clear permission cache.
+   *
+   * Called when active team changes.
+   * BUG: Should also be called when roles change!
+   */
+  clearPermissionCache(): void {
+    memoizedPermissions.clear();
+  }
+
+  /**
+   * Switch to a different team.
+   */
+  setActiveTeam(team: Team): void {
+    this.currentTeam = team;
+    this.clearPermissionCache();
+  }
+
+  /**
+   * Setup subscription to team changes.
+   *
+   * BUG: Only subscribes to team SWITCH, not to:
+   * - BusTeamRoleChange (role created/updated/deleted)
+   * - BusTeamMemberRoleChange (user role assigned/removed)
+   */
+  private setupTeamChangeSubscription(): void {
+    // This would normally be an observable subscription
+    // For now, we just clear on setActiveTeam()
+  }
+
+  /**
+   * FIX: Should add these subscriptions:
+   *
+   * BusTeamRoleChange.subscribe(event => {
+   *   if (event.teamId === this.currentTeam?.id) {
+   *     this.clearPermissionCache();
+   *   }
+   * });
+   *
+   * BusTeamMemberRoleChange.subscribe(event => {
+   *   if (event.teamId === this.currentTeam?.id) {
+   *     this.clearPermissionCache();
+   *   }
+   * });
+   */
+}
diff --git a/test/fixture/src/services/user-block.service.ts b/test/fixture/src/services/user-block.service.ts
new file mode 100644
index 0000000..f7984bf
--- /dev/null
+++ b/test/fixture/src/services/user-block.service.ts
@@ -0,0 +1,117 @@
+/**
+ * User Block Service
+ *
+ * Handles blocking/unblocking between users.
+ * Blocking affects:
+ * - DM visibility
+ * - Voice call permissions
+ * - Feed following
+ */
+
+import { BusUserBlockChange } from '../broker/events';
+
+interface UserBlockRecord {
+  sourceUserId: string;
+  targetUserId: string;
+  createdAt: Date;
+}
+
+// In-memory store for testing
+const blocks: UserBlockRecord[] = [];
+
+export class UserBlockService {
+  /**
+   * Block a user.
+   *
+   * When a user is blocked:
+   * 1. They can no longer send DMs
+   * 2. They are unfollowed from feeds
+   * 3. DM channel becomes read-only
+   *
+   * BUG: Missing voice call cleanup!
+   * - Should end any active DM call between these users
+   * - Should kick from shared voice channels
+   */
+  async blockUser(sourceUserId: string, targetUserId: string): Promise<void> {
+    // Check if already blocked
+    const existing = blocks.find(
+      b => b.sourceUserId === sourceUserId && b.targetUserId === targetUserId
+    );
+    if (existing) return;
+
+    // Create block record
+    const block: UserBlockRecord = {
+      sourceUserId,
+      targetUserId,
+      createdAt: new Date(),
+    };
+    blocks.push(block);
+
+    // Unfollow in both directions
+    await this.unfollowUser(sourceUserId, targetUserId);
+    await this.unfollowUser(targetUserId, sourceUserId);
+
+    // Emit broker event
+    BusUserBlockChange.next({
+      sourceUserId,
+      targetUserId,
+      blocked: true,
+      timestamp: block.createdAt,
+    });
+
+    // BUG: No voice cleanup here!
+    // Should call: voiceChannelService.endDmCallBetweenUsers(sourceUserId, targetUserId)
+    // Should call: voiceChannelService.kickFromSharedChannels(sourceUserId, targetUserId)
+  }
+
+  /**
+   * Unblock a user.
+   */
+  async unblockUser(sourceUserId: string, targetUserId: string): Promise<void> {
+    const index = blocks.findIndex(
+      b => b.sourceUserId === sourceUserId && b.targetUserId === targetUserId
+    );
+    if (index === -1) return;
+
+    blocks.splice(index, 1);
+
+    // Unhide DM channel
+    await this.unhideDmChannel(sourceUserId, targetUserId);
+
+    // Emit broker event
+    BusUserBlockChange.next({
+      sourceUserId,
+      targetUserId,
+      blocked: false,
+      timestamp: new Date(),
+    });
+  }
+
+  /**
+   * Check if either user has blocked the other.
+   */
+  async isBlockingEitherWay(userA: string, userB: string): Promise<boolean> {
+    return blocks.some(
+      b =>
+        (b.sourceUserId === userA && b.targetUserId === userB) ||
+        (b.sourceUserId === userB && b.targetUserId === userA)
+    );
+  }
+
+  /**
+   * Check if source has blocked target.
+   */
+  async isBlocking(sourceUserId: string, targetUserId: string): Promise<boolean> {
+    return blocks.some(
+      b => b.sourceUserId === sourceUserId && b.targetUserId === targetUserId
+    );
+  }
+
+  private async unfollowUser(userId: string, targetId: string): Promise<void> {
+    // Unfollow logic...
+  }
+
+  private async unhideDmChannel(userA: string, userB: string): Promise<void> {
+    // Unhide DM channel logic...
+  }
+}
diff --git a/test/fixture/src/services/voice-channel.service.ts b/test/fixture/src/services/voice-channel.service.ts
new file mode 100644
index 0000000..0dc9349
--- /dev/null
+++ b/test/fixture/src/services/voice-channel.service.ts
@@ -0,0 +1,220 @@
+/**
+ * Voice Channel Service
+ *
+ * Manages voice channel state, participants, and DM calls.
+ */
+
+import { BusVoiceParticipant, BusDmCall, BusUserBlockChange } from '../broker/events';
+import { UserBlockService } from './user-block.service';
+
+interface VoiceParticipant {
+  channelId: string;
+  odlUserId: string;
+  userId: string;
+  joinedAt: Date;
+  muted: boolean;
+  deafened: boolean;
+}
+
+interface DmCall {
+  callId: string;
+  callerId: string;
+  calleeId: string;
+  channelId: string;
+  state: 'ringing' | 'active' | 'ended' | 'declined';
+  createdAt: Date;
+}
+
+// In-memory stores
+const participants: VoiceParticipant[] = [];
+const dmCalls: DmCall[] = [];
+
+export class VoiceChannelService {
+  constructor(private userBlockService: UserBlockService) {}
+
+  /**
+   * Join a voice channel.
+   *
+   * For DM channels, checks blocking before allowing join.
+   */
+  async joinVoiceChannel(
+    userId: string,
+    channelId: string,
+    channelType: 'dm' | 'project'
+  ): Promise<void> {
+    // For DM channels, check blocking
+    if (channelType === 'dm') {
+      const otherUserId = this.getOtherDmUser(channelId, userId);
+      const isBlocked = await this.userBlockService.isBlockingEitherWay(userId, otherUserId);
+      if (isBlocked) {
+        throw new Error('Cannot join voice - user is blocked');
+      }
+    }
+
+    const participant: VoiceParticipant = {
+      channelId,
+      odlUserId: `odl_${userId}`,
+      userId,
+      joinedAt: new Date(),
+      muted: false,
+      deafened: false,
+    };
+    participants.push(participant);
+
+    BusVoiceParticipant.next({
+      channelId,
+      userId,
+      action: 'joined',
+      timestamp: participant.joinedAt,
+    });
+  }
+
+  /**
+   * Start a DM call.
+   *
+   * Checks blocking before creating the call.
+   */
+  async startDmCall(callerId: string, calleeId: string): Promise<DmCall> {
+    // Check blocking
+    const isBlocked = await this.userBlockService.isBlockingEitherWay(callerId, calleeId);
+    if (isBlocked) {
+      throw new Error('Cannot start call - user is blocked');
+    }
+
+    const call: DmCall = {
+      callId: `call_${Date.now()}`,
+      callerId,
+      calleeId,
+      channelId: `dm_${callerId}_${calleeId}`,
+      state: 'ringing',
+      createdAt: new Date(),
+    };
+    dmCalls.push(call);
+
+    BusDmCall.next({
+      callId: call.callId,
+      callerId,
+      calleeId,
+      state: 'ringing',
+      timestamp: call.createdAt,
+    });
+
+    // Notify callee
+    this.notifyDmCall(call);
+
+    return call;
+  }
+
+  /**
+   * Answer a DM call.
+   *
+   * BUG: Missing blocking check!
+   * If block is created after call starts but before answer,
+   * the callee can still answer.
+   */
+  async answerDmCall(callId: string, userId: string): Promise<void> {
+    const call = dmCalls.find(c => c.callId === callId);
+    if (!call) throw new Error('Call not found');
+    if (call.calleeId !== userId) throw new Error('Not the callee');
+    if (call.state !== 'ringing') throw new Error('Call is not ringing');
+
+    // BUG: No blocking check here!
+    // Should check: await this.userBlockService.isBlockingEitherWay(call.callerId, call.calleeId)
+
+    call.state = 'active';
+
+    BusDmCall.next({
+      callId: call.callId,
+      callerId: call.callerId,
+      calleeId: call.calleeId,
+      state: 'active',
+      timestamp: new Date(),
+    });
+  }
+
+  /**
+   * Decline a DM call.
+   *
+   * BUG: Missing blocking check!
+   */
+  async declineDmCall(callId: string, userId: string): Promise<void> {
+    const call = dmCalls.find(c => c.callId === callId);
+    if (!call) throw new Error('Call not found');
+    if (call.calleeId !== userId) throw new Error('Not the callee');
+
+    // BUG: No blocking check here either!
+
+    call.state = 'declined';
+
+    BusDmCall.next({
+      callId: call.callId,
+      callerId: call.callerId,
+      calleeId: call.calleeId,
+      state: 'declined',
+      timestamp: new Date(),
+    });
+  }
+
+  /**
+   * End a DM call between two users.
+   *
+   * Used when block is created to clean up active calls.
+   */
+  async endDmCallBetweenUsers(userA: string, userB: string): Promise<void> {
+    const call = dmCalls.find(
+      c =>
+        (c.callerId === userA && c.calleeId === userB) ||
+        (c.callerId === userB && c.calleeId === userA)
+    );
+    if (call && call.state !== 'ended') {
+      call.state = 'ended';
+      BusDmCall.next({
+        callId: call.callId,
+        callerId: call.callerId,
+        calleeId: call.calleeId,
+        state: 'ended',
+        timestamp: new Date(),
+      });
+    }
+  }
+
+  /**
+   * Kick a user from a voice channel.
+   */
+  async leaveChannel(userId: string, channelId?: string): Promise<void> {
+    const index = participants.findIndex(
+      p => p.userId === userId && (!channelId || p.channelId === channelId)
+    );
+    if (index !== -1) {
+      const participant = participants[index];
+      participants.splice(index, 1);
+
+      BusVoiceParticipant.next({
+        channelId: participant.channelId,
+        userId,
+        action: 'left',
+        timestamp: new Date(),
+      });
+    }
+  }
+
+  /**
+   * Notify callee of incoming DM call.
+   *
+   * BUG: Doesn't filter for blocking!
+   * Blocked users still receive call notifications.
+   */
+  private notifyDmCall(call: DmCall): void {
+    // BUG: Should check blocking before notifying
+    // if (await this.userBlockService.isBlockingEitherWay(call.callerId, call.calleeId)) return;
+
+    // Send notification to callee...
+    console.log(`[notify] ${call.calleeId}: Incoming call from ${call.callerId}`);
+  }
+
+  private getOtherDmUser(channelId: string, userId: string): string {
+    // Parse DM channel ID to get the other user
+    const parts = channelId.replace('dm_', '').split('_');
+    return parts.find(id => id !== userId) || '';
+  }
+}
diff --git a/test/mcp-client.mjs b/test/mcp-client.mjs
new file mode 100644
index 0000000..1af7ac6
--- /dev/null
+++ b/test/mcp-client.mjs
@@ -0,0 +1,229 @@
+#!/usr/bin/env node
+/**
+ * MCP Test Client
+ *
+ * Programmatically tests the diligence MCP server by:
+ * 1. Spawning the server as a child process
+ * 2. Sending JSON-RPC messages via stdio
+ * 3. Receiving and parsing responses
+ *
+ * Usage:
+ *   const client = new McpClient();
+ *   await client.connect();
+ *   const result = await client.callTool('status', {});
+ *   await client.disconnect();
+ */
+
+import { spawn } from 'child_process';
+import { createInterface } from 'readline';
+import { dirname, join } from 'path';
+import { fileURLToPath } from 'url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+export class McpClient {
+  constructor(serverPath = join(__dirname, '..', 'index.mjs')) {
+    this.serverPath = serverPath;
+    this.process = null;
+    this.requestId = 0;
+    this.pendingRequests = new Map();
+    this.readline = null;
+  }
+
+  async connect() {
+    return new Promise((resolve, reject) => {
+      this.process = spawn('node', [this.serverPath], {
+        stdio: ['pipe', 'pipe', 'pipe'],
+        cwd: join(__dirname, 'fixture'),  // Run in fixture directory
+      });
+
+      this.readline = createInterface({
+        input: this.process.stdout,
+        crlfDelay: Infinity,
+      });
+
+      this.readline.on('line', (line) => {
+        try {
+          const message = JSON.parse(line);
+          if (message.id !== undefined && this.pendingRequests.has(message.id)) {
+            const { resolve, reject } = this.pendingRequests.get(message.id);
+            this.pendingRequests.delete(message.id);
+            if (message.error) {
+              reject(new Error(message.error.message || JSON.stringify(message.error)));
+            } else {
+              resolve(message.result);
+            }
+          }
+        } catch (e) {
+          // Ignore non-JSON lines
+        }
+      });
+
+      this.process.stderr.on('data', (data) => {
+        // Server logs to stderr
+        if (process.env.DEBUG) {
+          console.error('[server]', data.toString());
+        }
+      });
+
+      this.process.on('error', reject);
+      this.process.on('exit', (code) => {
+        if (code !== 0 && code !== null) {
+          console.error(`Server exited with code ${code}`);
+        }
+      });
+
+      // Initialize the MCP connection
+      this._send({
+        jsonrpc: '2.0',
+        id: this.requestId++,
+        method: 'initialize',
+        params: {
+          protocolVersion: '0.1.0',
+          clientInfo: { name: 'test-client', version: '1.0.0' },
+          capabilities: {},
+        },
+      }).then(() => {
+        // Send initialized notification
+        this._sendNotification('notifications/initialized', {});
+        resolve();
+      }).catch(reject);
+    });
+  }
+
+  async disconnect() {
+    if (this.process) {
+      this.process.kill('SIGTERM');
+      this.process = null;
+    }
+    if (this.readline) {
+      this.readline.close();
+      this.readline = null;
+    }
+  }
+
+  _send(message) {
+    return new Promise((resolve, reject) => {
+      if (!this.process) {
+        reject(new Error('Not connected'));
+        return;
+      }
+      this.pendingRequests.set(message.id, { resolve, reject });
+      this.process.stdin.write(JSON.stringify(message) + '\n');
+
+      // Timeout after 10 seconds
+      setTimeout(() => {
+        if (this.pendingRequests.has(message.id)) {
+          this.pendingRequests.delete(message.id);
+          reject(new Error('Request timeout'));
+        }
+      }, 10000);
+    });
+  }
+
+  _sendNotification(method, params) {
+    if (!this.process) return;
+    this.process.stdin.write(JSON.stringify({
+      jsonrpc: '2.0',
+      method,
+      params,
+    }) + '\n');
+  }
+
+  async listTools() {
+    const result = await this._send({
+      jsonrpc: '2.0',
+      id: this.requestId++,
+      method: 'tools/list',
+      params: {},
+    });
+    return result.tools;
+  }
+
+  async callTool(name, args = {}) {
+    const result = await this._send({
+      jsonrpc: '2.0',
+      id: this.requestId++,
+      method: 'tools/call',
+      params: { name, arguments: args },
+    });
+    // Extract text from content array
+    if (result.content && result.content[0] && result.content[0].text) {
+      return {
+        text: result.content[0].text,
+        isError: result.isError || false,
+      };
+    }
+    return result;
+  }
+
+  // Convenience methods for common workflows
+  async status() {
+    return this.callTool('status');
+  }
+
+  async start(task) {
+    return this.callTool('start', { task });
+  }
+
+  async propose(proposal) {
+    return this.callTool('propose', { proposal });
+  }
+
+  async review(decision, reasoning) {
+    return this.callTool('review', { decision, reasoning });
+  }
+
+  async getWorkerBrief() {
+    return this.callTool('get_worker_brief');
+  }
+
+  async getReviewerBrief() {
+    return this.callTool('get_reviewer_brief');
+  }
+
+  async implement() {
+    return this.callTool('implement');
+  }
+
+  async complete(summary) {
+    return this.callTool('complete', { summary });
+  }
+
+  async abort(reason) {
+    return this.callTool('abort', { reason });
+  }
+
+  async approve(reason) {
+    return this.callTool('approve', { reason });
+  }
+}
+
+// CLI usage for quick testing
+if (process.argv[1] === fileURLToPath(import.meta.url)) {
+  const client = new McpClient();
+
+  try {
+    console.log('Connecting to MCP server...');
+    await client.connect();
+    console.log('Connected!\n');
+
+    // List tools
+    const tools = await client.listTools();
+    console.log('Available tools:');
+    tools.forEach(t => console.log(`  - ${t.name}: ${t.description.slice(0, 60)}...`));
+    console.log();
+
+    // Check status
+    const status = await client.status();
+    console.log('Status:');
+    console.log(status.text);
+
+    await client.disconnect();
+    console.log('\nDisconnected.');
+  } catch (err) {
+    console.error('Error:', err.message);
+    await client.disconnect();
+    process.exit(1);
+  }
+}
diff --git a/test/results/2026-01-22-comparison-report.md b/test/results/2026-01-22-comparison-report.md
new file mode 100644
index 0000000..f421e83
--- /dev/null
+++ b/test/results/2026-01-22-comparison-report.md
@@ -0,0 +1,183 @@
+# Diligence vs Naive Approach: Comparison Report
+
+**Date:** 2026-01-22
+**Test Bug:** B1 - Blocked users can answer DM voice calls
+**Project:** nexus (~/bude/codecharm/nexus)
+
+---
+
+## Executive Summary
+
+| Metric | Naive Approach | Diligence Approach |
+|--------|----------------|-------------------|
+| Bug verified exists? | ✅ Yes | ✅ Yes |
+| Correct line numbers? | ✅ Yes (1050, 965) | ✅ Worker correct |
+| Found declineDmCall gap? | ✅ Yes | ⚠️ Reviewer found it |
+| Found notification filtering? | ✅ Yes | ⚠️ Reviewer found it |
+| Found blockUser cleanup? | ✅ Yes | ⚠️ Reviewer found it |
+| Reviewer caught errors? | N/A | ✅ Caught line number discrepancy* |
+
+*Reviewer searched wrong codebase (test fixture instead of nexus), but the PROCESS of verification worked.
+
+---
+
+## Bug Verification: CONFIRMED REAL
+
+**Evidence from actual nexus code:**
+
+```typescript
+// startDmCall (lines 965-969) - HAS blocking check ✅
+const blocked = await this.userBlockService.isBlockingEitherWay(callerId, calleeId);
+if (blocked) {
+  throw new UserError('Cannot call this user');
+}
+
+// answerDmCall (line 1050+) - NO blocking check ❌
+async answerDmCall(callId: MongoId): Promise<{ token: string; channelId: string }> {
+  // Only checks: auth, call exists, state=ringing, user=callee, not expired
+  // MISSING: blocking check
+}
+
+// declineDmCall (line 1115+) - NO blocking check ❌
+async declineDmCall(callId: MongoId): Promise<void> {
+  // Only checks: auth, call exists, state=ringing, user=callee
+  // MISSING: blocking check
+}
+```
+
+**Conclusion:** Bug B1 is REAL. Both approaches correctly identified it.
+
+---
+
+## Detailed Comparison
+
+### Naive Approach Output
+
+The naive agent (single Explore agent) produced:
+- ✅ Root cause analysis
+- ✅ Correct file identification (`voice-channel.rpc.ts`)
+- ✅ Correct line numbers (965-969, 1050-1109)
+- ✅ Compared startDmCall vs answerDmCall patterns
+- ✅ Identified additional issues:
+  - declineDmCall needs blocking check
+  - notifyDmCall needs filtering
+  - blockUser() needs voice cleanup
+  - BusUserBlockChange subscription needed
+- ✅ Implementation order recommendation
+- ✅ Edge cases considered
+
+**Quality:** Surprisingly thorough. Searched actual code, cited lines, found patterns.
+
+### Diligence Approach Output
+
+**Worker:**
+- ✅ Verified bug exists by searching code
+- ✅ Correct line numbers
+- ✅ Cited exact file:line
+- ✅ Proposed fix matching startDmCall pattern
+
+**Reviewer:**
+- ✅ Attempted independent verification (correct process)
+- ❌ Searched wrong codebase (test fixture, 220 lines)
+- ✅ Noticed discrepancy ("file only 220 lines, Worker cited 1050")
+- ✅ Found additional gaps (declineDmCall, notification filtering)
+- ✅ Gave NEEDS_WORK decision with specific issues
+
+**Quality:** Process worked correctly. Reviewer caught a "discrepancy" (even if due to searching wrong place).
+
+---
+
+## Key Findings
+
+### 1. Both approaches verified the bug exists
+
+Neither approach blindly trusted the task description. Both:
+- Searched for answerDmCall implementation
+- Compared with startDmCall pattern
+- Verified blocking check is actually missing
+
+### 2. Naive approach was surprisingly thorough
+
+The single agent produced analysis comparable to the Worker. This suggests:
+- For bugs with clear descriptions, naive approach may suffice
+- The value of diligence may be in more ambiguous tasks
+
+### 3. Reviewer process works, but needs correct context
+
+The Reviewer:
+- Did NOT rubber-stamp the Worker's proposal
+- Actually searched and found discrepancies
+- Caught additional issues the Worker missed
+- BUT searched the wrong codebase due to test setup
+
+### 4. Test setup flaw identified
+
+The Reviewer searched `/Users/marc/bude/strikt/diligence/test/fixture/` instead of `~/bude/codecharm/nexus`. This is because:
+- Agents were spawned from the diligence project
+- They defaulted to searching the current working directory
+
+**Fix needed:** In real usage, diligence MCP runs IN the target project, so this wouldn't happen.
+
+---
+
+## What Diligence Should Catch That Naive Might Miss
+
+Based on this test, diligence adds value when:
+
+1. **Worker makes incorrect claims** - Reviewer verifies by searching
+2. **Worker misses related issues** - Reviewer's independent search finds them
+3. **Task description is wrong** - Both should verify bug exists, not assume
+4. **Patterns are misunderstood** - Reviewer checks against CODEBASE_CONTEXT.md
+
+### This test showed:
+
+| Scenario | Did Diligence Help? |
+|----------|---------------------|
+| Verify bug exists | Both approaches did this |
+| Catch wrong line numbers | Reviewer caught discrepancy ✅ |
+| Find additional gaps | Reviewer found more than Worker ✅ |
+| Prevent hallucinated bugs | Would catch if Reviewer searched correctly |
+
+---
+
+## Recommendations
+
+### 1. Run real test in nexus project
+
+Start a Claude Code session IN nexus and test the full workflow there. This ensures:
+- MCP server runs in correct project
+- Agents search the right codebase
+- Full context from CODEBASE_CONTEXT.md is loaded
+
+### 2. Test with a more ambiguous bug
+
+B1 is well-documented. Test with something like:
+- "Voice seems laggy sometimes"
+- "Users report weird permission issues"
+
+These require more investigation to even determine if there's a bug.
+
+### 3. Test if diligence catches non-bugs
+
+Give a task for a bug that doesn't exist. Does the workflow correctly identify "no bug found"?
+
+### 4. Add explicit codebase path to Worker/Reviewer briefs
+
+The briefs should specify: "Search in /path/to/project, not elsewhere"
+
+---
+
+## Conclusion
+
+**Does diligence work?** Yes, the process is sound:
+- Worker researches and proposes
+- Reviewer independently verifies
+- Discrepancies are caught
+- Multiple rounds can iterate
+
+**Is it better than naive?** For this test, similar results. But:
+- Reviewer caught additional issues Worker missed
+- Process would catch hallucinated bugs if Reviewer searches correctly
+- Real value may be in more complex/ambiguous tasks
+
+**Next step:** Run a real test in a Claude Code session in nexus, with a more ambiguous task.
diff --git a/test/run-tests.mjs b/test/run-tests.mjs
new file mode 100644
index 0000000..01fa170
--- /dev/null
+++ b/test/run-tests.mjs
@@ -0,0 +1,415 @@
+#!/usr/bin/env node
+/**
+ * Diligence Test Runner
+ *
+ * Runs end-to-end tests of the Worker-Reviewer loop.
+ *
+ * Modes:
+ *   --workflow    Test MCP workflow mechanics only (no AI)
+ *   --mock        Use mock Worker/Reviewer responses
+ *   --live        Use real Claude API for Worker/Reviewer (requires ANTHROPIC_API_KEY)
+ *
+ * Usage:
+ *   node test/run-tests.mjs --workflow
+ *   node test/run-tests.mjs --mock --scenario=blocking-voice
+ *   node test/run-tests.mjs --live --scenario=permission-cache
+ */
+
+import { McpClient } from './mcp-client.mjs';
+import { readFileSync, existsSync, unlinkSync } from 'fs';
+import { dirname, join } from 'path';
+import { fileURLToPath } from 'url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+// Parse CLI args
+const args = process.argv.slice(2);
+const mode = args.find(a => ['--workflow', '--mock', '--live'].includes(a)) || '--workflow';
+const scenarioArg = args.find(a => a.startsWith('--scenario='));
+const scenarioId = scenarioArg ? scenarioArg.split('=')[1] : null;
+const verbose = args.includes('--verbose') || args.includes('-v');
+
+// Colors for output
+const colors = {
+  reset: '\x1b[0m',
+  green: '\x1b[32m',
+  red: '\x1b[31m',
+  yellow: '\x1b[33m',
+  blue: '\x1b[34m',
+  dim: '\x1b[2m',
+};
+
+function log(msg, color = 'reset') {
+  console.log(`${colors[color]}${msg}${colors.reset}`);
+}
+
+function logSection(title) {
+  console.log(`\n${colors.blue}=== ${title} ===${colors.reset}`);
+}
+
+// Load scenario
+function loadScenario(id) {
+  const path = join(__dirname, 'scenarios', `${id}.json`);
+  if (!existsSync(path)) {
+    throw new Error(`Scenario not found: ${id}`);
+  }
+  return JSON.parse(readFileSync(path, 'utf-8'));
+}
+
+// Load all scenarios
+function loadAllScenarios() {
+  const index = JSON.parse(readFileSync(join(__dirname, 'scenarios', 'index.json'), 'utf-8'));
+  return index.scenarios.map(s => loadScenario(s.id));
+}
+
+// Clean up state file before test
+function cleanState() {
+  const stateFile = join(__dirname, 'fixture', '.claude', '.diligence-state.json');
+  if (existsSync(stateFile)) {
+    unlinkSync(stateFile);
+  }
+}
+
+// ============================================================================
+// Workflow Tests (no AI, just MCP mechanics)
+// ============================================================================
+
+async function testWorkflow() {
+  logSection('Workflow Tests');
+
+  const client = new McpClient();
+  let passed = 0;
+  let failed = 0;
+
+  try {
+    cleanState();
+    await client.connect();
+    log('Connected to MCP server', 'green');
+
+    // Test 1: Status in conversation phase
+    {
+      const result = await client.status();
+      const ok = result.text.includes('Phase: conversation');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Initial status is conversation`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 2: Start workflow
+    {
+      const result = await client.start('Test task');
+      const ok = result.text.includes('researching') && result.text.includes('Round: 1/5');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Start transitions to researching`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 3: Cannot start again while in progress
+    {
+      const result = await client.start('Another task');
+      const ok = result.isError && result.text.includes('Already in');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Cannot start while in progress`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 4: Get worker brief
+    {
+      const result = await client.getWorkerBrief();
+      const ok = result.text.includes('Worker Brief') && result.text.includes('Test task');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Worker brief contains task`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 5: Submit proposal
+    {
+      const result = await client.propose('## Analysis\n\nProposed fix here');
+      const ok = result.text.includes('Proposal submitted');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Proposal submitted`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 6: Get reviewer brief includes proposal
+    {
+      const result = await client.getReviewerBrief();
+      const ok = result.text.includes('Reviewer Brief') && result.text.includes('Proposed fix here');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Reviewer brief contains proposal`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 7: Review with NEEDS_WORK
+    {
+      const result = await client.review('NEEDS_WORK', 'Missing broker event handling');
+      const ok = result.text.includes('NEEDS_WORK') && result.text.includes('Round 2/5');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] NEEDS_WORK increments round`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 8: Worker brief now includes feedback
+    {
+      const result = await client.getWorkerBrief();
+      const ok = result.text.includes('Previous Feedback') && result.text.includes('broker event');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Worker brief includes previous feedback`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 9: Submit revised proposal
+    {
+      const result = await client.propose('## Revised\n\nNow with broker events');
+      const ok = result.text.includes('Proposal submitted');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Revised proposal submitted`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 10: Review with APPROVED
+    {
+      const result = await client.review('APPROVED', 'All checks pass');
+      const ok = result.text.includes('APPROVED') && result.text.includes('2 round');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] APPROVED after review`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 11: Status shows approved
+    {
+      const result = await client.status();
+      const ok = result.text.includes('Phase: approved');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Status shows approved phase`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 12: Implement
+    {
+      const result = await client.implement();
+      const ok = result.text.includes('Implementation phase');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Implement starts implementation`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 13: Complete
+    {
+      const result = await client.complete('Fixed the bug');
+      const ok = result.text.includes('Complete') && result.text.includes('Reset to conversation');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Complete resets workflow`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 14: Back to conversation
+    {
+      const result = await client.status();
+      const ok = result.text.includes('Phase: conversation');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Back to conversation phase`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 15: Abort works
+    {
+      await client.start('Task to abort');
+      const result = await client.abort('Changed my mind');
+      const ok = result.text.includes('Aborted') && result.text.includes('Reset to conversation');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Abort resets workflow`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    // Test 16: Max rounds enforcement
+    {
+      await client.start('Task for max rounds');
+      for (let i = 0; i < 5; i++) {
+        await client.propose(`Proposal ${i + 1}`);
+        if (i < 4) {
+          await client.review('NEEDS_WORK', `Feedback ${i + 1}`);
+        }
+      }
+      const result = await client.review('NEEDS_WORK', 'Still not good');
+      const ok = result.text.includes('MAX ROUNDS') && result.text.includes('reset');
+      log(`  [${ok ? 'PASS' : 'FAIL'}] Max rounds resets workflow`, ok ? 'green' : 'red');
+      ok ? passed++ : failed++;
+    }
+
+    log(`\nWorkflow tests: ${passed} passed, ${failed} failed`, failed ? 'red' : 'green');
+    return failed === 0;
+
+  } finally {
+    await client.disconnect();
+    cleanState();
+  }
+}
+
+// ============================================================================
+// Mock Tests (predefined Worker/Reviewer responses)
+// ============================================================================
+
+async function testWithMocks(scenario) {
+  logSection(`Mock Test: ${scenario.name}`);
+
+  const client = new McpClient();
+
+  try {
+    cleanState();
+    await client.connect();
+
+    // Start the workflow
+    await client.start(scenario.task);
+    log(`Started task: ${scenario.task.slice(0, 60)}...`, 'dim');
+
+    // Round 1: Worker submits naive fix
+    const naiveProposal = `## Analysis
+
+${scenario.naive_fix.description}
+
+### Changes
+${scenario.naive_fix.changes.map(c => `- ${c.file}: ${c.change}`).join('\n')}
+`;
+
+    await client.propose(naiveProposal);
+    log('Worker submitted naive proposal', 'dim');
+
+    // Round 1: Reviewer catches issues
+    const issues = scenario.naive_fix.issues;
+    const reviewFeedback = `NEEDS_WORK
+
+Issues found:
+${issues.map((issue, i) => `${i + 1}. ${issue}`).join('\n')}
+
+The proposal misses critical aspects. Please address all issues.
+`;
+
+    await client.review('NEEDS_WORK', reviewFeedback);
+    log(`Reviewer found ${issues.length} issues`, 'yellow');
+
+    // Round 2: Worker submits correct fix
+    const correctProposal = `## Revised Analysis
+
+${scenario.correct_fix.description}
+
+### Required Changes
+
+${scenario.correct_fix.required_changes.map(c =>
+  `#### ${c.file}:${c.function}
+- ${c.change}
+- Reference: ${c.line_reference}`
+).join('\n\n')}
+
+### Broker Subscriptions
+
+${scenario.correct_fix.required_broker_subscriptions.map(s =>
+  `- ${s.service} subscribes to ${s.event}: ${s.action}`
+).join('\n')}
+
+### Pattern References
+
+${scenario.correct_fix.pattern_references.map(p => `- ${p}`).join('\n')}
+`;
+
+    await client.propose(correctProposal);
+    log('Worker submitted revised proposal', 'dim');
+
+    // Round 2: Reviewer approves
+    await client.review('APPROVED', 'All required changes identified. Pattern followed correctly.');
+    log('Reviewer approved', 'green');
+
+    // Validate the proposal
+    const validation = scenario.validation_criteria;
+    let validationPassed = true;
+
+    log('\nValidation:', 'blue');
+
+    // Check must_mention
+    for (const item of validation.must_mention || []) {
+      const found = correctProposal.toLowerCase().includes(item.toLowerCase());
+      log(`  [${found ? 'PASS' : 'FAIL'}] Mentions: ${item}`, found ? 'green' : 'red');
+      if (!found) validationPassed = false;
+    }
+
+    // Check pattern reference
+    if (validation.should_reference_pattern) {
+      const found = correctProposal.includes(validation.should_reference_pattern);
+      log(`  [${found ? 'PASS' : 'FAIL'}] References pattern: ${validation.should_reference_pattern}`, found ? 'green' : 'red');
+      if (!found) validationPassed = false;
+    }
+
+    // Complete the workflow
+    await client.implement();
+    await client.complete('Test completed');
+
+    log(`\nMock test ${scenario.id}: ${validationPassed ? 'PASSED' : 'FAILED'}`, validationPassed ? 'green' : 'red');
+    return validationPassed;
+
+  } finally {
+    await client.disconnect();
+    cleanState();
+  }
+}
+
+// ============================================================================
+// Live Tests (real Claude API)
+// ============================================================================
+
+async function testLive(scenario) {
+  logSection(`Live Test: ${scenario.name}`);
+
+  const apiKey = process.env.ANTHROPIC_API_KEY;
+  if (!apiKey) {
+    log('ANTHROPIC_API_KEY not set. Skipping live test.', 'yellow');
+    return null;
+  }
+
+  log('Live tests with Claude API not yet implemented.', 'yellow');
+  log('This would spawn real Worker and Reviewer sub-agents.', 'dim');
+
+  // TODO: Implement Claude API integration
+  // 1. Get worker brief
+  // 2. Call Claude API with worker prompt + brief
+  // 3. Submit Claude's proposal
+  // 4. Get reviewer brief
+  // 5. Call Claude API with reviewer prompt + brief
+  // 6. Submit Claude's review
+  // 7. Loop until approved or max rounds
+
+  return null;
+}
+
+// ============================================================================
+// Main
+// ============================================================================
+
+async function main() {
+  log('\n🔍 Diligence Test Runner\n', 'blue');
+  log(`Mode: ${mode}`, 'dim');
+
+  let allPassed = true;
+
+  switch (mode) {
+    case '--workflow':
+      allPassed = await testWorkflow();
+      break;
+
+    case '--mock': {
+      const scenarios = scenarioId ? [loadScenario(scenarioId)] : loadAllScenarios();
+      for (const scenario of scenarios) {
+        const passed = await testWithMocks(scenario);
+        if (!passed) allPassed = false;
+      }
+      break;
+    }
+
+    case '--live': {
+      const scenarios = scenarioId ? [loadScenario(scenarioId)] : loadAllScenarios();
+      for (const scenario of scenarios) {
+        const result = await testLive(scenario);
+        if (result === false) allPassed = false;
+      }
+      break;
+    }
+  }
+
+  console.log();
+  if (allPassed) {
+    log('✓ All tests passed', 'green');
+    process.exit(0);
+  } else {
+    log('✗ Some tests failed', 'red');
+    process.exit(1);
+  }
+}
+
+main().catch(err => {
+  console.error('Error:', err);
+  process.exit(1);
+});
diff --git a/test/scenarios/blocking-voice.json b/test/scenarios/blocking-voice.json
new file mode 100644
index 0000000..8f988e7
--- /dev/null
+++ b/test/scenarios/blocking-voice.json
@@ -0,0 +1,78 @@
+{
+  "id": "blocking-voice",
+  "name": "Blocking + Voice Bug",
+  "description": "Fix blocked users can answer DM voice calls",
+
+  "task": "Fix: blocked users can still answer DM voice calls. When user A blocks user B, user B should not be able to answer calls from user A.",
+
+  "naive_fix": {
+    "description": "Add blocking check to answerDmCall()",
+    "changes": [
+      {
+        "file": "src/services/voice-channel.service.ts",
+        "function": "answerDmCall",
+        "change": "Add isBlockingEitherWay check before answering"
+      }
+    ],
+    "issues": [
+      "Doesn't handle block created DURING active call",
+      "Doesn't clean up existing calls when block is created",
+      "Blocked users still receive call notifications"
+    ]
+  },
+
+  "correct_fix": {
+    "description": "Full blocking enforcement following chat.service.ts pattern",
+    "required_changes": [
+      {
+        "file": "src/services/voice-channel.service.ts",
+        "function": "answerDmCall",
+        "change": "Add isBlockingEitherWay check",
+        "line_reference": "line 75"
+      },
+      {
+        "file": "src/services/voice-channel.service.ts",
+        "function": "declineDmCall",
+        "change": "Add isBlockingEitherWay check (consistency)",
+        "line_reference": "line 93"
+      },
+      {
+        "file": "src/services/voice-channel.service.ts",
+        "function": "notifyDmCall",
+        "change": "Filter notifications for blocked users",
+        "line_reference": "line 138"
+      },
+      {
+        "file": "src/services/user-block.service.ts",
+        "function": "blockUser",
+        "change": "Add voice cleanup: endDmCallBetweenUsers()",
+        "line_reference": "line 33"
+      }
+    ],
+    "required_broker_subscriptions": [
+      {
+        "service": "voice-channel.service.ts",
+        "event": "BusUserBlockChange",
+        "action": "Kick users from DM voice when block is created mid-call"
+      }
+    ],
+    "pattern_references": [
+      "chat.service.ts:sendMessage - shows correct action check pattern",
+      "chat.service.ts:getChannelPermission - shows permission vs action separation"
+    ]
+  },
+
+  "validation_criteria": {
+    "must_mention": [
+      "answerDmCall",
+      "BusUserBlockChange",
+      "user-block.service",
+      "notifyDmCall"
+    ],
+    "must_not_change": [
+      "voiceListen permission values",
+      "voiceTalk permission values"
+    ],
+    "should_reference_pattern": "chat.service.ts"
+  }
+}
diff --git a/test/scenarios/index.json b/test/scenarios/index.json
new file mode 100644
index 0000000..91c63ca
--- /dev/null
+++ b/test/scenarios/index.json
@@ -0,0 +1,21 @@
+{
+  "scenarios": [
+    {
+      "id": "blocking-voice",
+      "file": "blocking-voice.json",
+      "difficulty": "medium",
+      "tags": ["blocking", "voice", "broker-events"]
+    },
+    {
+      "id": "permission-cache",
+      "file": "permission-cache.json",
+      "difficulty": "medium",
+      "tags": ["cache", "permissions", "broker-events"]
+    }
+  ],
+  "metadata": {
+    "version": "1.0.0",
+    "fixture_path": "../fixture",
+    "description": "Test scenarios for diligence MCP server"
+  }
+}
diff --git a/test/scenarios/permission-cache.json b/test/scenarios/permission-cache.json
new file mode 100644
index 0000000..ae39f67
--- /dev/null
+++ b/test/scenarios/permission-cache.json
@@ -0,0 +1,81 @@
+{
+  "id": "permission-cache",
+  "name": "Permission Cache Invalidation Bug",
+  "description": "Fix permission cache not invalidating when roles change",
+
+  "task": "Fix: permission cache doesn't invalidate when user roles change. Users see stale permissions for hours after their roles are updated.",
+
+  "naive_fix": {
+    "description": "Add .clear() call somewhere",
+    "changes": [
+      {
+        "file": "src/services/team.service.ts",
+        "function": "somewhere",
+        "change": "Call memoizedPermissions.clear()"
+      }
+    ],
+    "issues": [
+      "Doesn't identify WHEN cache should clear",
+      "Missing BusTeamRoleChange subscription",
+      "Missing BusTeamMemberRoleChange subscription",
+      "Doesn't fix roles.controller.ts missing broker events"
+    ]
+  },
+
+  "correct_fix": {
+    "description": "Subscribe to all role-related broker events",
+    "required_changes": [
+      {
+        "file": "src/services/team.service.ts",
+        "function": "constructor",
+        "change": "Subscribe to BusTeamRoleChange, clear cache on event",
+        "line_reference": "line 30"
+      },
+      {
+        "file": "src/services/team.service.ts",
+        "function": "constructor",
+        "change": "Subscribe to BusTeamMemberRoleChange, clear cache on event",
+        "line_reference": "line 30"
+      },
+      {
+        "file": "src/controllers/roles.controller.ts",
+        "function": "createRole",
+        "change": "Emit BusTeamRoleChange event after creating role",
+        "line_reference": "line 22"
+      },
+      {
+        "file": "src/controllers/roles.controller.ts",
+        "function": "deleteRole",
+        "change": "Emit BusTeamRoleChange event before deleting role",
+        "line_reference": "line 62"
+      }
+    ],
+    "required_broker_subscriptions": [
+      {
+        "service": "team.service.ts",
+        "event": "BusTeamRoleChange",
+        "action": "Clear permission cache"
+      },
+      {
+        "service": "team.service.ts",
+        "event": "BusTeamMemberRoleChange",
+        "action": "Clear permission cache"
+      }
+    ],
+    "pattern_references": [
+      "roles.controller.ts:updateRole - shows correct broker event emission"
+    ]
+  },
+
+  "validation_criteria": {
+    "must_mention": [
+      "BusTeamRoleChange",
+      "BusTeamMemberRoleChange",
+      "createRole",
+      "deleteRole",
+      "team.service"
+    ],
+    "must_identify_root_cause": "Cache only clears on team switch, not role changes",
+    "should_reference_pattern": "roles.controller.ts:updateRole"
+  }
+}