Initial release: MCP server enforcing Worker-Reviewer loop

Diligence prevents AI agents from shipping quick fixes that break things by enforcing a research-propose-verify loop before any code changes. Key features: - Worker sub-agent researches and proposes with file:line citations - Reviewer sub-agent independently verifies claims by searching the codebase - Iterates until approved (max 5 rounds) - Loads project-specific context from .claude/CODEBASE_CONTEXT.md - State is persisted across sessions. Validated on a production codebase: it caught an architectural mistake (broker subscriptions in client-side code) that a naive agent would have shipped.
2026-01-22 06:22:59 +01:00
commit bd178fcaf0
23 changed files with 4001 additions and 0 deletions
--- a/test/dry-run.mjs
+++ b/test/dry-run.mjs
@@ -0,0 +1,305 @@
+#!/usr/bin/env node
+/**
+ * Dry Run Test Against Real Project
+ *
+ * Runs the diligence MCP server against a real project (e.g., nexus) in dry-run mode.
+ * This tests the full workflow without making any code changes.
+ *
+ * Usage:
+ *   node test/dry-run.mjs --project=/path/to/nexus --task="Fix permission cache"
+ *   node test/dry-run.mjs --project=~/bude/codecharm/nexus --scenario=blocking-voice
+ *
+ * Options:
+ *   --project=PATH    Path to the project to test against
+ *   --task=TEXT       Task description to start the workflow with
+ *   --scenario=ID     Use a predefined scenario from test/scenarios/
+ *   --interactive, -i Prompt for the task when neither --task nor --scenario is given
+ */
+
+import { spawn } from 'child_process';
+import { createInterface } from 'readline';
+import { dirname, join, resolve } from 'path';
+import { fileURLToPath } from 'url';
+import { existsSync, readFileSync } from 'fs';
+
+// Directory containing this script; scenarios and the server entry point
+// are located relative to it.
+const __dirname = dirname(fileURLToPath(import.meta.url));
+
+/**
+ * Extract the value of a `--flag=value` argument.
+ *
+ * Everything after the FIRST '=' is returned, so values that themselves
+ * contain '=' (paths, task descriptions) are preserved intact.
+ *
+ * @param {string} arg - Raw CLI argument of the form `--flag=value`.
+ * @returns {string} The value part (may be empty).
+ */
+function flagValue(arg) {
+  return arg.slice(arg.indexOf('=') + 1);
+}
+
+/**
+ * Expand a leading `~` to the user's home directory.
+ *
+ * Only a bare `~` or a `~` followed by a path separator is expanded;
+ * `~user/...` is left untouched. If no home directory is known from the
+ * environment the path is returned unchanged rather than being corrupted.
+ *
+ * @param {string} inputPath - Path as typed on the command line.
+ * @returns {string} Path with the home directory substituted where applicable.
+ */
+function expandHome(inputPath) {
+  const home = process.env.HOME ?? process.env.USERPROFILE;
+  if (!home || !/^~(?=$|[\\/])/.test(inputPath)) {
+    return inputPath;
+  }
+  // Plain concatenation: a replacement string would interpret '$' sequences.
+  return `${home}${inputPath.slice(1)}`;
+}
+
+// Parse CLI args
+const args = process.argv.slice(2);
+const projectArg = args.find(a => a.startsWith('--project='));
+const taskArg = args.find(a => a.startsWith('--task='));
+const scenarioArg = args.find(a => a.startsWith('--scenario='));
+const interactive = args.includes('--interactive') || args.includes('-i');
+
+// Resolve project path (null when --project was not supplied)
+let projectPath = projectArg ? flagValue(projectArg) : null;
+if (projectPath) {
+  projectPath = resolve(expandHome(projectPath));
+}
+
+// Colors
+// ANSI escape sequences keyed by style name; `reset` ends any active style.
+const colors = Object.fromEntries([
+  ['reset', '\x1b[0m'],
+  ['green', '\x1b[32m'],
+  ['red', '\x1b[31m'],
+  ['yellow', '\x1b[33m'],
+  ['blue', '\x1b[34m'],
+  ['cyan', '\x1b[36m'],
+  ['dim', '\x1b[2m'],
+  ['bold', '\x1b[1m'],
+]);
+
+/**
+ * Print a message to stdout wrapped in one of the `colors` escape sequences.
+ *
+ * @param {string} msg - Text to print.
+ * @param {string} [color='reset'] - Key of the `colors` table to apply.
+ */
+function log(msg, color = 'reset') {
+  const open = colors[color];
+  const close = colors.reset;
+  console.log(`${open}${msg}${close}`);
+}
+
+/**
+ * Print a bold cyan section banner surrounded by blank lines.
+ *
+ * @param {string} title - Section heading shown between the `═══` rules.
+ */
+function logSection(title) {
+  const { cyan, bold, reset } = colors;
+  const banner = `${cyan}${bold}═══ ${title} ═══${reset}`;
+  console.log(`\n${banner}\n`);
+}
+
+// Load scenario
+/**
+ * Read and parse `scenarios/<id>.json` located next to this script.
+ *
+ * @param {string} id - Scenario identifier (file name without `.json`).
+ * @returns {*} The parsed JSON content of the scenario file.
+ * @throws {Error} If the scenario file does not exist.
+ * @throws {SyntaxError} If the file is not valid JSON.
+ */
+function loadScenario(id) {
+  const scenarioFile = join(__dirname, 'scenarios', `${id}.json`);
+  if (existsSync(scenarioFile)) {
+    const raw = readFileSync(scenarioFile, 'utf-8');
+    return JSON.parse(raw);
+  }
+  throw new Error(`Scenario not found: ${id}`);
+}
+
+// Simple MCP client for dry run
+/**
+ * Minimal JSON-RPC client used to exercise the MCP server in a dry run.
+ *
+ * Spawns `index.mjs` (one directory above this file) with `node`, using the
+ * target project as the working directory, and exchanges newline-delimited
+ * JSON messages over the child's stdin/stdout.
+ */
+class DryRunClient {
+  /**
+   * @param {string} projectPath - Directory the server process is started in.
+   * @param {number} [requestTimeoutMs=30000] - How long to wait for a response
+   *   before a request is rejected with "Request timeout".
+   */
+  constructor(projectPath, requestTimeoutMs = 30000) {
+    this.projectPath = projectPath;
+    this.requestTimeoutMs = requestTimeoutMs;
+    this.serverPath = join(__dirname, '..', 'index.mjs');
+    this.process = null;
+    this.requestId = 0;
+    // request id -> { resolve, reject, timer } for requests awaiting a response
+    this.pendingRequests = new Map();
+    this.readline = null;
+  }
+
+  /**
+   * Spawn the server and perform the `initialize` handshake.
+   *
+   * @returns {Promise<void>} Resolves once the server answered `initialize`
+   *   and the `notifications/initialized` notification has been written.
+   *   Rejects if the process cannot be spawned, exits early, or the
+   *   handshake fails or times out.
+   */
+  async connect() {
+    return new Promise((resolve, reject) => {
+      const child = spawn('node', [this.serverPath], {
+        stdio: ['pipe', 'pipe', 'pipe'],
+        cwd: this.projectPath,
+      });
+      this.process = child;
+
+      this.readline = createInterface({
+        input: child.stdout,
+        crlfDelay: Infinity,
+      });
+
+      this.readline.on('line', (line) => {
+        let message;
+        try {
+          message = JSON.parse(line);
+        } catch {
+          // Ignore non-JSON lines
+          return;
+        }
+        this._settle(message);
+      });
+
+      child.stderr.on('data', (data) => {
+        // Show server stderr in debug mode
+        if (process.env.DEBUG) {
+          console.error(colors.dim + '[server] ' + data.toString() + colors.reset);
+        }
+      });
+
+      child.on('error', (err) => {
+        this._rejectAllPending(err);
+        reject(err);
+      });
+
+      // Fail in-flight requests immediately instead of letting each one
+      // wait for its timeout when the server goes away.
+      child.on('exit', (code, signal) => {
+        this._rejectAllPending(
+          new Error(`MCP server exited (code=${code}, signal=${signal})`)
+        );
+      });
+
+      // Without a listener, a write error (e.g. EPIPE) would be thrown as an
+      // uncaught exception.
+      child.stdin.on('error', (err) => this._rejectAllPending(err));
+
+      // Initialize
+      this._send({
+        jsonrpc: '2.0',
+        id: this.requestId++,
+        method: 'initialize',
+        params: {
+          protocolVersion: '0.1.0',
+          clientInfo: { name: 'dry-run-client', version: '1.0.0' },
+          capabilities: {},
+        },
+      }).then(() => {
+        this._sendNotification('notifications/initialized', {});
+        resolve();
+      }).catch(reject);
+    });
+  }
+
+  /**
+   * Stop the server process and fail any request still awaiting a response.
+   *
+   * @returns {Promise<void>}
+   */
+  async disconnect() {
+    this._rejectAllPending(new Error('Client disconnected'));
+    if (this.readline) {
+      this.readline.close();
+      this.readline = null;
+    }
+    if (this.process) {
+      this.process.kill('SIGTERM');
+      this.process = null;
+    }
+  }
+
+  /**
+   * Route a parsed server message to the request that is waiting for it.
+   * Messages without an id, or with an unknown id, are ignored.
+   *
+   * @param {*} message - Parsed JSON value read from the server's stdout.
+   */
+  _settle(message) {
+    if (message == null || message.id === undefined) {
+      return;
+    }
+    const pending = this.pendingRequests.get(message.id);
+    if (!pending) {
+      return;
+    }
+    this.pendingRequests.delete(message.id);
+    // Clear the timer so it does not keep the event loop alive after the reply.
+    clearTimeout(pending.timer);
+    if (message.error) {
+      pending.reject(new Error(message.error.message || JSON.stringify(message.error)));
+    } else {
+      pending.resolve(message.result);
+    }
+  }
+
+  /**
+   * Reject every in-flight request with the given error and cancel its timer.
+   *
+   * @param {Error} error - Reason passed to each pending request's `reject`.
+   */
+  _rejectAllPending(error) {
+    for (const { reject, timer } of this.pendingRequests.values()) {
+      clearTimeout(timer);
+      reject(error);
+    }
+    this.pendingRequests.clear();
+  }
+
+  /**
+   * Write a JSON-RPC request and wait for the response with the same id.
+   *
+   * @param {{ id: number }} message - Complete JSON-RPC request object.
+   * @returns {Promise<*>} Resolves with the response's `result`; rejects with
+   *   the response's error, on timeout, or when the client is not connected.
+   */
+  _send(message) {
+    return new Promise((resolve, reject) => {
+      if (!this.process) {
+        reject(new Error('Not connected'));
+        return;
+      }
+      const timer = setTimeout(() => {
+        if (this.pendingRequests.delete(message.id)) {
+          reject(new Error('Request timeout'));
+        }
+      }, this.requestTimeoutMs);
+      this.pendingRequests.set(message.id, { resolve, reject, timer });
+      this.process.stdin.write(JSON.stringify(message) + '\n');
+    });
+  }
+
+  /**
+   * Write a JSON-RPC notification (no id, no response expected).
+   *
+   * @param {string} method - Notification method name.
+   * @param {object} params - Notification parameters.
+   */
+  _sendNotification(method, params) {
+    this.process.stdin.write(JSON.stringify({ jsonrpc: '2.0', method, params }) + '\n');
+  }
+
+  /**
+   * Invoke a server tool via `tools/call`.
+   *
+   * @param {string} name - Tool name.
+   * @param {object} [args={}] - Tool arguments.
+   * @returns {Promise<*>} `{ text, isError }` when the first content item has
+   *   non-empty text; otherwise the raw `result` of the response.
+   */
+  async callTool(name, args = {}) {
+    const result = await this._send({
+      jsonrpc: '2.0',
+      id: this.requestId++,
+      method: 'tools/call',
+      params: { name, arguments: args },
+    });
+    if (result.content?.[0]?.text) {
+      return { text: result.content[0].text, isError: result.isError || false };
+    }
+    return result;
+  }
+}
+
+// Interactive prompt
+/**
+ * Ask one question on the terminal and resolve with the line the user types.
+ * The readline interface is closed as soon as the answer arrives.
+ *
+ * @param {string} question - Text shown before the input cursor.
+ * @returns {Promise<string>} The user's answer.
+ */
+function prompt(question) {
+  return new Promise((done) => {
+    const terminal = createInterface({
+      input: process.stdin,
+      output: process.stdout,
+    });
+    terminal.question(question, (reply) => {
+      terminal.close();
+      done(reply);
+    });
+  });
+}
+
+/**
+ * Entry point of the dry run.
+ *
+ * Validates the CLI input, determines the task (scenario, --task, or
+ * interactive prompt), connects to the MCP server, starts a workflow,
+ * prints the first 50 lines of the worker brief, and finally aborts the
+ * workflow again so the dry run leaves nothing active behind.
+ *
+ * Exits the process with code 1 on invalid CLI input.
+ *
+ * @returns {Promise<void>}
+ * @throws {Error} If the server cannot be reached, a request fails, or the
+ *   workflow cannot be started even after aborting an existing one.
+ */
+async function main() {
+  log('\n🔍 Diligence Dry Run\n', 'cyan');
+
+  // Validate project path
+  if (!projectPath) {
+    log('Error: --project=PATH required', 'red');
+    log('\nUsage:', 'dim');
+    log('  node test/dry-run.mjs --project=~/bude/codecharm/nexus --task="Fix bug"', 'dim');
+    process.exit(1);
+  }
+
+  if (!existsSync(projectPath)) {
+    log(`Error: Project path not found: ${projectPath}`, 'red');
+    process.exit(1);
+  }
+
+  // Check for CODEBASE_CONTEXT.md
+  const contextPath = join(projectPath, '.claude', 'CODEBASE_CONTEXT.md');
+  if (!existsSync(contextPath)) {
+    log(`Warning: No .claude/CODEBASE_CONTEXT.md found in ${projectPath}`, 'yellow');
+    log('The Worker and Reviewer will have limited context.', 'dim');
+  } else {
+    log(`Found: ${contextPath}`, 'green');
+  }
+
+  // Value of a `--flag=value` argument: everything after the FIRST '=', so a
+  // task such as "Set x=1" is not cut off at its own '='.
+  const valueAfterEquals = (arg) => arg.slice(arg.indexOf('=') + 1);
+
+  // Determine task
+  let task;
+  if (scenarioArg) {
+    const scenarioId = valueAfterEquals(scenarioArg);
+    const scenario = loadScenario(scenarioId);
+    task = scenario.task;
+    log(`Using scenario: ${scenario.name}`, 'blue');
+  } else if (taskArg) {
+    task = valueAfterEquals(taskArg);
+  } else if (interactive) {
+    task = await prompt('Enter task: ');
+  } else {
+    log('Error: Either --task=TEXT or --scenario=ID required', 'red');
+    process.exit(1);
+  }
+
+  log(`\nProject: ${projectPath}`, 'dim');
+  log(`Task: ${task}\n`, 'dim');
+
+  // Connect to MCP server
+  const client = new DryRunClient(projectPath);
+  // True once this run has started a workflow that must be aborted on exit.
+  let workflowActive = false;
+
+  try {
+    log('Connecting to MCP server...', 'dim');
+    await client.connect();
+    log('Connected!', 'green');
+
+    // Check initial status
+    logSection('Status');
+    const status = await client.callTool('status');
+    log(status.text, 'dim');
+
+    // Start the workflow
+    logSection('Starting Workflow');
+    const startResult = await client.callTool('start', { task });
+    log(startResult.text, startResult.isError ? 'red' : 'green');
+    workflowActive = !startResult.isError;
+
+    if (startResult.isError) {
+      // Try to abort and restart
+      log('\nAborting existing workflow...', 'yellow');
+      await client.callTool('abort', { reason: 'Dry run restart' });
+      const retryResult = await client.callTool('start', { task });
+      log(retryResult.text, retryResult.isError ? 'red' : 'green');
+      if (retryResult.isError) {
+        // Do not continue and report a "complete" dry run with no workflow.
+        throw new Error(`Could not start workflow: ${retryResult.text}`);
+      }
+      workflowActive = true;
+    }
+
+    // Get worker brief
+    logSection('Worker Brief');
+    const workerBrief = await client.callTool('get_worker_brief');
+
+    // Show truncated brief (callTool may return a raw result without `text`)
+    const briefLines = (workerBrief.text ?? '').split('\n');
+    const truncatedBrief = briefLines.slice(0, 50).join('\n');
+    log(truncatedBrief, 'dim');
+    if (briefLines.length > 50) {
+      log(`\n... (${briefLines.length - 50} more lines)`, 'dim');
+    }
+
+    logSection('Dry Run Complete');
+
+    log(`
+${colors.yellow}What happens next in a real session:${colors.reset}
+
+1. ${colors.bold}Worker Agent${colors.reset} (fresh sub-agent) receives the brief above
+   - Researches the codebase using Glob, Grep, Read tools
+   - Proposes a fix with file:line citations
+   - Submits via ${colors.cyan}diligence.propose${colors.reset}
+
+2. ${colors.bold}Reviewer Agent${colors.reset} (fresh sub-agent) verifies the proposal
+   - Searches codebase to verify Worker's claims
+   - Checks against patterns in CODEBASE_CONTEXT.md
+   - Submits ${colors.green}APPROVED${colors.reset} or ${colors.yellow}NEEDS_WORK${colors.reset} via ${colors.cyan}diligence.review${colors.reset}
+
+3. If ${colors.yellow}NEEDS_WORK${colors.reset}: Worker revises, Reviewer re-checks (up to 5 rounds)
+
+4. If ${colors.green}APPROVED${colors.reset}: ${colors.cyan}diligence.implement${colors.reset} → code changes → ${colors.cyan}diligence.complete${colors.reset}
+
+${colors.dim}This was a dry run - no code changes were made.${colors.reset}
+`, 'reset');
+
+  } finally {
+    // Cleanup - abort the workflow even when a step above failed, so the
+    // dry run never leaves an active workflow behind in the project.
+    if (workflowActive) {
+      try {
+        await client.callTool('abort', { reason: 'Dry run completed' });
+        log('Workflow aborted (dry run cleanup)', 'dim');
+      } catch (err) {
+        // Report but do not mask the error that brought us here.
+        log(`Warning: could not abort workflow during cleanup: ${err.message}`, 'yellow');
+      }
+    }
+    await client.disconnect();
+    log('\nDisconnected from MCP server.', 'dim');
+  }
+}
+
+// Run the dry run; a rejected run is reported on stderr and ends the
+// process with exit code 1.
+main().then(undefined, (failure) => {
+  console.error('Error:', failure.message);
+  process.exit(1);
+});