#!/usr/bin/env node
/**
 * Dry Run Test Against Real Project
 *
 * Runs the diligence MCP server against a real project (e.g., nexus) in dry-run mode.
 * This tests the full workflow without making any code changes.
 *
 * Usage:
 *   node test/dry-run.mjs --project=/path/to/nexus --task="Fix permission cache"
 *   node test/dry-run.mjs --project=~/bude/codecharm/nexus --scenario=blocking-voice
 *
 * Options:
 *   --project=PATH     Path to the project to test against
 *   --task=TEXT        Task description to start the workflow with
 *   --scenario=ID      Use a predefined scenario from test/scenarios/
 *   --interactive, -i  Run in interactive mode (prompts for input)
 *
 * Set the DEBUG environment variable to echo the server's stderr.
 */

import { spawn } from 'child_process';
import { createInterface } from 'readline';
import { homedir } from 'os';
import { dirname, join, resolve } from 'path';
import { fileURLToPath } from 'url';
import { existsSync, readFileSync } from 'fs';

const __dirname = dirname(fileURLToPath(import.meta.url));

/** How long a single JSON-RPC request may stay unanswered before it is rejected. */
const REQUEST_TIMEOUT_MS = 30000;

/** Maximum number of worker-brief lines echoed to the terminal. */
const MAX_BRIEF_LINES = 50;

// Parse CLI args
const args = process.argv.slice(2);

/**
 * Returns the value of a `--name=value` CLI option.
 *
 * Everything after the first `=` is kept, so values that themselves contain
 * `=` (e.g. `--task="set a=b"`) are not truncated.
 *
 * @param {string} name - Option name without the leading dashes.
 * @returns {string|null} The option value, or null when the option is absent.
 */
function getArgValue(name) {
  const prefix = `--${name}=`;
  const match = args.find((arg) => arg.startsWith(prefix));
  return match === undefined ? null : match.slice(prefix.length);
}

/**
 * Expands a leading `~` (alone or followed by a path separator) to the
 * current user's home directory. Forms such as `~other/dir` are left alone.
 *
 * @param {string} path - Path as typed on the command line.
 * @returns {string} The path with the home directory substituted.
 */
function expandHome(path) {
  // Replacer function: the home directory must be inserted literally, even if
  // it contains `$` sequences that String.prototype.replace would interpret.
  return path.replace(/^~(?=$|[\\/])/, () => homedir());
}

const taskText = getArgValue('task');
const scenarioId = getArgValue('scenario');
const interactive = args.includes('--interactive') || args.includes('-i');

// Resolve project path
let projectPath = getArgValue('project');
if (projectPath) {
  projectPath = resolve(expandHome(projectPath));
}

// Colors
const colors = {
  reset: '\x1b[0m',
  green: '\x1b[32m',
  red: '\x1b[31m',
  yellow: '\x1b[33m',
  blue: '\x1b[34m',
  cyan: '\x1b[36m',
  dim: '\x1b[2m',
  bold: '\x1b[1m',
};

/**
 * Prints a message wrapped in the given ANSI color.
 *
 * @param {string} msg - Text to print.
 * @param {string} [color='reset'] - Key of `colors`; unknown keys fall back to no color.
 */
function log(msg, color = 'reset') {
  console.log(`${colors[color] ?? colors.reset}${msg}${colors.reset}`);
}

/**
 * Prints a highlighted section header.
 *
 * @param {string} title - Section title.
 */
function logSection(title) {
  console.log(`\n${colors.cyan}${colors.bold}═══ ${title} ═══${colors.reset}\n`);
}

/**
 * Loads a predefined scenario from the `scenarios/` directory next to this script.
 *
 * @param {string} id - Scenario file name without the `.json` extension.
 * @returns {object} The parsed scenario JSON.
 * @throws {Error} When the scenario file does not exist.
 */
function loadScenario(id) {
  const path = join(__dirname, 'scenarios', `${id}.json`);
  if (!existsSync(path)) {
    throw new Error(`Scenario not found: ${id}`);
  }
  return JSON.parse(readFileSync(path, 'utf-8'));
}

/**
 * Returns printable text for a tool result.
 *
 * @param {*} result - Value returned by `DryRunClient#callTool`.
 * @returns {string} The result's `text`, or a JSON dump when there is none.
 */
function toolText(result) {
  if (typeof result?.text === 'string') {
    return result.text;
  }
  return JSON.stringify(result, null, 2) ?? '';
}

/**
 * Simple MCP client for dry run.
 *
 * Spawns the server (`../index.mjs` relative to this script) with the project
 * as working directory and exchanges newline-delimited JSON-RPC messages over
 * the child's stdin/stdout.
 */
class DryRunClient {
  /**
   * @param {string} projectPath - Directory used as the server's working directory.
   */
  constructor(projectPath) {
    this.projectPath = projectPath;
    this.serverPath = join(__dirname, '..', 'index.mjs');
    this.process = null;
    this.requestId = 0;
    // id -> { resolve, reject, timer } for requests still awaiting a response.
    this.pendingRequests = new Map();
    this.readline = null;
  }

  /**
   * Spawns the server and performs the `initialize` handshake.
   *
   * @returns {Promise<void>} Resolves once the server answered `initialize`.
   * @throws {Error} When spawning fails, the server exits, or the handshake times out.
   */
  async connect() {
    // process.execPath runs the server with the same Node binary as this
    // script instead of relying on `node` being on PATH.
    const child = spawn(process.execPath, [this.serverPath], {
      stdio: ['pipe', 'pipe', 'pipe'],
      cwd: this.projectPath,
    });
    this.process = child;

    this.readline = createInterface({
      input: child.stdout,
      crlfDelay: Infinity,
    });
    this.readline.on('line', (line) => this._handleLine(line));

    child.stderr.on('data', (data) => {
      // Show server stderr in debug mode
      if (process.env.DEBUG) {
        console.error(`${colors.dim}[server] ${data.toString()}${colors.reset}`);
      }
    });

    // Fail outstanding requests immediately instead of letting them run into
    // the request timeout when the server cannot be reached any more.
    child.on('error', (err) => this._rejectAll(err));
    child.on('exit', (code, signal) => {
      this._rejectAll(new Error(`MCP server exited (code=${code}, signal=${signal})`));
    });
    child.stdin.on('error', (err) => this._rejectAll(err));

    // Initialize
    // NOTE(review): MCP protocol versions are usually date strings; confirm
    // the server accepts '0.1.0'.
    await this._send({
      jsonrpc: '2.0',
      id: this.requestId++,
      method: 'initialize',
      params: {
        protocolVersion: '0.1.0',
        clientInfo: { name: 'dry-run-client', version: '1.0.0' },
        capabilities: {},
      },
    });
    this._sendNotification('notifications/initialized', {});
  }

  /**
   * Stops the server process and rejects any requests still in flight.
   *
   * @returns {Promise<void>}
   */
  async disconnect() {
    if (this.readline) {
      this.readline.close();
      this.readline = null;
    }
    if (this.process) {
      this.process.kill('SIGTERM');
      this.process = null;
    }
    this._rejectAll(new Error('Disconnected from MCP server'));
  }

  /**
   * Handles one line of server stdout: settles the matching pending request.
   * Lines that are not JSON, or that carry no known request id, are ignored.
   *
   * @param {string} line - A single line read from the server's stdout.
   */
  _handleLine(line) {
    let message;
    try {
      message = JSON.parse(line);
    } catch {
      // Ignore non-JSON lines
      return;
    }
    if (message === null || typeof message !== 'object' || message.id === undefined) {
      return;
    }

    const pending = this.pendingRequests.get(message.id);
    if (!pending) {
      return;
    }
    this.pendingRequests.delete(message.id);
    // Without this the timer keeps the event loop (and the CLI) alive for the
    // full timeout after the run has finished.
    clearTimeout(pending.timer);

    if (message.error) {
      pending.reject(new Error(message.error.message || JSON.stringify(message.error)));
    } else {
      pending.resolve(message.result);
    }
  }

  /**
   * Rejects every pending request with `error` and cancels its timeout.
   *
   * @param {Error} error - Reason passed to each pending request.
   */
  _rejectAll(error) {
    for (const { reject, timer } of this.pendingRequests.values()) {
      clearTimeout(timer);
      reject(error);
    }
    this.pendingRequests.clear();
  }

  /**
   * Sends a JSON-RPC request and waits for the response with the same id.
   *
   * @param {object} message - JSON-RPC request; must carry a unique `id`.
   * @returns {Promise<*>} The `result` member of the response.
   * @throws {Error} On a JSON-RPC error response, timeout, or lost connection.
   */
  _send(message) {
    return new Promise((resolve, reject) => {
      if (!this.process) {
        reject(new Error('Not connected to MCP server'));
        return;
      }

      const timer = setTimeout(() => {
        if (this.pendingRequests.delete(message.id)) {
          reject(new Error('Request timeout'));
        }
      }, REQUEST_TIMEOUT_MS);

      this.pendingRequests.set(message.id, { resolve, reject, timer });
      this.process.stdin.write(`${JSON.stringify(message)}\n`);
    });
  }

  /**
   * Sends a JSON-RPC notification (no id, no response expected).
   *
   * @param {string} method - Notification method name.
   * @param {object} params - Notification parameters.
   */
  _sendNotification(method, params) {
    this.process.stdin.write(`${JSON.stringify({ jsonrpc: '2.0', method, params })}\n`);
  }

  /**
   * Invokes a server tool via `tools/call`.
   *
   * @param {string} name - Tool name.
   * @param {object} [args={}] - Tool arguments.
   * @returns {Promise<{text: string, isError: boolean}|*>} `{ text, isError }`
   *   when the first content item carries text, otherwise the raw result.
   */
  async callTool(name, args = {}) {
    const result = await this._send({
      jsonrpc: '2.0',
      id: this.requestId++,
      method: 'tools/call',
      params: { name, arguments: args },
    });

    const text = result?.content?.[0]?.text;
    if (text) {
      return { text, isError: result.isError || false };
    }
    return result;
  }
}

/**
 * Interactive prompt: asks one question on the terminal.
 *
 * @param {string} question - Text shown to the user.
 * @returns {Promise<string>} The line the user typed.
 */
function prompt(question) {
  const rl = createInterface({
    input: process.stdin,
    output: process.stdout,
  });

  return new Promise((resolveAnswer) => {
    rl.question(question, (answer) => {
      rl.close();
      resolveAnswer(answer);
    });
  });
}

/**
 * Entry point: validates the CLI input, starts a workflow on the server,
 * prints the worker brief and aborts the workflow again.
 *
 * @returns {Promise<void>}
 */
async function main() {
  log('\n🔍 Diligence Dry Run\n', 'cyan');

  // Validate project path
  if (!projectPath) {
    log('Error: --project=PATH required', 'red');
    log('\nUsage:', 'dim');
    log('  node test/dry-run.mjs --project=~/bude/codecharm/nexus --task="Fix bug"', 'dim');
    process.exit(1);
  }

  if (!existsSync(projectPath)) {
    log(`Error: Project path not found: ${projectPath}`, 'red');
    process.exit(1);
  }

  // Check for CODEBASE_CONTEXT.md
  const contextPath = join(projectPath, '.claude', 'CODEBASE_CONTEXT.md');
  if (!existsSync(contextPath)) {
    log(`Warning: No .claude/CODEBASE_CONTEXT.md found in ${projectPath}`, 'yellow');
    log('The Worker and Reviewer will have limited context.', 'dim');
  } else {
    log(`Found: ${contextPath}`, 'green');
  }

  // Determine task
  let task;
  if (scenarioId !== null) {
    const scenario = loadScenario(scenarioId);
    task = scenario.task;
    log(`Using scenario: ${scenario.name}`, 'blue');
  } else if (taskText !== null) {
    task = taskText;
  } else if (interactive) {
    task = await prompt('Enter task: ');
  } else {
    log('Error: Either --task=TEXT or --scenario=ID required', 'red');
    process.exit(1);
  }

  if (!task) {
    log('Error: Task description is empty', 'red');
    process.exit(1);
  }

  log(`\nProject: ${projectPath}`, 'dim');
  log(`Task: ${task}\n`, 'dim');

  // Connect to MCP server
  const client = new DryRunClient(projectPath);
  // Tracks whether this run owns an active workflow that must be aborted,
  // even when a later step fails.
  let workflowStarted = false;

  try {
    log('Connecting to MCP server...', 'dim');
    await client.connect();
    log('Connected!', 'green');

    // Check initial status
    logSection('Status');
    const status = await client.callTool('status');
    log(toolText(status), 'dim');

    // Start the workflow
    logSection('Starting Workflow');
    let startResult = await client.callTool('start', { task });
    log(toolText(startResult), startResult?.isError ? 'red' : 'green');

    if (startResult?.isError) {
      // Try to abort and restart
      log('\nAborting existing workflow...', 'yellow');
      await client.callTool('abort', { reason: 'Dry run restart' });
      startResult = await client.callTool('start', { task });
      log(toolText(startResult), startResult?.isError ? 'red' : 'green');
      if (startResult?.isError) {
        throw new Error('Could not start the workflow, even after aborting the existing one');
      }
    }
    workflowStarted = true;

    // Get worker brief
    logSection('Worker Brief');
    const workerBrief = await client.callTool('get_worker_brief');

    // Show truncated brief
    const briefLines = toolText(workerBrief).split('\n');
    log(briefLines.slice(0, MAX_BRIEF_LINES).join('\n'), 'dim');
    if (briefLines.length > MAX_BRIEF_LINES) {
      log(`\n... (${briefLines.length - MAX_BRIEF_LINES} more lines)`, 'dim');
    }

    logSection('Dry Run Complete');
    log(`
${colors.yellow}What happens next in a real session:${colors.reset}

1. ${colors.bold}Worker Agent${colors.reset} (fresh sub-agent) receives the brief above
   - Researches the codebase using Glob, Grep, Read tools
   - Proposes a fix with file:line citations
   - Submits via ${colors.cyan}diligence.propose${colors.reset}

2. ${colors.bold}Reviewer Agent${colors.reset} (fresh sub-agent) verifies the proposal
   - Searches codebase to verify Worker's claims
   - Checks against patterns in CODEBASE_CONTEXT.md
   - Submits ${colors.green}APPROVED${colors.reset} or ${colors.yellow}NEEDS_WORK${colors.reset} via ${colors.cyan}diligence.review${colors.reset}

3. If ${colors.yellow}NEEDS_WORK${colors.reset}: Worker revises, Reviewer re-checks (up to 5 rounds)

4. If ${colors.green}APPROVED${colors.reset}: ${colors.cyan}diligence.implement${colors.reset} → code changes → ${colors.cyan}diligence.complete${colors.reset}

${colors.dim}This was a dry run - no code changes were made.${colors.reset}
`, 'reset');
  } finally {
    // Cleanup - abort the workflow. Done here so that a failure after a
    // successful start does not leave the workflow active in the project.
    if (workflowStarted) {
      try {
        await client.callTool('abort', { reason: 'Dry run completed' });
        log('Workflow aborted (dry run cleanup)', 'dim');
      } catch (err) {
        // Report but do not mask an error that may already be propagating.
        log(`Warning: failed to abort workflow during cleanup: ${err.message}`, 'yellow');
        process.exitCode = 1;
      }
    }

    await client.disconnect();
    log('\nDisconnected from MCP server.', 'dim');
  }
}

main().catch((err) => {
  console.error('Error:', err.message);
  process.exit(1);
});