diligence/test/dry-run.mjs

#!/usr/bin/env node
/**
 * Dry Run Test Against Real Project
 *
 * Runs the diligence MCP server against a real project (e.g., nexus) in dry-run mode.
 * This tests the full workflow without making any code changes.
 *
 * Usage:
 *   node test/dry-run.mjs --project=/path/to/nexus --task="Fix permission cache"
 *   node test/dry-run.mjs --project=~/bude/codecharm/nexus --scenario=blocking-voice
 *
 * Options:
 *   --project=PATH    Path to the project to test against
 *   --task=TEXT       Task description to start the workflow with
 *   --scenario=ID     Use a predefined scenario from test/scenarios/
 *   --interactive, -i Prompt for the task when neither --task nor --scenario is given
 */

import { spawn } from 'child_process';
import { createInterface } from 'readline';
import { dirname, join, resolve } from 'path';
import { fileURLToPath } from 'url';
import { existsSync, readFileSync } from 'fs';

const __dirname = dirname(fileURLToPath(import.meta.url));

/**
 * Extracts the value of a `--flag=value` CLI argument.
 *
 * Everything after the FIRST `=` is returned, so values that themselves
 * contain `=` (e.g. `--task=Set retries=3`) are preserved intact.
 *
 * @param {string | undefined} arg - Raw argument such as `--project=/tmp/x`.
 * @returns {string | null} The value, or null when arg is missing or has no `=`.
 */
function readFlagValue(arg) {
  if (typeof arg !== 'string') {
    return null;
  }
  const separator = arg.indexOf('=');
  return separator === -1 ? null : arg.slice(separator + 1);
}

/**
 * Expands a leading `~` (alone or followed by a path separator) to the home
 * directory. `~user/...` forms and paths without a leading `~` are returned
 * unchanged, as is everything when no home directory is known.
 *
 * @param {string} rawPath - Path as typed on the command line.
 * @param {string} [home] - Home directory; defaults to $HOME, then $USERPROFILE.
 * @returns {string} The path with the home prefix substituted where applicable.
 */
function expandHome(rawPath, home = process.env.HOME ?? process.env.USERPROFILE) {
  if (!home) {
    return rawPath;
  }
  // A replacer function keeps `$` sequences in the home path literal.
  return rawPath.replace(/^~(?=$|[\\/])/, () => home);
}

// Parse CLI args
const args = process.argv.slice(2);
const projectArg = args.find(a => a.startsWith('--project='));
const taskArg = args.find(a => a.startsWith('--task='));
const scenarioArg = args.find(a => a.startsWith('--scenario='));
const interactive = args.includes('--interactive') || args.includes('-i');

// Resolve project path (stays null when --project was not supplied)
let projectPath = projectArg ? readFlagValue(projectArg) : null;
if (projectPath) {
  projectPath = resolve(expandHome(projectPath));
}

// Colors
// ANSI SGR escape sequences, keyed by the colour/style names that log() accepts.
// Each entry is ESC '[' <code> 'm'.
const colors = Object.fromEntries(
  Object.entries({
    reset: 0,
    green: 32,
    red: 31,
    yellow: 33,
    blue: 34,
    cyan: 36,
    dim: 2,
    bold: 1,
  }).map(([name, sgrCode]) => [name, `\x1b[${sgrCode}m`]),
);

/**
 * Prints a message to stdout wrapped in an ANSI colour and a trailing reset.
 *
 * @param {*} msg - Value to print (interpolated into a template string).
 * @param {string} [color='reset'] - Key of `colors`; unknown names fall back
 *   to `colors.reset` instead of printing the text "undefined".
 */
function log(msg, color = 'reset') {
  const prefix = colors[color] ?? colors.reset;
  console.log(`${prefix}${msg}${colors.reset}`);
}

/**
 * Prints a bold cyan section banner, preceded and followed by a blank line.
 *
 * @param {*} title - Heading text placed between the `═══` rules.
 */
function logSection(title) {
  const { cyan, bold, reset } = colors;
  const banner = `═══ ${title} ═══`;
  const styledBanner = `${cyan}${bold}${banner}${reset}`;
  console.log(['', styledBanner, ''].join('\n'));
}

// Load scenario
/**
 * Loads a predefined scenario from `scenarios/<id>.json` next to this file.
 *
 * The file is read in a single step (no separate existence check), so a file
 * removed between check and read cannot produce a confusing low-level error.
 *
 * @param {string} id - Scenario identifier (file name without `.json`).
 * @returns {*} The parsed JSON content of the scenario file.
 * @throws {Error} `Scenario not found: <id>` when the file does not exist;
 *   an Error naming the scenario (with the SyntaxError as `cause`) when the
 *   file is not valid JSON; any other read error is rethrown unchanged.
 */
function loadScenario(id) {
  const scenarioPath = join(__dirname, 'scenarios', `${id}.json`);

  let raw;
  try {
    raw = readFileSync(scenarioPath, 'utf-8');
  } catch (err) {
    if (err.code === 'ENOENT') {
      throw new Error(`Scenario not found: ${id}`);
    }
    throw err;
  }

  try {
    return JSON.parse(raw);
  } catch (err) {
    throw new Error(`Scenario "${id}" is not valid JSON (${scenarioPath}): ${err.message}`, { cause: err });
  }
}

// Simple MCP client for dry run
/**
 * Minimal newline-delimited JSON-RPC 2.0 client for the dry run.
 *
 * Spawns the server script (`../index.mjs` relative to this file) with the
 * target project as its working directory, writes one JSON message per line to
 * the server's stdin and matches responses read from its stdout by `id`.
 */
class DryRunClient {
  /**
   * @param {string} projectPath - Directory used as the server's cwd.
   * @param {{ requestTimeoutMs?: number }} [options] - `requestTimeoutMs` is
   *   how long a request may stay unanswered before it is rejected
   *   (default 30000).
   */
  constructor(projectPath, { requestTimeoutMs = 30000 } = {}) {
    this.projectPath = projectPath;
    this.serverPath = join(__dirname, '..', 'index.mjs');
    this.requestTimeoutMs = requestTimeoutMs;
    this.process = null;
    this.requestId = 0;
    // id -> { resolve, reject, timer } for every request awaiting a response.
    this.pendingRequests = new Map();
    this.readline = null;
  }

  /**
   * Spawns the server and performs the `initialize` handshake.
   *
   * @returns {Promise<void>} Resolves once the server answered `initialize`
   *   and the `notifications/initialized` notification has been written.
   *   Rejects if the process cannot be spawned, closes early, or the
   *   handshake fails or times out.
   */
  async connect() {
    return new Promise((onConnected, onFailed) => {
      this.process = spawn('node', [this.serverPath], {
        stdio: ['pipe', 'pipe', 'pipe'],
        cwd: this.projectPath,
      });

      this.readline = createInterface({
        input: this.process.stdout,
        crlfDelay: Infinity,
      });
      this.readline.on('line', (line) => this._handleLine(line));

      this.process.stderr.on('data', (data) => {
        // Show server stderr in debug mode
        if (process.env.DEBUG) {
          console.error(colors.dim + '[server] ' + data.toString() + colors.reset);
        }
      });

      this.process.on('error', (err) => {
        onFailed(err);
        this._rejectAll(err);
      });

      // 'close' (not 'exit') so responses still buffered in stdout are
      // delivered before outstanding requests are failed.
      this.process.on('close', (code, signal) => {
        this._rejectAll(new Error(`MCP server exited (code=${code}, signal=${signal})`));
      });

      // Without a listener, a write to a dead server (EPIPE) would surface as
      // an uncaught 'error' event instead of a rejected request.
      this.process.stdin.on('error', (err) => this._rejectAll(err));

      // Initialize
      // NOTE(review): confirm the server accepts protocolVersion '0.1.0'.
      this._send({
        jsonrpc: '2.0',
        id: this.requestId++,
        method: 'initialize',
        params: {
          protocolVersion: '0.1.0',
          clientInfo: { name: 'dry-run-client', version: '1.0.0' },
          capabilities: {},
        },
      }).then(() => {
        this._sendNotification('notifications/initialized', {});
        onConnected();
      }).catch(onFailed);
    });
  }

  /**
   * Fails any in-flight requests, stops reading server output and sends
   * SIGTERM to the server. Safe to call when not connected.
   *
   * @returns {Promise<void>}
   */
  async disconnect() {
    this._rejectAll(new Error('Client disconnected'));
    if (this.readline) {
      this.readline.close();
      this.readline = null;
    }
    if (this.process) {
      this.process.kill('SIGTERM');
      this.process = null;
    }
  }

  /**
   * Processes one line of server output. Lines that are not JSON, or that do
   * not carry the `id` of a pending request, are ignored.
   *
   * @param {string} line - A single line read from the server's stdout.
   */
  _handleLine(line) {
    let message;
    try {
      message = JSON.parse(line);
    } catch {
      // Ignore non-JSON lines
      return;
    }

    const id = message?.id;
    if (id === undefined || !this.pendingRequests.has(id)) {
      return;
    }

    const { resolve, reject, timer } = this.pendingRequests.get(id);
    // Clearing the timer matters: a live 30 s timer would otherwise keep the
    // Node process running long after the dry run has finished.
    clearTimeout(timer);
    this.pendingRequests.delete(id);

    if (message.error) {
      reject(new Error(message.error.message || JSON.stringify(message.error)));
    } else {
      resolve(message.result);
    }
  }

  /**
   * Rejects every pending request with `error` and cancels its timeout.
   *
   * @param {Error} error - Reason passed to each pending request's rejection.
   */
  _rejectAll(error) {
    for (const { reject, timer } of this.pendingRequests.values()) {
      clearTimeout(timer);
      reject(error);
    }
    this.pendingRequests.clear();
  }

  /**
   * Writes a JSON-RPC request and waits for the response with the same `id`.
   *
   * @param {{ id: number|string }} message - Request object; must carry `id`.
   * @returns {Promise<*>} Resolves with the response's `result`; rejects with
   *   the response's error, `Request timeout`, or a connection failure.
   */
  _send(message) {
    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        if (this.pendingRequests.delete(message.id)) {
          reject(new Error('Request timeout'));
        }
      }, this.requestTimeoutMs);
      this.pendingRequests.set(message.id, { resolve, reject, timer });

      try {
        this.process.stdin.write(JSON.stringify(message) + '\n');
      } catch (err) {
        // e.g. not connected: do not leave a dangling entry or timer behind.
        clearTimeout(timer);
        this.pendingRequests.delete(message.id);
        reject(err);
      }
    });
  }

  /**
   * Writes a JSON-RPC notification (no `id`, no response expected).
   *
   * @param {string} method - Notification method name.
   * @param {object} params - Notification parameters.
   */
  _sendNotification(method, params) {
    this.process.stdin.write(JSON.stringify({ jsonrpc: '2.0', method, params }) + '\n');
  }

  /**
   * Invokes a server tool via `tools/call`.
   *
   * @param {string} name - Tool name.
   * @param {object} [args={}] - Tool arguments.
   * @returns {Promise<{text: string, isError: boolean} | *>} `{ text, isError }`
   *   when the first content item has non-empty text; otherwise the raw result.
   */
  async callTool(name, args = {}) {
    const result = await this._send({
      jsonrpc: '2.0',
      id: this.requestId++,
      method: 'tools/call',
      params: { name, arguments: args },
    });
    const text = result?.content?.[0]?.text;
    if (text) {
      return { text, isError: result.isError || false };
    }
    return result;
  }
}

// Interactive prompt
/**
 * Asks a single question on the terminal and resolves with the typed answer.
 * A dedicated readline interface on process.stdin/stdout is opened for the
 * question and closed as soon as the answer arrives.
 *
 * @param {string} question - Text shown before the cursor.
 * @returns {Promise<string>} The line entered by the user.
 */
function prompt(question) {
  return new Promise((deliver) => {
    const terminal = createInterface({ input: process.stdin, output: process.stdout });
    const onAnswer = (reply) => {
      terminal.close();
      deliver(reply);
    };
    terminal.question(question, onAnswer);
  });
}

/**
 * Runs the dry run: validates the CLI input, connects to the MCP server,
 * starts a workflow for the task, prints the (truncated) worker brief and an
 * explanation of the remaining steps, then aborts the workflow and disconnects.
 *
 * Exits the process with code 1 on invalid input (missing/unknown project
 * path, missing or empty task).
 *
 * @returns {Promise<void>}
 * @throws {Error} When the scenario cannot be loaded, the server cannot be
 *   reached, the workflow cannot be started even after aborting an existing
 *   one, or the worker brief has no text.
 */
async function main() {
  log('\n🔍 Diligence Dry Run\n', 'cyan');

  // Validate project path
  if (!projectPath) {
    log('Error: --project=PATH required', 'red');
    log('\nUsage:', 'dim');
    log('  node test/dry-run.mjs --project=~/bude/codecharm/nexus --task="Fix bug"', 'dim');
    process.exit(1);
  }

  if (!existsSync(projectPath)) {
    log(`Error: Project path not found: ${projectPath}`, 'red');
    process.exit(1);
  }

  // Check for CODEBASE_CONTEXT.md
  const contextPath = join(projectPath, '.claude', 'CODEBASE_CONTEXT.md');
  if (!existsSync(contextPath)) {
    log(`Warning: No .claude/CODEBASE_CONTEXT.md found in ${projectPath}`, 'yellow');
    log('The Worker and Reviewer will have limited context.', 'dim');
  } else {
    log(`Found: ${contextPath}`, 'green');
  }

  // Determine task
  // Values are taken as everything after the flag prefix so that text
  // containing '=' (e.g. "Set retries=3") is not cut off.
  let task;
  if (scenarioArg) {
    const scenarioId = scenarioArg.slice('--scenario='.length);
    const scenario = loadScenario(scenarioId);
    task = scenario.task;
    log(`Using scenario: ${scenario.name ?? scenarioId}`, 'blue');
  } else if (taskArg) {
    task = taskArg.slice('--task='.length);
  } else if (interactive) {
    task = await prompt('Enter task: ');
  } else {
    log('Error: Either --task=TEXT or --scenario=ID required', 'red');
    process.exit(1);
  }

  if (typeof task !== 'string' || task.trim() === '') {
    log('Error: Task description is empty', 'red');
    process.exit(1);
  }

  log(`\nProject: ${projectPath}`, 'dim');
  log(`Task: ${task}\n`, 'dim');

  // Connect to MCP server
  const client = new DryRunClient(projectPath);

  // True while a workflow started by this run still needs to be aborted.
  let workflowActive = false;

  try {
    log('Connecting to MCP server...', 'dim');
    await client.connect();
    log('Connected!', 'green');

    // Check initial status
    logSection('Status');
    const status = await client.callTool('status');
    log(status.text, 'dim');

    // Start the workflow
    logSection('Starting Workflow');
    const startResult = await client.callTool('start', { task });
    log(startResult.text, startResult.isError ? 'red' : 'green');

    if (startResult.isError) {
      // Try to abort and restart
      log('\nAborting existing workflow...', 'yellow');
      await client.callTool('abort', { reason: 'Dry run restart' });
      const retryResult = await client.callTool('start', { task });
      log(retryResult.text, retryResult.isError ? 'red' : 'green');
      if (retryResult.isError) {
        // Without a started workflow the remaining steps are meaningless.
        throw new Error(`Could not start workflow: ${retryResult.text}`);
      }
    }
    workflowActive = true;

    // Get worker brief
    logSection('Worker Brief');
    const workerBrief = await client.callTool('get_worker_brief');
    if (typeof workerBrief?.text !== 'string') {
      throw new Error('get_worker_brief returned no text content');
    }

    // Show truncated brief
    const briefLines = workerBrief.text.split('\n');
    const truncatedBrief = briefLines.slice(0, 50).join('\n');
    log(truncatedBrief, 'dim');
    if (briefLines.length > 50) {
      log(`\n... (${briefLines.length - 50} more lines)`, 'dim');
    }

    logSection('Dry Run Complete');

    log(`
${colors.yellow}What happens next in a real session:${colors.reset}

1. ${colors.bold}Worker Agent${colors.reset} (fresh sub-agent) receives the brief above
   - Researches the codebase using Glob, Grep, Read tools
   - Proposes a fix with file:line citations
   - Submits via ${colors.cyan}diligence.propose${colors.reset}

2. ${colors.bold}Reviewer Agent${colors.reset} (fresh sub-agent) verifies the proposal
   - Searches codebase to verify Worker's claims
   - Checks against patterns in CODEBASE_CONTEXT.md
   - Submits ${colors.green}APPROVED${colors.reset} or ${colors.yellow}NEEDS_WORK${colors.reset} via ${colors.cyan}diligence.review${colors.reset}

3. If ${colors.yellow}NEEDS_WORK${colors.reset}: Worker revises, Reviewer re-checks (up to 5 rounds)

4. If ${colors.green}APPROVED${colors.reset}: ${colors.cyan}diligence.implement${colors.reset} → code changes → ${colors.cyan}diligence.complete${colors.reset}

${colors.dim}This was a dry run - no code changes were made.${colors.reset}
`, 'reset');

    // Cleanup - abort the workflow
    // Flag is cleared first so a failing abort is not retried in `finally`.
    workflowActive = false;
    await client.callTool('abort', { reason: 'Dry run completed' });
    log('Workflow aborted (dry run cleanup)', 'dim');

  } finally {
    if (workflowActive) {
      // A step after `start` failed: best-effort abort so the workflow is not
      // left running. Failures here must not mask the original error.
      try {
        await client.callTool('abort', { reason: 'Dry run completed' });
        log('Workflow aborted (dry run cleanup)', 'dim');
      } catch (cleanupError) {
        log(`Warning: could not abort workflow during cleanup: ${cleanupError.message}`, 'yellow');
      }
    }
    await client.disconnect();
    log('\nDisconnected from MCP server.', 'dim');
  }
}

// Script entry: run main() and turn any rejection into a one-line error
// message on stderr plus a non-zero exit status.
(async () => {
  try {
    await main();
  } catch (failure) {
    console.error('Error:', failure.message);
    process.exit(1);
  }
})();