diff --git a/src/tui/utils/stuckDetection.test.ts b/src/tui/utils/stuckDetection.test.ts
new file mode 100644
index 0000000..8053481
--- /dev/null
+++ b/src/tui/utils/stuckDetection.test.ts
@@ -0,0 +1,233 @@
+/**
+ * Tests for Stuck Worker Detection
+ */
+
+import { describe, it, expect } from 'vitest';
+import { isWorkerStuck, getStuckReason, getStuckIndicator, StuckPattern } from './stuckDetection.js';
+import { LogEvent, WorkerInfo } from '../../types.js';
+
+const makeWorker = (overrides: Partial<WorkerInfo> = {}): WorkerInfo => ({
+  id: 'w-test',
+  status: 'active',
+  beadsCompleted: 3,
+  firstSeen: Date.now() - 5 * 60 * 1000,
+  lastActivity: Date.now(),
+  activeFiles: [],
+  hasCollision: false,
+  activeDirectories: [],
+  collisionTypes: [],
+  eventCount: 10,
+  ...overrides,
+});
+
+const makeEvent = (overrides: Partial<LogEvent> = {}): LogEvent => ({
+  ts: Date.now(),
+  worker: 'w-test',
+  level: 'info',
+  msg: 'test event',
+  ...overrides,
+});
+
+describe('Stuck Detection', () => {
+  describe('isWorkerStuck', () => {
+    it('returns null for a healthy worker with recent events', () => {
+      const worker = makeWorker();
+      const events = [makeEvent()];
+
+      expect(isWorkerStuck(worker, events)).toBeNull();
+    });
+
+    it('returns null when no events exist', () => {
+      const worker = makeWorker();
+
+      expect(isWorkerStuck(worker, [])).toBeNull();
+    });
+  });
+
+  describe('state-transition gap detection', () => {
+    it('detects worker stuck in WORKING with no state transition for too long', () => {
+      const gapMs = 8 * 60 * 1000; // 8 minutes: strictly between 1x and 2x the threshold, so elapsed ms cannot tip it into 'critical'
+      const worker = makeWorker({
+        needleState: 'WORKING',
+        lastStateTransition: Date.now() - gapMs,
+      });
+      const events = [makeEvent()];
+
+      const pattern = isWorkerStuck(worker, events, {
+        stateTransitionGapMs: 5 * 60 * 1000,
+      });
+
+      expect(pattern).not.toBeNull();
+      expect(pattern!.type).toBe('state_gap');
+      expect(pattern!.severity).toBe('warning');
+      expect(pattern!.reason).toContain('WORKING');
+      expect(pattern!.reason).toContain('8m');
+    });
+
+    it('escalates to critical at 2x the gap threshold', () => {
+      const gapMs = 15 * 60 * 1000; // 15 minutes (> 2×5min threshold)
+      const worker = makeWorker({
+        needleState: 'WORKING',
+        lastStateTransition: Date.now() - gapMs,
+      });
+      const events = [makeEvent()];
+
+      const pattern = isWorkerStuck(worker, events, {
+        stateTransitionGapMs: 5 * 60 * 1000,
+      });
+
+      expect(pattern).not.toBeNull();
+      expect(pattern!.severity).toBe('critical');
+    });
+
+    it('does not fire for STOPPED workers', () => {
+      const worker = makeWorker({
+        needleState: 'STOPPED',
+        lastStateTransition: Date.now() - 10 * 60 * 1000,
+      });
+      const events = [makeEvent()];
+
+      const pattern = isWorkerStuck(worker, events, {
+        stateTransitionGapMs: 5 * 60 * 1000,
+      });
+
+      expect(pattern).toBeNull();
+    });
+
+    it('does not fire when gap is under threshold', () => {
+      const worker = makeWorker({
+        needleState: 'WORKING',
+        lastStateTransition: Date.now() - 2 * 60 * 1000, // 2 min
+      });
+      const events = [makeEvent()];
+
+      const pattern = isWorkerStuck(worker, events, {
+        stateTransitionGapMs: 5 * 60 * 1000,
+      });
+
+      expect(pattern).toBeNull();
+    });
+
+    it('does not fire when needleState is not set', () => {
+      const worker = makeWorker();
+      const events = [makeEvent()];
+
+      const pattern = isWorkerStuck(worker, events, {
+        stateTransitionGapMs: 5 * 60 * 1000,
+      });
+
+      expect(pattern).toBeNull();
+    });
+
+    it('detects gap for SELECTING state', () => {
+      const worker = makeWorker({
+        needleState: 'SELECTING',
+        lastStateTransition: Date.now() - 8 * 60 * 1000,
+      });
+      const events = [makeEvent()];
+
+      const pattern = isWorkerStuck(worker, events, {
+        stateTransitionGapMs: 5 * 60 * 1000,
+      });
+
+      expect(pattern).not.toBeNull();
+      expect(pattern!.type).toBe('state_gap');
+      expect(pattern!.reason).toContain('SELECTING');
+    });
+
+    it('detects gap for CLAIMING state', () => {
+      const worker = makeWorker({
+        needleState: 'CLAIMING',
+        lastStateTransition: Date.now() - 7 * 60 * 1000,
+      });
+      const events = [makeEvent()];
+
+      const pattern = isWorkerStuck(worker, events, {
+        stateTransitionGapMs: 5 * 60 * 1000,
+      });
+
+      expect(pattern).not.toBeNull();
+      expect(pattern!.reason).toContain('CLAIMING');
+    });
+  });
+
+  describe('getStuckReason', () => {
+    it('returns the reason string when stuck', () => {
+      const worker = makeWorker({
+        needleState: 'WORKING',
+        lastStateTransition: Date.now() - 10 * 60 * 1000,
+      });
+      const events = [makeEvent()];
+
+      const reason = getStuckReason(worker, events, {
+        stateTransitionGapMs: 5 * 60 * 1000,
+      });
+
+      expect(reason).toContain('WORKING');
+    });
+
+    it('returns null when not stuck', () => {
+      const worker = makeWorker();
+      const events = [makeEvent()];
+
+      expect(getStuckReason(worker, events)).toBeNull();
+    });
+  });
+
+  describe('getStuckIndicator', () => {
+    it('returns ⚠ for critical', () => {
+      const pattern: StuckPattern = {
+        type: 'state_gap',
+        reason: 'test',
+        severity: 'critical',
+        evidence: [],
+        suggestion: 'test',
+      };
+      expect(getStuckIndicator(pattern)).toBe('⚠');
+    });
+
+    it('returns ⚡ for warning', () => {
+      const pattern: StuckPattern = {
+        type: 'state_gap',
+        reason: 'test',
+        severity: 'warning',
+        evidence: [],
+        suggestion: 'test',
+      };
+      expect(getStuckIndicator(pattern)).toBe('⚡');
+    });
+
+    it('returns empty string for null', () => {
+      expect(getStuckIndicator(null)).toBe('');
+    });
+  });
+
+  describe('legacy detection (non-state-transition)', () => {
+    it('still detects repeated tool calls', () => {
+      const worker = makeWorker();
+      const events: LogEvent[] = [];
+      for (let i = 0; i < 6; i++) {
+        events.push(makeEvent({ tool: 'Read', path: '/src/index.ts', ts: Date.now() - i * 10000 }));
+      }
+
+      const pattern = isWorkerStuck(worker, events);
+
+      expect(pattern).not.toBeNull();
+      expect(pattern!.type).toBe('repeated_tool');
+    });
+
+    it('still detects no progress', () => {
+      const worker = makeWorker({
+        lastActivity: Date.now() - 3 * 60 * 1000,
+      });
+      const events = [makeEvent({ ts: Date.now() - 3 * 60 * 1000 })];
+
+      const pattern = isWorkerStuck(worker, events, {
+        noProgressThresholdMs: 2 * 60 * 1000,
+      });
+
+      expect(pattern).not.toBeNull();
+      expect(pattern!.type).toBe('no_progress');
+    });
+  });
+});
diff --git a/src/tui/utils/stuckDetection.ts b/src/tui/utils/stuckDetection.ts
index 97cdfdf..a886e3a 100644
--- a/src/tui/utils/stuckDetection.ts
+++ b/src/tui/utils/stuckDetection.ts
@@ -9,7 +9,7 @@ import { LogEvent, WorkerInfo } from '../../types.js';
 
 export interface StuckPattern {
   /** Type of stuck pattern detected */
-  type: 'repeated_tool' | 'no_progress' | 'circular_edit' | 'long_running';
+  type: 'repeated_tool' | 'no_progress' | 'circular_edit' | 'long_running' | 'state_gap';
 
   /** Human-readable description */
   reason: string;
@@ -36,6 +36,11 @@ export interface StuckDetectionOptions {
 
   /** Threshold for long-running tasks (ms), default 10 minutes */
   longRunningThresholdMs?: number;
+
+  /** Threshold for state-transition gap (ms), default 5 minutes.
+   *  Fires when a worker with a needleState has not transitioned
+   *  within this window while in an active state (WORKING, etc.). */
+  stateTransitionGapMs?: number;
 }
 
 const DEFAULT_OPTIONS: Required<StuckDetectionOptions> = {
@@ -43,6 +48,7 @@ const DEFAULT_OPTIONS: Required<StuckDetectionOptions> = {
   repeatedToolThreshold: 5,
   noProgressThresholdMs: 2 * 60 * 1000, // 2 minutes
   longRunningThresholdMs: 10 * 60 * 1000, // 10 minutes
+  stateTransitionGapMs: 5 * 60 * 1000, // 5 minutes
 };
 
 /**
@@ -68,6 +74,7 @@ export function isWorkerStuck(
 
   // Check patterns in order of severity
   const patterns = [
+    detectStateTransitionGap(worker, opts),
     detectRepeatedToolCalls(recentEvents, opts),
     detectNoProgress(worker, recentEvents, opts),
     detectCircularEdits(recentEvents, opts),
@@ -96,6 +103,47 @@ export function getStuckReason(
   return pattern?.reason ?? null;
 }
 
+/**
+ * Detect state-transition gap — worker stuck in an active state without
+ * transitioning. Uses lastStateTransition from WorkerInfo (set by the store
+ * when processing worker.state_transition events).
+ */
+function detectStateTransitionGap(
+  worker: WorkerInfo,
+  opts: Required<StuckDetectionOptions>
+): StuckPattern | null {
+  if (!worker.needleState || !worker.lastStateTransition) {
+    return null;
+  }
+
+  // Only active states are relevant — STOPPED workers are fine.
+  const activeStates = ['BOOTING', 'SELECTING', 'CLAIMING', 'WORKING', 'CLOSING'] as const;
+  if (!activeStates.includes(worker.needleState as typeof activeStates[number])) {
+    return null;
+  }
+
+  const now = Date.now();
+  const gapMs = now - worker.lastStateTransition;
+
+  if (gapMs > opts.stateTransitionGapMs) {
+    const minutes = Math.floor(gapMs / 60000);
+    const isCritical = gapMs > opts.stateTransitionGapMs * 2;
+
+    return {
+      type: 'state_gap',
+      reason: `No state transition for ${minutes}m while in ${worker.needleState}`,
+      severity: isCritical ? 'critical' : 'warning',
+      evidence: [
+        `State: ${worker.needleState} since ${new Date(worker.lastStateTransition).toISOString()}`,
+        `Gap: ${minutes}m (threshold: ${Math.floor(opts.stateTransitionGapMs / 60000)}m)`,
+      ],
+      suggestion: 'Worker may be stuck — check if the agent is waiting on an external resource or deadlocked',
+    };
+  }
+
+  return null;
+}
+
 /**
  * Detect repeated tool calls with same parameters
  */
diff --git a/src/web/frontend/src/components/WorkerDetail.tsx b/src/web/frontend/src/components/WorkerDetail.tsx
index 7d16573..d173179 100644
--- a/src/web/frontend/src/components/WorkerDetail.tsx
+++ b/src/web/frontend/src/components/WorkerDetail.tsx
@@ -1,5 +1,23 @@
 import React from 'react';
-import { WorkerInfo, LogEvent } from '../types';
+import { WorkerInfo, LogEvent, NeedleState } from '../types';
+
+const NEEDLE_STATE_ICONS: Record<NeedleState, string> = {
+  BOOTING: '⏳',
+  SELECTING: '🔍',
+  CLAIMING: '🎯',
+  WORKING: '●',
+  CLOSING: '⏹',
+  STOPPED: '○',
+};
+
+const NEEDLE_STATE_COLORS: Record<NeedleState, string> = {
+  BOOTING: '#5bc0de',
+  SELECTING: '#f0ad4e',
+  CLAIMING: '#9b59b6',
+  WORKING: '#5cb85c',
+  CLOSING: '#f0ad4e',
+  STOPPED: '#777',
+};
 
 interface WorkerDetailProps {
   /** The worker to display details for */
@@ -12,16 +30,6 @@ interface WorkerDetailProps {
   allWorkerEvents?: LogEvent[];
 }
 
-/**
- * WorkerDetail Component
- *
- * Displays detailed information about a selected worker including:
- * - Worker ID and status
- * - Activity statistics (event count, current tool)
- * - Timing information (last seen, uptime)
- * - Recent events list
- * - Collision information if applicable
- */
 const WorkerDetail: React.FC<WorkerDetailProps> = ({
   worker,
   onClose,
@@ -42,18 +50,14 @@ const WorkerDetail: React.FC<WorkerDetailProps> = ({
     return new Date(timestamp).toLocaleTimeString();
   };
 
-  const getStatusIcon = (): string => {
-    switch (worker.status) {
-      case 'active':
-        return '●';
-      case 'idle':
-        return '○';
-      case 'error':
-        return '✗';
-      default:
-        return '?';
-    }
-  };
+  // Prefer the needle state; otherwise fall back to the legacy status glyphs ('?' when unrecognised).
+  const stateIcon = worker.needleState
+    ? NEEDLE_STATE_ICONS[worker.needleState]
+    : worker.status === 'active' ? '●' : worker.status === 'idle' ? '○' : worker.status === 'error' ? '✗' : '?';
+  const stateLabel = worker.needleState ?? worker.status.toUpperCase();
+  const stateColor = worker.needleState ? NEEDLE_STATE_COLORS[worker.needleState] : undefined;
+  // Needle-state colour and legacy status CSS class are mutually exclusive.
+  const stateCssClass = worker.needleState ? undefined : worker.status;
 
   const eventsToShow = allWorkerEvents || worker.recentEvents || [];
 
@@ -62,8 +66,11 @@ const WorkerDetail: React.FC<WorkerDetailProps> = ({
       {/* Header with close button */}

- - {getStatusIcon()} + + {stateIcon} {worker.id}

@@ -103,8 +110,11 @@ const WorkerDetail: React.FC = ({

Status

State - - {worker.status} + + {stateLabel}