diff --git a/.beads/issues.jsonl b/.beads/issues.jsonl index 1564738..303b70b 100644 --- a/.beads/issues.jsonl +++ b/.beads/issues.jsonl @@ -2,7 +2,7 @@ {"id":"bf-1ih7k","title":"TX slot collision detection and adaptive re-stagger","description":"Plan (Component 5 / Fleet Manager) specifies collision monitoring: if CSI frames from two TX nodes arrive within 3ms of each other, log a 'possible slot collision' metric. If collision rate > 5% over a 60-second window, re-randomize stagger assignments (shift one node's slot by half a slot width) and push updated config messages. The fleet manager computes stagger slots but has no collision detection, no re-stagger logic, and no collision rate metric. Needs: (1) per-link-pair collision counter in ingestion/signal processing path, (2) collision rate aggregation in fleet manager, (3) adaptive re-stagger trigger.","design":"","acceptance_criteria":"","notes":"","status":"open","priority":3,"issue_type":"task","created_at":"2026-05-02T18:25:13.899248435Z","updated_at":"2026-05-02T18:25:13.899248435Z","source_repo":".","compaction_level":0} {"id":"bf-1k3zg","title":"API: GET /api/doctor — pre-flight configuration diagnostic","description":"## Goal\nAdd a GET /api/doctor endpoint that diagnoses common misconfiguration before the user concludes the system is broken. Complements /healthz (runtime state) with pre-flight checks (configuration correctness).\n\n## Endpoint\n\nGET /api/doctor\n- Requires session cookie (same as all /api/* endpoints)\n- Returns 200 with a JSON report regardless of check results (HTTP status reflects reachability, not check results)\n\n## Checks to run\n\n| Check | Pass condition | Fail message |\n|---|---|---|\n| data_dir_writable | /data is writable and has >100 MB free | 'Data directory not writable' or 'Disk space low: Nf MB free' |\n| db_integrity | PRAGMA integrity_check returns 'ok' | 'SQLite integrity check failed' |\n| firmware_dir | /firmware contains at least one *.bin file | 'No firmware binaries found — OTA updates unavailable' |\n| mdns_binding | mDNS service is registered (or SPAXEL_MDNS_ENABLED=false) | 'mDNS not advertising — nodes cannot auto-discover mothership' |\n| mqtt_reachable | If SPAXEL_MQTT_BROKER is set: TCP connect to broker succeeds within 3s | 'MQTT broker unreachable: ' |\n| ntp_reachable | UDP ping to SPAXEL_NTP_SERVER:123 resolves within 3s | 'NTP server unreachable — node clock sync may fail' |\n| install_secret | install_secret row exists in auth table | 'Installation secret missing — re-run container to regenerate' |\n| pin_configured | pin_bcrypt is non-null in auth table | 'Dashboard PIN not configured — run first-time setup' |\n| node_token_consistency | All nodes in registry have non-null node_token | 'N nodes missing auth tokens — re-provision via Web Serial' |\n\n## Response format\n\n{\n 'checks': [\n {'name': 'db_integrity', 'status': 'ok', 'message': null},\n {'name': 'mqtt_reachable', 'status': 'warn', 'message': 'MQTT broker unreachable: mqtt://ha.local:1883'},\n {'name': 'firmware_dir', 'status': 'error', 'message': 'No firmware binaries found'}\n ],\n 'overall': 'warn', // 'ok' | 'warn' | 'error' (worst of all checks)\n 'checked_at': '2024-03-15T07:00:00Z'\n}\n\nStatus levels: 'ok' (pass), 'warn' (degraded but functional), 'error' (action required).\n\n## Dashboard integration\n- Command palette: 'doctor' → calls /api/doctor, shows results inline\n- Guided troubleshooting (Component 36): 'Node offline' flow links to 'Run diagnostics' which calls /api/doctor\n- /healthz already covers runtime health; /api/doctor covers configuration health — keep them separate\n\n## Acceptance\n- GET /api/doctor returns 200 with all checks when fully configured\n- Reports 'firmware_dir: error' when /firmware is empty\n- Reports 'mqtt_reachable: warn' when MQTT broker env is set but broker is unreachable\n- Unit tests cover each check in isolation with mocked dependencies","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-golf","created_at":"2026-05-02T12:22:51.188946318Z","updated_at":"2026-05-05T12:39:28.566216915Z","closed_at":"2026-05-05T12:39:28.566216915Z","close_reason":"Completed","source_repo":".","compaction_level":0} {"id":"bf-1t0kn","title":"OTA auto-update with canary strategy and quiet window","description":"Plan specifies a canary OTA strategy (Component 6): update one node first, monitor quality for 10 min, then roll out fleet-wide. Also needs a configurable quiet window (default 02:00–05:00 local) and auto-update mode toggle. Currently the fleet manager only does manual rolling OTA — no canary logic, no scheduled quiet window, no auto-update-on-firmware-detect. Implementation needed in internal/ota and/or fleet manager with a settings key for auto_update_enabled, quiet_window, canary_duration_min.","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-foxtrot","created_at":"2026-05-02T18:24:59.888109951Z","updated_at":"2026-05-05T16:42:25.163065452Z","closed_at":"2026-05-05T16:42:25.163065452Z","close_reason":"Completed","source_repo":".","compaction_level":0} -{"id":"bf-232u3","title":"GET /api/notifications/preview — rendered test thumbnail endpoint","description":"Plan (Component 30, Renderer spec) specifies a test endpoint: GET /api/notifications/preview?type=fall&person=Alice returns a rendered test image for UI development and QA. The render package (internal/render/floorplan.go) implements thumbnail generation with fogleman/gg, but the preview HTTP endpoint is never registered in main.go. Needs: handler that accepts ?type and ?person query params, calls the appropriate Generate*Thumbnail function, returns the PNG bytes with Content-Type: image/png.","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-02T18:25:20.897907993Z","updated_at":"2026-05-02T18:25:20.897907993Z","source_repo":".","compaction_level":0} +{"id":"bf-232u3","title":"GET /api/notifications/preview — rendered test thumbnail endpoint","description":"Plan (Component 30, Renderer spec) specifies a test endpoint: GET /api/notifications/preview?type=fall&person=Alice returns a rendered test image for UI development and QA. The render package (internal/render/floorplan.go) implements thumbnail generation with fogleman/gg, but the preview HTTP endpoint is never registered in main.go. Needs: handler that accepts ?type and ?person query params, calls the appropriate Generate*Thumbnail function, returns the PNG bytes with Content-Type: image/png.","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-foxtrot","created_at":"2026-05-02T18:25:20.897907993Z","updated_at":"2026-05-05T17:25:10.053618969Z","closed_at":"2026-05-05T17:25:10.053618969Z","close_reason":"GET /api/notifications/preview endpoint already implemented and registered in commit 37571ec. The endpoint accepts ?type (fall, anomaly, zone_enter, sleep) and ?person query params, calls the appropriate Generate*Thumbnail function from internal/render/floorplan.go, and returns PNG bytes with Content-Type: image/png. All tests pass.","source_repo":".","compaction_level":0} {"id":"bf-25dmx","title":"Guided troubleshooting (Component 36)","description":"## Goal\nWhen system detects that user might be struggling or detection quality has degraded, proactively offer contextual help — but never when things are working well.\n\n## Trigger conditions and responses:\n\nDetection quality drops:\n- Condition: Zone-level detection quality below 60% for >24 hours\n- Banner in timeline and 3D view: 'Detection in the kitchen has been less reliable this week. Want me to help diagnose?'\n- Guided flow: Check node connectivity → show link health with explainability → suggest node repositioning → offer re-baseline → 'Still not right? Try adding a node here [highlighted position]'\n\nRepeated setting changes:\n- Condition: Same settings key modified 3+ times within 60-minute sliding window (qualifying keys: delta_rms_threshold, breathing_sensitivity, tau_s, fresnel_decay, n_subcarriers)\n- Tracking: per-key edit counter in memory, resets after 60 min inactivity\n- Trigger: when counter reaches 3, set hint_pending flag\n- Frontend: show non-intrusive banner: 'You've adjusted the detection threshold several times. Would you like me to show you what the system is seeing?' with [Show me] and [×] dismiss\n- [Show me]: opens time-travel to most recent detection event before first edit, with explainability overlay pre-activated\n- Cooldown: 24 hours after hint is shown\n\nNode offline:\n- Condition: Any node offline for >2 hours\n- Timeline event with expandable troubleshooting steps\n\nFirst-time feature discovery:\n- Condition: User opens feature panel for first time\n- Brief, non-intrusive tooltip (not modal): 'Draw a box around an area, then choose what happens when someone enters or leaves. [Got it]'\n- Shown once, never repeated\n\nAfter false positive feedback:\n- Inline response in timeline: 'Got it. I've slightly raised the detection threshold for the contributing links. If this keeps happening at this time of day, my hourly baseline will adapt within a few days.'\n\nAfter successful calibration:\n- Positive reinforcement: 'Re-baseline complete. Detection quality in the kitchen improved from 64% to 89%.'\n\n## Design principles\n- Reactive, not proactive: help appears only when something seems wrong or when user is clearly exploring\n- Dismissible in one tap: never blocks UI\n- Never repeats after dismissal (stored in localStorage)\n- Always explains what will happen next\n- Never condescending: assumes user is intelligent but may not know CSI physics\n\n## Acceptance\n- Detection quality drop triggers helpful banner\n- Repeated setting changes trigger hint\n- Node offline shows troubleshooting steps\n- First-time feature discovery shows tooltip once\n- Feedback responses are helpful\n- Calibration success shows positive reinforcement","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:06:29.724435180Z","updated_at":"2026-05-05T04:06:29.724435180Z","source_repo":".","compaction_level":0} {"id":"bf-2lfti","title":"Activity timeline (Component 27)","description":"## Goal\nImplement universal event stream timeline that serves as primary navigation for time and space.\n\n## Scope\n- Event types: detections, zone transitions, portal crossings, automation triggers, alerts (fall/anomaly/security), system events (node online/offline, OTA, baseline changes), learning milestones\n- Tap any event → 3D view jumps to that exact moment via time-travel\n- Inline actions per event: thumbs up/down (feedback), 'Why?' (explainability), create automation from event\n- Filters: By person, by zone, by event type, by time range (combinable)\n- Search: Natural language queries like 'kitchen occupied after midnight last week'\n- Scroll up = go back in time. Open dashboard after being away → scroll up to see everything that happened\n\n## Location\ndashboard/static/js/timeline.js (new module)\ninternal/api/events.go (GET /api/events endpoint already exists)\n\n## Acceptance\n- Timeline sidebar in expert mode shows all events in scrollable stream\n- Simple mode: timeline IS the main view as activity feed, with room cards above it\n- Tap event → 3D scene shows state at that moment\n- Search filters events correctly\n- FTS5 index on events table for natural language search","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:05:43.262510021Z","updated_at":"2026-05-05T04:05:43.262510021Z","source_repo":".","compaction_level":0} {"id":"bf-2nofd","title":"Ambient dashboard mode (Component 31)","description":"## Goal\nDedicated display mode for wall-mounted tablets or always-on screens. Served at /ambient as separate lightweight route.\n\n## Scope\n- Simplified, stylized top-down floor plan — clean lines, soft rounded corners, no UI chrome\n- People appear as softly glowing colored circles (BLE-identified) or neutral dots (unknown), with names\n- Room labels show subtle occupancy: 'Kitchen · Alice' or 'Bedroom · Empty'\n- Smooth, calm animations: dots drift with interpolated positions, no jitter, no snapping\n- No toolbar, no buttons, no panels — just floor plan, people, small status line\n- Time-of-day awareness: morning (bright/cool), day (neutral), evening (warm/amber), night (very dim, minimal)\n- Adaptive behavior: house empty 30+ min → screen goes fully dark, 'All secure' in tiny text\n- Alert event: entire display transitions to alert mode with pulsing red border, large text, action buttons\n- Morning briefing integration: when first person detected, display briefly shows briefing text before fading to ambient\n\n## Implementation\n/ambient route serving lightweight HTML page\nNo Three.js — use Canvas 2D or SVG for minimal resource usage\nWebSocket receives same dashboard feed but only uses blob positions, zone counts, alerts\n<30 MB RAM, <5% CPU on 2018 iPad\n\n## Acceptance\n- Ambient mode runs unattended on wall-mounted tablet for 7+ days\n- Time-of-day palette transitions smoothly\n- Alert mode breaks the calm appropriately\n- Morning briefing displays on first detection\n- Resource usage: <30 MB RAM, <5% CPU","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:06:11.529140576Z","updated_at":"2026-05-05T04:06:11.529140576Z","source_repo":".","compaction_level":0} @@ -17,17 +17,17 @@ {"id":"bf-3p4bj","title":"Command palette (Component 34)","description":"## Goal\nCtrl+K (Cmd+K on Mac) opens universal search and command interface. Invisible to casual users, indispensable for power users.\n\n## Scope\nSearch:\n- 'kitchen' → Kitchen zone, kitchen nodes, kitchen automations, recent kitchen events\n- 'alice' → Alice's current location, today's timeline, sleep report, BLE devices\n- 'node 3' → Node details, diagnostics, link health\n\nNavigate time:\n- 'last night 2am' → timeline jumps there\n- 'yesterday kitchen' → filters timeline to kitchen events yesterday\n- 'this morning' → jumps to first detection today\n\nExecute commands:\n- 'update all nodes', 're-baseline kitchen', 'add node', 'arm security', 'disarm security'\n- 'dark mode'/'light mode', 'export config', 'restart node kitchen-north'\n\nGet help:\n- 'help fall detection', 'why false positive', 'troubleshoot kitchen'\n\nBehavior:\n- Fuzzy matching: 'flr pln' matches 'Floor Plan settings'\n- Recently used commands appear first\n- Results show keyboard shortcut hints where applicable\n- Escape closes, Enter executes top result\n- Works in expert mode only\n\n## Implementation\nFrontend-only component\nCommand registry maps keywords to actions\nSearch runs against: zone names, person names, node names, setting names, help topics\n\n## Acceptance\n- Ctrl+K/Cmd+K opens command palette\n- Search finds zones, people, nodes, settings, help topics\n- Commands execute correctly\n- Time navigation jumps to correct moments\n- Fuzzy matching works\n- Escape closes palette","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:06:11.571696739Z","updated_at":"2026-05-05T04:06:11.571696739Z","source_repo":".","compaction_level":0} {"id":"bf-4truh","title":"Comprehensive notification system tests (open bead spaxel-40tl expansion)","description":"Open bead spaxel-40tl 'Write comprehensive tests for notification system' is open. The notify package (internal/notify/) has ntfy.go, pushover.go, webhook.go but tests are missing or incomplete. Needs tests covering: batching logic (30s dedup window), quiet hours gate (suppress non-critical during quiet window), morning digest aggregation, delivery retry logic, channel enable/disable, test-notification endpoint, and notification history API. The existing service_enhanced.go has complex batching logic that needs coverage.","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","created_at":"2026-05-02T18:26:14.365679205Z","updated_at":"2026-05-02T18:26:51.067231767Z","closed_at":"2026-05-02T18:26:51.067231767Z","close_reason":"Duplicate of existing open bead spaxel-40tl which already comprehensively tracks notification system tests","source_repo":".","compaction_level":0} {"id":"bf-55sg5","title":"Mobile-responsive expert mode","description":"## Goal\nMake expert mode fully functional on mobile devices with touch gestures.\n\n## Scope\n- Touch orbit/pan/zoom: single-finger rotate, two-finger pan, pinch to zoom (already supported by Three.js OrbitControls)\n- Hamburger menu for panels: collapsible sidebar for fleet status, settings, zones, triggers\n- Responsive layout: panels slide in from bottom on mobile, from right on desktop\n- Touch-optimized buttons: minimum 44×44px tap targets\n- No hover-dependent UI: all interactions work with tap\n- Mobile-specific shortcuts: long-press for context menu (replaces right-click)\n\n## Location\ndashboard/static/js/mobile.js (new module)\ndashboard/static/css/mobile.css (new stylesheet)\n\n## Acceptance\n- Three.js scene responds to touch gestures (orbit, pan, zoom)\n- Hamburger menu opens panel navigation\n- Panels slide in from bottom on mobile\n- All buttons are touch-friendly (≥44px)\n- No features require hover\n- Long-press context menu works on mobile","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:06:29.813640292Z","updated_at":"2026-05-05T04:06:29.813640292Z","source_repo":".","compaction_level":0} -{"id":"bf-59me3","title":"GET /api/status and GET /api/occupancy endpoints","description":"Plan's REST API spec defines: (1) GET /api/status returning {version, nodes, blobs, uptime_s, detection_quality} and (2) GET /api/occupancy returning {zones:{:{count, people:[]}}}. Neither endpoint is registered in main.go. /api/blobs exists. These are simple read-only endpoints that dashboard and HA users would expect for quick system checks and occupancy queries without WebSocket.","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-02T18:25:26.769707158Z","updated_at":"2026-05-02T18:25:26.769707158Z","source_repo":".","compaction_level":0} -{"id":"bf-5fo3h","title":"Node disable/enable API endpoints","description":"Plan's REST API spec defines POST /api/nodes/:mac/disable (sets role to IDLE) and POST /api/nodes/:mac/enable (restores prior role). The fleet handler (internal/fleet/handler.go) has identify, reboot, OTA, position, role endpoints but no dedicated disable/enable. The quick-actions.js context menu exposes 'Disable / Enable' for nodes but there's no corresponding backend route.","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-02T18:25:37.488896455Z","updated_at":"2026-05-02T18:25:37.488896455Z","source_repo":".","compaction_level":0} +{"id":"bf-59me3","title":"GET /api/status and GET /api/occupancy endpoints","description":"Plan's REST API spec defines: (1) GET /api/status returning {version, nodes, blobs, uptime_s, detection_quality} and (2) GET /api/occupancy returning {zones:{:{count, people:[]}}}. Neither endpoint is registered in main.go. /api/blobs exists. These are simple read-only endpoints that dashboard and HA users would expect for quick system checks and occupancy queries without WebSocket.","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-foxtrot","created_at":"2026-05-02T18:25:26.769707158Z","updated_at":"2026-05-05T17:40:16.692260524Z","closed_at":"2026-05-05T17:40:16.692260524Z","close_reason":"Completed","source_repo":".","compaction_level":0} +{"id":"bf-5fo3h","title":"Node disable/enable API endpoints","description":"Plan's REST API spec defines POST /api/nodes/:mac/disable (sets role to IDLE) and POST /api/nodes/:mac/enable (restores prior role). The fleet handler (internal/fleet/handler.go) has identify, reboot, OTA, position, role endpoints but no dedicated disable/enable. The quick-actions.js context menu exposes 'Disable / Enable' for nodes but there's no corresponding backend route.","design":"","acceptance_criteria":"","notes":"","status":"in_progress","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-foxtrot","created_at":"2026-05-02T18:25:37.488896455Z","updated_at":"2026-05-05T18:00:00.932422629Z","source_repo":".","compaction_level":0} {"id":"bf-5o576","title":"Fuzz tests for binary frame parser and JSON protocol","description":"Open bead bf-3d55l tracks this but has been sitting open with no implementation started. The ingestion frame parser (internal/ingestion/frame.go) and JSON message parser (internal/ingestion/message.go) parse untrusted input from ESP32 nodes. Need Go fuzz tests (testing.F) in frame_fuzz_test.go and message_fuzz_test.go covering: malformed header lengths, n_sub overflow, invalid channel values, truncated payloads, invalid JSON type discriminators, and extra fields.","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","created_at":"2026-05-02T18:25:51.799073946Z","updated_at":"2026-05-02T18:26:32.910185523Z","closed_at":"2026-05-02T18:26:32.910185523Z","close_reason":"Duplicate of existing open bead bf-3d55l which already tracks fuzz tests for binary frame parser and JSON protocol","source_repo":".","compaction_level":0} {"id":"bf-5txbb","title":"Fleet status page","description":"## Goal\nFull table view of all registered nodes with all metrics, bulk actions, camera fly-to on click.\n\n## Scope\nTable columns:\n- Name: user-assigned friendly name\n- MAC: hardware address\n- Role: TX/RX/TX_RX — editable dropdown\n- Position: (x, y, z) — click to highlight node in 3D view and fly camera to it\n- Firmware: version string + 'Update available' badge\n- RSSI: last reported WiFi signal strength\n- Status: ONLINE/STALE/OFFLINE with colored indicator\n- Uptime: time since last boot\n- Actions: Restart, Update, Remove, Identify (blink LED)\n\nGlobal actions:\n- Update All (rolling OTA)\n- Re-baseline All\n- Export Config\n- Import Config\n\n## Location\ndashboard/static/js/fleet.js (new module, extract from existing code)\ninternal/api/fleet.go (already exists)\n\n## Acceptance\n- Table shows all registered nodes\n- Click position → camera flies to node in 3D view\n- Role dropdown changes node role\n- Actions execute correctly\n- Bulk actions work on all nodes\n- Export/import config works","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:06:29.834674580Z","updated_at":"2026-05-05T04:06:29.834674580Z","source_repo":".","compaction_level":0} {"id":"bf-5vhya","title":"CI: pipeline timing benchmark gate","description":"## Goal\nAdd a benchmark that enforces the fusion loop timing budget as a CI quality gate, per plan §Quality Gates / Definition of Done (item 9).\n\n## What to build\n\nFile: internal/localizer/fusion/timing_budget_test.go\n\nRun the full fusion pipeline (phase sanitization → feature extraction → Fresnel accumulation → peak extraction → UKF update) against synthetic CSI data from spaxel-sim output.\n\nAssert:\n- Median fusion iteration < 15 ms over 600 iterations (60 seconds at 10 Hz)\n- P99 < 40 ms (hard limit)\n\n## CI integration\nAdd to Argo Workflows CI step after go test ./...:\n go test -bench=BenchmarkFusionLoop -benchtime=60s -count=1 ./internal/localizer/fusion/ | tee /tmp/bench.txt\n # fail if median exceeds 15ms threshold\n\n## Acceptance\n- Benchmark runs in the Argo CI workflow\n- Workflow fails if median latency exceeds 15 ms on the CI runner (allowance: 2x for slower hardware → 30 ms CI threshold, 15 ms production target)","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-golf","created_at":"2026-05-02T12:09:00.487025943Z","updated_at":"2026-05-04T14:25:01.352506963Z","closed_at":"2026-05-04T14:25:01.352506963Z","close_reason":"Implementation already complete in commit 7afbdc9. The timing budget benchmark:\n\n1. File: mothership/internal/localizer/fusion/timing_budget_test.go\n - Runs full fusion pipeline (phase sanitization → feature extraction → Fresnel accumulation → peak extraction → UKF update)\n - Uses synthetic CSI data simulating 4 nodes with 2 walkers\n - Runs 600 iterations (60 seconds at 10 Hz)\n\n2. Timing constraints enforced:\n - Median fusion iteration: 2.6ms (well below 15ms production target and 30ms CI threshold)\n - P99: ~10ms (well below 40ms hard limit)\n\n3. CI integration: .github/workflows/benchmark-ci.yml\n - Benchmark runs on every push/PR to main\n - Workflow fails if median exceeds 30ms (CI threshold)\n - Workflow fails if P99 exceeds 40ms (hard limit)\n\nAll acceptance criteria met.","source_repo":".","compaction_level":0} -{"id":"bf-5wb3n","title":"MQTT bidirectional commands: security_mode and rebaseline subscriptions","description":"Plan specifies that the mothership subscribes to {prefix}/command/security_mode (arm|disarm) and {prefix}/command/rebaseline (zone name or 'all') so Home Assistant automations can control these without opening the dashboard. The mqtt package has SubscribeToSystemMode but no SubscribeToRebaseline, and neither command subscription is wired in main.go to actual arm/disarm or rebaseline actions. Needs: (1) SubscribeToRebaseline in mqtt/client.go, (2) wiring both subscriptions to the relevant internal handlers in main.go when MQTT is configured.","design":"","acceptance_criteria":"","notes":"","status":"in_progress","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-foxtrot","created_at":"2026-05-02T18:25:06.167277244Z","updated_at":"2026-05-05T16:42:53.801156307Z","source_repo":".","compaction_level":0} +{"id":"bf-5wb3n","title":"MQTT bidirectional commands: security_mode and rebaseline subscriptions","description":"Plan specifies that the mothership subscribes to {prefix}/command/security_mode (arm|disarm) and {prefix}/command/rebaseline (zone name or 'all') so Home Assistant automations can control these without opening the dashboard. The mqtt package has SubscribeToSystemMode but no SubscribeToRebaseline, and neither command subscription is wired in main.go to actual arm/disarm or rebaseline actions. Needs: (1) SubscribeToRebaseline in mqtt/client.go, (2) wiring both subscriptions to the relevant internal handlers in main.go when MQTT is configured.","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-foxtrot","created_at":"2026-05-02T18:25:06.167277244Z","updated_at":"2026-05-05T16:52:27.277793127Z","closed_at":"2026-05-05T16:52:27.277793127Z","close_reason":"Completed","source_repo":".","compaction_level":0} {"id":"bf-5wfsa","title":"Pre-deployment simulator (Component 17)","description":"## Goal\nBefore purchasing hardware, users can define their space, place virtual nodes, and run physics-based simulation to see expected detection quality.\n\n## Scope\n- Space definition: same 3D editor used for real setup — draw room boxes, set dimensions\n- Virtual nodes: place ghost nodes (wireframe, dashed links) that participate in GDOP computation\n- Simulation engine: simplified ray-based propagation (direct path + first-order reflections)\n- Synthetic walkers: virtual people moving along user-defined paths or random walk\n- Visualization: GDOP overlay, expected detection quality, coverage gaps highlighted\n- Outputs: minimum node count recommendation, optimal positions for N nodes, accuracy estimates, shopping list\n\n## Location\ndashboard/static/js/simulator.js (new module)\ninternal/sim/propagation.go (new package)\n\n## Acceptance\n- User draws room, places 2-4 virtual nodes\n- Click 'Simulate' → synthetic walkers generate CSI using same propagation model\n- GDOP overlay shows expected detection quality across floor\n- 'Shopping list' shows recommended node count and positions\n- 'Add another node here' highlights worst-GDOP positions","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:05:43.355796818Z","updated_at":"2026-05-05T04:05:43.355796818Z","source_repo":".","compaction_level":0} {"id":"bf-5y8tm","title":"Fresnel zone debug overlay","description":"## Goal\nToggle-able wireframe ellipsoids between active links in the 3D scene for debugging coverage geometry.\n\n## Scope\n- Toggle button in toolbar: 'Fresnel zones'\n- When enabled: render first Fresnel zone ellipsoids as wireframe meshes between active link pairs\n- Helps users understand coverage geometry visually\n- Shows zone 1 (most sensitive) as green wireframe\n- Multiple zones per link can be shown (zones 1-5)\n\n## Location\ndashboard/static/js/viz3d.js (extend existing 3D visualization)\n\n## Acceptance\n- Toggle button shows/hides Fresnel zone ellipsoids\n- Zones render correctly for all active TX→RX links\n- Update in real-time as nodes are moved\n- Performance: <5ms render time for 8-node fleet (28 links)","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:05:43.410156795Z","updated_at":"2026-05-05T04:05:43.410156795Z","source_repo":".","compaction_level":0} {"id":"bf-ao8eq","title":"Detection explainability (Component 28)","description":"## Goal\nImplement 'Why is this here?' on any blob/alert that shows exactly why the system made that decision.\n\n## Scope\n- X-ray overlay: non-contributing visual elements dim to 20% opacity\n- Links that contributed to detection glow, brightness proportional to deltaRMS contribution\n- Fresnel zone ellipsoids appear for active links\n- BLE match: dotted line from matched device's strongest node to blob, labeled with RSSI\n- Detail sidebar: per-link contribution table (link name, deltaRMS, threshold, Fresnel zone number, learned weight)\n- Confidence breakdown: spatial confidence + identity confidence with percentages\n\n## Location\ndashboard/static/js/explainability.js (new module)\ninternal/api/explain.go (new package)\n\n## Acceptance\n- Tap/click blob in 3D view → 'Why?' button appears\n- Tap 'Why?' → X-ray overlay activates, showing contributing links\n- Detail sidebar shows per-link breakdown\n- For alerts: specific conditions that triggered with values vs thresholds\n- Makes false positive cause obvious","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:05:43.300430327Z","updated_at":"2026-05-05T04:05:43.300430327Z","source_repo":".","compaction_level":0} {"id":"bf-awtza","title":"MQTT command/rebaseline and HA auto-discovery lifecycle management","description":"Plan specifies full HA auto-discovery lifecycle: configs published with retain=true on first connect AND whenever zones/persons are added or renamed; when zone or person is deleted, publish empty retained payload to remove HA entity. Also missing: rebaseline command subscription wiring. Currently mqtt/client.go publishes discovery on connect but has no mechanism to detect zone/person CRUD events and re-publish or un-publish discovery configs. Needs event bus subscription for zone/person changes.","design":"","acceptance_criteria":"","notes":"","status":"open","priority":3,"issue_type":"task","created_at":"2026-05-02T18:26:21.696828674Z","updated_at":"2026-05-02T18:26:21.696828674Z","source_repo":".","compaction_level":0} -{"id":"bf-m6f5g","title":"GET /api/baseline and POST /api/baseline/capture endpoints","description":"Plan's REST API spec defines GET /api/baseline (returns [{link_id, snapshot_time, confidence}] for all links) and POST /api/baseline/capture (optional ?links body, starts 60s quiet-room capture). The baselines SQLite table exists (from migrations.go) and the baseline system runs internally, but no HTTP endpoints expose read/capture to the dashboard. The fleet handler has /api/nodes/:mac/rebaseline and /api/nodes/rebaseline-all but no standalone baseline endpoints.","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-02T18:25:32.710840129Z","updated_at":"2026-05-02T18:25:32.710840129Z","source_repo":".","compaction_level":0} +{"id":"bf-m6f5g","title":"GET /api/baseline and POST /api/baseline/capture endpoints","description":"Plan's REST API spec defines GET /api/baseline (returns [{link_id, snapshot_time, confidence}] for all links) and POST /api/baseline/capture (optional ?links body, starts 60s quiet-room capture). The baselines SQLite table exists (from migrations.go) and the baseline system runs internally, but no HTTP endpoints expose read/capture to the dashboard. The fleet handler has /api/nodes/:mac/rebaseline and /api/nodes/rebaseline-all but no standalone baseline endpoints.","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-foxtrot","created_at":"2026-05-02T18:25:32.710840129Z","updated_at":"2026-05-05T17:55:00.504558262Z","closed_at":"2026-05-05T17:55:00.504558262Z","close_reason":"Completed","source_repo":".","compaction_level":0} {"id":"bf-qonqo","title":"GET /api/zones/:id/history occupancy history endpoint","description":"Plan's REST API spec defines GET /api/zones/:id/history?period=24h returning [{timestamp, count, people:[]}] in hourly buckets. The zones CRUD exists (internal/api/zones.go) but the history sub-endpoint is not implemented. The anomaly/pattern system stores per-zone per-hour data in anomaly_patterns, and zone occupancy is tracked in memory and SQLite. Needed for the 'Zone history' quick action and the occupancy chart in the dashboard sidebar.","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-02T18:26:02.249193078Z","updated_at":"2026-05-02T18:26:02.249193078Z","source_repo":".","compaction_level":0} {"id":"bf-usafo","title":"Morning briefing (Component 35)","description":"## Goal\nWhen user first opens dashboard each day, brief warm summary appears.\n\n## Content (generated from existing data):\n- Sleep summary (if available): 'You slept 7h 39m — 12 minutes more than your average. Breathing was regular.'\n- Who is home: 'Bob left at 8:15am. The house has been empty since 8:22am.'\n- Overnight anomalies: 'Last night: One unusual event at 2:34am — motion in kitchen for 30 seconds. No BLE match, low-confidence blob. Likely environmental.'\n- System health: 'System health: Excellent (94%). All 6 nodes online. Accuracy improved 2% this week thanks to your 8 corrections.'\n- Today's forecast: 'Based on your Wednesday pattern, you usually return around 5:45pm. Security mode will auto-activate when you leave.'\n\nDisplay:\n- Expert mode: card overlay on first dashboard open, dismissible with tap or 'Got it' button. Slides away after 10s if not interacted\n- Simple mode: morning card is first card in layout, stays visible until dismissed\n- Ambient mode: text fades in over ambient display when first person detected in morning, stays for 30s\n\nDelivery channels:\n- Dashboard (default)\n- Push notification at configured time (e.g., 7am)\n- Webhook to Slack/Discord\n\n## Implementation\nGo function GenerateBriefing(date string, person string) string\nAssembled in priority order: critical alerts → sleep → who's home → anomalies → system health → predictions → learning progress\nStored as daily record in briefings table\n\n## Acceptance\n- Briefing accurately summarizes overnight activity\n- Shows sleep report when available\n- Lists overnight anomalies with context\n- Shows system health\n- Shows today's predictions\n- Dismissible and non-intrusive","design":"","acceptance_criteria":"","notes":"","status":"open","priority":2,"issue_type":"task","created_at":"2026-05-05T04:06:11.592787579Z","updated_at":"2026-05-05T04:06:11.592787579Z","source_repo":".","compaction_level":0} {"id":"bf-w15bj","title":"CI: golangci-lint static analysis gate","description":"## Goal\nAdd golangci-lint to the Argo CI workflow as a hard quality gate, per plan §Quality Gates / Definition of Done (item 3).\n\n## Configuration\nFile: .golangci.yml at repo root\n\nEnabled linters (minimum set):\n- errcheck: all errors must be handled or explicitly discarded with _\n- staticcheck: includes S-series (simplifications) and SA-series (bugs)\n- gosimple: simplification suggestions (SA-series overlap)\n- govet: same as go vet but integrated\n- ineffassign: catch dead assignments\n- unused: catch unused exported identifiers\n\nDisabled (too noisy for this codebase):\n- gocyclo, funlen, wsl (style preferences, not correctness)\n\n## CI integration\nAdd to the spaxel-build Argo WorkflowTemplate as a parallel step alongside go test:\n golangci-lint run --timeout 5m ./...\n\n## Acceptance\n- golangci-lint run passes on the current codebase (fix any pre-existing findings before adding the gate)\n- Argo CI fails on new lint violations\n- .golangci.yml committed to repo root","design":"","acceptance_criteria":"","notes":"","status":"closed","priority":2,"issue_type":"task","assignee":"claude-code-glm-4.7-golf","created_at":"2026-05-02T12:09:09.633464353Z","updated_at":"2026-05-05T06:49:00.297475830Z","closed_at":"2026-05-05T06:49:00.297475830Z","close_reason":"Completed","source_repo":".","compaction_level":0} diff --git a/.needle-predispatch-sha b/.needle-predispatch-sha index ef335ea..ad7dde9 100644 --- a/.needle-predispatch-sha +++ b/.needle-predispatch-sha @@ -1 +1 @@ -5712459de3f65cc10ee6cfe1d03fa33837a83019 +9d656ab93eccbecebebaa4bf9ccc07360fe28e33 diff --git a/mothership/internal/fleet/handler.go b/mothership/internal/fleet/handler.go index b6eee8f..4421d70 100644 --- a/mothership/internal/fleet/handler.go +++ b/mothership/internal/fleet/handler.go @@ -67,6 +67,8 @@ func (h *Handler) SetMigrationDeadlineProvider(p MigrationDeadlineProvider) { // POST /api/nodes/{mac}/identify — blink LED for identification // POST /api/nodes/{mac}/reboot — reboot node // POST /api/nodes/{mac}/ota — trigger OTA update +// POST /api/nodes/{mac}/disable — disable node (set role to IDLE) +// POST /api/nodes/{mac}/enable — enable node (restore prior role) // POST /api/nodes/update-all — OTA update all nodes // POST /api/nodes/rebaseline-all — re-baseline all links // POST /api/nodes/virtual — add a virtual planning node @@ -87,6 +89,8 @@ func (h *Handler) RegisterRoutes(r chi.Router) { r.Post("/api/nodes/{mac}/locate", h.identifyNode) // alias for identify r.Post("/api/nodes/{mac}/reboot", h.rebootNode) r.Post("/api/nodes/{mac}/ota", h.triggerNodeOTA) + r.Post("/api/nodes/{mac}/disable", h.disableNode) + r.Post("/api/nodes/{mac}/enable", h.enableNode) r.Post("/api/nodes/update-all", h.updateAllNodes) r.Post("/api/nodes/rebaseline-all", h.rebaselineAllNodes) r.Post("/api/nodes/virtual", h.addVirtualNode) @@ -327,7 +331,7 @@ func (h *Handler) getNode(w http.ResponseWriter, r *http.Request) { } var validRoles = map[string]bool{ - "tx": true, "rx": true, "tx_rx": true, "passive": true, "virtual": true, + "tx": true, "rx": true, "tx_rx": true, "passive": true, "virtual": true, "idle": true, } type setRoleRequest struct { @@ -756,3 +760,99 @@ func (h *Handler) triggerNodeOTA(w http.ResponseWriter, r *http.Request) { "version": req.Version, }) } + +// disableNode sets a node's role to IDLE, saving its prior role for restoration. +func (h *Handler) disableNode(w http.ResponseWriter, r *http.Request) { + mac := chi.URLParam(r, "mac") + + // Verify node exists and get its current state. + node, err := h.mgr.registry.GetNode(mac) + if errors.Is(err, sql.ErrNoRows) { + http.Error(w, "node not found", http.StatusNotFound) + return + } else if err != nil { + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + // Don't disable if already IDLE. + if node.Role == "idle" { + writeJSON(w, map[string]bool{"ok": true}) + return + } + + // Save current role before disabling. + if err := h.mgr.registry.SetNodeRoleBeforeDisable(mac, node.Role); err != nil { + log.Printf("[WARN] fleet: failed to save role_before_disable for %s: %v", mac, err) + } + + // Set role to IDLE. + if err := h.mgr.OverrideRole(mac, "idle"); err != nil { + http.Error(w, "failed to disable node", http.StatusInternalServerError) + return + } + + writeJSON(w, map[string]interface{}{ + "ok": true, + "mac": mac, + "prior_role": node.Role, + "current_role": "idle", + }) +} + +// enableNode restores a node's role from its saved prior role. +func (h *Handler) enableNode(w http.ResponseWriter, r *http.Request) { + mac := chi.URLParam(r, "mac") + + // Verify node exists. + node, err := h.mgr.registry.GetNode(mac) + if errors.Is(err, sql.ErrNoRows) { + http.Error(w, "node not found", http.StatusNotFound) + return + } else if err != nil { + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + // Get the saved role before disable. + priorRole, err := h.mgr.registry.GetNodeRoleBeforeDisable(mac) + if err != nil { + http.Error(w, "internal error", http.StatusInternalServerError) + return + } + + // If no saved role, use a sensible default based on current state. + if priorRole == "" { + if node.Role == "idle" { + // Node was disabled before we started saving prior_role. + // Default to "rx" as a safe fallback. + priorRole = "rx" + } else { + // Node isn't idle, just return current state. + writeJSON(w, map[string]interface{}{ + "ok": true, + "mac": mac, + "current_role": node.Role, + "note": "node already enabled", + }) + return + } + } + + // Restore the prior role. + if err := h.mgr.OverrideRole(mac, priorRole); err != nil { + http.Error(w, "failed to enable node", http.StatusInternalServerError) + return + } + + // Clear the saved role. + if err := h.mgr.registry.SetNodeRoleBeforeDisable(mac, ""); err != nil { + log.Printf("[WARN] fleet: failed to clear role_before_disable for %s: %v", mac, err) + } + + writeJSON(w, map[string]interface{}{ + "ok": true, + "mac": mac, + "restored_role": priorRole, + }) +} diff --git a/mothership/internal/fleet/handler_test.go b/mothership/internal/fleet/handler_test.go index 2ebd4ea..50f11c8 100644 --- a/mothership/internal/fleet/handler_test.go +++ b/mothership/internal/fleet/handler_test.go @@ -2191,3 +2191,322 @@ func TestFleetListUnpairedNotInRegistry(t *testing.T) { t.Errorf("Unregistered unpaired node: expected default role 'rx', got '%s'", unregNode.Role) } } + +// ─── Disable node endpoint tests ─────────────────────────────────────────────────── + +func TestHandlerDisableNode(t *testing.T) { + tests := []struct { + name string + mac string + initialRole string + nodeExists bool + wantStatus int + expectedRole string + expectedPrior string + }{ + { + name: "successful disable from tx", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "tx", + nodeExists: true, + wantStatus: http.StatusOK, + expectedRole: "idle", + expectedPrior: "tx", + }, + { + name: "successful disable from rx", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "rx", + nodeExists: true, + wantStatus: http.StatusOK, + expectedRole: "idle", + expectedPrior: "rx", + }, + { + name: "successful disable from tx_rx", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "tx_rx", + nodeExists: true, + wantStatus: http.StatusOK, + expectedRole: "idle", + expectedPrior: "tx_rx", + }, + { + name: "node already idle returns success", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "idle", + nodeExists: true, + wantStatus: http.StatusOK, + expectedRole: "idle", + expectedPrior: "", + }, + { + name: "node not found", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "tx", + nodeExists: false, + wantStatus: http.StatusNotFound, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reg := newTestRegistry(t) + if tt.nodeExists { + reg.UpsertNode(tt.mac, "1.0.0", "ESP32-S3") + reg.SetNodeLabel(tt.mac, "Test Node") + reg.SetNodeRole(tt.mac, tt.initialRole) + } + + mgr := NewManager(reg) + h := &Handler{mgr: mgr} + + req := httptest.NewRequest("POST", "/api/nodes/"+tt.mac+"/disable", nil) + rctx := chi.NewRouteContext() + rctx.URLParams.Add("mac", tt.mac) + req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, rctx)) + + w := httptest.NewRecorder() + h.disableNode(w, req) + + if w.Code != tt.wantStatus { + t.Errorf("disableNode() status = %v, want %v", w.Code, tt.wantStatus) + } + + if tt.wantStatus == http.StatusOK && tt.nodeExists && tt.initialRole != "idle" { + var resp map[string]interface{} + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("Failed to decode response: %v", err) + } + + if resp["ok"] != true { + t.Errorf("Expected ok to be true, got %v", resp["ok"]) + } + + if resp["current_role"] != tt.expectedRole { + t.Errorf("Expected current_role to be %s, got %v", tt.expectedRole, resp["current_role"]) + } + + if resp["prior_role"] != tt.expectedPrior { + t.Errorf("Expected prior_role to be %s, got %v", tt.expectedPrior, resp["prior_role"]) + } + + // Verify the role was saved to role_before_disable + savedRole, err := reg.GetNodeRoleBeforeDisable(tt.mac) + if err != nil { + t.Errorf("Failed to get role_before_disable: %v", err) + } + if savedRole != tt.expectedPrior { + t.Errorf("Expected role_before_disable to be %s, got %s", tt.expectedPrior, savedRole) + } + } + }) + } +} + +// ─── Enable node endpoint tests ─────────────────────────────────────────────────── + +func TestHandlerEnableNode(t *testing.T) { + tests := []struct { + name string + mac string + initialRole string + savedPriorRole string + nodeExists bool + wantStatus int + expectedRole string + expectedNote string + }{ + { + name: "successful enable from idle with saved prior role", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "idle", + savedPriorRole: "tx", + nodeExists: true, + wantStatus: http.StatusOK, + expectedRole: "tx", + }, + { + name: "successful enable from idle with rx saved", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "idle", + savedPriorRole: "rx", + nodeExists: true, + wantStatus: http.StatusOK, + expectedRole: "rx", + }, + { + name: "successful enable from idle with tx_rx saved", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "idle", + savedPriorRole: "tx_rx", + nodeExists: true, + wantStatus: http.StatusOK, + expectedRole: "tx_rx", + }, + { + name: "enable idle node with no saved role defaults to rx", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "idle", + savedPriorRole: "", + nodeExists: true, + wantStatus: http.StatusOK, + expectedRole: "rx", + }, + { + name: "node already enabled returns current state", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "tx", + savedPriorRole: "", + nodeExists: true, + wantStatus: http.StatusOK, + expectedRole: "tx", + expectedNote: "node already enabled", + }, + { + name: "node not found", + mac: "AA:BB:CC:DD:EE:FF", + initialRole: "idle", + savedPriorRole: "tx", + nodeExists: false, + wantStatus: http.StatusNotFound, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reg := newTestRegistry(t) + if tt.nodeExists { + reg.UpsertNode(tt.mac, "1.0.0", "ESP32-S3") + reg.SetNodeLabel(tt.mac, "Test Node") + reg.SetNodeRole(tt.mac, tt.initialRole) + if tt.savedPriorRole != "" { + reg.SetNodeRoleBeforeDisable(tt.mac, tt.savedPriorRole) + } + } + + mgr := NewManager(reg) + h := &Handler{mgr: mgr} + + req := httptest.NewRequest("POST", "/api/nodes/"+tt.mac+"/enable", nil) + rctx := chi.NewRouteContext() + rctx.URLParams.Add("mac", tt.mac) + req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, rctx)) + + w := httptest.NewRecorder() + h.enableNode(w, req) + + if w.Code != tt.wantStatus { + t.Errorf("enableNode() status = %v, want %v", w.Code, tt.wantStatus) + } + + if tt.wantStatus == http.StatusOK && tt.nodeExists { + var resp map[string]interface{} + if err := json.NewDecoder(w.Body).Decode(&resp); err != nil { + t.Fatalf("Failed to decode response: %v", err) + } + + if resp["ok"] != true { + t.Errorf("Expected ok to be true, got %v", resp["ok"]) + } + + // When node is already enabled, response contains current_role instead of restored_role + roleKey := "restored_role" + if tt.expectedNote != "" { + roleKey = "current_role" + if resp["note"] != tt.expectedNote { + t.Errorf("Expected note to be %s, got %v", tt.expectedNote, resp["note"]) + } + } + + if resp[roleKey] != tt.expectedRole { + t.Errorf("Expected %s to be %s, got %v", roleKey, tt.expectedRole, resp[roleKey]) + } + + // Verify the node's current role was updated + node, err := reg.GetNode(tt.mac) + if err != nil { + t.Errorf("Failed to get node after enable: %v", err) + } else if node.Role != tt.expectedRole { + t.Errorf("Expected node role to be %s, got %s", tt.expectedRole, node.Role) + } + } + }) + } +} + +// TestHandlerDisableEnableRoundTrip tests the full disable/enable cycle. +func TestHandlerDisableEnableRoundTrip(t *testing.T) { + reg := newTestRegistry(t) + mac := "AA:BB:CC:DD:EE:FF" + + // Setup node with initial role + reg.UpsertNode(mac, "1.0.0", "ESP32-S3") + reg.SetNodeLabel(mac, "Test Node") + reg.SetNodeRole(mac, "tx") + + mgr := NewManager(reg) + h := &Handler{mgr: mgr} + + // Disable the node + req := httptest.NewRequest("POST", "/api/nodes/"+mac+"/disable", nil) + rctx := chi.NewRouteContext() + rctx.URLParams.Add("mac", mac) + req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, rctx)) + + w := httptest.NewRecorder() + h.disableNode(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("disableNode() status = %v, want %v", w.Code, http.StatusOK) + } + + // Verify role is now idle + node, err := reg.GetNode(mac) + if err != nil { + t.Fatalf("Failed to get node after disable: %v", err) + } + if node.Role != "idle" { + t.Errorf("Expected role to be idle after disable, got %s", node.Role) + } + + // Verify prior role was saved + savedRole, err := reg.GetNodeRoleBeforeDisable(mac) + if err != nil { + t.Fatalf("Failed to get role_before_disable: %v", err) + } + if savedRole != "tx" { + t.Errorf("Expected role_before_disable to be tx, got %s", savedRole) + } + + // Enable the node + req = httptest.NewRequest("POST", "/api/nodes/"+mac+"/enable", nil) + rctx = chi.NewRouteContext() + rctx.URLParams.Add("mac", mac) + req = req.WithContext(context.WithValue(req.Context(), chi.RouteCtxKey, rctx)) + + w = httptest.NewRecorder() + h.enableNode(w, req) + + if w.Code != http.StatusOK { + t.Fatalf("enableNode() status = %v, want %v", w.Code, http.StatusOK) + } + + // Verify role was restored + node, err = reg.GetNode(mac) + if err != nil { + t.Fatalf("Failed to get node after enable: %v", err) + } + if node.Role != "tx" { + t.Errorf("Expected role to be tx after enable, got %s", node.Role) + } + + // Verify role_before_disable was cleared + savedRole, err = reg.GetNodeRoleBeforeDisable(mac) + if err != nil { + t.Fatalf("Failed to get role_before_disable after enable: %v", err) + } + if savedRole != "" { + t.Errorf("Expected role_before_disable to be cleared after enable, got %s", savedRole) + } +} diff --git a/mothership/internal/fleet/registry.go b/mothership/internal/fleet/registry.go index 1c1f873..e9c8d94 100644 --- a/mothership/internal/fleet/registry.go +++ b/mothership/internal/fleet/registry.go @@ -143,6 +143,7 @@ func (r *Registry) migrate() error { "ALTER TABLE nodes ADD COLUMN went_offline_at INTEGER NOT NULL DEFAULT 0", "ALTER TABLE nodes ADD COLUMN health_score REAL NOT NULL DEFAULT 0", "ALTER TABLE nodes ADD COLUMN manufacturer TEXT NOT NULL DEFAULT ''", + "ALTER TABLE nodes ADD COLUMN role_before_disable TEXT NOT NULL DEFAULT ''", } for _, m := range migrations { _, _ = r.db.Exec(m) // Ignore errors (column may already exist) @@ -222,6 +223,22 @@ func (r *Registry) GetNodePreviousRole(mac string) (string, error) { return role, err } +// SetNodeRoleBeforeDisable saves the current role before disabling the node. +func (r *Registry) SetNodeRoleBeforeDisable(mac, role string) error { + _, err := r.db.Exec(`UPDATE nodes SET role_before_disable=? WHERE mac=?`, role, mac) + return err +} + +// GetNodeRoleBeforeDisable returns the role saved before disabling the node. +func (r *Registry) GetNodeRoleBeforeDisable(mac string) (string, error) { + var role string + err := r.db.QueryRow(`SELECT role_before_disable FROM nodes WHERE mac=?`, mac).Scan(&role) + if err == sql.ErrNoRows { + return "", nil + } + return role, err +} + // GetNodeWentOfflineAt returns when a node went offline. func (r *Registry) GetNodeWentOfflineAt(mac string) (time.Time, error) { var ns int64