- Fix TestAnalyticsHandler_ErrorHandling to use proper in-memory database
instead of nil database which caused nil pointer dereference
- Update handleGetCorridors to return corridors wrapped in {corridors: [...]}
for consistency with frontend expectations from crowdflow.js
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
648 lines
17 KiB
Go
648 lines
17 KiB
Go
package fleet
|
|
|
|
import (
|
|
"context"
|
|
"log"
|
|
"sort"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/spaxel/mothership/internal/events"
|
|
)
|
|
|
|
// NodeStateNotifier is the manager's outbound channel to nodes: the manager
// calls it whenever it needs to push a role, a rate config, or an identify
// request to a node, and to learn which nodes are currently connected.
type NodeStateNotifier interface {
	// SendRoleToMAC delivers a role assignment message to the connected node
	// identified by mac.
	SendRoleToMAC(mac string, role string, passiveBSSID string)

	// SendConfigToMAC delivers a rate configuration (rate in Hz plus a
	// variance threshold) to the connected node identified by mac.
	SendConfigToMAC(mac string, rateHz int, varianceThreshold float64)

	// SendIdentifyToMAC asks the node to blink its LED for durationMS.
	// It reports false when the node is not connected.
	SendIdentifyToMAC(mac string, durationMS int) bool

	// GetConnectedMACs lists the MACs of the nodes that are connected right now.
	GetConnectedMACs() []string
}
|
|
|
|
// RegistryBroadcaster is called when fleet state changes that the dashboard should see.
|
|
type RegistryBroadcaster interface {
|
|
BroadcastRegistryState(nodes []NodeRecord, room RoomConfig)
|
|
}
|
|
|
|
// ModeChangeBroadcaster is called when system mode changes.
|
|
type ModeChangeBroadcaster interface {
|
|
BroadcastSystemModeChange(event events.SystemModeChangeEvent)
|
|
}
|
|
|
|
// BLEPresenceProvider provides BLE device presence information for auto-away detection.
|
|
type BLEPresenceProvider interface {
|
|
// GetAllRegisteredDevices returns all registered BLE devices (MAC -> person_id)
|
|
GetAllRegisteredDevices() (map[string]string, error)
|
|
// GetRecentRSSIObservations returns recent RSSI observations for a device
|
|
GetRecentRSSIObservations(mac string, maxAge time.Duration) []BLEObservation
|
|
}
|
|
|
|
// PersonNameProvider resolves person IDs to names so that mode change events
// can carry a person name.
type PersonNameProvider interface {
	// GetPersonName returns the name associated with personID.
	GetPersonName(personID string) string
}
|
|
|
|
// BLEObservation is one BLE RSSI reading together with the device it belongs
// to and the node that reported it.
type BLEObservation struct {
	// DeviceMAC is the MAC address of the observed BLE device.
	DeviceMAC string

	// NodeMAC is the MAC address of the node that made the observation.
	NodeMAC string

	// RSSIdBm is the observed signal strength in dBm.
	RSSIdBm int

	// Timestamp is the time attached to the observation.
	Timestamp time.Time
}
|
|
|
|
// AutoAwayConfig groups the tunables for auto-away detection. The defaults
// quoted below are the ones produced by DefaultAutoAwayConfig.
type AutoAwayConfig struct {
	// Enabled switches auto-away detection on or off.
	Enabled bool `json:"enabled"`

	// AbsenceDuration is how long registered devices must be absent before
	// auto-away triggers. Default: 15 minutes.
	AbsenceDuration time.Duration `json:"absence_duration"`

	// AutoDisarmRSSI is the RSSI threshold (dBm) used for auto-disarm.
	// Default: -70 dBm.
	AutoDisarmRSSI int `json:"auto_disarm_rssi"`

	// ManualOverridePause is how long automatic mode changes are paused
	// after a manual mode change. Default: 30 minutes.
	ManualOverridePause time.Duration `json:"manual_override_pause"`
}
|
|
|
|
// DefaultAutoAwayConfig returns default auto-away configuration.
|
|
func DefaultAutoAwayConfig() AutoAwayConfig {
|
|
return AutoAwayConfig{
|
|
Enabled: true,
|
|
AbsenceDuration: 15 * time.Minute,
|
|
AutoDisarmRSSI: -70,
|
|
ManualOverridePause: 30 * time.Minute,
|
|
}
|
|
}
|
|
|
|
// Manager handles fleet-level operations: role assignment, stagger scheduling, and self-healing.
|
|
type Manager struct {
|
|
mu sync.RWMutex
|
|
registry *Registry
|
|
notifier NodeStateNotifier
|
|
bcaster RegistryBroadcaster
|
|
|
|
// online tracks which MACs are currently connected.
|
|
online map[string]struct{}
|
|
|
|
// roleIndex tracks which nodes have been assigned TX.
|
|
txNodes []string
|
|
|
|
// stagger scheduling: how many TX nodes have been assigned.
|
|
txCount int
|
|
|
|
// healTick is how often we check for stale/missing assignments.
|
|
healTick time.Duration
|
|
|
|
// System mode management
|
|
systemMode events.SystemMode
|
|
modeChangeBroadcaster ModeChangeBroadcaster
|
|
autoAwayConfig AutoAwayConfig
|
|
blePresenceProvider BLEPresenceProvider
|
|
personProvider PersonNameProvider
|
|
manualOverrideUntil time.Time
|
|
lastDeviceSeen map[string]time.Time // MAC -> last seen time
|
|
modeCheckInterval time.Duration
|
|
|
|
// Callback for mode changes
|
|
onModeChange func(events.SystemModeChangeEvent)
|
|
}
|
|
|
|
// NewManager creates a fleet manager backed by registry.
|
|
func NewManager(reg *Registry) *Manager {
|
|
return &Manager{
|
|
registry: reg,
|
|
online: make(map[string]struct{}),
|
|
healTick: 60 * time.Second,
|
|
systemMode: events.ModeHome,
|
|
autoAwayConfig: DefaultAutoAwayConfig(),
|
|
lastDeviceSeen: make(map[string]time.Time),
|
|
modeCheckInterval: 30 * time.Second,
|
|
}
|
|
}
|
|
|
|
// SetNotifier sets the ingestion server callback.
|
|
func (m *Manager) SetNotifier(n NodeStateNotifier) {
|
|
m.mu.Lock()
|
|
m.notifier = n
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// SetBroadcaster sets the dashboard broadcaster.
|
|
func (m *Manager) SetBroadcaster(b RegistryBroadcaster) {
|
|
m.mu.Lock()
|
|
m.bcaster = b
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// OnNodeConnected is called when a node completes its hello handshake.
|
|
// It persists the node, assigns a role, and broadcasts updated state.
|
|
func (m *Manager) OnNodeConnected(mac, firmware, chip string) {
|
|
if err := m.registry.UpsertNode(mac, firmware, chip); err != nil {
|
|
log.Printf("[WARN] fleet: upsert node %s: %v", mac, err)
|
|
}
|
|
|
|
m.mu.Lock()
|
|
m.online[mac] = struct{}{}
|
|
m.mu.Unlock()
|
|
|
|
role := m.assignRole(mac)
|
|
if err := m.registry.SetNodeRole(mac, role); err != nil {
|
|
log.Printf("[WARN] fleet: set role %s: %v", mac, err)
|
|
}
|
|
|
|
m.applyRoleAndConfig(mac, role)
|
|
m.broadcastRegistry()
|
|
log.Printf("[INFO] fleet: node %s joined as %s", mac, role)
|
|
}
|
|
|
|
// OnNodeDisconnected is called when a node disconnects.
|
|
func (m *Manager) OnNodeDisconnected(mac string) {
|
|
m.mu.Lock()
|
|
delete(m.online, mac)
|
|
m.mu.Unlock()
|
|
|
|
// If the lost node was a TX node, reassign TX roles.
|
|
m.rebalanceRoles()
|
|
m.broadcastRegistry()
|
|
log.Printf("[INFO] fleet: node %s left, rebalanced roles", mac)
|
|
}
|
|
|
|
// Run starts the periodic self-healing loop.
|
|
func (m *Manager) Run(ctx context.Context) {
|
|
ticker := time.NewTicker(m.healTick)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
m.selfHeal()
|
|
}
|
|
}
|
|
}
|
|
|
|
// GetRegistry returns the underlying registry for REST API use.
|
|
func (m *Manager) GetRegistry() *Registry {
|
|
return m.registry
|
|
}
|
|
|
|
// BroadcastRegistry triggers a one-off registry state broadcast.
|
|
func (m *Manager) BroadcastRegistry() {
|
|
m.broadcastRegistry()
|
|
}
|
|
|
|
// OverrideRole manually sets a node's role, pushing the update to the node if online,
|
|
// and broadcasting the updated registry state.
|
|
func (m *Manager) OverrideRole(mac, role string) error {
|
|
if err := m.registry.SetNodeRole(mac, role); err != nil {
|
|
return err
|
|
}
|
|
m.mu.RLock()
|
|
notifier := m.notifier
|
|
m.mu.RUnlock()
|
|
if notifier != nil {
|
|
notifier.SendRoleToMAC(mac, role, "")
|
|
}
|
|
m.broadcastRegistry()
|
|
return nil
|
|
}
|
|
|
|
// IdentifyNode sends an LED blink command to a node for identification.
|
|
// Returns true if the command was sent successfully, false if the node is not connected.
|
|
func (m *Manager) IdentifyNode(mac string, durationMS int) bool {
|
|
m.mu.RLock()
|
|
notifier := m.notifier
|
|
m.mu.RUnlock()
|
|
if notifier == nil {
|
|
return false
|
|
}
|
|
return notifier.SendIdentifyToMAC(mac, durationMS)
|
|
}
|
|
|
|
// ─── Role Assignment ────────────────────────────────────────────────────────
|
|
|
|
// assignRole determines the role for the given MAC based on total connected count.
|
|
//
|
|
// Strategy:
|
|
// - 1 node: tx_rx (both TX and RX, single-node mode)
|
|
// - 2 nodes: one TX, one RX (alternating by join order)
|
|
// - 3+ nodes: floor(N/2) nodes assigned TX, rest RX, staggered TX slots
|
|
func (m *Manager) assignRole(mac string) string {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
n := len(m.online)
|
|
switch {
|
|
case n <= 1:
|
|
return "tx_rx"
|
|
case n == 2:
|
|
// First to arrive is TX, second is RX.
|
|
if m.txCount == 0 {
|
|
m.txCount++
|
|
m.txNodes = append(m.txNodes, mac)
|
|
return "tx"
|
|
}
|
|
return "rx"
|
|
default:
|
|
// Keep TX count at floor(N/2), promote this node to TX if needed.
|
|
targetTX := n / 2
|
|
if len(m.txNodes) < targetTX {
|
|
m.txCount++
|
|
m.txNodes = append(m.txNodes, mac)
|
|
return "tx"
|
|
}
|
|
return "rx"
|
|
}
|
|
}
|
|
|
|
// rebalanceRoles re-evaluates TX/RX assignments when a node leaves.
|
|
func (m *Manager) rebalanceRoles() {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
// Rebuild tx node list from only online nodes.
|
|
online := make([]string, 0, len(m.online))
|
|
for mac := range m.online {
|
|
online = append(online, mac)
|
|
}
|
|
sort.Strings(online)
|
|
|
|
n := len(online)
|
|
if n == 0 {
|
|
m.txNodes = nil
|
|
m.txCount = 0
|
|
return
|
|
}
|
|
|
|
targetTX := 1
|
|
if n >= 2 {
|
|
targetTX = n / 2
|
|
}
|
|
|
|
newTX := make([]string, 0, targetTX)
|
|
for i := 0; i < len(online) && len(newTX) < targetTX; i++ {
|
|
newTX = append(newTX, online[i])
|
|
}
|
|
m.txNodes = newTX
|
|
m.txCount = len(newTX)
|
|
|
|
notifier := m.notifier
|
|
if notifier == nil {
|
|
return
|
|
}
|
|
|
|
// Send updated roles; stagger TX slot assignments.
|
|
nTX := len(newTX)
|
|
for i, mac := range online {
|
|
role := "rx"
|
|
for _, txMAC := range newTX {
|
|
if mac == txMAC {
|
|
role = "tx"
|
|
break
|
|
}
|
|
}
|
|
if n == 1 {
|
|
role = "tx_rx"
|
|
}
|
|
|
|
_ = m.registry.SetNodeRole(mac, role) //nolint:errcheck
|
|
// Stagger TX slot: divide 1s into nTX slots.
|
|
rateHz := 20
|
|
txSlotUS := 0
|
|
if role == "tx" && nTX > 1 {
|
|
slotUS := 1000000 / (rateHz * nTX)
|
|
txSlotUS = i * slotUS
|
|
}
|
|
_ = txSlotUS // will send via config when we have the param available
|
|
notifier.SendRoleToMAC(mac, role, "")
|
|
}
|
|
}
|
|
|
|
// applyRoleAndConfig sends role and rate config to a single node.
|
|
func (m *Manager) applyRoleAndConfig(mac, role string) {
|
|
m.mu.RLock()
|
|
notifier := m.notifier
|
|
m.mu.RUnlock()
|
|
|
|
if notifier == nil {
|
|
return
|
|
}
|
|
|
|
notifier.SendRoleToMAC(mac, role, "")
|
|
|
|
rateHz := 20
|
|
if role == "rx" || role == "tx_rx" {
|
|
notifier.SendConfigToMAC(mac, rateHz, 0.02)
|
|
}
|
|
}
|
|
|
|
// selfHeal checks for mismatched roles and re-pushes config if needed.
|
|
func (m *Manager) selfHeal() {
|
|
nodes, err := m.registry.GetAllNodes()
|
|
if err != nil {
|
|
log.Printf("[WARN] fleet: self-heal query: %v", err)
|
|
return
|
|
}
|
|
|
|
m.mu.RLock()
|
|
notifier := m.notifier
|
|
m.mu.RUnlock()
|
|
|
|
if notifier == nil {
|
|
return
|
|
}
|
|
|
|
connected := make(map[string]struct{})
|
|
for _, mac := range notifier.GetConnectedMACs() {
|
|
connected[mac] = struct{}{}
|
|
}
|
|
|
|
for _, n := range nodes {
|
|
if _, ok := connected[n.MAC]; !ok {
|
|
continue
|
|
}
|
|
// Re-push stored role for nodes that are online.
|
|
notifier.SendRoleToMAC(n.MAC, n.Role, "")
|
|
}
|
|
}
|
|
|
|
// broadcastRegistry sends current node and room state to dashboard clients.
|
|
func (m *Manager) broadcastRegistry() {
|
|
m.mu.RLock()
|
|
bcaster := m.bcaster
|
|
m.mu.RUnlock()
|
|
|
|
if bcaster == nil {
|
|
return
|
|
}
|
|
|
|
nodes, err := m.registry.GetAllNodes()
|
|
if err != nil {
|
|
log.Printf("[WARN] fleet: get nodes for broadcast: %v", err)
|
|
return
|
|
}
|
|
|
|
room, err := m.registry.GetRoom()
|
|
if err != nil {
|
|
log.Printf("[WARN] fleet: get room for broadcast: %v", err)
|
|
return
|
|
}
|
|
|
|
bcaster.BroadcastRegistryState(nodes, *room)
|
|
}
|
|
|
|
// ─── System Mode Management ─────────────────────────────────────────────────────
|
|
|
|
// SetModeChangeBroadcaster sets the broadcaster for mode change events.
|
|
func (m *Manager) SetModeChangeBroadcaster(b ModeChangeBroadcaster) {
|
|
m.mu.Lock()
|
|
m.modeChangeBroadcaster = b
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// SetBLEPresenceProvider sets the BLE presence provider for auto-away detection.
|
|
func (m *Manager) SetBLEPresenceProvider(p BLEPresenceProvider) {
|
|
m.mu.Lock()
|
|
m.blePresenceProvider = p
|
|
m.mu.Unlock()
|
|
}
|
|
|
|
// ProcessBLEObservations processes BLE observations for auto-away/disarm detection.
|
|
// This should be called when BLE data is received from nodes.
|
|
func (m *Manager) ProcessBLEObservations(observations []BLEObservation) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
// Skip if no BLE provider or auto-away is disabled
|
|
if m.blePresenceProvider == nil || !m.autoAwayConfig.Enabled {
|
|
return
|
|
}
|
|
|
|
// Check if manual override is active
|
|
if time.Now().Before(m.manualOverrideUntil) {
|
|
return
|
|
}
|
|
|
|
now := time.Now()
|
|
|
|
// Get all registered devices
|
|
registeredDevices, err := m.blePresenceProvider.GetAllRegisteredDevices()
|
|
if err != nil {
|
|
log.Printf("[WARN] fleet: get registered devices: %v", err)
|
|
return
|
|
}
|
|
|
|
// Check for auto-disarm: any registered device seen with RSSI > threshold
|
|
if m.systemMode == events.ModeAway {
|
|
for _, obs := range observations {
|
|
if personID, isRegistered := registeredDevices[obs.DeviceMAC]; isRegistered {
|
|
if obs.RSSIdBm >= m.autoAwayConfig.AutoDisarmRSSI {
|
|
// Get person name if available
|
|
personName := ""
|
|
if m.personProvider != nil {
|
|
personName = m.personProvider.GetPersonName(personID)
|
|
}
|
|
|
|
// Auto-disarm
|
|
prevMode := m.systemMode
|
|
m.systemMode = events.ModeHome
|
|
|
|
event := events.SystemModeChangeEvent{
|
|
PreviousMode: prevMode,
|
|
NewMode: events.ModeHome,
|
|
Reason: "auto_disarm",
|
|
Timestamp: now,
|
|
PersonID: personID,
|
|
PersonName: personName,
|
|
}
|
|
|
|
if m.modeChangeBroadcaster != nil {
|
|
m.modeChangeBroadcaster.BroadcastSystemModeChange(event)
|
|
}
|
|
|
|
if m.onModeChange != nil {
|
|
go m.onModeChange(event)
|
|
}
|
|
|
|
log.Printf("[INFO] fleet: auto-disarm triggered - registered device %s seen (RSSI: %d)", obs.DeviceMAC, obs.RSSIdBm)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Update last seen times for registered devices
|
|
for _, obs := range observations {
|
|
if _, isRegistered := registeredDevices[obs.DeviceMAC]; isRegistered {
|
|
m.lastDeviceSeen[obs.DeviceMAC] = now
|
|
}
|
|
}
|
|
}
|
|
|
|
// CheckAutoAway checks if all registered devices have been absent for the configured duration.
|
|
// This should be called periodically.
|
|
func (m *Manager) CheckAutoAway() {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
// Skip if no BLE provider or auto-away is disabled
|
|
if m.blePresenceProvider == nil || !m.autoAwayConfig.Enabled {
|
|
return
|
|
}
|
|
|
|
// Check if manual override is active
|
|
if time.Now().Before(m.manualOverrideUntil) {
|
|
return
|
|
}
|
|
|
|
// Don't auto-away if already away
|
|
if m.systemMode == events.ModeAway {
|
|
return
|
|
}
|
|
|
|
// Get all registered devices
|
|
registeredDevices, err := m.blePresenceProvider.GetAllRegisteredDevices()
|
|
if err != nil {
|
|
log.Printf("[WARN] fleet: get registered devices for auto-away: %v", err)
|
|
return
|
|
}
|
|
|
|
if len(registeredDevices) == 0 {
|
|
return // No registered devices, can't determine away status
|
|
}
|
|
|
|
// Check if all devices have been absent for the configured duration
|
|
now := time.Now()
|
|
allAbsent := true
|
|
|
|
for mac := range registeredDevices {
|
|
lastSeen, exists := m.lastDeviceSeen[mac]
|
|
if !exists || now.Sub(lastSeen) >= m.autoAwayConfig.AbsenceDuration {
|
|
// Device not seen recently
|
|
continue
|
|
}
|
|
// At least one device is present
|
|
allAbsent = false
|
|
break
|
|
}
|
|
|
|
if allAbsent {
|
|
// Auto-away
|
|
prevMode := m.systemMode
|
|
m.systemMode = events.ModeAway
|
|
|
|
event := events.SystemModeChangeEvent{
|
|
PreviousMode: prevMode,
|
|
NewMode: events.ModeAway,
|
|
Reason: "auto_away",
|
|
Timestamp: now,
|
|
}
|
|
|
|
if m.modeChangeBroadcaster != nil {
|
|
m.modeChangeBroadcaster.BroadcastSystemModeChange(event)
|
|
}
|
|
|
|
if m.onModeChange != nil {
|
|
go m.onModeChange(event)
|
|
}
|
|
|
|
log.Printf("[INFO] fleet: auto-away activated - all BLE devices absent for %v", m.autoAwayConfig.AbsenceDuration)
|
|
}
|
|
}
|
|
|
|
// RunModeCheck starts the periodic auto-away check loop.
|
|
func (m *Manager) RunModeCheck(ctx context.Context) {
|
|
ticker := time.NewTicker(m.modeCheckInterval)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
m.CheckAutoAway()
|
|
}
|
|
}
|
|
}
|
|
|
|
// GetAutoAwayConfig returns the current auto-away configuration.
|
|
func (m *Manager) GetAutoAwayConfig() AutoAwayConfig {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
return m.autoAwayConfig
|
|
}
|
|
|
|
// SetAutoAwayConfig updates the auto-away configuration.
|
|
func (m *Manager) SetAutoAwayConfig(cfg AutoAwayConfig) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
m.autoAwayConfig = cfg
|
|
}
|
|
|
|
// SetPersonProvider sets the person name provider for mode change events.
|
|
func (m *Manager) SetPersonProvider(p PersonNameProvider) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
m.personProvider = p
|
|
}
|
|
|
|
// GetSystemMode returns the current system mode.
|
|
func (m *Manager) GetSystemMode() events.SystemMode {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
return m.systemMode
|
|
}
|
|
|
|
// SetSystemMode manually sets the system mode with a reason.
|
|
func (m *Manager) SetSystemMode(mode events.SystemMode, reason string) error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
prevMode := m.systemMode
|
|
if prevMode == mode {
|
|
return nil // No change needed
|
|
}
|
|
|
|
m.systemMode = mode
|
|
|
|
// Set manual override pause
|
|
m.manualOverrideUntil = time.Now().Add(m.autoAwayConfig.ManualOverridePause)
|
|
|
|
event := events.SystemModeChangeEvent{
|
|
PreviousMode: prevMode,
|
|
NewMode: mode,
|
|
Reason: reason,
|
|
Timestamp: time.Now(),
|
|
}
|
|
|
|
if m.modeChangeBroadcaster != nil {
|
|
m.modeChangeBroadcaster.BroadcastSystemModeChange(event)
|
|
}
|
|
|
|
if m.onModeChange != nil {
|
|
go m.onModeChange(event)
|
|
}
|
|
|
|
log.Printf("[INFO] fleet: system mode changed: %s -> %s (reason: %s)", prevMode, mode, reason)
|
|
return nil
|
|
}
|
|
|
|
// SetOnModeChange sets the callback for mode change events.
|
|
func (m *Manager) SetOnModeChange(cb func(events.SystemModeChangeEvent)) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
m.onModeChange = cb
|
|
}
|
|
|
|
// IsSecurityMode returns true if the system is in away mode (security mode).
|
|
func (m *Manager) IsSecurityMode() bool {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
return m.systemMode == events.ModeAway
|
|
}
|
|
|
|
// IsManualOverrideActive returns true if a manual mode override is currently active.
|
|
func (m *Manager) IsManualOverrideActive() bool {
|
|
m.mu.RLock()
|
|
defer m.mu.RUnlock()
|
|
return time.Now().Before(m.manualOverrideUntil)
|
|
}
|