ai-code-battle/cmd/acb-evolver/internal/arena/gate.go
jedarden 76e8791e4d Add evaluation arena, promotion gate, and retirement policy (Phase 7)
- arena/arena.go: 10-match mini-tournament running candidate as a local
  subprocess against diverse live opponents sampled across the rating
  distribution; AES-GCM secret decryption for opponent auth
- arena/psro.go: Nash equilibrium computation for the 1×K meta-game;
  FictitiousPlayNash included for future K×K support
- arena/winrate.go: Wilson-score 95% CI for win-rate calculation; draws
  counted as 0.5 wins
- arena/gate.go: two-part promotion gate — Nash value ≥ threshold AND
  MAP-Elites niche fill or improvement; detailed reason strings
- promoter/promoter.go: full promotion pipeline — bot source + Dockerfile
  + K8s Secret/Deployment/Service manifests, docker build, git commit/push
  (ArgoCD sync), kubectl readiness poll, bots-table INSERT, programs-table
  update; RetireBot and EnforcePolicy (rating threshold + population cap 50)
- db/db.go: add bot_name / bot_secret migration columns
- db/programs.go: ListPromoted, SetBotNameAndSecret, UnsetPromoted,
  GetByBotID, PromotedCount helpers for promotion/retirement lifecycle
- main.go: evaluate and retire subcommands wiring arena + gate + promoter;
  remove unused island flag from evaluate
- arena/arena_test.go: 21 unit tests covering Nash, Wilson CI, Gate logic,
  and selectDiverse opponent sampling
- promoter/promoter_test.go: tests for Dockerfiles, bot-ID/secret generation,
  AES-GCM helpers, and K8s manifest templates

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 23:32:37 -04:00

144 lines
4.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package arena — promotion gate.
//
// The gate applies two independent criteria before promoting a candidate:
//
// 1. Nash value (PSRO) ≥ NashThreshold — sufficient win rate
// 2. MAP-Elites niche fill or improvement — behavioral novelty
//
// Both must be satisfied. The Wilson-score CI lower bound is an optional
// secondary guard on the overall win rate.
package arena
import (
"fmt"
"strings"
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites"
)
// GateConfig bundles the numeric thresholds a candidate has to clear before
// the gate will promote it.
type GateConfig struct {
	// NashThreshold is the lowest acceptable Nash value (worst-case win rate
	// across opponents). DefaultGateConfig sets it to 0.50.
	NashThreshold float64

	// WinRateLowerBound is the lowest acceptable lower bound of the
	// Wilson-score 95% CI on the overall win rate. Any value ≤ 0 switches
	// this check off. DefaultGateConfig sets it to 0.40.
	WinRateLowerBound float64
}
// DefaultGateConfig returns the thresholds intended for production use:
// a Nash threshold of 0.50 and a Wilson CI lower bound of 0.40.
func DefaultGateConfig() GateConfig {
	var cfg GateConfig
	cfg.NashThreshold = 0.50
	cfg.WinRateLowerBound = 0.40
	return cfg
}
// GateResult is the outcome of one gate evaluation: the promotion decision
// together with the evidence it was based on.
type GateResult struct {
	// Promoted reports whether every promotion criterion was satisfied.
	Promoted bool

	// Nash is the PSRO result computed for the mini-tournament.
	Nash NashResult

	// WinRate is the overall win rate and its 95% Wilson confidence interval.
	WinRate WinRateResult

	// MapElitesPlaced reports whether the candidate was written to the
	// MAP-Elites grid, either by filling an empty cell or by outperforming
	// the incumbent.
	MapElitesPlaced bool

	// MapElitesImproved reports whether the candidate displaced an existing
	// champion, as opposed to merely filling an empty niche.
	MapElitesImproved bool

	// Placement is the (X, Y) grid cell the candidate occupies.
	Placement mapelites.Placement

	// Reason is a human-readable explanation of the decision.
	Reason string
}
// Gate decides whether a candidate is promoted, based on its mini-tournament
// results and the state of a MAP-Elites grid.
type Gate struct {
	// cfg holds the promotion thresholds.
	cfg GateConfig
	// grid is the MAP-Elites grid consulted and updated by Evaluate.
	grid *mapelites.Grid
}
// NewGate builds a Gate that uses cfg for its thresholds and grid as its
// MAP-Elites archive. The grid pointer is stored as-is rather than copied, so
// niche occupancy carries over between successive Evaluate calls made during
// a single evolution run.
func NewGate(cfg GateConfig, grid *mapelites.Grid) *Gate {
	gate := new(Gate)
	gate.cfg = cfg
	gate.grid = grid
	return gate
}
// Evaluate applies the two-part promotion gate to the arena result.
//
// Parameters:
//   - result: the mini-tournament outcome. It must be non-nil, because
//     result.WinRateVec is read to compute the Nash value.
//   - programID, fitness: the candidate's identifiers in the programs table;
//     both are forwarded to the MAP-Elites grid.
//   - behaviorVec: [aggression, economy] ∈ [0,1]²; defaults to [0.5, 0.5]
//     when nil or shorter than two elements.
//
// The candidate is promoted only when all of the following hold:
//   - the Nash value is ≥ the configured NashThreshold;
//   - the Wilson CI lower bound is ≥ the configured WinRateLowerBound (this
//     check is skipped when that bound is ≤ 0);
//   - the MAP-Elites grid accepted the candidate.
//
// The returned GateResult is never nil. Its Reason explains the decision
// and, on rejection, lists every criterion that failed.
//
// Side effect: g.grid.TryPlace is called unconditionally, before the Nash
// and win-rate checks, so the grid may be updated even for a candidate that
// is ultimately rejected.
func (g *Gate) Evaluate(result *Result, programID int64, fitness float64, behaviorVec []float64) *GateResult {
	wr := ComputeFromResult(result)
	nash := ComputeNash(result.WinRateVec)

	agg, eco := 0.5, 0.5
	if len(behaviorVec) >= 2 {
		agg, eco = behaviorVec[0], behaviorVec[1]
	}

	// Sample the cell state before TryPlace so we can distinguish
	// "fills empty niche" from "beats existing champion". The fields are
	// copied immediately so the snapshot stays valid even if Get hands back
	// a reference into the grid that TryPlace then overwrites.
	cellX, cellY := g.grid.BehaviorToCell(agg, eco)
	priorCell := g.grid.Get(cellX, cellY)
	priorOccupied, priorFitness := priorCell.Occupied, priorCell.Fitness

	placement, placed := g.grid.TryPlace(programID, fitness, agg, eco)

	gr := &GateResult{
		Nash:              nash,
		WinRate:           wr,
		MapElitesPlaced:   placed,
		MapElitesImproved: placed && priorOccupied,
		Placement:         placement,
	}

	nashOK := nash.NashValue >= g.cfg.NashThreshold
	// A non-positive lower bound disables the Wilson CI guard entirely.
	winOK := g.cfg.WinRateLowerBound <= 0 || wr.Lower >= g.cfg.WinRateLowerBound
	mapOK := placed

	if nashOK && winOK && mapOK {
		gr.Promoted = true
		// Shared prefix for both promotion messages. The CI bounds are
		// separated by an en dash so they do not run together.
		stats := fmt.Sprintf(
			"promoted: Nash=%.3f ≥ %.3f, WR=%.3f (95%% CI %.3f–%.3f)",
			nash.NashValue, g.cfg.NashThreshold,
			wr.Rate, wr.Lower, wr.Upper)
		if priorOccupied {
			gr.Reason = fmt.Sprintf(
				"%s, beats niche [%d,%d] champion (%.3f→%.3f)",
				stats, placement.X, placement.Y, priorFitness, fitness)
		} else {
			gr.Reason = fmt.Sprintf(
				"%s, fills new niche [%d,%d]",
				stats, placement.X, placement.Y)
		}
		return gr
	}

	// Collect every failed criterion so the caller sees the full picture
	// rather than only the first failure.
	var why []string
	if !nashOK {
		why = append(why, fmt.Sprintf("Nash=%.3f < %.3f", nash.NashValue, g.cfg.NashThreshold))
	}
	if !winOK {
		why = append(why, fmt.Sprintf("WR CI lower=%.3f < %.3f", wr.Lower, g.cfg.WinRateLowerBound))
	}
	if !mapOK {
		why = append(why, fmt.Sprintf("niche [%d,%d] occupied by fitter bot (fitness=%.3f)",
			placement.X, placement.Y, priorFitness))
	}
	gr.Reason = "rejected: " + strings.Join(why, "; ")
	return gr
}