ai-code-battle/cmd/acb-evolver/internal/arena/gate.go
jedarden 80334c6e34 feat(evolver): expand MAP-Elites from 2-D to 4-D grid per §10.2
- Add Exploration and Formation axis definitions with feature extraction
  from source code pattern matching (exploration/formation indicators)
- Extend Grid key from (x,y) to (x,y,z,w) with 3⁴=81-cell behavior grid
- Update bin assignment, promotion gate, and persistence (JSON snapshot)
- Add Slice() for 2-D dashboard visualization across any axis pair
- Migration: old 2-D archives project at z=middle, w=middle
- Update cross-pollination to pad 2-element behavior vectors to 4
- Add Prometheus metrics to matchmaker (bot crashes, stale job count)
- Add rivalry detection to index builder (data/meta/rivalries.json)
- Web: batched bot list loading, leaderboard keyboard accessibility,
  improved ARIA attributes on match/playlist cards

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-22 15:44:39 -04:00

146 lines
4.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package arena — promotion gate.
//
// The gate applies two independent criteria before promoting a candidate:
//
// 1. Nash value (PSRO) ≥ NashThreshold — sufficient win rate
// 2. MAP-Elites niche fill or improvement — behavioral novelty
//
// Both must be satisfied. The Wilson-score CI lower bound is an optional
// secondary guard on the overall win rate.
package arena
import (
"fmt"
"strings"
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites"
)
// GateConfig bundles the numeric thresholds a candidate must clear before the
// gate will promote it.
type GateConfig struct {
	// NashThreshold is the lowest acceptable Nash value (the worst-case win
	// rate across opponents). Candidates below it are rejected.
	// DefaultGateConfig uses 0.50.
	NashThreshold float64

	// WinRateLowerBound is the lowest acceptable lower bound of the Wilson-score
	// 95% confidence interval on the overall win rate. A value ≤ 0 switches
	// this check off. DefaultGateConfig uses 0.40.
	WinRateLowerBound float64
}
// DefaultGateConfig returns the thresholds intended for production use:
// a Nash threshold of 0.50 and a win-rate CI lower bound of 0.40.
func DefaultGateConfig() GateConfig {
	var cfg GateConfig
	cfg.NashThreshold = 0.50
	cfg.WinRateLowerBound = 0.40
	return cfg
}
// GateResult is the outcome of one gate evaluation: the verdict plus the
// figures that produced it.
type GateResult struct {
	// Promoted reports whether every promotion criterion was satisfied.
	Promoted bool

	// Nash carries the PSRO result computed from the mini-tournament.
	Nash NashResult

	// WinRate carries the overall win rate and its 95% Wilson interval.
	WinRate WinRateResult

	// MapElitesPlaced reports whether the MAP-Elites grid accepted the
	// candidate, either into an empty cell or in place of a weaker incumbent.
	MapElitesPlaced bool

	// MapElitesImproved reports whether the candidate was placed into a cell
	// that was already occupied, i.e. it displaced an existing champion
	// rather than filling an empty niche.
	MapElitesImproved bool

	// Placement is the 4-D grid cell reported by the placement attempt.
	Placement mapelites.Placement

	// Reason explains the decision in human-readable form.
	Reason string
}
// Gate decides whether a candidate is promoted, based on its mini-tournament
// results and its standing in the MAP-Elites grid.
type Gate struct {
	cfg  GateConfig      // promotion thresholds
	grid *mapelites.Grid // behavior grid consulted and updated by Evaluate
}
// NewGate builds a Gate that uses cfg for its thresholds and grid as its
// MAP-Elites archive. The grid pointer is stored as-is rather than copied, so
// niche occupancy carries over between successive Evaluate calls in the same
// evolution run.
func NewGate(cfg GateConfig, grid *mapelites.Grid) *Gate {
	gate := new(Gate)
	gate.cfg = cfg
	gate.grid = grid
	return gate
}
// Evaluate applies the promotion gate to the arena result and returns the
// decision together with the evidence behind it. The returned value is never
// nil.
//
// result must be non-nil; its WinRateVec feeds the Nash computation.
// programID and fitness identify the candidate and are handed to the
// MAP-Elites grid. behaviorVec is [aggression, economy, exploration,
// formation] ∈ [0,1]⁴; each component that is missing (nil or short slice)
// defaults to 0.5, and components beyond the fourth are ignored.
//
// A candidate is promoted only when all of the following hold:
//
//   - the Nash value is at least cfg.NashThreshold;
//   - cfg.WinRateLowerBound is ≤ 0 (check disabled) or the lower bound of the
//     win-rate confidence interval is at least cfg.WinRateLowerBound;
//   - the grid accepted the candidate (TryPlace reported it as placed).
//
// Side effect: g.grid.TryPlace is called — the cell is updated when the
// candidate wins its behavioral niche.
//
// NOTE(review): TryPlace runs before the Nash and win-rate checks, so a
// candidate rejected on those grounds may still have taken over its cell.
// Confirm this is intended.
func (g *Gate) Evaluate(result *Result, programID int64, fitness float64, behaviorVec []float64) *GateResult {
	wr := ComputeFromResult(result)
	nash := ComputeNash(result.WinRateVec)

	// Start at the centre of the grid and overwrite with whatever components
	// the caller supplied; copy stops at the shorter of the two lengths.
	dims := [4]float64{0.5, 0.5, 0.5, 0.5}
	copy(dims[:], behaviorVec)
	agg, eco, expl, form := dims[0], dims[1], dims[2], dims[3]

	// Sample the cell state before TryPlace so we can distinguish
	// "fills empty niche" from "beats existing champion".
	cellX, cellY, cellZ, cellW := g.grid.BehaviorToCell(agg, eco, expl, form)
	priorCell := g.grid.Get(cellX, cellY, cellZ, cellW)
	placement, placed := g.grid.TryPlace(programID, fitness, agg, eco, expl, form)

	gr := &GateResult{
		Nash:              nash,
		WinRate:           wr,
		MapElitesPlaced:   placed,
		MapElitesImproved: placed && priorCell.Occupied,
		Placement:         placement,
	}

	nashOK := nash.NashValue >= g.cfg.NashThreshold
	winOK := g.cfg.WinRateLowerBound <= 0 || wr.Lower >= g.cfg.WinRateLowerBound
	mapOK := placed

	if nashOK && winOK && mapOK {
		gr.Promoted = true
		// The CI bounds are printed as "lower-upper"; without the separator
		// the two numbers run together and cannot be read back.
		if priorCell.Occupied {
			gr.Reason = fmt.Sprintf(
				"promoted: Nash=%.3f ≥ %.3f, WR=%.3f (95%% CI %.3f-%.3f), beats niche [%d,%d,%d,%d] champion (%.3f→%.3f)",
				nash.NashValue, g.cfg.NashThreshold,
				wr.Rate, wr.Lower, wr.Upper,
				placement.X, placement.Y, placement.Z, placement.W, priorCell.Fitness, fitness)
		} else {
			gr.Reason = fmt.Sprintf(
				"promoted: Nash=%.3f ≥ %.3f, WR=%.3f (95%% CI %.3f-%.3f), fills new niche [%d,%d,%d,%d]",
				nash.NashValue, g.cfg.NashThreshold,
				wr.Rate, wr.Lower, wr.Upper,
				placement.X, placement.Y, placement.Z, placement.W)
		}
		return gr
	}

	// Collect every failed criterion so the caller sees the full picture, not
	// just the first failure.
	var why []string
	if !nashOK {
		why = append(why, fmt.Sprintf("Nash=%.3f < %.3f", nash.NashValue, g.cfg.NashThreshold))
	}
	if !winOK {
		why = append(why, fmt.Sprintf("WR CI lower=%.3f < %.3f", wr.Lower, g.cfg.WinRateLowerBound))
	}
	if !mapOK {
		// NOTE(review): this wording presumes the only reason TryPlace
		// declines is a fitter incumbent — verify against mapelites.Grid.
		why = append(why, fmt.Sprintf("niche [%d,%d,%d,%d] occupied by fitter bot (fitness=%.3f)",
			placement.X, placement.Y, placement.Z, placement.W, priorCell.Fitness))
	}
	gr.Reason = "rejected: " + strings.Join(why, "; ")
	return gr
}