Add evaluation arena, promotion gate, and retirement policy (Phase 7)

- arena/arena.go: 10-match mini-tournament running candidate as a local
  subprocess against diverse live opponents sampled across the rating
  distribution; AES-GCM secret decryption for opponent auth
- arena/psro.go: Nash equilibrium computation for the 1×K meta-game;
  FictitiousPlayNash included for future K×K support
- arena/winrate.go: Wilson-score 95% CI for win-rate calculation; draws
  counted as 0.5 wins
- arena/gate.go: two-part promotion gate — Nash value ≥ threshold AND
  MAP-Elites niche fill or improvement; detailed reason strings
- promoter/promoter.go: full promotion pipeline — bot source + Dockerfile
  + K8s Secret/Deployment/Service manifests, docker build, git commit/push
  (ArgoCD sync), kubectl readiness poll, bots-table INSERT, programs-table
  update; RetireBot and EnforcePolicy (rating threshold + population cap 50)
- db/db.go: add bot_name / bot_secret migration columns
- db/programs.go: ListPromoted, SetBotNameAndSecret, UnsetPromoted,
  GetByBotID, PromotedCount helpers for promotion/retirement lifecycle
- main.go: evaluate and retire subcommands wiring arena + gate + promoter;
  remove unused island flag from evaluate
- arena/arena_test.go: 21 unit tests covering Nash, Wilson CI, Gate logic,
  and selectDiverse opponent sampling
- promoter/promoter_test.go: tests for Dockerfiles, bot-ID/secret generation,
  AES-GCM helpers, and K8s manifest templates

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
jedarden 2026-03-26 23:32:37 -04:00
parent 5669688984
commit 76e8791e4d
10 changed files with 2453 additions and 3 deletions

View file

@ -0,0 +1,525 @@
// Package arena implements the 10-match mini-tournament evaluation system
// for evolved bot candidates.
//
// The arena starts the candidate as a local subprocess (the same way the
// sandbox does during validation), selects a diverse set of live opponents
// from the PostgreSQL database, and runs one match per opponent using the
// game engine directly. No job queue or ACB API calls are needed for
// evaluation matches.
package arena
import (
"context"
"crypto/aes"
"crypto/cipher"
"database/sql"
"encoding/hex"
"fmt"
"log"
"math/rand"
"net"
"net/http"
"os"
"os/exec"
"sort"
"time"
"github.com/aicodebattle/acb/engine"
_ "github.com/lib/pq"
)
const (
// DefaultNumMatches is the tournament size (10 per spec).
DefaultNumMatches = 10
// evalSecret is used for HMAC signing when the candidate runs locally.
// The candidate subprocess is started with BOT_SECRET=evalSecret so that
// the engine's request signatures match what the bot verifies.
evalSecret = "acb-eval-secret-for-tournament-evaluation-only"
// evalBotID is a placeholder bot ID for arena authentication headers.
evalBotID = "b_evalcandidate"
// healthPollInterval is the pause between successive GET /health probes
// while waiting for the candidate subprocess to come up.
healthPollInterval = 200 * time.Millisecond
// healthStartupTimeout bounds how long waitForHealth keeps probing before
// it gives up on the candidate.
healthStartupTimeout = 30 * time.Second
)
// BotRecord holds a live bot's connection details queried from the database.
// selectOpponents fills one record per row of the bots table.
type BotRecord struct {
BotID string // bots.bot_id
Name string // bots.name
EndpointURL string // bots.endpoint_url; the query only returns non-empty values
Secret string // plaintext (decrypted when encryption key is provided)
RatingMu float64 // bots.rating_mu; the key used to spread opponents across the ladder
}
// MatchOutcome records the result of one evaluation match.
//
// When Err is non-nil the match did not produce a usable result and the
// remaining result fields keep their zero values.
type MatchOutcome struct {
	OpponentBotID string
	OpponentName  string
	CandidateSlot int // player slot (0 or 1) assigned to the candidate
	Winner        int // 0=player0, 1=player1, -1=draw
	Scores        []int
	Turns         int
	Err           error
}

// CandidateWon reports whether the match completed without error and the
// winning slot is the one the candidate was seated in.
func (o *MatchOutcome) CandidateWon() bool {
	if o.Err != nil {
		return false
	}
	return o.CandidateSlot == o.Winner
}

// CandidateLost reports whether the match completed without error and was
// won by the other slot. Draws (Winner == -1) and errored matches are not
// losses.
func (o *MatchOutcome) CandidateLost() bool {
	if o.Err != nil || o.Winner == -1 {
		return false
	}
	return o.CandidateSlot != o.Winner
}
// Result aggregates mini-tournament outcomes for a candidate.
// It is produced by Arena.Run and consumed by the win-rate and Nash helpers.
type Result struct {
// CandidateEndpoint is the local URL the candidate subprocess was served on.
CandidateEndpoint string
// Outcomes holds one entry per match played, in play order (errors included).
Outcomes []MatchOutcome
// Aggregate tallies (errors excluded from win/loss/draw counts).
Wins int
Losses int
Draws int
Errors int
// OpponentWinRates maps opponent BotID → candidate win rate vs that bot.
// Only outright wins count towards the rate; errored matches are ignored.
OpponentWinRates map[string]float64
// WinRateVec is an ordered slice of per-opponent win rates (one entry per
// distinct opponent played, in match order, errors omitted). Used by PSRO.
WinRateVec []float64
}
// Config controls arena behaviour. Use DefaultConfig for sensible defaults.
type Config struct {
// NumMatches is the tournament size (default: DefaultNumMatches = 10).
NumMatches int
// BotTimeout is the per-turn HTTP timeout for both bots. It is passed to
// the match runner and to each HTTP bot client.
BotTimeout time.Duration
// EncryptionKey is the AES-256-GCM key (hex) used to decrypt opponent
// secrets from the database. Empty means secrets are stored plaintext.
EncryptionKey string
}
// DefaultConfig returns the arena defaults: a DefaultNumMatches-sized
// tournament with a three-second per-turn bot timeout. EncryptionKey is left
// empty, so opponent secrets are treated as plaintext.
func DefaultConfig() Config {
	var cfg Config
	cfg.NumMatches = DefaultNumMatches
	cfg.BotTimeout = 3 * time.Second
	return cfg
}
// Arena orchestrates mini-tournament evaluation of bot candidates.
type Arena struct {
db *sql.DB // source of the live opponent fleet (bots table)
cfg Config
rng *rand.Rand // drives opponent sampling, seat assignment and per-match seeds
log *log.Logger
}
// New builds an Arena that reads opponents from db and behaves according to
// cfg. The arena's random source is seeded from the current wall-clock time
// and it logs through the standard library's default logger.
func New(db *sql.DB, cfg Config) *Arena {
	seed := time.Now().UnixNano()
	arena := &Arena{db: db, cfg: cfg}
	arena.rng = rand.New(rand.NewSource(seed))
	arena.log = log.Default()
	return arena
}
// Run executes a mini-tournament for the candidate bot.
//
// code is the candidate's source code; language is one of
// go|python|rust|typescript|java|php.
//
// The candidate is built and started as a local subprocess, then played
// against cfg.NumMatches opponents sampled from the live bot fleet. A
// non-positive cfg.NumMatches (e.g. a zero-value Config) falls back to
// DefaultNumMatches.
//
// Individual match failures do not abort the tournament: they are recorded
// in Result.Outcomes and counted in Result.Errors. Run returns an error only
// when the candidate cannot be started, no opponents can be selected, or ctx
// is cancelled between matches.
func (a *Arena) Run(ctx context.Context, code, language string) (*Result, error) {
	// Without this fallback a zero NumMatches selects no opponents and is
	// reported as an empty fleet, which hides the real misconfiguration.
	numMatches := a.cfg.NumMatches
	if numMatches <= 0 {
		numMatches = DefaultNumMatches
	}

	proc, err := startCandidate(ctx, code, language)
	if err != nil {
		return nil, fmt.Errorf("start candidate subprocess: %w", err)
	}
	defer proc.stop()
	candidateURL := fmt.Sprintf("http://127.0.0.1:%d", proc.port)

	opponents, err := a.selectOpponents(ctx, numMatches)
	if err != nil {
		return nil, fmt.Errorf("select opponents: %w", err)
	}
	if len(opponents) == 0 {
		return nil, fmt.Errorf("no active opponents available in live bot fleet")
	}

	result := &Result{
		CandidateEndpoint: candidateURL,
		Outcomes:          make([]MatchOutcome, 0, len(opponents)),
		OpponentWinRates:  make(map[string]float64),
	}
	for i, opp := range opponents {
		// The candidate subprocess is tied to ctx, so once ctx is done every
		// remaining match could only fail; stop instead of recording noise.
		if err := ctx.Err(); err != nil {
			return nil, fmt.Errorf("tournament cancelled after %d/%d matches: %w", i, len(opponents), err)
		}
		a.log.Printf("arena: match %d/%d vs %s (%s)", i+1, len(opponents), opp.Name, opp.BotID)
		outcome := a.runMatch(ctx, candidateURL, opp)
		result.Outcomes = append(result.Outcomes, outcome)
		switch {
		case outcome.Err != nil:
			result.Errors++
			a.log.Printf("arena: match %d error: %v", i+1, outcome.Err)
		case outcome.CandidateWon():
			result.Wins++
		case outcome.CandidateLost():
			result.Losses++
		default:
			result.Draws++
		}
	}

	// Tally wins and valid games per opponent in one pass, remembering the
	// order in which each opponent was first met so WinRateVec is stable.
	oppWins := make(map[string]int)
	oppTotal := make(map[string]int)
	var firstSeen []string
	for i := range result.Outcomes {
		o := &result.Outcomes[i]
		if o.Err != nil {
			continue
		}
		if oppTotal[o.OpponentBotID] == 0 {
			firstSeen = append(firstSeen, o.OpponentBotID)
		}
		oppTotal[o.OpponentBotID]++
		if o.CandidateWon() {
			oppWins[o.OpponentBotID]++
		}
	}
	// Every id in firstSeen has at least one valid game, so the division is safe.
	for _, id := range firstSeen {
		rate := float64(oppWins[id]) / float64(oppTotal[id])
		result.OpponentWinRates[id] = rate
		result.WinRateVec = append(result.WinRateVec, rate)
	}
	return result, nil
}
// selectOpponents loads every active bot that has an endpoint URL, ordered by
// rating, and hands the pool to selectDiverse to pick n opponents.
//
// When an encryption key is configured each stored secret is decrypted; a
// secret that fails to decrypt is kept unchanged on purpose, because
// development databases may hold plaintext secrets.
func (a *Arena) selectOpponents(ctx context.Context, n int) ([]BotRecord, error) {
	const activeBotsQuery = `
SELECT bot_id, name, endpoint_url, shared_secret, rating_mu
FROM bots
WHERE status = 'active' AND endpoint_url <> ''
ORDER BY rating_mu DESC`

	rows, err := a.db.QueryContext(ctx, activeBotsQuery)
	if err != nil {
		return nil, fmt.Errorf("query bots: %w", err)
	}
	defer rows.Close()

	encrypted := a.cfg.EncryptionKey != ""
	var pool []BotRecord
	for rows.Next() {
		var rec BotRecord
		scanErr := rows.Scan(&rec.BotID, &rec.Name, &rec.EndpointURL, &rec.Secret, &rec.RatingMu)
		if scanErr != nil {
			return nil, fmt.Errorf("scan bot: %w", scanErr)
		}
		if encrypted {
			plain, decErr := decryptAESGCM(rec.Secret, a.cfg.EncryptionKey)
			if decErr == nil {
				rec.Secret = plain
			}
		}
		pool = append(pool, rec)
	}
	if err = rows.Err(); err != nil {
		return nil, err
	}
	return selectDiverse(pool, n, a.rng), nil
}
// selectDiverse picks n bots spread evenly across the rating-sorted slice.
//
// all is sorted in place by descending RatingMu. When at least n bots exist,
// n distinct bots are taken at evenly spaced positions of that ordering. When
// fewer than n bots exist, opponents are reused: the pool is appended in
// freshly shuffled rounds until n entries are collected. The final selection
// is shuffled so play order does not correlate with rating.
//
// It returns nil when the pool is empty or n is not positive.
func selectDiverse(all []BotRecord, n int, rng *rand.Rand) []BotRecord {
	// A negative n would panic in make below; a zero n selects nothing.
	if len(all) == 0 || n <= 0 {
		return nil
	}
	sort.Slice(all, func(i, j int) bool { return all[i].RatingMu > all[j].RatingMu })

	selected := make([]BotRecord, 0, n)
	if len(all) >= n {
		for i := 0; i < n; i++ {
			// Integer arithmetic keeps the stride exact. The float form
			// i/n*len can round just below the intended index (for example
			// 1/49*49) and pick the same bot twice.
			idx := i * len(all) / n
			selected = append(selected, all[idx])
		}
	} else {
		for len(selected) < n {
			for _, idx := range rng.Perm(len(all)) {
				selected = append(selected, all[idx])
				if len(selected) >= n {
					break
				}
			}
		}
	}
	rng.Shuffle(len(selected), func(i, j int) { selected[i], selected[j] = selected[j], selected[i] })
	return selected
}
// runMatch plays a single game between the local candidate and one live
// opponent and reports what happened.
//
// The candidate's seat (0 or 1) is drawn at random for positional fairness,
// and the match runner receives its own RNG seeded from the arena's RNG. A
// failure of the match runner is returned inside MatchOutcome.Err rather than
// as a separate error value. ctx is accepted for symmetry with the other
// arena methods; the body does not consult it.
func (a *Arena) runMatch(ctx context.Context, candidateURL string, opp BotRecord) MatchOutcome {
	// Draw order matters for reproducibility: seat first, match seed second.
	seat := a.rng.Intn(2)
	matchID := fmt.Sprintf("eval-%d", time.Now().UnixNano())

	runner := engine.NewMatchRunner(
		engine.DefaultConfig(),
		engine.WithTimeout(a.cfg.BotTimeout),
		engine.WithRNG(rand.New(rand.NewSource(a.rng.Int63()))),
	)

	candidate := engine.NewHTTPBot(
		candidateURL,
		engine.AuthConfig{BotID: evalBotID, Secret: evalSecret, MatchID: matchID},
		engine.WithHTTPTimeout(a.cfg.BotTimeout),
	)
	rival := engine.NewHTTPBot(
		opp.EndpointURL,
		engine.AuthConfig{BotID: opp.BotID, Secret: opp.Secret, MatchID: matchID},
		engine.WithHTTPTimeout(a.cfg.BotTimeout),
	)

	// Bots are seated in the order they are added to the runner.
	if seat == 1 {
		runner.AddBot(rival, opp.Name)
		runner.AddBot(candidate, "candidate")
	} else {
		runner.AddBot(candidate, "candidate")
		runner.AddBot(rival, opp.Name)
	}

	res, _, err := runner.Run()
	if err != nil {
		return MatchOutcome{
			OpponentBotID: opp.BotID,
			OpponentName:  opp.Name,
			CandidateSlot: seat,
			Err:           fmt.Errorf("match runner: %w", err),
		}
	}
	return MatchOutcome{
		OpponentBotID: opp.BotID,
		OpponentName:  opp.Name,
		CandidateSlot: seat,
		Winner:        res.Winner,
		Scores:        res.Scores,
		Turns:         res.Turns,
	}
}
// ── candidate subprocess management ──────────────────────────────────────────
// botProcess tracks a running candidate subprocess together with the
// resources that stop must release.
type botProcess struct {
port int // local TCP port passed to the bot via BOT_PORT
cmd *exec.Cmd // the started subprocess
tmpDir string // scratch directory holding the candidate's source and build output
}
// stop kills the subprocess (if one was started), waits for it to exit, and
// then removes the scratch directory. Errors from all three steps are
// deliberately ignored: this is best-effort cleanup.
func (p *botProcess) stop() {
	if cmd := p.cmd; cmd != nil && cmd.Process != nil {
		_ = cmd.Process.Kill()
		_ = cmd.Wait()
	}
	if p.tmpDir == "" {
		return
	}
	os.RemoveAll(p.tmpDir)
}
// startCandidate builds the candidate in a fresh temporary directory, starts
// it as a subprocess listening on a free local port, and waits until its
// /health endpoint answers.
//
// The subprocess inherits the current environment plus BOT_PORT and
// BOT_SECRET (set to evalSecret). On any failure the temporary directory is
// removed, and a subprocess that was already started is killed, before the
// error is returned.
func startCandidate(ctx context.Context, code, language string) (*botProcess, error) {
	workDir, err := os.MkdirTemp("", "acb-arena-*")
	if err != nil {
		return nil, fmt.Errorf("mkdirtemp: %w", err)
	}
	// discard cleans up when we fail before a process exists.
	discard := func() { os.RemoveAll(workDir) }

	program, programArgs, err := buildCandidate(ctx, code, language, workDir)
	if err != nil {
		discard()
		return nil, fmt.Errorf("build: %w", err)
	}

	port, err := allocateFreePort()
	if err != nil {
		discard()
		return nil, fmt.Errorf("allocate port: %w", err)
	}

	child := exec.CommandContext(ctx, program, append([]string(nil), programArgs...)...)
	child.Dir = workDir
	child.Env = append(os.Environ(),
		fmt.Sprintf("BOT_PORT=%d", port),
		"BOT_SECRET="+evalSecret,
	)
	if err := child.Start(); err != nil {
		discard()
		return nil, fmt.Errorf("start process: %w", err)
	}

	proc := &botProcess{port: port, cmd: child, tmpDir: workDir}
	if err := waitForHealth(ctx, fmt.Sprintf("127.0.0.1:%d", port)); err != nil {
		proc.stop()
		return nil, fmt.Errorf("candidate health: %w", err)
	}
	return proc, nil
}
// buildCandidate writes code into dir under the file name conventional for
// language, compiles it when the language needs compiling, and returns the
// executable to run together with its arguments.
//
// Compiled languages (go, rust) return the built binary with no arguments;
// the others return the interpreter or VM (python3, node, java, php) plus the
// arguments that point it at the candidate. A compiler failure is reported
// with the first 512 bytes of the tool's combined output. An unknown
// language yields an "unsupported language" error.
func buildCandidate(ctx context.Context, code, language, dir string) (string, []string, error) {
	// save writes content to dir/name with owner-only permissions and
	// returns the full path.
	save := func(name, content string) (string, error) {
		path := dir + "/" + name
		return path, os.WriteFile(path, []byte(content), 0o600)
	}
	// compile runs tool with args; workDir "" keeps the current directory.
	// label prefixes the error built from the tool's output.
	compile := func(label, workDir, tool string, args ...string) error {
		cmd := exec.CommandContext(ctx, tool, args...)
		cmd.Dir = workDir
		out, err := cmd.CombinedOutput()
		if err != nil {
			return fmt.Errorf("%s: %s", label, truncate(string(out), 512))
		}
		return nil
	}

	switch language {
	case "go":
		if _, err := save("bot.go", code); err != nil {
			return "", nil, err
		}
		if _, err := save("go.mod", "module bot\n\ngo 1.21\n"); err != nil {
			return "", nil, err
		}
		binary := dir + "/bot"
		// go build must run inside the module directory.
		if err := compile("go build", dir, "go", "build", "-o", binary, "."); err != nil {
			return "", nil, err
		}
		return binary, nil, nil

	case "python":
		script, err := save("bot.py", code)
		if err != nil {
			return "", nil, err
		}
		return "python3", []string{script}, nil

	case "rust":
		source, err := save("main.rs", code)
		if err != nil {
			return "", nil, err
		}
		binary := dir + "/bot"
		if err := compile("rustc", "", "rustc", "--edition", "2021", source, "-o", binary); err != nil {
			return "", nil, err
		}
		return binary, nil, nil

	case "typescript":
		if _, err := save("bot.ts", code); err != nil {
			return "", nil, err
		}
		tsconfig := `{"compilerOptions":{"target":"ES2020","module":"commonjs","outDir":"./"},"files":["bot.ts"]}`
		project, err := save("tsconfig.json", tsconfig)
		if err != nil {
			return "", nil, err
		}
		if err := compile("tsc", "", "tsc", "--project", project); err != nil {
			return "", nil, err
		}
		return "node", []string{dir + "/bot.js"}, nil

	case "java":
		source, err := save("Bot.java", code)
		if err != nil {
			return "", nil, err
		}
		if err := compile("javac", "", "javac", source); err != nil {
			return "", nil, err
		}
		return "java", []string{"-cp", dir, "Bot"}, nil

	case "php":
		script, err := save("bot.php", code)
		if err != nil {
			return "", nil, err
		}
		return "php", []string{script}, nil
	}
	return "", nil, fmt.Errorf("unsupported language: %s", language)
}
// allocateFreePort asks the kernel for an unused TCP port on 127.0.0.1 by
// listening on port 0, reads back the port that was assigned, and releases
// the listener again before returning.
//
// The port is only known to have been free at the time of the call; nothing
// reserves it afterwards.
func allocateFreePort() (int, error) {
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		return 0, err
	}
	defer listener.Close()

	tcpAddr := listener.Addr().(*net.TCPAddr)
	return tcpAddr.Port, nil
}
// waitForHealth repeatedly requests http://addr/health until it receives a
// 200 OK, ctx is cancelled, or healthStartupTimeout has elapsed.
//
// Each probe has its own 500ms client timeout. Failed probes and non-200
// answers are retried after healthPollInterval. It returns nil on the first
// 200, ctx.Err() on cancellation, and a descriptive error on timeout.
func waitForHealth(ctx context.Context, addr string) error {
	healthURL := "http://" + addr + "/health"
	probe := &http.Client{Timeout: 500 * time.Millisecond}
	giveUpAt := time.Now().Add(healthStartupTimeout)

	for time.Now().Before(giveUpAt) {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil)
		if err != nil {
			return err
		}
		resp, err := probe.Do(req)
		if err == nil {
			status := resp.StatusCode
			resp.Body.Close()
			if status == http.StatusOK {
				return nil
			}
		}

		// Not healthy yet: wait one interval unless the caller gives up first.
		select {
		case <-time.After(healthPollInterval):
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return fmt.Errorf("candidate did not become healthy within %s", healthStartupTimeout)
}
// decryptAESGCM opens an AES-256-GCM message.
//
// keyHex must be the hex encoding of a 32-byte key. ciphertextHex must be the
// hex encoding of the nonce immediately followed by the sealed data
// (ciphertext plus authentication tag). No additional authenticated data is
// used. The recovered plaintext is returned as a string; any decoding, size
// or authentication problem is returned as an error.
func decryptAESGCM(ciphertextHex, keyHex string) (string, error) {
	rawKey, err := hex.DecodeString(keyHex)
	switch {
	case err != nil:
		return "", fmt.Errorf("decode key: %w", err)
	case len(rawKey) != 32:
		return "", fmt.Errorf("key must be 32 bytes (64 hex chars)")
	}

	sealed, err := hex.DecodeString(ciphertextHex)
	if err != nil {
		return "", fmt.Errorf("decode ciphertext: %w", err)
	}

	blockCipher, err := aes.NewCipher(rawKey)
	if err != nil {
		return "", err
	}
	gcm, err := cipher.NewGCM(blockCipher)
	if err != nil {
		return "", err
	}

	// The nonce is stored as a prefix of the message.
	nonceLen := gcm.NonceSize()
	if len(sealed) < nonceLen {
		return "", fmt.Errorf("ciphertext too short")
	}
	nonce, body := sealed[:nonceLen], sealed[nonceLen:]

	opened, err := gcm.Open(nil, nonce, body, nil)
	if err != nil {
		return "", err
	}
	return string(opened), nil
}
// truncate shortens s to at most max bytes and appends "…" when anything was
// cut off. Strings that already fit are returned unchanged.
//
// The cut is moved back to the nearest rune boundary at or before max, so a
// multi-byte UTF-8 character is never split in half. A negative max is
// treated like zero instead of panicking.
func truncate(s string, max int) string {
	if len(s) <= max {
		return s
	}
	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that does not exceed max.
	cut := 0
	for i := range s {
		if i > max {
			break
		}
		cut = i
	}
	return s[:cut] + "…"
}

View file

@ -0,0 +1,314 @@
package arena
import (
"fmt"
"math/rand"
"testing"
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites"
)
// ── ComputeNash ───────────────────────────────────────────────────────────────
// TestComputeNash_EmptySlice checks that an empty payoff row yields the
// neutral Nash value of 0.5.
func TestComputeNash_EmptySlice(t *testing.T) {
	got := ComputeNash(nil).NashValue
	if got != 0.5 {
		t.Errorf("empty: NashValue = %.3f, want 0.5", got)
	}
}
// TestComputeNash_SingleOpponent checks that with one opponent the Nash value
// is that opponent's win rate and the mixture puts all weight on it.
func TestComputeNash_SingleOpponent(t *testing.T) {
	nash := ComputeNash([]float64{0.7})
	if v := nash.NashValue; v != 0.7 {
		t.Errorf("single: NashValue = %.3f, want 0.7", v)
	}
	if w := nash.OpponentMix[0]; w != 1.0 {
		t.Errorf("single: mix[0] = %.3f, want 1.0", w)
	}
}
// TestComputeNash_MinimumIsHardestOpponent checks that the Nash value equals
// the lowest win rate and that the mixture is concentrated on that opponent.
func TestComputeNash_MinimumIsHardestOpponent(t *testing.T) {
	// The column player minimises the candidate's win rate, so the value is
	// min(winRates) and index 1 (0.3) carries all the weight.
	nash := ComputeNash([]float64{0.8, 0.3, 0.6})
	if nash.NashValue != 0.3 {
		t.Errorf("NashValue = %.3f, want 0.3", nash.NashValue)
	}
	for i, w := range nash.OpponentMix {
		switch {
		case i == 1 && w != 1.0:
			t.Errorf("mix[1] = %.3f, want 1.0", w)
		case i != 1 && w != 0.0:
			t.Errorf("mix[%d] = %.3f, want 0.0", i, w)
		}
	}
}
// TestComputeNash_TiedMinimum checks that opponents tied at the minimum win
// rate share the mixture weight equally and the rest get none.
func TestComputeNash_TiedMinimum(t *testing.T) {
	nash := ComputeNash([]float64{0.2, 0.8, 0.2})
	if nash.NashValue != 0.2 {
		t.Errorf("NashValue = %.3f, want 0.2", nash.NashValue)
	}
	mix := nash.OpponentMix
	if !(mix[0] == 0.5 && mix[2] == 0.5) {
		t.Errorf("tied mix = %v, want [0.5 0.0 0.5]", mix)
	}
	if mix[1] != 0.0 {
		t.Errorf("mix[1] = %.3f, want 0.0", mix[1])
	}
}
// TestComputeNash_AllEqual checks that identical win rates produce a uniform
// mixture over all opponents.
func TestComputeNash_AllEqual(t *testing.T) {
	nash := ComputeNash([]float64{0.5, 0.5, 0.5})
	if nash.NashValue != 0.5 {
		t.Errorf("all-equal: NashValue = %.3f, want 0.5", nash.NashValue)
	}
	const tolerance = 1e-9
	uniform := 1.0 / 3.0
	for i, w := range nash.OpponentMix {
		if abs(w-uniform) > tolerance {
			t.Errorf("mix[%d] = %.6f, want %.6f", i, w, uniform)
		}
	}
}
// TestFictitiousPlayNash_MatchesMinimaxForSingleRow checks that fictitious
// play arrives at (approximately) the minimum win rate for a 1×K game.
func TestFictitiousPlayNash_MatchesMinimaxForSingleRow(t *testing.T) {
	got := FictitiousPlayNash([]float64{0.8, 0.3, 0.6}, 10000).NashValue
	if abs(got-0.3) > 0.01 {
		t.Errorf("fictitious play: NashValue = %.3f, want ≈0.3", got)
	}
}
// ── WinRate ───────────────────────────────────────────────────────────────────
// TestWinRate_ZeroTotal checks that no matches at all gives the neutral rate.
func TestWinRate_ZeroTotal(t *testing.T) {
	got := WinRate(0, 0).Rate
	if got != 0.5 {
		t.Errorf("zero total: Rate = %.3f, want 0.5", got)
	}
}
// TestWinRate_AllWins checks the rate for a perfect record and that the
// confidence interval is not inverted.
func TestWinRate_AllWins(t *testing.T) {
	ci := WinRate(10, 10)
	if ci.Rate != 1.0 {
		t.Errorf("all wins: Rate = %.3f, want 1.0", ci.Rate)
	}
	if lo, hi := ci.Lower, ci.Upper; lo > hi {
		t.Errorf("CI inverted: lower=%.3f upper=%.3f", lo, hi)
	}
}
// TestWinRate_AllLosses checks the rate for a winless record and that the
// confidence interval stays inside [0, 1].
func TestWinRate_AllLosses(t *testing.T) {
	ci := WinRate(0, 10)
	if ci.Rate != 0.0 {
		t.Errorf("all losses: Rate = %.3f, want 0.0", ci.Rate)
	}
	if lo, hi := ci.Lower, ci.Upper; lo < 0.0 || hi > 1.0 {
		t.Errorf("CI out of [0,1]: lower=%.3f upper=%.3f", lo, hi)
	}
}
// TestWinRate_FiftyPercent checks that an even record reports 0.5 and that
// the confidence interval strictly contains 0.5.
func TestWinRate_FiftyPercent(t *testing.T) {
	ci := WinRate(5, 10)
	if abs(ci.Rate-0.5) > 1e-9 {
		t.Errorf("50%%: Rate = %.3f, want 0.5", ci.Rate)
	}
	straddles := ci.Lower < 0.5 && ci.Upper > 0.5
	if !straddles {
		t.Errorf("50%% CI should straddle 0.5: lower=%.3f upper=%.3f", ci.Lower, ci.Upper)
	}
}
// TestWinRate_CIBounds sweeps every possible win count out of ten games and
// checks that the interval lies in [0, 1] and is correctly ordered.
func TestWinRate_CIBounds(t *testing.T) {
	const games = 10
	for wins := 0; wins <= games; wins++ {
		ci := WinRate(wins, games)
		if ci.Lower < 0.0 || ci.Upper > 1.0 {
			t.Errorf("wins=%d: CI [%.3f, %.3f] outside [0,1]", wins, ci.Lower, ci.Upper)
		}
		if ci.Lower > ci.Upper {
			t.Errorf("wins=%d: lower (%.3f) > upper (%.3f)", wins, ci.Lower, ci.Upper)
		}
	}
}
// ── ComputeFromResult ─────────────────────────────────────────────────────────
// TestComputeFromResult_Basic checks that wins are carried over unchanged and
// that the rate is wins divided by all valid matches (draws included in the
// denominator only).
func TestComputeFromResult_Basic(t *testing.T) {
	tournament := &Result{Wins: 7, Losses: 2, Draws: 1}
	got := ComputeFromResult(tournament)
	if got.Wins != 7 {
		t.Errorf("Wins = %d, want 7", got.Wins)
	}
	// 7 wins out of 10 valid matches.
	if abs(got.Rate-0.7) > 1e-9 {
		t.Errorf("Rate = %.3f, want 0.7", got.Rate)
	}
}
// TestComputeFromResult_OnlyErrors checks that errored matches do not count
// towards the total.
func TestComputeFromResult_OnlyErrors(t *testing.T) {
	tournament := &Result{Errors: 5}
	if total := ComputeFromResult(tournament).Total; total != 0 {
		t.Errorf("Total = %d, want 0 (errors excluded)", total)
	}
}
// ── Gate.Evaluate ─────────────────────────────────────────────────────────────
// TestGate_PromotedWhenAllCriteriaMet checks the happy path: a strong
// candidate evaluated against an empty grid is promoted, is recorded as
// placed, and is not recorded as having beaten an incumbent.
func TestGate_PromotedWhenAllCriteriaMet(t *testing.T) {
grid := mapelites.New(10)
gate := NewGate(DefaultGateConfig(), grid)
// Every per-opponent win rate is at least 0.6, so the minimum clears the
// default Nash threshold.
result := &Result{
Wins: 8, Losses: 2, Draws: 0,
WinRateVec: []float64{0.8, 0.7, 0.9, 0.6, 0.8, 0.7, 0.8, 0.9, 0.7, 0.8},
}
gr := gate.Evaluate(result, 1, 0.8, []float64{0.5, 0.5})
if !gr.Promoted {
t.Errorf("expected promoted, got rejected: %s", gr.Reason)
}
if !gr.MapElitesPlaced {
t.Error("expected MapElitesPlaced = true for empty grid")
}
if gr.MapElitesImproved {
t.Error("expected MapElitesImproved = false for empty cell")
}
}
// TestGate_RejectedWhenNashTooLow checks that a single weak matchup is enough
// to fail the Nash criterion. The win-rate bound is disabled so that only the
// Nash threshold can cause the rejection.
func TestGate_RejectedWhenNashTooLow(t *testing.T) {
grid := mapelites.New(10)
cfg := GateConfig{NashThreshold: 0.60, WinRateLowerBound: 0.0}
gate := NewGate(cfg, grid)
// WinRateVec has a low value → Nash = min = 0.2, below 0.60
result := &Result{
Wins: 7, Losses: 3,
WinRateVec: []float64{0.9, 0.2, 0.9, 0.9, 0.9},
}
gr := gate.Evaluate(result, 2, 0.7, []float64{0.5, 0.5})
if gr.Promoted {
t.Errorf("should be rejected (Nash too low), got: %s", gr.Reason)
}
}
// TestGate_RejectedWhenNicheOccupiedByFitterBot checks that a candidate with
// good match results is still rejected when its behavioral cell already
// holds a program with higher fitness.
func TestGate_RejectedWhenNicheOccupiedByFitterBot(t *testing.T) {
grid := mapelites.New(10)
// Pre-occupy the [5,5] cell with a very fit bot.
grid.TryPlace(99, 0.99, 0.5, 0.5)
cfg := DefaultGateConfig()
gate := NewGate(cfg, grid)
// Candidate is in the same niche but has lower fitness.
result := &Result{
Wins: 7, Losses: 3,
WinRateVec: []float64{0.8, 0.7, 0.9, 0.6, 0.8, 0.7, 0.8, 0.9, 0.7, 0.8},
}
// Same behavior vector as the incumbent, fitness 0.7 < 0.99.
gr := gate.Evaluate(result, 1, 0.7, []float64{0.5, 0.5})
if gr.Promoted {
t.Errorf("should be rejected (niche occupied by fitter bot), got: %s", gr.Reason)
}
if gr.MapElitesPlaced {
t.Error("MapElitesPlaced should be false when existing bot is fitter")
}
}
// TestGate_PromotedWhenOutperformsNicheChampion checks that a candidate which
// is fitter than the program already in its cell is promoted and flagged as
// an improvement over the incumbent.
func TestGate_PromotedWhenOutperformsNicheChampion(t *testing.T) {
grid := mapelites.New(10)
// Pre-occupy with a weaker bot.
grid.TryPlace(99, 0.4, 0.5, 0.5)
cfg := DefaultGateConfig()
gate := NewGate(cfg, grid)
// Candidate is fitter than the incumbent.
result := &Result{
Wins: 8, Losses: 2,
WinRateVec: []float64{0.8, 0.7, 0.9, 0.6, 0.8, 0.7, 0.8, 0.9, 0.7, 0.8},
}
// Same behavior vector as the incumbent, fitness 0.8 > 0.4.
gr := gate.Evaluate(result, 1, 0.8, []float64{0.5, 0.5})
if !gr.Promoted {
t.Errorf("should be promoted (beats incumbent), got: %s", gr.Reason)
}
if !gr.MapElitesImproved {
t.Error("MapElitesImproved should be true when beating existing champion")
}
}
// ── selectDiverse ─────────────────────────────────────────────────────────────
// TestSelectDiverse_EmptyPool checks that nothing is selected from an empty
// pool.
func TestSelectDiverse_EmptyPool(t *testing.T) {
	picked := selectDiverse(nil, 5, rand.New(rand.NewSource(42)))
	if n := len(picked); n != 0 {
		t.Errorf("empty pool: got %d opponents, want 0", n)
	}
}
// TestSelectDiverse_ExactlyN checks the selection size when the pool has
// exactly as many bots as requested.
func TestSelectDiverse_ExactlyN(t *testing.T) {
	pool := makeBots(5)
	picked := selectDiverse(pool, 5, rand.New(rand.NewSource(42)))
	if n := len(picked); n != 5 {
		t.Errorf("exact n: got %d opponents, want 5", n)
	}
}
// TestSelectDiverse_MoreThanN checks that, with a pool larger than the
// request, the right number of opponents is returned and none is repeated.
func TestSelectDiverse_MoreThanN(t *testing.T) {
	pool := makeBots(20)
	picked := selectDiverse(pool, 10, rand.New(rand.NewSource(42)))
	if n := len(picked); n != 10 {
		t.Errorf("more than n: got %d opponents, want 10", n)
	}
	// Count distinct bot IDs; ten picks from twenty bots must all differ.
	unique := make(map[string]bool, len(picked))
	for i := range picked {
		unique[picked[i].BotID] = true
	}
	if n := len(unique); n != 10 {
		t.Errorf("duplicates in diverse selection: got %d unique, want 10", n)
	}
}
// TestSelectDiverse_FewerThanN checks that a pool smaller than the request is
// reused until the requested number of opponents is reached.
func TestSelectDiverse_FewerThanN(t *testing.T) {
	// Three bots must be repeated to fill ten slots.
	pool := makeBots(3)
	picked := selectDiverse(pool, 10, rand.New(rand.NewSource(42)))
	if n := len(picked); n != 10 {
		t.Errorf("fewer than n: got %d opponents, want 10", n)
	}
}
// ── helpers ───────────────────────────────────────────────────────────────────
// makeBots returns n synthetic bot records with distinct IDs and names and
// ratings that rise by 50 per bot starting at 1000. Endpoint and secret are
// left empty.
func makeBots(n int) []BotRecord {
	fleet := make([]BotRecord, 0, n)
	for i := 0; i < n; i++ {
		fleet = append(fleet, BotRecord{
			BotID:    fmt.Sprintf("b_%04d", i),
			Name:     fmt.Sprintf("bot-%d", i),
			RatingMu: float64(1000 + i*50),
		})
	}
	return fleet
}
// abs returns the magnitude of x.
func abs(x float64) float64 {
	if x >= 0 {
		return x
	}
	return -x
}

View file

@ -0,0 +1,144 @@
// Package arena — promotion gate.
//
// The gate applies two independent criteria before promoting a candidate:
//
// 1. Nash value (PSRO) ≥ NashThreshold — sufficient win rate
// 2. MAP-Elites niche fill or improvement — behavioral novelty
//
// Both must be satisfied. The Wilson-score CI lower bound is an optional
// secondary guard on the overall win rate.
package arena
import (
"fmt"
"strings"
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites"
)
// GateConfig holds the promotion thresholds.
type GateConfig struct {
// NashThreshold is the minimum Nash value (worst-case win rate across
// opponents) required for promotion. Default: 0.50.
// The comparison is inclusive: a Nash value equal to the threshold passes.
NashThreshold float64
// WinRateLowerBound is the minimum Wilson-score 95% CI lower bound for
// the overall win rate. Set ≤ 0 to disable. Default: 0.40.
WinRateLowerBound float64
}
// DefaultGateConfig returns the default promotion thresholds: a Nash value of
// at least 0.50 and a Wilson lower bound of at least 0.40.
func DefaultGateConfig() GateConfig {
	var cfg GateConfig
	cfg.NashThreshold = 0.50
	cfg.WinRateLowerBound = 0.40
	return cfg
}
// GateResult holds the full promotion decision with supporting evidence.
// Every field is populated for rejections as well as promotions.
type GateResult struct {
// Promoted is true when all criteria are met.
Promoted bool
// Nash is the PSRO result for the mini-tournament.
Nash NashResult
// WinRate is the overall win rate with 95% Wilson CI.
WinRate WinRateResult
// MapElitesPlaced is true when the candidate was written to the MAP-Elites
// grid (filled an empty cell or outperformed the incumbent).
MapElitesPlaced bool
// MapElitesImproved is true when the candidate beat an existing champion
// (as opposed to simply filling an empty niche).
MapElitesImproved bool
// Placement is the (X, Y) grid cell the candidate occupies.
// It is whatever the grid's TryPlace returned, including when placement failed.
Placement mapelites.Placement
// Reason is a human-readable explanation of the promotion decision.
// It starts with "promoted: " or "rejected: ".
Reason string
}
// Gate applies the promotion criteria to mini-tournament results.
type Gate struct {
cfg GateConfig // thresholds applied by Evaluate
grid *mapelites.Grid // shared niche grid; Evaluate mutates it through TryPlace
}
// NewGate returns a Gate that applies cfg and records placements in grid.
// The grid is held by reference, so niche occupancy carries over between
// Evaluate calls made on the same Gate (and on any other user of that grid).
func NewGate(cfg GateConfig, grid *mapelites.Grid) *Gate {
	g := new(Gate)
	g.cfg, g.grid = cfg, grid
	return g
}
// Evaluate applies the two-part promotion gate to the arena result.
//
// programID and fitness are the candidate's identifiers in the programs table.
// behaviorVec is [aggression, economy] ∈ [0,1]²; defaults to [0.5, 0.5] when
// nil or short.
//
// A candidate is promoted when all of the following hold:
//   - its Nash value is at least cfg.NashThreshold,
//   - the Wilson lower bound of its overall win rate is at least
//     cfg.WinRateLowerBound (skipped when that bound is ≤ 0),
//   - the MAP-Elites grid accepted it into its behavioral cell.
//
// The returned GateResult always carries the computed evidence and a Reason
// that lists every failed criterion on rejection.
//
// Side effect: g.grid.TryPlace is called — the cell is updated when the
// candidate wins its behavioral niche.
//
// NOTE(review): TryPlace runs before the Nash and win-rate checks, so a
// candidate that is rejected on those grounds can still take over its grid
// cell. Confirm that this is intended.
//
// NOTE(review): with no valid matches result.WinRateVec is empty and the Nash
// value is the neutral 0.5, which meets the default threshold; only the
// win-rate bound then stands in the way. Confirm before disabling that bound.
func (g *Gate) Evaluate(result *Result, programID int64, fitness float64, behaviorVec []float64) *GateResult {
	wr := ComputeFromResult(result)
	nash := ComputeNash(result.WinRateVec)

	agg, eco := 0.5, 0.5
	if len(behaviorVec) >= 2 {
		agg, eco = behaviorVec[0], behaviorVec[1]
	}

	// Sample the cell state before TryPlace so we can distinguish
	// "fills empty niche" from "beats existing champion".
	cellX, cellY := g.grid.BehaviorToCell(agg, eco)
	priorCell := g.grid.Get(cellX, cellY)
	placement, placed := g.grid.TryPlace(programID, fitness, agg, eco)

	gr := &GateResult{
		Nash:              nash,
		WinRate:           wr,
		MapElitesPlaced:   placed,
		MapElitesImproved: placed && priorCell.Occupied,
		Placement:         placement,
	}

	nashOK := nash.NashValue >= g.cfg.NashThreshold
	winOK := g.cfg.WinRateLowerBound <= 0 || wr.Lower >= g.cfg.WinRateLowerBound
	mapOK := placed

	if nashOK && winOK && mapOK {
		gr.Promoted = true
		// The CI bounds are printed with a separator; without one the two
		// numbers run together (e.g. "0.4900.943") and cannot be read.
		if !priorCell.Occupied {
			gr.Reason = fmt.Sprintf(
				"promoted: Nash=%.3f ≥ %.3f, WR=%.3f (95%% CI %.3f-%.3f), fills new niche [%d,%d]",
				nash.NashValue, g.cfg.NashThreshold,
				wr.Rate, wr.Lower, wr.Upper,
				placement.X, placement.Y)
		} else {
			gr.Reason = fmt.Sprintf(
				"promoted: Nash=%.3f ≥ %.3f, WR=%.3f (95%% CI %.3f-%.3f), beats niche [%d,%d] champion (%.3f→%.3f)",
				nash.NashValue, g.cfg.NashThreshold,
				wr.Rate, wr.Lower, wr.Upper,
				placement.X, placement.Y, priorCell.Fitness, fitness)
		}
		return gr
	}

	// Collect every failed criterion so the caller sees the full picture.
	var why []string
	if !nashOK {
		why = append(why, fmt.Sprintf("Nash=%.3f < %.3f", nash.NashValue, g.cfg.NashThreshold))
	}
	if !winOK {
		why = append(why, fmt.Sprintf("WR CI lower=%.3f < %.3f", wr.Lower, g.cfg.WinRateLowerBound))
	}
	if !mapOK {
		why = append(why, fmt.Sprintf("niche [%d,%d] occupied by fitter bot (fitness=%.3f)",
			placement.X, placement.Y, priorCell.Fitness))
	}
	gr.Reason = "rejected: " + strings.Join(why, "; ")
	return gr
}

View file

@ -0,0 +1,119 @@
// Package arena — PSRO Nash equilibrium computation.
//
// LLM-PSRO (Policy Space Response Oracles) uses Nash equilibrium over the
// current bot population as the promotion criterion. A candidate is promoted
// only if it is a best response to the Nash mixture, i.e. its expected payoff
// against the Nash mixture exceeds the threshold (default 0.50).
//
// For the mini-tournament setting (one candidate, K opponents), the payoff
// matrix has a single row. The Nash-optimal strategy for the minimising
// column player (opponents) is to concentrate weight on the opponent that
// minimises the candidate's expected win rate. The resulting Nash value is
// therefore min(winRates), which is the tightest possible test.
//
// The full fictitious-play algorithm is retained so it generalises cleanly
// to K×K payoff matrices when the population grows.
package arena
// NashResult holds the Nash equilibrium computation for the meta-game.
type NashResult struct {
// OpponentMix[i] = probability of opponent i in the Nash mixture.
// Sums to 1.0. It is nil when there were no opponents.
OpponentMix []float64
// NashValue is the candidate's expected win rate under the Nash mixture.
// This is the quantity compared against the promotion threshold.
// It is 0.5 when there were no opponents.
NashValue float64
// WinRatePerOpponent mirrors the input payoff row for convenience.
// It is the caller's slice, not a copy.
WinRatePerOpponent []float64
}
// ComputeNash solves the 1×K meta-game in which winRates[i] is the
// candidate's win rate against opponent i.
//
// The opponent side picks the column and wants the candidate's expected win
// rate to be as low as possible, so it plays only the opponent(s) against
// whom the candidate does worst. The resulting Nash value is min(winRates),
// and the mixture spreads its weight evenly over every opponent tied at that
// minimum.
//
// An empty winRates yields a NashResult with NashValue 0.5 and no mixture.
// The returned WinRatePerOpponent is the input slice itself.
func ComputeNash(winRates []float64) NashResult {
	K := len(winRates)
	if K == 0 {
		return NashResult{NashValue: 0.5}
	}

	// One pass: track the lowest rate and how many opponents share it.
	lowest, ties := winRates[0], 1
	for _, w := range winRates[1:] {
		switch {
		case w < lowest:
			lowest, ties = w, 1
		case w == lowest:
			ties++
		}
	}

	share := 1.0 / float64(ties)
	mix := make([]float64, K)
	for i, w := range winRates {
		if w == lowest {
			mix[i] = share
		}
	}

	return NashResult{
		OpponentMix:        mix,
		NashValue:          lowest,
		WinRatePerOpponent: winRates,
	}
}
// FictitiousPlayNash computes the Nash equilibrium of the 1×K meta-game by
// fictitious play over the given number of rounds (1000 when iterations is
// not positive). It is kept as the entry point that would generalise to a
// K×K payoff matrix.
//
// With a single row the row player's strategy never changes, so the column
// player's best response is the same in every round: the first opponent with
// the lowest win rate. That search is therefore done once instead of once per
// round, and that opponent is credited with all rounds.
//
// Compared with ComputeNash the NashValue is the same (min(winRates)), but
// the mixture differs when several opponents tie for the minimum: all weight
// goes to the first of them rather than being split evenly.
//
// An empty winRates yields a NashResult with NashValue 0.5 and no mixture.
// The returned WinRatePerOpponent is the input slice itself.
func FictitiousPlayNash(winRates []float64, iterations int) NashResult {
	if len(winRates) == 0 {
		return NashResult{NashValue: 0.5}
	}
	if iterations <= 0 {
		iterations = 1000
	}
	K := len(winRates)

	// Column player's best response to the fixed row strategy: the opponent
	// minimising the candidate's win rate (first one wins ties).
	best := 0
	for i := 1; i < K; i++ {
		if winRates[i] < winRates[best] {
			best = i
		}
	}

	// Every round plays the same best response.
	counts := make([]float64, K)
	counts[best] = float64(iterations)

	// Empirical play frequencies and the candidate's expected win rate
	// against them.
	mix := make([]float64, K)
	expected := 0.0
	for i, c := range counts {
		mix[i] = c / float64(iterations)
		expected += mix[i] * winRates[i]
	}
	return NashResult{
		OpponentMix:        mix,
		NashValue:          expected,
		WinRatePerOpponent: winRates,
	}
}

View file

@ -0,0 +1,55 @@
package arena
import "math"
// WinRateResult holds the observed win rate and its 95% Wilson score confidence interval.
type WinRateResult struct {
	Wins  int     // win count as supplied by the caller
	Total int     // non-error matches only
	Rate  float64 // observed win rate (0–1)
	Lower float64 // 95% CI lower bound
	Upper float64 // 95% CI upper bound
}

// WinRate computes the win rate and Wilson score 95% confidence interval
// for wins out of total valid matches.
//
// When total <= 0 there is no data: Rate is 0.5 and the interval is the
// uninformative [0, 1] (Wins and Total are left at zero). A wins value outside
// [0, total] is clamped for the rate computation so the result never contains
// NaN or bounds outside [0, 1]; the Wins and Total fields echo the arguments.
//
// Wilson score interval:
//
//	center = (p̂ + z²/2n) / (1 + z²/n)
//	margin = z * sqrt(p̂(1-p̂)/n + z²/4n²) / (1 + z²/n)
//	CI     = [center − margin, center + margin]
//
// Using z = 1.96 (95% two-tailed confidence).
func WinRate(wins, total int) WinRateResult {
	if total <= 0 {
		return WinRateResult{Rate: 0.5, Lower: 0.0, Upper: 1.0}
	}
	const z = 1.96 // 95% CI
	n := float64(total)
	// Clamp so malformed counts cannot push p(1-p) negative under the sqrt.
	p := math.Min(1, math.Max(0, float64(wins)/n))
	z2 := z * z
	denom := 1 + z2/n
	center := (p + z2/(2*n)) / denom
	margin := z * math.Sqrt(p*(1-p)/n+z2/(4*n*n)) / denom
	return WinRateResult{
		Wins:  wins,
		Total: total,
		Rate:  p,
		// Rounding can nudge the bounds a hair past [0, 1]; pin them.
		Lower: math.Max(0, center-margin),
		Upper: math.Min(1, center+margin),
	}
}
// ComputeFromResult builds a WinRateResult from a tournament Result.
// Only non-error matches are counted (wins + losses + draws), and each draw
// contributes 0.5 of a win to the rate and to its Wilson 95% interval.
//
// The returned Wins and Total fields keep the integer win and match counts;
// only Rate, Lower and Upper reflect the half-credit for draws. With no valid
// matches the result is Rate 0.5 with the uninformative interval [0, 1].
//
// The interval uses the same Wilson score formula as WinRate (z = 1.96),
// applied to the fractional success proportion.
func ComputeFromResult(r *Result) WinRateResult {
	total := r.Wins + r.Losses + r.Draws
	if total <= 0 {
		return WinRateResult{Rate: 0.5, Lower: 0.0, Upper: 1.0}
	}

	const z = 1.96 // 95% CI
	n := float64(total)
	// Draws count as half-wins for the rate.
	p := (float64(r.Wins) + 0.5*float64(r.Draws)) / n
	z2 := z * z
	denom := 1 + z2/n
	center := (p + z2/(2*n)) / denom
	margin := z * math.Sqrt(p*(1-p)/n+z2/(4*n*n)) / denom

	return WinRateResult{
		Wins:  r.Wins,
		Total: total,
		Rate:  p,
		Lower: math.Max(0, center-margin),
		Upper: math.Min(1, center+margin),
	}
}

View file

@ -18,6 +18,7 @@ CREATE TABLE IF NOT EXISTS programs (
behavior_vector DOUBLE PRECISION[] NOT NULL DEFAULT '{}',
fitness DOUBLE PRECISION NOT NULL DEFAULT 0.0,
promoted BOOLEAN NOT NULL DEFAULT FALSE,
bot_id VARCHAR(16),
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_programs_island ON programs(island);
@ -37,8 +38,20 @@ CREATE INDEX IF NOT EXISTS idx_validation_log_island ON validation_log(island);
CREATE INDEX IF NOT EXISTS idx_validation_log_island_passed ON validation_log(island, passed);
`
// migrationSQL holds additive migrations run after the base schema is ensured.
// Each statement is idempotent (ALTER TABLE … ADD COLUMN IF NOT EXISTS), so
// the whole batch can be executed on every start-up.
const migrationSQL = `
ALTER TABLE programs ADD COLUMN IF NOT EXISTS bot_id VARCHAR(16);
ALTER TABLE programs ADD COLUMN IF NOT EXISTS bot_name VARCHAR(64);
ALTER TABLE programs ADD COLUMN IF NOT EXISTS bot_secret TEXT;
`
// EnsureSchema creates the programs and validation_log tables if they do not
// already exist, then applies any pending additive migrations.
//
// The base schema (schemaSQL) is executed first; migrationSQL runs only when
// that succeeds. The first error encountered is returned unchanged.
func EnsureSchema(ctx context.Context, db *sql.DB) error {
	if _, err := db.ExecContext(ctx, schemaSQL); err != nil {
		return err
	}
	_, err := db.ExecContext(ctx, migrationSQL)
	return err
}

View file

@ -177,3 +177,106 @@ func (s *Store) TotalCount(ctx context.Context) (int, error) {
}
return n, nil
}
// SetBotID stores botID in the bot_id column of the programs row with the
// given id. A failed UPDATE is returned wrapped with the program id.
func (s *Store) SetBotID(ctx context.Context, id int64, botID string) error {
	const stmt = `UPDATE programs SET bot_id = $1 WHERE id = $2`
	if _, err := s.db.ExecContext(ctx, stmt, botID, id); err != nil {
		return fmt.Errorf("set bot_id for program %d: %w", id, err)
	}
	return nil
}
// PromotedProgram holds a promoted program linked to its live bot.
// ListPromoted fills one of these per row of the programs table.
type PromotedProgram struct {
ProgramID int64 // programs.id
BotID string // programs.bot_id
BotName string // K8s/API name, e.g. "acb-evo-42"
BotSecret string // plaintext secret stored for retirement operations
Island string // programs.island
BehaviorVector []float64 // programs.behavior_vector
Fitness float64 // programs.fitness
}
// ListPromoted returns all programs that have been promoted (promoted is TRUE
// and bot_id is set), ordered by fitness from highest to lowest. NULL
// bot_name / bot_secret columns are returned as empty strings.
//
// On any query, scan, or iteration error the returned slice is nil and the
// error is wrapped with context.
func (s *Store) ListPromoted(ctx context.Context) ([]*PromotedProgram, error) {
	rows, err := s.db.QueryContext(ctx, `
SELECT id, bot_id, COALESCE(bot_name, ''), COALESCE(bot_secret, ''),
island, behavior_vector, fitness
FROM programs
WHERE promoted = TRUE AND bot_id IS NOT NULL
ORDER BY fitness DESC`)
	if err != nil {
		return nil, fmt.Errorf("list promoted programs: %w", err)
	}
	defer rows.Close()

	var out []*PromotedProgram
	for rows.Next() {
		p := &PromotedProgram{}
		if err := rows.Scan(&p.ProgramID, &p.BotID, &p.BotName, &p.BotSecret,
			&p.Island, pq.Array(&p.BehaviorVector), &p.Fitness); err != nil {
			return nil, fmt.Errorf("scan promoted program: %w", err)
		}
		out = append(out, p)
	}
	// Do not hand back a partially filled slice when iteration failed midway.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate promoted programs: %w", err)
	}
	return out, nil
}
// SetBotNameAndSecret writes the K8s bot name and the shared secret into the
// bot_name and bot_secret columns of the programs row with the given id, so
// the retirement path can find the bot without an extra lookup.
//
// The secret is stored exactly as passed in; this method applies no encryption.
func (s *Store) SetBotNameAndSecret(ctx context.Context, id int64, botName, botSecret string) error {
	const stmt = `UPDATE programs SET bot_name = $1, bot_secret = $2 WHERE id = $3`
	if _, err := s.db.ExecContext(ctx, stmt, botName, botSecret, id); err != nil {
		return fmt.Errorf("set bot name/secret for program %d: %w", id, err)
	}
	return nil
}
// PromotedCount reports how many programs rows are currently promoted, i.e.
// have promoted = TRUE and a non-NULL bot_id.
func (s *Store) PromotedCount(ctx context.Context) (int, error) {
	const query = `SELECT COUNT(*) FROM programs WHERE promoted = TRUE AND bot_id IS NOT NULL`
	var count int
	if err := s.db.QueryRowContext(ctx, query).Scan(&count); err != nil {
		return 0, fmt.Errorf("promoted count: %w", err)
	}
	return count, nil
}
// UnsetPromoted marks the program with the given id as no longer promoted:
// promoted becomes FALSE and bot_id becomes NULL. The bot_name and bot_secret
// columns are not touched by this statement.
func (s *Store) UnsetPromoted(ctx context.Context, id int64) error {
	const stmt = `UPDATE programs SET promoted = FALSE, bot_id = NULL WHERE id = $1`
	if _, err := s.db.ExecContext(ctx, stmt, id); err != nil {
		return fmt.Errorf("unset promoted for program %d: %w", id, err)
	}
	return nil
}
// GetByBotID looks up the program whose bot_id column equals botID.
// It returns (nil, nil) when no such row exists. The parent_ids column is
// read as text and JSON-decoded into Program.ParentIDs.
func (s *Store) GetByBotID(ctx context.Context, botID string) (*Program, error) {
	var (
		prog       Program
		rawParents string
	)
	row := s.db.QueryRowContext(ctx, `
SELECT id, code, language, island, generation, parent_ids,
behavior_vector, fitness, promoted, created_at
FROM programs WHERE bot_id = $1`, botID)

	err := row.Scan(
		&prog.ID, &prog.Code, &prog.Language, &prog.Island, &prog.Generation,
		&rawParents, pq.Array(&prog.BehaviorVector), &prog.Fitness, &prog.Promoted, &prog.CreatedAt,
	)
	switch {
	case err == sql.ErrNoRows:
		// Absence is not an error for callers.
		return nil, nil
	case err != nil:
		return nil, fmt.Errorf("get program by bot_id %s: %w", botID, err)
	}

	if err := json.Unmarshal([]byte(rawParents), &prog.ParentIDs); err != nil {
		return nil, fmt.Errorf("unmarshal parent_ids: %w", err)
	}
	return &prog, nil
}

View file

@ -0,0 +1,721 @@
// Package promoter deploys validated+promoted evolved bots to Kubernetes and
// registers them in the ACB bots database. It also enforces the retirement
// policy: auto-retiring bots below a rating threshold and capping the
// evolved-bot fleet at a configurable population cap.
//
// Promotion flow
//
// 1. Generate a unique bot name (acb-evo-<programID>), bot ID, and secret.
// 2. Write bot source + language-appropriate Dockerfile to bots/evolved/<name>/.
// 3. Write K8s Secret / Deployment / Service manifests to deploy/k8s/.
// 4. Build and push the container image (best-effort; CI pipeline is the
// fallback when docker is unavailable or fails).
// 5. Git add → commit → push (triggers ArgoCD sync + image build via CI).
// 6. Poll kubectl until the Deployment has ≥1 available replica.
// 7. Insert the bot record directly into the bots database table.
// 8. Record bot_id, bot_name, and bot_secret in the programs table.
//
// Retirement flow
//
// 1. Mark bot as 'retired' in the bots table.
// 2. Delete the K8s manifests and bot source directory from git, commit, push.
// 3. Clear promoted=false / bot_id=NULL in the programs table.
package promoter
import (
"bytes"
"context"
"crypto/aes"
"crypto/cipher"
"crypto/rand"
"database/sql"
"encoding/base64"
"encoding/hex"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"text/template"
"time"
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/db"
)
const (
// botOwner is written to bots.owner for every promoted bot and is the
// filter EnforcePolicy uses to select the evolved-bot fleet.
botOwner = "acb-evolver"
// botPort is the port used in the bot's endpoint URL and in the generated
// Deployment and Service manifests.
botPort = 8080
)
// Config controls promotion and retirement behaviour.
type Config struct {
// Registry is the container registry prefix, e.g.
// "forgejo.ardenone.com/ai-code-battle".
Registry string
// RepoDir is the local git repository root used for writing manifests.
RepoDir string
// KubectlServer is the kubectl API server URL for deployment polling,
// e.g. "http://kubectl-ardenone-cluster:8001".
KubectlServer string
// Namespace is the Kubernetes namespace where bots are deployed.
Namespace string
// EncryptionKey is the hex-encoded AES-256-GCM key used to encrypt
// secrets before storing them in the bots table. Empty = plaintext.
EncryptionKey string
// DeployWaitTimeout is the maximum time to wait for an ArgoCD-managed
// deployment to have ≥1 available replica.
DeployWaitTimeout time.Duration
// RatingThreshold is the minimum display rating (mu − 2·phi) an evolved
// bot must maintain to avoid auto-retirement.
RatingThreshold float64
// PopCap is the maximum number of simultaneously promoted evolved bots.
// Lowest-rated bots are retired when the cap is exceeded.
PopCap int
}
// DefaultConfig returns the stock configuration: the project registry and
// namespace, the current directory as repository root, a 10-minute deployment
// wait, a rating threshold of 1000 and a population cap of 50.
// EncryptionKey is left empty.
func DefaultConfig() Config {
	var cfg Config
	cfg.Registry = "forgejo.ardenone.com/ai-code-battle"
	cfg.RepoDir = "."
	cfg.KubectlServer = "http://kubectl-ardenone-cluster:8001"
	cfg.Namespace = "ai-code-battle"
	cfg.DeployWaitTimeout = 10 * time.Minute
	cfg.RatingThreshold = 1000.0
	cfg.PopCap = 50
	return cfg
}
// Promoter manages promotion and retirement of evolved bots.
type Promoter struct {
store *db.Store // programs-table helpers (SetPromoted, SetBotID, UnsetPromoted, …)
rawDB *sql.DB // direct SQL access for statements against the bots table
cfg Config // registry, repository, cluster and policy settings
}
// New builds a Promoter from the programs store, the raw database handle
// used for bots-table statements, and the promotion/retirement settings.
func New(store *db.Store, rawDB *sql.DB, cfg Config) *Promoter {
	p := new(Promoter)
	p.store = store
	p.rawDB = rawDB
	p.cfg = cfg
	return p
}
// PromotionResult holds the outcome of a successful promotion.
type PromotionResult struct {
BotName string // "acb-evo-<program ID>", as built by Promote
BotID string // identifier produced by generateBotID
Endpoint string // K8s ClusterIP service URL
}
// Promote deploys a validated candidate as a live evolved bot.
//
// Steps, in order: derive the bot name / image / endpoint from program.ID,
// generate a bot ID and secret, write the bot source directory and the
// Dockerfile + K8s manifests, attempt a local docker build and push
// (failure is only logged), git commit and push, wait for the Deployment to
// report an available replica, insert the bots row, and finally record the
// promotion in the programs table.
//
// Returns the bot name, bot ID and in-cluster endpoint on success. Any step
// other than the docker build aborts the promotion with a wrapped error.
//
// NOTE(review): the steps are not wrapped in a transaction and nothing is
// rolled back here, so a failure after the git push or after the bots INSERT
// leaves earlier side effects in place.
func (p *Promoter) Promote(ctx context.Context, program *db.Program) (*PromotionResult, error) {
// The bot name doubles as the image name and the in-cluster hostname.
botName := fmt.Sprintf("acb-evo-%d", program.ID)
image := fmt.Sprintf("%s/%s:latest", p.cfg.Registry, botName)
endpoint := fmt.Sprintf("http://%s:%d", botName, botPort)
botID, err := generateBotID()
if err != nil {
return nil, fmt.Errorf("generate bot ID: %w", err)
}
secret, err := generateSecret()
if err != nil {
return nil, fmt.Errorf("generate secret: %w", err)
}
// Source files must exist before writeManifests drops the Dockerfile
// into the same directory.
botDir := filepath.Join(p.cfg.RepoDir, "bots", "evolved", botName)
if err := p.writeBotDir(program, botDir); err != nil {
return nil, fmt.Errorf("write bot dir: %w", err)
}
if err := p.writeManifests(botName, secret, program); err != nil {
return nil, fmt.Errorf("write manifests: %w", err)
}
// Best-effort local image build; CI pipeline is the authoritative builder.
if buildErr := p.buildAndPushImage(ctx, botDir, image); buildErr != nil {
fmt.Printf("promoter: docker build skipped (%v) — CI will build the image\n", buildErr)
}
commitMsg := fmt.Sprintf("Add evolved bot %s (island=%s gen=%d program_id=%d)",
botName, program.Island, program.Generation, program.ID)
// remove=false: stage and commit the newly written files.
if err := p.gitCommitPush(ctx, botName, commitMsg, false); err != nil {
return nil, fmt.Errorf("git commit/push: %w", err)
}
// The bots row is only inserted once the Deployment reports a replica.
if err := p.waitForDeployment(ctx, botName); err != nil {
return nil, fmt.Errorf("wait for deployment: %w", err)
}
// Insert bot record directly into the bots table (same DB as programs).
// The secret is encrypted only when an encryption key is configured.
storedSecret := secret
if p.cfg.EncryptionKey != "" {
storedSecret, err = encryptAESGCM(secret, p.cfg.EncryptionKey)
if err != nil {
return nil, fmt.Errorf("encrypt secret: %w", err)
}
}
_, err = p.rawDB.ExecContext(ctx, `
INSERT INTO bots (bot_id, name, owner, endpoint_url, shared_secret, status, description, last_active)
VALUES ($1, $2, $3, $4, $5, 'active', $6, NOW())`,
botID, botName, botOwner, endpoint, storedSecret,
fmt.Sprintf("Evolved bot — island=%s gen=%d program_id=%d",
program.Island, program.Generation, program.ID),
)
if err != nil {
return nil, fmt.Errorf("insert bot record: %w", err)
}
// Record the promotion on the programs row: flag, bot ID, then name/secret.
if err := p.store.SetPromoted(ctx, program.ID); err != nil {
return nil, fmt.Errorf("set promoted: %w", err)
}
if err := p.store.SetBotID(ctx, program.ID, botID); err != nil {
return nil, fmt.Errorf("set bot_id: %w", err)
}
// Note: the programs table receives the unencrypted secret, unlike the
// bots table above.
if err := p.store.SetBotNameAndSecret(ctx, program.ID, botName, secret); err != nil {
return nil, fmt.Errorf("set bot name/secret: %w", err)
}
return &PromotionResult{BotName: botName, BotID: botID, Endpoint: endpoint}, nil
}
// RetireBot takes an evolved bot out of service in three steps: it sets the
// bots row to status 'retired', removes the bot's manifests and source from
// git (skipped when botName is empty; a git failure is only logged), and
// finally clears the promotion on the programs row.
//
// It returns an error if the bots update or the programs update fails.
func (p *Promoter) RetireBot(ctx context.Context, programID int64, botID, botName string) error {
	// Step 1: flip the status in the bots table.
	_, err := p.rawDB.ExecContext(ctx,
		`UPDATE bots SET status = 'retired' WHERE bot_id = $1`, botID)
	if err != nil {
		return fmt.Errorf("retire bot in DB: %w", err)
	}

	// Step 2: drop manifests and source from the repository. The bot is
	// already retired in the database, so a git problem is not fatal.
	if botName != "" {
		msg := fmt.Sprintf("Retire evolved bot %s (program_id=%d)", botName, programID)
		if gitErr := p.gitCommitPush(ctx, botName, msg, true); gitErr != nil {
			fmt.Printf("promoter: git remove failed for %s: %v\n", botName, gitErr)
		}
	}

	// Step 3: clear promoted / bot_id on the program.
	return p.store.UnsetPromoted(ctx, programID)
}
// RetiredCandidate describes a bot that was auto-retired by EnforcePolicy.
type RetiredCandidate struct {
ProgramID int64 // programs.id of the retired program
BotID string // programs.bot_id / bots.bot_id
BotName string // programs.bot_name, empty when the column is NULL
DisplayRating float64 // rating_mu - 2*rating_phi at the time of the query
Reason string // human-readable cause: below threshold or population cap exceeded
}
// EnforcePolicy applies the retirement policy to the active evolved-bot
// fleet (promoted programs joined to active bots owned by botOwner).
//
// Bots are examined from lowest to highest display rating. A bot is retired
// when its rating is below cfg.RatingThreshold, or otherwise when the number
// of bots still standing exceeds cfg.PopCap. Each selected bot is then passed
// to RetireBot in that same order.
//
// It returns the bots that were retired. If a retirement fails, the bots
// retired before the failure are returned together with the error.
func (p *Promoter) EnforcePolicy(ctx context.Context) ([]RetiredCandidate, error) {
	rows, err := p.rawDB.QueryContext(ctx, `
SELECT p.id, p.bot_id, COALESCE(p.bot_name, ''),
b.rating_mu - 2*b.rating_phi AS display_rating
FROM programs p
JOIN bots b ON p.bot_id = b.bot_id
WHERE p.promoted = TRUE
AND p.bot_id IS NOT NULL
AND b.status = 'active'
AND b.owner = $1
ORDER BY display_rating ASC`, botOwner)
	if err != nil {
		return nil, fmt.Errorf("query promoted bots: %w", err)
	}
	defer rows.Close()

	// Load the whole fleet first; Reason stays empty until a bot is selected.
	var fleet []RetiredCandidate
	for rows.Next() {
		var c RetiredCandidate
		if err := rows.Scan(&c.ProgramID, &c.BotID, &c.BotName, &c.DisplayRating); err != nil {
			return nil, fmt.Errorf("scan bot: %w", err)
		}
		fleet = append(fleet, c)
	}
	if err := rows.Err(); err != nil {
		return nil, err
	}

	// Walk the fleet weakest-first, shrinking the standing count with every
	// bot that is selected for retirement.
	threshold, popCap := p.cfg.RatingThreshold, p.cfg.PopCap
	standing := len(fleet)
	var toRetire []RetiredCandidate
	for _, c := range fleet {
		switch {
		case c.DisplayRating < threshold:
			c.Reason = fmt.Sprintf("display rating %.0f < threshold %.0f",
				c.DisplayRating, threshold)
		case standing > popCap:
			c.Reason = fmt.Sprintf("population cap %d exceeded (currently %d)",
				popCap, standing)
		default:
			continue
		}
		toRetire = append(toRetire, c)
		standing--
	}

	for i, c := range toRetire {
		if err := p.RetireBot(ctx, c.ProgramID, c.BotID, c.BotName); err != nil {
			return toRetire[:i], fmt.Errorf("retire bot %s: %w", c.BotID, err)
		}
	}
	return toRetire, nil
}
// ── file writing ─────────────────────────────────────────────────────────────
// writeBotDir creates dir and writes the program's source into it using the
// file layout for its language (plus go.mod for Go and Cargo.toml for Rust).
// An unknown language returns an error after dir has been created.
func (p *Promoter) writeBotDir(program *db.Program, dir string) error {
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return err
	}

	// put writes content to a path relative to dir with mode 0644.
	put := func(content string, rel ...string) error {
		target := filepath.Join(append([]string{dir}, rel...)...)
		return os.WriteFile(target, []byte(content), 0o644)
	}
	source := program.Code

	switch lang := program.Language; lang {
	case "python":
		return put(source, "bot.py")
	case "typescript":
		return put(source, "bot.ts")
	case "java":
		return put(source, "Bot.java")
	case "php":
		return put(source, "bot.php")
	case "go":
		if err := put(source, "bot.go"); err != nil {
			return err
		}
		return put("module bot\n\ngo 1.24.3\n", "go.mod")
	case "rust":
		// Cargo expects the entry point under src/.
		if err := os.MkdirAll(filepath.Join(dir, "src"), 0o755); err != nil {
			return err
		}
		if err := put(source, "src", "main.rs"); err != nil {
			return err
		}
		return put("[package]\nname = \"bot\"\nversion = \"0.1.0\"\nedition = \"2021\"\n", "Cargo.toml")
	default:
		return fmt.Errorf("unsupported language: %s", lang)
	}
}
// dockerfileFor looks up the Dockerfile text for a bot written in language.
// Supported values are "go", "python", "rust", "typescript", "java" and
// "php"; anything else yields an "unsupported language" error.
func dockerfileFor(language string) (string, error) {
	dockerfiles := map[string]string{
		"go": `FROM golang:1.24-alpine AS builder
WORKDIR /app
COPY go.mod go.mod
COPY bot.go bot.go
RUN go build -o bot .
FROM alpine:3.21
WORKDIR /app
COPY --from=builder /app/bot .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["./bot"]
`,
		"python": `FROM python:3.12-slim
WORKDIR /app
COPY bot.py .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["python3", "bot.py"]
`,
		"rust": `FROM rust:1.85-alpine AS builder
WORKDIR /app
COPY Cargo.toml Cargo.toml
COPY src ./src
RUN cargo build --release
FROM alpine:3.21
WORKDIR /app
COPY --from=builder /app/target/release/bot .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["./bot"]
`,
		"typescript": `FROM node:22-alpine AS builder
WORKDIR /app
COPY bot.ts .
RUN npm install -g typescript && tsc --target ES2020 --module commonjs bot.ts
FROM node:22-alpine
WORKDIR /app
COPY --from=builder /app/bot.js .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["node", "bot.js"]
`,
		"java": `FROM eclipse-temurin:21-alpine AS builder
WORKDIR /app
COPY Bot.java .
RUN javac Bot.java
FROM eclipse-temurin:21-jre-alpine
WORKDIR /app
COPY --from=builder /app/*.class .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["java", "Bot"]
`,
		"php": `FROM php:8.3-cli-alpine
WORKDIR /app
COPY bot.php .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["php", "bot.php"]
`,
	}

	contents, known := dockerfiles[language]
	if !known {
		return "", fmt.Errorf("unsupported language: %s", language)
	}
	return contents, nil
}
// manifestData is the template context for K8s YAML generation.
// writeManifests fills it once and passes it to all three manifest templates.
type manifestData struct {
Name string // bot name; used for resource names, labels and the image name
Namespace string // metadata.namespace of every generated resource
Island string // value of the acb/island label on the Deployment
Generation int // program generation; not referenced by the templates in this file
Registry string // container registry prefix for the image reference
Port int // BOT_PORT value, container port and Service port
SecretBase64 string // base64-encoded bot secret placed in the Secret's data
}
// secretManifestTmpl renders the Opaque Secret "<Name>-secret" holding the
// bot's shared secret under the key "bot-secret". It reads the Name,
// Namespace and SecretBase64 fields of the template data.
//
// The YAML nesting is significant: name, namespace and labels must sit under
// metadata, and bot-secret under data, or the manifest is not a valid Secret.
var secretManifestTmpl = template.Must(template.New("secret").Parse(`apiVersion: v1
kind: Secret
metadata:
  name: {{.Name}}-secret
  namespace: {{.Namespace}}
  labels:
    app.kubernetes.io/name: {{.Name}}
    app.kubernetes.io/part-of: ai-code-battle
    app.kubernetes.io/component: evolved-bot
type: Opaque
data:
  bot-secret: {{.SecretBase64}}
`))
// deployManifestTmpl renders the single-replica Deployment for an evolved
// bot. It reads the Name, Namespace, Island, Registry and Port fields of the
// template data. The container image is "<Registry>/<Name>:latest", BOT_SECRET
// comes from the "bot-secret" key of the "<Name>-secret" Secret, and both
// probes hit /health on the named "http" port.
//
// The YAML nesting is significant: without it the pod template, container
// list and probes collapse into top-level keys and the manifest is rejected.
var deployManifestTmpl = template.Must(template.New("deploy").Parse(`apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{.Name}}
  namespace: {{.Namespace}}
  labels:
    app.kubernetes.io/name: {{.Name}}
    app.kubernetes.io/part-of: ai-code-battle
    app.kubernetes.io/component: evolved-bot
    acb/island: {{.Island}}
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: {{.Name}}
  template:
    metadata:
      labels:
        app.kubernetes.io/name: {{.Name}}
        app.kubernetes.io/part-of: ai-code-battle
        app.kubernetes.io/component: evolved-bot
        acb/island: {{.Island}}
    spec:
      containers:
        - name: bot
          image: {{.Registry}}/{{.Name}}:latest
          env:
            - name: BOT_PORT
              value: "{{.Port}}"
            - name: BOT_SECRET
              valueFrom:
                secretKeyRef:
                  name: {{.Name}}-secret
                  key: bot-secret
          ports:
            - name: http
              containerPort: {{.Port}}
              protocol: TCP
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 3
            periodSeconds: 10
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
            limits:
              memory: 128Mi
      restartPolicy: Always
`))
// svcManifestTmpl renders the ClusterIP Service that fronts an evolved bot.
// It reads the Name, Namespace and Port fields of the template data, selects
// pods by the app.kubernetes.io/name label and forwards Port to the pod's
// named "http" port.
//
// The YAML nesting is significant: type, selector and ports must sit under
// spec for the manifest to describe a Service.
var svcManifestTmpl = template.Must(template.New("svc").Parse(`apiVersion: v1
kind: Service
metadata:
  name: {{.Name}}
  namespace: {{.Namespace}}
  labels:
    app.kubernetes.io/name: {{.Name}}
    app.kubernetes.io/part-of: ai-code-battle
    app.kubernetes.io/component: evolved-bot
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: {{.Name}}
  ports:
    - name: http
      port: {{.Port}}
      targetPort: http
      protocol: TCP
`))
// writeManifests writes the language-specific Dockerfile into the bot's
// source directory and renders the Secret, Deployment and Service manifests
// to deploy/k8s/{secrets,deployments,services}/<botName>.yaml under RepoDir.
//
// secret is base64-encoded before being placed in the Secret manifest. Every
// target directory is created if missing, so the function also works on a
// checkout that has no deploy/k8s tree yet. Errors are wrapped with the name
// of the artefact that failed.
func (p *Promoter) writeManifests(botName, secret string, program *db.Program) error {
	data := manifestData{
		Name:         botName,
		Namespace:    p.cfg.Namespace,
		Island:       program.Island,
		Generation:   program.Generation,
		Registry:     p.cfg.Registry,
		Port:         botPort,
		SecretBase64: base64.StdEncoding.EncodeToString([]byte(secret)),
	}

	// Dockerfile lives next to the bot source (normally created by writeBotDir).
	dockerfile, err := dockerfileFor(program.Language)
	if err != nil {
		return fmt.Errorf("dockerfile: %w", err)
	}
	botDir := filepath.Join(p.cfg.RepoDir, "bots", "evolved", botName)
	if err := os.MkdirAll(botDir, 0o755); err != nil {
		return fmt.Errorf("create bot dir: %w", err)
	}
	if err := os.WriteFile(filepath.Join(botDir, "Dockerfile"), []byte(dockerfile), 0o644); err != nil {
		return fmt.Errorf("write Dockerfile: %w", err)
	}

	// One manifest per kind, each in its own sub-directory of deploy/k8s.
	manifests := []struct {
		kind   string
		subdir string
		tmpl   *template.Template
	}{
		{"secret", "secrets", secretManifestTmpl},
		{"deployment", "deployments", deployManifestTmpl},
		{"service", "services", svcManifestTmpl},
	}
	for _, m := range manifests {
		dir := filepath.Join(p.cfg.RepoDir, "deploy", "k8s", m.subdir)
		if err := os.MkdirAll(dir, 0o755); err != nil {
			return fmt.Errorf("create %s manifest dir: %w", m.kind, err)
		}
		if err := renderToFile(filepath.Join(dir, botName+".yaml"), m.tmpl, data); err != nil {
			return fmt.Errorf("%s manifest: %w", m.kind, err)
		}
	}
	return nil
}
// renderToFile executes tmpl with data into memory and, only if rendering
// succeeded, writes the result to path with mode 0644. A template error is
// returned before anything is written.
func renderToFile(path string, tmpl *template.Template, data any) error {
	rendered := new(bytes.Buffer)
	err := tmpl.Execute(rendered, data)
	if err == nil {
		err = os.WriteFile(path, rendered.Bytes(), 0o644)
	}
	return err
}
// ── git operations ────────────────────────────────────────────────────────────
// gitCommitPush stages, commits, and pushes changes for botName.
// When remove=true it runs `git rm` to delete the files; otherwise `git add`.
//
// The paths handled are the bot's source directory and its Deployment,
// Service and Secret manifests. After staging, the commit and push are
// skipped when nothing is staged for those paths; unrelated untracked or
// modified files elsewhere in the repository do not influence that decision.
// The commit itself records whatever is in the index, and the push targets
// origin/master.
//
// Errors from git include both the underlying error and git's trimmed output.
func (p *Promoter) gitCommitPush(ctx context.Context, botName, msg string, remove bool) error {
	run := func(args ...string) error {
		cmd := exec.CommandContext(ctx, "git", args...)
		cmd.Dir = p.cfg.RepoDir
		if out, err := cmd.CombinedOutput(); err != nil {
			return fmt.Errorf("git %s: %w: %s", args[0], err, strings.TrimSpace(string(out)))
		}
		return nil
	}

	paths := []string{
		filepath.Join("bots", "evolved", botName),
		filepath.Join("deploy", "k8s", "deployments", botName+".yaml"),
		filepath.Join("deploy", "k8s", "services", botName+".yaml"),
		filepath.Join("deploy", "k8s", "secrets", botName+".yaml"),
	}

	if remove {
		for _, path := range paths {
			if err := run("rm", "-rf", "--ignore-unmatch", "--", path); err != nil {
				return err
			}
		}
	} else {
		if err := run(append([]string{"add", "--"}, paths...)...); err != nil {
			return err
		}
	}

	// Skip commit if nothing changed. Only the index is consulted, and only
	// for this bot's paths; stdout is read on its own so warnings printed to
	// stderr cannot be mistaken for staged files.
	diffArgs := append([]string{"diff", "--cached", "--name-only", "--"}, paths...)
	diffCmd := exec.CommandContext(ctx, "git", diffArgs...)
	diffCmd.Dir = p.cfg.RepoDir
	staged, err := diffCmd.Output()
	if err != nil {
		return fmt.Errorf("git diff --cached: %w", err)
	}
	if len(strings.TrimSpace(string(staged))) == 0 {
		return nil
	}

	if err := run("commit", "-m", msg); err != nil {
		return err
	}
	return run("push", "origin", "master")
}
// ── deployment readiness ──────────────────────────────────────────────────────
// waitForDeployment polls the named Deployment every 15 seconds until it
// reports at least one available replica. It gives up with an error once a
// poll finishes after cfg.DeployWaitTimeout has elapsed, and returns ctx.Err()
// if the context ends first. Poll errors are printed and polling continues.
func (p *Promoter) waitForDeployment(ctx context.Context, name string) error {
	timeout := p.cfg.DeployWaitTimeout
	giveUpAt := time.Now().Add(timeout)
	poll := time.NewTicker(15 * time.Second)
	defer poll.Stop()

	fmt.Printf("promoter: waiting for deployment %s to be ready (timeout=%s)…\n",
		name, timeout)

	for {
		// Block until the next tick or cancellation.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-poll.C:
		}

		ready, err := p.availableReplicas(ctx, name)
		switch {
		case err != nil:
			fmt.Printf("promoter: kubectl poll error: %v\n", err)
		case ready >= 1:
			fmt.Printf("promoter: deployment %s ready (%d replica)\n", name, ready)
			return nil
		}

		// The deadline is only evaluated after a poll.
		if time.Now().After(giveUpAt) {
			return fmt.Errorf("deployment %s not ready after %s", name, timeout)
		}
	}
}
// availableReplicas asks kubectl for .status.availableReplicas of the named
// Deployment in the configured namespace, using cfg.KubectlServer.
//
// Empty output (the field is absent) is reported as 0 replicas. A kubectl
// failure or output that does not start with an integer is returned as an
// error rather than being read as zero.
func (p *Promoter) availableReplicas(ctx context.Context, name string) (int, error) {
	cmd := exec.CommandContext(ctx, "kubectl",
		"--server="+p.cfg.KubectlServer,
		"get", "deployment", name,
		"-n", p.cfg.Namespace,
		"-o", "jsonpath={.status.availableReplicas}",
	)
	out, err := cmd.Output()
	if err != nil {
		return 0, err
	}
	s := strings.TrimSpace(string(out))
	if s == "" {
		return 0, nil
	}
	var n int
	if _, err := fmt.Sscanf(s, "%d", &n); err != nil {
		return 0, fmt.Errorf("parse availableReplicas %q: %w", s, err)
	}
	return n, nil
}
// ── container image build ─────────────────────────────────────────────────────
// buildAndPushImage builds the image from botDir with the local docker CLI
// and pushes it under the given image reference.
//
// It returns an error when docker is not on PATH, or when the build or push
// command fails. The error wraps the underlying failure and carries up to
// 512 bytes of the command's combined output, so a failure that produced no
// output still says what went wrong.
func (p *Promoter) buildAndPushImage(ctx context.Context, botDir, image string) error {
	if _, err := exec.LookPath("docker"); err != nil {
		return fmt.Errorf("docker not in PATH: %w", err)
	}
	build := exec.CommandContext(ctx, "docker", "build", "-t", image, botDir)
	if out, err := build.CombinedOutput(); err != nil {
		return fmt.Errorf("docker build: %w: %s", err, truncate(string(out), 512))
	}
	push := exec.CommandContext(ctx, "docker", "push", image)
	if out, err := push.CombinedOutput(); err != nil {
		return fmt.Errorf("docker push: %w: %s", err, truncate(string(out), 512))
	}
	return nil
}
// ── crypto helpers ────────────────────────────────────────────────────────────
// generateBotID returns a fresh bot identifier: the prefix "b_" followed by
// 8 lowercase hex characters drawn from crypto/rand (10 characters total).
func generateBotID() (string, error) {
	var raw [4]byte
	_, err := rand.Read(raw[:])
	if err != nil {
		return "", err
	}
	id := "b_" + hex.EncodeToString(raw[:])
	return id, nil
}
// generateSecret returns 32 bytes from crypto/rand encoded as 64 lowercase
// hex characters.
func generateSecret() (string, error) {
	var raw [32]byte
	_, err := rand.Read(raw[:])
	if err != nil {
		return "", err
	}
	secret := hex.EncodeToString(raw[:])
	return secret, nil
}
// encryptAESGCM seals plaintext with AES-256-GCM under the hex-encoded key
// keyHex and returns hex(nonce || ciphertext+tag). A fresh random nonce is
// generated for every call. keyHex must decode to exactly 32 bytes, otherwise
// an "invalid AES-256-GCM key" error is returned.
func encryptAESGCM(plaintext, keyHex string) (string, error) {
	rawKey, decodeErr := hex.DecodeString(keyHex)
	if decodeErr != nil || len(rawKey) != 32 {
		return "", fmt.Errorf("invalid AES-256-GCM key (must be 64 hex chars)")
	}

	blockCipher, err := aes.NewCipher(rawKey)
	if err != nil {
		return "", err
	}
	gcm, err := cipher.NewGCM(blockCipher)
	if err != nil {
		return "", err
	}

	iv := make([]byte, gcm.NonceSize())
	if _, err = io.ReadFull(rand.Reader, iv); err != nil {
		return "", err
	}

	// Prefix the output with the nonce so a reader can recover it.
	sealed := gcm.Seal(nil, iv, []byte(plaintext), nil)
	packed := append(iv, sealed...)
	return hex.EncodeToString(packed), nil
}
// truncate shortens s to at most max bytes and appends "…" when anything was
// cut off. Strings that already fit are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: it backs up to the
// start of the rune that straddles the limit, so the result stays valid
// UTF-8 whenever s is. A negative max is treated as 0.
func truncate(s string, max int) string {
	if len(s) <= max {
		return s
	}
	if max < 0 {
		max = 0
	}
	cut := max
	// Continuation bytes have the form 10xxxxxx; step back past them.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "…"
}

View file

@ -0,0 +1,194 @@
package promoter
import (
"strings"
"testing"
)
// ── dockerfileFor ─────────────────────────────────────────────────────────────
// TestDockerfileFor_AllSupportedLanguages checks that every supported
// language yields a Dockerfile containing the instructions the deployment
// relies on: a base image, both bot env vars and the exposed port.
func TestDockerfileFor_AllSupportedLanguages(t *testing.T) {
	required := []struct {
		needle string
		what   string
	}{
		{"FROM ", "FROM instruction"},
		{"BOT_PORT", "BOT_PORT env var"},
		{"BOT_SECRET", "BOT_SECRET env var"},
		{"EXPOSE 8080", "EXPOSE 8080"},
	}

	for _, lang := range []string{"go", "python", "rust", "typescript", "java", "php"} {
		t.Run(lang, func(t *testing.T) {
			contents, err := dockerfileFor(lang)
			if err != nil {
				t.Fatalf("dockerfileFor(%q) error: %v", lang, err)
			}
			for _, req := range required {
				if !strings.Contains(contents, req.needle) {
					t.Errorf("Dockerfile for %q missing %s", lang, req.what)
				}
			}
		})
	}
}
// TestDockerfileFor_UnsupportedLanguage verifies that an unknown language is
// rejected with an error.
func TestDockerfileFor_UnsupportedLanguage(t *testing.T) {
	if _, err := dockerfileFor("cobol"); err == nil {
		t.Error("expected error for unsupported language, got nil")
	}
}
// TestDockerfileFor_GoUsesMultistage verifies that the Go Dockerfile builds
// in a golang image and uses a named builder stage.
func TestDockerfileFor_GoUsesMultistage(t *testing.T) {
	df, err := dockerfileFor("go")
	if err != nil {
		// Fail here so a lookup error is not reported as a missing stage.
		t.Fatalf("dockerfileFor(%q) error: %v", "go", err)
	}
	if !strings.Contains(df, "AS builder") {
		t.Error("Go Dockerfile should use multi-stage build")
	}
	if !strings.Contains(df, "golang:") {
		t.Error("Go Dockerfile should use a golang base image")
	}
}
// TestDockerfileFor_RustUsesMultistage verifies that the Rust Dockerfile uses
// a named builder stage.
func TestDockerfileFor_RustUsesMultistage(t *testing.T) {
	df, err := dockerfileFor("rust")
	if err != nil {
		// Fail here so a lookup error is not reported as a missing stage.
		t.Fatalf("dockerfileFor(%q) error: %v", "rust", err)
	}
	if !strings.Contains(df, "AS builder") {
		t.Error("Rust Dockerfile should use multi-stage build")
	}
}
// ── generateBotID ─────────────────────────────────────────────────────────────
// TestGenerateBotID_Format checks the "b_" prefix and the fixed length of a
// generated bot ID.
func TestGenerateBotID_Format(t *testing.T) {
	// b_ + 8 hex chars = 10 total
	const wantLen = 10

	got, err := generateBotID()
	if err != nil {
		t.Fatalf("generateBotID error: %v", err)
	}
	if hasPrefix := strings.HasPrefix(got, "b_"); !hasPrefix {
		t.Errorf("bot ID %q does not start with 'b_'", got)
	}
	if n := len(got); n != wantLen {
		t.Errorf("bot ID %q has length %d, want 10", got, n)
	}
}
// TestGenerateBotID_Uniqueness generates 100 bot IDs and reports any value
// that appears more than once.
func TestGenerateBotID_Uniqueness(t *testing.T) {
	const rounds = 100
	issued := make(map[string]struct{}, rounds)
	for i := 0; i < rounds; i++ {
		id, err := generateBotID()
		if err != nil {
			t.Fatalf("generateBotID error at iteration %d: %v", i, err)
		}
		if _, dup := issued[id]; dup {
			t.Errorf("duplicate bot ID generated: %s", id)
		}
		issued[id] = struct{}{}
	}
}
// ── generateSecret ────────────────────────────────────────────────────────────
// TestGenerateSecret_Length checks that a generated secret has the expected
// hex-encoded length.
func TestGenerateSecret_Length(t *testing.T) {
	// 32 random bytes encoded as 64 hex chars
	const wantLen = 64

	secret, err := generateSecret()
	if err != nil {
		t.Fatalf("generateSecret error: %v", err)
	}
	if n := len(secret); n != wantLen {
		t.Errorf("secret %q has length %d, want 64", secret, n)
	}
}
// TestGenerateSecret_Uniqueness generates 50 secrets and reports any value
// that appears more than once.
func TestGenerateSecret_Uniqueness(t *testing.T) {
	const rounds = 50
	issued := make(map[string]struct{}, rounds)
	for i := 0; i < rounds; i++ {
		secret, err := generateSecret()
		if err != nil {
			t.Fatalf("generateSecret error at iteration %d: %v", i, err)
		}
		if _, dup := issued[secret]; dup {
			t.Errorf("duplicate secret generated: %s", secret)
		}
		issued[secret] = struct{}{}
	}
}
// ── encryptAESGCM / decryptAESGCM ─────────────────────────────────────────────
// TestEncryptDecryptAESGCM_RoundTrip checks the observable shape of
// encryptAESGCM output: it must not expose the plaintext, must be lowercase
// hex of the expected length (12-byte nonce + ciphertext + 16-byte GCM tag),
// and must differ between calls because each call draws a fresh nonce.
//
// Decryption itself is not exercised here: this file imports only strings and
// testing, and the promoter source in this package defines no decrypt helper.
func TestEncryptDecryptAESGCM_RoundTrip(t *testing.T) {
	// 32-byte key = 64 hex chars
	key := strings.Repeat("ab", 32) // "abababab..." 64 chars
	plaintext := "my-super-secret-bot-key"

	ct, err := encryptAESGCM(plaintext, key)
	if err != nil {
		t.Fatalf("encrypt: %v", err)
	}
	if ct == plaintext {
		t.Fatal("ciphertext should differ from plaintext")
	}
	if strings.Contains(ct, plaintext) {
		t.Error("ciphertext should not contain the plaintext")
	}
	if rest := strings.Trim(ct, "0123456789abcdef"); rest != "" {
		t.Errorf("ciphertext %q is not lowercase hex", ct)
	}
	// hex(nonce || ciphertext || tag) with the standard GCM sizes.
	if want := 2 * (12 + len(plaintext) + 16); len(ct) != want {
		t.Errorf("ciphertext has length %d, want %d", len(ct), want)
	}

	again, err := encryptAESGCM(plaintext, key)
	if err != nil {
		t.Fatalf("second encrypt: %v", err)
	}
	if again == ct {
		t.Error("two encryptions of the same plaintext should differ (random nonce)")
	}
}
// TestEncryptAESGCM_InvalidKey verifies that a key which is not valid hex is
// rejected.
func TestEncryptAESGCM_InvalidKey(t *testing.T) {
	if _, err := encryptAESGCM("plaintext", "notahexkey"); err == nil {
		t.Error("expected error for invalid key")
	}
}
// ── manifest templates ────────────────────────────────────────────────────────
// TestManifestTemplates_Execute renders the Secret, Deployment and Service
// templates with one sample manifestData value and checks that each output
// contains the values it is expected to interpolate.
func TestManifestTemplates_Execute(t *testing.T) {
	data := manifestData{
		Name:         "acb-evo-test",
		Namespace:    "ai-code-battle",
		Island:       "alpha",
		Generation:   1,
		Registry:     "registry.example.com/acb",
		Port:         8080,
		SecretBase64: "dGVzdA==",
	}

	// Test secret manifest
	var buf strings.Builder
	if err := secretManifestTmpl.Execute(&buf, data); err != nil {
		t.Fatalf("secretManifestTmpl.Execute: %v", err)
	}
	out := buf.String()
	if !strings.Contains(out, "acb-evo-test-secret") {
		t.Error("secret manifest missing expected name")
	}
	if !strings.Contains(out, "dGVzdA==") {
		t.Error("secret manifest missing base64 secret")
	}

	// Test deployment manifest
	buf.Reset()
	if err := deployManifestTmpl.Execute(&buf, data); err != nil {
		t.Fatalf("deployManifestTmpl.Execute: %v", err)
	}
	out = buf.String()
	if !strings.Contains(out, "acb-evo-test") {
		t.Error("deployment manifest missing bot name")
	}
	if !strings.Contains(out, "registry.example.com/acb/acb-evo-test:latest") {
		t.Error("deployment manifest missing full image reference")
	}
	if !strings.Contains(out, "acb/island: alpha") {
		t.Error("deployment manifest missing island label")
	}

	// Test service manifest
	buf.Reset()
	if err := svcManifestTmpl.Execute(&buf, data); err != nil {
		t.Fatalf("svcManifestTmpl.Execute: %v", err)
	}
	out = buf.String()
	if !strings.Contains(out, "ClusterIP") {
		t.Error("service manifest missing ClusterIP type")
	}
}

View file

@ -7,6 +7,8 @@
// stats Print program counts per island
// validate Run the 3-stage validation pipeline on a bot source file
// validation-stats Show per-island validation pass-rate metrics
// evaluate Run the 10-match arena tournament and apply the promotion gate
// retire Enforce retirement policy (rating threshold + population cap)
package main
import (
@ -21,6 +23,9 @@ import (
_ "github.com/lib/pq"
evolverdb "github.com/aicodebattle/acb/cmd/acb-evolver/internal/db"
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/arena"
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites"
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/promoter"
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/validator"
)
@ -38,6 +43,16 @@ func main() {
ctx := context.Background()
switch os.Args[1] {
case "evaluate":
db := mustOpenDB(dbURL)
defer db.Close()
runEvaluate(ctx, db, os.Args[2:])
case "retire":
db := mustOpenDB(dbURL)
defer db.Close()
runRetire(ctx, db, os.Args[2:])
case "init-schema":
db := mustOpenDB(dbURL)
defer db.Close()
@ -90,11 +105,258 @@ func main() {
default:
fmt.Fprintf(os.Stderr, "unknown subcommand %q\n", os.Args[1])
fmt.Fprintln(os.Stderr, "usage: acb-evolver <init-schema|seed|stats|validate|validation-stats>")
fmt.Fprintln(os.Stderr, "usage: acb-evolver <init-schema|seed|stats|validate|validation-stats|evaluate|retire>")
os.Exit(1)
}
}
// runEvaluate runs the 10-match mini-tournament and applies the promotion gate.
//
//	evaluate -lang go [-program-id 0] [-promote] [-nash 0.5] [-win-lower 0.4] [-nolog]
//	         [-repo-dir DIR] [-registry REGISTRY] [-kubectl-server URL] [-enc-key HEX] <file>
//
// The candidate source is read from <file> and played through the arena. The
// overall win rate is used as the fitness value. When -program-id is positive
// the matching programs row is loaded and, unless -nolog is set, its fitness
// is updated. The gate decision is always printed; deployment is attempted
// only when the gate passes and -promote is set, which requires -program-id.
// Unrecoverable errors terminate the process.
func runEvaluate(ctx context.Context, db *sql.DB, args []string) {
	fs := flag.NewFlagSet("evaluate", flag.ExitOnError)
	lang := fs.String("lang", "", "bot language (go|python|rust|typescript|java|php) [required]")
	programID := fs.Int64("program-id", 0, "programs.id to update fitness after evaluation (0 = skip)")
	doPromote := fs.Bool("promote", false, "promote the candidate if the gate passes")
	nashThreshold := fs.Float64("nash", 0.50, "Nash value threshold for promotion")
	winLower := fs.Float64("win-lower", 0.40, "Wilson CI lower-bound threshold (0 to disable)")
	nolog := fs.Bool("nolog", false, "skip writing the fitness update to DB")
	// Promoter flags (used only when -promote is set)
	repoDir := fs.String("repo-dir", envOrDefault("ACB_REPO_DIR", "."), "git repo root for K8s manifests")
	registry := fs.String("registry", envOrDefault("ACB_REGISTRY", "forgejo.ardenone.com/ai-code-battle"), "container registry")
	kubectlServer := fs.String("kubectl-server", envOrDefault("ACB_KUBECTL_SERVER", "http://kubectl-ardenone-cluster:8001"), "kubectl API server URL")
	encKey := fs.String("enc-key", os.Getenv("ACB_ENCRYPTION_KEY"), "AES-256-GCM encryption key (hex) for bots table")
	if err := fs.Parse(args); err != nil {
		os.Exit(1)
	}
	if *lang == "" {
		fmt.Fprintln(os.Stderr, "evaluate: -lang is required")
		fs.Usage()
		os.Exit(1)
	}
	if fs.NArg() < 1 {
		fmt.Fprintln(os.Stderr, "evaluate: file argument is required")
		fs.Usage()
		os.Exit(1)
	}
	// Fail fast: promotion needs a programs row, so reject this flag
	// combination before spending time on the tournament.
	if *doPromote && *programID <= 0 {
		log.Fatalf("promote: -program-id is required when -promote is set")
	}

	code, err := os.ReadFile(fs.Arg(0))
	if err != nil {
		log.Fatalf("read file: %v", err)
	}
	store := evolverdb.NewStore(db)

	// Pre-populate MAP-Elites grid from existing promoted programs so the gate
	// can detect niche collisions against the current population.
	const gridSize = 10
	grid := mapelites.New(gridSize)
	if promoted, err := store.ListPromoted(ctx); err != nil {
		// Keep going (best effort), but make it visible: with an empty grid
		// the gate has no existing population to compare the candidate with.
		log.Printf("warn: list promoted programs: %v (gate will use an empty MAP-Elites grid)", err)
	} else {
		for _, pp := range promoted {
			// Only programs with at least two behavior dimensions can be placed.
			if len(pp.BehaviorVector) >= 2 {
				grid.TryPlace(pp.ProgramID, pp.Fitness, pp.BehaviorVector[0], pp.BehaviorVector[1])
			}
		}
	}

	// Run the arena tournament.
	arenaCfg := arena.DefaultConfig()
	a := arena.New(db, arenaCfg)
	fmt.Printf("evaluate: running %d-match tournament for %s bot…\n", arena.DefaultNumMatches, *lang)
	result, err := a.Run(ctx, string(code), *lang)
	if err != nil {
		log.Fatalf("arena: %v", err)
	}

	// Print match summary. Errored matches are reported separately and are
	// not part of the W/L/D total.
	total := result.Wins + result.Losses + result.Draws
	fmt.Printf("\nTournament result: %d W / %d L / %d D / %d err (total=%d)\n",
		result.Wins, result.Losses, result.Draws, result.Errors, total)
	wr := arena.ComputeFromResult(result)
	fmt.Printf("Win rate: %.3f (95%% CI %.3f-%.3f)\n", wr.Rate, wr.Lower, wr.Upper)
	nash := arena.ComputeNash(result.WinRateVec)
	fmt.Printf("Nash value (PSRO): %.3f (opponent mix: %v)\n", nash.NashValue, nash.WinRatePerOpponent)

	// Compute fitness as overall win rate.
	fitness := wr.Rate

	// Look up the program if -program-id was given.
	var program *evolverdb.Program
	if *programID > 0 {
		program, err = store.Get(ctx, *programID)
		if err != nil {
			log.Fatalf("get program %d: %v", *programID, err)
		}
		if program == nil {
			log.Fatalf("program %d not found", *programID)
		}
		// Update fitness in DB.
		if !*nolog {
			if err := store.UpdateFitness(ctx, *programID, fitness, program.BehaviorVector); err != nil {
				log.Printf("warn: update fitness: %v", err)
			} else {
				fmt.Printf("Updated program %d fitness to %.3f\n", *programID, fitness)
			}
		}
	}

	// Apply the promotion gate.
	gateCfg := arena.GateConfig{
		NashThreshold:     *nashThreshold,
		WinRateLowerBound: *winLower,
	}
	gate := arena.NewGate(gateCfg, grid)
	// Without a loaded program there is no behavior vector to hand to the gate.
	var behaviorVec []float64
	if program != nil {
		behaviorVec = program.BehaviorVector
	}
	gateResult := gate.Evaluate(result, *programID, fitness, behaviorVec)
	fmt.Printf("\nGate: %s\n", gateResult.Reason)
	fmt.Printf("MAP-Elites: placed=%v improved=%v cell=[%d,%d]\n",
		gateResult.MapElitesPlaced, gateResult.MapElitesImproved,
		gateResult.Placement.X, gateResult.Placement.Y)
	if !gateResult.Promoted {
		fmt.Println("Decision: REJECTED")
		return
	}
	fmt.Println("Decision: PROMOTED")
	if !*doPromote {
		fmt.Println("(pass -promote to execute deployment)")
		return
	}

	// program is non-nil here: -promote requires a positive -program-id
	// (checked above) and a failed or empty lookup is fatal.
	promCfg := promoter.DefaultConfig()
	promCfg.Registry = *registry
	promCfg.RepoDir = *repoDir
	promCfg.KubectlServer = *kubectlServer
	promCfg.EncryptionKey = *encKey
	p := promoter.New(store, db, promCfg)
	res, err := p.Promote(ctx, program)
	if err != nil {
		log.Fatalf("promote: %v", err)
	}
	fmt.Printf("Promoted: bot_name=%s bot_id=%s endpoint=%s\n", res.BotName, res.BotID, res.Endpoint)
}
// runRetire enforces the retirement policy (rating threshold + population cap).
//
//	retire [-threshold 1000] [-cap 50] [-dry-run]
//	       [-repo-dir DIR] [-registry REGISTRY] [-kubectl-server URL] [-enc-key HEX]
//
// With -dry-run the function only reads from the database and prints which
// bots would be kept or retired. Otherwise it delegates to the promoter's
// EnforcePolicy and prints every bot that was retired. Unrecoverable errors
// terminate the process.
func runRetire(ctx context.Context, db *sql.DB, args []string) {
	fs := flag.NewFlagSet("retire", flag.ExitOnError)
	threshold := fs.Float64("threshold", 1000.0, "minimum display rating (mu-2*phi) to keep a bot")
	// Named popCap rather than cap to avoid shadowing the builtin.
	popCap := fs.Int("cap", 50, "maximum number of simultaneously promoted evolved bots")
	dryRun := fs.Bool("dry-run", false, "print what would be retired without making changes")
	repoDir := fs.String("repo-dir", envOrDefault("ACB_REPO_DIR", "."), "git repo root")
	registry := fs.String("registry", envOrDefault("ACB_REGISTRY", "forgejo.ardenone.com/ai-code-battle"), "container registry")
	kubectlServer := fs.String("kubectl-server", envOrDefault("ACB_KUBECTL_SERVER", "http://kubectl-ardenone-cluster:8001"), "kubectl API server URL")
	encKey := fs.String("enc-key", os.Getenv("ACB_ENCRYPTION_KEY"), "AES-256-GCM encryption key (hex)")
	if err := fs.Parse(args); err != nil {
		os.Exit(1)
	}

	if *dryRun {
		fmt.Println("retire: dry-run mode — no changes will be made")
		if err := previewRetirements(ctx, db, *threshold, *popCap); err != nil {
			log.Fatal(err)
		}
		return
	}

	promCfg := promoter.DefaultConfig()
	promCfg.RatingThreshold = *threshold
	promCfg.PopCap = *popCap
	promCfg.RepoDir = *repoDir
	promCfg.Registry = *registry
	promCfg.KubectlServer = *kubectlServer
	promCfg.EncryptionKey = *encKey
	p := promoter.New(evolverdb.NewStore(db), db, promCfg)

	retired, err := p.EnforcePolicy(ctx)
	if err != nil {
		log.Fatalf("enforce policy: %v", err)
	}
	if len(retired) == 0 {
		fmt.Println("retire: nothing to retire")
		return
	}
	fmt.Printf("retire: retired %d bot(s):\n", len(retired))
	for _, r := range retired {
		fmt.Printf(" bot_id=%-12s bot_name=%-20s rating=%.0f reason=%s\n",
			r.BotID, r.BotName, r.DisplayRating, r.Reason)
	}
}

// previewRetirements prints a read-only preview of the retirement policy: it
// lists the active promoted bots owned by acb-evolver, lowest display rating
// (rating_mu - 2*rating_phi) first, and marks each one as keep or RETIRE.
//
// A bot is marked RETIRE when its display rating is below threshold, or when
// the number of bots still kept exceeds popCap. It returns an error if the
// query, a row scan, or row iteration fails.
//
// NOTE(review): the decision rule is re-implemented here rather than shared
// with promoter.EnforcePolicy — confirm the two stay in agreement.
func previewRetirements(ctx context.Context, db *sql.DB, threshold float64, popCap int) error {
	rows, err := db.QueryContext(ctx, `
		SELECT p.id, p.bot_id, COALESCE(p.bot_name, ''),
		b.rating_mu - 2*b.rating_phi AS display_rating
		FROM programs p
		JOIN bots b ON p.bot_id = b.bot_id
		WHERE p.promoted = TRUE AND p.bot_id IS NOT NULL
		AND b.status = 'active' AND b.owner = 'acb-evolver'
		ORDER BY display_rating ASC`)
	if err != nil {
		return fmt.Errorf("query: %w", err)
	}
	defer rows.Close()

	type row struct {
		programID      int64
		botID, botName string
		displayRating  float64
	}
	var bots []row
	for rows.Next() {
		var r row
		if err := rows.Scan(&r.programID, &r.botID, &r.botName, &r.displayRating); err != nil {
			return fmt.Errorf("scan: %w", err)
		}
		bots = append(bots, r)
	}
	// An iteration error would otherwise leave a silently truncated list that
	// looks like a complete preview.
	if err := rows.Err(); err != nil {
		return fmt.Errorf("rows: %w", err)
	}

	// remaining counts the bots still kept while walking from the lowest
	// rating upwards, so the cap removes the weakest bots first.
	remaining := len(bots)
	fmt.Printf("Active evolved bots: %d (threshold=%.0f cap=%d)\n", remaining, threshold, popCap)
	for _, b := range bots {
		var why string
		switch {
		case b.displayRating < threshold:
			why = fmt.Sprintf("rating %.0f < threshold", b.displayRating)
		case remaining > popCap:
			why = "over cap"
		}
		mark := " keep"
		if why != "" {
			mark = " RETIRE"
			remaining--
		}
		fmt.Printf("%s bot_id=%-12s bot_name=%-20s rating=%.0f %s\n",
			mark, b.botID, b.botName, b.displayRating, why)
	}
	return nil
}
// envOrDefault returns the value of the environment variable named key, or
// fallback when that variable is unset or set to the empty string.
func envOrDefault(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
// runValidate parses flags, runs the three-stage validation pipeline on a bot
// source file, and optionally logs the result to the database.
//