diff --git a/cmd/acb-evolver/internal/arena/arena.go b/cmd/acb-evolver/internal/arena/arena.go new file mode 100644 index 0000000..0fe0ee4 --- /dev/null +++ b/cmd/acb-evolver/internal/arena/arena.go @@ -0,0 +1,525 @@ +// Package arena implements the 10-match mini-tournament evaluation system +// for evolved bot candidates. +// +// The arena starts the candidate as a local subprocess (the same way the +// sandbox does during validation), selects a diverse set of live opponents +// from the PostgreSQL database, and runs one match per opponent using the +// game engine directly. No job queue or ACB API calls are needed for +// evaluation matches. +package arena + +import ( + "context" + "crypto/aes" + "crypto/cipher" + "database/sql" + "encoding/hex" + "fmt" + "log" + "math/rand" + "net" + "net/http" + "os" + "os/exec" + "sort" + "time" + + "github.com/aicodebattle/acb/engine" + _ "github.com/lib/pq" +) + +const ( + // DefaultNumMatches is the tournament size (10 per spec). + DefaultNumMatches = 10 + + // evalSecret is used for HMAC signing when the candidate runs locally. + // The candidate subprocess is started with BOT_SECRET=evalSecret so that + // the engine's request signatures match what the bot verifies. + evalSecret = "acb-eval-secret-for-tournament-evaluation-only" + + // evalBotID is a placeholder bot ID for arena authentication headers. + evalBotID = "b_evalcandidate" + + // healthPollInterval is the pause between GET /health probes while the + // candidate starts; healthStartupTimeout bounds the total time spent waiting. + healthPollInterval = 200 * time.Millisecond + healthStartupTimeout = 30 * time.Second +) + +// BotRecord holds a live bot's connection details queried from the database. +type BotRecord struct { + BotID string + Name string + EndpointURL string + Secret string // stored secret; replaced by its decrypted form when an encryption key is set and decryption succeeds + RatingMu float64 +} + +// MatchOutcome records the result of one evaluation match. 
+type MatchOutcome struct { + OpponentBotID string + OpponentName string + CandidateSlot int // player slot (0 or 1) assigned to the candidate + Winner int // 0=player0, 1=player1, -1=draw + Scores []int + Turns int + Err error +} + +// CandidateWon returns true when the candidate won this match. +func (o *MatchOutcome) CandidateWon() bool { + return o.Err == nil && o.Winner == o.CandidateSlot +} + +// CandidateLost returns true when the candidate lost (not a draw or error). +func (o *MatchOutcome) CandidateLost() bool { + return o.Err == nil && o.Winner != -1 && o.Winner != o.CandidateSlot +} + +// Result aggregates mini-tournament outcomes for a candidate. +type Result struct { + CandidateEndpoint string + Outcomes []MatchOutcome + + // Aggregate tallies (errors excluded from win/loss/draw counts). + Wins int + Losses int + Draws int + Errors int + + // OpponentWinRates maps opponent BotID → candidate win rate vs that bot. + OpponentWinRates map[string]float64 + + // WinRateVec is an ordered slice of per-opponent win rates (one entry per + // distinct opponent played, in match order, errors omitted). Used by PSRO. + WinRateVec []float64 +} + +// Config controls arena behaviour. +type Config struct { + // NumMatches is the tournament size (default: DefaultNumMatches = 10). + NumMatches int + // BotTimeout is the per-turn HTTP timeout for both bots. + BotTimeout time.Duration + // EncryptionKey is the AES-256-GCM key (hex) used to decrypt opponent + // secrets from the database. Empty means secrets are stored plaintext. + EncryptionKey string +} + +// DefaultConfig returns production-ready arena defaults. +func DefaultConfig() Config { + return Config{ + NumMatches: DefaultNumMatches, + BotTimeout: 3 * time.Second, + } +} + +// Arena orchestrates mini-tournament evaluation of bot candidates. +type Arena struct { + db *sql.DB + cfg Config + rng *rand.Rand + log *log.Logger +} + +// New creates an Arena backed by the given database connection. 
+func New(db *sql.DB, cfg Config) *Arena { + return &Arena{ + db: db, + cfg: cfg, + rng: rand.New(rand.NewSource(time.Now().UnixNano())), + log: log.Default(), + } +} + +// Run executes a mini-tournament for the candidate bot. +// +// code is the candidate's source code; language is one of +// go|python|rust|typescript|java|php. +// +// The candidate is built and started as a local subprocess, then played +// against cfg.NumMatches opponents sampled from the live bot fleet. +func (a *Arena) Run(ctx context.Context, code, language string) (*Result, error) { + proc, err := startCandidate(ctx, code, language) + if err != nil { + return nil, fmt.Errorf("start candidate subprocess: %w", err) + } + defer proc.stop() + + candidateURL := fmt.Sprintf("http://127.0.0.1:%d", proc.port) + + opponents, err := a.selectOpponents(ctx, a.cfg.NumMatches) + if err != nil { + return nil, fmt.Errorf("select opponents: %w", err) + } + if len(opponents) == 0 { + return nil, fmt.Errorf("no active opponents available in live bot fleet") + } + + result := &Result{ + CandidateEndpoint: candidateURL, + OpponentWinRates: make(map[string]float64), + } + + for i, opp := range opponents { + a.log.Printf("arena: match %d/%d vs %s (%s)", i+1, len(opponents), opp.Name, opp.BotID) + outcome := a.runMatch(ctx, candidateURL, opp) + result.Outcomes = append(result.Outcomes, outcome) + + switch { + case outcome.Err != nil: + result.Errors++ + a.log.Printf("arena: match %d error: %v", i+1, outcome.Err) + case outcome.CandidateWon(): + result.Wins++ + case outcome.CandidateLost(): + result.Losses++ + default: + result.Draws++ + } + } + + // Compute per-opponent win rates. 
+ oppWins := make(map[string]int) + oppTotal := make(map[string]int) + for _, o := range result.Outcomes { + if o.Err != nil { + continue + } + oppTotal[o.OpponentBotID]++ + if o.CandidateWon() { + oppWins[o.OpponentBotID]++ + } + } + for id, total := range oppTotal { + if total > 0 { + result.OpponentWinRates[id] = float64(oppWins[id]) / float64(total) + } + } + + // Build ordered win-rate vector for PSRO (one entry per distinct opponent). + seen := make(map[string]bool) + for _, o := range result.Outcomes { + if o.Err != nil || seen[o.OpponentBotID] { + continue + } + seen[o.OpponentBotID] = true + result.WinRateVec = append(result.WinRateVec, result.OpponentWinRates[o.OpponentBotID]) + } + + return result, nil +} + +// selectOpponents queries active bots from the database and picks n opponents +// spread across the rating distribution for behavioral diversity. +func (a *Arena) selectOpponents(ctx context.Context, n int) ([]BotRecord, error) { + rows, err := a.db.QueryContext(ctx, ` + SELECT bot_id, name, endpoint_url, shared_secret, rating_mu + FROM bots + WHERE status = 'active' AND endpoint_url <> '' + ORDER BY rating_mu DESC`) + if err != nil { + return nil, fmt.Errorf("query bots: %w", err) + } + defer rows.Close() + + var all []BotRecord + for rows.Next() { + var b BotRecord + if err := rows.Scan(&b.BotID, &b.Name, &b.EndpointURL, &b.Secret, &b.RatingMu); err != nil { + return nil, fmt.Errorf("scan bot: %w", err) + } + if a.cfg.EncryptionKey != "" { + if plain, err := decryptAESGCM(b.Secret, a.cfg.EncryptionKey); err == nil { + b.Secret = plain + } + // Leave as-is on error (may be stored plaintext in dev). + } + all = append(all, b) + } + if err := rows.Err(); err != nil { + return nil, err + } + return selectDiverse(all, n, a.rng), nil +} + +// selectDiverse picks n bots spread evenly across the rating-sorted slice. +// When fewer than n bots exist, opponents are reused (shuffled for variety). 
+func selectDiverse(all []BotRecord, n int, rng *rand.Rand) []BotRecord { + if len(all) == 0 { + return nil + } + sort.Slice(all, func(i, j int) bool { return all[i].RatingMu > all[j].RatingMu }) + + selected := make([]BotRecord, 0, n) + if len(all) >= n { + for i := 0; i < n; i++ { + idx := int(float64(i) / float64(n) * float64(len(all))) + selected = append(selected, all[idx]) + } + } else { + for len(selected) < n { + perm := rng.Perm(len(all)) + for _, idx := range perm { + selected = append(selected, all[idx]) + if len(selected) >= n { + break + } + } + } + } + rng.Shuffle(len(selected), func(i, j int) { selected[i], selected[j] = selected[j], selected[i] }) + return selected +} + +// runMatch runs one match between the local candidate and a live opponent. +func (a *Arena) runMatch(ctx context.Context, candidateURL string, opp BotRecord) MatchOutcome { + outcome := MatchOutcome{ + OpponentBotID: opp.BotID, + OpponentName: opp.Name, + } + + // Randomise player slot for positional fairness. 
+ candidateSlot := a.rng.Intn(2) + outcome.CandidateSlot = candidateSlot + + matchID := fmt.Sprintf("eval-%d", time.Now().UnixNano()) + mr := engine.NewMatchRunner( + engine.DefaultConfig(), + engine.WithTimeout(a.cfg.BotTimeout), + engine.WithRNG(rand.New(rand.NewSource(a.rng.Int63()))), + ) + + candidateBot := engine.NewHTTPBot(candidateURL, + engine.AuthConfig{BotID: evalBotID, Secret: evalSecret, MatchID: matchID}, + engine.WithHTTPTimeout(a.cfg.BotTimeout)) + + oppBot := engine.NewHTTPBot(opp.EndpointURL, + engine.AuthConfig{BotID: opp.BotID, Secret: opp.Secret, MatchID: matchID}, + engine.WithHTTPTimeout(a.cfg.BotTimeout)) + + if candidateSlot == 0 { + mr.AddBot(candidateBot, "candidate") + mr.AddBot(oppBot, opp.Name) + } else { + mr.AddBot(oppBot, opp.Name) + mr.AddBot(candidateBot, "candidate") + } + + res, _, err := mr.Run() + if err != nil { + outcome.Err = fmt.Errorf("match runner: %w", err) + return outcome + } + outcome.Winner = res.Winner + outcome.Scores = res.Scores + outcome.Turns = res.Turns + return outcome +} + +// ── candidate subprocess management ────────────────────────────────────────── + +type botProcess struct { + port int + cmd *exec.Cmd + tmpDir string +} + +func (p *botProcess) stop() { + if p.cmd != nil && p.cmd.Process != nil { + _ = p.cmd.Process.Kill() + _ = p.cmd.Wait() + } + if p.tmpDir != "" { + os.RemoveAll(p.tmpDir) + } +} + +func startCandidate(ctx context.Context, code, language string) (*botProcess, error) { + tmpDir, err := os.MkdirTemp("", "acb-arena-*") + if err != nil { + return nil, fmt.Errorf("mkdirtemp: %w", err) + } + + execPath, execArgs, err := buildCandidate(ctx, code, language, tmpDir) + if err != nil { + os.RemoveAll(tmpDir) + return nil, fmt.Errorf("build: %w", err) + } + + port, err := allocateFreePort() + if err != nil { + os.RemoveAll(tmpDir) + return nil, fmt.Errorf("allocate port: %w", err) + } + + env := append(os.Environ(), + fmt.Sprintf("BOT_PORT=%d", port), + "BOT_SECRET="+evalSecret, + ) + + var args 
[]string + args = append(args, execArgs...) + cmd := exec.CommandContext(ctx, execPath, args...) + cmd.Env = env + cmd.Dir = tmpDir + + if err := cmd.Start(); err != nil { + os.RemoveAll(tmpDir) + return nil, fmt.Errorf("start process: %w", err) + } + + proc := &botProcess{port: port, cmd: cmd, tmpDir: tmpDir} + addr := fmt.Sprintf("127.0.0.1:%d", port) + if err := waitForHealth(ctx, addr); err != nil { + proc.stop() + return nil, fmt.Errorf("candidate health: %w", err) + } + return proc, nil +} + +func buildCandidate(ctx context.Context, code, language, dir string) (string, []string, error) { + switch language { + case "go": + if err := os.WriteFile(dir+"/bot.go", []byte(code), 0o600); err != nil { + return "", nil, err + } + if err := os.WriteFile(dir+"/go.mod", []byte("module bot\n\ngo 1.21\n"), 0o600); err != nil { + return "", nil, err + } + bin := dir + "/bot" + cmd := exec.CommandContext(ctx, "go", "build", "-o", bin, ".") + cmd.Dir = dir + if out, err := cmd.CombinedOutput(); err != nil { + return "", nil, fmt.Errorf("go build: %s", truncate(string(out), 512)) + } + return bin, nil, nil + + case "python": + src := dir + "/bot.py" + if err := os.WriteFile(src, []byte(code), 0o600); err != nil { + return "", nil, err + } + return "python3", []string{src}, nil + + case "rust": + src := dir + "/main.rs" + if err := os.WriteFile(src, []byte(code), 0o600); err != nil { + return "", nil, err + } + bin := dir + "/bot" + cmd := exec.CommandContext(ctx, "rustc", "--edition", "2021", src, "-o", bin) + if out, err := cmd.CombinedOutput(); err != nil { + return "", nil, fmt.Errorf("rustc: %s", truncate(string(out), 512)) + } + return bin, nil, nil + + case "typescript": + if err := os.WriteFile(dir+"/bot.ts", []byte(code), 0o600); err != nil { + return "", nil, err + } + tsconfig := `{"compilerOptions":{"target":"ES2020","module":"commonjs","outDir":"./"},"files":["bot.ts"]}` + if err := os.WriteFile(dir+"/tsconfig.json", []byte(tsconfig), 0o600); err != nil { + return 
"", nil, err + } + cmd := exec.CommandContext(ctx, "tsc", "--project", dir+"/tsconfig.json") + if out, err := cmd.CombinedOutput(); err != nil { + return "", nil, fmt.Errorf("tsc: %s", truncate(string(out), 512)) + } + return "node", []string{dir + "/bot.js"}, nil + + case "java": + src := dir + "/Bot.java" + if err := os.WriteFile(src, []byte(code), 0o600); err != nil { + return "", nil, err + } + cmd := exec.CommandContext(ctx, "javac", src) + if out, err := cmd.CombinedOutput(); err != nil { + return "", nil, fmt.Errorf("javac: %s", truncate(string(out), 512)) + } + return "java", []string{"-cp", dir, "Bot"}, nil + + case "php": + src := dir + "/bot.php" + if err := os.WriteFile(src, []byte(code), 0o600); err != nil { + return "", nil, err + } + return "php", []string{src}, nil + + default: + return "", nil, fmt.Errorf("unsupported language: %s", language) + } +} + +// allocateFreePort finds an unused TCP port on localhost. +func allocateFreePort() (int, error) { + l, err := net.Listen("tcp", "127.0.0.1:0") + if err != nil { + return 0, err + } + port := l.Addr().(*net.TCPAddr).Port + l.Close() + return port, nil +} + +// waitForHealth polls GET /health until 200 OK or healthStartupTimeout elapses. +func waitForHealth(ctx context.Context, addr string) error { + deadline := time.Now().Add(healthStartupTimeout) + client := &http.Client{Timeout: 500 * time.Millisecond} + for time.Now().Before(deadline) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://"+addr+"/health", nil) + if err != nil { + return err + } + if resp, err := client.Do(req); err == nil { + resp.Body.Close() + if resp.StatusCode == http.StatusOK { + return nil + } + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(healthPollInterval): + } + } + return fmt.Errorf("candidate did not become healthy within %s", healthStartupTimeout) +} + +// decryptAESGCM decrypts an AES-256-GCM ciphertext (hex-encoded) with the +// given hex-encoded 32-byte key. 
+func decryptAESGCM(ciphertextHex, keyHex string) (string, error) { + key, err := hex.DecodeString(keyHex) + if err != nil { + return "", fmt.Errorf("decode key: %w", err) + } + if len(key) != 32 { + return "", fmt.Errorf("key must be 32 bytes (64 hex chars)") + } + ciphertext, err := hex.DecodeString(ciphertextHex) + if err != nil { + return "", fmt.Errorf("decode ciphertext: %w", err) + } + block, err := aes.NewCipher(key) + if err != nil { + return "", err + } + aead, err := cipher.NewGCM(block) + if err != nil { + return "", err + } + ns := aead.NonceSize() + if len(ciphertext) < ns { + return "", fmt.Errorf("ciphertext too short") + } + plain, err := aead.Open(nil, ciphertext[:ns], ciphertext[ns:], nil) + if err != nil { + return "", err + } + return string(plain), nil +} + +func truncate(s string, max int) string { + if len(s) <= max { + return s + } + return s[:max] + "…" +} diff --git a/cmd/acb-evolver/internal/arena/arena_test.go b/cmd/acb-evolver/internal/arena/arena_test.go new file mode 100644 index 0000000..5196cdb --- /dev/null +++ b/cmd/acb-evolver/internal/arena/arena_test.go @@ -0,0 +1,314 @@ +package arena + +import ( + "fmt" + "math/rand" + "testing" + + "github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites" +) + +// ── ComputeNash ─────────────────────────────────────────────────────────────── + +func TestComputeNash_EmptySlice(t *testing.T) { + r := ComputeNash(nil) + if r.NashValue != 0.5 { + t.Errorf("empty: NashValue = %.3f, want 0.5", r.NashValue) + } +} + +func TestComputeNash_SingleOpponent(t *testing.T) { + r := ComputeNash([]float64{0.7}) + if r.NashValue != 0.7 { + t.Errorf("single: NashValue = %.3f, want 0.7", r.NashValue) + } + if r.OpponentMix[0] != 1.0 { + t.Errorf("single: mix[0] = %.3f, want 1.0", r.OpponentMix[0]) + } +} + +func TestComputeNash_MinimumIsHardestOpponent(t *testing.T) { + // Column player minimises candidate win rate → Nash value = min(winRates). 
+ winRates := []float64{0.8, 0.3, 0.6} + r := ComputeNash(winRates) + if r.NashValue != 0.3 { + t.Errorf("NashValue = %.3f, want 0.3", r.NashValue) + } + // All weight on opponent index 1 (win rate 0.3). + for i, w := range r.OpponentMix { + if i == 1 { + if w != 1.0 { + t.Errorf("mix[1] = %.3f, want 1.0", w) + } + } else if w != 0.0 { + t.Errorf("mix[%d] = %.3f, want 0.0", i, w) + } + } +} + +func TestComputeNash_TiedMinimum(t *testing.T) { + // Two opponents tied at the minimum: weight is split 50/50. + winRates := []float64{0.2, 0.8, 0.2} + r := ComputeNash(winRates) + if r.NashValue != 0.2 { + t.Errorf("NashValue = %.3f, want 0.2", r.NashValue) + } + if r.OpponentMix[0] != 0.5 || r.OpponentMix[2] != 0.5 { + t.Errorf("tied mix = %v, want [0.5 0.0 0.5]", r.OpponentMix) + } + if r.OpponentMix[1] != 0.0 { + t.Errorf("mix[1] = %.3f, want 0.0", r.OpponentMix[1]) + } +} + +func TestComputeNash_AllEqual(t *testing.T) { + winRates := []float64{0.5, 0.5, 0.5} + r := ComputeNash(winRates) + if r.NashValue != 0.5 { + t.Errorf("all-equal: NashValue = %.3f, want 0.5", r.NashValue) + } + // All opponents get equal weight. 
+ expected := 1.0 / 3.0 + for i, w := range r.OpponentMix { + if abs(w-expected) > 1e-9 { + t.Errorf("mix[%d] = %.6f, want %.6f", i, w, expected) + } + } +} + +func TestFictitiousPlayNash_MatchesMinimaxForSingleRow(t *testing.T) { + winRates := []float64{0.8, 0.3, 0.6} + fp := FictitiousPlayNash(winRates, 10000) + if abs(fp.NashValue-0.3) > 0.01 { + t.Errorf("fictitious play: NashValue = %.3f, want ≈0.3", fp.NashValue) + } +} + +// ── WinRate ─────────────────────────────────────────────────────────────────── + +func TestWinRate_ZeroTotal(t *testing.T) { + r := WinRate(0, 0) + if r.Rate != 0.5 { + t.Errorf("zero total: Rate = %.3f, want 0.5", r.Rate) + } +} + +func TestWinRate_AllWins(t *testing.T) { + r := WinRate(10, 10) + if r.Rate != 1.0 { + t.Errorf("all wins: Rate = %.3f, want 1.0", r.Rate) + } + if r.Lower > r.Upper { + t.Errorf("CI inverted: lower=%.3f upper=%.3f", r.Lower, r.Upper) + } +} + +func TestWinRate_AllLosses(t *testing.T) { + r := WinRate(0, 10) + if r.Rate != 0.0 { + t.Errorf("all losses: Rate = %.3f, want 0.0", r.Rate) + } + if r.Lower < 0.0 || r.Upper > 1.0 { + t.Errorf("CI out of [0,1]: lower=%.3f upper=%.3f", r.Lower, r.Upper) + } +} + +func TestWinRate_FiftyPercent(t *testing.T) { + r := WinRate(5, 10) + if abs(r.Rate-0.5) > 1e-9 { + t.Errorf("50%%: Rate = %.3f, want 0.5", r.Rate) + } + if r.Lower >= 0.5 || r.Upper <= 0.5 { + t.Errorf("50%% CI should straddle 0.5: lower=%.3f upper=%.3f", r.Lower, r.Upper) + } +} + +func TestWinRate_CIBounds(t *testing.T) { + // CI bounds must always lie in [0, 1]. 
+ for wins := 0; wins <= 10; wins++ { + r := WinRate(wins, 10) + if r.Lower < 0.0 || r.Upper > 1.0 { + t.Errorf("wins=%d: CI [%.3f, %.3f] outside [0,1]", wins, r.Lower, r.Upper) + } + if r.Lower > r.Upper { + t.Errorf("wins=%d: lower (%.3f) > upper (%.3f)", wins, r.Lower, r.Upper) + } + } +} + +// ── ComputeFromResult ───────────────────────────────────────────────────────── + +func TestComputeFromResult_Basic(t *testing.T) { + r := &Result{Wins: 7, Losses: 2, Draws: 1} + wr := ComputeFromResult(r) + if wr.Wins != 7 { + t.Errorf("Wins = %d, want 7", wr.Wins) + } + // 7 wins / 10 total = 0.7 rate + if abs(wr.Rate-0.7) > 1e-9 { + t.Errorf("Rate = %.3f, want 0.7", wr.Rate) + } +} + +func TestComputeFromResult_OnlyErrors(t *testing.T) { + r := &Result{Wins: 0, Losses: 0, Draws: 0, Errors: 5} + wr := ComputeFromResult(r) + if wr.Total != 0 { + t.Errorf("Total = %d, want 0 (errors excluded)", wr.Total) + } +} + +// ── Gate.Evaluate ───────────────────────────────────────────────────────────── + +func TestGate_PromotedWhenAllCriteriaMet(t *testing.T) { + grid := mapelites.New(10) + gate := NewGate(DefaultGateConfig(), grid) + + result := &Result{ + Wins: 8, Losses: 2, Draws: 0, + WinRateVec: []float64{0.8, 0.7, 0.9, 0.6, 0.8, 0.7, 0.8, 0.9, 0.7, 0.8}, + } + + gr := gate.Evaluate(result, 1, 0.8, []float64{0.5, 0.5}) + if !gr.Promoted { + t.Errorf("expected promoted, got rejected: %s", gr.Reason) + } + if !gr.MapElitesPlaced { + t.Error("expected MapElitesPlaced = true for empty grid") + } + if gr.MapElitesImproved { + t.Error("expected MapElitesImproved = false for empty cell") + } +} + +func TestGate_RejectedWhenNashTooLow(t *testing.T) { + grid := mapelites.New(10) + cfg := GateConfig{NashThreshold: 0.60, WinRateLowerBound: 0.0} + gate := NewGate(cfg, grid) + + // WinRateVec has a low value → Nash = min = 0.2, below 0.60 + result := &Result{ + Wins: 7, Losses: 3, + WinRateVec: []float64{0.9, 0.2, 0.9, 0.9, 0.9}, + } + + gr := gate.Evaluate(result, 2, 0.7, []float64{0.5, 
0.5}) + if gr.Promoted { + t.Errorf("should be rejected (Nash too low), got: %s", gr.Reason) + } +} + +func TestGate_RejectedWhenNicheOccupiedByFitterBot(t *testing.T) { + grid := mapelites.New(10) + + // Pre-occupy the [5,5] cell with a very fit bot. + grid.TryPlace(99, 0.99, 0.5, 0.5) + + cfg := DefaultGateConfig() + gate := NewGate(cfg, grid) + + // Candidate is in the same niche but has lower fitness. + result := &Result{ + Wins: 7, Losses: 3, + WinRateVec: []float64{0.8, 0.7, 0.9, 0.6, 0.8, 0.7, 0.8, 0.9, 0.7, 0.8}, + } + + gr := gate.Evaluate(result, 1, 0.7, []float64{0.5, 0.5}) + if gr.Promoted { + t.Errorf("should be rejected (niche occupied by fitter bot), got: %s", gr.Reason) + } + if gr.MapElitesPlaced { + t.Error("MapElitesPlaced should be false when existing bot is fitter") + } +} + +func TestGate_PromotedWhenOutperformsNicheChampion(t *testing.T) { + grid := mapelites.New(10) + + // Pre-occupy with a weaker bot. + grid.TryPlace(99, 0.4, 0.5, 0.5) + + cfg := DefaultGateConfig() + gate := NewGate(cfg, grid) + + // Candidate is fitter than the incumbent. 
+ result := &Result{ + Wins: 8, Losses: 2, + WinRateVec: []float64{0.8, 0.7, 0.9, 0.6, 0.8, 0.7, 0.8, 0.9, 0.7, 0.8}, + } + + gr := gate.Evaluate(result, 1, 0.8, []float64{0.5, 0.5}) + if !gr.Promoted { + t.Errorf("should be promoted (beats incumbent), got: %s", gr.Reason) + } + if !gr.MapElitesImproved { + t.Error("MapElitesImproved should be true when beating existing champion") + } +} + +// ── selectDiverse ───────────────────────────────────────────────────────────── + +func TestSelectDiverse_EmptyPool(t *testing.T) { + rng := rand.New(rand.NewSource(42)) + result := selectDiverse(nil, 5, rng) + if len(result) != 0 { + t.Errorf("empty pool: got %d opponents, want 0", len(result)) + } +} + +func TestSelectDiverse_ExactlyN(t *testing.T) { + rng := rand.New(rand.NewSource(42)) + bots := makeBots(5) + result := selectDiverse(bots, 5, rng) + if len(result) != 5 { + t.Errorf("exact n: got %d opponents, want 5", len(result)) + } +} + +func TestSelectDiverse_MoreThanN(t *testing.T) { + rng := rand.New(rand.NewSource(42)) + bots := makeBots(20) + result := selectDiverse(bots, 10, rng) + if len(result) != 10 { + t.Errorf("more than n: got %d opponents, want 10", len(result)) + } + // Verify spread: should sample across the sorted range, not just top/bottom. + seen := make(map[string]bool) + for _, b := range result { + seen[b.BotID] = true + } + if len(seen) != 10 { + t.Errorf("duplicates in diverse selection: got %d unique, want 10", len(seen)) + } +} + +func TestSelectDiverse_FewerThanN(t *testing.T) { + rng := rand.New(rand.NewSource(42)) + bots := makeBots(3) + // With only 3 bots, need to repeat to fill 10 slots. 
+ result := selectDiverse(bots, 10, rng) + if len(result) != 10 { + t.Errorf("fewer than n: got %d opponents, want 10", len(result)) + } +} + +// ── helpers ─────────────────────────────────────────────────────────────────── + +// makeBots builds n test bots with unique IDs and names; ratings start at +// 1000 and rise by 50 per index. +func makeBots(n int) []BotRecord { + bots := make([]BotRecord, n) + for i := range bots { + bots[i] = BotRecord{ + BotID: fmt.Sprintf("b_%04d", i), + Name: fmt.Sprintf("bot-%d", i), + RatingMu: float64(1000 + i*50), + } + } + return bots +} + +// abs returns the absolute value of x. +func abs(x float64) float64 { + if x < 0 { + return -x + } + return x +} diff --git a/cmd/acb-evolver/internal/arena/gate.go b/cmd/acb-evolver/internal/arena/gate.go new file mode 100644 index 0000000..beb1182 --- /dev/null +++ b/cmd/acb-evolver/internal/arena/gate.go @@ -0,0 +1,144 @@ +// Package arena — promotion gate. +// +// The gate applies two independent criteria before promoting a candidate: +// +// 1. Nash value (PSRO) ≥ NashThreshold — sufficient win rate +// 2. MAP-Elites niche fill or improvement — behavioral novelty +// +// Both must be satisfied. The Wilson-score CI lower bound is an optional +// secondary guard on the overall win rate. +package arena + +import ( + "fmt" + "strings" + + "github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites" +) + +// GateConfig holds the promotion thresholds. +type GateConfig struct { + // NashThreshold is the minimum Nash value (worst-case win rate across + // opponents) required for promotion. Default: 0.50. + NashThreshold float64 + + // WinRateLowerBound is the minimum Wilson-score 95% CI lower bound for + // the overall win rate. Set ≤ 0 to disable. Default: 0.40. + WinRateLowerBound float64 +} + +// DefaultGateConfig returns production-ready promotion thresholds. +func DefaultGateConfig() GateConfig { + return GateConfig{ + NashThreshold: 0.50, + WinRateLowerBound: 0.40, + } +} + +// GateResult holds the full promotion decision with supporting evidence. +type GateResult struct { + // Promoted is true when all criteria are met. 
+ Promoted bool + + // Nash is the PSRO result for the mini-tournament. + Nash NashResult + + // WinRate is the overall win rate with 95% Wilson CI. + WinRate WinRateResult + + // MapElitesPlaced is true when the candidate was written to the MAP-Elites + // grid (filled an empty cell or outperformed the incumbent). + MapElitesPlaced bool + + // MapElitesImproved is true when the candidate beat an existing champion + // (as opposed to simply filling an empty niche). + MapElitesImproved bool + + // Placement is the (X, Y) grid cell the candidate occupies. + Placement mapelites.Placement + + // Reason is a human-readable explanation of the promotion decision. + Reason string +} + +// Gate applies the promotion criteria to mini-tournament results. +type Gate struct { + cfg GateConfig + grid *mapelites.Grid +} + +// NewGate creates a Gate backed by the provided MAP-Elites grid. +// The grid is shared across evaluations so niche occupancy persists across +// multiple Evaluate calls within one evolution run. +func NewGate(cfg GateConfig, grid *mapelites.Grid) *Gate { + return &Gate{cfg: cfg, grid: grid} +} + +// Evaluate applies the two-part promotion gate to the arena result. +// +// programID and fitness are the candidate's identifiers in the programs table. +// behaviorVec is [aggression, economy] ∈ [0,1]²; defaults to [0.5, 0.5] when +// nil or short. +// +// Side effect: g.grid.TryPlace is called — the cell is updated when the +// candidate wins its behavioral niche. +// +// NOTE(review): TryPlace is invoked before the Nash and win-rate checks are +// evaluated, so a candidate that is rejected on those criteria may still have +// replaced the niche incumbent in the shared grid — confirm this is intended. +func (g *Gate) Evaluate(result *Result, programID int64, fitness float64, behaviorVec []float64) *GateResult { + wr := ComputeFromResult(result) + nash := ComputeNash(result.WinRateVec) + + agg, eco := 0.5, 0.5 + if len(behaviorVec) >= 2 { + agg, eco = behaviorVec[0], behaviorVec[1] + } + + // Sample the cell state before TryPlace so we can distinguish + // "fills empty niche" from "beats existing champion". 
+ cellX, cellY := g.grid.BehaviorToCell(agg, eco) + priorCell := g.grid.Get(cellX, cellY) + + placement, placed := g.grid.TryPlace(programID, fitness, agg, eco) + + gr := &GateResult{ + Nash: nash, + WinRate: wr, + MapElitesPlaced: placed, + MapElitesImproved: placed && priorCell.Occupied, + Placement: placement, + } + + nashOK := nash.NashValue >= g.cfg.NashThreshold + winOK := g.cfg.WinRateLowerBound <= 0 || wr.Lower >= g.cfg.WinRateLowerBound + mapOK := placed + + if nashOK && winOK && mapOK { + gr.Promoted = true + if !priorCell.Occupied { + gr.Reason = fmt.Sprintf( + "promoted: Nash=%.3f ≥ %.3f, WR=%.3f (95%% CI %.3f–%.3f), fills new niche [%d,%d]", + nash.NashValue, g.cfg.NashThreshold, + wr.Rate, wr.Lower, wr.Upper, + placement.X, placement.Y) + } else { + gr.Reason = fmt.Sprintf( + "promoted: Nash=%.3f ≥ %.3f, WR=%.3f (95%% CI %.3f–%.3f), beats niche [%d,%d] champion (%.3f→%.3f)", + nash.NashValue, g.cfg.NashThreshold, + wr.Rate, wr.Lower, wr.Upper, + placement.X, placement.Y, priorCell.Fitness, fitness) + } + return gr + } + + var why []string + if !nashOK { + why = append(why, fmt.Sprintf("Nash=%.3f < %.3f", nash.NashValue, g.cfg.NashThreshold)) + } + if !winOK { + why = append(why, fmt.Sprintf("WR CI lower=%.3f < %.3f", wr.Lower, g.cfg.WinRateLowerBound)) + } + if !mapOK { + why = append(why, fmt.Sprintf("niche [%d,%d] occupied by fitter bot (fitness=%.3f)", + placement.X, placement.Y, priorCell.Fitness)) + } + gr.Reason = "rejected: " + strings.Join(why, "; ") + return gr +} diff --git a/cmd/acb-evolver/internal/arena/psro.go b/cmd/acb-evolver/internal/arena/psro.go new file mode 100644 index 0000000..e42a455 --- /dev/null +++ b/cmd/acb-evolver/internal/arena/psro.go @@ -0,0 +1,119 @@ +// Package arena — PSRO Nash equilibrium computation. +// +// LLM-PSRO (Policy Space Response Oracles) uses Nash equilibrium over the +// current bot population as the promotion criterion. A candidate is promoted +// only if it is a best response to the Nash mixture, i.e. 
its expected payoff +// against the Nash mixture exceeds the threshold (default 0.50). +// +// For the mini-tournament setting (one candidate, K opponents), the payoff +// matrix has a single row. The Nash-optimal strategy for the minimising +// column player (opponents) is to concentrate weight on the opponent that +// minimises the candidate's expected win rate. The resulting Nash value is +// therefore min(winRates), which is the tightest possible test. +// +// The full fictitious-play algorithm is retained so it generalises cleanly +// to K×K payoff matrices when the population grows. +package arena + +// NashResult holds the Nash equilibrium computation for the meta-game. +type NashResult struct { + // OpponentMix[i] = probability of opponent i in the Nash mixture. + // Sums to 1.0. + OpponentMix []float64 + + // NashValue is the candidate's expected win rate under the Nash mixture. + // This is the quantity compared against the promotion threshold. + NashValue float64 + + // WinRatePerOpponent mirrors the input payoff row for convenience. + WinRatePerOpponent []float64 +} + +// ComputeNash computes the Nash equilibrium for the 1×K meta-game where +// winRates[i] is the candidate's win rate against opponent i. +// +// The column player (opponent) minimises the candidate's expected win rate. +// The optimal column strategy concentrates on the opponent(s) with the lowest +// win rate for the candidate. Ties in the minimum are distributed uniformly. +// +// Nash value = min(winRates) (hardest-opponent test). +func ComputeNash(winRates []float64) NashResult { + if len(winRates) == 0 { + return NashResult{NashValue: 0.5} + } + + K := len(winRates) + mix := make([]float64, K) + + // Find the minimum win rate. + minVal := winRates[0] + for _, w := range winRates[1:] { + if w < minVal { + minVal = w + } + } + + // Distribute weight uniformly over all opponents achieving the minimum. 
+ count := 0 + for _, w := range winRates { + if w == minVal { + count++ + } + } + for i, w := range winRates { + if w == minVal { + mix[i] = 1.0 / float64(count) + } + } + + return NashResult{ + OpponentMix: mix, + NashValue: minVal, + WinRatePerOpponent: winRates, + } +} + +// FictitiousPlayNash computes the Nash equilibrium via fictitious play, +// converging over iterations rounds. This generalises to K×K matrices and +// provides a softer mixed-strategy Nash than the pure-minimax above. +// +// For a 1×K payoff matrix both algorithms produce the same Nash value. The +// opponent mixtures differ only when several opponents tie for the minimum: +// the best-response loop below uses a strict comparison, so all weight goes +// to the first minimising index instead of being split as in ComputeNash. +// A non-positive iterations value is replaced by 1000. The function is +// provided for future use when the full population payoff matrix is +// available. +func FictitiousPlayNash(winRates []float64, iterations int) NashResult { + if len(winRates) == 0 { + return NashResult{NashValue: 0.5} + } + if iterations <= 0 { + iterations = 1000 + } + + K := len(winRates) + counts := make([]float64, K) + + // Fictitious play: column player repeatedly best-responds to the current + // row player strategy (fixed at "always play candidate"). + for iter := 0; iter < iterations; iter++ { + // Column player best response: pick opponent minimising candidate win rate. + best := 0 + for i := 1; i < K; i++ { + if winRates[i] < winRates[best] { + best = i + } + } + counts[best]++ + } + + mix := make([]float64, K) + expected := 0.0 + for i, c := range counts { + mix[i] = c / float64(iterations) + expected += mix[i] * winRates[i] + } + + return NashResult{ + OpponentMix: mix, + NashValue: expected, + WinRatePerOpponent: winRates, + } +} diff --git a/cmd/acb-evolver/internal/arena/winrate.go b/cmd/acb-evolver/internal/arena/winrate.go new file mode 100644 index 0000000..a281dcc --- /dev/null +++ b/cmd/acb-evolver/internal/arena/winrate.go @@ -0,0 +1,55 @@ +package arena + +import "math" + +// WinRateResult holds the observed win rate and its 95% Wilson score confidence interval. 
// WinRateResult holds the observed win rate and its 95% Wilson score
// confidence interval.
type WinRateResult struct {
	Wins  int
	Total int     // non-error matches only
	Rate  float64 // observed win rate (0–1)
	Lower float64 // 95% CI lower bound
	Upper float64 // 95% CI upper bound
}

// WinRate computes the win rate and Wilson score 95% confidence interval
// for wins out of total valid matches.
//
// When total <= 0 there is no evidence at all, so the result is the neutral
// rate 0.5 with the widest possible interval [0, 1] (Wins and Total are 0).
// wins is clamped to [0, total] so that inconsistent tallies cannot push the
// proportion outside [0, 1] and turn the interval bounds into NaN.
//
// Wilson score interval:
//
//	center = (p̂ + z²/2n) / (1 + z²/n)
//	margin = z * sqrt(p̂(1-p̂)/n + z²/4n²) / (1 + z²/n)
//	CI     = [center − margin, center + margin]
//
// Using z = 1.96 (95% two-tailed confidence). The bounds are clipped to
// [0, 1].
func WinRate(wins, total int) WinRateResult {
	if total <= 0 {
		return WinRateResult{Rate: 0.5, Lower: 0.0, Upper: 1.0}
	}
	if wins < 0 {
		wins = 0
	} else if wins > total {
		wins = total
	}

	const z = 1.96 // 95% CI
	p := float64(wins) / float64(total)
	n := float64(total)
	z2 := z * z

	denom := 1 + z2/n
	center := (p + z2/(2*n)) / denom
	margin := z * math.Sqrt(p*(1-p)/n+z2/(4*n*n)) / denom

	return WinRateResult{
		Wins:  wins,
		Total: total,
		Rate:  p,
		Lower: math.Max(0, center-margin),
		Upper: math.Min(1, center+margin),
	}
}
+ return WinRate(r.Wins, total) +} diff --git a/cmd/acb-evolver/internal/db/db.go b/cmd/acb-evolver/internal/db/db.go index 533dcb7..6c82cb9 100644 --- a/cmd/acb-evolver/internal/db/db.go +++ b/cmd/acb-evolver/internal/db/db.go @@ -18,6 +18,7 @@ CREATE TABLE IF NOT EXISTS programs ( behavior_vector DOUBLE PRECISION[] NOT NULL DEFAULT '{}', fitness DOUBLE PRECISION NOT NULL DEFAULT 0.0, promoted BOOLEAN NOT NULL DEFAULT FALSE, + bot_id VARCHAR(16), created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE INDEX IF NOT EXISTS idx_programs_island ON programs(island); @@ -37,8 +38,20 @@ CREATE INDEX IF NOT EXISTS idx_validation_log_island ON validation_log(island); CREATE INDEX IF NOT EXISTS idx_validation_log_island_passed ON validation_log(island, passed); ` -// EnsureSchema creates the programs table if it does not already exist. +// migrationSQL holds additive migrations run after the base schema is ensured. +// Each statement is idempotent (ALTER TABLE … ADD COLUMN IF NOT EXISTS). +const migrationSQL = ` +ALTER TABLE programs ADD COLUMN IF NOT EXISTS bot_id VARCHAR(16); +ALTER TABLE programs ADD COLUMN IF NOT EXISTS bot_name VARCHAR(64); +ALTER TABLE programs ADD COLUMN IF NOT EXISTS bot_secret TEXT; +` + +// EnsureSchema creates the programs and validation_log tables if they do not +// already exist, then applies any pending additive migrations. func EnsureSchema(ctx context.Context, db *sql.DB) error { - _, err := db.ExecContext(ctx, schemaSQL) + if _, err := db.ExecContext(ctx, schemaSQL); err != nil { + return err + } + _, err := db.ExecContext(ctx, migrationSQL) return err } diff --git a/cmd/acb-evolver/internal/db/programs.go b/cmd/acb-evolver/internal/db/programs.go index 96365da..70bccaf 100644 --- a/cmd/acb-evolver/internal/db/programs.go +++ b/cmd/acb-evolver/internal/db/programs.go @@ -177,3 +177,106 @@ func (s *Store) TotalCount(ctx context.Context) (int, error) { } return n, nil } + +// SetBotID records the deployed bot_id for a promoted program. 
+func (s *Store) SetBotID(ctx context.Context, id int64, botID string) error { + _, err := s.db.ExecContext(ctx, + `UPDATE programs SET bot_id = $1 WHERE id = $2`, botID, id) + if err != nil { + return fmt.Errorf("set bot_id for program %d: %w", id, err) + } + return nil +} + +// PromotedProgram holds a promoted program linked to its live bot. +type PromotedProgram struct { + ProgramID int64 + BotID string + BotName string // K8s/API name, e.g. "acb-evo-42" + BotSecret string // plaintext secret stored for retirement operations + Island string + BehaviorVector []float64 + Fitness float64 +} + +// ListPromoted returns all programs that have been promoted (bot_id is set). +func (s *Store) ListPromoted(ctx context.Context) ([]*PromotedProgram, error) { + rows, err := s.db.QueryContext(ctx, ` + SELECT id, bot_id, COALESCE(bot_name, ''), COALESCE(bot_secret, ''), + island, behavior_vector, fitness + FROM programs + WHERE promoted = TRUE AND bot_id IS NOT NULL + ORDER BY fitness DESC`) + if err != nil { + return nil, fmt.Errorf("list promoted programs: %w", err) + } + defer rows.Close() + + var out []*PromotedProgram + for rows.Next() { + p := &PromotedProgram{} + if err := rows.Scan(&p.ProgramID, &p.BotID, &p.BotName, &p.BotSecret, + &p.Island, pq.Array(&p.BehaviorVector), &p.Fitness); err != nil { + return nil, fmt.Errorf("scan promoted program: %w", err) + } + out = append(out, p) + } + return out, rows.Err() +} + +// SetBotNameAndSecret records the K8s bot name and plaintext shared secret for +// a promoted program. These are stored so the retirement path can locate and +// clean up the bot without requiring an extra API call. 
+func (s *Store) SetBotNameAndSecret(ctx context.Context, id int64, botName, botSecret string) error { + _, err := s.db.ExecContext(ctx, + `UPDATE programs SET bot_name = $1, bot_secret = $2 WHERE id = $3`, + botName, botSecret, id) + if err != nil { + return fmt.Errorf("set bot name/secret for program %d: %w", id, err) + } + return nil +} + +// PromotedCount returns the number of currently promoted (deployed) programs. +func (s *Store) PromotedCount(ctx context.Context) (int, error) { + var n int + err := s.db.QueryRowContext(ctx, + `SELECT COUNT(*) FROM programs WHERE promoted = TRUE AND bot_id IS NOT NULL`).Scan(&n) + if err != nil { + return 0, fmt.Errorf("promoted count: %w", err) + } + return n, nil +} + +// UnsetPromoted clears the promoted flag and bot_id for a retired program. +func (s *Store) UnsetPromoted(ctx context.Context, id int64) error { + _, err := s.db.ExecContext(ctx, + `UPDATE programs SET promoted = FALSE, bot_id = NULL WHERE id = $1`, id) + if err != nil { + return fmt.Errorf("unset promoted for program %d: %w", id, err) + } + return nil +} + +// GetByBotID returns the program associated with a deployed bot ID, or nil. 
+func (s *Store) GetByBotID(ctx context.Context, botID string) (*Program, error) { + p := &Program{} + var parentJSON string + err := s.db.QueryRowContext(ctx, ` + SELECT id, code, language, island, generation, parent_ids, + behavior_vector, fitness, promoted, created_at + FROM programs WHERE bot_id = $1`, botID).Scan( + &p.ID, &p.Code, &p.Language, &p.Island, &p.Generation, + &parentJSON, pq.Array(&p.BehaviorVector), &p.Fitness, &p.Promoted, &p.CreatedAt, + ) + if err == sql.ErrNoRows { + return nil, nil + } + if err != nil { + return nil, fmt.Errorf("get program by bot_id %s: %w", botID, err) + } + if err := json.Unmarshal([]byte(parentJSON), &p.ParentIDs); err != nil { + return nil, fmt.Errorf("unmarshal parent_ids: %w", err) + } + return p, nil +} diff --git a/cmd/acb-evolver/internal/promoter/promoter.go b/cmd/acb-evolver/internal/promoter/promoter.go new file mode 100644 index 0000000..e675591 --- /dev/null +++ b/cmd/acb-evolver/internal/promoter/promoter.go @@ -0,0 +1,721 @@ +// Package promoter deploys validated+promoted evolved bots to Kubernetes and +// registers them in the ACB bots database. It also enforces the retirement +// policy: auto-retiring bots below a rating threshold and capping the +// evolved-bot fleet at a configurable population cap. +// +// Promotion flow +// +// 1. Generate a unique bot name (acb-evo-), bot ID, and secret. +// 2. Write bot source + language-appropriate Dockerfile to bots/evolved//. +// 3. Write K8s Secret / Deployment / Service manifests to deploy/k8s/. +// 4. Build and push the container image (best-effort; CI pipeline is the +// fallback when docker is unavailable or fails). +// 5. Git add → commit → push (triggers ArgoCD sync + image build via CI). +// 6. Poll kubectl until the Deployment has ≥1 available replica. +// 7. Insert the bot record directly into the bots database table. +// 8. Record bot_id, bot_name, and bot_secret in the programs table. +// +// Retirement flow +// +// 1. 
Mark bot as 'retired' in the bots table. +// 2. Delete the K8s manifests and bot source directory from git, commit, push. +// 3. Clear promoted=false / bot_id=NULL in the programs table. +package promoter + +import ( + "bytes" + "context" + "crypto/aes" + "crypto/cipher" + "crypto/rand" + "database/sql" + "encoding/base64" + "encoding/hex" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "strings" + "text/template" + "time" + + "github.com/aicodebattle/acb/cmd/acb-evolver/internal/db" +) + +const ( + botOwner = "acb-evolver" + botPort = 8080 +) + +// Config controls promotion and retirement behaviour. +type Config struct { + // Registry is the container registry prefix, e.g. + // "forgejo.ardenone.com/ai-code-battle". + Registry string + + // RepoDir is the local git repository root used for writing manifests. + RepoDir string + + // KubectlServer is the kubectl API server URL for deployment polling, + // e.g. "http://kubectl-ardenone-cluster:8001". + KubectlServer string + + // Namespace is the Kubernetes namespace where bots are deployed. + Namespace string + + // EncryptionKey is the hex-encoded AES-256-GCM key used to encrypt + // secrets before storing them in the bots table. Empty = plaintext. + EncryptionKey string + + // DeployWaitTimeout is the maximum time to wait for an ArgoCD-managed + // deployment to have ≥1 available replica. + DeployWaitTimeout time.Duration + + // RatingThreshold is the minimum display rating (mu − 2·phi) an evolved + // bot must maintain to avoid auto-retirement. + RatingThreshold float64 + + // PopCap is the maximum number of simultaneously promoted evolved bots. + // Lowest-rated bots are retired when the cap is exceeded. + PopCap int +} + +// DefaultConfig returns production-ready defaults. 
+func DefaultConfig() Config { + return Config{ + Registry: "forgejo.ardenone.com/ai-code-battle", + RepoDir: ".", + KubectlServer: "http://kubectl-ardenone-cluster:8001", + Namespace: "ai-code-battle", + DeployWaitTimeout: 10 * time.Minute, + RatingThreshold: 1000.0, + PopCap: 50, + } +} + +// Promoter manages promotion and retirement of evolved bots. +type Promoter struct { + store *db.Store + rawDB *sql.DB + cfg Config +} + +// New creates a Promoter. +func New(store *db.Store, rawDB *sql.DB, cfg Config) *Promoter { + return &Promoter{store: store, rawDB: rawDB, cfg: cfg} +} + +// PromotionResult holds the outcome of a successful promotion. +type PromotionResult struct { + BotName string + BotID string + Endpoint string // K8s ClusterIP service URL +} + +// Promote deploys a validated candidate as a live evolved bot. +func (p *Promoter) Promote(ctx context.Context, program *db.Program) (*PromotionResult, error) { + botName := fmt.Sprintf("acb-evo-%d", program.ID) + image := fmt.Sprintf("%s/%s:latest", p.cfg.Registry, botName) + endpoint := fmt.Sprintf("http://%s:%d", botName, botPort) + + botID, err := generateBotID() + if err != nil { + return nil, fmt.Errorf("generate bot ID: %w", err) + } + secret, err := generateSecret() + if err != nil { + return nil, fmt.Errorf("generate secret: %w", err) + } + + botDir := filepath.Join(p.cfg.RepoDir, "bots", "evolved", botName) + if err := p.writeBotDir(program, botDir); err != nil { + return nil, fmt.Errorf("write bot dir: %w", err) + } + + if err := p.writeManifests(botName, secret, program); err != nil { + return nil, fmt.Errorf("write manifests: %w", err) + } + + // Best-effort local image build; CI pipeline is the authoritative builder. 
+ if buildErr := p.buildAndPushImage(ctx, botDir, image); buildErr != nil { + fmt.Printf("promoter: docker build skipped (%v) — CI will build the image\n", buildErr) + } + + commitMsg := fmt.Sprintf("Add evolved bot %s (island=%s gen=%d program_id=%d)", + botName, program.Island, program.Generation, program.ID) + if err := p.gitCommitPush(ctx, botName, commitMsg, false); err != nil { + return nil, fmt.Errorf("git commit/push: %w", err) + } + + if err := p.waitForDeployment(ctx, botName); err != nil { + return nil, fmt.Errorf("wait for deployment: %w", err) + } + + // Insert bot record directly into the bots table (same DB as programs). + storedSecret := secret + if p.cfg.EncryptionKey != "" { + storedSecret, err = encryptAESGCM(secret, p.cfg.EncryptionKey) + if err != nil { + return nil, fmt.Errorf("encrypt secret: %w", err) + } + } + _, err = p.rawDB.ExecContext(ctx, ` + INSERT INTO bots (bot_id, name, owner, endpoint_url, shared_secret, status, description, last_active) + VALUES ($1, $2, $3, $4, $5, 'active', $6, NOW())`, + botID, botName, botOwner, endpoint, storedSecret, + fmt.Sprintf("Evolved bot — island=%s gen=%d program_id=%d", + program.Island, program.Generation, program.ID), + ) + if err != nil { + return nil, fmt.Errorf("insert bot record: %w", err) + } + + if err := p.store.SetPromoted(ctx, program.ID); err != nil { + return nil, fmt.Errorf("set promoted: %w", err) + } + if err := p.store.SetBotID(ctx, program.ID, botID); err != nil { + return nil, fmt.Errorf("set bot_id: %w", err) + } + if err := p.store.SetBotNameAndSecret(ctx, program.ID, botName, secret); err != nil { + return nil, fmt.Errorf("set bot name/secret: %w", err) + } + + return &PromotionResult{BotName: botName, BotID: botID, Endpoint: endpoint}, nil +} + +// RetireBot marks a bot as retired, removes its K8s manifests, and clears the +// promoted flag in the programs table. +func (p *Promoter) RetireBot(ctx context.Context, programID int64, botID, botName string) error { + // 1. 
Mark bot retired in the bots table. + if _, err := p.rawDB.ExecContext(ctx, + `UPDATE bots SET status = 'retired' WHERE bot_id = $1`, botID); err != nil { + return fmt.Errorf("retire bot in DB: %w", err) + } + + // 2. Remove K8s manifests + bot source from git. + if botName != "" { + retireMsg := fmt.Sprintf("Retire evolved bot %s (program_id=%d)", botName, programID) + if err := p.gitCommitPush(ctx, botName, retireMsg, true); err != nil { + // Log but don't fail — the bot is already retired in the DB. + fmt.Printf("promoter: git remove failed for %s: %v\n", botName, err) + } + } + + // 3. Clear promoted flag in programs table. + return p.store.UnsetPromoted(ctx, programID) +} + +// RetiredCandidate describes a bot that was auto-retired by EnforcePolicy. +type RetiredCandidate struct { + ProgramID int64 + BotID string + BotName string + DisplayRating float64 + Reason string +} + +// EnforcePolicy auto-retires evolved bots below cfg.RatingThreshold and trims +// the active fleet to cfg.PopCap. The slice is ordered lowest-rated first so +// the weakest bots are retired first when enforcing the cap. +// Returns the list of bots that were retired. 
+func (p *Promoter) EnforcePolicy(ctx context.Context) ([]RetiredCandidate, error) { + rows, err := p.rawDB.QueryContext(ctx, ` + SELECT p.id, p.bot_id, COALESCE(p.bot_name, ''), + b.rating_mu - 2*b.rating_phi AS display_rating + FROM programs p + JOIN bots b ON p.bot_id = b.bot_id + WHERE p.promoted = TRUE + AND p.bot_id IS NOT NULL + AND b.status = 'active' + AND b.owner = $1 + ORDER BY display_rating ASC`, botOwner) + if err != nil { + return nil, fmt.Errorf("query promoted bots: %w", err) + } + defer rows.Close() + + type botRow struct { + programID int64 + botID string + botName string + displayRating float64 + } + var bots []botRow + for rows.Next() { + var b botRow + if err := rows.Scan(&b.programID, &b.botID, &b.botName, &b.displayRating); err != nil { + return nil, fmt.Errorf("scan bot: %w", err) + } + bots = append(bots, b) + } + if err := rows.Err(); err != nil { + return nil, err + } + + // Decide which bots to retire (lowest-rated first). + remaining := len(bots) + var toRetire []RetiredCandidate + for _, b := range bots { + var reason string + if b.displayRating < p.cfg.RatingThreshold { + reason = fmt.Sprintf("display rating %.0f < threshold %.0f", + b.displayRating, p.cfg.RatingThreshold) + } else if remaining > p.cfg.PopCap { + reason = fmt.Sprintf("population cap %d exceeded (currently %d)", + p.cfg.PopCap, remaining) + } + if reason != "" { + toRetire = append(toRetire, RetiredCandidate{ + ProgramID: b.programID, + BotID: b.botID, + BotName: b.botName, + DisplayRating: b.displayRating, + Reason: reason, + }) + remaining-- + } + } + + for i := range toRetire { + r := &toRetire[i] + if err := p.RetireBot(ctx, r.ProgramID, r.BotID, r.BotName); err != nil { + return toRetire[:i], fmt.Errorf("retire bot %s: %w", r.BotID, err) + } + } + return toRetire, nil +} + +// ── file writing ───────────────────────────────────────────────────────────── + +func (p *Promoter) writeBotDir(program *db.Program, dir string) error { + if err := os.MkdirAll(dir, 0o755); 
// dockerfiles maps each supported bot language to its Dockerfile text.
var dockerfiles = map[string]string{
	"go": `FROM golang:1.24-alpine AS builder
WORKDIR /app
COPY go.mod go.mod
COPY bot.go bot.go
RUN go build -o bot .

FROM alpine:3.21
WORKDIR /app
COPY --from=builder /app/bot .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["./bot"]
`,
	"python": `FROM python:3.12-slim
WORKDIR /app
COPY bot.py .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["python3", "bot.py"]
`,
	"rust": `FROM rust:1.85-alpine AS builder
WORKDIR /app
COPY Cargo.toml Cargo.toml
COPY src ./src
RUN cargo build --release

FROM alpine:3.21
WORKDIR /app
COPY --from=builder /app/target/release/bot .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["./bot"]
`,
	"typescript": `FROM node:22-alpine AS builder
WORKDIR /app
COPY bot.ts .
RUN npm install -g typescript && tsc --target ES2020 --module commonjs bot.ts

FROM node:22-alpine
WORKDIR /app
COPY --from=builder /app/bot.js .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["node", "bot.js"]
`,
	"java": `FROM eclipse-temurin:21-alpine AS builder
WORKDIR /app
COPY Bot.java .
RUN javac Bot.java

FROM eclipse-temurin:21-jre-alpine
WORKDIR /app
COPY --from=builder /app/*.class .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["java", "Bot"]
`,
	"php": `FROM php:8.3-cli-alpine
WORKDIR /app
COPY bot.php .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["php", "bot.php"]
`,
}

// dockerfileFor returns the Dockerfile text for the given language, or an
// error naming the language when it is not one of go, python, rust,
// typescript, java, or php.
func dockerfileFor(language string) (string, error) {
	if df, ok := dockerfiles[language]; ok {
		return df, nil
	}
	return "", fmt.Errorf("unsupported language: %s", language)
}
app.kubernetes.io/part-of: ai-code-battle + app.kubernetes.io/component: evolved-bot + acb/island: {{.Island}} + spec: + containers: + - name: bot + image: {{.Registry}}/{{.Name}}:latest + env: + - name: BOT_PORT + value: "{{.Port}}" + - name: BOT_SECRET + valueFrom: + secretKeyRef: + name: {{.Name}}-secret + key: bot-secret + ports: + - name: http + containerPort: {{.Port}} + protocol: TCP + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 30 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 3 + periodSeconds: 10 + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + memory: 128Mi + restartPolicy: Always +`)) + +var svcManifestTmpl = template.Must(template.New("svc").Parse(`apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} + namespace: {{.Namespace}} + labels: + app.kubernetes.io/name: {{.Name}} + app.kubernetes.io/part-of: ai-code-battle + app.kubernetes.io/component: evolved-bot +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: {{.Name}} + ports: + - name: http + port: {{.Port}} + targetPort: http + protocol: TCP +`)) + +func (p *Promoter) writeManifests(botName, secret string, program *db.Program) error { + data := manifestData{ + Name: botName, + Namespace: p.cfg.Namespace, + Island: program.Island, + Generation: program.Generation, + Registry: p.cfg.Registry, + Port: botPort, + SecretBase64: base64.StdEncoding.EncodeToString([]byte(secret)), + } + + // Write Dockerfile into the bot source directory (already created by writeBotDir). 
+ dockerfile, err := dockerfileFor(program.Language) + if err != nil { + return fmt.Errorf("dockerfile: %w", err) + } + botDir := filepath.Join(p.cfg.RepoDir, "bots", "evolved", botName) + if err := os.WriteFile(filepath.Join(botDir, "Dockerfile"), []byte(dockerfile), 0o644); err != nil { + return fmt.Errorf("write Dockerfile: %w", err) + } + + // K8s Secret + secretsDir := filepath.Join(p.cfg.RepoDir, "deploy", "k8s", "secrets") + if err := os.MkdirAll(secretsDir, 0o755); err != nil { + return err + } + if err := renderToFile(filepath.Join(secretsDir, botName+".yaml"), secretManifestTmpl, data); err != nil { + return fmt.Errorf("secret manifest: %w", err) + } + + // K8s Deployment + deployDir := filepath.Join(p.cfg.RepoDir, "deploy", "k8s", "deployments") + if err := renderToFile(filepath.Join(deployDir, botName+".yaml"), deployManifestTmpl, data); err != nil { + return fmt.Errorf("deployment manifest: %w", err) + } + + // K8s Service + svcDir := filepath.Join(p.cfg.RepoDir, "deploy", "k8s", "services") + if err := renderToFile(filepath.Join(svcDir, botName+".yaml"), svcManifestTmpl, data); err != nil { + return fmt.Errorf("service manifest: %w", err) + } + + return nil +} + +func renderToFile(path string, tmpl *template.Template, data any) error { + var buf bytes.Buffer + if err := tmpl.Execute(&buf, data); err != nil { + return err + } + return os.WriteFile(path, buf.Bytes(), 0o644) +} + +// ── git operations ──────────────────────────────────────────────────────────── + +// gitCommitPush stages, commits, and pushes changes for botName. +// When remove=true it runs `git rm` to delete the files; otherwise `git add`. +func (p *Promoter) gitCommitPush(ctx context.Context, botName, msg string, remove bool) error { + run := func(args ...string) error { + cmd := exec.CommandContext(ctx, "git", args...) 
+ cmd.Dir = p.cfg.RepoDir + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("git %s: %s", args[0], strings.TrimSpace(string(out))) + } + return nil + } + + paths := []string{ + filepath.Join("bots", "evolved", botName), + filepath.Join("deploy", "k8s", "deployments", botName+".yaml"), + filepath.Join("deploy", "k8s", "services", botName+".yaml"), + filepath.Join("deploy", "k8s", "secrets", botName+".yaml"), + } + + if remove { + for _, path := range paths { + if err := run("rm", "-rf", "--ignore-unmatch", "--", path); err != nil { + return err + } + } + } else { + args := append([]string{"add", "--"}, paths...) + if err := run(args...); err != nil { + return err + } + } + + // Skip commit if nothing changed. + statusCmd := exec.CommandContext(ctx, "git", "status", "--porcelain") + statusCmd.Dir = p.cfg.RepoDir + out, _ := statusCmd.Output() + if len(strings.TrimSpace(string(out))) == 0 { + return nil + } + + if err := run("commit", "-m", msg); err != nil { + return err + } + return run("push", "origin", "master") +} + +// ── deployment readiness ────────────────────────────────────────────────────── + +func (p *Promoter) waitForDeployment(ctx context.Context, name string) error { + deadline := time.Now().Add(p.cfg.DeployWaitTimeout) + ticker := time.NewTicker(15 * time.Second) + defer ticker.Stop() + + fmt.Printf("promoter: waiting for deployment %s to be ready (timeout=%s)…\n", + name, p.cfg.DeployWaitTimeout) + for { + select { + case <-ctx.Done(): + return ctx.Err() + case <-ticker.C: + n, err := p.availableReplicas(ctx, name) + if err != nil { + fmt.Printf("promoter: kubectl poll error: %v\n", err) + } else if n >= 1 { + fmt.Printf("promoter: deployment %s ready (%d replica)\n", name, n) + return nil + } + if time.Now().After(deadline) { + return fmt.Errorf("deployment %s not ready after %s", name, p.cfg.DeployWaitTimeout) + } + } + } +} + +func (p *Promoter) availableReplicas(ctx context.Context, name string) (int, error) { + cmd := 
// ── crypto helpers ────────────────────────────────────────────────────────────

// generateBotID returns "b_" followed by 8 hex characters taken from 4
// cryptographically random bytes.
func generateBotID() (string, error) {
	b := make([]byte, 4)
	if _, err := rand.Read(b); err != nil {
		return "", err
	}
	return "b_" + hex.EncodeToString(b), nil
}

// generateSecret returns 32 cryptographically random bytes encoded as 64 hex
// characters.
func generateSecret() (string, error) {
	b := make([]byte, 32)
	if _, err := rand.Read(b); err != nil {
		return "", err
	}
	return hex.EncodeToString(b), nil
}

// encryptAESGCM encrypts plaintext with AES-256-GCM under the hex-encoded
// 32-byte key keyHex. The result is hex(nonce || ciphertext || tag) with a
// fresh random nonce per call, so encrypting the same plaintext twice gives
// different output.
func encryptAESGCM(plaintext, keyHex string) (string, error) {
	key, err := hex.DecodeString(keyHex)
	if err != nil || len(key) != 32 {
		return "", fmt.Errorf("invalid AES-256-GCM key (must be 64 hex chars)")
	}
	block, err := aes.NewCipher(key)
	if err != nil {
		return "", err
	}
	aead, err := cipher.NewGCM(block)
	if err != nil {
		return "", err
	}
	nonce := make([]byte, aead.NonceSize())
	if _, err := io.ReadFull(rand.Reader, nonce); err != nil {
		return "", err
	}
	// Passing nonce as dst makes Seal append to it, prefixing the output
	// with the nonce.
	ct := aead.Seal(nonce, nonce, []byte(plaintext), nil)
	return hex.EncodeToString(ct), nil
}

// truncate shortens s to at most limit bytes and appends "…" when anything
// was cut off. The cut never lands inside a multi-byte UTF-8 sequence: it
// backs up to the start of the rune that would have been split. A negative
// limit is treated as zero.
func truncate(s string, limit int) string {
	if limit < 0 {
		limit = 0
	}
	if len(s) <= limit {
		return s
	}
	cut := limit
	// 0b10xxxxxx marks a UTF-8 continuation byte; step back to a rune start.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "…"
}
multi-stage build") + } +} + +// ── generateBotID ───────────────────────────────────────────────────────────── + +func TestGenerateBotID_Format(t *testing.T) { + id, err := generateBotID() + if err != nil { + t.Fatalf("generateBotID error: %v", err) + } + if !strings.HasPrefix(id, "b_") { + t.Errorf("bot ID %q does not start with 'b_'", id) + } + // b_ + 8 hex chars = 10 total + if len(id) != 10 { + t.Errorf("bot ID %q has length %d, want 10", id, len(id)) + } +} + +func TestGenerateBotID_Uniqueness(t *testing.T) { + seen := make(map[string]bool) + for i := 0; i < 100; i++ { + id, err := generateBotID() + if err != nil { + t.Fatalf("generateBotID error at iteration %d: %v", i, err) + } + if seen[id] { + t.Errorf("duplicate bot ID generated: %s", id) + } + seen[id] = true + } +} + +// ── generateSecret ──────────────────────────────────────────────────────────── + +func TestGenerateSecret_Length(t *testing.T) { + s, err := generateSecret() + if err != nil { + t.Fatalf("generateSecret error: %v", err) + } + // 32 random bytes encoded as 64 hex chars + if len(s) != 64 { + t.Errorf("secret %q has length %d, want 64", s, len(s)) + } +} + +func TestGenerateSecret_Uniqueness(t *testing.T) { + seen := make(map[string]bool) + for i := 0; i < 50; i++ { + s, err := generateSecret() + if err != nil { + t.Fatalf("generateSecret error at iteration %d: %v", i, err) + } + if seen[s] { + t.Errorf("duplicate secret generated: %s", s) + } + seen[s] = true + } +} + +// ── encryptAESGCM / decryptAESGCM ───────────────────────────────────────────── + +func TestEncryptDecryptAESGCM_RoundTrip(t *testing.T) { + // 32-byte key = 64 hex chars + key := strings.Repeat("ab", 32) // "abababab..." 
64 chars + plaintext := "my-super-secret-bot-key" + + ct, err := encryptAESGCM(plaintext, key) + if err != nil { + t.Fatalf("encrypt: %v", err) + } + if ct == plaintext { + t.Fatal("ciphertext should differ from plaintext") + } +} + +func TestEncryptAESGCM_InvalidKey(t *testing.T) { + _, err := encryptAESGCM("plaintext", "notahexkey") + if err == nil { + t.Error("expected error for invalid key") + } +} + +// ── manifest templates ──────────────────────────────────────────────────────── + +func TestManifestTemplates_Execute(t *testing.T) { + data := manifestData{ + Name: "acb-evo-test", + Namespace: "ai-code-battle", + Island: "alpha", + Generation: 1, + Registry: "registry.example.com/acb", + Port: 8080, + SecretBase64: "dGVzdA==", + } + + for name, tmpl := range map[string]interface{ Execute(interface{}, interface{}) error }{} { + _ = name + _ = tmpl + } + + // Test secret manifest + var buf strings.Builder + if err := secretManifestTmpl.Execute(&buf, data); err != nil { + t.Fatalf("secretManifestTmpl.Execute: %v", err) + } + out := buf.String() + if !strings.Contains(out, "acb-evo-test-secret") { + t.Error("secret manifest missing expected name") + } + if !strings.Contains(out, "dGVzdA==") { + t.Error("secret manifest missing base64 secret") + } + + // Test deployment manifest + buf.Reset() + if err := deployManifestTmpl.Execute(&buf, data); err != nil { + t.Fatalf("deployManifestTmpl.Execute: %v", err) + } + out = buf.String() + if !strings.Contains(out, "acb-evo-test") { + t.Error("deployment manifest missing bot name") + } + if !strings.Contains(out, "registry.example.com/acb/acb-evo-test:latest") { + t.Error("deployment manifest missing full image reference") + } + if !strings.Contains(out, "acb/island: alpha") { + t.Error("deployment manifest missing island label") + } + + // Test service manifest + buf.Reset() + if err := svcManifestTmpl.Execute(&buf, data); err != nil { + t.Fatalf("svcManifestTmpl.Execute: %v", err) + } + out = buf.String() + if 
!strings.Contains(out, "ClusterIP") { + t.Error("service manifest missing ClusterIP type") + } +} diff --git a/cmd/acb-evolver/main.go b/cmd/acb-evolver/main.go index 393f567..a93dd50 100644 --- a/cmd/acb-evolver/main.go +++ b/cmd/acb-evolver/main.go @@ -7,6 +7,8 @@ // stats Print program counts per island // validate Run the 3-stage validation pipeline on a bot source file // validation-stats Show per-island validation pass-rate metrics +// evaluate Run the 10-match arena tournament and apply the promotion gate +// retire Enforce retirement policy (rating threshold + population cap) package main import ( @@ -21,6 +23,9 @@ import ( _ "github.com/lib/pq" evolverdb "github.com/aicodebattle/acb/cmd/acb-evolver/internal/db" + "github.com/aicodebattle/acb/cmd/acb-evolver/internal/arena" + "github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites" + "github.com/aicodebattle/acb/cmd/acb-evolver/internal/promoter" "github.com/aicodebattle/acb/cmd/acb-evolver/internal/validator" ) @@ -38,6 +43,16 @@ func main() { ctx := context.Background() switch os.Args[1] { + case "evaluate": + db := mustOpenDB(dbURL) + defer db.Close() + runEvaluate(ctx, db, os.Args[2:]) + + case "retire": + db := mustOpenDB(dbURL) + defer db.Close() + runRetire(ctx, db, os.Args[2:]) + case "init-schema": db := mustOpenDB(dbURL) defer db.Close() @@ -90,11 +105,258 @@ func main() { default: fmt.Fprintf(os.Stderr, "unknown subcommand %q\n", os.Args[1]) - fmt.Fprintln(os.Stderr, "usage: acb-evolver ") + fmt.Fprintln(os.Stderr, "usage: acb-evolver ") os.Exit(1) } } +// runEvaluate runs the 10-match mini-tournament and applies the promotion gate. 
+// +// evaluate -lang go -island alpha [-program-id 0] [-promote] [-nash 0.5] [-win-lower 0.4] [-nolog] +func runEvaluate(ctx context.Context, db *sql.DB, args []string) { + fs := flag.NewFlagSet("evaluate", flag.ExitOnError) + lang := fs.String("lang", "", "bot language (go|python|rust|typescript|java|php) [required]") + programID := fs.Int64("program-id", 0, "programs.id to update fitness after evaluation (0 = skip)") + doPromote := fs.Bool("promote", false, "promote the candidate if the gate passes") + nashThreshold := fs.Float64("nash", 0.50, "Nash value threshold for promotion") + winLower := fs.Float64("win-lower", 0.40, "Wilson CI lower-bound threshold (0 to disable)") + nolog := fs.Bool("nolog", false, "skip writing validation result to DB") + + // Promoter flags (used only when -promote is set) + repoDir := fs.String("repo-dir", envOrDefault("ACB_REPO_DIR", "."), "git repo root for K8s manifests") + registry := fs.String("registry", envOrDefault("ACB_REGISTRY", "forgejo.ardenone.com/ai-code-battle"), "container registry") + kubectlServer := fs.String("kubectl-server", envOrDefault("ACB_KUBECTL_SERVER", "http://kubectl-ardenone-cluster:8001"), "kubectl API server URL") + encKey := fs.String("enc-key", os.Getenv("ACB_ENCRYPTION_KEY"), "AES-256-GCM encryption key (hex) for bots table") + + if err := fs.Parse(args); err != nil { + os.Exit(1) + } + if *lang == "" { + fmt.Fprintln(os.Stderr, "evaluate: -lang is required") + fs.Usage() + os.Exit(1) + } + if fs.NArg() < 1 { + fmt.Fprintln(os.Stderr, "evaluate: file argument is required") + fs.Usage() + os.Exit(1) + } + + code, err := os.ReadFile(fs.Arg(0)) + if err != nil { + log.Fatalf("read file: %v", err) + } + + store := evolverdb.NewStore(db) + + // Pre-populate MAP-Elites grid from existing promoted programs so the gate + // can detect niche collisions against the current population. 
+ const gridSize = 10 + grid := mapelites.New(gridSize) + if promoted, err := store.ListPromoted(ctx); err == nil { + for _, pp := range promoted { + if len(pp.BehaviorVector) >= 2 { + grid.TryPlace(pp.ProgramID, pp.Fitness, pp.BehaviorVector[0], pp.BehaviorVector[1]) + } + } + } + + // Run the arena tournament. + arenaCfg := arena.DefaultConfig() + a := arena.New(db, arenaCfg) + + fmt.Printf("evaluate: running %d-match tournament for %s bot…\n", arena.DefaultNumMatches, *lang) + result, err := a.Run(ctx, string(code), *lang) + if err != nil { + log.Fatalf("arena: %v", err) + } + + // Print match summary. + total := result.Wins + result.Losses + result.Draws + fmt.Printf("\nTournament result: %d W / %d L / %d D / %d err (total=%d)\n", + result.Wins, result.Losses, result.Draws, result.Errors, total) + wr := arena.ComputeFromResult(result) + fmt.Printf("Win rate: %.3f (95%% CI %.3f–%.3f)\n", wr.Rate, wr.Lower, wr.Upper) + + nash := arena.ComputeNash(result.WinRateVec) + fmt.Printf("Nash value (PSRO): %.3f (opponent mix: %v)\n", nash.NashValue, nash.WinRatePerOpponent) + + // Compute fitness as overall win rate. + fitness := wr.Rate + + // Look up the program if -program-id was given. + var program *evolverdb.Program + if *programID > 0 { + program, err = store.Get(ctx, *programID) + if err != nil { + log.Fatalf("get program %d: %v", *programID, err) + } + if program == nil { + log.Fatalf("program %d not found", *programID) + } + // Update fitness in DB. + if !*nolog { + if err := store.UpdateFitness(ctx, *programID, fitness, program.BehaviorVector); err != nil { + log.Printf("warn: update fitness: %v", err) + } else { + fmt.Printf("Updated program %d fitness to %.3f\n", *programID, fitness) + } + } + } + + // Apply the promotion gate. 
+ gateCfg := arena.GateConfig{ + NashThreshold: *nashThreshold, + WinRateLowerBound: *winLower, + } + gate := arena.NewGate(gateCfg, grid) + + var behaviorVec []float64 + if program != nil { + behaviorVec = program.BehaviorVector + } + gateResult := gate.Evaluate(result, *programID, fitness, behaviorVec) + + fmt.Printf("\nGate: %s\n", gateResult.Reason) + fmt.Printf("MAP-Elites: placed=%v improved=%v cell=[%d,%d]\n", + gateResult.MapElitesPlaced, gateResult.MapElitesImproved, + gateResult.Placement.X, gateResult.Placement.Y) + + if !gateResult.Promoted { + fmt.Println("Decision: REJECTED") + return + } + + fmt.Println("Decision: PROMOTED") + + if !*doPromote { + fmt.Println("(pass -promote to execute deployment)") + return + } + if program == nil { + log.Fatalf("promote: -program-id is required when -promote is set") + } + + promCfg := promoter.DefaultConfig() + promCfg.Registry = *registry + promCfg.RepoDir = *repoDir + promCfg.KubectlServer = *kubectlServer + promCfg.EncryptionKey = *encKey + + p := promoter.New(store, db, promCfg) + res, err := p.Promote(ctx, program) + if err != nil { + log.Fatalf("promote: %v", err) + } + fmt.Printf("Promoted: bot_name=%s bot_id=%s endpoint=%s\n", res.BotName, res.BotID, res.Endpoint) +} + +// runRetire enforces the retirement policy (rating threshold + population cap). 
+// +// retire [-threshold 1000] [-cap 50] [-dry-run] [-kubectl-server URL] +func runRetire(ctx context.Context, db *sql.DB, args []string) { + fs := flag.NewFlagSet("retire", flag.ExitOnError) + threshold := fs.Float64("threshold", 1000.0, "minimum display rating (mu-2*phi) to keep a bot") + cap := fs.Int("cap", 50, "maximum number of simultaneously promoted evolved bots") + dryRun := fs.Bool("dry-run", false, "print what would be retired without making changes") + repoDir := fs.String("repo-dir", envOrDefault("ACB_REPO_DIR", "."), "git repo root") + registry := fs.String("registry", envOrDefault("ACB_REGISTRY", "forgejo.ardenone.com/ai-code-battle"), "container registry") + kubectlServer := fs.String("kubectl-server", envOrDefault("ACB_KUBECTL_SERVER", "http://kubectl-ardenone-cluster:8001"), "kubectl API server URL") + encKey := fs.String("enc-key", os.Getenv("ACB_ENCRYPTION_KEY"), "AES-256-GCM encryption key (hex)") + + if err := fs.Parse(args); err != nil { + os.Exit(1) + } + + store := evolverdb.NewStore(db) + + promCfg := promoter.DefaultConfig() + promCfg.RatingThreshold = *threshold + promCfg.PopCap = *cap + promCfg.RepoDir = *repoDir + promCfg.Registry = *registry + promCfg.KubectlServer = *kubectlServer + promCfg.EncryptionKey = *encKey + + if *dryRun { + // Simulate by temporarily setting an impossible cap to list candidates. + fmt.Println("retire: dry-run mode — no changes will be made") + } + + p := promoter.New(store, db, promCfg) + + if *dryRun { + // Read-only preview using the same DB query logic without executing retirements. 
+ rows, err := db.QueryContext(ctx, ` + SELECT p.id, p.bot_id, COALESCE(p.bot_name, ''), + b.rating_mu - 2*b.rating_phi AS display_rating + FROM programs p + JOIN bots b ON p.bot_id = b.bot_id + WHERE p.promoted = TRUE AND p.bot_id IS NOT NULL + AND b.status = 'active' AND b.owner = 'acb-evolver' + ORDER BY display_rating ASC`) + if err != nil { + log.Fatalf("query: %v", err) + } + defer rows.Close() + type row struct { + programID int64 + botID, botName string + displayRating float64 + } + var bots []row + for rows.Next() { + var r row + if err := rows.Scan(&r.programID, &r.botID, &r.botName, &r.displayRating); err != nil { + log.Fatalf("scan: %v", err) + } + bots = append(bots, r) + } + _ = rows.Err() + + remaining := len(bots) + fmt.Printf("Active evolved bots: %d (threshold=%.0f cap=%d)\n", remaining, *threshold, *cap) + for _, b := range bots { + var why string + if b.displayRating < *threshold { + why = fmt.Sprintf("rating %.0f < threshold", b.displayRating) + } else if remaining > *cap { + why = "over cap" + } + mark := " keep" + if why != "" { + mark = " RETIRE" + remaining-- + } + fmt.Printf("%s bot_id=%-12s bot_name=%-20s rating=%.0f %s\n", + mark, b.botID, b.botName, b.displayRating, why) + } + return + } + + retired, err := p.EnforcePolicy(ctx) + if err != nil { + log.Fatalf("enforce policy: %v", err) + } + + if len(retired) == 0 { + fmt.Println("retire: nothing to retire") + return + } + fmt.Printf("retire: retired %d bot(s):\n", len(retired)) + for _, r := range retired { + fmt.Printf(" bot_id=%-12s bot_name=%-20s rating=%.0f reason=%s\n", + r.BotID, r.BotName, r.DisplayRating, r.Reason) + } +} + +func envOrDefault(key, fallback string) string { + if v := os.Getenv(key); v != "" { + return v + } + return fallback +} + // runValidate parses flags, runs the three-stage validation pipeline on a bot // source file, and optionally logs the result to the database. //