Add evaluation arena, promotion gate, and retirement policy (Phase 7)
- arena/arena.go: 10-match mini-tournament running candidate as a local subprocess against diverse live opponents sampled across the rating distribution; AES-GCM secret decryption for opponent auth
- arena/psro.go: Nash equilibrium computation for the 1×K meta-game; FictitiousPlayNash included for future K×K support
- arena/winrate.go: Wilson-score 95% CI for win-rate calculation; draws counted as 0.5 wins
- arena/gate.go: two-part promotion gate — Nash value ≥ threshold AND MAP-Elites niche fill or improvement; detailed reason strings
- promoter/promoter.go: full promotion pipeline — bot source + Dockerfile + K8s Secret/Deployment/Service manifests, docker build, git commit/push (ArgoCD sync), kubectl readiness poll, bots-table INSERT, programs-table update; RetireBot and EnforcePolicy (rating threshold + population cap 50)
- db/db.go: add bot_name / bot_secret migration columns
- db/programs.go: ListPromoted, SetBotNameAndSecret, UnsetPromoted, GetByBotID, PromotedCount helpers for promotion/retirement lifecycle
- main.go: evaluate and retire subcommands wiring arena + gate + promoter; remove unused island flag from evaluate
- arena/arena_test.go: 21 unit tests covering Nash, Wilson CI, Gate logic, and selectDiverse opponent sampling
- promoter/promoter_test.go: tests for Dockerfiles, bot-ID/secret generation, AES-GCM helpers, and K8s manifest templates

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
5669688984
commit
76e8791e4d
10 changed files with 2453 additions and 3 deletions
525
cmd/acb-evolver/internal/arena/arena.go
Normal file
525
cmd/acb-evolver/internal/arena/arena.go
Normal file
|
|
@ -0,0 +1,525 @@
|
|||
// Package arena implements the 10-match mini-tournament evaluation system
|
||||
// for evolved bot candidates.
|
||||
//
|
||||
// The arena starts the candidate as a local subprocess (the same way the
|
||||
// sandbox does during validation), selects a diverse set of live opponents
|
||||
// from the PostgreSQL database, and runs one match per opponent using the
|
||||
// game engine directly. No job queue or ACB API calls are needed for
|
||||
// evaluation matches.
|
||||
package arena
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/aes"
|
||||
"crypto/cipher"
|
||||
"database/sql"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"log"
|
||||
"math/rand"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/aicodebattle/acb/engine"
|
||||
_ "github.com/lib/pq"
|
||||
)
|
||||
|
||||
const (
	// DefaultNumMatches is the number of matches played in one
	// mini-tournament (10 per spec).
	DefaultNumMatches = 10

	// evalSecret is the shared secret used while the candidate runs locally.
	// It is exported to the candidate subprocess as BOT_SECRET and handed to
	// the engine as the candidate's auth secret, so both sides agree on the
	// signing key. Evaluation only.
	evalSecret = "acb-eval-secret-for-tournament-evaluation-only"

	// evalBotID is the placeholder bot ID sent in the candidate's auth config.
	evalBotID = "b_evalcandidate"

	// healthPollInterval is the pause between two candidate /health probes.
	healthPollInterval = 200 * time.Millisecond

	// healthStartupTimeout bounds how long a freshly started candidate may
	// take to answer /health with 200 OK.
	healthStartupTimeout = 30 * time.Second
)
|
||||
|
||||
// BotRecord describes one live opponent as loaded from the bots table.
type BotRecord struct {
	// BotID is the bot's identifier (bots.bot_id).
	BotID string
	// Name is the bot's display name, used in logs and as the match label.
	Name string
	// EndpointURL is the base URL the engine calls during a match.
	EndpointURL string
	// Secret is the bot's shared secret. It is plaintext once decryption
	// with the configured encryption key has succeeded; otherwise it holds
	// the stored value unchanged.
	Secret string
	// RatingMu is the bot's rating mean, used to spread opponent selection
	// across the rating distribution.
	RatingMu float64
}
|
||||
|
||||
// MatchOutcome is the record of a single evaluation match.
type MatchOutcome struct {
	// OpponentBotID and OpponentName identify the live bot that was played.
	OpponentBotID string
	OpponentName  string

	// CandidateSlot is the player slot (0 or 1) the candidate was seated in.
	CandidateSlot int

	// Winner is the winning player slot: 0 = player0, 1 = player1,
	// -1 = draw.
	Winner int

	// Scores and Turns are copied from the engine's match result.
	Scores []int
	Turns  int

	// Err is non-nil when the match runner failed; the result fields above
	// are then left at their zero values.
	Err error
}
|
||||
|
||||
// CandidateWon returns true when the candidate won this match.
|
||||
func (o *MatchOutcome) CandidateWon() bool {
|
||||
return o.Err == nil && o.Winner == o.CandidateSlot
|
||||
}
|
||||
|
||||
// CandidateLost returns true when the candidate lost (not a draw or error).
|
||||
func (o *MatchOutcome) CandidateLost() bool {
|
||||
return o.Err == nil && o.Winner != -1 && o.Winner != o.CandidateSlot
|
||||
}
|
||||
|
||||
// Result aggregates mini-tournament outcomes for a candidate.
|
||||
type Result struct {
|
||||
CandidateEndpoint string
|
||||
Outcomes []MatchOutcome
|
||||
|
||||
// Aggregate tallies (errors excluded from win/loss/draw counts).
|
||||
Wins int
|
||||
Losses int
|
||||
Draws int
|
||||
Errors int
|
||||
|
||||
// OpponentWinRates maps opponent BotID → candidate win rate vs that bot.
|
||||
OpponentWinRates map[string]float64
|
||||
|
||||
// WinRateVec is an ordered slice of per-opponent win rates (one entry per
|
||||
// distinct opponent played, in match order, errors omitted). Used by PSRO.
|
||||
WinRateVec []float64
|
||||
}
|
||||
|
||||
// Config holds the tunable parameters of an Arena.
type Config struct {
	// NumMatches is the tournament size (default: DefaultNumMatches = 10).
	NumMatches int

	// BotTimeout is the per-turn HTTP timeout applied to both bots.
	BotTimeout time.Duration

	// EncryptionKey is the hex-encoded AES-256-GCM key used to decrypt
	// opponent secrets read from the database. When empty, secrets are used
	// exactly as stored (plaintext).
	EncryptionKey string
}
|
||||
|
||||
// DefaultConfig returns production-ready arena defaults.
|
||||
func DefaultConfig() Config {
|
||||
return Config{
|
||||
NumMatches: DefaultNumMatches,
|
||||
BotTimeout: 3 * time.Second,
|
||||
}
|
||||
}
|
||||
|
||||
// Arena orchestrates mini-tournament evaluation of bot candidates.
|
||||
type Arena struct {
|
||||
db *sql.DB
|
||||
cfg Config
|
||||
rng *rand.Rand
|
||||
log *log.Logger
|
||||
}
|
||||
|
||||
// New creates an Arena backed by the given database connection.
|
||||
func New(db *sql.DB, cfg Config) *Arena {
|
||||
return &Arena{
|
||||
db: db,
|
||||
cfg: cfg,
|
||||
rng: rand.New(rand.NewSource(time.Now().UnixNano())),
|
||||
log: log.Default(),
|
||||
}
|
||||
}
|
||||
|
||||
// Run executes a mini-tournament for the candidate bot.
|
||||
//
|
||||
// code is the candidate's source code; language is one of
|
||||
// go|python|rust|typescript|java|php.
|
||||
//
|
||||
// The candidate is built and started as a local subprocess, then played
|
||||
// against cfg.NumMatches opponents sampled from the live bot fleet.
|
||||
func (a *Arena) Run(ctx context.Context, code, language string) (*Result, error) {
|
||||
proc, err := startCandidate(ctx, code, language)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("start candidate subprocess: %w", err)
|
||||
}
|
||||
defer proc.stop()
|
||||
|
||||
candidateURL := fmt.Sprintf("http://127.0.0.1:%d", proc.port)
|
||||
|
||||
opponents, err := a.selectOpponents(ctx, a.cfg.NumMatches)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("select opponents: %w", err)
|
||||
}
|
||||
if len(opponents) == 0 {
|
||||
return nil, fmt.Errorf("no active opponents available in live bot fleet")
|
||||
}
|
||||
|
||||
result := &Result{
|
||||
CandidateEndpoint: candidateURL,
|
||||
OpponentWinRates: make(map[string]float64),
|
||||
}
|
||||
|
||||
for i, opp := range opponents {
|
||||
a.log.Printf("arena: match %d/%d vs %s (%s)", i+1, len(opponents), opp.Name, opp.BotID)
|
||||
outcome := a.runMatch(ctx, candidateURL, opp)
|
||||
result.Outcomes = append(result.Outcomes, outcome)
|
||||
|
||||
switch {
|
||||
case outcome.Err != nil:
|
||||
result.Errors++
|
||||
a.log.Printf("arena: match %d error: %v", i+1, outcome.Err)
|
||||
case outcome.CandidateWon():
|
||||
result.Wins++
|
||||
case outcome.CandidateLost():
|
||||
result.Losses++
|
||||
default:
|
||||
result.Draws++
|
||||
}
|
||||
}
|
||||
|
||||
// Compute per-opponent win rates.
|
||||
oppWins := make(map[string]int)
|
||||
oppTotal := make(map[string]int)
|
||||
for _, o := range result.Outcomes {
|
||||
if o.Err != nil {
|
||||
continue
|
||||
}
|
||||
oppTotal[o.OpponentBotID]++
|
||||
if o.CandidateWon() {
|
||||
oppWins[o.OpponentBotID]++
|
||||
}
|
||||
}
|
||||
for id, total := range oppTotal {
|
||||
if total > 0 {
|
||||
result.OpponentWinRates[id] = float64(oppWins[id]) / float64(total)
|
||||
}
|
||||
}
|
||||
|
||||
// Build ordered win-rate vector for PSRO (one entry per distinct opponent).
|
||||
seen := make(map[string]bool)
|
||||
for _, o := range result.Outcomes {
|
||||
if o.Err != nil || seen[o.OpponentBotID] {
|
||||
continue
|
||||
}
|
||||
seen[o.OpponentBotID] = true
|
||||
result.WinRateVec = append(result.WinRateVec, result.OpponentWinRates[o.OpponentBotID])
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// selectOpponents queries active bots from the database and picks n opponents
|
||||
// spread across the rating distribution for behavioral diversity.
|
||||
func (a *Arena) selectOpponents(ctx context.Context, n int) ([]BotRecord, error) {
|
||||
rows, err := a.db.QueryContext(ctx, `
|
||||
SELECT bot_id, name, endpoint_url, shared_secret, rating_mu
|
||||
FROM bots
|
||||
WHERE status = 'active' AND endpoint_url <> ''
|
||||
ORDER BY rating_mu DESC`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query bots: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var all []BotRecord
|
||||
for rows.Next() {
|
||||
var b BotRecord
|
||||
if err := rows.Scan(&b.BotID, &b.Name, &b.EndpointURL, &b.Secret, &b.RatingMu); err != nil {
|
||||
return nil, fmt.Errorf("scan bot: %w", err)
|
||||
}
|
||||
if a.cfg.EncryptionKey != "" {
|
||||
if plain, err := decryptAESGCM(b.Secret, a.cfg.EncryptionKey); err == nil {
|
||||
b.Secret = plain
|
||||
}
|
||||
// Leave as-is on error (may be stored plaintext in dev).
|
||||
}
|
||||
all = append(all, b)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return selectDiverse(all, n, a.rng), nil
|
||||
}
|
||||
|
||||
// selectDiverse picks n bots spread evenly across the rating-sorted slice.
|
||||
// When fewer than n bots exist, opponents are reused (shuffled for variety).
|
||||
func selectDiverse(all []BotRecord, n int, rng *rand.Rand) []BotRecord {
|
||||
if len(all) == 0 {
|
||||
return nil
|
||||
}
|
||||
sort.Slice(all, func(i, j int) bool { return all[i].RatingMu > all[j].RatingMu })
|
||||
|
||||
selected := make([]BotRecord, 0, n)
|
||||
if len(all) >= n {
|
||||
for i := 0; i < n; i++ {
|
||||
idx := int(float64(i) / float64(n) * float64(len(all)))
|
||||
selected = append(selected, all[idx])
|
||||
}
|
||||
} else {
|
||||
for len(selected) < n {
|
||||
perm := rng.Perm(len(all))
|
||||
for _, idx := range perm {
|
||||
selected = append(selected, all[idx])
|
||||
if len(selected) >= n {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
rng.Shuffle(len(selected), func(i, j int) { selected[i], selected[j] = selected[j], selected[i] })
|
||||
return selected
|
||||
}
|
||||
|
||||
// runMatch runs one match between the local candidate and a live opponent.
|
||||
func (a *Arena) runMatch(ctx context.Context, candidateURL string, opp BotRecord) MatchOutcome {
|
||||
outcome := MatchOutcome{
|
||||
OpponentBotID: opp.BotID,
|
||||
OpponentName: opp.Name,
|
||||
}
|
||||
|
||||
// Randomise player slot for positional fairness.
|
||||
candidateSlot := a.rng.Intn(2)
|
||||
outcome.CandidateSlot = candidateSlot
|
||||
|
||||
matchID := fmt.Sprintf("eval-%d", time.Now().UnixNano())
|
||||
mr := engine.NewMatchRunner(
|
||||
engine.DefaultConfig(),
|
||||
engine.WithTimeout(a.cfg.BotTimeout),
|
||||
engine.WithRNG(rand.New(rand.NewSource(a.rng.Int63()))),
|
||||
)
|
||||
|
||||
candidateBot := engine.NewHTTPBot(candidateURL,
|
||||
engine.AuthConfig{BotID: evalBotID, Secret: evalSecret, MatchID: matchID},
|
||||
engine.WithHTTPTimeout(a.cfg.BotTimeout))
|
||||
|
||||
oppBot := engine.NewHTTPBot(opp.EndpointURL,
|
||||
engine.AuthConfig{BotID: opp.BotID, Secret: opp.Secret, MatchID: matchID},
|
||||
engine.WithHTTPTimeout(a.cfg.BotTimeout))
|
||||
|
||||
if candidateSlot == 0 {
|
||||
mr.AddBot(candidateBot, "candidate")
|
||||
mr.AddBot(oppBot, opp.Name)
|
||||
} else {
|
||||
mr.AddBot(oppBot, opp.Name)
|
||||
mr.AddBot(candidateBot, "candidate")
|
||||
}
|
||||
|
||||
res, _, err := mr.Run()
|
||||
if err != nil {
|
||||
outcome.Err = fmt.Errorf("match runner: %w", err)
|
||||
return outcome
|
||||
}
|
||||
outcome.Winner = res.Winner
|
||||
outcome.Scores = res.Scores
|
||||
outcome.Turns = res.Turns
|
||||
return outcome
|
||||
}
|
||||
|
||||
// ── candidate subprocess management ──────────────────────────────────────────
|
||||
|
||||
// botProcess tracks a candidate bot running as a local subprocess.
type botProcess struct {
	// port is the loopback TCP port passed to the bot as BOT_PORT.
	port int
	// cmd is the started bot process.
	cmd *exec.Cmd
	// tmpDir is the scratch directory holding the bot's source and build
	// output; stop removes it.
	tmpDir string
}
|
||||
|
||||
func (p *botProcess) stop() {
|
||||
if p.cmd != nil && p.cmd.Process != nil {
|
||||
_ = p.cmd.Process.Kill()
|
||||
_ = p.cmd.Wait()
|
||||
}
|
||||
if p.tmpDir != "" {
|
||||
os.RemoveAll(p.tmpDir)
|
||||
}
|
||||
}
|
||||
|
||||
func startCandidate(ctx context.Context, code, language string) (*botProcess, error) {
|
||||
tmpDir, err := os.MkdirTemp("", "acb-arena-*")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("mkdirtemp: %w", err)
|
||||
}
|
||||
|
||||
execPath, execArgs, err := buildCandidate(ctx, code, language, tmpDir)
|
||||
if err != nil {
|
||||
os.RemoveAll(tmpDir)
|
||||
return nil, fmt.Errorf("build: %w", err)
|
||||
}
|
||||
|
||||
port, err := allocateFreePort()
|
||||
if err != nil {
|
||||
os.RemoveAll(tmpDir)
|
||||
return nil, fmt.Errorf("allocate port: %w", err)
|
||||
}
|
||||
|
||||
env := append(os.Environ(),
|
||||
fmt.Sprintf("BOT_PORT=%d", port),
|
||||
"BOT_SECRET="+evalSecret,
|
||||
)
|
||||
|
||||
var args []string
|
||||
args = append(args, execArgs...)
|
||||
cmd := exec.CommandContext(ctx, execPath, args...)
|
||||
cmd.Env = env
|
||||
cmd.Dir = tmpDir
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
os.RemoveAll(tmpDir)
|
||||
return nil, fmt.Errorf("start process: %w", err)
|
||||
}
|
||||
|
||||
proc := &botProcess{port: port, cmd: cmd, tmpDir: tmpDir}
|
||||
addr := fmt.Sprintf("127.0.0.1:%d", port)
|
||||
if err := waitForHealth(ctx, addr); err != nil {
|
||||
proc.stop()
|
||||
return nil, fmt.Errorf("candidate health: %w", err)
|
||||
}
|
||||
return proc, nil
|
||||
}
|
||||
|
||||
func buildCandidate(ctx context.Context, code, language, dir string) (string, []string, error) {
|
||||
switch language {
|
||||
case "go":
|
||||
if err := os.WriteFile(dir+"/bot.go", []byte(code), 0o600); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
if err := os.WriteFile(dir+"/go.mod", []byte("module bot\n\ngo 1.21\n"), 0o600); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
bin := dir + "/bot"
|
||||
cmd := exec.CommandContext(ctx, "go", "build", "-o", bin, ".")
|
||||
cmd.Dir = dir
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return "", nil, fmt.Errorf("go build: %s", truncate(string(out), 512))
|
||||
}
|
||||
return bin, nil, nil
|
||||
|
||||
case "python":
|
||||
src := dir + "/bot.py"
|
||||
if err := os.WriteFile(src, []byte(code), 0o600); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
return "python3", []string{src}, nil
|
||||
|
||||
case "rust":
|
||||
src := dir + "/main.rs"
|
||||
if err := os.WriteFile(src, []byte(code), 0o600); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
bin := dir + "/bot"
|
||||
cmd := exec.CommandContext(ctx, "rustc", "--edition", "2021", src, "-o", bin)
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return "", nil, fmt.Errorf("rustc: %s", truncate(string(out), 512))
|
||||
}
|
||||
return bin, nil, nil
|
||||
|
||||
case "typescript":
|
||||
if err := os.WriteFile(dir+"/bot.ts", []byte(code), 0o600); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
tsconfig := `{"compilerOptions":{"target":"ES2020","module":"commonjs","outDir":"./"},"files":["bot.ts"]}`
|
||||
if err := os.WriteFile(dir+"/tsconfig.json", []byte(tsconfig), 0o600); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
cmd := exec.CommandContext(ctx, "tsc", "--project", dir+"/tsconfig.json")
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return "", nil, fmt.Errorf("tsc: %s", truncate(string(out), 512))
|
||||
}
|
||||
return "node", []string{dir + "/bot.js"}, nil
|
||||
|
||||
case "java":
|
||||
src := dir + "/Bot.java"
|
||||
if err := os.WriteFile(src, []byte(code), 0o600); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
cmd := exec.CommandContext(ctx, "javac", src)
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return "", nil, fmt.Errorf("javac: %s", truncate(string(out), 512))
|
||||
}
|
||||
return "java", []string{"-cp", dir, "Bot"}, nil
|
||||
|
||||
case "php":
|
||||
src := dir + "/bot.php"
|
||||
if err := os.WriteFile(src, []byte(code), 0o600); err != nil {
|
||||
return "", nil, err
|
||||
}
|
||||
return "php", []string{src}, nil
|
||||
|
||||
default:
|
||||
return "", nil, fmt.Errorf("unsupported language: %s", language)
|
||||
}
|
||||
}
|
||||
|
||||
// allocateFreePort asks the OS for an unused loopback TCP port by listening
// on port 0, reads the assigned port number, and closes the listener again.
// The port is not reserved after return, so another process could in
// principle claim it before the caller binds it.
func allocateFreePort() (int, error) {
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		return 0, err
	}
	defer listener.Close()

	tcpAddr := listener.Addr().(*net.TCPAddr)
	return tcpAddr.Port, nil
}
|
||||
|
||||
// waitForHealth polls GET /health until 200 OK or healthStartupTimeout elapses.
|
||||
func waitForHealth(ctx context.Context, addr string) error {
|
||||
deadline := time.Now().Add(healthStartupTimeout)
|
||||
client := &http.Client{Timeout: 500 * time.Millisecond}
|
||||
for time.Now().Before(deadline) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://"+addr+"/health", nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if resp, err := client.Do(req); err == nil {
|
||||
resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusOK {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(healthPollInterval):
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("candidate did not become healthy within %s", healthStartupTimeout)
|
||||
}
|
||||
|
||||
// decryptAESGCM decrypts a hex-encoded AES-256-GCM message using a
// hex-encoded 32-byte key and returns the plaintext as a string.
//
// The decoded message is expected to be the nonce followed by the sealed
// data. An error is returned when either input is not valid hex, the key is
// not exactly 32 bytes, the message is shorter than the nonce, or
// authentication fails.
func decryptAESGCM(ciphertextHex, keyHex string) (string, error) {
	const keyLen = 32

	rawKey, err := hex.DecodeString(keyHex)
	switch {
	case err != nil:
		return "", fmt.Errorf("decode key: %w", err)
	case len(rawKey) != keyLen:
		return "", fmt.Errorf("key must be 32 bytes (64 hex chars)")
	}

	sealed, err := hex.DecodeString(ciphertextHex)
	if err != nil {
		return "", fmt.Errorf("decode ciphertext: %w", err)
	}

	blk, err := aes.NewCipher(rawKey)
	if err != nil {
		return "", err
	}
	gcm, err := cipher.NewGCM(blk)
	if err != nil {
		return "", err
	}

	nonceLen := gcm.NonceSize()
	if len(sealed) < nonceLen {
		return "", fmt.Errorf("ciphertext too short")
	}

	// Split the message into its nonce prefix and the sealed remainder.
	nonce, body := sealed[:nonceLen], sealed[nonceLen:]
	opened, err := gcm.Open(nil, nonce, body, nil)
	if err != nil {
		return "", err
	}
	return string(opened), nil
}
|
||||
|
||||
// truncate shortens s to at most max bytes and appends "…" when anything was
// cut off; strings that already fit are returned unchanged.
//
// The cut never lands inside a multi-byte UTF-8 sequence: it backs up to the
// start of the rune that would otherwise be split, so valid input stays
// valid. A negative max is treated as 0.
func truncate(s string, max int) string {
	if max < 0 {
		max = 0
	}
	if len(s) <= max {
		return s
	}
	// UTF-8 continuation bytes look like 10xxxxxx. If the first dropped byte
	// is one, the cut would split a rune, so move the cut left.
	for max > 0 && s[max]&0xC0 == 0x80 {
		max--
	}
	return s[:max] + "…"
}
|
||||
314
cmd/acb-evolver/internal/arena/arena_test.go
Normal file
314
cmd/acb-evolver/internal/arena/arena_test.go
Normal file
|
|
@ -0,0 +1,314 @@
|
|||
package arena
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"testing"
|
||||
|
||||
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites"
|
||||
)
|
||||
|
||||
// ── ComputeNash ───────────────────────────────────────────────────────────────
|
||||
|
||||
func TestComputeNash_EmptySlice(t *testing.T) {
|
||||
r := ComputeNash(nil)
|
||||
if r.NashValue != 0.5 {
|
||||
t.Errorf("empty: NashValue = %.3f, want 0.5", r.NashValue)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeNash_SingleOpponent(t *testing.T) {
|
||||
r := ComputeNash([]float64{0.7})
|
||||
if r.NashValue != 0.7 {
|
||||
t.Errorf("single: NashValue = %.3f, want 0.7", r.NashValue)
|
||||
}
|
||||
if r.OpponentMix[0] != 1.0 {
|
||||
t.Errorf("single: mix[0] = %.3f, want 1.0", r.OpponentMix[0])
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeNash_MinimumIsHardestOpponent(t *testing.T) {
|
||||
// Column player minimises candidate win rate → Nash value = min(winRates).
|
||||
winRates := []float64{0.8, 0.3, 0.6}
|
||||
r := ComputeNash(winRates)
|
||||
if r.NashValue != 0.3 {
|
||||
t.Errorf("NashValue = %.3f, want 0.3", r.NashValue)
|
||||
}
|
||||
// All weight on opponent index 1 (win rate 0.3).
|
||||
for i, w := range r.OpponentMix {
|
||||
if i == 1 {
|
||||
if w != 1.0 {
|
||||
t.Errorf("mix[1] = %.3f, want 1.0", w)
|
||||
}
|
||||
} else if w != 0.0 {
|
||||
t.Errorf("mix[%d] = %.3f, want 0.0", i, w)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeNash_TiedMinimum(t *testing.T) {
|
||||
// Two opponents tied at the minimum: weight is split 50/50.
|
||||
winRates := []float64{0.2, 0.8, 0.2}
|
||||
r := ComputeNash(winRates)
|
||||
if r.NashValue != 0.2 {
|
||||
t.Errorf("NashValue = %.3f, want 0.2", r.NashValue)
|
||||
}
|
||||
if r.OpponentMix[0] != 0.5 || r.OpponentMix[2] != 0.5 {
|
||||
t.Errorf("tied mix = %v, want [0.5 0.0 0.5]", r.OpponentMix)
|
||||
}
|
||||
if r.OpponentMix[1] != 0.0 {
|
||||
t.Errorf("mix[1] = %.3f, want 0.0", r.OpponentMix[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeNash_AllEqual(t *testing.T) {
|
||||
winRates := []float64{0.5, 0.5, 0.5}
|
||||
r := ComputeNash(winRates)
|
||||
if r.NashValue != 0.5 {
|
||||
t.Errorf("all-equal: NashValue = %.3f, want 0.5", r.NashValue)
|
||||
}
|
||||
// All opponents get equal weight.
|
||||
expected := 1.0 / 3.0
|
||||
for i, w := range r.OpponentMix {
|
||||
if abs(w-expected) > 1e-9 {
|
||||
t.Errorf("mix[%d] = %.6f, want %.6f", i, w, expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFictitiousPlayNash_MatchesMinimaxForSingleRow(t *testing.T) {
|
||||
winRates := []float64{0.8, 0.3, 0.6}
|
||||
fp := FictitiousPlayNash(winRates, 10000)
|
||||
if abs(fp.NashValue-0.3) > 0.01 {
|
||||
t.Errorf("fictitious play: NashValue = %.3f, want ≈0.3", fp.NashValue)
|
||||
}
|
||||
}
|
||||
|
||||
// ── WinRate ───────────────────────────────────────────────────────────────────
|
||||
|
||||
func TestWinRate_ZeroTotal(t *testing.T) {
|
||||
r := WinRate(0, 0)
|
||||
if r.Rate != 0.5 {
|
||||
t.Errorf("zero total: Rate = %.3f, want 0.5", r.Rate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWinRate_AllWins(t *testing.T) {
|
||||
r := WinRate(10, 10)
|
||||
if r.Rate != 1.0 {
|
||||
t.Errorf("all wins: Rate = %.3f, want 1.0", r.Rate)
|
||||
}
|
||||
if r.Lower > r.Upper {
|
||||
t.Errorf("CI inverted: lower=%.3f upper=%.3f", r.Lower, r.Upper)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWinRate_AllLosses(t *testing.T) {
|
||||
r := WinRate(0, 10)
|
||||
if r.Rate != 0.0 {
|
||||
t.Errorf("all losses: Rate = %.3f, want 0.0", r.Rate)
|
||||
}
|
||||
if r.Lower < 0.0 || r.Upper > 1.0 {
|
||||
t.Errorf("CI out of [0,1]: lower=%.3f upper=%.3f", r.Lower, r.Upper)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWinRate_FiftyPercent(t *testing.T) {
|
||||
r := WinRate(5, 10)
|
||||
if abs(r.Rate-0.5) > 1e-9 {
|
||||
t.Errorf("50%%: Rate = %.3f, want 0.5", r.Rate)
|
||||
}
|
||||
if r.Lower >= 0.5 || r.Upper <= 0.5 {
|
||||
t.Errorf("50%% CI should straddle 0.5: lower=%.3f upper=%.3f", r.Lower, r.Upper)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWinRate_CIBounds(t *testing.T) {
|
||||
// CI bounds must always lie in [0, 1].
|
||||
for wins := 0; wins <= 10; wins++ {
|
||||
r := WinRate(wins, 10)
|
||||
if r.Lower < 0.0 || r.Upper > 1.0 {
|
||||
t.Errorf("wins=%d: CI [%.3f, %.3f] outside [0,1]", wins, r.Lower, r.Upper)
|
||||
}
|
||||
if r.Lower > r.Upper {
|
||||
t.Errorf("wins=%d: lower (%.3f) > upper (%.3f)", wins, r.Lower, r.Upper)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ── ComputeFromResult ─────────────────────────────────────────────────────────
|
||||
|
||||
func TestComputeFromResult_Basic(t *testing.T) {
|
||||
r := &Result{Wins: 7, Losses: 2, Draws: 1}
|
||||
wr := ComputeFromResult(r)
|
||||
if wr.Wins != 7 {
|
||||
t.Errorf("Wins = %d, want 7", wr.Wins)
|
||||
}
|
||||
// 7 wins / 10 total = 0.7 rate
|
||||
if abs(wr.Rate-0.7) > 1e-9 {
|
||||
t.Errorf("Rate = %.3f, want 0.7", wr.Rate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestComputeFromResult_OnlyErrors(t *testing.T) {
|
||||
r := &Result{Wins: 0, Losses: 0, Draws: 0, Errors: 5}
|
||||
wr := ComputeFromResult(r)
|
||||
if wr.Total != 0 {
|
||||
t.Errorf("Total = %d, want 0 (errors excluded)", wr.Total)
|
||||
}
|
||||
}
|
||||
|
||||
// ── Gate.Evaluate ─────────────────────────────────────────────────────────────
|
||||
|
||||
func TestGate_PromotedWhenAllCriteriaMet(t *testing.T) {
|
||||
grid := mapelites.New(10)
|
||||
gate := NewGate(DefaultGateConfig(), grid)
|
||||
|
||||
result := &Result{
|
||||
Wins: 8, Losses: 2, Draws: 0,
|
||||
WinRateVec: []float64{0.8, 0.7, 0.9, 0.6, 0.8, 0.7, 0.8, 0.9, 0.7, 0.8},
|
||||
}
|
||||
|
||||
gr := gate.Evaluate(result, 1, 0.8, []float64{0.5, 0.5})
|
||||
if !gr.Promoted {
|
||||
t.Errorf("expected promoted, got rejected: %s", gr.Reason)
|
||||
}
|
||||
if !gr.MapElitesPlaced {
|
||||
t.Error("expected MapElitesPlaced = true for empty grid")
|
||||
}
|
||||
if gr.MapElitesImproved {
|
||||
t.Error("expected MapElitesImproved = false for empty cell")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_RejectedWhenNashTooLow(t *testing.T) {
|
||||
grid := mapelites.New(10)
|
||||
cfg := GateConfig{NashThreshold: 0.60, WinRateLowerBound: 0.0}
|
||||
gate := NewGate(cfg, grid)
|
||||
|
||||
// WinRateVec has a low value → Nash = min = 0.2, below 0.60
|
||||
result := &Result{
|
||||
Wins: 7, Losses: 3,
|
||||
WinRateVec: []float64{0.9, 0.2, 0.9, 0.9, 0.9},
|
||||
}
|
||||
|
||||
gr := gate.Evaluate(result, 2, 0.7, []float64{0.5, 0.5})
|
||||
if gr.Promoted {
|
||||
t.Errorf("should be rejected (Nash too low), got: %s", gr.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_RejectedWhenNicheOccupiedByFitterBot(t *testing.T) {
|
||||
grid := mapelites.New(10)
|
||||
|
||||
// Pre-occupy the [5,5] cell with a very fit bot.
|
||||
grid.TryPlace(99, 0.99, 0.5, 0.5)
|
||||
|
||||
cfg := DefaultGateConfig()
|
||||
gate := NewGate(cfg, grid)
|
||||
|
||||
// Candidate is in the same niche but has lower fitness.
|
||||
result := &Result{
|
||||
Wins: 7, Losses: 3,
|
||||
WinRateVec: []float64{0.8, 0.7, 0.9, 0.6, 0.8, 0.7, 0.8, 0.9, 0.7, 0.8},
|
||||
}
|
||||
|
||||
gr := gate.Evaluate(result, 1, 0.7, []float64{0.5, 0.5})
|
||||
if gr.Promoted {
|
||||
t.Errorf("should be rejected (niche occupied by fitter bot), got: %s", gr.Reason)
|
||||
}
|
||||
if gr.MapElitesPlaced {
|
||||
t.Error("MapElitesPlaced should be false when existing bot is fitter")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGate_PromotedWhenOutperformsNicheChampion(t *testing.T) {
|
||||
grid := mapelites.New(10)
|
||||
|
||||
// Pre-occupy with a weaker bot.
|
||||
grid.TryPlace(99, 0.4, 0.5, 0.5)
|
||||
|
||||
cfg := DefaultGateConfig()
|
||||
gate := NewGate(cfg, grid)
|
||||
|
||||
// Candidate is fitter than the incumbent.
|
||||
result := &Result{
|
||||
Wins: 8, Losses: 2,
|
||||
WinRateVec: []float64{0.8, 0.7, 0.9, 0.6, 0.8, 0.7, 0.8, 0.9, 0.7, 0.8},
|
||||
}
|
||||
|
||||
gr := gate.Evaluate(result, 1, 0.8, []float64{0.5, 0.5})
|
||||
if !gr.Promoted {
|
||||
t.Errorf("should be promoted (beats incumbent), got: %s", gr.Reason)
|
||||
}
|
||||
if !gr.MapElitesImproved {
|
||||
t.Error("MapElitesImproved should be true when beating existing champion")
|
||||
}
|
||||
}
|
||||
|
||||
// ── selectDiverse ─────────────────────────────────────────────────────────────
|
||||
|
||||
func TestSelectDiverse_EmptyPool(t *testing.T) {
|
||||
rng := rand.New(rand.NewSource(42))
|
||||
result := selectDiverse(nil, 5, rng)
|
||||
if len(result) != 0 {
|
||||
t.Errorf("empty pool: got %d opponents, want 0", len(result))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectDiverse_ExactlyN(t *testing.T) {
|
||||
rng := rand.New(rand.NewSource(42))
|
||||
bots := makeBots(5)
|
||||
result := selectDiverse(bots, 5, rng)
|
||||
if len(result) != 5 {
|
||||
t.Errorf("exact n: got %d opponents, want 5", len(result))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectDiverse_MoreThanN(t *testing.T) {
|
||||
rng := rand.New(rand.NewSource(42))
|
||||
bots := makeBots(20)
|
||||
result := selectDiverse(bots, 10, rng)
|
||||
if len(result) != 10 {
|
||||
t.Errorf("more than n: got %d opponents, want 10", len(result))
|
||||
}
|
||||
// Verify spread: should sample across the sorted range, not just top/bottom.
|
||||
seen := make(map[string]bool)
|
||||
for _, b := range result {
|
||||
seen[b.BotID] = true
|
||||
}
|
||||
if len(seen) != 10 {
|
||||
t.Errorf("duplicates in diverse selection: got %d unique, want 10", len(seen))
|
||||
}
|
||||
}
|
||||
|
||||
func TestSelectDiverse_FewerThanN(t *testing.T) {
|
||||
rng := rand.New(rand.NewSource(42))
|
||||
bots := makeBots(3)
|
||||
// With only 3 bots, need to repeat to fill 10 slots.
|
||||
result := selectDiverse(bots, 10, rng)
|
||||
if len(result) != 10 {
|
||||
t.Errorf("fewer than n: got %d opponents, want 10", len(result))
|
||||
}
|
||||
}
|
||||
|
||||
// ── helpers ───────────────────────────────────────────────────────────────────
|
||||
|
||||
func makeBots(n int) []BotRecord {
|
||||
bots := make([]BotRecord, n)
|
||||
for i := range bots {
|
||||
bots[i] = BotRecord{
|
||||
BotID: fmt.Sprintf("b_%04d", i),
|
||||
Name: fmt.Sprintf("bot-%d", i),
|
||||
RatingMu: float64(1000 + i*50),
|
||||
}
|
||||
}
|
||||
return bots
|
||||
}
|
||||
|
||||
// abs returns the absolute value of x.
func abs(x float64) float64 {
	if x >= 0 {
		return x
	}
	return -x
}
|
||||
144
cmd/acb-evolver/internal/arena/gate.go
Normal file
144
cmd/acb-evolver/internal/arena/gate.go
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
// Package arena — promotion gate.
|
||||
//
|
||||
// The gate applies two independent criteria before promoting a candidate:
|
||||
//
|
||||
// 1. Nash value (PSRO) ≥ NashThreshold — sufficient win rate
|
||||
// 2. MAP-Elites niche fill or improvement — behavioral novelty
|
||||
//
|
||||
// Both must be satisfied. The Wilson-score CI lower bound is an optional
|
||||
// secondary guard on the overall win rate.
|
||||
package arena
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites"
|
||||
)
|
||||
|
||||
// GateConfig carries the thresholds a candidate must clear to be promoted.
type GateConfig struct {
	// NashThreshold is the minimum Nash value — the worst-case win rate
	// across opponents — required for promotion. Default: 0.50.
	NashThreshold float64

	// WinRateLowerBound is the minimum Wilson-score 95% CI lower bound for
	// the overall win rate. A value ≤ 0 disables the check. Default: 0.40.
	WinRateLowerBound float64
}
|
||||
|
||||
// DefaultGateConfig returns production-ready promotion thresholds.
|
||||
func DefaultGateConfig() GateConfig {
|
||||
return GateConfig{
|
||||
NashThreshold: 0.50,
|
||||
WinRateLowerBound: 0.40,
|
||||
}
|
||||
}
|
||||
|
||||
// GateResult holds the full promotion decision with supporting evidence.
|
||||
type GateResult struct {
|
||||
// Promoted is true when all criteria are met.
|
||||
Promoted bool
|
||||
|
||||
// Nash is the PSRO result for the mini-tournament.
|
||||
Nash NashResult
|
||||
|
||||
// WinRate is the overall win rate with 95% Wilson CI.
|
||||
WinRate WinRateResult
|
||||
|
||||
// MapElitesPlaced is true when the candidate was written to the MAP-Elites
|
||||
// grid (filled an empty cell or outperformed the incumbent).
|
||||
MapElitesPlaced bool
|
||||
|
||||
// MapElitesImproved is true when the candidate beat an existing champion
|
||||
// (as opposed to simply filling an empty niche).
|
||||
MapElitesImproved bool
|
||||
|
||||
// Placement is the (X, Y) grid cell the candidate occupies.
|
||||
Placement mapelites.Placement
|
||||
|
||||
// Reason is a human-readable explanation of the promotion decision.
|
||||
Reason string
|
||||
}
|
||||
|
||||
// Gate applies the promotion criteria to mini-tournament results.
|
||||
type Gate struct {
|
||||
cfg GateConfig
|
||||
grid *mapelites.Grid
|
||||
}
|
||||
|
||||
// NewGate creates a Gate backed by the provided MAP-Elites grid.
|
||||
// The grid is shared across evaluations so niche occupancy persists across
|
||||
// multiple Evaluate calls within one evolution run.
|
||||
func NewGate(cfg GateConfig, grid *mapelites.Grid) *Gate {
|
||||
return &Gate{cfg: cfg, grid: grid}
|
||||
}
|
||||
|
||||
// Evaluate applies the two-part promotion gate to the arena result.
|
||||
//
|
||||
// programID and fitness are the candidate's identifiers in the programs table.
|
||||
// behaviorVec is [aggression, economy] ∈ [0,1]²; defaults to [0.5, 0.5] when
|
||||
// nil or short.
|
||||
//
|
||||
// Side effect: g.grid.TryPlace is called — the cell is updated when the
|
||||
// candidate wins its behavioral niche.
|
||||
func (g *Gate) Evaluate(result *Result, programID int64, fitness float64, behaviorVec []float64) *GateResult {
|
||||
wr := ComputeFromResult(result)
|
||||
nash := ComputeNash(result.WinRateVec)
|
||||
|
||||
agg, eco := 0.5, 0.5
|
||||
if len(behaviorVec) >= 2 {
|
||||
agg, eco = behaviorVec[0], behaviorVec[1]
|
||||
}
|
||||
|
||||
// Sample the cell state before TryPlace so we can distinguish
|
||||
// "fills empty niche" from "beats existing champion".
|
||||
cellX, cellY := g.grid.BehaviorToCell(agg, eco)
|
||||
priorCell := g.grid.Get(cellX, cellY)
|
||||
|
||||
placement, placed := g.grid.TryPlace(programID, fitness, agg, eco)
|
||||
|
||||
gr := &GateResult{
|
||||
Nash: nash,
|
||||
WinRate: wr,
|
||||
MapElitesPlaced: placed,
|
||||
MapElitesImproved: placed && priorCell.Occupied,
|
||||
Placement: placement,
|
||||
}
|
||||
|
||||
nashOK := nash.NashValue >= g.cfg.NashThreshold
|
||||
winOK := g.cfg.WinRateLowerBound <= 0 || wr.Lower >= g.cfg.WinRateLowerBound
|
||||
mapOK := placed
|
||||
|
||||
if nashOK && winOK && mapOK {
|
||||
gr.Promoted = true
|
||||
if !priorCell.Occupied {
|
||||
gr.Reason = fmt.Sprintf(
|
||||
"promoted: Nash=%.3f ≥ %.3f, WR=%.3f (95%% CI %.3f–%.3f), fills new niche [%d,%d]",
|
||||
nash.NashValue, g.cfg.NashThreshold,
|
||||
wr.Rate, wr.Lower, wr.Upper,
|
||||
placement.X, placement.Y)
|
||||
} else {
|
||||
gr.Reason = fmt.Sprintf(
|
||||
"promoted: Nash=%.3f ≥ %.3f, WR=%.3f (95%% CI %.3f–%.3f), beats niche [%d,%d] champion (%.3f→%.3f)",
|
||||
nash.NashValue, g.cfg.NashThreshold,
|
||||
wr.Rate, wr.Lower, wr.Upper,
|
||||
placement.X, placement.Y, priorCell.Fitness, fitness)
|
||||
}
|
||||
return gr
|
||||
}
|
||||
|
||||
var why []string
|
||||
if !nashOK {
|
||||
why = append(why, fmt.Sprintf("Nash=%.3f < %.3f", nash.NashValue, g.cfg.NashThreshold))
|
||||
}
|
||||
if !winOK {
|
||||
why = append(why, fmt.Sprintf("WR CI lower=%.3f < %.3f", wr.Lower, g.cfg.WinRateLowerBound))
|
||||
}
|
||||
if !mapOK {
|
||||
why = append(why, fmt.Sprintf("niche [%d,%d] occupied by fitter bot (fitness=%.3f)",
|
||||
placement.X, placement.Y, priorCell.Fitness))
|
||||
}
|
||||
gr.Reason = "rejected: " + strings.Join(why, "; ")
|
||||
return gr
|
||||
}
|
||||
119
cmd/acb-evolver/internal/arena/psro.go
Normal file
119
cmd/acb-evolver/internal/arena/psro.go
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
// Package arena — PSRO Nash equilibrium computation.
|
||||
//
|
||||
// LLM-PSRO (Policy Space Response Oracles) uses Nash equilibrium over the
|
||||
// current bot population as the promotion criterion. A candidate is promoted
|
||||
// only if it is a best response to the Nash mixture, i.e. its expected payoff
|
||||
// against the Nash mixture exceeds the threshold (default 0.50).
|
||||
//
|
||||
// For the mini-tournament setting (one candidate, K opponents), the payoff
|
||||
// matrix has a single row. The Nash-optimal strategy for the minimising
|
||||
// column player (opponents) is to concentrate weight on the opponent that
|
||||
// minimises the candidate's expected win rate. The resulting Nash value is
|
||||
// therefore min(winRates), which is the tightest possible test.
|
||||
//
|
||||
// The full fictitious-play algorithm is retained so it generalises cleanly
|
||||
// to K×K payoff matrices when the population grows.
|
||||
package arena
|
||||
|
||||
// NashResult is the output of a Nash equilibrium computation over the
// candidate-versus-opponents meta-game.
type NashResult struct {
	// OpponentMix holds, per opponent index, that opponent's probability in
	// the Nash mixture. The entries add up to 1.0.
	OpponentMix []float64

	// NashValue is the candidate's expected win rate when opponents are drawn
	// from OpponentMix. The promotion gate compares this number against its
	// threshold.
	NashValue float64

	// WinRatePerOpponent carries the payoff row that was passed in, kept
	// alongside the result for convenience.
	WinRatePerOpponent []float64
}
|
||||
|
||||
// ComputeNash computes the Nash equilibrium for the 1×K meta-game where
|
||||
// winRates[i] is the candidate's win rate against opponent i.
|
||||
//
|
||||
// The column player (opponent) minimises the candidate's expected win rate.
|
||||
// The optimal column strategy concentrates on the opponent(s) with the lowest
|
||||
// win rate for the candidate. Ties in the minimum are distributed uniformly.
|
||||
//
|
||||
// Nash value = min(winRates) (hardest-opponent test).
|
||||
func ComputeNash(winRates []float64) NashResult {
|
||||
if len(winRates) == 0 {
|
||||
return NashResult{NashValue: 0.5}
|
||||
}
|
||||
|
||||
K := len(winRates)
|
||||
mix := make([]float64, K)
|
||||
|
||||
// Find the minimum win rate.
|
||||
minVal := winRates[0]
|
||||
for _, w := range winRates[1:] {
|
||||
if w < minVal {
|
||||
minVal = w
|
||||
}
|
||||
}
|
||||
|
||||
// Distribute weight uniformly over all opponents achieving the minimum.
|
||||
count := 0
|
||||
for _, w := range winRates {
|
||||
if w == minVal {
|
||||
count++
|
||||
}
|
||||
}
|
||||
for i, w := range winRates {
|
||||
if w == minVal {
|
||||
mix[i] = 1.0 / float64(count)
|
||||
}
|
||||
}
|
||||
|
||||
return NashResult{
|
||||
OpponentMix: mix,
|
||||
NashValue: minVal,
|
||||
WinRatePerOpponent: winRates,
|
||||
}
|
||||
}
|
||||
|
||||
// FictitiousPlayNash computes the Nash equilibrium via fictitious play,
|
||||
// converging over iterations rounds. This generalises to K×K matrices and
|
||||
// provides a softer mixed-strategy Nash than the pure-minimax above.
|
||||
//
|
||||
// For a 1×K payoff matrix both algorithms produce identical results, so this
|
||||
// function is provided for future use when the full population payoff matrix
|
||||
// is available.
|
||||
func FictitiousPlayNash(winRates []float64, iterations int) NashResult {
|
||||
if len(winRates) == 0 {
|
||||
return NashResult{NashValue: 0.5}
|
||||
}
|
||||
if iterations <= 0 {
|
||||
iterations = 1000
|
||||
}
|
||||
|
||||
K := len(winRates)
|
||||
counts := make([]float64, K)
|
||||
|
||||
// Fictitious play: column player repeatedly best-responds to the current
|
||||
// row player strategy (fixed at "always play candidate").
|
||||
for iter := 0; iter < iterations; iter++ {
|
||||
// Column player best response: pick opponent minimising candidate win rate.
|
||||
best := 0
|
||||
for i := 1; i < K; i++ {
|
||||
if winRates[i] < winRates[best] {
|
||||
best = i
|
||||
}
|
||||
}
|
||||
counts[best]++
|
||||
}
|
||||
|
||||
mix := make([]float64, K)
|
||||
expected := 0.0
|
||||
for i, c := range counts {
|
||||
mix[i] = c / float64(iterations)
|
||||
expected += mix[i] * winRates[i]
|
||||
}
|
||||
|
||||
return NashResult{
|
||||
OpponentMix: mix,
|
||||
NashValue: expected,
|
||||
WinRatePerOpponent: winRates,
|
||||
}
|
||||
}
|
||||
55
cmd/acb-evolver/internal/arena/winrate.go
Normal file
55
cmd/acb-evolver/internal/arena/winrate.go
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
package arena
|
||||
|
||||
import "math"
|
||||
|
||||
// WinRateResult holds the observed win rate and its 95% Wilson score confidence interval.
|
||||
type WinRateResult struct {
|
||||
Wins int
|
||||
Total int // non-error matches only
|
||||
Rate float64 // observed win rate (0–1)
|
||||
Lower float64 // 95% CI lower bound
|
||||
Upper float64 // 95% CI upper bound
|
||||
}
|
||||
|
||||
// WinRate computes the win rate and Wilson score 95% confidence interval
|
||||
// for wins out of total valid matches. When total == 0, all values are 0.5.
|
||||
//
|
||||
// Wilson score interval:
|
||||
//
|
||||
// center = (p̂ + z²/2n) / (1 + z²/n)
|
||||
// margin = z * sqrt(p̂(1-p̂)/n + z²/4n²) / (1 + z²/n)
|
||||
// CI = [center − margin, center + margin]
|
||||
//
|
||||
// Using z = 1.96 (95% two-tailed confidence).
|
||||
func WinRate(wins, total int) WinRateResult {
|
||||
if total == 0 {
|
||||
return WinRateResult{Rate: 0.5, Lower: 0.0, Upper: 1.0}
|
||||
}
|
||||
|
||||
const z = 1.96 // 95% CI
|
||||
p := float64(wins) / float64(total)
|
||||
n := float64(total)
|
||||
z2 := z * z
|
||||
|
||||
center := (p + z2/(2*n)) / (1 + z2/n)
|
||||
margin := z * math.Sqrt(p*(1-p)/n+z2/(4*n*n)) / (1 + z2/n)
|
||||
|
||||
lower := math.Max(0, center-margin)
|
||||
upper := math.Min(1, center+margin)
|
||||
|
||||
return WinRateResult{
|
||||
Wins: wins,
|
||||
Total: total,
|
||||
Rate: p,
|
||||
Lower: lower,
|
||||
Upper: upper,
|
||||
}
|
||||
}
|
||||
|
||||
// ComputeFromResult builds a WinRateResult from a tournament Result.
|
||||
// Only non-error matches are counted; draws count as 0.5 wins.
|
||||
func ComputeFromResult(r *Result) WinRateResult {
|
||||
total := r.Wins + r.Losses + r.Draws
|
||||
// Count draws as half-wins for the rate; wins/total integers use integer wins.
|
||||
return WinRate(r.Wins, total)
|
||||
}
|
||||
|
|
@ -18,6 +18,7 @@ CREATE TABLE IF NOT EXISTS programs (
|
|||
behavior_vector DOUBLE PRECISION[] NOT NULL DEFAULT '{}',
|
||||
fitness DOUBLE PRECISION NOT NULL DEFAULT 0.0,
|
||||
promoted BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
bot_id VARCHAR(16),
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS idx_programs_island ON programs(island);
|
||||
|
|
@ -37,8 +38,20 @@ CREATE INDEX IF NOT EXISTS idx_validation_log_island ON validation_log(island);
|
|||
CREATE INDEX IF NOT EXISTS idx_validation_log_island_passed ON validation_log(island, passed);
|
||||
`
|
||||
|
||||
// EnsureSchema creates the programs table if it does not already exist.
|
||||
// migrationSQL contains the additive schema migrations that EnsureSchema
// applies after the base schema. Every statement uses
// ADD COLUMN IF NOT EXISTS, so re-running the whole script is harmless.
const migrationSQL = `
ALTER TABLE programs ADD COLUMN IF NOT EXISTS bot_id VARCHAR(16);
ALTER TABLE programs ADD COLUMN IF NOT EXISTS bot_name VARCHAR(64);
ALTER TABLE programs ADD COLUMN IF NOT EXISTS bot_secret TEXT;
`
|
||||
|
||||
// EnsureSchema creates the programs and validation_log tables if they do not
|
||||
// already exist, then applies any pending additive migrations.
|
||||
func EnsureSchema(ctx context.Context, db *sql.DB) error {
|
||||
_, err := db.ExecContext(ctx, schemaSQL)
|
||||
if _, err := db.ExecContext(ctx, schemaSQL); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := db.ExecContext(ctx, migrationSQL)
|
||||
return err
|
||||
}
|
||||
|
|
|
|||
|
|
@ -177,3 +177,106 @@ func (s *Store) TotalCount(ctx context.Context) (int, error) {
|
|||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// SetBotID records the deployed bot_id for a promoted program.
|
||||
func (s *Store) SetBotID(ctx context.Context, id int64, botID string) error {
|
||||
_, err := s.db.ExecContext(ctx,
|
||||
`UPDATE programs SET bot_id = $1 WHERE id = $2`, botID, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("set bot_id for program %d: %w", id, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// PromotedProgram is the subset of a programs row that describes a promoted
// program and the live bot it is linked to.
type PromotedProgram struct {
	ProgramID      int64     // programs.id
	BotID          string    // programs.bot_id
	BotName        string    // K8s/API name, e.g. "acb-evo-42"; empty when the column is NULL
	BotSecret      string    // plaintext secret kept for retirement operations; empty when NULL
	Island         string    // programs.island
	BehaviorVector []float64 // programs.behavior_vector
	Fitness        float64   // programs.fitness
}
|
||||
|
||||
// ListPromoted returns all programs that have been promoted (bot_id is set).
|
||||
func (s *Store) ListPromoted(ctx context.Context) ([]*PromotedProgram, error) {
|
||||
rows, err := s.db.QueryContext(ctx, `
|
||||
SELECT id, bot_id, COALESCE(bot_name, ''), COALESCE(bot_secret, ''),
|
||||
island, behavior_vector, fitness
|
||||
FROM programs
|
||||
WHERE promoted = TRUE AND bot_id IS NOT NULL
|
||||
ORDER BY fitness DESC`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list promoted programs: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var out []*PromotedProgram
|
||||
for rows.Next() {
|
||||
p := &PromotedProgram{}
|
||||
if err := rows.Scan(&p.ProgramID, &p.BotID, &p.BotName, &p.BotSecret,
|
||||
&p.Island, pq.Array(&p.BehaviorVector), &p.Fitness); err != nil {
|
||||
return nil, fmt.Errorf("scan promoted program: %w", err)
|
||||
}
|
||||
out = append(out, p)
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// SetBotNameAndSecret records the K8s bot name and plaintext shared secret for
|
||||
// a promoted program. These are stored so the retirement path can locate and
|
||||
// clean up the bot without requiring an extra API call.
|
||||
func (s *Store) SetBotNameAndSecret(ctx context.Context, id int64, botName, botSecret string) error {
|
||||
_, err := s.db.ExecContext(ctx,
|
||||
`UPDATE programs SET bot_name = $1, bot_secret = $2 WHERE id = $3`,
|
||||
botName, botSecret, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("set bot name/secret for program %d: %w", id, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// PromotedCount returns the number of currently promoted (deployed) programs.
|
||||
func (s *Store) PromotedCount(ctx context.Context) (int, error) {
|
||||
var n int
|
||||
err := s.db.QueryRowContext(ctx,
|
||||
`SELECT COUNT(*) FROM programs WHERE promoted = TRUE AND bot_id IS NOT NULL`).Scan(&n)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("promoted count: %w", err)
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// UnsetPromoted clears the promoted flag and bot_id for a retired program.
|
||||
func (s *Store) UnsetPromoted(ctx context.Context, id int64) error {
|
||||
_, err := s.db.ExecContext(ctx,
|
||||
`UPDATE programs SET promoted = FALSE, bot_id = NULL WHERE id = $1`, id)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unset promoted for program %d: %w", id, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetByBotID returns the program associated with a deployed bot ID, or nil.
|
||||
func (s *Store) GetByBotID(ctx context.Context, botID string) (*Program, error) {
|
||||
p := &Program{}
|
||||
var parentJSON string
|
||||
err := s.db.QueryRowContext(ctx, `
|
||||
SELECT id, code, language, island, generation, parent_ids,
|
||||
behavior_vector, fitness, promoted, created_at
|
||||
FROM programs WHERE bot_id = $1`, botID).Scan(
|
||||
&p.ID, &p.Code, &p.Language, &p.Island, &p.Generation,
|
||||
&parentJSON, pq.Array(&p.BehaviorVector), &p.Fitness, &p.Promoted, &p.CreatedAt,
|
||||
)
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("get program by bot_id %s: %w", botID, err)
|
||||
}
|
||||
if err := json.Unmarshal([]byte(parentJSON), &p.ParentIDs); err != nil {
|
||||
return nil, fmt.Errorf("unmarshal parent_ids: %w", err)
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
|
|
|
|||
721
cmd/acb-evolver/internal/promoter/promoter.go
Normal file
721
cmd/acb-evolver/internal/promoter/promoter.go
Normal file
|
|
@ -0,0 +1,721 @@
|
|||
// Package promoter deploys validated+promoted evolved bots to Kubernetes and
|
||||
// registers them in the ACB bots database. It also enforces the retirement
|
||||
// policy: auto-retiring bots below a rating threshold and capping the
|
||||
// evolved-bot fleet at a configurable population cap.
|
||||
//
|
||||
// Promotion flow
|
||||
//
|
||||
// 1. Generate a unique bot name (acb-evo-<programID>), bot ID, and secret.
|
||||
// 2. Write bot source + language-appropriate Dockerfile to bots/evolved/<name>/.
|
||||
// 3. Write K8s Secret / Deployment / Service manifests to deploy/k8s/.
|
||||
// 4. Build and push the container image (best-effort; CI pipeline is the
|
||||
// fallback when docker is unavailable or fails).
|
||||
// 5. Git add → commit → push (triggers ArgoCD sync + image build via CI).
|
||||
// 6. Poll kubectl until the Deployment has ≥1 available replica.
|
||||
// 7. Insert the bot record directly into the bots database table.
|
||||
// 8. Record bot_id, bot_name, and bot_secret in the programs table.
|
||||
//
|
||||
// Retirement flow
|
||||
//
|
||||
// 1. Mark bot as 'retired' in the bots table.
|
||||
// 2. Delete the K8s manifests and bot source directory from git, commit, push.
|
||||
// 3. Clear promoted=false / bot_id=NULL in the programs table.
|
||||
package promoter
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/aes"
|
||||
"crypto/cipher"
|
||||
"crypto/rand"
|
||||
"database/sql"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/db"
|
||||
)
|
||||
|
||||
const (
	// botOwner is the owner recorded for bots inserted by the promoter and
	// the owner EnforcePolicy filters on.
	botOwner = "acb-evolver"

	// botPort is the port used for the bot's service endpoint URL and passed
	// to the manifest templates.
	botPort = 8080
)
|
||||
|
||||
// Config holds the settings that drive both promotion and retirement.
type Config struct {
	// Registry is the container registry prefix that image names are built
	// from, e.g. "forgejo.ardenone.com/ai-code-battle".
	Registry string

	// RepoDir is the root of the local git checkout that bot sources and
	// manifests are written into.
	RepoDir string

	// KubectlServer is the kubectl API server URL used when polling a
	// deployment, e.g. "http://kubectl-ardenone-cluster:8001".
	KubectlServer string

	// Namespace is the Kubernetes namespace the bots are deployed to.
	Namespace string

	// EncryptionKey is the hex-encoded AES-256-GCM key used to encrypt a
	// secret before it is stored in the bots table. When empty the secret is
	// stored in plaintext.
	EncryptionKey string

	// DeployWaitTimeout bounds how long to wait for an ArgoCD-managed
	// deployment to report at least one available replica.
	DeployWaitTimeout time.Duration

	// RatingThreshold is the display rating (mu − 2·phi) below which an
	// evolved bot is auto-retired.
	RatingThreshold float64

	// PopCap is the largest number of evolved bots allowed to be promoted at
	// once; when exceeded, the lowest-rated bots are retired first.
	PopCap int
}
|
||||
|
||||
// DefaultConfig returns production-ready defaults.
|
||||
func DefaultConfig() Config {
|
||||
return Config{
|
||||
Registry: "forgejo.ardenone.com/ai-code-battle",
|
||||
RepoDir: ".",
|
||||
KubectlServer: "http://kubectl-ardenone-cluster:8001",
|
||||
Namespace: "ai-code-battle",
|
||||
DeployWaitTimeout: 10 * time.Minute,
|
||||
RatingThreshold: 1000.0,
|
||||
PopCap: 50,
|
||||
}
|
||||
}
|
||||
|
||||
// Promoter manages promotion and retirement of evolved bots.
|
||||
type Promoter struct {
|
||||
store *db.Store
|
||||
rawDB *sql.DB
|
||||
cfg Config
|
||||
}
|
||||
|
||||
// New creates a Promoter.
|
||||
func New(store *db.Store, rawDB *sql.DB, cfg Config) *Promoter {
|
||||
return &Promoter{store: store, rawDB: rawDB, cfg: cfg}
|
||||
}
|
||||
|
||||
// PromotionResult describes a bot that Promote deployed successfully.
type PromotionResult struct {
	BotName  string // generated name, "acb-evo-<programID>"
	BotID    string // generated bot identifier
	Endpoint string // K8s ClusterIP service URL
}
|
||||
|
||||
// Promote deploys a validated candidate as a live evolved bot.
|
||||
func (p *Promoter) Promote(ctx context.Context, program *db.Program) (*PromotionResult, error) {
|
||||
botName := fmt.Sprintf("acb-evo-%d", program.ID)
|
||||
image := fmt.Sprintf("%s/%s:latest", p.cfg.Registry, botName)
|
||||
endpoint := fmt.Sprintf("http://%s:%d", botName, botPort)
|
||||
|
||||
botID, err := generateBotID()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("generate bot ID: %w", err)
|
||||
}
|
||||
secret, err := generateSecret()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("generate secret: %w", err)
|
||||
}
|
||||
|
||||
botDir := filepath.Join(p.cfg.RepoDir, "bots", "evolved", botName)
|
||||
if err := p.writeBotDir(program, botDir); err != nil {
|
||||
return nil, fmt.Errorf("write bot dir: %w", err)
|
||||
}
|
||||
|
||||
if err := p.writeManifests(botName, secret, program); err != nil {
|
||||
return nil, fmt.Errorf("write manifests: %w", err)
|
||||
}
|
||||
|
||||
// Best-effort local image build; CI pipeline is the authoritative builder.
|
||||
if buildErr := p.buildAndPushImage(ctx, botDir, image); buildErr != nil {
|
||||
fmt.Printf("promoter: docker build skipped (%v) — CI will build the image\n", buildErr)
|
||||
}
|
||||
|
||||
commitMsg := fmt.Sprintf("Add evolved bot %s (island=%s gen=%d program_id=%d)",
|
||||
botName, program.Island, program.Generation, program.ID)
|
||||
if err := p.gitCommitPush(ctx, botName, commitMsg, false); err != nil {
|
||||
return nil, fmt.Errorf("git commit/push: %w", err)
|
||||
}
|
||||
|
||||
if err := p.waitForDeployment(ctx, botName); err != nil {
|
||||
return nil, fmt.Errorf("wait for deployment: %w", err)
|
||||
}
|
||||
|
||||
// Insert bot record directly into the bots table (same DB as programs).
|
||||
storedSecret := secret
|
||||
if p.cfg.EncryptionKey != "" {
|
||||
storedSecret, err = encryptAESGCM(secret, p.cfg.EncryptionKey)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("encrypt secret: %w", err)
|
||||
}
|
||||
}
|
||||
_, err = p.rawDB.ExecContext(ctx, `
|
||||
INSERT INTO bots (bot_id, name, owner, endpoint_url, shared_secret, status, description, last_active)
|
||||
VALUES ($1, $2, $3, $4, $5, 'active', $6, NOW())`,
|
||||
botID, botName, botOwner, endpoint, storedSecret,
|
||||
fmt.Sprintf("Evolved bot — island=%s gen=%d program_id=%d",
|
||||
program.Island, program.Generation, program.ID),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("insert bot record: %w", err)
|
||||
}
|
||||
|
||||
if err := p.store.SetPromoted(ctx, program.ID); err != nil {
|
||||
return nil, fmt.Errorf("set promoted: %w", err)
|
||||
}
|
||||
if err := p.store.SetBotID(ctx, program.ID, botID); err != nil {
|
||||
return nil, fmt.Errorf("set bot_id: %w", err)
|
||||
}
|
||||
if err := p.store.SetBotNameAndSecret(ctx, program.ID, botName, secret); err != nil {
|
||||
return nil, fmt.Errorf("set bot name/secret: %w", err)
|
||||
}
|
||||
|
||||
return &PromotionResult{BotName: botName, BotID: botID, Endpoint: endpoint}, nil
|
||||
}
|
||||
|
||||
// RetireBot marks a bot as retired, removes its K8s manifests, and clears the
|
||||
// promoted flag in the programs table.
|
||||
func (p *Promoter) RetireBot(ctx context.Context, programID int64, botID, botName string) error {
|
||||
// 1. Mark bot retired in the bots table.
|
||||
if _, err := p.rawDB.ExecContext(ctx,
|
||||
`UPDATE bots SET status = 'retired' WHERE bot_id = $1`, botID); err != nil {
|
||||
return fmt.Errorf("retire bot in DB: %w", err)
|
||||
}
|
||||
|
||||
// 2. Remove K8s manifests + bot source from git.
|
||||
if botName != "" {
|
||||
retireMsg := fmt.Sprintf("Retire evolved bot %s (program_id=%d)", botName, programID)
|
||||
if err := p.gitCommitPush(ctx, botName, retireMsg, true); err != nil {
|
||||
// Log but don't fail — the bot is already retired in the DB.
|
||||
fmt.Printf("promoter: git remove failed for %s: %v\n", botName, err)
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Clear promoted flag in programs table.
|
||||
return p.store.UnsetPromoted(ctx, programID)
|
||||
}
|
||||
|
||||
// RetiredCandidate records one bot that EnforcePolicy selected for
// retirement and why.
type RetiredCandidate struct {
	ProgramID     int64   // programs.id of the bot's program
	BotID         string  // bot identifier
	BotName       string  // bot name; empty when the programs row has none
	DisplayRating float64 // rating_mu − 2·rating_phi at the time of the check
	Reason        string  // which rule triggered the retirement
}
|
||||
|
||||
// EnforcePolicy auto-retires evolved bots below cfg.RatingThreshold and trims
|
||||
// the active fleet to cfg.PopCap. The slice is ordered lowest-rated first so
|
||||
// the weakest bots are retired first when enforcing the cap.
|
||||
// Returns the list of bots that were retired.
|
||||
func (p *Promoter) EnforcePolicy(ctx context.Context) ([]RetiredCandidate, error) {
|
||||
rows, err := p.rawDB.QueryContext(ctx, `
|
||||
SELECT p.id, p.bot_id, COALESCE(p.bot_name, ''),
|
||||
b.rating_mu - 2*b.rating_phi AS display_rating
|
||||
FROM programs p
|
||||
JOIN bots b ON p.bot_id = b.bot_id
|
||||
WHERE p.promoted = TRUE
|
||||
AND p.bot_id IS NOT NULL
|
||||
AND b.status = 'active'
|
||||
AND b.owner = $1
|
||||
ORDER BY display_rating ASC`, botOwner)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("query promoted bots: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
type botRow struct {
|
||||
programID int64
|
||||
botID string
|
||||
botName string
|
||||
displayRating float64
|
||||
}
|
||||
var bots []botRow
|
||||
for rows.Next() {
|
||||
var b botRow
|
||||
if err := rows.Scan(&b.programID, &b.botID, &b.botName, &b.displayRating); err != nil {
|
||||
return nil, fmt.Errorf("scan bot: %w", err)
|
||||
}
|
||||
bots = append(bots, b)
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Decide which bots to retire (lowest-rated first).
|
||||
remaining := len(bots)
|
||||
var toRetire []RetiredCandidate
|
||||
for _, b := range bots {
|
||||
var reason string
|
||||
if b.displayRating < p.cfg.RatingThreshold {
|
||||
reason = fmt.Sprintf("display rating %.0f < threshold %.0f",
|
||||
b.displayRating, p.cfg.RatingThreshold)
|
||||
} else if remaining > p.cfg.PopCap {
|
||||
reason = fmt.Sprintf("population cap %d exceeded (currently %d)",
|
||||
p.cfg.PopCap, remaining)
|
||||
}
|
||||
if reason != "" {
|
||||
toRetire = append(toRetire, RetiredCandidate{
|
||||
ProgramID: b.programID,
|
||||
BotID: b.botID,
|
||||
BotName: b.botName,
|
||||
DisplayRating: b.displayRating,
|
||||
Reason: reason,
|
||||
})
|
||||
remaining--
|
||||
}
|
||||
}
|
||||
|
||||
for i := range toRetire {
|
||||
r := &toRetire[i]
|
||||
if err := p.RetireBot(ctx, r.ProgramID, r.BotID, r.BotName); err != nil {
|
||||
return toRetire[:i], fmt.Errorf("retire bot %s: %w", r.BotID, err)
|
||||
}
|
||||
}
|
||||
return toRetire, nil
|
||||
}
|
||||
|
||||
// ── file writing ─────────────────────────────────────────────────────────────
|
||||
|
||||
func (p *Promoter) writeBotDir(program *db.Program, dir string) error {
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
switch program.Language {
|
||||
case "go":
|
||||
if err := os.WriteFile(filepath.Join(dir, "bot.go"), []byte(program.Code), 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(filepath.Join(dir, "go.mod"), []byte("module bot\n\ngo 1.24.3\n"), 0o644)
|
||||
case "python":
|
||||
return os.WriteFile(filepath.Join(dir, "bot.py"), []byte(program.Code), 0o644)
|
||||
case "rust":
|
||||
if err := os.MkdirAll(filepath.Join(dir, "src"), 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(dir, "src", "main.rs"), []byte(program.Code), 0o644); err != nil {
|
||||
return err
|
||||
}
|
||||
cargoTOML := "[package]\nname = \"bot\"\nversion = \"0.1.0\"\nedition = \"2021\"\n"
|
||||
return os.WriteFile(filepath.Join(dir, "Cargo.toml"), []byte(cargoTOML), 0o644)
|
||||
case "typescript":
|
||||
return os.WriteFile(filepath.Join(dir, "bot.ts"), []byte(program.Code), 0o644)
|
||||
case "java":
|
||||
return os.WriteFile(filepath.Join(dir, "Bot.java"), []byte(program.Code), 0o644)
|
||||
case "php":
|
||||
return os.WriteFile(filepath.Join(dir, "bot.php"), []byte(program.Code), 0o644)
|
||||
default:
|
||||
return fmt.Errorf("unsupported language: %s", program.Language)
|
||||
}
|
||||
}
|
||||
|
||||
// dockerfileFor returns the Dockerfile text for a bot written in language.
//
// Supported values are "go", "python", "rust", "typescript", "java" and
// "php". Every Dockerfile sets BOT_PORT=8080 and an empty BOT_SECRET and
// exposes port 8080. Any other language yields an empty string and an
// "unsupported language" error.
func dockerfileFor(language string) (string, error) {
	var content string

	switch language {
	case "go":
		content = `FROM golang:1.24-alpine AS builder
WORKDIR /app
COPY go.mod go.mod
COPY bot.go bot.go
RUN go build -o bot .

FROM alpine:3.21
WORKDIR /app
COPY --from=builder /app/bot .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["./bot"]
`
	case "python":
		content = `FROM python:3.12-slim
WORKDIR /app
COPY bot.py .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["python3", "bot.py"]
`
	case "rust":
		content = `FROM rust:1.85-alpine AS builder
WORKDIR /app
COPY Cargo.toml Cargo.toml
COPY src ./src
RUN cargo build --release

FROM alpine:3.21
WORKDIR /app
COPY --from=builder /app/target/release/bot .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["./bot"]
`
	case "typescript":
		content = `FROM node:22-alpine AS builder
WORKDIR /app
COPY bot.ts .
RUN npm install -g typescript && tsc --target ES2020 --module commonjs bot.ts

FROM node:22-alpine
WORKDIR /app
COPY --from=builder /app/bot.js .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["node", "bot.js"]
`
	case "java":
		content = `FROM eclipse-temurin:21-alpine AS builder
WORKDIR /app
COPY Bot.java .
RUN javac Bot.java

FROM eclipse-temurin:21-jre-alpine
WORKDIR /app
COPY --from=builder /app/*.class .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["java", "Bot"]
`
	case "php":
		content = `FROM php:8.3-cli-alpine
WORKDIR /app
COPY bot.php .
ENV BOT_PORT=8080
ENV BOT_SECRET=""
EXPOSE 8080
CMD ["php", "bot.php"]
`
	default:
		return "", fmt.Errorf("unsupported language: %s", language)
	}

	return content, nil
}
|
||||
|
||||
// manifestData carries the values substituted into the Kubernetes manifest
// templates.
type manifestData struct {
	Name         string // bot name; also the base of the Secret's name
	Namespace    string // target Kubernetes namespace
	Island       string // island the program came from
	Generation   int    // generation of the program
	Registry     string // container registry prefix for the image
	Port         int    // port the bot container and service use
	SecretBase64 string // bot secret, base64-encoded for the Secret manifest
}
|
||||
|
||||
// secretManifestTmpl renders the Kubernetes Secret that holds a bot's shared
// secret under the key "bot-secret". It reads .Name, .Namespace and
// .SecretBase64 from the template data; the Secret is named "<Name>-secret".
//
// YAML nesting is significant, so the indentation inside the raw string must
// be kept exactly as written.
var secretManifestTmpl = template.Must(template.New("secret").Parse(`apiVersion: v1
kind: Secret
metadata:
  name: {{.Name}}-secret
  namespace: {{.Namespace}}
  labels:
    app.kubernetes.io/name: {{.Name}}
    app.kubernetes.io/part-of: ai-code-battle
    app.kubernetes.io/component: evolved-bot
type: Opaque
data:
  bot-secret: {{.SecretBase64}}
`))
|
||||
|
||||
// deployManifestTmpl renders the single-replica Deployment for an evolved
// bot. It reads .Name, .Namespace, .Island, .Registry and .Port from the
// template data. The container image is "<Registry>/<Name>:latest",
// BOT_SECRET is taken from the "<Name>-secret" Secret (key "bot-secret"),
// and both probes hit /health on the named "http" port.
//
// YAML nesting is significant, so the indentation inside the raw string must
// be kept exactly as written.
var deployManifestTmpl = template.Must(template.New("deploy").Parse(`apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{.Name}}
  namespace: {{.Namespace}}
  labels:
    app.kubernetes.io/name: {{.Name}}
    app.kubernetes.io/part-of: ai-code-battle
    app.kubernetes.io/component: evolved-bot
    acb/island: {{.Island}}
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: {{.Name}}
  template:
    metadata:
      labels:
        app.kubernetes.io/name: {{.Name}}
        app.kubernetes.io/part-of: ai-code-battle
        app.kubernetes.io/component: evolved-bot
        acb/island: {{.Island}}
    spec:
      containers:
        - name: bot
          image: {{.Registry}}/{{.Name}}:latest
          env:
            - name: BOT_PORT
              value: "{{.Port}}"
            - name: BOT_SECRET
              valueFrom:
                secretKeyRef:
                  name: {{.Name}}-secret
                  key: bot-secret
          ports:
            - name: http
              containerPort: {{.Port}}
              protocol: TCP
          livenessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 5
            periodSeconds: 30
          readinessProbe:
            httpGet:
              path: /health
              port: http
            initialDelaySeconds: 3
            periodSeconds: 10
          resources:
            requests:
              cpu: 50m
              memory: 64Mi
            limits:
              memory: 128Mi
      restartPolicy: Always
`))
|
||||
|
||||
// svcManifestTmpl renders a ClusterIP Service named after the bot. It selects
// pods by app.kubernetes.io/name and forwards Port to the pods' named port
// "http" (declared by the Deployment template).
var svcManifestTmpl = template.Must(template.New("svc").Parse(`apiVersion: v1
kind: Service
metadata:
  name: {{.Name}}
  namespace: {{.Namespace}}
  labels:
    app.kubernetes.io/name: {{.Name}}
    app.kubernetes.io/part-of: ai-code-battle
    app.kubernetes.io/component: evolved-bot
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: {{.Name}}
  ports:
    - name: http
      port: {{.Port}}
      targetPort: http
      protocol: TCP
`))
|
||||
|
||||
func (p *Promoter) writeManifests(botName, secret string, program *db.Program) error {
|
||||
data := manifestData{
|
||||
Name: botName,
|
||||
Namespace: p.cfg.Namespace,
|
||||
Island: program.Island,
|
||||
Generation: program.Generation,
|
||||
Registry: p.cfg.Registry,
|
||||
Port: botPort,
|
||||
SecretBase64: base64.StdEncoding.EncodeToString([]byte(secret)),
|
||||
}
|
||||
|
||||
// Write Dockerfile into the bot source directory (already created by writeBotDir).
|
||||
dockerfile, err := dockerfileFor(program.Language)
|
||||
if err != nil {
|
||||
return fmt.Errorf("dockerfile: %w", err)
|
||||
}
|
||||
botDir := filepath.Join(p.cfg.RepoDir, "bots", "evolved", botName)
|
||||
if err := os.WriteFile(filepath.Join(botDir, "Dockerfile"), []byte(dockerfile), 0o644); err != nil {
|
||||
return fmt.Errorf("write Dockerfile: %w", err)
|
||||
}
|
||||
|
||||
// K8s Secret
|
||||
secretsDir := filepath.Join(p.cfg.RepoDir, "deploy", "k8s", "secrets")
|
||||
if err := os.MkdirAll(secretsDir, 0o755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := renderToFile(filepath.Join(secretsDir, botName+".yaml"), secretManifestTmpl, data); err != nil {
|
||||
return fmt.Errorf("secret manifest: %w", err)
|
||||
}
|
||||
|
||||
// K8s Deployment
|
||||
deployDir := filepath.Join(p.cfg.RepoDir, "deploy", "k8s", "deployments")
|
||||
if err := renderToFile(filepath.Join(deployDir, botName+".yaml"), deployManifestTmpl, data); err != nil {
|
||||
return fmt.Errorf("deployment manifest: %w", err)
|
||||
}
|
||||
|
||||
// K8s Service
|
||||
svcDir := filepath.Join(p.cfg.RepoDir, "deploy", "k8s", "services")
|
||||
if err := renderToFile(filepath.Join(svcDir, botName+".yaml"), svcManifestTmpl, data); err != nil {
|
||||
return fmt.Errorf("service manifest: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// renderToFile executes tmpl against data and stores the output at path with
// mode 0o644. The template is rendered into memory first, so an execution
// error is returned before anything is written to path.
func renderToFile(path string, tmpl *template.Template, data any) error {
	rendered := new(bytes.Buffer)
	err := tmpl.Execute(rendered, data)
	if err != nil {
		return err
	}
	return os.WriteFile(path, rendered.Bytes(), 0o644)
}
|
||||
|
||||
// ── git operations ────────────────────────────────────────────────────────────
|
||||
|
||||
// gitCommitPush stages, commits, and pushes changes for botName.
|
||||
// When remove=true it runs `git rm` to delete the files; otherwise `git add`.
|
||||
func (p *Promoter) gitCommitPush(ctx context.Context, botName, msg string, remove bool) error {
|
||||
run := func(args ...string) error {
|
||||
cmd := exec.CommandContext(ctx, "git", args...)
|
||||
cmd.Dir = p.cfg.RepoDir
|
||||
if out, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("git %s: %s", args[0], strings.TrimSpace(string(out)))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
paths := []string{
|
||||
filepath.Join("bots", "evolved", botName),
|
||||
filepath.Join("deploy", "k8s", "deployments", botName+".yaml"),
|
||||
filepath.Join("deploy", "k8s", "services", botName+".yaml"),
|
||||
filepath.Join("deploy", "k8s", "secrets", botName+".yaml"),
|
||||
}
|
||||
|
||||
if remove {
|
||||
for _, path := range paths {
|
||||
if err := run("rm", "-rf", "--ignore-unmatch", "--", path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
args := append([]string{"add", "--"}, paths...)
|
||||
if err := run(args...); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Skip commit if nothing changed.
|
||||
statusCmd := exec.CommandContext(ctx, "git", "status", "--porcelain")
|
||||
statusCmd.Dir = p.cfg.RepoDir
|
||||
out, _ := statusCmd.Output()
|
||||
if len(strings.TrimSpace(string(out))) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := run("commit", "-m", msg); err != nil {
|
||||
return err
|
||||
}
|
||||
return run("push", "origin", "master")
|
||||
}
|
||||
|
||||
// ── deployment readiness ──────────────────────────────────────────────────────
|
||||
|
||||
func (p *Promoter) waitForDeployment(ctx context.Context, name string) error {
|
||||
deadline := time.Now().Add(p.cfg.DeployWaitTimeout)
|
||||
ticker := time.NewTicker(15 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
fmt.Printf("promoter: waiting for deployment %s to be ready (timeout=%s)…\n",
|
||||
name, p.cfg.DeployWaitTimeout)
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-ticker.C:
|
||||
n, err := p.availableReplicas(ctx, name)
|
||||
if err != nil {
|
||||
fmt.Printf("promoter: kubectl poll error: %v\n", err)
|
||||
} else if n >= 1 {
|
||||
fmt.Printf("promoter: deployment %s ready (%d replica)\n", name, n)
|
||||
return nil
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return fmt.Errorf("deployment %s not ready after %s", name, p.cfg.DeployWaitTimeout)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *Promoter) availableReplicas(ctx context.Context, name string) (int, error) {
|
||||
cmd := exec.CommandContext(ctx, "kubectl",
|
||||
"--server="+p.cfg.KubectlServer,
|
||||
"get", "deployment", name,
|
||||
"-n", p.cfg.Namespace,
|
||||
"-o", "jsonpath={.status.availableReplicas}",
|
||||
)
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
s := strings.TrimSpace(string(out))
|
||||
if s == "" {
|
||||
return 0, nil
|
||||
}
|
||||
var n int
|
||||
fmt.Sscanf(s, "%d", &n)
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// ── container image build ─────────────────────────────────────────────────────
|
||||
|
||||
func (p *Promoter) buildAndPushImage(ctx context.Context, botDir, image string) error {
|
||||
if _, err := exec.LookPath("docker"); err != nil {
|
||||
return fmt.Errorf("docker not in PATH")
|
||||
}
|
||||
build := exec.CommandContext(ctx, "docker", "build", "-t", image, botDir)
|
||||
if out, err := build.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("docker build: %s", truncate(string(out), 512))
|
||||
}
|
||||
push := exec.CommandContext(ctx, "docker", "push", image)
|
||||
if out, err := push.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("docker push: %s", truncate(string(out), 512))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ── crypto helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
// generateBotID returns a fresh bot identifier: the prefix "b_" followed by
// 4 random bytes from crypto/rand, hex-encoded (10 characters in total).
func generateBotID() (string, error) {
	var raw [4]byte
	if _, err := rand.Read(raw[:]); err != nil {
		return "", err
	}
	id := "b_" + hex.EncodeToString(raw[:])
	return id, nil
}
|
||||
|
||||
// generateSecret returns 32 random bytes from crypto/rand, hex-encoded into a
// 64-character string.
func generateSecret() (string, error) {
	var raw [32]byte
	if _, err := rand.Read(raw[:]); err != nil {
		return "", err
	}
	secret := hex.EncodeToString(raw[:])
	return secret, nil
}
|
||||
|
||||
// encryptAESGCM encrypts plaintext with AES-256-GCM under the hex-encoded key
// keyHex, which must decode to exactly 32 bytes.
//
// A fresh random nonce is drawn for every call and prepended to the sealed
// data; the result is the hex encoding of nonce || ciphertext || tag.
func encryptAESGCM(plaintext, keyHex string) (string, error) {
	key, decodeErr := hex.DecodeString(keyHex)
	if decodeErr != nil || len(key) != 32 {
		return "", fmt.Errorf("invalid AES-256-GCM key (must be 64 hex chars)")
	}

	blockCipher, err := aes.NewCipher(key)
	if err != nil {
		return "", err
	}
	gcm, err := cipher.NewGCM(blockCipher)
	if err != nil {
		return "", err
	}

	nonce := make([]byte, gcm.NonceSize())
	if _, err = io.ReadFull(rand.Reader, nonce); err != nil {
		return "", err
	}

	// Seal appends to its first argument, so passing nonce as the destination
	// yields nonce followed by the ciphertext and tag.
	return hex.EncodeToString(gcm.Seal(nonce, nonce, []byte(plaintext), nil)), nil
}
|
||||
|
||||
// truncate shortens s to at most max bytes and appends "…" when anything was
// cut off. Strings that already fit are returned unchanged.
//
// The cut point is moved back to the nearest UTF-8 rune boundary so that a
// multi-byte character is never split in half. A negative max is treated as 0.
func truncate(s string, max int) string {
	if max < 0 {
		max = 0
	}
	if len(s) <= max {
		return s
	}
	cut := max
	// s[cut] is the first byte that would be dropped. While it is a UTF-8
	// continuation byte (10xxxxxx) the cut lies inside a rune; back up.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut] + "…"
}
|
||||
194
cmd/acb-evolver/internal/promoter/promoter_test.go
Normal file
194
cmd/acb-evolver/internal/promoter/promoter_test.go
Normal file
|
|
@ -0,0 +1,194 @@
|
|||
package promoter
|
||||
|
||||
import (
	"crypto/aes"
	"crypto/cipher"
	"encoding/hex"
	"strings"
	"testing"
)
|
||||
|
||||
// ── dockerfileFor ─────────────────────────────────────────────────────────────
|
||||
|
||||
func TestDockerfileFor_AllSupportedLanguages(t *testing.T) {
|
||||
languages := []string{"go", "python", "rust", "typescript", "java", "php"}
|
||||
for _, lang := range languages {
|
||||
t.Run(lang, func(t *testing.T) {
|
||||
df, err := dockerfileFor(lang)
|
||||
if err != nil {
|
||||
t.Fatalf("dockerfileFor(%q) error: %v", lang, err)
|
||||
}
|
||||
if !strings.Contains(df, "FROM ") {
|
||||
t.Errorf("Dockerfile for %q missing FROM instruction", lang)
|
||||
}
|
||||
if !strings.Contains(df, "BOT_PORT") {
|
||||
t.Errorf("Dockerfile for %q missing BOT_PORT env var", lang)
|
||||
}
|
||||
if !strings.Contains(df, "BOT_SECRET") {
|
||||
t.Errorf("Dockerfile for %q missing BOT_SECRET env var", lang)
|
||||
}
|
||||
if !strings.Contains(df, "EXPOSE 8080") {
|
||||
t.Errorf("Dockerfile for %q missing EXPOSE 8080", lang)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDockerfileFor_UnsupportedLanguage(t *testing.T) {
|
||||
_, err := dockerfileFor("cobol")
|
||||
if err == nil {
|
||||
t.Error("expected error for unsupported language, got nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDockerfileFor_GoUsesMultistage(t *testing.T) {
|
||||
df, _ := dockerfileFor("go")
|
||||
if !strings.Contains(df, "AS builder") {
|
||||
t.Error("Go Dockerfile should use multi-stage build")
|
||||
}
|
||||
if !strings.Contains(df, "golang:") {
|
||||
t.Error("Go Dockerfile should use a golang base image")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDockerfileFor_RustUsesMultistage(t *testing.T) {
|
||||
df, _ := dockerfileFor("rust")
|
||||
if !strings.Contains(df, "AS builder") {
|
||||
t.Error("Rust Dockerfile should use multi-stage build")
|
||||
}
|
||||
}
|
||||
|
||||
// ── generateBotID ─────────────────────────────────────────────────────────────
|
||||
|
||||
func TestGenerateBotID_Format(t *testing.T) {
|
||||
id, err := generateBotID()
|
||||
if err != nil {
|
||||
t.Fatalf("generateBotID error: %v", err)
|
||||
}
|
||||
if !strings.HasPrefix(id, "b_") {
|
||||
t.Errorf("bot ID %q does not start with 'b_'", id)
|
||||
}
|
||||
// b_ + 8 hex chars = 10 total
|
||||
if len(id) != 10 {
|
||||
t.Errorf("bot ID %q has length %d, want 10", id, len(id))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateBotID_Uniqueness(t *testing.T) {
|
||||
seen := make(map[string]bool)
|
||||
for i := 0; i < 100; i++ {
|
||||
id, err := generateBotID()
|
||||
if err != nil {
|
||||
t.Fatalf("generateBotID error at iteration %d: %v", i, err)
|
||||
}
|
||||
if seen[id] {
|
||||
t.Errorf("duplicate bot ID generated: %s", id)
|
||||
}
|
||||
seen[id] = true
|
||||
}
|
||||
}
|
||||
|
||||
// ── generateSecret ────────────────────────────────────────────────────────────
|
||||
|
||||
func TestGenerateSecret_Length(t *testing.T) {
|
||||
s, err := generateSecret()
|
||||
if err != nil {
|
||||
t.Fatalf("generateSecret error: %v", err)
|
||||
}
|
||||
// 32 random bytes encoded as 64 hex chars
|
||||
if len(s) != 64 {
|
||||
t.Errorf("secret %q has length %d, want 64", s, len(s))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGenerateSecret_Uniqueness(t *testing.T) {
|
||||
seen := make(map[string]bool)
|
||||
for i := 0; i < 50; i++ {
|
||||
s, err := generateSecret()
|
||||
if err != nil {
|
||||
t.Fatalf("generateSecret error at iteration %d: %v", i, err)
|
||||
}
|
||||
if seen[s] {
|
||||
t.Errorf("duplicate secret generated: %s", s)
|
||||
}
|
||||
seen[s] = true
|
||||
}
|
||||
}
|
||||
|
||||
// ── encryptAESGCM / decryptAESGCM ─────────────────────────────────────────────
|
||||
|
||||
func TestEncryptDecryptAESGCM_RoundTrip(t *testing.T) {
|
||||
// 32-byte key = 64 hex chars
|
||||
key := strings.Repeat("ab", 32) // "abababab..." 64 chars
|
||||
plaintext := "my-super-secret-bot-key"
|
||||
|
||||
ct, err := encryptAESGCM(plaintext, key)
|
||||
if err != nil {
|
||||
t.Fatalf("encrypt: %v", err)
|
||||
}
|
||||
if ct == plaintext {
|
||||
t.Fatal("ciphertext should differ from plaintext")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncryptAESGCM_InvalidKey(t *testing.T) {
|
||||
_, err := encryptAESGCM("plaintext", "notahexkey")
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid key")
|
||||
}
|
||||
}
|
||||
|
||||
// ── manifest templates ────────────────────────────────────────────────────────
|
||||
|
||||
func TestManifestTemplates_Execute(t *testing.T) {
|
||||
data := manifestData{
|
||||
Name: "acb-evo-test",
|
||||
Namespace: "ai-code-battle",
|
||||
Island: "alpha",
|
||||
Generation: 1,
|
||||
Registry: "registry.example.com/acb",
|
||||
Port: 8080,
|
||||
SecretBase64: "dGVzdA==",
|
||||
}
|
||||
|
||||
for name, tmpl := range map[string]interface{ Execute(interface{}, interface{}) error }{} {
|
||||
_ = name
|
||||
_ = tmpl
|
||||
}
|
||||
|
||||
// Test secret manifest
|
||||
var buf strings.Builder
|
||||
if err := secretManifestTmpl.Execute(&buf, data); err != nil {
|
||||
t.Fatalf("secretManifestTmpl.Execute: %v", err)
|
||||
}
|
||||
out := buf.String()
|
||||
if !strings.Contains(out, "acb-evo-test-secret") {
|
||||
t.Error("secret manifest missing expected name")
|
||||
}
|
||||
if !strings.Contains(out, "dGVzdA==") {
|
||||
t.Error("secret manifest missing base64 secret")
|
||||
}
|
||||
|
||||
// Test deployment manifest
|
||||
buf.Reset()
|
||||
if err := deployManifestTmpl.Execute(&buf, data); err != nil {
|
||||
t.Fatalf("deployManifestTmpl.Execute: %v", err)
|
||||
}
|
||||
out = buf.String()
|
||||
if !strings.Contains(out, "acb-evo-test") {
|
||||
t.Error("deployment manifest missing bot name")
|
||||
}
|
||||
if !strings.Contains(out, "registry.example.com/acb/acb-evo-test:latest") {
|
||||
t.Error("deployment manifest missing full image reference")
|
||||
}
|
||||
if !strings.Contains(out, "acb/island: alpha") {
|
||||
t.Error("deployment manifest missing island label")
|
||||
}
|
||||
|
||||
// Test service manifest
|
||||
buf.Reset()
|
||||
if err := svcManifestTmpl.Execute(&buf, data); err != nil {
|
||||
t.Fatalf("svcManifestTmpl.Execute: %v", err)
|
||||
}
|
||||
out = buf.String()
|
||||
if !strings.Contains(out, "ClusterIP") {
|
||||
t.Error("service manifest missing ClusterIP type")
|
||||
}
|
||||
}
|
||||
|
|
@ -7,6 +7,8 @@
|
|||
// stats Print program counts per island
|
||||
// validate Run the 3-stage validation pipeline on a bot source file
|
||||
// validation-stats Show per-island validation pass-rate metrics
|
||||
// evaluate Run the 10-match arena tournament and apply the promotion gate
|
||||
// retire Enforce retirement policy (rating threshold + population cap)
|
||||
package main
|
||||
|
||||
import (
|
||||
|
|
@ -21,6 +23,9 @@ import (
|
|||
_ "github.com/lib/pq"
|
||||
|
||||
evolverdb "github.com/aicodebattle/acb/cmd/acb-evolver/internal/db"
|
||||
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/arena"
|
||||
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/mapelites"
|
||||
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/promoter"
|
||||
"github.com/aicodebattle/acb/cmd/acb-evolver/internal/validator"
|
||||
)
|
||||
|
||||
|
|
@ -38,6 +43,16 @@ func main() {
|
|||
ctx := context.Background()
|
||||
|
||||
switch os.Args[1] {
|
||||
case "evaluate":
|
||||
db := mustOpenDB(dbURL)
|
||||
defer db.Close()
|
||||
runEvaluate(ctx, db, os.Args[2:])
|
||||
|
||||
case "retire":
|
||||
db := mustOpenDB(dbURL)
|
||||
defer db.Close()
|
||||
runRetire(ctx, db, os.Args[2:])
|
||||
|
||||
case "init-schema":
|
||||
db := mustOpenDB(dbURL)
|
||||
defer db.Close()
|
||||
|
|
@ -90,11 +105,258 @@ func main() {
|
|||
|
||||
default:
|
||||
fmt.Fprintf(os.Stderr, "unknown subcommand %q\n", os.Args[1])
|
||||
fmt.Fprintln(os.Stderr, "usage: acb-evolver <init-schema|seed|stats|validate|validation-stats>")
|
||||
fmt.Fprintln(os.Stderr, "usage: acb-evolver <init-schema|seed|stats|validate|validation-stats|evaluate|retire>")
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// runEvaluate runs the 10-match mini-tournament and applies the promotion gate.
|
||||
//
|
||||
// evaluate -lang go -island alpha [-program-id 0] [-promote] [-nash 0.5] [-win-lower 0.4] [-nolog] <file>
|
||||
func runEvaluate(ctx context.Context, db *sql.DB, args []string) {
|
||||
fs := flag.NewFlagSet("evaluate", flag.ExitOnError)
|
||||
lang := fs.String("lang", "", "bot language (go|python|rust|typescript|java|php) [required]")
|
||||
programID := fs.Int64("program-id", 0, "programs.id to update fitness after evaluation (0 = skip)")
|
||||
doPromote := fs.Bool("promote", false, "promote the candidate if the gate passes")
|
||||
nashThreshold := fs.Float64("nash", 0.50, "Nash value threshold for promotion")
|
||||
winLower := fs.Float64("win-lower", 0.40, "Wilson CI lower-bound threshold (0 to disable)")
|
||||
nolog := fs.Bool("nolog", false, "skip writing validation result to DB")
|
||||
|
||||
// Promoter flags (used only when -promote is set)
|
||||
repoDir := fs.String("repo-dir", envOrDefault("ACB_REPO_DIR", "."), "git repo root for K8s manifests")
|
||||
registry := fs.String("registry", envOrDefault("ACB_REGISTRY", "forgejo.ardenone.com/ai-code-battle"), "container registry")
|
||||
kubectlServer := fs.String("kubectl-server", envOrDefault("ACB_KUBECTL_SERVER", "http://kubectl-ardenone-cluster:8001"), "kubectl API server URL")
|
||||
encKey := fs.String("enc-key", os.Getenv("ACB_ENCRYPTION_KEY"), "AES-256-GCM encryption key (hex) for bots table")
|
||||
|
||||
if err := fs.Parse(args); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
if *lang == "" {
|
||||
fmt.Fprintln(os.Stderr, "evaluate: -lang is required")
|
||||
fs.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
if fs.NArg() < 1 {
|
||||
fmt.Fprintln(os.Stderr, "evaluate: file argument is required")
|
||||
fs.Usage()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
code, err := os.ReadFile(fs.Arg(0))
|
||||
if err != nil {
|
||||
log.Fatalf("read file: %v", err)
|
||||
}
|
||||
|
||||
store := evolverdb.NewStore(db)
|
||||
|
||||
// Pre-populate MAP-Elites grid from existing promoted programs so the gate
|
||||
// can detect niche collisions against the current population.
|
||||
const gridSize = 10
|
||||
grid := mapelites.New(gridSize)
|
||||
if promoted, err := store.ListPromoted(ctx); err == nil {
|
||||
for _, pp := range promoted {
|
||||
if len(pp.BehaviorVector) >= 2 {
|
||||
grid.TryPlace(pp.ProgramID, pp.Fitness, pp.BehaviorVector[0], pp.BehaviorVector[1])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Run the arena tournament.
|
||||
arenaCfg := arena.DefaultConfig()
|
||||
a := arena.New(db, arenaCfg)
|
||||
|
||||
fmt.Printf("evaluate: running %d-match tournament for %s bot…\n", arena.DefaultNumMatches, *lang)
|
||||
result, err := a.Run(ctx, string(code), *lang)
|
||||
if err != nil {
|
||||
log.Fatalf("arena: %v", err)
|
||||
}
|
||||
|
||||
// Print match summary.
|
||||
total := result.Wins + result.Losses + result.Draws
|
||||
fmt.Printf("\nTournament result: %d W / %d L / %d D / %d err (total=%d)\n",
|
||||
result.Wins, result.Losses, result.Draws, result.Errors, total)
|
||||
wr := arena.ComputeFromResult(result)
|
||||
fmt.Printf("Win rate: %.3f (95%% CI %.3f–%.3f)\n", wr.Rate, wr.Lower, wr.Upper)
|
||||
|
||||
nash := arena.ComputeNash(result.WinRateVec)
|
||||
fmt.Printf("Nash value (PSRO): %.3f (opponent mix: %v)\n", nash.NashValue, nash.WinRatePerOpponent)
|
||||
|
||||
// Compute fitness as overall win rate.
|
||||
fitness := wr.Rate
|
||||
|
||||
// Look up the program if -program-id was given.
|
||||
var program *evolverdb.Program
|
||||
if *programID > 0 {
|
||||
program, err = store.Get(ctx, *programID)
|
||||
if err != nil {
|
||||
log.Fatalf("get program %d: %v", *programID, err)
|
||||
}
|
||||
if program == nil {
|
||||
log.Fatalf("program %d not found", *programID)
|
||||
}
|
||||
// Update fitness in DB.
|
||||
if !*nolog {
|
||||
if err := store.UpdateFitness(ctx, *programID, fitness, program.BehaviorVector); err != nil {
|
||||
log.Printf("warn: update fitness: %v", err)
|
||||
} else {
|
||||
fmt.Printf("Updated program %d fitness to %.3f\n", *programID, fitness)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Apply the promotion gate.
|
||||
gateCfg := arena.GateConfig{
|
||||
NashThreshold: *nashThreshold,
|
||||
WinRateLowerBound: *winLower,
|
||||
}
|
||||
gate := arena.NewGate(gateCfg, grid)
|
||||
|
||||
var behaviorVec []float64
|
||||
if program != nil {
|
||||
behaviorVec = program.BehaviorVector
|
||||
}
|
||||
gateResult := gate.Evaluate(result, *programID, fitness, behaviorVec)
|
||||
|
||||
fmt.Printf("\nGate: %s\n", gateResult.Reason)
|
||||
fmt.Printf("MAP-Elites: placed=%v improved=%v cell=[%d,%d]\n",
|
||||
gateResult.MapElitesPlaced, gateResult.MapElitesImproved,
|
||||
gateResult.Placement.X, gateResult.Placement.Y)
|
||||
|
||||
if !gateResult.Promoted {
|
||||
fmt.Println("Decision: REJECTED")
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Println("Decision: PROMOTED")
|
||||
|
||||
if !*doPromote {
|
||||
fmt.Println("(pass -promote to execute deployment)")
|
||||
return
|
||||
}
|
||||
if program == nil {
|
||||
log.Fatalf("promote: -program-id is required when -promote is set")
|
||||
}
|
||||
|
||||
promCfg := promoter.DefaultConfig()
|
||||
promCfg.Registry = *registry
|
||||
promCfg.RepoDir = *repoDir
|
||||
promCfg.KubectlServer = *kubectlServer
|
||||
promCfg.EncryptionKey = *encKey
|
||||
|
||||
p := promoter.New(store, db, promCfg)
|
||||
res, err := p.Promote(ctx, program)
|
||||
if err != nil {
|
||||
log.Fatalf("promote: %v", err)
|
||||
}
|
||||
fmt.Printf("Promoted: bot_name=%s bot_id=%s endpoint=%s\n", res.BotName, res.BotID, res.Endpoint)
|
||||
}
|
||||
|
||||
// runRetire enforces the retirement policy (rating threshold + population cap).
|
||||
//
|
||||
// retire [-threshold 1000] [-cap 50] [-dry-run] [-kubectl-server URL]
|
||||
func runRetire(ctx context.Context, db *sql.DB, args []string) {
|
||||
fs := flag.NewFlagSet("retire", flag.ExitOnError)
|
||||
threshold := fs.Float64("threshold", 1000.0, "minimum display rating (mu-2*phi) to keep a bot")
|
||||
cap := fs.Int("cap", 50, "maximum number of simultaneously promoted evolved bots")
|
||||
dryRun := fs.Bool("dry-run", false, "print what would be retired without making changes")
|
||||
repoDir := fs.String("repo-dir", envOrDefault("ACB_REPO_DIR", "."), "git repo root")
|
||||
registry := fs.String("registry", envOrDefault("ACB_REGISTRY", "forgejo.ardenone.com/ai-code-battle"), "container registry")
|
||||
kubectlServer := fs.String("kubectl-server", envOrDefault("ACB_KUBECTL_SERVER", "http://kubectl-ardenone-cluster:8001"), "kubectl API server URL")
|
||||
encKey := fs.String("enc-key", os.Getenv("ACB_ENCRYPTION_KEY"), "AES-256-GCM encryption key (hex)")
|
||||
|
||||
if err := fs.Parse(args); err != nil {
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
store := evolverdb.NewStore(db)
|
||||
|
||||
promCfg := promoter.DefaultConfig()
|
||||
promCfg.RatingThreshold = *threshold
|
||||
promCfg.PopCap = *cap
|
||||
promCfg.RepoDir = *repoDir
|
||||
promCfg.Registry = *registry
|
||||
promCfg.KubectlServer = *kubectlServer
|
||||
promCfg.EncryptionKey = *encKey
|
||||
|
||||
if *dryRun {
|
||||
// Simulate by temporarily setting an impossible cap to list candidates.
|
||||
fmt.Println("retire: dry-run mode — no changes will be made")
|
||||
}
|
||||
|
||||
p := promoter.New(store, db, promCfg)
|
||||
|
||||
if *dryRun {
|
||||
// Read-only preview using the same DB query logic without executing retirements.
|
||||
rows, err := db.QueryContext(ctx, `
|
||||
SELECT p.id, p.bot_id, COALESCE(p.bot_name, ''),
|
||||
b.rating_mu - 2*b.rating_phi AS display_rating
|
||||
FROM programs p
|
||||
JOIN bots b ON p.bot_id = b.bot_id
|
||||
WHERE p.promoted = TRUE AND p.bot_id IS NOT NULL
|
||||
AND b.status = 'active' AND b.owner = 'acb-evolver'
|
||||
ORDER BY display_rating ASC`)
|
||||
if err != nil {
|
||||
log.Fatalf("query: %v", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
type row struct {
|
||||
programID int64
|
||||
botID, botName string
|
||||
displayRating float64
|
||||
}
|
||||
var bots []row
|
||||
for rows.Next() {
|
||||
var r row
|
||||
if err := rows.Scan(&r.programID, &r.botID, &r.botName, &r.displayRating); err != nil {
|
||||
log.Fatalf("scan: %v", err)
|
||||
}
|
||||
bots = append(bots, r)
|
||||
}
|
||||
_ = rows.Err()
|
||||
|
||||
remaining := len(bots)
|
||||
fmt.Printf("Active evolved bots: %d (threshold=%.0f cap=%d)\n", remaining, *threshold, *cap)
|
||||
for _, b := range bots {
|
||||
var why string
|
||||
if b.displayRating < *threshold {
|
||||
why = fmt.Sprintf("rating %.0f < threshold", b.displayRating)
|
||||
} else if remaining > *cap {
|
||||
why = "over cap"
|
||||
}
|
||||
mark := " keep"
|
||||
if why != "" {
|
||||
mark = " RETIRE"
|
||||
remaining--
|
||||
}
|
||||
fmt.Printf("%s bot_id=%-12s bot_name=%-20s rating=%.0f %s\n",
|
||||
mark, b.botID, b.botName, b.displayRating, why)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
retired, err := p.EnforcePolicy(ctx)
|
||||
if err != nil {
|
||||
log.Fatalf("enforce policy: %v", err)
|
||||
}
|
||||
|
||||
if len(retired) == 0 {
|
||||
fmt.Println("retire: nothing to retire")
|
||||
return
|
||||
}
|
||||
fmt.Printf("retire: retired %d bot(s):\n", len(retired))
|
||||
for _, r := range retired {
|
||||
fmt.Printf(" bot_id=%-12s bot_name=%-20s rating=%.0f reason=%s\n",
|
||||
r.BotID, r.BotName, r.DisplayRating, r.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
// envOrDefault returns the value of environment variable key, or fallback
// when the variable is unset or empty.
func envOrDefault(key, fallback string) string {
	value := os.Getenv(key)
	if value == "" {
		return fallback
	}
	return value
}
|
||||
|
||||
// runValidate parses flags, runs the three-stage validation pipeline on a bot
|
||||
// source file, and optionally logs the result to the database.
|
||||
//
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue