- Add csharp case to buildCandidate() with dotnet-script and mcs fallback - Add defender_strategy.cs.txt seed file (combined Program.cs, Strategy.cs, Grid.cs) - Add defender seed to population (gamma island, aggression=0.3, economy=0.4) - Add csharp to langDisplayName() and update comments Co-Authored-By: Claude <noreply@anthropic.com>
571 lines
16 KiB
Go
571 lines
16 KiB
Go
// Package arena implements the 10-match mini-tournament evaluation system
// for evolved bot candidates.
//
// The arena starts the candidate as a local subprocess (the same way the
// sandbox does during validation), selects a diverse set of live opponents
// from the PostgreSQL database, and runs one match per opponent using the
// game engine directly. No job queue or ACB API calls are needed for
// evaluation matches.
|
|
package arena
|
|
|
|
import (
|
|
"context"
|
|
"crypto/aes"
|
|
"crypto/cipher"
|
|
"database/sql"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"log"
|
|
"math/rand"
|
|
"net"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"sort"
|
|
"time"
|
|
|
|
"github.com/aicodebattle/acb/engine"
|
|
_ "github.com/lib/pq"
|
|
)
|
|
|
|
// DefaultNumMatches is the number of matches played in one mini-tournament
// (10 per spec).
const DefaultNumMatches = 10

// Credentials the arena presents when it talks to the locally started
// candidate.
const (
	// evalSecret is the request-signing secret used while the candidate runs
	// locally. The candidate subprocess receives it as BOT_SECRET, so the bot
	// verifies requests with the same secret the engine signs them with.
	evalSecret = "acb-eval-secret-for-tournament-evaluation-only"

	// evalBotID is the placeholder bot ID sent in the candidate's
	// authentication config.
	evalBotID = "b_evalcandidate"
)

// Timing of the candidate startup health probe.
const (
	// healthPollInterval is the pause between two consecutive probes.
	healthPollInterval = 200 * time.Millisecond

	// healthStartupTimeout bounds how long the candidate may take to report
	// healthy.
	healthStartupTimeout = 30 * time.Second
)
|
|
|
|
// BotRecord describes one live opponent as loaded from the bots table:
// who it is, where it can be reached, and how strong it is rated.
type BotRecord struct {
	// Identity and network location of the bot.
	BotID, Name, EndpointURL string

	// Secret is the bot's shared secret in plaintext (decrypted when an
	// encryption key is provided).
	Secret string

	// RatingMu is the bot's rating value used to rank opponents.
	RatingMu float64
}
|
|
|
|
// MatchOutcome is the record of a single evaluation match against one
// opponent.
type MatchOutcome struct {
	// Which opponent the candidate faced.
	OpponentBotID, OpponentName string

	// CandidateSlot is the player slot (0 or 1) assigned to the candidate.
	CandidateSlot int
	// Winner is the winning player slot: 0=player0, 1=player1, -1=draw.
	Winner int
	// Scores are the final scores as reported by the match runner.
	Scores []int
	// Turns is the number of turns the match lasted.
	Turns int
	// CombatDeaths holds bots killed in combat per player (kills credited
	// to killer).
	CombatDeaths []int
	// Err is non-nil when the match could not be completed; the other
	// result fields are not meaningful in that case.
	Err error
}

// CandidateWon reports whether the match finished without error and the
// winning slot is the candidate's slot.
func (o *MatchOutcome) CandidateWon() bool {
	if o.Err != nil {
		return false
	}
	return o.Winner == o.CandidateSlot
}

// CandidateLost reports whether the match finished without error and was won
// by the other player. Draws and errored matches are not losses.
func (o *MatchOutcome) CandidateLost() bool {
	if o.Err != nil || o.Winner == -1 {
		return false
	}
	return o.Winner != o.CandidateSlot
}
|
|
|
|
// Result aggregates mini-tournament outcomes for a candidate.
|
|
type Result struct {
|
|
CandidateEndpoint string
|
|
Outcomes []MatchOutcome
|
|
|
|
// Aggregate tallies (errors excluded from win/loss/draw counts).
|
|
Wins int
|
|
Losses int
|
|
Draws int
|
|
Errors int
|
|
|
|
// Kill statistics across all matches (errors excluded).
|
|
TotalKills int // total kills credited to candidate
|
|
TotalMatches int // non-error matches for kill rate normalization
|
|
KillRate float64 // kills per match
|
|
|
|
// OpponentWinRates maps opponent BotID → candidate win rate vs that bot.
|
|
OpponentWinRates map[string]float64
|
|
|
|
// WinRateVec is an ordered slice of per-opponent win rates (one entry per
|
|
// distinct opponent played, in match order, errors omitted). Used by PSRO.
|
|
WinRateVec []float64
|
|
}
|
|
|
|
// Config holds the tunable settings of an Arena.
type Config struct {
	// NumMatches is how many matches one tournament plays
	// (default: DefaultNumMatches = 10).
	NumMatches int

	// BotTimeout is the per-turn HTTP timeout applied to both bots.
	BotTimeout time.Duration

	// EncryptionKey is the hex-encoded AES-256-GCM key used to decrypt
	// opponent secrets read from the database. Empty means the secrets are
	// stored plaintext and are used as-is.
	EncryptionKey string
}
|
|
|
|
// DefaultConfig returns production-ready arena defaults.
|
|
func DefaultConfig() Config {
|
|
return Config{
|
|
NumMatches: DefaultNumMatches,
|
|
BotTimeout: 3 * time.Second,
|
|
}
|
|
}
|
|
|
|
// Arena orchestrates mini-tournament evaluation of bot candidates.
|
|
type Arena struct {
|
|
db *sql.DB
|
|
cfg Config
|
|
rng *rand.Rand
|
|
log *log.Logger
|
|
}
|
|
|
|
// New creates an Arena backed by the given database connection.
|
|
func New(db *sql.DB, cfg Config) *Arena {
|
|
return &Arena{
|
|
db: db,
|
|
cfg: cfg,
|
|
rng: rand.New(rand.NewSource(time.Now().UnixNano())),
|
|
log: log.Default(),
|
|
}
|
|
}
|
|
|
|
// Run executes a mini-tournament for the candidate bot.
|
|
//
|
|
// code is the candidate's source code; language is one of
|
|
// go|python|rust|typescript|java|php|csharp.
|
|
//
|
|
// The candidate is built and started as a local subprocess, then played
|
|
// against cfg.NumMatches opponents sampled from the live bot fleet.
|
|
func (a *Arena) Run(ctx context.Context, code, language string) (*Result, error) {
|
|
proc, err := startCandidate(ctx, code, language)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("start candidate subprocess: %w", err)
|
|
}
|
|
defer proc.stop()
|
|
|
|
candidateURL := fmt.Sprintf("http://127.0.0.1:%d", proc.port)
|
|
|
|
opponents, err := a.selectOpponents(ctx, a.cfg.NumMatches)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("select opponents: %w", err)
|
|
}
|
|
if len(opponents) == 0 {
|
|
return nil, fmt.Errorf("no active opponents available in live bot fleet")
|
|
}
|
|
|
|
result := &Result{
|
|
CandidateEndpoint: candidateURL,
|
|
OpponentWinRates: make(map[string]float64),
|
|
}
|
|
|
|
for i, opp := range opponents {
|
|
a.log.Printf("arena: match %d/%d vs %s (%s)", i+1, len(opponents), opp.Name, opp.BotID)
|
|
outcome := a.runMatch(ctx, candidateURL, opp)
|
|
result.Outcomes = append(result.Outcomes, outcome)
|
|
|
|
switch {
|
|
case outcome.Err != nil:
|
|
result.Errors++
|
|
a.log.Printf("arena: match %d error: %v", i+1, outcome.Err)
|
|
case outcome.CandidateWon():
|
|
result.Wins++
|
|
case outcome.CandidateLost():
|
|
result.Losses++
|
|
default:
|
|
result.Draws++
|
|
}
|
|
}
|
|
|
|
// Compute per-opponent win rates.
|
|
oppWins := make(map[string]int)
|
|
oppTotal := make(map[string]int)
|
|
for _, o := range result.Outcomes {
|
|
if o.Err != nil {
|
|
continue
|
|
}
|
|
oppTotal[o.OpponentBotID]++
|
|
if o.CandidateWon() {
|
|
oppWins[o.OpponentBotID]++
|
|
}
|
|
}
|
|
for id, total := range oppTotal {
|
|
if total > 0 {
|
|
result.OpponentWinRates[id] = float64(oppWins[id]) / float64(total)
|
|
}
|
|
}
|
|
|
|
// Compute kill statistics (combat kills credited to candidate).
|
|
totalKills := 0
|
|
totalMatches := 0
|
|
for _, o := range result.Outcomes {
|
|
if o.Err != nil {
|
|
continue
|
|
}
|
|
totalMatches++
|
|
// CombatDeaths[player_idx] contains kills credited to that player
|
|
if o.CandidateSlot < len(o.CombatDeaths) {
|
|
totalKills += o.CombatDeaths[o.CandidateSlot]
|
|
}
|
|
}
|
|
result.TotalKills = totalKills
|
|
result.TotalMatches = totalMatches
|
|
if totalMatches > 0 {
|
|
result.KillRate = float64(totalKills) / float64(totalMatches)
|
|
}
|
|
|
|
// Build ordered win-rate vector for PSRO (one entry per distinct opponent).
|
|
seen := make(map[string]bool)
|
|
for _, o := range result.Outcomes {
|
|
if o.Err != nil || seen[o.OpponentBotID] {
|
|
continue
|
|
}
|
|
seen[o.OpponentBotID] = true
|
|
result.WinRateVec = append(result.WinRateVec, result.OpponentWinRates[o.OpponentBotID])
|
|
}
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// selectOpponents queries active bots from the database and picks n opponents
|
|
// spread across the rating distribution for behavioral diversity.
|
|
func (a *Arena) selectOpponents(ctx context.Context, n int) ([]BotRecord, error) {
|
|
rows, err := a.db.QueryContext(ctx, `
|
|
SELECT bot_id, name, endpoint_url, shared_secret, rating_mu
|
|
FROM bots
|
|
WHERE status = 'active' AND endpoint_url <> ''
|
|
ORDER BY rating_mu DESC`)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("query bots: %w", err)
|
|
}
|
|
defer rows.Close()
|
|
|
|
var all []BotRecord
|
|
for rows.Next() {
|
|
var b BotRecord
|
|
if err := rows.Scan(&b.BotID, &b.Name, &b.EndpointURL, &b.Secret, &b.RatingMu); err != nil {
|
|
return nil, fmt.Errorf("scan bot: %w", err)
|
|
}
|
|
if a.cfg.EncryptionKey != "" {
|
|
if plain, err := decryptAESGCM(b.Secret, a.cfg.EncryptionKey); err == nil {
|
|
b.Secret = plain
|
|
}
|
|
// Leave as-is on error (may be stored plaintext in dev).
|
|
}
|
|
all = append(all, b)
|
|
}
|
|
if err := rows.Err(); err != nil {
|
|
return nil, err
|
|
}
|
|
return selectDiverse(all, n, a.rng), nil
|
|
}
|
|
|
|
// selectDiverse picks n bots spread evenly across the rating-sorted slice.
|
|
// When fewer than n bots exist, opponents are reused (shuffled for variety).
|
|
func selectDiverse(all []BotRecord, n int, rng *rand.Rand) []BotRecord {
|
|
if len(all) == 0 {
|
|
return nil
|
|
}
|
|
sort.Slice(all, func(i, j int) bool { return all[i].RatingMu > all[j].RatingMu })
|
|
|
|
selected := make([]BotRecord, 0, n)
|
|
if len(all) >= n {
|
|
for i := 0; i < n; i++ {
|
|
idx := int(float64(i) / float64(n) * float64(len(all)))
|
|
selected = append(selected, all[idx])
|
|
}
|
|
} else {
|
|
for len(selected) < n {
|
|
perm := rng.Perm(len(all))
|
|
for _, idx := range perm {
|
|
selected = append(selected, all[idx])
|
|
if len(selected) >= n {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}
|
|
rng.Shuffle(len(selected), func(i, j int) { selected[i], selected[j] = selected[j], selected[i] })
|
|
return selected
|
|
}
|
|
|
|
// runMatch runs one match between the local candidate and a live opponent.
|
|
func (a *Arena) runMatch(ctx context.Context, candidateURL string, opp BotRecord) MatchOutcome {
|
|
outcome := MatchOutcome{
|
|
OpponentBotID: opp.BotID,
|
|
OpponentName: opp.Name,
|
|
}
|
|
|
|
// Randomise player slot for positional fairness.
|
|
candidateSlot := a.rng.Intn(2)
|
|
outcome.CandidateSlot = candidateSlot
|
|
|
|
matchID := fmt.Sprintf("eval-%d", time.Now().UnixNano())
|
|
mr := engine.NewMatchRunner(
|
|
engine.ConfigForPlayers(2, 2), // 2-player match, 2 cores per player per plan §3.4
|
|
engine.WithTimeout(a.cfg.BotTimeout),
|
|
engine.WithRNG(rand.New(rand.NewSource(a.rng.Int63()))),
|
|
)
|
|
|
|
candidateBot := engine.NewHTTPBot(candidateURL,
|
|
engine.AuthConfig{BotID: evalBotID, Secret: evalSecret, MatchID: matchID},
|
|
engine.WithHTTPTimeout(a.cfg.BotTimeout))
|
|
|
|
oppBot := engine.NewHTTPBot(opp.EndpointURL,
|
|
engine.AuthConfig{BotID: opp.BotID, Secret: opp.Secret, MatchID: matchID},
|
|
engine.WithHTTPTimeout(a.cfg.BotTimeout))
|
|
|
|
if candidateSlot == 0 {
|
|
mr.AddBot(candidateBot, "candidate")
|
|
mr.AddBot(oppBot, opp.Name)
|
|
} else {
|
|
mr.AddBot(oppBot, opp.Name)
|
|
mr.AddBot(candidateBot, "candidate")
|
|
}
|
|
|
|
res, _, err := mr.Run()
|
|
if err != nil {
|
|
outcome.Err = fmt.Errorf("match runner: %w", err)
|
|
return outcome
|
|
}
|
|
outcome.Winner = res.Winner
|
|
outcome.Scores = res.Scores
|
|
outcome.Turns = res.Turns
|
|
outcome.CombatDeaths = res.CombatDeaths
|
|
return outcome
|
|
}
|
|
|
|
// ── candidate subprocess management ──────────────────────────────────────────
|
|
|
|
// botProcess is a handle on a locally started candidate bot.
type botProcess struct {
	// port is the TCP port passed to the bot as BOT_PORT.
	port int
	// cmd is the running subprocess.
	cmd *exec.Cmd
	// tmpDir is the scratch directory holding the bot's source and build
	// output.
	tmpDir string
}

// stop kills the subprocess (if one was started), waits for it to exit, and
// removes the scratch directory. Errors from any of these steps are ignored;
// calling stop on a zero-value botProcess is a no-op.
func (p *botProcess) stop() {
	if c := p.cmd; c != nil && c.Process != nil {
		_ = c.Process.Kill()
		_ = c.Wait()
	}
	if p.tmpDir == "" {
		return
	}
	os.RemoveAll(p.tmpDir)
}
|
|
|
|
func startCandidate(ctx context.Context, code, language string) (*botProcess, error) {
|
|
tmpDir, err := os.MkdirTemp("", "acb-arena-*")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("mkdirtemp: %w", err)
|
|
}
|
|
|
|
execPath, execArgs, err := buildCandidate(ctx, code, language, tmpDir)
|
|
if err != nil {
|
|
os.RemoveAll(tmpDir)
|
|
return nil, fmt.Errorf("build: %w", err)
|
|
}
|
|
|
|
port, err := allocateFreePort()
|
|
if err != nil {
|
|
os.RemoveAll(tmpDir)
|
|
return nil, fmt.Errorf("allocate port: %w", err)
|
|
}
|
|
|
|
env := append(os.Environ(),
|
|
fmt.Sprintf("BOT_PORT=%d", port),
|
|
"BOT_SECRET="+evalSecret,
|
|
)
|
|
|
|
var args []string
|
|
args = append(args, execArgs...)
|
|
cmd := exec.CommandContext(ctx, execPath, args...)
|
|
cmd.Env = env
|
|
cmd.Dir = tmpDir
|
|
|
|
if err := cmd.Start(); err != nil {
|
|
os.RemoveAll(tmpDir)
|
|
return nil, fmt.Errorf("start process: %w", err)
|
|
}
|
|
|
|
proc := &botProcess{port: port, cmd: cmd, tmpDir: tmpDir}
|
|
addr := fmt.Sprintf("127.0.0.1:%d", port)
|
|
if err := waitForHealth(ctx, addr); err != nil {
|
|
proc.stop()
|
|
return nil, fmt.Errorf("candidate health: %w", err)
|
|
}
|
|
return proc, nil
|
|
}
|
|
|
|
func buildCandidate(ctx context.Context, code, language, dir string) (string, []string, error) {
|
|
switch language {
|
|
case "go":
|
|
if err := os.WriteFile(dir+"/bot.go", []byte(code), 0o600); err != nil {
|
|
return "", nil, err
|
|
}
|
|
if err := os.WriteFile(dir+"/go.mod", []byte("module bot\n\ngo 1.21\n"), 0o600); err != nil {
|
|
return "", nil, err
|
|
}
|
|
bin := dir + "/bot"
|
|
cmd := exec.CommandContext(ctx, "go", "build", "-o", bin, ".")
|
|
cmd.Dir = dir
|
|
if out, err := cmd.CombinedOutput(); err != nil {
|
|
return "", nil, fmt.Errorf("go build: %s", truncate(string(out), 512))
|
|
}
|
|
return bin, nil, nil
|
|
|
|
case "python":
|
|
src := dir + "/bot.py"
|
|
if err := os.WriteFile(src, []byte(code), 0o600); err != nil {
|
|
return "", nil, err
|
|
}
|
|
return "python3", []string{src}, nil
|
|
|
|
case "rust":
|
|
src := dir + "/main.rs"
|
|
if err := os.WriteFile(src, []byte(code), 0o600); err != nil {
|
|
return "", nil, err
|
|
}
|
|
bin := dir + "/bot"
|
|
cmd := exec.CommandContext(ctx, "rustc", "--edition", "2021", src, "-o", bin)
|
|
if out, err := cmd.CombinedOutput(); err != nil {
|
|
return "", nil, fmt.Errorf("rustc: %s", truncate(string(out), 512))
|
|
}
|
|
return bin, nil, nil
|
|
|
|
case "typescript":
|
|
if err := os.WriteFile(dir+"/bot.ts", []byte(code), 0o600); err != nil {
|
|
return "", nil, err
|
|
}
|
|
tsconfig := `{"compilerOptions":{"target":"ES2020","module":"commonjs","outDir":"./"},"files":["bot.ts"]}`
|
|
if err := os.WriteFile(dir+"/tsconfig.json", []byte(tsconfig), 0o600); err != nil {
|
|
return "", nil, err
|
|
}
|
|
cmd := exec.CommandContext(ctx, "tsc", "--project", dir+"/tsconfig.json")
|
|
if out, err := cmd.CombinedOutput(); err != nil {
|
|
return "", nil, fmt.Errorf("tsc: %s", truncate(string(out), 512))
|
|
}
|
|
return "node", []string{dir + "/bot.js"}, nil
|
|
|
|
case "java":
|
|
src := dir + "/Bot.java"
|
|
if err := os.WriteFile(src, []byte(code), 0o600); err != nil {
|
|
return "", nil, err
|
|
}
|
|
cmd := exec.CommandContext(ctx, "javac", src)
|
|
if out, err := cmd.CombinedOutput(); err != nil {
|
|
return "", nil, fmt.Errorf("javac: %s", truncate(string(out), 512))
|
|
}
|
|
return "java", []string{"-cp", dir, "Bot"}, nil
|
|
|
|
case "php":
|
|
src := dir + "/bot.php"
|
|
if err := os.WriteFile(src, []byte(code), 0o600); err != nil {
|
|
return "", nil, err
|
|
}
|
|
return "php", []string{src}, nil
|
|
|
|
case "csharp":
|
|
src := dir + "/bot.cs"
|
|
if err := os.WriteFile(src, []byte(code), 0o600); err != nil {
|
|
return "", nil, err
|
|
}
|
|
// Check for dotnet-script first (no compilation needed)
|
|
if _, err := exec.LookPath("dotnet-script"); err == nil {
|
|
return "dotnet-script", []string{src}, nil
|
|
}
|
|
// Fallback to mcs (Mono C# compiler)
|
|
if _, err := exec.LookPath("mcs"); err == nil {
|
|
exe := dir + "/bot.exe"
|
|
cmd := exec.CommandContext(ctx, "mcs", "-out:"+exe, src)
|
|
if out, err := cmd.CombinedOutput(); err != nil {
|
|
return "", nil, fmt.Errorf("mcs: %s", truncate(string(out), 512))
|
|
}
|
|
return "mono", []string{exe}, nil
|
|
}
|
|
return "", nil, fmt.Errorf("csharp requires dotnet-script or mcs (mono)")
|
|
|
|
default:
|
|
return "", nil, fmt.Errorf("unsupported language: %s", language)
|
|
}
|
|
}
|
|
|
|
// allocateFreePort asks the operating system for an unused TCP port on
// localhost by listening on port 0, reading back the port that was assigned,
// and closing the listener again. The port is not reserved afterwards.
func allocateFreePort() (int, error) {
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		return 0, err
	}
	defer listener.Close()

	tcpAddr := listener.Addr().(*net.TCPAddr)
	return tcpAddr.Port, nil
}
|
|
|
|
// waitForHealth polls GET /health until 200 OK or healthStartupTimeout elapses.
|
|
func waitForHealth(ctx context.Context, addr string) error {
|
|
deadline := time.Now().Add(healthStartupTimeout)
|
|
client := &http.Client{Timeout: 500 * time.Millisecond}
|
|
for time.Now().Before(deadline) {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://"+addr+"/health", nil)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if resp, err := client.Do(req); err == nil {
|
|
resp.Body.Close()
|
|
if resp.StatusCode == http.StatusOK {
|
|
return nil
|
|
}
|
|
}
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case <-time.After(healthPollInterval):
|
|
}
|
|
}
|
|
return fmt.Errorf("candidate did not become healthy within %s", healthStartupTimeout)
|
|
}
|
|
|
|
// decryptAESGCM decrypts a hex-encoded AES-256-GCM message with a hex-encoded
// 32-byte key and returns the plaintext as a string.
//
// The decoded message is expected to start with the GCM nonce, followed by
// the ciphertext (including its authentication tag). An error is returned
// when the key or message is not valid hex, the key is not 32 bytes long, the
// message is shorter than the nonce, or authentication fails.
func decryptAESGCM(ciphertextHex, keyHex string) (string, error) {
	rawKey, err := hex.DecodeString(keyHex)
	switch {
	case err != nil:
		return "", fmt.Errorf("decode key: %w", err)
	case len(rawKey) != 32:
		return "", fmt.Errorf("key must be 32 bytes (64 hex chars)")
	}

	sealed, err := hex.DecodeString(ciphertextHex)
	if err != nil {
		return "", fmt.Errorf("decode ciphertext: %w", err)
	}

	blockCipher, err := aes.NewCipher(rawKey)
	if err != nil {
		return "", err
	}
	gcm, err := cipher.NewGCM(blockCipher)
	if err != nil {
		return "", err
	}

	// The nonce is stored in front of the actual ciphertext.
	nonceLen := gcm.NonceSize()
	if nonceLen > len(sealed) {
		return "", fmt.Errorf("ciphertext too short")
	}
	nonce, body := sealed[:nonceLen], sealed[nonceLen:]

	opened, err := gcm.Open(nil, nonce, body, nil)
	if err != nil {
		return "", err
	}
	return string(opened), nil
}
|
|
|
|
// truncate returns s unchanged when it is at most max bytes long; otherwise
// it returns a prefix of at most max bytes followed by "…".
//
// The cut is moved back to the nearest rune boundary, so a multi-byte UTF-8
// character is never split in half. A negative max is treated as 0.
func truncate(s string, max int) string {
	if max < 0 {
		max = 0
	}
	if len(s) <= max {
		return s
	}
	// s[max] is the first byte that would be dropped. While it is a UTF-8
	// continuation byte (10xxxxxx) the cut sits inside a rune, so step back.
	for max > 0 && s[max]&0xC0 == 0x80 {
		max--
	}
	return s[:max] + "…"
}
|