ai-code-battle/cmd/acb-evolver/internal/arena/winrate.go
jedarden 76e8791e4d Add evaluation arena, promotion gate, and retirement policy (Phase 7)
- arena/arena.go: 10-match mini-tournament running candidate as a local
  subprocess against diverse live opponents sampled across the rating
  distribution; AES-GCM secret decryption for opponent auth
- arena/psro.go: Nash equilibrium computation for the 1×K meta-game;
  FictitiousPlayNash included for future K×K support
- arena/winrate.go: Wilson-score 95% CI for win-rate calculation; draws
  counted as 0.5 wins
- arena/gate.go: two-part promotion gate — Nash value ≥ threshold AND
  MAP-Elites niche fill or improvement; detailed reason strings
- promoter/promoter.go: full promotion pipeline — bot source + Dockerfile
  + K8s Secret/Deployment/Service manifests, docker build, git commit/push
  (ArgoCD sync), kubectl readiness poll, bots-table INSERT, programs-table
  update; RetireBot and EnforcePolicy (rating threshold + population cap 50)
- db/db.go: add bot_name / bot_secret migration columns
- db/programs.go: ListPromoted, SetBotNameAndSecret, UnsetPromoted,
  GetByBotID, PromotedCount helpers for promotion/retirement lifecycle
- main.go: evaluate and retire subcommands wiring arena + gate + promoter;
  remove unused island flag from evaluate
- arena/arena_test.go: 21 unit tests covering Nash, Wilson CI, Gate logic,
  and selectDiverse opponent sampling
- promoter/promoter_test.go: tests for Dockerfiles, bot-ID/secret generation,
  AES-GCM helpers, and K8s manifest templates

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-26 23:32:37 -04:00

55 lines
1.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package arena
import "math"
// WinRateResult reports an observed win rate together with the bounds of its
// 95% Wilson score confidence interval.
type WinRateResult struct {
	// Wins is the number of wins; Total is the number of matches the rate
	// was computed over (non-error matches only).
	Wins, Total int

	// Rate is the observed win rate in [0, 1]. Lower and Upper are the
	// lower and upper bounds of the 95% confidence interval.
	Rate, Lower, Upper float64
}
// WinRate computes the observed win rate and its Wilson score 95% confidence
// interval for wins out of total valid matches.
//
// When total <= 0 there is no data: the result has Rate 0.5 and the
// uninformative interval [0, 1], with Wins and Total left at zero. wins is
// clamped to [0, total] so that inconsistent counts cannot push the rate
// outside [0, 1] and turn the bounds into NaN; the returned Wins field holds
// the clamped value.
//
// Wilson score interval, with p = wins/total and n = total:
//
//	center = (p + z^2/(2n)) / (1 + z^2/n)
//	margin = z * sqrt(p(1-p)/n + z^2/(4n^2)) / (1 + z^2/n)
//	CI     = [center - margin, center + margin]
//
// Using z = 1.96 (95% two-tailed confidence). The bounds are clamped to
// [0, 1].
func WinRate(wins, total int) WinRateResult {
	if total <= 0 {
		return WinRateResult{Rate: 0.5, Lower: 0.0, Upper: 1.0}
	}

	// Keep p inside [0, 1]; otherwise p*(1-p) is negative and the square
	// root below can produce NaN.
	if wins < 0 {
		wins = 0
	} else if wins > total {
		wins = total
	}

	const z = 1.96 // 95% CI
	n := float64(total)
	p := float64(wins) / n
	z2 := z * z
	denom := 1 + z2/n

	center := (p + z2/(2*n)) / denom
	margin := z * math.Sqrt(p*(1-p)/n+z2/(4*n*n)) / denom

	return WinRateResult{
		Wins:  wins,
		Total: total,
		Rate:  p,
		// Rounding can nudge the bounds just past the unit interval.
		Lower: math.Max(0, center-margin),
		Upper: math.Min(1, center+margin),
	}
}
// ComputeFromResult builds a WinRateResult from a tournament Result.
//
// Only non-error matches are counted: Total is Wins + Losses + Draws. Each
// draw contributes half a win to Rate and to the Wilson score 95% confidence
// interval (same formula and z = 1.96 as WinRate), while the Wins field keeps
// the integer win count. A nil Result, or one with no valid matches, yields
// the no-data result: Rate 0.5 with the interval [0, 1].
func ComputeFromResult(r *Result) WinRateResult {
	if r == nil {
		return WinRateResult{Rate: 0.5, Lower: 0.0, Upper: 1.0}
	}
	total := r.Wins + r.Losses + r.Draws
	if total <= 0 {
		return WinRateResult{Rate: 0.5, Lower: 0.0, Upper: 1.0}
	}

	const z = 1.96 // 95% CI
	n := float64(total)
	// Draws score as half-wins, so the effective win count may be fractional;
	// WinRate only accepts integer wins, hence the interval is computed here.
	p := (float64(r.Wins) + 0.5*float64(r.Draws)) / n
	z2 := z * z
	denom := 1 + z2/n

	center := (p + z2/(2*n)) / denom
	margin := z * math.Sqrt(p*(1-p)/n+z2/(4*n*n)) / denom

	return WinRateResult{
		Wins:  r.Wins,
		Total: total,
		Rate:  p,
		Lower: math.Max(0, center-margin),
		Upper: math.Min(1, center+margin),
	}
}