- arena/arena.go: 10-match mini-tournament running candidate as a local subprocess against diverse live opponents sampled across the rating distribution; AES-GCM secret decryption for opponent auth
- arena/psro.go: Nash equilibrium computation for the 1×K meta-game; FictitiousPlayNash included for future K×K support
- arena/winrate.go: Wilson-score 95% CI for win-rate calculation; draws counted as 0.5 wins
- arena/gate.go: two-part promotion gate — Nash value ≥ threshold AND MAP-Elites niche fill or improvement; detailed reason strings
- promoter/promoter.go: full promotion pipeline — bot source + Dockerfile + K8s Secret/Deployment/Service manifests, docker build, git commit/push (ArgoCD sync), kubectl readiness poll, bots-table INSERT, programs-table update; RetireBot and EnforcePolicy (rating threshold + population cap 50)
- db/db.go: add bot_name / bot_secret migration columns
- db/programs.go: ListPromoted, SetBotNameAndSecret, UnsetPromoted, GetByBotID, PromotedCount helpers for promotion/retirement lifecycle
- main.go: evaluate and retire subcommands wiring arena + gate + promoter; remove unused island flag from evaluate
- arena/arena_test.go: 21 unit tests covering Nash, Wilson CI, Gate logic, and selectDiverse opponent sampling
- promoter/promoter_test.go: tests for Dockerfiles, bot-ID/secret generation, AES-GCM helpers, and K8s manifest templates

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
55 lines
1.5 KiB
Go
55 lines
1.5 KiB
Go
package arena
|
||
|
||
import "math"
|
||
|
||
// WinRateResult holds an observed win rate together with its 95% Wilson
// score confidence interval.
type WinRateResult struct {
	Wins  int     // wins the rate was computed from
	Total int     // non-error matches only
	Rate  float64 // observed win rate (0–1)
	Lower float64 // 95% CI lower bound
	Upper float64 // 95% CI upper bound
}

// WinRate computes the win rate and the Wilson score 95% confidence interval
// for wins out of total valid matches.
//
// When total <= 0 there is no data: the result has Rate 0.5 and the
// maximally wide interval [0, 1], with Wins and Total left at zero.
// A wins value outside [0, total] is clamped into that range so the rate
// stays in [0, 1] and the interval bounds can never be NaN.
//
// Wilson score interval, with p̂ = wins/total and n = total:
//
//	center = (p̂ + z²/2n) / (1 + z²/n)
//	margin = z * sqrt(p̂(1-p̂)/n + z²/4n²) / (1 + z²/n)
//	CI     = [center − margin, center + margin]
//
// Using z = 1.96 (95% two-tailed confidence). The bounds are clipped to
// [0, 1].
func WinRate(wins, total int) WinRateResult {
	if total <= 0 {
		return WinRateResult{Rate: 0.5, Lower: 0.0, Upper: 1.0}
	}

	// Out-of-range win counts would push p̂ outside [0, 1] and make the
	// square-root argument negative, so clamp before doing any math.
	if wins < 0 {
		wins = 0
	} else if wins > total {
		wins = total
	}

	const (
		z  = 1.96 // 95% CI
		z2 = z * z
	)

	n := float64(total)
	p := float64(wins) / n
	denom := 1 + z2/n // shared by center and margin

	center := (p + z2/(2*n)) / denom
	margin := z * math.Sqrt(p*(1-p)/n+z2/(4*n*n)) / denom

	return WinRateResult{
		Wins:  wins,
		Total: total,
		Rate:  p,
		Lower: math.Max(0, center-margin),
		Upper: math.Min(1, center+margin),
	}
}
|
||
|
||
// ComputeFromResult builds a WinRateResult from a tournament Result.
|
||
// Only non-error matches are counted; draws count as 0.5 wins.
|
||
func ComputeFromResult(r *Result) WinRateResult {
|
||
total := r.Wins + r.Losses + r.Draws
|
||
// Count draws as half-wins for the rate; wins/total integers use integer wins.
|
||
return WinRate(r.Wins, total)
|
||
}
|