- arena/arena.go: 10-match mini-tournament running candidate as a local subprocess against diverse live opponents sampled across the rating distribution; AES-GCM secret decryption for opponent auth - arena/psro.go: Nash equilibrium computation for the 1×K meta-game; FictitiousPlayNash included for future K×K support - arena/winrate.go: Wilson-score 95% CI for win-rate calculation; draws counted as 0.5 wins - arena/gate.go: two-part promotion gate — Nash value ≥ threshold AND MAP-Elites niche fill or improvement; detailed reason strings - promoter/promoter.go: full promotion pipeline — bot source + Dockerfile + K8s Secret/Deployment/Service manifests, docker build, git commit/push (ArgoCD sync), kubectl readiness poll, bots-table INSERT, programs-table update; RetireBot and EnforcePolicy (rating threshold + population cap 50) - db/db.go: add bot_name / bot_secret migration columns - db/programs.go: ListPromoted, SetBotNameAndSecret, UnsetPromoted, GetByBotID, PromotedCount helpers for promotion/retirement lifecycle - main.go: evaluate and retire subcommands wiring arena + gate + promoter; remove unused island flag from evaluate - arena/arena_test.go: 21 unit tests covering Nash, Wilson CI, Gate logic, and selectDiverse opponent sampling - promoter/promoter_test.go: tests for Dockerfiles, bot-ID/secret generation, AES-GCM helpers, and K8s manifest templates Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
119 lines
3.6 KiB
Go
119 lines
3.6 KiB
Go
// Package arena — PSRO Nash equilibrium computation.
//
// LLM-PSRO (Policy Space Response Oracles) uses the Nash equilibrium over the
// current bot population as the promotion criterion. A candidate is promoted
// only if it holds up against the Nash mixture, i.e. its expected payoff
// against the Nash mixture is at least the threshold (default 0.50).
//
// For the mini-tournament setting (one candidate, K opponents), the payoff
// matrix has a single row. The Nash-optimal strategy for the minimising
// column player (the opponents) is to concentrate weight on the opponent that
// minimises the candidate's expected win rate. The resulting Nash value is
// therefore min(winRates), which is the tightest possible test.
//
// The full fictitious-play algorithm is retained so it generalises cleanly
// to K×K payoff matrices when the population grows.
package arena
|
||
|
||
// NashResult is the outcome of solving the candidate-versus-population
// meta-game.
type NashResult struct {
	// OpponentMix is the opponents' equilibrium mixture: entry i is the
	// probability assigned to opponent i. The entries add up to 1.0.
	OpponentMix []float64

	// NashValue is the expected win rate of the candidate when opponents are
	// drawn from OpponentMix. The promotion threshold is compared against
	// this number.
	NashValue float64

	// WinRatePerOpponent repeats the payoff row that was supplied as input,
	// kept here for the caller's convenience.
	WinRatePerOpponent []float64
}
|
||
|
||
// ComputeNash computes the Nash equilibrium for the 1×K meta-game where
|
||
// winRates[i] is the candidate's win rate against opponent i.
|
||
//
|
||
// The column player (opponent) minimises the candidate's expected win rate.
|
||
// The optimal column strategy concentrates on the opponent(s) with the lowest
|
||
// win rate for the candidate. Ties in the minimum are distributed uniformly.
|
||
//
|
||
// Nash value = min(winRates) (hardest-opponent test).
|
||
func ComputeNash(winRates []float64) NashResult {
|
||
if len(winRates) == 0 {
|
||
return NashResult{NashValue: 0.5}
|
||
}
|
||
|
||
K := len(winRates)
|
||
mix := make([]float64, K)
|
||
|
||
// Find the minimum win rate.
|
||
minVal := winRates[0]
|
||
for _, w := range winRates[1:] {
|
||
if w < minVal {
|
||
minVal = w
|
||
}
|
||
}
|
||
|
||
// Distribute weight uniformly over all opponents achieving the minimum.
|
||
count := 0
|
||
for _, w := range winRates {
|
||
if w == minVal {
|
||
count++
|
||
}
|
||
}
|
||
for i, w := range winRates {
|
||
if w == minVal {
|
||
mix[i] = 1.0 / float64(count)
|
||
}
|
||
}
|
||
|
||
return NashResult{
|
||
OpponentMix: mix,
|
||
NashValue: minVal,
|
||
WinRatePerOpponent: winRates,
|
||
}
|
||
}
|
||
|
||
// FictitiousPlayNash computes the Nash equilibrium via fictitious play,
|
||
// converging over iterations rounds. This generalises to K×K matrices and
|
||
// provides a softer mixed-strategy Nash than the pure-minimax above.
|
||
//
|
||
// For a 1×K payoff matrix both algorithms produce identical results, so this
|
||
// function is provided for future use when the full population payoff matrix
|
||
// is available.
|
||
func FictitiousPlayNash(winRates []float64, iterations int) NashResult {
|
||
if len(winRates) == 0 {
|
||
return NashResult{NashValue: 0.5}
|
||
}
|
||
if iterations <= 0 {
|
||
iterations = 1000
|
||
}
|
||
|
||
K := len(winRates)
|
||
counts := make([]float64, K)
|
||
|
||
// Fictitious play: column player repeatedly best-responds to the current
|
||
// row player strategy (fixed at "always play candidate").
|
||
for iter := 0; iter < iterations; iter++ {
|
||
// Column player best response: pick opponent minimising candidate win rate.
|
||
best := 0
|
||
for i := 1; i < K; i++ {
|
||
if winRates[i] < winRates[best] {
|
||
best = i
|
||
}
|
||
}
|
||
counts[best]++
|
||
}
|
||
|
||
mix := make([]float64, K)
|
||
expected := 0.0
|
||
for i, c := range counts {
|
||
mix[i] = c / float64(iterations)
|
||
expected += mix[i] * winRates[i]
|
||
}
|
||
|
||
return NashResult{
|
||
OpponentMix: mix,
|
||
NashValue: expected,
|
||
WinRatePerOpponent: winRates,
|
||
}
|
||
}
|