feat(worker): add rating recovery CLI mode (-mode=recalc-ratings)
Implements the rating recovery procedure specified in plan §12.3.

Running 'go run ./cmd/acb-worker -mode=recalc-ratings' will:
1. Reset all bot ratings to Glicko-2 defaults (mu=1500, phi=350, sigma=0.06)
2. Fetch all completed matches from the database in chronological order
3. Replay each match to recompute Glicko-2 ratings from scratch
4. Update the bots table with the recalculated ratings

This is needed for disaster recovery when ratings are corrupted or lost.

Database functions added:
- ResetAllRatings: resets all bot ratings to defaults
- GetAllCompletedMatches: fetches completed matches chronologically with participants
- UpdateAllRatings: bulk updates all bot ratings in a single transaction

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
aeef954590
commit
467b7b67ea
2 changed files with 239 additions and 0 deletions
|
|
@ -689,3 +689,132 @@ func (c *DBClient) UpdateMapEngagement(ctx context.Context, mapID string, engage
|
|||
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
// CompletedMatchForRecalc represents a completed match with participants for rating recalculation.
|
||||
type CompletedMatchForRecalc struct {
|
||||
ID string
|
||||
CompletedAt time.Time
|
||||
Winner *int // player_slot of winner, nil for draw
|
||||
WinnerBotID *string // bot_id of winner (derived from winner player_slot)
|
||||
Participants []MatchParticipantForRecalc
|
||||
}
|
||||
|
||||
// MatchParticipantForRecalc is a single row of match_participants as needed
// by the rating recovery replay.
type MatchParticipantForRecalc struct {
	// BotID is the bot_id of the participating bot.
	BotID string
	// PlayerSlot is the player_slot the bot occupied in the match.
	PlayerSlot int
}
|
||||
|
||||
// ResetAllRatings resets all bot ratings to Glicko-2 default values.
|
||||
func (c *DBClient) ResetAllRatings(ctx context.Context) error {
|
||||
_, err := c.db.ExecContext(ctx, `
|
||||
UPDATE bots
|
||||
SET rating_mu = $1, rating_phi = $2, rating_sigma = $3
|
||||
`, glicko2DefaultMu, glicko2DefaultRD, glicko2Tau)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to reset ratings: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAllCompletedMatches fetches all completed matches with their participants
|
||||
// in chronological order (by completed_at). Used for rating recovery.
|
||||
func (c *DBClient) GetAllCompletedMatches(ctx context.Context) ([]CompletedMatchForRecalc, error) {
|
||||
// First, get all completed matches in order
|
||||
rows, err := c.db.QueryContext(ctx, `
|
||||
SELECT match_id, winner, completed_at
|
||||
FROM matches
|
||||
WHERE status = 'completed' AND completed_at IS NOT NULL
|
||||
ORDER BY completed_at ASC
|
||||
`)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query completed matches: %w", err)
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
var matches []CompletedMatchForRecalc
|
||||
for rows.Next() {
|
||||
var m CompletedMatchForRecalc
|
||||
err := rows.Scan(&m.ID, &m.Winner, &m.CompletedAt)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to scan match: %w", err)
|
||||
}
|
||||
matches = append(matches, m)
|
||||
}
|
||||
|
||||
if rows.Err() != nil {
|
||||
return nil, fmt.Errorf("error iterating matches: %w", rows.Err())
|
||||
}
|
||||
|
||||
// For each match, get participants
|
||||
for i := range matches {
|
||||
partRows, err := c.db.QueryContext(ctx, `
|
||||
SELECT bot_id, player_slot
|
||||
FROM match_participants
|
||||
WHERE match_id = $1
|
||||
ORDER BY player_slot
|
||||
`, matches[i].ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query participants for match %s: %w", matches[i].ID, err)
|
||||
}
|
||||
|
||||
var participants []MatchParticipantForRecalc
|
||||
for partRows.Next() {
|
||||
var p MatchParticipantForRecalc
|
||||
err := partRows.Scan(&p.BotID, &p.PlayerSlot)
|
||||
if err != nil {
|
||||
partRows.Close()
|
||||
return nil, fmt.Errorf("failed to scan participant: %w", err)
|
||||
}
|
||||
participants = append(participants, p)
|
||||
}
|
||||
partRows.Close()
|
||||
|
||||
if partRows.Err() != nil {
|
||||
return nil, fmt.Errorf("error iterating participants for match %s: %w", matches[i].ID, partRows.Err())
|
||||
}
|
||||
|
||||
matches[i].Participants = participants
|
||||
|
||||
// Derive WinnerBotID from Winner (player_slot)
|
||||
if matches[i].Winner != nil {
|
||||
for _, p := range participants {
|
||||
if p.PlayerSlot == *matches[i].Winner {
|
||||
winnerID := p.BotID
|
||||
matches[i].WinnerBotID = &winnerID
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return matches, nil
|
||||
}
|
||||
|
||||
// UpdateAllRatings updates all bot ratings in a single transaction.
|
||||
func (c *DBClient) UpdateAllRatings(ctx context.Context, ratings map[string]Glicko2Rating) error {
|
||||
tx, err := c.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to begin transaction: %w", err)
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
now := time.Now().UTC()
|
||||
|
||||
for botID, rating := range ratings {
|
||||
_, err := tx.ExecContext(ctx, `
|
||||
UPDATE bots
|
||||
SET rating_mu = $1, rating_phi = $2, rating_sigma = $3, last_active = $4
|
||||
WHERE bot_id = $5
|
||||
`, rating.Mu, rating.Phi, rating.Sigma, now, botID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update rating for bot %s: %w", botID, err)
|
||||
}
|
||||
}
|
||||
|
||||
if err := tx.Commit(); err != nil {
|
||||
return fmt.Errorf("failed to commit transaction: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ import (
|
|||
"github.com/aicodebattle/acb/metrics"
|
||||
"image/png"
|
||||
)
|
||||
|
||||
// Config holds worker configuration.
|
||||
type Config struct {
|
||||
DatabaseURL string // PostgreSQL connection URL
|
||||
|
|
@ -63,6 +64,7 @@ func main() {
|
|||
turnTimeout := flag.Duration("timeout", 3*time.Second, "Per-turn bot timeout")
|
||||
maxRetries := flag.Int("retries", 3, "Max retries for transient errors")
|
||||
verbose := flag.Bool("verbose", getEnv("ACB_VERBOSE", "false") == "true", "Enable verbose logging")
|
||||
mode := flag.String("mode", "worker", "Operation mode: 'worker' (normal polling) or 'recalc-ratings' (disaster recovery)")
|
||||
flag.Parse()
|
||||
|
||||
// Validate required config
|
||||
|
|
@ -97,6 +99,20 @@ func main() {
|
|||
}
|
||||
defer dbClient.Close()
|
||||
|
||||
// Handle different operation modes
|
||||
switch *mode {
|
||||
case "recalc-ratings":
|
||||
// Disaster recovery: recompute all ratings from match history
|
||||
logger := log.New(os.Stdout, "[recalc-ratings] ", log.LstdFlags)
|
||||
if err := recalcRatings(context.Background(), dbClient, logger, *verbose); err != nil {
|
||||
log.Fatalf("Rating recalculation failed: %v", err)
|
||||
}
|
||||
logger.Println("Rating recalculation completed successfully")
|
||||
return
|
||||
}
|
||||
|
||||
// Normal worker mode (default)
|
||||
|
||||
// Create B2 client (optional - if not configured, replays won't be uploaded to cold archive)
|
||||
var b2Client *B2Client
|
||||
if cfg.B2Endpoint != "" && cfg.B2AccessKey != "" && cfg.B2SecretKey != "" {
|
||||
|
|
@ -236,6 +252,7 @@ func (w *Worker) pollAndExecute(ctx context.Context) error {
|
|||
metrics.MatchThroughput.Inc()
|
||||
metrics.WorkerMatchesTotal.Inc()
|
||||
metrics.WorkerMatchDuration.Observe(time.Since(matchStart).Seconds())
|
||||
|
||||
// Upload replay to B2
|
||||
replayURL := ""
|
||||
if w.b2 != nil {
|
||||
|
|
@ -578,3 +595,96 @@ func (w *Worker) computeRatingUpdates(claimData *JobClaimData, result *MatchResu
|
|||
// Compute rating updates
|
||||
return ComputeRatingUpdates(botIDs, ratings, scores)
|
||||
}
|
||||
|
||||
// recalcRatings recalculates all Glicko-2 ratings from scratch by replaying
|
||||
// all completed matches in chronological order. Used for disaster recovery
|
||||
// when ratings are corrupted or lost.
|
||||
func recalcRatings(ctx context.Context, db *DBClient, logger *log.Logger, verbose bool) error {
|
||||
logger.Println("Starting rating recalculation...")
|
||||
logger.Println("Step 1: Resetting all bot ratings to defaults")
|
||||
|
||||
// Step 1: Reset all bot ratings to defaults
|
||||
if err := db.ResetAllRatings(ctx); err != nil {
|
||||
return fmt.Errorf("failed to reset ratings: %w", err)
|
||||
}
|
||||
logger.Println(" All ratings reset to defaults (mu=1500, phi=350, sigma=0.06)")
|
||||
|
||||
// Step 2: Fetch all completed matches in chronological order
|
||||
logger.Println("Step 2: Fetching completed matches in chronological order")
|
||||
matches, err := db.GetAllCompletedMatches(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to fetch matches: %w", err)
|
||||
}
|
||||
logger.Printf(" Found %d completed matches to process", len(matches))
|
||||
|
||||
if len(matches) == 0 {
|
||||
logger.Println("No matches to process, ratings remain at defaults")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Step 3: Track current ratings in memory
|
||||
currentRatings := make(map[string]Glicko2Rating)
|
||||
|
||||
// Step 4: Process each match in order
|
||||
logger.Println("Step 3: Replaying matches to recompute ratings")
|
||||
processed := 0
|
||||
for _, match := range matches {
|
||||
// Ensure all participants have default ratings initialized
|
||||
for _, p := range match.Participants {
|
||||
if _, exists := currentRatings[p.BotID]; !exists {
|
||||
currentRatings[p.BotID] = Glicko2Rating{
|
||||
Mu: glicko2DefaultMu,
|
||||
Phi: glicko2DefaultRD,
|
||||
Sigma: glicko2Tau, // default sigma
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Build arrays for rating computation
|
||||
n := len(match.Participants)
|
||||
botIDs := make([]string, n)
|
||||
ratings := make([]Glicko2Rating, n)
|
||||
scores := make([]float64, n)
|
||||
|
||||
for i, p := range match.Participants {
|
||||
botIDs[i] = p.BotID
|
||||
ratings[i] = currentRatings[p.BotID]
|
||||
|
||||
// Determine score based on match result
|
||||
// If winner is a player slot, convert to bot_id and score accordingly
|
||||
if match.Winner == nil {
|
||||
// Draw or no winner
|
||||
scores[i] = 0.5
|
||||
} else if match.WinnerBotID != nil && *match.WinnerBotID == p.BotID {
|
||||
scores[i] = 1.0
|
||||
} else {
|
||||
scores[i] = 0.0
|
||||
}
|
||||
}
|
||||
|
||||
// Compute new ratings using Glicko-2
|
||||
newRatings := UpdateRatings(ratings, scores)
|
||||
|
||||
// Update stored ratings
|
||||
for i, botID := range botIDs {
|
||||
currentRatings[botID] = newRatings[i]
|
||||
}
|
||||
|
||||
processed++
|
||||
if processed%1000 == 0 || verbose {
|
||||
logger.Printf(" Processed %d/%d matches (match_id=%s)", processed, len(matches), match.ID)
|
||||
}
|
||||
}
|
||||
|
||||
logger.Printf(" Processed all %d matches", processed)
|
||||
|
||||
// Step 5: Write final ratings back to database
|
||||
logger.Println("Step 4: Writing recalculated ratings to database")
|
||||
if err := db.UpdateAllRatings(ctx, currentRatings); err != nil {
|
||||
return fmt.Errorf("failed to write ratings: %w", err)
|
||||
}
|
||||
|
||||
logger.Printf(" Updated ratings for %d bots", len(currentRatings))
|
||||
logger.Println("Rating recalculation complete")
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue