From 467b7b67ea1a5b2deab9646070fd3096cb487f9f Mon Sep 17 00:00:00 2001 From: jedarden Date: Mon, 4 May 2026 00:37:10 -0400 Subject: [PATCH] feat(worker): add rating recovery CLI mode (-mode=recalc-ratings) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the rating recovery procedure specified in plan §12.3. Running 'go run ./cmd/acb-worker -mode=recalc-ratings' will: 1. Reset all bot ratings to Glicko-2 defaults (mu=1500, phi=350, sigma=0.06) 2. Fetch all completed matches from the database in chronological order 3. Replay each match to recompute Glicko-2 ratings from scratch 4. Update the bots table with the recalculated ratings This is needed for disaster recovery when ratings are corrupted or lost. Database functions added: - ResetAllRatings: resets all bot ratings to defaults - GetAllCompletedMatches: fetches completed matches chronologically with participants - UpdateAllRatings: bulk updates all bot ratings in a single transaction Co-Authored-By: Claude Opus 4.7 --- cmd/acb-worker/db.go | 129 +++++++++++++++++++++++++++++++++++++++++ cmd/acb-worker/main.go | 110 +++++++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+) diff --git a/cmd/acb-worker/db.go b/cmd/acb-worker/db.go index c7b3c0a..dbad896 100644 --- a/cmd/acb-worker/db.go +++ b/cmd/acb-worker/db.go @@ -689,3 +689,132 @@ func (c *DBClient) UpdateMapEngagement(ctx context.Context, mapID string, engage return tx.Commit() } + +// CompletedMatchForRecalc represents a completed match with participants for rating recalculation. +type CompletedMatchForRecalc struct { + ID string + CompletedAt time.Time + Winner *int // player_slot of winner, nil for draw + WinnerBotID *string // bot_id of winner (derived from winner player_slot) + Participants []MatchParticipantForRecalc +} + +// MatchParticipantForRecalc represents a match participant for rating recalculation.
+type MatchParticipantForRecalc struct {
+	BotID      string // bot_id column of match_participants
+	PlayerSlot int    // player_slot column; compared against matches.winner
+}
+
+// ResetAllRatings resets all bot ratings to Glicko-2 default values.
+//
+// The UPDATE carries no WHERE clause, so every row of bots is overwritten.
+//
+// NOTE(review): rating_sigma is seeded from glicko2Tau. In Glicko-2 the system
+// constant tau and the per-player volatility sigma are distinct quantities;
+// confirm that glicko2Tau really holds the intended default sigma.
+func (c *DBClient) ResetAllRatings(ctx context.Context) error {
+	_, err := c.db.ExecContext(ctx, `
+		UPDATE bots
+		SET rating_mu = $1, rating_phi = $2, rating_sigma = $3
+	`, glicko2DefaultMu, glicko2DefaultRD, glicko2Tau)
+	if err != nil {
+		return fmt.Errorf("failed to reset ratings: %w", err)
+	}
+	return nil
+}
+
+// GetAllCompletedMatches fetches all completed matches with their participants
+// in chronological order (by completed_at). Used for rating recovery.
+//
+// Matches that share a completed_at value are further ordered by match_id so
+// that repeated runs replay them in the same sequence. Participants are loaded
+// with one query per match, ordered by player_slot. WinnerBotID is set when the
+// winner slot matches a participant row; it stays nil for draws and for matches
+// whose winner slot has no participant.
+func (c *DBClient) GetAllCompletedMatches(ctx context.Context) ([]CompletedMatchForRecalc, error) {
+	rows, err := c.db.QueryContext(ctx, `
+		SELECT match_id, winner, completed_at
+		FROM matches
+		WHERE status = 'completed' AND completed_at IS NOT NULL
+		ORDER BY completed_at ASC, match_id ASC
+	`)
+	if err != nil {
+		return nil, fmt.Errorf("failed to query completed matches: %w", err)
+	}
+	defer rows.Close()
+
+	var matches []CompletedMatchForRecalc
+	for rows.Next() {
+		var m CompletedMatchForRecalc
+		if err := rows.Scan(&m.ID, &m.Winner, &m.CompletedAt); err != nil {
+			return nil, fmt.Errorf("failed to scan match: %w", err)
+		}
+		matches = append(matches, m)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("error iterating matches: %w", err)
+	}
+
+	for i := range matches {
+		m := &matches[i]
+
+		participants, err := c.participantsForRecalc(ctx, m.ID)
+		if err != nil {
+			return nil, err
+		}
+		m.Participants = participants
+
+		if m.Winner == nil {
+			continue // draw: there is no winner to resolve
+		}
+		// Translate the winning player_slot into the bot_id occupying it.
+		for _, p := range participants {
+			if p.PlayerSlot == *m.Winner {
+				winnerID := p.BotID
+				m.WinnerBotID = &winnerID
+				break
+			}
+		}
+	}
+
+	return matches, nil
+}
+
+// participantsForRecalc loads the participants of one match, ordered by
+// player_slot. The result set is closed on every return path.
+func (c *DBClient) participantsForRecalc(ctx context.Context, matchID string) ([]MatchParticipantForRecalc, error) {
+	rows, err := c.db.QueryContext(ctx, `
+		SELECT bot_id, player_slot
+		FROM match_participants
+		WHERE match_id = $1
+		ORDER BY player_slot
+	`, matchID)
+	if err != nil {
+		return nil, fmt.Errorf("failed to query participants for match %s: %w", matchID, err)
+	}
+	defer rows.Close()
+
+	var participants []MatchParticipantForRecalc
+	for rows.Next() {
+		var p MatchParticipantForRecalc
+		if err := rows.Scan(&p.BotID, &p.PlayerSlot); err != nil {
+			return nil, fmt.Errorf("failed to scan participant: %w", err)
+		}
+		participants = append(participants, p)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("error iterating participants for match %s: %w", matchID, err)
+	}
+	return participants, nil
+}
+
+// UpdateAllRatings updates all bot ratings in a single transaction.
+//
+// One UPDATE keyed by bot_id is executed per map entry through a statement
+// that is prepared once. Any failure returns before Commit, so the deferred
+// Rollback discards the updates already executed. An empty map is a no-op.
+//
+// NOTE(review): last_active is set to the current UTC time for every bot
+// written here; confirm that stamping activity is wanted when this runs as
+// part of rating recovery.
+func (c *DBClient) UpdateAllRatings(ctx context.Context, ratings map[string]Glicko2Rating) error {
+	if len(ratings) == 0 {
+		return nil
+	}
+
+	tx, err := c.db.BeginTx(ctx, nil)
+	if err != nil {
+		return fmt.Errorf("failed to begin transaction: %w", err)
+	}
+	// Once Commit has succeeded there is nothing left to undo, so the result
+	// of the deferred Rollback is deliberately ignored.
+	defer tx.Rollback()
+
+	stmt, err := tx.PrepareContext(ctx, `
+		UPDATE bots
+		SET rating_mu = $1, rating_phi = $2, rating_sigma = $3, last_active = $4
+		WHERE bot_id = $5
+	`)
+	if err != nil {
+		return fmt.Errorf("failed to prepare rating update: %w", err)
+	}
+	defer stmt.Close()
+
+	now := time.Now().UTC()
+
+	for botID, rating := range ratings {
+		if _, err := stmt.ExecContext(ctx, rating.Mu, rating.Phi, rating.Sigma, now, botID); err != nil {
+			return fmt.Errorf("failed to update rating for bot %s: %w", botID, err)
+		}
+	}
+
+	if err := tx.Commit(); err != nil {
+		return fmt.Errorf("failed to commit transaction: %w", err)
+	}
+
+	return nil
+}
diff --git a/cmd/acb-worker/main.go b/cmd/acb-worker/main.go index ada6601..f07823f 100644 --- a/cmd/acb-worker/main.go +++ b/cmd/acb-worker/main.go @@ -23,6 +23,7 @@ import ( "github.com/aicodebattle/acb/metrics" "image/png" ) + // Config holds worker configuration.
type Config struct { DatabaseURL string // PostgreSQL connection URL @@ -63,6 +64,7 @@ func main() { turnTimeout := flag.Duration("timeout", 3*time.Second, "Per-turn bot timeout") maxRetries := flag.Int("retries", 3, "Max retries for transient errors") verbose := flag.Bool("verbose", getEnv("ACB_VERBOSE", "false") == "true", "Enable verbose logging") + mode := flag.String("mode", "worker", "Operation mode: 'worker' (normal polling) or 'recalc-ratings' (disaster recovery)") flag.Parse() // Validate required config @@ -97,6 +99,20 @@ func main() { } defer dbClient.Close() + // Handle different operation modes + switch *mode { + case "recalc-ratings": + // Disaster recovery: recompute all ratings from match history + logger := log.New(os.Stdout, "[recalc-ratings] ", log.LstdFlags) + if err := recalcRatings(context.Background(), dbClient, logger, *verbose); err != nil { + log.Fatalf("Rating recalculation failed: %v", err) + } + logger.Println("Rating recalculation completed successfully") + return + } + + // Normal worker mode (default) + // Create B2 client (optional - if not configured, replays won't be uploaded to cold archive) var b2Client *B2Client if cfg.B2Endpoint != "" && cfg.B2AccessKey != "" && cfg.B2SecretKey != "" { @@ -236,6 +252,7 @@ func (w *Worker) pollAndExecute(ctx context.Context) error { metrics.MatchThroughput.Inc() metrics.WorkerMatchesTotal.Inc() metrics.WorkerMatchDuration.Observe(time.Since(matchStart).Seconds()) + // Upload replay to B2 replayURL := "" if w.b2 != nil { @@ -578,3 +595,96 @@ func (w *Worker) computeRatingUpdates(claimData *JobClaimData, result *MatchResu // Compute rating updates return ComputeRatingUpdates(botIDs, ratings, scores) } + +// recalcRatings recalculates all Glicko-2 ratings from scratch by replaying +// all completed matches in chronological order. Used for disaster recovery +// when ratings are corrupted or lost. 
+func recalcRatings(ctx context.Context, db *DBClient, logger *log.Logger, verbose bool) error {
+	// The replay below works purely on an in-memory map, so the match history
+	// is fetched and replayed BEFORE the bots table is modified. A failed
+	// fetch or an interrupted replay therefore leaves the stored ratings
+	// untouched instead of wiped to defaults.
+	logger.Println("Starting rating recalculation...")
+
+	// Step 1: Fetch all completed matches in chronological order
+	logger.Println("Step 1: Fetching completed matches in chronological order")
+	matches, err := db.GetAllCompletedMatches(ctx)
+	if err != nil {
+		return fmt.Errorf("failed to fetch matches: %w", err)
+	}
+	logger.Printf(" Found %d completed matches to process", len(matches))
+
+	// Rating given to a bot the first time it appears in the history.
+	// NOTE(review): Sigma is seeded from glicko2Tau; confirm that constant
+	// holds the intended default volatility and not the Glicko-2 system
+	// constant tau.
+	defaultRating := Glicko2Rating{
+		Mu:    glicko2DefaultMu,
+		Phi:   glicko2DefaultRD,
+		Sigma: glicko2Tau,
+	}
+
+	// Step 2: Replay each match in order, tracking ratings in memory
+	logger.Println("Step 2: Replaying matches to recompute ratings")
+	currentRatings := make(map[string]Glicko2Rating)
+	processed, skipped := 0, 0
+	for i := range matches {
+		match := &matches[i]
+
+		if err := ctx.Err(); err != nil {
+			return fmt.Errorf("rating recalculation interrupted after %d matches: %w", processed, err)
+		}
+
+		n := len(match.Participants)
+		if n < 2 {
+			// A rating update needs at least one opponent.
+			skipped++
+			logger.Printf(" Skipping match_id=%s: %d participant(s), need at least 2", match.ID, n)
+			continue
+		}
+		if match.Winner != nil && match.WinnerBotID == nil {
+			// The winner slot maps to no participant row. Scoring this match
+			// would record a loss for every participant, so leave it out.
+			skipped++
+			logger.Printf(" Skipping match_id=%s: winner slot %d has no participant row", match.ID, *match.Winner)
+			continue
+		}
+
+		// Build the parallel arrays expected by the rating computation.
+		botIDs := make([]string, n)
+		ratings := make([]Glicko2Rating, n)
+		scores := make([]float64, n)
+		for j, p := range match.Participants {
+			rating, seen := currentRatings[p.BotID]
+			if !seen {
+				rating = defaultRating
+			}
+			botIDs[j] = p.BotID
+			ratings[j] = rating
+
+			switch {
+			case match.Winner == nil:
+				scores[j] = 0.5 // draw
+			case *match.WinnerBotID == p.BotID:
+				scores[j] = 1.0
+			default:
+				scores[j] = 0.0
+			}
+		}
+
+		// Compute new ratings using Glicko-2
+		newRatings := UpdateRatings(ratings, scores)
+		if len(newRatings) != n {
+			return fmt.Errorf("rating update for match %s returned %d ratings for %d participants", match.ID, len(newRatings), n)
+		}
+		for j, botID := range botIDs {
+			currentRatings[botID] = newRatings[j]
+		}
+
+		processed++
+		if processed%1000 == 0 || verbose {
+			logger.Printf(" Processed %d/%d matches (match_id=%s)", processed, len(matches), match.ID)
+		}
+	}
+	logger.Printf(" Processed %d matches (%d skipped)", processed, skipped)
+
+	// Step 3: Reset every bot so that bots absent from the replayed history
+	// end up at the defaults as well.
+	logger.Println("Step 3: Resetting all bot ratings to defaults")
+	if err := db.ResetAllRatings(ctx); err != nil {
+		return fmt.Errorf("failed to reset ratings: %w", err)
+	}
+	logger.Printf(" All ratings reset to defaults (mu=%v, phi=%v, sigma=%v)", glicko2DefaultMu, glicko2DefaultRD, glicko2Tau)
+
+	if len(matches) == 0 {
+		logger.Println("No matches to process, ratings remain at defaults")
+		return nil
+	}
+
+	// Step 4: Write final ratings back to database
+	logger.Println("Step 4: Writing recalculated ratings to database")
+	if err := db.UpdateAllRatings(ctx, currentRatings); err != nil {
+		return fmt.Errorf("failed to write ratings: %w", err)
+	}
+
+	logger.Printf(" Updated ratings for %d bots", len(currentRatings))
+	logger.Println("Rating recalculation complete")
+	return nil
+}