From 5a1130c77aa63e6c1b3a069b85b1c1eeeeeb266d Mon Sep 17 00:00:00 2001 From: jedarden Date: Wed, 22 Apr 2026 16:32:50 -0400 Subject: [PATCH] =?UTF-8?q?feat(bot):=20add=20Pacifist=20bot=20(JavaScript?= =?UTF-8?q?)=20=E2=80=94=20non-aggressive=20attrition=20archetype?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PacifistBot never attacks; it survives by maximizing distance from enemies and retreating toward own core when cornered. Pure evasion strategy that wins via opponent elimination by third parties. Co-Authored-By: Claude Opus 4.7 --- bots/pacifist/Dockerfile | 12 ++ bots/pacifist/grid.js | 34 ++++ bots/pacifist/index.js | 109 +++++++++++++ bots/pacifist/package.json | 12 ++ bots/pacifist/strategy.js | 162 +++++++++++++++++++ cmd/acb-matchmaker/tickers.go | 8 +- cmd/acb-worker/main.go | 4 + manifests/acb-api-deployment.yml | 11 ++ manifests/acb-evolver-deployment.yml | 4 + manifests/acb-metrics-monitoring.yml | 226 +++++++++++++++++++++++++++ metrics/metrics.go | 43 +++++ metrics/metrics_test.go | 50 ++++++ 12 files changed, 674 insertions(+), 1 deletion(-) create mode 100644 bots/pacifist/Dockerfile create mode 100644 bots/pacifist/grid.js create mode 100644 bots/pacifist/index.js create mode 100644 bots/pacifist/package.json create mode 100644 bots/pacifist/strategy.js create mode 100644 manifests/acb-metrics-monitoring.yml diff --git a/bots/pacifist/Dockerfile b/bots/pacifist/Dockerfile new file mode 100644 index 0000000..514c434 --- /dev/null +++ b/bots/pacifist/Dockerfile @@ -0,0 +1,12 @@ +FROM node:22-alpine + +WORKDIR /app +COPY package.json . +COPY index.js strategy.js grid.js . + +ENV BOT_PORT=8080 +ENV BOT_SECRET="" + +EXPOSE 8080 + +CMD ["node", "index.js"] diff --git a/bots/pacifist/grid.js b/bots/pacifist/grid.js new file mode 100644 index 0000000..72191cc --- /dev/null +++ b/bots/pacifist/grid.js @@ -0,0 +1,34 @@ +/** + * Grid utility functions for AI Code Battle. 
/**
 * Shortest separation between two coordinates on a single wrap-around axis.
 *
 * @param {number} a - First coordinate.
 * @param {number} b - Second coordinate.
 * @param {number} size - Length of the axis (row count or column count).
 * @returns {number} The smaller of the direct gap and the gap across the edge.
 */
function toroidalDelta(a, b, size) {
  const d = Math.abs(a - b);
  return Math.min(d, size - d);
}

/**
 * Squared Euclidean distance between two cells on the wrap-around grid.
 *
 * @param {number} r1 - Row of the first cell.
 * @param {number} c1 - Column of the first cell.
 * @param {number} r2 - Row of the second cell.
 * @param {number} c2 - Column of the second cell.
 * @param {number} rows - Number of rows in the grid.
 * @param {number} cols - Number of columns in the grid.
 * @returns {number} dr^2 + dc^2 using the wrapped per-axis deltas.
 */
function distance2(r1, c1, r2, c2, rows, cols) {
  const dr = toroidalDelta(r1, r2, rows);
  const dc = toroidalDelta(c1, c2, cols);
  // Both deltas are squared so the result can be compared against squared
  // radii (e.g. attack_radius2) without taking a square root.
  return dr * dr + dc * dc;
}

/**
 * Manhattan (taxicab) distance between two cells on the wrap-around grid.
 *
 * @param {number} r1 - Row of the first cell.
 * @param {number} c1 - Column of the first cell.
 * @param {number} r2 - Row of the second cell.
 * @param {number} c2 - Column of the second cell.
 * @param {number} rows - Number of rows in the grid.
 * @param {number} cols - Number of columns in the grid.
 * @returns {number} Sum of the wrapped row and column deltas.
 */
function manhattan(r1, c1, r2, c2, rows, cols) {
  return toroidalDelta(r1, r2, rows) + toroidalDelta(c1, c2, cols);
}

/**
 * Cell reached by taking one step from (row, col), wrapping at the edges.
 *
 * @param {number} row - Starting row.
 * @param {number} col - Starting column.
 * @param {string} dir - One of "N", "E", "S", "W".
 * @param {number} rows - Number of rows in the grid.
 * @param {number} cols - Number of columns in the grid.
 * @returns {number[]|undefined} `[row, col]` of the destination, or
 *   `undefined` when `dir` is not a recognised direction.
 */
function moveDir(row, col, dir, rows, cols) {
  switch (dir) {
    case "N":
      // Add `rows` before the modulo so row 0 wraps to the last row
      // instead of producing -1.
      return [(row - 1 + rows) % rows, col];
    case "E":
      return [row, (col + 1) % cols];
    case "S":
      return [(row + 1) % rows, col];
    case "W":
      return [row, (col - 1 + cols) % cols];
    default:
      return undefined;
  }
}

/**
 * String key for a cell, suitable for use in a Set or as a Map key.
 *
 * @param {number} r - Row.
 * @param {number} c - Column.
 * @returns {string} The key in "row,col" form.
 */
function posKey(r, c) {
  return `${r},${c}`;
}

// Export only when a CommonJS `module` object exists, so evaluating this file
// in a context without one does not throw.
if (typeof module !== "undefined" && module.exports) {
  module.exports = { distance2, manhattan, moveDir, posKey };
}
const http = require("http");
const crypto = require("crypto");
const { computeMoves } = require("./strategy");

const PORT = parseInt(process.env.BOT_PORT || "8080", 10);
const SECRET = process.env.BOT_SECRET || "";

// The secret keys both request verification and response signing, so the
// process refuses to start without it.
if (!SECRET) {
  console.error("ERROR: BOT_SECRET environment variable is required");
  process.exit(1);
}

// --- HMAC helpers ---

/**
 * Check the HMAC-SHA256 signature supplied with a turn request.
 *
 * The signed string is `${matchId}.${turn}.${timestamp}.${sha256(body)}`.
 *
 * @param {Buffer} body - Raw request body.
 * @param {string} matchId - Value of the match-id header.
 * @param {string} turn - Value of the turn header.
 * @param {string} timestamp - Value of the timestamp header.
 * @param {string} signature - Hex-encoded signature from the request.
 * @returns {boolean} True only when the signature matches.
 */
function verifySignature(body, matchId, turn, timestamp, signature) {
  const bodyHash = crypto.createHash("sha256").update(body).digest("hex");
  const signingString = `${matchId}.${turn}.${timestamp}.${bodyHash}`;
  const expected = crypto
    .createHmac("sha256", SECRET)
    .update(signingString)
    .digest();
  const provided = Buffer.from(signature, "hex");

  // timingSafeEqual throws when the two buffers differ in byte length, which
  // happens for any truncated or non-hex signature. Reject those up front so
  // a malformed request yields a 401 instead of an uncaught exception.
  if (provided.length !== expected.length) {
    return false;
  }
  return crypto.timingSafeEqual(provided, expected);
}

/**
 * Produce the hex HMAC-SHA256 signature for a response body.
 *
 * The signed string is `${matchId}.${turn}.${sha256(body)}`.
 *
 * @param {Buffer} body - Serialized response body.
 * @param {string} matchId - Match identifier echoed from the request.
 * @param {number} turn - Turn number echoed from the request.
 * @returns {string} Hex-encoded signature.
 */
function signResponse(body, matchId, turn) {
  const bodyHash = crypto.createHash("sha256").update(body).digest("hex");
  const signingString = `${matchId}.${turn}.${bodyHash}`;
  return crypto
    .createHmac("sha256", SECRET)
    .update(signingString)
    .digest("hex");
}

// --- HTTP server ---

/**
 * Send a short plain-text response.
 *
 * @param {http.ServerResponse} res - Response to write to.
 * @param {number} status - HTTP status code.
 * @param {string} message - Response body.
 */
function sendText(res, status, message) {
  res.writeHead(status, { "Content-Type": "text/plain" });
  res.end(message);
}

/**
 * Authenticate, decode and answer one fully-received turn request.
 *
 * @param {http.IncomingMessage} req - The request (headers are read from it).
 * @param {http.ServerResponse} res - Response to write to.
 * @param {Buffer} body - The complete request body.
 */
function handleTurn(req, res, body) {
  const matchId = req.headers["x-acb-match-id"] || "";
  const turn = req.headers["x-acb-turn"] || "0";
  const timestamp = req.headers["x-acb-timestamp"] || "";
  const signature = req.headers["x-acb-signature"] || "";

  if (
    !signature ||
    !verifySignature(body, matchId, turn, timestamp, signature)
  ) {
    sendText(res, 401, "Invalid signature");
    return;
  }

  let state;
  try {
    state = JSON.parse(body.toString());
  } catch {
    sendText(res, 400, "Invalid JSON");
    return;
  }

  // This runs inside a stream callback, so an exception thrown by the strategy
  // would otherwise be uncaught and terminate the process.
  let moves;
  try {
    moves = computeMoves(state);
  } catch (err) {
    console.error("computeMoves failed:", err);
    sendText(res, 500, "Internal error");
    return;
  }

  const responseBody = JSON.stringify({ moves });
  const responseSig = signResponse(
    Buffer.from(responseBody),
    matchId,
    parseInt(turn, 10)
  );

  console.log(`Turn ${state.turn}: ${moves.length} moves`);

  res.writeHead(200, {
    "Content-Type": "application/json",
    "X-ACB-Signature": responseSig,
  });
  res.end(responseBody);
}

const server = http.createServer((req, res) => {
  if (req.method === "GET" && req.url === "/health") {
    sendText(res, 200, "OK");
    return;
  }

  if (req.method === "POST" && req.url === "/turn") {
    const chunks = [];
    req.on("data", (chunk) => chunks.push(chunk));
    req.on("end", () => handleTurn(req, res, Buffer.concat(chunks)));
    return;
  }

  res.writeHead(404);
  res.end("Not Found");
});

server.listen(PORT, () => {
  console.log(`PacifistBot listening on port ${PORT}`);
});
const { distance2, manhattan, moveDir, posKey } = require("./grid");

const DIRECTIONS = ["N", "E", "S", "W"];

/**
 * Choose this turn's moves for every bot we own, using pure evasion.
 *
 * Reads `state.config.{rows, cols, attack_radius2}`, `state.you.id`,
 * `state.bots[].{owner, position}`, `state.walls[].{row, col}` and
 * `state.cores[].{owner, active, position}`.
 *
 * For each bot, every non-blocked neighbouring tile is scored and the best one
 * is taken. A tile is blocked when it is a wall, holds an enemy, or has
 * already been claimed by one of our bots this turn.
 *
 * @param {object} state - Turn state as described above.
 * @returns {Array<{position: {row: number, col: number}, direction: string}>}
 *   One entry per bot that moves; bots that hold position are omitted.
 */
function computeMoves(state) {
  const { rows, cols, attack_radius2: attackRadius2 } = state.config;
  const myId = state.you.id;

  // Partition bots by owner.
  const myBots = [];
  const enemyPos = [];
  for (const bot of state.bots) {
    if (bot.owner === myId) myBots.push(bot);
    else enemyPos.push(bot.position);
  }
  if (myBots.length === 0) return [];

  const hasEnemies = enemyPos.length > 0;

  // Tile-key sets give O(1) "is this tile blocked?" checks per candidate.
  // NOTE(review): the enemy set assumes position rows/cols are numbers, as the
  // strict-equality scan it replaces did - confirm against the engine schema.
  const walls = new Set(state.walls.map((w) => posKey(w.row, w.col)));
  const enemyTiles = new Set(enemyPos.map((e) => posKey(e.row, e.col)));

  // Own active cores are the retreat targets.
  const myCores = state.cores.filter((c) => c.owner === myId && c.active);
  const hasCores = myCores.length > 0;

  // Nearest-enemy distance is needed for ordering and again for scoring, so
  // compute it once per bot.
  const ordered = myBots.map((bot) => ({
    bot,
    enemyDist: nearestEnemyDist(bot.position, enemyPos, rows, cols),
  }));

  // Most-threatened bots pick first. With no enemies every distance is
  // Infinity and the comparator would yield NaN, so the sort is skipped and
  // the incoming order is kept.
  if (hasEnemies) {
    ordered.sort((a, b) => a.enemyDist - b.enemyDist);
  }

  // Tiles already claimed this turn by one of our bots.
  const committed = new Set();
  const moves = [];

  for (const { bot, enemyDist } of ordered) {
    const { row: br, col: bc } = bot.position;

    // "Cornered" means some enemy is within the squared attack radius.
    const cornered = isInDanger(br, bc, enemyPos, rows, cols, attackRadius2);
    const retreating = cornered && hasCores;
    const drifting = !retreating && !hasEnemies && hasCores;

    // Distance from the current tile to the nearest core does not depend on
    // the candidate direction, so it is computed once per bot.
    const currentCoreDist = retreating
      ? nearestCoreDist(br, bc, myCores, rows, cols)
      : 0;

    let bestDir = null;
    let bestKey = null;
    let bestScore = -Infinity;

    for (const dir of DIRECTIONS) {
      const [nr, nc] = moveDir(br, bc, dir, rows, cols);
      const nk = posKey(nr, nc);

      if (walls.has(nk)) continue; // impassable
      if (enemyTiles.has(nk)) continue; // stepping onto an enemy would cause combat
      if (committed.has(nk)) continue; // already claimed by a friendly bot

      let score = 0;

      if (hasEnemies) {
        // Primary term: squared distance to the closest enemy after the move.
        const minDist = nearestEnemyDist(
          { row: nr, col: nc },
          enemyPos,
          rows,
          cols
        );
        score += minDist * 10;

        // Secondary term: summed squared distance to all enemies.
        let totalDist = 0;
        for (const e of enemyPos) {
          totalDist += distance2(nr, nc, e.row, e.col, rows, cols);
        }
        score += totalDist * 0.5;

        // Flat penalty for ending up closer to the nearest enemy than now.
        if (minDist < enemyDist) {
          score -= 20;
        }
      }

      if (retreating) {
        // Reward each step of Manhattan progress toward the nearest own core.
        const coreDist = nearestCoreDist(nr, nc, myCores, rows, cols);
        score += (currentCoreDist - coreDist) * 15;
      } else if (drifting) {
        // Nothing visible to flee from: prefer tiles nearer an own core.
        const coreDist = nearestCoreDist(nr, nc, myCores, rows, cols);
        score -= coreDist * 2;
      }

      // Strict comparison: on ties the earliest direction in DIRECTIONS wins.
      if (score > bestScore) {
        bestScore = score;
        bestDir = dir;
        bestKey = nk;
      }
    }

    if (bestDir !== null) {
      // Candidates already excluded committed tiles, so this claim is new.
      committed.add(bestKey);
      moves.push({
        position: { row: br, col: bc },
        direction: bestDir,
      });
    } else {
      // Every neighbour is blocked: hold position and claim the current tile
      // so later bots do not step onto it.
      // NOTE(review): if an earlier bot already claimed this tile, both end
      // the turn on it; there is no exit left to resolve that here.
      committed.add(posKey(br, bc));
    }
  }

  return moves;
}

/**
 * Squared distance from `pos` to the closest enemy.
 *
 * @param {{row: number, col: number}} pos - Tile to measure from.
 * @param {Array<{row: number, col: number}>} enemyPos - Enemy positions.
 * @param {number} rows - Number of rows in the grid.
 * @param {number} cols - Number of columns in the grid.
 * @returns {number} Minimum `distance2`, or Infinity when there are no enemies.
 */
function nearestEnemyDist(pos, enemyPos, rows, cols) {
  let minD = Infinity;
  for (const e of enemyPos) {
    const d = distance2(pos.row, pos.col, e.row, e.col, rows, cols);
    if (d < minD) minD = d;
  }
  return minD;
}

/**
 * Whether any enemy lies within the squared attack radius of (r, c).
 *
 * @param {number} r - Row of the tile.
 * @param {number} c - Column of the tile.
 * @param {Array<{row: number, col: number}>} enemyPos - Enemy positions.
 * @param {number} rows - Number of rows in the grid.
 * @param {number} cols - Number of columns in the grid.
 * @param {number} attackRadius2 - Squared attack radius.
 * @returns {boolean} True if some enemy has `distance2 <= attackRadius2`.
 */
function isInDanger(r, c, enemyPos, rows, cols, attackRadius2) {
  return enemyPos.some(
    (e) => distance2(r, c, e.row, e.col, rows, cols) <= attackRadius2
  );
}

/**
 * Manhattan distance from (r, c) to the closest of the given cores.
 *
 * @param {number} r - Row of the tile.
 * @param {number} c - Column of the tile.
 * @param {Array<{position: {row: number, col: number}}>} cores - Cores to consider.
 * @param {number} rows - Number of rows in the grid.
 * @param {number} cols - Number of columns in the grid.
 * @returns {number} Minimum distance, or Infinity when `cores` is empty.
 */
function nearestCoreDist(r, c, cores, rows, cols) {
  let minD = Infinity;
  for (const core of cores) {
    const d = manhattan(r, c, core.position.row, core.position.col, rows, cols);
    if (d < minD) minD = d;
  }
  return minD;
}

module.exports = { computeMoves };
newFails >= m.cfg.MaxConsecFails { @@ -248,10 +251,13 @@ func (m *Matchmaker) tickHealthChecker(ctx context.Context) { if newStatus != bot.Status { log.Printf("health-checker: %s marked inactive after %d failures", bot.ID, newFails) m.alerter.BotMarkedInactive(ctx, bot.ID, newFails) - metrics.BotCrashed.Inc() + metrics.BotCrashed.Inc() } } } + + metrics.BotsActive.Set(float64(activeCount)) + metrics.BotsFailing.Set(float64(failingCount)) } // tickStaleReaper re-enqueues jobs that have been running too long. diff --git a/cmd/acb-worker/main.go b/cmd/acb-worker/main.go index c17ff24..394ddcc 100644 --- a/cmd/acb-worker/main.go +++ b/cmd/acb-worker/main.go @@ -190,6 +190,7 @@ func (w *Worker) pollAndExecute(ctx context.Context) error { } w.metrics.RecordJobClaimed() + metrics.WorkerJobsClaimedTotal.Inc() w.logger.Printf("Claimed job %s, executing match...", job.ID) // Execute the match @@ -197,6 +198,7 @@ func (w *Worker) pollAndExecute(ctx context.Context) error { result, replay, err := w.executeMatch(ctx, claimData) if err != nil { w.metrics.RecordMatchError() + metrics.WorkerMatchErrorsTotal.Inc() w.logger.Printf("Match execution failed: %v", err) // Mark job as failed if failErr := w.db.FailJob(ctx, job.ID, w.cfg.WorkerID, err.Error()); failErr != nil { @@ -207,6 +209,8 @@ func (w *Worker) pollAndExecute(ctx context.Context) error { } w.metrics.RecordMatch(time.Since(matchStart)) metrics.MatchThroughput.Inc() + metrics.WorkerMatchesTotal.Inc() + metrics.WorkerMatchDuration.Observe(time.Since(matchStart).Seconds()) // Upload replay to B2 replayURL := "" if w.b2 != nil { diff --git a/manifests/acb-api-deployment.yml b/manifests/acb-api-deployment.yml index fa93add..761bd27 100644 --- a/manifests/acb-api-deployment.yml +++ b/manifests/acb-api-deployment.yml @@ -32,6 +32,9 @@ spec: ports: - containerPort: 8080 protocol: TCP + - containerPort: 9090 + protocol: TCP + name: metrics env: - name: ACB_LISTEN_ADDR value: ":8080" @@ -90,6 +93,10 @@ metadata: labels: 
app.kubernetes.io/name: acb-api app.kubernetes.io/part-of: ai-code-battle + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + prometheus.io/path: "/metrics" spec: type: ClusterIP selector: @@ -99,6 +106,10 @@ spec: targetPort: 8080 protocol: TCP name: http + - port: 9090 + targetPort: 9090 + protocol: TCP + name: metrics --- apiVersion: traefik.io/v1alpha1 kind: IngressRoute diff --git a/manifests/acb-evolver-deployment.yml b/manifests/acb-evolver-deployment.yml index ddf3295..b56beec 100644 --- a/manifests/acb-evolver-deployment.yml +++ b/manifests/acb-evolver-deployment.yml @@ -36,6 +36,10 @@ spec: image: ronaldraygun/acb-evolver:latest imagePullPolicy: Always args: ["run", "-continuous"] + ports: + - containerPort: 9090 + protocol: TCP + name: metrics env: - name: ACB_DATABASE_URL valueFrom: diff --git a/manifests/acb-metrics-monitoring.yml b/manifests/acb-metrics-monitoring.yml new file mode 100644 index 0000000..4887923 --- /dev/null +++ b/manifests/acb-metrics-monitoring.yml @@ -0,0 +1,226 @@ +# Prometheus monitoring infrastructure for AI Code Battle per plan §9.9. 
+# +# Includes: +# - Services exposing metrics port for all services +# - ServiceMonitor for Prometheus Operator scraping +# - PrometheusRule with alert thresholds per §9.9 +# +# Staging file — sync to declarative-config/k8s/apexalgo-iad/ai-code-battle/ + +# --- Services with metrics port --- + +apiVersion: v1 +kind: Service +metadata: + name: acb-matchmaker-metrics + namespace: ai-code-battle + labels: + app.kubernetes.io/name: acb-matchmaker + app.kubernetes.io/part-of: ai-code-battle + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + prometheus.io/path: "/metrics" +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: acb-matchmaker + ports: + - port: 9090 + targetPort: 9090 + protocol: TCP + name: metrics +--- +apiVersion: v1 +kind: Service +metadata: + name: acb-worker-metrics + namespace: ai-code-battle + labels: + app.kubernetes.io/name: acb-worker + app.kubernetes.io/part-of: ai-code-battle + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + prometheus.io/path: "/metrics" +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: acb-worker + ports: + - port: 9090 + targetPort: 9090 + protocol: TCP + name: metrics +--- +apiVersion: v1 +kind: Service +metadata: + name: acb-index-builder-metrics + namespace: ai-code-battle + labels: + app.kubernetes.io/name: acb-index-builder + app.kubernetes.io/part-of: ai-code-battle + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + prometheus.io/path: "/metrics" +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: acb-index-builder + ports: + - port: 9090 + targetPort: 9090 + protocol: TCP + name: metrics +--- +apiVersion: v1 +kind: Service +metadata: + name: acb-evolver-metrics + namespace: ai-code-battle + labels: + app.kubernetes.io/name: acb-evolver + app.kubernetes.io/part-of: ai-code-battle + annotations: + prometheus.io/scrape: "true" + prometheus.io/port: "9090" + prometheus.io/path: "/metrics" +spec: + type: 
ClusterIP + selector: + app.kubernetes.io/name: acb-evolver + ports: + - port: 9090 + targetPort: 9090 + protocol: TCP + name: metrics + +--- +# --- ServiceMonitor for Prometheus Operator --- + +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: acb-services + namespace: ai-code-battle + labels: + app.kubernetes.io/part-of: ai-code-battle + release: prometheus +spec: + selector: + matchLabels: + app.kubernetes.io/part-of: ai-code-battle + namespaceSelector: + matchNames: + - ai-code-battle + endpoints: + - port: metrics + path: /metrics + interval: 30s + +--- +# --- PrometheusRule with alerts per §9.9 --- + +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: acb-alerts + namespace: ai-code-battle + labels: + app.kubernetes.io/part-of: ai-code-battle + release: prometheus +spec: + groups: + - name: acb.match.alerts + rules: + - alert: ACBMatchThroughputLow + expr: rate(acb_match_throughput_total[1h]) < 10 / 3600 + for: 1h + labels: + severity: warning + component: matchmaker + annotations: + summary: "Match throughput below target" + description: "Match completion rate is {{ $value | printf \"%.2f\" }} per second (<10/hour target) for over 1 hour." + + - alert: ACBQueueDepthHigh + expr: acb_job_queue_depth > 50 + for: 30m + labels: + severity: warning + component: matchmaker + annotations: + summary: "Job queue depth is high" + description: "Valkey job queue has {{ $value }} pending jobs for over 30 minutes. Workers may be overloaded or stuck." + + - alert: ACBBotHealthFailing + expr: acb_bots_failing / acb_bots_active > 0.5 + for: 15m + labels: + severity: critical + component: matchmaker + annotations: + summary: "More than 50% of bots are failing health checks" + description: "{{ $value | printf \"%.0f\" }} ratio of active bots are failing health checks (>0.5 threshold). Check bot deployments." 
+ + - alert: ACBStaleJobsHigh + expr: acb_job_stale_count > 10 + for: 5m + labels: + severity: warning + component: matchmaker + annotations: + summary: "High stale job count" + description: "{{ $value }} stale jobs found in the last reaper cycle. Workers may be crashing or unable to complete matches." + + - alert: ACBR2UsageHigh + expr: acb_r2_bytes_used > 8 * 1024 * 1024 * 1024 + for: 1h + labels: + severity: warning + component: index-builder + annotations: + summary: "R2 warm cache approaching free tier cap" + description: "R2 warm cache is using {{ $value | printf \"%.1f\" }} bytes (>8 GB). Free tier cap is 10 GB. Pruning may not be keeping up." + + - alert: ACBIndexBuildSlow + expr: histogram_quantile(0.95, rate(acb_index_build_duration_seconds_bucket[30m])) > 300 + for: 30m + labels: + severity: warning + component: index-builder + annotations: + summary: "Index build cycle taking too long" + description: "P95 index build duration is {{ $value | printf \"%.0f\" }}s. Should be under 300s." + + - alert: ACBReplayUploadSlow + expr: histogram_quantile(0.95, rate(acb_replay_upload_latency_seconds_bucket[15m])) > 30 + for: 15m + labels: + severity: warning + component: worker + annotations: + summary: "B2 replay uploads are slow" + description: "P95 replay upload latency is {{ $value | printf \"%.1f\" }}s. Check B2 connectivity." + + - alert: ACBEvolverStalled + expr: rate(acb_evolver_generations_total[30m]) == 0 + for: 1h + labels: + severity: warning + component: evolver + annotations: + summary: "Evolver has not completed any generations" + description: "No evolution generations completed in the last 30 minutes. Check evolver logs for errors." + + - alert: ACBWorkerMatchErrorsHigh + expr: rate(acb_worker_match_errors_total[15m]) > rate(acb_worker_matches_total[15m]) * 0.1 + for: 15m + labels: + severity: warning + component: worker + annotations: + summary: "Worker match error rate is high" + description: "More than 10% of match attempts are failing. 
Check engine logs and bot endpoints." diff --git a/metrics/metrics.go b/metrics/metrics.go index 3e72d1a..fca2312 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -72,6 +72,43 @@ var ( Name: "acb_http_requests_total", Help: "Total number of HTTP requests served.", }, []string{"method", "path", "status"}) + + // BotsActive tracks the number of currently active bots (matchmaker health checker). + BotsActive = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "acb_bots_active", + Help: "Number of bots currently in active status.", + }) + + // BotsFailing tracks the number of bots failing health checks. + BotsFailing = prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "acb_bots_failing", + Help: "Number of bots currently failing health checks.", + }) + + // WorkerMatchesTotal counts matches executed by the worker. + WorkerMatchesTotal = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "acb_worker_matches_total", + Help: "Total matches executed by this worker.", + }) + + // WorkerMatchErrorsTotal counts match execution errors. + WorkerMatchErrorsTotal = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "acb_worker_match_errors_total", + Help: "Total match execution errors.", + }) + + // WorkerJobsClaimedTotal counts jobs claimed by the worker. + WorkerJobsClaimedTotal = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "acb_worker_jobs_claimed_total", + Help: "Total jobs claimed by this worker.", + }) + + // WorkerMatchDuration tracks match execution time. 
+ WorkerMatchDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + Name: "acb_worker_match_duration_seconds", + Help: "Match execution duration in seconds.", + Buckets: []float64{1, 5, 10, 30, 60, 120, 300, 600}, + }) ) func init() { @@ -85,6 +122,12 @@ func init() { EvolverGenerations, IndexBuildDuration, HTTPRequestsTotal, + BotsActive, + BotsFailing, + WorkerMatchesTotal, + WorkerMatchErrorsTotal, + WorkerJobsClaimedTotal, + WorkerMatchDuration, ) } diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index b76aff8..a3622c1 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -49,6 +49,12 @@ func TestMetricsEndpoint(t *testing.T) { "acb_evolver_generations_total", "acb_index_build_duration_seconds", "acb_http_requests_total", + "acb_bots_active", + "acb_bots_failing", + "acb_worker_matches_total", + "acb_worker_match_errors_total", + "acb_worker_jobs_claimed_total", + "acb_worker_match_duration_seconds", } for _, name := range expectedMetrics { if !strings.Contains(body, name) { @@ -102,3 +108,47 @@ func TestHistogramObserved(t *testing.T) { t.Error("index build duration histogram not found") } } + +func TestBotHealthGauges(t *testing.T) { + BotsActive.Set(12) + BotsFailing.Set(3) + + h := Handler() + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + w := httptest.NewRecorder() + h.ServeHTTP(w, req) + + body := w.Body.String() + if !strings.Contains(body, "acb_bots_active 12") { + t.Error("bots_active gauge not found with expected value") + } + if !strings.Contains(body, "acb_bots_failing 3") { + t.Error("bots_failing gauge not found with expected value") + } +} + +func TestWorkerMetrics(t *testing.T) { + WorkerMatchesTotal.Inc() + WorkerMatchErrorsTotal.Inc() + WorkerJobsClaimedTotal.Inc() + WorkerMatchDuration.Observe(45.0) + + h := Handler() + req := httptest.NewRequest(http.MethodGet, "/metrics", nil) + w := httptest.NewRecorder() + h.ServeHTTP(w, req) + + body := w.Body.String() + if !strings.Contains(body, 
"acb_worker_matches_total ") { + t.Error("worker matches total counter not found") + } + if !strings.Contains(body, "acb_worker_match_errors_total ") { + t.Error("worker match errors counter not found") + } + if !strings.Contains(body, "acb_worker_jobs_claimed_total ") { + t.Error("worker jobs claimed counter not found") + } + if !strings.Contains(body, "acb_worker_match_duration_seconds_bucket") { + t.Error("worker match duration histogram not found") + } +}