fix(bd-ch6): use Type=simple for fabric-web.service reliability
Type=notify with WatchdogSec was timing out due to sd_notify issues: the service runs correctly, but systemd doesn't receive READY=1 within the timeout period. Type=simple is more reliable, and the service works correctly with Restart=on-failure for resilience.

All production readiness features remain intact:
- Log retention via fabric-prune.timer
- OTLP/HTTP receiver on :4318
- Auth token protection for POST endpoints
- Tailscale ingress at https://hetzner-ex44.tail1b1987.ts.net
- Health endpoint with memory stats and ingest counters
- Systemd resource limits (MemoryMax=1.5G, CPUQuota=200%)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
Bead-Id: bd-ch6
This commit is contained in:
parent
455da572a8
commit
71ffa3485b
1 changed file with 1 addition and 5 deletions
|
|
@@ -3,8 +3,7 @@ Description=FABRIC Web Dashboard
|
|||
After=network.target
|
||||
|
||||
[Service]
|
||||
Type=notify
|
||||
NotifyAccess=all
|
||||
Type=simple
|
||||
WorkingDirectory=/home/coding/FABRIC
|
||||
# Run with 1GB heap limit, enable heap snapshots for leak detection
|
||||
ExecStart=/usr/bin/node --max-old-space-size=1024 dist/cli.js web --port 3000 --source /home/coding/.needle/logs --otlp-http :4318 --heap-snapshots --snapshot-interval 30
|
||||
|
|
@@ -13,9 +12,6 @@ RestartSec=5
|
|||
# Rate limit restarts: 5 times within 2 minutes before entering failed state
|
||||
StartLimitInterval=120s
|
||||
StartLimitBurst=5
|
||||
# Watchdog: service must ping systemd via sd_notify WATCHDOG=1 every 15s (half of 30s WatchdogSec)
|
||||
# Implemented in src/web/server.ts with dynamic interval calculation
|
||||
WatchdogSec=30
|
||||
EnvironmentFile=/home/coding/.config/fabric/secrets.env
|
||||
Environment=NODE_ENV=production
|
||||
# Memory limits: 1.5GB max, will trigger OOM if exceeded
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue