pdftract/.marathon/start.sh
jedarden e6bf3dd290 feat(pdftract-3s2i): implement Phase 5.5.2 validation filter
Implement per-word validation filter for assisted-OCR BrokenVector path.

Changes:
- Add SpanSource::OcrAssisted variant to hybrid.rs
- Add Span::ocr_assisted() helper method
- Implement validate_ocr_with_position_hints() in ocr.rs
  - 5pt distance threshold for position validation
  - 0.4 confidence cap for rejected words
  - Linear scan for nearest-neighbor lookup
- Add unit tests for validation filter

Closes: pdftract-3s2i

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-24 04:57:17 -04:00

91 lines
4 KiB
Bash
Executable file

#!/usr/bin/env bash
# pdftract Marathon Launcher — claude-code @ GLM-4.7 via ZAI proxy
#
# Runs the central marathon-coding skill in a dedicated tmux session against this
# repo. Each iteration reads .marathon/instruction.md and invokes headless
# claude-code routed through the ZAI proxy, mirroring the live NEEDLE
# claude-code-glm-4.7 agent.
#
# Usage:
# ./.marathon/start.sh # session "pdftract-marathon"
# ./.marathon/start.sh <session-name> # custom session name
# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# tmux session name: first positional argument, or the default when omitted.
SESSION_NAME="${1:-pdftract-marathon}"

# Install location of the shared marathon-coding skill (provides launcher.sh).
MARATHON_SKILL="/home/coding/claude-config/skills/marathon-coding"

# ZAI proxy endpoint. This is the CURRENT apexalgo-iad Traefik vpn-entrypoint;
# start.sh scripts in older repos still point at the decommissioned
# ardenone-hub proxy, so do not copy the URL from them. The value mirrors the
# environment of the live `claude-code-glm-4.7` NEEDLE agent.
ZAI_BASE_URL="https://traefik-apexalgo-iad.tail1b1987.ts.net:8444"

# Paths derived from where this script lives: the script sits in .marathon/,
# and the repository root is its parent directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(dirname "$SCRIPT_DIR")"
INSTRUCTION_FILE="$SCRIPT_DIR/instruction.md"
LOG_DIR="$SCRIPT_DIR/logs"
# ---------------------------------------------------------------------------
# Preflight checks. Each failure prints a reason and exits with status 1
# before anything is launched.
# ---------------------------------------------------------------------------

# tmux hosts the marathon loop, so it must be on PATH.
command -v tmux >/dev/null 2>&1 || { echo "Error: tmux not installed" >&2; exit 1; }
# The shared launcher has to exist and be executable.
[ -x "$MARATHON_SKILL/launcher.sh" ] || { echo "Error: marathon launcher missing: $MARATHON_SKILL/launcher.sh" >&2; exit 1; }
# The instruction file is what the launcher is pointed at via --prompt.
[ -f "$INSTRUCTION_FILE" ] || { echo "Error: instruction file missing: $INSTRUCTION_FILE" >&2; exit 1; }

# Refuse to start when a session with this exact name already exists.
# The leading "=" forces an exact-name match: a bare target is also matched
# as a name prefix and as an fnmatch pattern, so without it an unrelated
# session such as "<name>-2" would be mistaken for this one.
if tmux has-session -t "=$SESSION_NAME" 2>/dev/null; then
  echo "Session '$SESSION_NAME' already exists."
  echo " Attach: tmux attach -t $SESSION_NAME"
  echo " Kill: tmux kill-session -t $SESSION_NAME"
  exit 1
fi

# Guard against running concurrently with a NEEDLE worker on the same worktree.
# NOTE(review): pgrep -f treats its argument as a regular expression matched
# against the full command line, so regex metacharacters in $REPO_DIR are not
# taken literally, and a longer workspace path sharing this prefix also matches.
if pgrep -f "needle run --workspace $REPO_DIR" >/dev/null 2>&1; then
  echo "Error: a NEEDLE worker is running against $REPO_DIR." >&2
  echo " Marathon + NEEDLE share one git worktree → contention." >&2
  echo " Stop it first: needle stop -i <identifier>" >&2
  exit 1
fi

# Preflight: any HTTP response = proxy is up; only a connection failure aborts.
# curl runs silently (-s), skips TLS certificate verification (-k), gives up
# after 8 seconds and discards the body. Without -f, an HTTP error status still
# exits 0, which is what makes "any response" count as reachable.
if ! curl -sk --max-time 8 -o /dev/null "$ZAI_BASE_URL"; then
  echo "Error: ZAI proxy at $ZAI_BASE_URL is unreachable." >&2
  echo " Check Tailscale + the proxy on apexalgo-iad." >&2
  exit 1
fi
# Make sure the log directory passed to the launcher exists.
mkdir -p "$LOG_DIR"

# Shell-quote every value that gets spliced into the command string below.
# LOOP_CMD is handed to tmux as a single string and re-parsed by a shell, so
# wrapping values in literal single quotes breaks (or injects commands) as soon
# as a path contains a single quote. printf %q escapes such characters instead;
# values made only of ordinary path/URL characters come out unchanged.
# Caveat: for control characters %q emits bash's $'...' form, which a plain
# POSIX sh would not understand.
printf -v repo_dir_q '%q' "$REPO_DIR"
printf -v base_url_q '%q' "$ZAI_BASE_URL"
printf -v launcher_q '%q' "$MARATHON_SKILL/launcher.sh"
printf -v instruction_q '%q' "$INSTRUCTION_FILE"
printf -v log_dir_q '%q' "$LOG_DIR"

# Command run inside the tmux session:
#   1. cd into the repository.
#   2. unset CLAUDECODE (presumably so the headless run is not treated as
#      nested inside another claude-code session; confirm).
#   3. Export the environment that points claude-code at the ZAI proxy and pins
#      every model tier, including subagents, to glm-4.7.
#      NODE_TLS_REJECT_UNAUTHORIZED=0 turns off TLS certificate verification in
#      Node.js; the auth token is a placeholder value (its text suggests the
#      proxy handles authentication); API_TIMEOUT_MS=900000 is 15 minutes.
#   4. Run the marathon launcher with the instruction file as prompt, a
#      --delay of 10 and the log directory.
# The backslash-newline pairs are line continuations inside the double quotes,
# so the resulting string is a single line.
LOOP_CMD="cd $repo_dir_q && \
  unset CLAUDECODE && \
  export NODE_TLS_REJECT_UNAUTHORIZED=0 && \
  export ANTHROPIC_BASE_URL=$base_url_q && \
  export ANTHROPIC_AUTH_TOKEN='proxy-handles-auth' && \
  export ANTHROPIC_MODEL='glm-4.7' && \
  export ANTHROPIC_DEFAULT_OPUS_MODEL='glm-4.7' && \
  export ANTHROPIC_DEFAULT_SONNET_MODEL='glm-4.7' && \
  export ANTHROPIC_DEFAULT_HAIKU_MODEL='glm-4.7' && \
  export CLAUDE_CODE_SUBAGENT_MODEL='glm-4.7' && \
  export API_TIMEOUT_MS='900000' && \
  export DISABLE_AUTOUPDATER=1 && \
  export DISABLE_TELEMETRY=1 && \
  $launcher_q \
  --prompt $instruction_q \
  --model glm-4.7 \
  --delay 10 \
  --log-dir $log_dir_q"
# Startup banner followed by a summary of the resolved settings and a blank
# separator line. One printf call emits each argument on its own line.
printf '%s\n' \
  "╔══════════════════════════════════════════════════════════════╗" \
  "║ pdftract Marathon — claude-code @ GLM-4.7 ║" \
  "╚══════════════════════════════════════════════════════════════╝" \
  " Repo: $REPO_DIR" \
  " Instruction: $INSTRUCTION_FILE" \
  " Session: $SESSION_NAME" \
  " Model: glm-4.7 (all tiers)" \
  " Proxy: $ZAI_BASE_URL" \
  " Logs: $LOG_DIR" \
  ""

# Start the loop in a new detached (-d) tmux session named $SESSION_NAME,
# with the repository root as its working directory (-c).
tmux new-session -d -s "$SESSION_NAME" -c "$REPO_DIR" "$LOOP_CMD"

# Tell the operator how to reach, leave, stop and inspect the session.
printf '%s\n' \
  "Marathon running in tmux session: $SESSION_NAME" \
  " Attach: tmux attach -t $SESSION_NAME" \
  " Detach: Ctrl+B, D (while attached)" \
  " Stop: tmux kill-session -t $SESSION_NAME" \
  " Logs: ls $LOG_DIR/"