zai-proxy/proxy/scripts/auto-fix-loop.sh
jedarden e7c24a0c08 feat: initial zai-proxy ecosystem repo
Extracted from ardenone-cluster/containers/zai-proxy and
ardenone-cluster/containers/zai-proxy-dashboard.

- proxy/: OpenAI-compatible ZAI reverse proxy (Go, v1.10.0)
  - Token counting, rate limiting, Prometheus metrics, canary support
- dashboard/: Metrics dashboard backend + React frontend (Go, v1.0.0)
  - Prometheus collector, SQLite storage, SSE live updates
- docs/: Operational notes, research, and plan subdirs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-16 15:53:52 -04:00

891 lines
28 KiB
Bash
Executable file

#!/bin/bash
# Automated Test-Fix-Iterate Loop
# Purpose: Continuous testing and automated fix iteration
# Bead: bd-3eb
#
# This script implements a closed-loop system that:
# 1. Runs test harness
# 2. Detects failures and captures error details
# 3. Categorizes failure types
# 4. Logs failures with reproduction steps
# 5. Generates fix suggestions
# 6. Tracks iteration progress
# 7. Stops as soon as EITHER target is met (pass rate >= 95% or token
#    variance < 3% by default), or when the iteration cap is reached
#
# External tools invoked by this script: go, jq, awk, bc, grep.
#
# Abort on the first unhandled command failure.
# NOTE(review): with this option an arithmetic command such as (( n++ ))
# returns a non-zero status whenever the expression evaluates to 0, which
# terminates the script; counters in this file need to account for that.
set -e
# Script version (written into the state file and the final report)
VERSION="1.0.0"
# Configuration
# Absolute directory containing this script, resolved via BASH_SOURCE.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Parent of the script directory; `go test` is run from here.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Alias of PROJECT_ROOT. NOTE(review): not referenced elsewhere in the
# visible part of this file -- confirm before removing.
WORKSPACE_DIR="$PROJECT_ROOT"
# Working directories created under the project root at start-up.
ITERATIONS_DIR="$PROJECT_ROOT/.iterations"
LOGS_DIR="$PROJECT_ROOT/.test-logs"
REPORTS_DIR="$PROJECT_ROOT/.test-reports"
# Thresholds (from bead requirements); each can be overridden by the
# command-line options parsed at the bottom of the file.
TARGET_PASS_RATE=95 # 95% test pass rate
TARGET_TOKEN_VARIANCE=3 # <3% token count variance
MAX_ITERATIONS=50 # Maximum iterations to prevent infinite loops
COOLDOWN_SECONDS=5 # Wait between iterations
# Colors: ANSI escape sequences stored as literal backslash text; they are
# only interpreted when printed with `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
NC='\033[0m'
# State tracking (persisted): JSON file carrying counters across runs
STATE_FILE="$ITERATIONS_DIR/state.json"
# ============================================
# UTILITY FUNCTIONS
# ============================================
# Write an informational message to stdout, prefixed with a blue [INFO]
# tag and the current local timestamp.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted)
log_info() {
  printf '%b\n' "${BLUE}[INFO]${NC} $(date '+%Y-%m-%d %H:%M:%S') $1"
}
# Write a success message to stdout, prefixed with a green [SUCCESS] tag
# and the current local timestamp.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted)
log_success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $(date '+%Y-%m-%d %H:%M:%S') $1"
}
# Write a warning message to stdout, prefixed with a yellow [WARNING] tag
# and the current local timestamp.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted)
log_warning() {
  printf '%b\n' "${YELLOW}[WARNING]${NC} $(date '+%Y-%m-%d %H:%M:%S') $1"
}
# Write an error message to stdout, prefixed with a red [ERROR] tag and
# the current local timestamp.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted)
log_error() {
  printf '%b\n' "${RED}[ERROR]${NC} $(date '+%Y-%m-%d %H:%M:%S') $1"
}
# Write a debug message to stdout, but only when DEBUG is exactly "true".
# Globals:   DEBUG, CYAN, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted)
# Returns:   0 when debugging is disabled and nothing is printed
log_debug() {
  [[ "${DEBUG:-false}" == "true" ]] || return 0
  printf '%b\n' "${CYAN}[DEBUG]${NC} $(date '+%Y-%m-%d %H:%M:%S') $1"
}
# Print a cyan banner: a blank line, an 80-column rule of '=', the given
# text, a second rule, and a trailing blank line.
# Globals:   CYAN, NC (read)
# Arguments: $1 - banner text (backslash escapes are interpreted)
print_banner() {
  local text="$1"
  local rule
  # printf re-uses the format for each of the 80 brace-expanded arguments;
  # '%.0s' prints none of the argument, leaving just the '=' character.
  rule=$(printf '=%.0s' {1..80})
  printf '\n'
  printf '%b\n' "${CYAN}${rule}${NC}"
  printf '%b\n' "${CYAN}$text${NC}"
  printf '%b\n' "${CYAN}${rule}${NC}"
  printf '\n'
}
# ============================================
# INITIALIZATION
# ============================================
# Create every working directory the loop writes into.
# Globals: ITERATIONS_DIR, LOGS_DIR, REPORTS_DIR (read)
# Outputs: one info message on stdout
init_directories() {
  log_info "Initializing workspace directories..."
  local dir
  for dir in \
    "$ITERATIONS_DIR" \
    "$LOGS_DIR" \
    "$REPORTS_DIR" \
    "$REPORTS_DIR/failures" \
    "$REPORTS_DIR/patterns"; do
    mkdir -p "$dir"
  done
}
# Write a fresh state file with zeroed counters unless one already exists.
# An existing file is never overwritten.
# Globals: STATE_FILE, VERSION (read)
# Outputs: an info message on stdout when a new file is created
init_state() {
  if [[ -f "$STATE_FILE" ]]; then
    return 0
  fi
  log_info "Creating new state file..."
  # Unquoted heredoc delimiter: $VERSION and the UTC timestamp are expanded.
  cat > "$STATE_FILE" << EOF
{
"version": "$VERSION",
"started_at": "$(date -u +%Y-%m-%dT%H:%M:%SZ)",
"iteration": 0,
"total_tests_run": 0,
"total_passes": 0,
"total_failures": 0,
"best_pass_rate": 0.0,
"best_token_variance": 100.0,
"failure_history": [],
"fix_attempts": [],
"stop_reason": null
}
EOF
}
# Load the persisted counters from STATE_FILE into global variables.
#
# The values are read as plain text and validated as numbers before being
# assigned; the file content is never evaluated as shell code, so a
# tampered or corrupted state file cannot inject commands. Any field that
# is missing or not numeric keeps its default.
#
# Globals:
#   STATE_FILE (read)
#   ITERATION, TOTAL_TESTS_RUN, TOTAL_PASSES, TOTAL_FAILURES,
#   BEST_PASS_RATE, BEST_TOKEN_VARIANCE (written)
# Returns: 0 always; an unreadable state file yields the defaults and a
#          warning on stderr
load_state() {
  ITERATION=0
  TOTAL_TESTS_RUN=0
  TOTAL_PASSES=0
  TOTAL_FAILURES=0
  BEST_PASS_RATE=0.0
  BEST_TOKEN_VARIANCE=100.0

  [[ -f "$STATE_FILE" ]] || return 0

  local fields
  if ! fields=$(jq -r '
      [ .iteration // 0,
        .total_tests_run // 0,
        .total_passes // 0,
        .total_failures // 0,
        .best_pass_rate // 0.0,
        .best_token_variance // 100.0
      ] | map(tostring) | join("\t")
    ' "$STATE_FILE" 2>/dev/null); then
    printf 'load_state: cannot parse %s; using default counters\n' \
      "$STATE_FILE" >&2
    return 0
  fi

  local iteration tests_run passes failures best_rate best_variance
  IFS=$'\t' read -r iteration tests_run passes failures best_rate \
    best_variance <<< "$fields" || true

  local int_re='^[0-9]+$'
  local num_re='^-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?$'
  if [[ $iteration =~ $int_re ]]; then ITERATION=$iteration; fi
  if [[ $tests_run =~ $int_re ]]; then TOTAL_TESTS_RUN=$tests_run; fi
  if [[ $passes =~ $int_re ]]; then TOTAL_PASSES=$passes; fi
  if [[ $failures =~ $int_re ]]; then TOTAL_FAILURES=$failures; fi
  if [[ $best_rate =~ $num_re ]]; then BEST_PASS_RATE=$best_rate; fi
  if [[ $best_variance =~ $num_re ]]; then
    BEST_TOKEN_VARIANCE=$best_variance
  fi
  return 0
}
# Persist the loop counters to STATE_FILE as JSON.
#
# The file is written to "$STATE_FILE.tmp" first and then renamed, so a
# failed serialization never truncates the existing state. started_at,
# failure_history and fix_attempts are carried over from the current state
# file when it is readable; otherwise they fall back to "now" and empty
# arrays.
#
# Globals:
#   STATE_FILE, VERSION, BEST_PASS_RATE, BEST_TOKEN_VARIANCE (read)
# Arguments:
#   $1 - iteration number
#   $2 - cumulative tests run
#   $3 - cumulative passes
#   $4 - cumulative failures
#   $5 - pass rate of the current iteration (percent)
#   $6 - token variance of the current iteration (percent)
# Returns: 0 on success, 1 if the state could not be serialized
save_state() {
  local iteration="$1"
  local tests_run="$2"
  local passes="$3"
  local failures="$4"
  local pass_rate="$5"
  local token_variance="$6"
  local tmp_file="$STATE_FILE.tmp"
  local started_at=""
  local failure_history=""
  local fix_attempts=""

  if [[ -s "$STATE_FILE" ]]; then
    started_at=$(jq -r '.started_at // empty' "$STATE_FILE" 2>/dev/null) \
      || started_at=""
    failure_history=$(jq -c '.failure_history // []' "$STATE_FILE" \
      2>/dev/null) || failure_history=""
    fix_attempts=$(jq -c '.fix_attempts // []' "$STATE_FILE" 2>/dev/null) \
      || fix_attempts=""
  fi
  # --argjson rejects an empty string, so substitute valid defaults.
  [[ -n "$started_at" ]] || started_at=$(date -u +%Y-%m-%dT%H:%M:%SZ)
  [[ -n "$failure_history" ]] || failure_history='[]'
  [[ -n "$fix_attempts" ]] || fix_attempts='[]'

  local best_pass_rate best_token_variance
  best_pass_rate=$(max_float "$BEST_PASS_RATE" "$pass_rate")
  best_token_variance=$(min_float "$BEST_TOKEN_VARIANCE" "$token_variance")

  # The version is passed in as a jq variable; jq cannot see shell
  # variables, so it must be supplied explicitly with --arg.
  if ! jq -n \
      --arg version "$VERSION" \
      --arg started_at "$started_at" \
      --arg iteration "$iteration" \
      --arg tests_run "$tests_run" \
      --arg passes "$passes" \
      --arg failures "$failures" \
      --arg best_pass_rate "$best_pass_rate" \
      --arg best_token_variance "$best_token_variance" \
      --argjson failure_history "$failure_history" \
      --argjson fix_attempts "$fix_attempts" \
      '{
        version: $version,
        started_at: $started_at,
        iteration: ($iteration | tonumber),
        total_tests_run: ($tests_run | tonumber),
        total_passes: ($passes | tonumber),
        total_failures: ($failures | tonumber),
        best_pass_rate: ($best_pass_rate | tonumber),
        best_token_variance: ($best_token_variance | tonumber),
        failure_history: $failure_history,
        fix_attempts: $fix_attempts,
        last_updated: now
      }' > "$tmp_file"; then
    rm -f -- "$tmp_file"
    printf 'save_state: failed to serialize state to %s\n' "$STATE_FILE" >&2
    return 1
  fi
  mv -- "$tmp_file" "$STATE_FILE"
}
# Print the larger of two numbers; the second is printed when they are
# equal. The winning argument is echoed back exactly as it was written.
# Arguments: $1, $2 - numbers to compare
max_float() {
  awk '$1 > $2 { print $1; next } { print $2 }' <<< "$1 $2"
}
# Print the smaller of two numbers; the second is printed when they are
# equal. The winning argument is echoed back exactly as it was written.
# Arguments: $1, $2 - numbers to compare
min_float() {
  awk '$1 < $2 { print $1; next } { print $2 }' <<< "$1 $2"
}
# ============================================
# TEST HARNESS
# ============================================
# Run the regression tests once, store the raw output, and emit the parsed
# result as JSON.
#
# The caller captures this function's stdout and feeds it to jq, so
# stdout must carry the JSON produced by parse_test_results and nothing
# else; the progress message is therefore redirected to stderr.
#
# Globals:   LOGS_DIR, PROJECT_ROOT (read)
# Arguments: $1 - iteration number (used to name the log file)
# Outputs:   JSON from parse_test_results on stdout; raw `go test` output
#            in "$LOGS_DIR/iteration-<n>.log"
run_test_harness() {
  local iteration_num="$1"
  local log_file="$LOGS_DIR/iteration-$iteration_num.log"
  local test_output
  # Start at 0 so a passing run reports an explicit exit code.
  local exit_code=0

  log_info "Running test harness for iteration $iteration_num..." >&2

  # The cd happens inside the command substitution's subshell, so the
  # caller's working directory is left alone. A non-zero status (failing
  # tests or an unreachable project root) is recorded, not fatal.
  test_output=$(cd "$PROJECT_ROOT" && go test -v -run TestRegression 2>&1) \
    || exit_code=$?

  # Save raw output
  printf '%s\n' "$test_output" > "$log_file"

  # Parse results
  parse_test_results "$test_output" "$exit_code"
}
# Turn raw `go test -v` output into a JSON summary.
#
# Tests are counted from the "--- PASS: name" / "--- FAIL: name" result
# lines. Token variance is the coefficient of variation (population
# standard deviation divided by the mean, in percent) of every
# "<number> tokens" occurrence in the output. It stays at 100.0 when fewer
# than two counts are present or the mean is not positive.
#
# Arguments:
#   $1 - raw test output
#   $2 - exit code of the test command (default 0)
# Outputs: JSON object with passed, failed, total, pass_rate,
#          token_variance and exit_code on stdout
parse_test_results() {
  local output="$1"
  local exit_code="${2:-0}"
  local passed=0
  local failed=0
  local total=0
  local line

  # Parse test output for pass/fail result lines. Plain assignments are
  # used for the counters because (( n++ )) returns a failing status when
  # the old value is 0.
  while IFS= read -r line; do
    if [[ $line =~ ---\ (PASS|FAIL):\ ([^\ ]+) ]]; then
      total=$((total + 1))
      if [[ "${BASH_REMATCH[1]}" == "PASS" ]]; then
        passed=$((passed + 1))
      else
        failed=$((failed + 1))
      fi
    fi
  done <<< "$output"

  # Extract token counts if available. grep exits non-zero when nothing
  # matches, which is expected here.
  local token_variance=100.0
  local token_counts
  token_counts=$(grep -oE '[0-9]+ tokens' <<< "$output" \
    | grep -oE '[0-9]+' || true)
  if [[ -n "$token_counts" ]]; then
    # Bash arithmetic is integer-only (no square root, no fractions), so
    # the statistics are computed in awk.
    token_variance=$(awk '
      { n++; sum += $1; sum_sq += $1 * $1 }
      END {
        if (n < 2) { print "100.0"; exit }
        mean = sum / n
        if (mean <= 0) { print "100.0"; exit }
        variance = sum_sq / n - mean * mean
        if (variance < 0) variance = 0   # guard against rounding noise
        printf "%.2f", sqrt(variance) * 100 / mean
      }' <<< "$token_counts")
  fi

  # Calculate pass rate
  local pass_rate=0.0
  if [[ $total -gt 0 ]]; then
    pass_rate=$(awk -v p="$passed" -v t="$total" \
      'BEGIN { printf "%.2f", (p / t) * 100 }')
  fi

  # Return results as JSON
  jq -n \
    --arg passed "$passed" \
    --arg failed "$failed" \
    --arg total "$total" \
    --arg pass_rate "$pass_rate" \
    --arg token_variance "$token_variance" \
    --arg exit_code "$exit_code" \
    '{
      passed: ($passed | tonumber),
      failed: ($failed | tonumber),
      total: ($total | tonumber),
      pass_rate: ($pass_rate | tonumber),
      token_variance: ($token_variance | tonumber),
      exit_code: ($exit_code | tonumber)
    }'
}
# ============================================
# FAILURE CATEGORIZATION
# ============================================
# Classify a failing test by keyword-matching its error text.
#
# The checks run in a fixed order and each match overwrites the previous
# result, so when several keyword groups match, the LAST matching group
# (accuracy, format, streaming, concurrency, edge case, performance)
# decides the category. Matching is case-sensitive.
#
# Arguments:
#   $1 - test name
#   $2 - error text to classify
#   $3 - path to the test log (accepted but not consulted here)
# Outputs: JSON object with test_name, category, severity, suggested_fix
#          and error_message on stdout
categorize_failure() {
  local test_name="$1"
  local error_message="$2"
  local test_log="$3"

  local category="unknown"
  local severity="medium"
  local suggested_fix="generic"

  # Keyword groups, one regular expression per category.
  local re_accuracy='(expected|Got|tokens|count)'
  local re_format='(JSON|marshal|unmarshal|parse|format)'
  local re_streaming='(stream|SSE|chunk|flush|delta)'
  local re_concurrency='(race|concurrent|lock|mutex|goroutine)'
  local re_edge_case='(empty|nil|panic|crash|special|unicode)'
  local re_performance='(timeout|slow|deadline|exceeded)'

  # Accuracy failures - token count mismatches
  if [[ $error_message =~ $re_accuracy ]]; then
    category="accuracy"
    severity="high"
    suggested_fix="verify_tokenization_algorithm"
    # Narrow the suggestion when a more specific symptom is present.
    if [[ $error_message =~ (empty|zero) ]]; then
      suggested_fix="check_tokenizer_initialization"
    elif [[ $error_message =~ (range|min|max) ]]; then
      suggested_fix="adjust_token_ranges"
    fi
  fi

  # Format failures - JSON parsing, structure issues
  if [[ $error_message =~ $re_format ]]; then
    category="format"
    severity="medium"
    suggested_fix="fix_json_parsing"
    if [[ $error_message =~ (invalid|malformed) ]]; then
      suggested_fix="add_input_validation"
    fi
  fi

  # Streaming failures - SSE, chunking issues
  if [[ $error_message =~ $re_streaming ]]; then
    category="streaming"
    severity="high"
    suggested_fix="verify_streaming_buffer_handling"
  fi

  # Concurrency failures - race conditions, locks
  if [[ $error_message =~ $re_concurrency ]]; then
    category="concurrency"
    severity="critical"
    suggested_fix="add_synchronization_or_improve_locking"
  fi

  # Edge case failures - empty input, special characters
  if [[ $error_message =~ $re_edge_case ]]; then
    category="edge_case"
    severity="medium"
    suggested_fix="add_defensive_programming"
  fi

  # Performance failures - timeout, slow operations
  if [[ $error_message =~ $re_performance ]]; then
    category="performance"
    severity="low"
    suggested_fix="optimize_algorithm_or_add_caching"
  fi

  jq -n \
    --arg test_name "$test_name" \
    --arg error_message "$error_message" \
    --arg category "$category" \
    --arg severity "$severity" \
    --arg suggested_fix "$suggested_fix" \
    '{
      test_name: $test_name,
      category: $category,
      severity: $severity,
      suggested_fix: $suggested_fix,
      error_message: $error_message
    }'
}
# ============================================
# FAILURE LOGGING
# ============================================
# Write a detailed JSON report for one failing test and print its path.
#
# The report id is "fail-<iteration>-<epoch seconds>". Several failures
# are normally logged within the same second, so a numeric suffix
# ("-1", "-2", ...) is appended whenever that file name is already taken;
# without it later failures would overwrite earlier reports.
#
# Globals:   REPORTS_DIR (read)
# Arguments:
#   $1 - iteration number
#   $2 - test name
#   $3 - JSON object produced by categorize_failure
#   $4 - path to the iteration's test log
# Outputs:   path of the written report on stdout
log_failure_with_reproduction() {
  local iteration="$1"
  local test_name="$2"
  local category_info="$3"
  local test_log="$4"
  local failures_dir="$REPORTS_DIR/failures"
  local base_id
  base_id="fail-${iteration}-$(date +%s)"
  local failure_id="$base_id"
  local attempt=0

  mkdir -p "$failures_dir"
  while [[ -e "$failures_dir/${failure_id}.json" ]]; do
    attempt=$((attempt + 1))
    failure_id="${base_id}-${attempt}"
  done
  local failure_file="$failures_dir/${failure_id}.json"

  # Extract relevant test context
  local reproduction_steps
  reproduction_steps=$(extract_reproduction_steps "$test_name" "$test_log")

  # Create detailed failure report
  jq -n \
    --arg failure_id "$failure_id" \
    --arg iteration "$iteration" \
    --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg test_name "$test_name" \
    --argjson category_info "$category_info" \
    --argjson reproduction_steps "$reproduction_steps" \
    '{
      failure_id: $failure_id,
      iteration: ($iteration | tonumber),
      timestamp: $timestamp,
      test_name: $test_name,
      category: $category_info.category,
      severity: $category_info.severity,
      suggested_fix: $category_info.suggested_fix,
      error_message: $category_info.error_message,
      reproduction_steps: $reproduction_steps
    }' > "$failure_file"

  echo "$failure_file"
}
# Build a step-by-step reproduction guide for a failing test.
#
# The guide names the actual failing test and the directory the tests run
# from, so the steps can be copied as-is.
#
# Globals:   PROJECT_ROOT (read; when unset, the original fixed checkout
#            path is used)
# Arguments:
#   $1 - name of the failing test
#   $2 - path to the test log (accepted but not consulted here)
# Outputs:   compact JSON array of step strings on stdout
extract_reproduction_steps() {
  local test_name="$1"
  local test_log="$2"
  local project_dir="${PROJECT_ROOT:-/home/coder/ardenone-cluster/containers/zai-proxy}"

  # Values are passed with --arg so jq handles all JSON escaping.
  jq -n -c \
    --arg dir "$project_dir" \
    --arg test "$test_name" \
    '[
      "1. Navigate to project directory: cd \($dir)",
      "2. Run specific test: go test -v -run \($test)",
      "3. Observe error message",
      "4. Review code at: tokenizer.go or tokenizer_regression_test.go",
      "5. Check token counting logic for the specific input",
      "6. Verify tokenizer initialization",
      "7. Test with various input formats"
    ]'
}
# ============================================
# FIX SUGGESTION GENERATION
# ============================================
# Derive pattern-level fix suggestions from a list of categorized failures.
#
# A suggestion is produced when a category is frequent enough: more than
# two accuracy or format failures, or at least one streaming or
# concurrency failure.
#
# Arguments: $1 - JSON array of failure objects, each with a "category"
# Outputs:   JSON array of suggestion strings on stdout; "[]" when no
#            pattern applies
generate_fix_suggestions() {
  local failures_json="$1"
  local suggestions=()
  # Analyze failure patterns
  local accuracy_count=0
  local format_count=0
  local streaming_count=0
  local concurrency_count=0
  local category

  # One jq pass lists every category. Counters use plain assignment
  # because (( n++ )) returns a failing status when the old value is 0.
  while IFS= read -r category; do
    case "$category" in
      accuracy) accuracy_count=$((accuracy_count + 1)) ;;
      format) format_count=$((format_count + 1)) ;;
      streaming) streaming_count=$((streaming_count + 1)) ;;
      concurrency) concurrency_count=$((concurrency_count + 1)) ;;
    esac
  done < <(jq -r '.[] | .category? // empty' <<< "$failures_json")

  # Generate suggestions based on patterns
  if [[ $accuracy_count -gt 2 ]]; then
    suggestions+=("PATTERN: Multiple accuracy failures detected. SUGGESTION: Review tokenizer encoding selection (cl100k_base vs model-specific). Consider adjusting expected token ranges in golden tests.")
  fi
  if [[ $format_count -gt 2 ]]; then
    suggestions+=("PATTERN: Multiple format failures. SUGGESTION: JSON parsing may be inconsistent. Add validation middleware for request/response formats.")
  fi
  if [[ $streaming_count -gt 0 ]]; then
    suggestions+=("PATTERN: Streaming failures detected. SUGGESTION: Verify io.TeeReader buffer handling in ResponseBodyCapture. Check for race conditions in concurrent reads.")
  fi
  if [[ $concurrency_count -gt 0 ]]; then
    suggestions+=("PATTERN: Concurrency issues. SUGGESTION: Review mutex usage in TikTokenCounter. Consider adding more granular locking or using sync/atomic.")
  fi

  # Output as JSON array. printf with an empty array still prints one
  # blank line, which would serialize as [""], so the empty case is
  # emitted explicitly.
  if [[ ${#suggestions[@]} -eq 0 ]]; then
    echo '[]'
    return 0
  fi
  printf '%s\n' "${suggestions[@]}" | jq -R . | jq -s .
}
# ============================================
# ITERATION TRACKING
# ============================================
# Fold one iteration's results into the running totals, persist them, and
# write the per-iteration report.
#
# Globals:
#   TOTAL_TESTS_RUN, TOTAL_PASSES, TOTAL_FAILURES (read and written)
#   BEST_PASS_RATE, BEST_TOKEN_VARIANCE (read and written)
# Arguments:
#   $1 - iteration number
#   $2 - JSON result object from the test harness
#   $3 - JSON array of failure reports
update_iteration_metrics() {
  local iteration="$1"
  local test_results="$2"
  local failures="$3"
  local summary
  local passed failed total pass_rate token_variance

  # Pull all five metrics out with a single jq call, space separated.
  summary=$(jq -r \
    '"\(.passed) \(.failed) \(.total) \(.pass_rate) \(.token_variance)"' \
    <<< "$test_results")
  read -r passed failed total pass_rate token_variance <<< "$summary"

  # Update running totals
  TOTAL_TESTS_RUN=$((TOTAL_TESTS_RUN + total))
  TOTAL_PASSES=$((TOTAL_PASSES + passed))
  TOTAL_FAILURES=$((TOTAL_FAILURES + failed))

  # Update bests: highest pass rate seen, lowest token variance seen
  BEST_PASS_RATE=$(max_float "$BEST_PASS_RATE" "$pass_rate")
  BEST_TOKEN_VARIANCE=$(min_float "$BEST_TOKEN_VARIANCE" "$token_variance")

  # Save state, then generate the iteration report
  save_state "$iteration" "$TOTAL_TESTS_RUN" "$TOTAL_PASSES" \
    "$TOTAL_FAILURES" "$pass_rate" "$token_variance"
  generate_iteration_report "$iteration" "$test_results" "$failures"
}
# Write "$REPORTS_DIR/iteration-<n>.json" describing a single iteration:
# its own results, the failure details, the cumulative totals and the best
# metrics seen so far.
#
# Globals:
#   REPORTS_DIR, TOTAL_TESTS_RUN, TOTAL_PASSES, TOTAL_FAILURES,
#   BEST_PASS_RATE, BEST_TOKEN_VARIANCE (read)
# Arguments:
#   $1 - iteration number
#   $2 - JSON result object from the test harness
#   $3 - JSON array of failure reports
# Outputs: an info message naming the report file on stdout
generate_iteration_report() {
  local iteration="$1"
  local test_results="$2"
  local failures="$3"
  local report_file="$REPORTS_DIR/iteration-$iteration.json"

  # Shape of the report; the $names are bound by the jq options below.
  local report_filter='{
    iteration: ($iteration | tonumber),
    timestamp: $timestamp,
    test_results: $test_results,
    failures: ($failures | length),
    failure_details: $failures,
    cumulative: {
      total_tests_run: ($total_tests_run | tonumber),
      total_passes: ($total_passes | tonumber),
      total_failures: ($total_failures | tonumber)
    },
    best_metrics: {
      pass_rate: ($best_pass_rate | tonumber),
      token_variance: ($best_token_variance | tonumber)
    }
  }'

  jq -n \
    --argjson test_results "$test_results" \
    --argjson failures "$failures" \
    --arg iteration "$iteration" \
    --arg timestamp "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg total_tests_run "$TOTAL_TESTS_RUN" \
    --arg total_passes "$TOTAL_PASSES" \
    --arg total_failures "$TOTAL_FAILURES" \
    --arg best_pass_rate "$BEST_PASS_RATE" \
    --arg best_token_variance "$BEST_TOKEN_VARIANCE" \
    "$report_filter" > "$report_file"

  log_info "Iteration report saved to $report_file"
}
# ============================================
# STOP CONDITION CHECKER
# ============================================
# Decide whether the loop should stop after the current iteration.
#
# The loop stops when EITHER the pass rate reaches its target OR the token
# variance drops below its target; a perfect score (100% / 0%) replaces
# the reason text. The reported targets are the configured thresholds, so
# command-line overrides are reflected in the output.
#
# Globals:   TARGET_PASS_RATE, TARGET_TOKEN_VARIANCE (read; default 95/3)
# Arguments: $1 - JSON result object with pass_rate and token_variance
# Outputs:   JSON object with should_stop, reason, current_metrics and
#            targets on stdout
check_stop_conditions() {
  local test_results="$1"
  local target_pass_rate="${TARGET_PASS_RATE:-95}"
  local target_token_variance="${TARGET_TOKEN_VARIANCE:-3}"
  local pass_rate
  local token_variance
  local should_stop=false
  local stop_reason=""

  pass_rate=$(jq -r '.pass_rate' <<< "$test_results")
  token_variance=$(jq -r '.token_variance' <<< "$test_results")

  # Evaluate all three floating-point comparisons in one awk call (awk is
  # what the rest of this file uses for float math). "+ 0" forces a
  # numeric comparison.
  local rate_met variance_met perfect
  read -r rate_met variance_met perfect < <(awk \
    -v pr="$pass_rate" -v tv="$token_variance" \
    -v tpr="$target_pass_rate" -v ttv="$target_token_variance" \
    'BEGIN {
      printf "%d %d %d\n", (pr + 0 >= tpr + 0), (tv + 0 < ttv + 0), \
        (pr + 0 == 100 && tv + 0 == 0)
    }')

  # Check pass rate threshold
  if [[ "$rate_met" == "1" ]]; then
    should_stop=true
    stop_reason="Target pass rate achieved: ${pass_rate}% >= ${target_pass_rate}%"
  fi

  # Check token variance threshold
  if [[ "$variance_met" == "1" ]]; then
    if [[ "$should_stop" == "true" ]]; then
      stop_reason="$stop_reason AND target token variance achieved: ${token_variance}% < ${target_token_variance}%"
    else
      should_stop=true
      stop_reason="Target token variance achieved: ${token_variance}% < ${target_token_variance}%"
    fi
  fi

  # Check for perfect score
  if [[ "$perfect" == "1" ]]; then
    should_stop=true
    stop_reason="Perfect score achieved: 100% pass rate, 0% token variance"
  fi

  jq -n \
    --arg should_stop "$should_stop" \
    --arg stop_reason "$stop_reason" \
    --arg pass_rate "$pass_rate" \
    --arg token_variance "$token_variance" \
    --arg target_pass_rate "$target_pass_rate" \
    --arg target_token_variance "$target_token_variance" \
    '{
      should_stop: ($should_stop == "true"),
      reason: $stop_reason,
      current_metrics: {
        pass_rate: ($pass_rate | tonumber),
        token_variance: ($token_variance | tonumber)
      },
      targets: {
        pass_rate: ($target_pass_rate | tonumber),
        token_variance: ($target_token_variance | tonumber)
      }
    }'
}
# ============================================
# PROGRESS DISPLAY
# ============================================
# Print a human-readable summary of one iteration: current metrics, best
# metrics so far, and the failures grouped by category.
#
# Globals:
#   TARGET_PASS_RATE, TARGET_TOKEN_VARIANCE, BEST_PASS_RATE,
#   BEST_TOKEN_VARIANCE, CYAN, RED, GREEN, YELLOW, NC (read)
# Arguments:
#   $1 - iteration number
#   $2 - JSON result object from the test harness
#   $3 - JSON array of failure reports, each with a "category"
display_progress() {
  local iteration="$1"
  local test_results="$2"
  local failures="$3"
  local pass_rate
  local token_variance
  local failed

  pass_rate=$(jq -r '.pass_rate' <<< "$test_results")
  token_variance=$(jq -r '.token_variance' <<< "$test_results")
  failed=$(jq -r '.failed' <<< "$test_results")

  print_banner "Iteration $iteration Summary"

  # Metrics display
  echo -e "${CYAN}Current Metrics:${NC}"
  echo " Pass Rate: ${pass_rate}% (target: ${TARGET_PASS_RATE}%)"
  echo " Token Variance: ${token_variance}% (target: <${TARGET_TOKEN_VARIANCE}%)"
  echo ""

  # Best metrics
  echo -e "${CYAN}Best Metrics (all time):${NC}"
  echo " Pass Rate: ${BEST_PASS_RATE}%"
  echo " Token Variance: ${BEST_TOKEN_VARIANCE}%"
  echo ""

  # Failures summary
  if [[ $failed -gt 0 ]]; then
    echo -e "${RED}Failures: $failed${NC}"

    # Group by category. Counters use plain assignment: (( n++ )) returns
    # a failing status when the old value is 0, which would end the whole
    # script under `set -e`.
    local accuracy=0 format=0 streaming=0 concurrency=0 edge_case=0
    local performance=0 unknown=0
    local category
    while IFS= read -r category; do
      case "$category" in
        accuracy) accuracy=$((accuracy + 1)) ;;
        format) format=$((format + 1)) ;;
        streaming) streaming=$((streaming + 1)) ;;
        concurrency) concurrency=$((concurrency + 1)) ;;
        edge_case) edge_case=$((edge_case + 1)) ;;
        performance) performance=$((performance + 1)) ;;
        unknown) unknown=$((unknown + 1)) ;;
      esac
    done < <(jq -r '.[] | .category? // empty' <<< "$failures")

    echo -e " ${YELLOW}Breakdown by category:${NC}"
    if [[ $accuracy -gt 0 ]]; then echo " Accuracy: $accuracy"; fi
    if [[ $format -gt 0 ]]; then echo " Format: $format"; fi
    if [[ $streaming -gt 0 ]]; then echo " Streaming: $streaming"; fi
    if [[ $concurrency -gt 0 ]]; then echo " Concurrency: $concurrency"; fi
    if [[ $edge_case -gt 0 ]]; then echo " Edge Case: $edge_case"; fi
    if [[ $performance -gt 0 ]]; then echo " Performance: $performance"; fi
    if [[ $unknown -gt 0 ]]; then echo " Unknown: $unknown"; fi
  else
    echo -e "${GREEN}No failures!${NC}"
  fi
  echo ""
}
# ============================================
# MAIN LOOP
# ============================================
# Drive the test-fix-iterate loop: run the tests, analyse and log each
# failure, print suggestions and progress, and stop once a stop condition
# holds or MAX_ITERATIONS is reached. Iteration numbering resumes from the
# persisted state.
#
# Globals:
#   VERSION, MAX_ITERATIONS, COOLDOWN_SECONDS, TARGET_PASS_RATE,
#   TARGET_TOKEN_VARIANCE, LOGS_DIR, REPORTS_DIR, YELLOW, NC (read)
#   ITERATION (read after load_state)
main() {
  print_banner "🔄 Automated Test-Fix-Iterate Loop v$VERSION"

  # Initialize
  init_directories
  init_state
  load_state

  log_info "Starting test-fix-iterate loop..."
  log_info "Stop conditions: pass rate >= ${TARGET_PASS_RATE}%, token variance < ${TARGET_TOKEN_VARIANCE}%"
  log_info "Maximum iterations: $MAX_ITERATIONS"

  local iteration=$ITERATION
  local final_reason=""
  local test_results failed test_log line test_name error_msg
  local category_info failure_file failures_json
  local suggestions suggestion stop_check should_stop
  local -a failures_array

  # Main iteration loop
  while [[ $iteration -lt $MAX_ITERATIONS ]]; do
    # Not (( iteration++ )): that returns a failing status when the old
    # value is 0 and would end the script on the first pass under set -e.
    iteration=$((iteration + 1))
    print_banner "🧪 Iteration $iteration/$MAX_ITERATIONS"

    # Run test harness
    test_results=$(run_test_harness "$iteration")

    # Parse and categorize failures
    failures_array=()
    failed=$(jq -r '.failed' <<< "$test_results")
    if [[ $failed -gt 0 ]]; then
      log_warning "Detected $failed test failures. Analyzing..."
      # Read test log for error details
      test_log="$LOGS_DIR/iteration-$iteration.log"

      # Categorize each failure
      while IFS= read -r line; do
        if [[ $line =~ FAIL:\ ([^\ ]+) ]]; then
          test_name="${BASH_REMATCH[1]}"
          # -F: the test name is literal text, not a regex; `|| true`
          # keeps an unexpected no-match from aborting the run.
          error_msg=$(grep -F -A 5 -- "$test_name" "$test_log" | head -6 \
            || true)
          category_info=$(categorize_failure "$test_name" "$error_msg" \
            "$test_log")
          # Log with reproduction steps
          failure_file=$(log_failure_with_reproduction "$iteration" \
            "$test_name" "$category_info" "$test_log")
          failures_array+=("$(cat "$failure_file")")
        fi
      done < "$test_log"
    fi

    # Convert failures array to JSON
    if [[ ${#failures_array[@]} -gt 0 ]]; then
      failures_json=$(printf '%s\n' "${failures_array[@]}" | jq -s .)
    else
      failures_json='[]'
    fi

    # Generate fix suggestions if there are failures
    if [[ $failed -gt 0 ]]; then
      suggestions=$(generate_fix_suggestions "$failures_json")
      log_info "Fix suggestions generated:"
      jq -r '.[]' <<< "$suggestions" | while IFS= read -r suggestion; do
        echo -e " ${YELLOW}${NC} $suggestion"
      done
      # Save suggestions
      echo "$suggestions" \
        > "$REPORTS_DIR/patterns/iteration-$iteration-suggestions.json"
    fi

    # Update iteration metrics
    update_iteration_metrics "$iteration" "$test_results" "$failures_json"

    # Display progress
    display_progress "$iteration" "$test_results" "$failures_json"

    # Check stop conditions
    stop_check=$(check_stop_conditions "$test_results")
    should_stop=$(jq -r '.should_stop' <<< "$stop_check")
    if [[ "$should_stop" == "true" ]]; then
      final_reason=$(jq -r '.reason' <<< "$stop_check")
      log_success "$final_reason"
      break
    fi

    # Cooldown before next iteration
    if [[ $iteration -lt $MAX_ITERATIONS ]]; then
      log_info "Waiting ${COOLDOWN_SECONDS}s before next iteration..."
      sleep "$COOLDOWN_SECONDS"
    fi
  done

  # The loop ended without a stop condition: record why in the report.
  if [[ -z "$final_reason" ]]; then
    final_reason="Maximum iterations reached ($MAX_ITERATIONS)"
    log_warning "$final_reason"
  fi

  # Final report
  print_final_report "$iteration" "$final_reason"
}
# Write "$REPORTS_DIR/final-report.json" and print the closing summary,
# including whether the best pass rate and best token variance met their
# targets.
#
# Globals:
#   REPORTS_DIR, VERSION, TOTAL_TESTS_RUN, TOTAL_PASSES, TOTAL_FAILURES,
#   BEST_PASS_RATE, BEST_TOKEN_VARIANCE, TARGET_PASS_RATE,
#   TARGET_TOKEN_VARIANCE, GREEN, YELLOW, NC (read)
# Arguments:
#   $1 - number of the last iteration that ran
#   $2 - reason the loop stopped
print_final_report() {
  local final_iteration="$1"
  local stop_reason="$2"
  local final_report="$REPORTS_DIR/final-report.json"

  # Shape of the report; the $names are bound by the jq options below.
  local report_filter='{
    version: $version,
    final_iteration: ($final_iteration | tonumber),
    stop_reason: $stop_reason,
    completed_at: $completed_at,
    summary: {
      total_tests_run: ($total_tests_run | tonumber),
      total_passes: ($total_passes | tonumber),
      total_failures: ($total_failures | tonumber),
      best_pass_rate: ($best_pass_rate | tonumber),
      best_token_variance: ($best_token_variance | tonumber)
    }
  }'

  jq -n \
    --arg version "$VERSION" \
    --arg final_iteration "$final_iteration" \
    --arg stop_reason "$stop_reason" \
    --arg completed_at "$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
    --arg total_tests_run "$TOTAL_TESTS_RUN" \
    --arg total_passes "$TOTAL_PASSES" \
    --arg total_failures "$TOTAL_FAILURES" \
    --arg best_pass_rate "$BEST_PASS_RATE" \
    --arg best_token_variance "$BEST_TOKEN_VARIANCE" \
    "$report_filter" > "$final_report"

  print_banner "📊 Final Report"
  echo -e "${GREEN}Test-Fix-Iterate Loop Completed${NC}"
  # Plain (uncoloured) summary lines, one printf argument per line.
  printf '%s\n' \
    "" \
    "Total iterations: $final_iteration" \
    "Stop reason: $stop_reason" \
    "" \
    "Summary:" \
    " Total tests run: $TOTAL_TESTS_RUN" \
    " Total passes: $TOTAL_PASSES" \
    " Total failures: $TOTAL_FAILURES" \
    " Best pass rate: ${BEST_PASS_RATE}%" \
    " Best token variance: ${BEST_TOKEN_VARIANCE}%" \
    "" \
    "Reports saved to: $REPORTS_DIR" \
    "Final report: $final_report" \
    ""

  # Check if targets were met; bc prints 1 for true and 0 for false.
  if (( $(bc -l <<< "$BEST_PASS_RATE >= $TARGET_PASS_RATE") )); then
    echo -e "${GREEN}✅ PASS RATE TARGET ACHIEVED${NC}"
  else
    echo -e "${YELLOW}⚠️ Pass rate target not met: ${BEST_PASS_RATE}% < ${TARGET_PASS_RATE}%${NC}"
  fi
  if (( $(bc -l <<< "$BEST_TOKEN_VARIANCE < $TARGET_TOKEN_VARIANCE") )); then
    echo -e "${GREEN}✅ TOKEN VARIANCE TARGET ACHIEVED${NC}"
  else
    echo -e "${YELLOW}⚠️ Token variance target not met: ${BEST_TOKEN_VARIANCE}% >= ${TARGET_TOKEN_VARIANCE}%${NC}"
  fi
  echo ""
}
# ============================================
# SCRIPT ENTRY POINT
# ============================================
# Print the command-line help text to stdout.
print_usage() {
  echo "Usage: $0 [OPTIONS]"
  echo ""
  echo "Automated Test-Fix-Iterate Loop for continuous testing"
  echo ""
  echo "Options:"
  echo " --debug Enable debug output"
  echo " --max-iterations N Maximum iterations (default: 50)"
  echo " --target-pass-rate N Target pass rate % (default: 95)"
  echo " --target-variance N Target token variance % (default: 3)"
  echo " -h, --help Show this help"
  echo ""
  echo "Stop Conditions:"
  echo " - Pass rate >= 95%"
  echo " - Token variance < 3%"
  echo " - Maximum iterations reached"
}

# Parse command line arguments into the global configuration variables.
#
# Options that take a value are rejected when the value is missing or not
# numeric; otherwise `shift 2` would fail on a trailing option and a bad
# number would only surface later inside an arithmetic comparison.
#
# Globals:   DEBUG, MAX_ITERATIONS, TARGET_PASS_RATE,
#            TARGET_TOKEN_VARIANCE (written)
# Arguments: the script's command-line arguments
# Returns:   exits 0 after printing help, exits 1 on a usage error
parse_args() {
  local integer_re='^[0-9]+$'
  local number_re='^[0-9]+(\.[0-9]+)?$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --debug)
        DEBUG=true
        shift
        ;;
      --max-iterations | --target-pass-rate | --target-variance)
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          echo "Use -h or --help for usage information" >&2
          exit 1
        fi
        case "$1" in
          --max-iterations)
            if [[ ! "$2" =~ $integer_re ]]; then
              echo "Invalid value for $1: $2 (expected a whole number)" >&2
              exit 1
            fi
            MAX_ITERATIONS="$2"
            ;;
          --target-pass-rate)
            if [[ ! "$2" =~ $number_re ]]; then
              echo "Invalid value for $1: $2 (expected a number)" >&2
              exit 1
            fi
            TARGET_PASS_RATE="$2"
            ;;
          --target-variance)
            if [[ ! "$2" =~ $number_re ]]; then
              echo "Invalid value for $1: $2 (expected a number)" >&2
              exit 1
            fi
            TARGET_TOKEN_VARIANCE="$2"
            ;;
        esac
        shift 2
        ;;
      -h | --help)
        print_usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1"
        echo "Use -h or --help for usage information"
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# Run main loop. No arguments are passed: the command-line options have
# already been parsed into global variables above.
main