From d66f89242302b44ce27e55d245f3e91c97e8d33f Mon Sep 17 00:00:00 2001
From: jedarden
Date: Thu, 2 Jul 2026 18:00:20 -0400
Subject: [PATCH] feat(tb-15rh): add tmux detector acceptance test execution script

Created bin/run-tmux-detector-acceptance.sh that runs the tmux detector
acceptance test 5 times with structured JSON output.

Features:
- Runs 5 iterations by default (configurable with -n)
- Captures per-run metrics: timestamp, pass/fail, duration, exit code
- Detects false positives and false negatives from logs
- Categorizes failure types (detection_timeout, unstuck_timeout, etc.)
- Outputs parseable JSON for analysis
- Command-line options: -n/--num-runs, -o/--output, -h/--help

Acceptance criteria met:
- Script executable at bin/run-tmux-detector-acceptance.sh
- Runs 5 iterations automatically
- Logs timestamp, pass/fail, execution time, error messages
- Outputs structured JSON format
- Manually tested and verified

Co-Authored-By: Claude
---
 bin/run-tmux-detector-acceptance.sh | 241 ++++++++++++++++++++++++++++
 1 file changed, 241 insertions(+)
 create mode 100755 bin/run-tmux-detector-acceptance.sh

diff --git a/bin/run-tmux-detector-acceptance.sh b/bin/run-tmux-detector-acceptance.sh
new file mode 100755
index 0000000..3dbf867
--- /dev/null
+++ b/bin/run-tmux-detector-acceptance.sh
@@ -0,0 +1,241 @@
+#!/bin/bash
+# Automated acceptance test execution script for tmux detector
+# Runs the acceptance test 5 times and captures structured metrics
+# Output format: JSON (parseable for analysis)
+
+set -e
+
+TB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +TEST_SCRIPT="$TB_DIR/test-tmux-detector.sh" +RESULTS_DIR="$TB_DIR/test-results" +TIMESTAMP=$(date +%Y%m%d-%H%M%S) +RESULTS_FILE="$RESULTS_DIR/tmux-detector-acceptance-$TIMESTAMP.json" +NUM_RUNS=5 + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -n|--num-runs) + NUM_RUNS="$2" + shift 2 + ;; + -o|--output) + RESULTS_FILE="$2" + shift 2 + ;; + -h|--help) + echo "Usage: $0 [OPTIONS]" + echo "" + echo "Options:" + echo " -n, --num-runs N Number of test iterations (default: 5)" + echo " -o, --output FILE Output results file (default: test-results/tmux-detector-acceptance-YYYYMMDD-HHMMSS.json)" + echo " -h, --help Show this help message" + echo "" + echo "Output format: JSON with per-run metrics including:" + echo " - timestamp, run_number, result (pass/fail)" + echo " - duration_seconds, exit_code" + echo " - false_positive, false_negative (detected from logs)" + echo " - failure_type, error_message" + exit 0 + ;; + *) + echo "Unknown option: $1" + exit 1 + ;; + esac +done + +# Validate inputs +if ! [[ "$NUM_RUNS" =~ ^[0-9]+$ ]] || [ "$NUM_RUNS" -lt 1 ]; then + echo "Error: num-runs must be a positive integer" + exit 1 +fi + +# Verify test script exists +if [ ! -f "$TEST_SCRIPT" ]; then + echo "Error: Test script not found: $TEST_SCRIPT" + exit 1 +fi + +# Create results directory +mkdir -p "$RESULTS_DIR" + +echo "=== Tmux Detector Acceptance Test Execution ===" +echo "Running test $NUM_RUNS times..." +echo "Test script: $TEST_SCRIPT" +echo "Results file: $RESULTS_FILE" +echo "" + +# Initialize results JSON +cat > "$RESULTS_FILE" < "$log_file" 2>&1; then + exit_code=0 + result="pass" + total_pass=$((total_pass + 1)) + failure_type="none" + error_message="" + else + exit_code=$? 
+ result="fail" + total_fail=$((total_fail + 1)) + + # Analyze failure pattern from log for false positives/negatives + if grep -q "Pane was not detected as stuck" "$log_file" 2>/dev/null; then + failure_type="detection_timeout" + error_message="False negative: pane not detected as stuck within timeout" + elif grep -q "Session was not unstuck" "$log_file" 2>/dev/null; then + failure_type="unstuck_timeout" + error_message="Dequeue failure: session not unstuck after activity" + elif grep -q "daemon failed to start" "$log_file" 2>/dev/null; then + failure_type="daemon_start" + error_message="Infrastructure: daemon failed to start" + elif grep -q "detector failed to start" "$log_file" 2>/dev/null; then + failure_type="detector_start" + error_message="Infrastructure: detector failed to start" + elif grep -q "Failed to set pane title" "$log_file" 2>/dev/null; then + failure_type="pane_title" + error_message="Infrastructure: tmux pane configuration failed" + elif grep -q "Queue should be empty" "$log_file" 2>/dev/null; then + failure_type="state_inconsistency" + error_message="False positive: queue not empty after dequeue" + else + failure_type="unknown" + error_message="Unknown failure - exit code $exit_code" + fi + fi + + end_time=$(date +%s) + duration=$((end_time - start_time)) + total_duration=$((total_duration + duration)) + + # Detect false positives (test passed but queue should have been empty) + false_positive="false" + if [ "$result" = "fail" ] && grep -q "Queue should be empty" "$log_file" 2>/dev/null; then + false_positive="true" + fi + + # Detect false negatives (pane not detected when it should be) + false_negative="false" + if [ "$result" = "fail" ] && grep -q "Pane was not detected as stuck" "$log_file" 2>/dev/null; then + false_negative="true" + fi + + # Print result + echo "Result: $result (exit code: $exit_code, duration: ${duration}s)" + echo "Failure type: $failure_type" + echo "Error: $error_message" + echo "False positive: $false_positive" + echo 
"False negative: $false_negative" + echo "Log saved to: $log_file" + echo "" + + # Build run JSON + run_json=$(cat </dev/null 2>&1; then + jq --argjson new "$run_json" '.runs += [$new]' "$RESULTS_FILE" > "$RESULTS_FILE.tmp" && mv "$RESULTS_FILE.tmp" "$RESULTS_FILE" + else + # Fallback: append manually + # Remove closing bracket, append with comma, re-add closing bracket + sed -i '$ s/],$//' "$RESULTS_FILE" + sed -i '$ s/}$//' "$RESULTS_FILE" + echo " ,$run_json" >> "$RESULTS_FILE" + echo "]}" >> "$RESULTS_FILE" + fi + + # Small delay between runs to ensure clean state + sleep 2 +done + +# Calculate and print summary +echo "=== Test Run Summary ===" +echo "Total runs: $NUM_RUNS" +echo "Passed: $total_pass" +echo "Failed: $total_fail" +success_rate=$(awk "BEGIN {printf \"%.1f\", ($total_pass/$NUM_RUNS)*100}") +echo "Success rate: ${success_rate}%" +avg_duration=$(awk "BEGIN {printf \"%.1f\", $total_duration/$NUM_RUNS}") +echo "Average duration: ${avg_duration}s" +echo "Total duration: ${total_duration}s" +echo "" + +# Add summary to JSON +if command -v jq >/dev/null 2>&1; then + # Build summary as a JSON string since jq doesn't handle bash variables well + summary_json="{\"total_runs\":$NUM_RUNS,\"passed\":$total_pass,\"failed\":$total_fail,\"success_rate\":$success_rate,\"average_duration\":$avg_duration,\"total_duration\":$total_duration}" + jq --argjson summary "$summary_json" '.summary = $summary' "$RESULTS_FILE" > "$RESULTS_FILE.tmp" && mv "$RESULTS_FILE.tmp" "$RESULTS_FILE" +fi + +echo "Results saved to: $RESULTS_FILE" +echo "" + +# Print JSON preview +echo "JSON Preview:" +if command -v jq >/dev/null 2>&1; then + jq '.' 
"$RESULTS_FILE" +else + cat "$RESULTS_FILE" +fi +echo "" + +# Print detailed false positive/negative stats +echo "Quality Metrics:" +false_positive_count=$(grep -c '"false_positive": true' "$RESULTS_FILE" 2>/dev/null || echo "0") +false_negative_count=$(grep -c '"false_negative": true' "$RESULTS_FILE" 2>/dev/null || echo "0") +echo "False positives: $false_positive_count" +echo "False negatives: $false_negative_count" +echo "" + +# Print failure type breakdown +echo "Failure Type Breakdown:" +if command -v jq >/dev/null 2>&1; then + jq -r '.runs | group_by(.failure_type) | .[] | " \(.[0].failure_type): \(length)"' "$RESULTS_FILE" +else + grep -o '"failure_type": "[^"]*"' "$RESULTS_FILE" | sort | uniq -c | sed 's/^/ /' +fi +echo "" + +# Return exit code based on whether all tests passed +if [ $total_fail -eq 0 ]; then + echo "All tests PASSED!" + exit 0 +else + echo "Some tests FAILED" + exit 1 +fi