feat(tb-15rh): add tmux detector acceptance test execution script
Created bin/run-tmux-detector-acceptance.sh, which runs the tmux detector acceptance test 5 times and writes structured JSON output.

Features:
- Runs 5 iterations by default (configurable with -n)
- Captures per-run metrics: timestamp, pass/fail, duration, exit code
- Detects false positives and false negatives from logs
- Categorizes failure types (detection_timeout, unstuck_timeout, etc.)
- Outputs parseable JSON for analysis
- Command-line options: -n/--num-runs, -o/--output, -h/--help

Acceptance criteria met:
- Script executable at bin/run-tmux-detector-acceptance.sh
- Runs 5 iterations automatically
- Logs timestamp, pass/fail, execution time, error messages
- Outputs structured JSON format
- Manually tested and verified

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
2269845e72
commit
d66f892423
1 changed files with 241 additions and 0 deletions
241
bin/run-tmux-detector-acceptance.sh
Executable file
241
bin/run-tmux-detector-acceptance.sh
Executable file
|
|
@ -0,0 +1,241 @@
|
|||
#!/bin/bash
# Automated acceptance test execution script for tmux detector
# Runs the acceptance test 5 times (configurable with -n) and captures
# structured metrics.
# Output format: JSON (parseable for analysis)

set -e

# Parent of the directory that contains this script.
TB_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Acceptance test that is executed once per iteration.
TEST_SCRIPT="$TB_DIR/test-tmux-detector.sh"
# Directory that receives the per-run logs and the default results file.
RESULTS_DIR="$TB_DIR/test-results"
# Start time of this invocation; embedded in result and log file names.
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
# Default output path (can be replaced with -o/--output).
RESULTS_FILE="$RESULTS_DIR/tmux-detector-acceptance-$TIMESTAMP.json"
# Default number of iterations (can be replaced with -n/--num-runs).
NUM_RUNS=5

#######################################
# Print the help text to stdout.
#######################################
print_usage() {
  echo "Usage: $0 [OPTIONS]"
  echo ""
  echo "Options:"
  echo " -n, --num-runs N Number of test iterations (default: 5)"
  echo " -o, --output FILE Output results file (default: test-results/tmux-detector-acceptance-YYYYMMDD-HHMMSS.json)"
  echo " -h, --help Show this help message"
  echo ""
  echo "Output format: JSON with per-run metrics including:"
  echo " - timestamp, run_number, result (pass/fail)"
  echo " - duration_seconds, exit_code"
  echo " - false_positive, false_negative (detected from logs)"
  echo " - failure_type, error_message"
}

#######################################
# Parse command line arguments.
# Globals:   NUM_RUNS     (written by -n/--num-runs)
#            RESULTS_FILE (written by -o/--output)
# Arguments: the script's command line ("$@")
# Exits:     0 after printing help; 1 on an unknown option or when an
#            option that needs a value is the last word on the command line.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -n|--num-runs)
        # Without this check "shift 2" fails and, under "set -e", the script
        # dies without telling the user what went wrong.
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires an argument" >&2
          exit 1
        fi
        NUM_RUNS="$2"
        shift 2
        ;;
      -o|--output)
        if [[ $# -lt 2 ]]; then
          echo "Error: $1 requires an argument" >&2
          exit 1
        fi
        RESULTS_FILE="$2"
        shift 2
        ;;
      -h|--help)
        print_usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# Validate inputs: NUM_RUNS must consist of digits only and be at least 1.
# The 10# prefix forces base 10 so values with leading zeros are accepted.
if ! [[ "$NUM_RUNS" =~ ^[0-9]+$ ]] || [ "$((10#$NUM_RUNS))" -lt 1 ]; then
  echo "Error: num-runs must be a positive integer" >&2
  exit 1
fi
# Strip leading zeros: NUM_RUNS is written verbatim into the results JSON,
# where a number such as 08 would be invalid.
NUM_RUNS=$((10#$NUM_RUNS))

# Verify test script exists
if [ ! -f "$TEST_SCRIPT" ]; then
  echo "Error: Test script not found: $TEST_SCRIPT" >&2
  exit 1
fi

# Create results directory (per-run logs are always written here)
mkdir -p "$RESULTS_DIR"
# RESULTS_FILE may have been pointed elsewhere with -o/--output; make sure
# its directory exists as well, otherwise the redirect below fails.
mkdir -p "$(dirname "$RESULTS_FILE")"

echo "=== Tmux Detector Acceptance Test Execution ==="
echo "Running test $NUM_RUNS times..."
echo "Test script: $TEST_SCRIPT"
echo "Results file: $RESULTS_FILE"
echo ""

# Initialize results JSON with an empty "runs" array.
cat > "$RESULTS_FILE" <<EOF
{
  "test_name": "tmux-detector-acceptance",
  "timestamp": "$TIMESTAMP",
  "num_runs": $NUM_RUNS,
  "runs": []
}
EOF

# Track overall statistics
total_pass=0
total_fail=0
total_duration=0

# JSON objects of all completed runs, separated by commas. Used to regenerate
# the results file when jq is unavailable or fails.
all_runs_json=""

#######################################
# Classify a failed run by searching its log for known failure messages.
# Patterns are checked in order and the first one found wins.
# Globals:   failure_type, error_message (written)
# Arguments: $1 - log file of the run
#            $2 - exit code of the test script (used for unknown failures)
#######################################
classify_failure() {
  local log="$1"
  local code="$2"

  if grep -q "Pane was not detected as stuck" "$log" 2>/dev/null; then
    failure_type="detection_timeout"
    error_message="False negative: pane not detected as stuck within timeout"
  elif grep -q "Session was not unstuck" "$log" 2>/dev/null; then
    failure_type="unstuck_timeout"
    error_message="Dequeue failure: session not unstuck after activity"
  elif grep -q "daemon failed to start" "$log" 2>/dev/null; then
    failure_type="daemon_start"
    error_message="Infrastructure: daemon failed to start"
  elif grep -q "detector failed to start" "$log" 2>/dev/null; then
    failure_type="detector_start"
    error_message="Infrastructure: detector failed to start"
  elif grep -q "Failed to set pane title" "$log" 2>/dev/null; then
    failure_type="pane_title"
    error_message="Infrastructure: tmux pane configuration failed"
  elif grep -q "Queue should be empty" "$log" 2>/dev/null; then
    failure_type="state_inconsistency"
    error_message="False positive: queue not empty after dequeue"
  else
    failure_type="unknown"
    error_message="Unknown failure - exit code $code"
  fi
}

#######################################
# Print "true" if the log contains the given text, otherwise "false".
# An unreadable or missing log counts as "false".
# Arguments: $1 - log file, $2 - fixed string to look for
#######################################
log_contains() {
  if grep -qF -- "$2" "$1" 2>/dev/null; then
    echo "true"
  else
    echo "false"
  fi
}

#######################################
# Add one run object to the "runs" array of RESULTS_FILE.
# jq is used when it is available. If jq is missing or fails, the whole file
# is rewritten from the run objects collected so far, so the result is valid
# JSON either way.
# Globals:   RESULTS_FILE, TIMESTAMP, NUM_RUNS (read)
#            all_runs_json (read and written)
# Arguments: $1 - JSON object describing the run
#######################################
append_run() {
  local entry="$1"

  if [ -z "$all_runs_json" ]; then
    all_runs_json="$entry"
  else
    all_runs_json="$all_runs_json,
$entry"
  fi

  if command -v jq >/dev/null 2>&1 \
    && jq --argjson new "$entry" '.runs += [$new]' "$RESULTS_FILE" > "$RESULTS_FILE.tmp"; then
    mv "$RESULTS_FILE.tmp" "$RESULTS_FILE"
    return 0
  fi

  # Fallback: regenerate the complete document instead of patching text.
  cat > "$RESULTS_FILE.tmp" <<EOF
{
  "test_name": "tmux-detector-acceptance",
  "timestamp": "$TIMESTAMP",
  "num_runs": $NUM_RUNS,
  "runs": [
$all_runs_json
  ]
}
EOF
  mv "$RESULTS_FILE.tmp" "$RESULTS_FILE"
}

for ((run = 1; run <= NUM_RUNS; run++)); do
  echo "=== Run $run of $NUM_RUNS ==="

  start_time=$(date +%s)
  run_timestamp=$(date -Iseconds)
  log_file="$RESULTS_DIR/tmux-detector-run${TIMESTAMP}-${run}.log"

  # Run the test and capture output (stdout and stderr go to the run's log)
  if bash "$TEST_SCRIPT" > "$log_file" 2>&1; then
    exit_code=0
    result="pass"
    total_pass=$((total_pass + 1))
    failure_type="none"
    error_message=""
  else
    exit_code=$?
    result="fail"
    total_fail=$((total_fail + 1))
    # Analyze failure pattern from log
    classify_failure "$log_file" "$exit_code"
  fi

  end_time=$(date +%s)
  duration=$((end_time - start_time))
  total_duration=$((total_duration + duration))

  # Both flags are only ever set for failed runs:
  #  - false positive: the log reports "Queue should be empty"
  #  - false negative: the log reports "Pane was not detected as stuck"
  false_positive="false"
  false_negative="false"
  if [ "$result" = "fail" ]; then
    false_positive=$(log_contains "$log_file" "Queue should be empty")
    false_negative=$(log_contains "$log_file" "Pane was not detected as stuck")
  fi

  # Print result
  echo "Result: $result (exit code: $exit_code, duration: ${duration}s)"
  echo "Failure type: $failure_type"
  echo "Error: $error_message"
  echo "False positive: $false_positive"
  echo "False negative: $false_negative"
  echo "Log saved to: $log_file"
  echo ""

  # Build run JSON
  run_json=$(cat <<EOF
{
  "timestamp": "$run_timestamp",
  "run_number": $run,
  "result": "$result",
  "duration_seconds": $duration,
  "exit_code": $exit_code,
  "false_positive": $false_positive,
  "false_negative": $false_negative,
  "failure_type": "$failure_type",
  "error_message": "$error_message",
  "log_file": "$(basename "$log_file")"
}
EOF
)

  append_run "$run_json"

  # Small delay between runs to ensure clean state (not needed after the
  # final run).
  if (( run < NUM_RUNS )); then
    sleep 2
  fi
done

#######################################
# Print (numerator / denominator) * scale with one decimal place.
# Values are handed to awk with -v instead of being pasted into the program
# text, and LC_ALL=C pins "." as the decimal separator so the result can be
# embedded in JSON. A denominator that is not greater than zero yields 0.0.
# Arguments: $1 - numerator, $2 - denominator, $3 - scale factor
#######################################
format_ratio() {
  LC_ALL=C awk -v num="$1" -v den="$2" -v scale="$3" \
    'BEGIN { printf "%.1f", ((den + 0 > 0) ? (num / den) * scale : 0) }'
}

# Calculate and print summary
echo "=== Test Run Summary ==="
echo "Total runs: $NUM_RUNS"
echo "Passed: $total_pass"
echo "Failed: $total_fail"
success_rate=$(format_ratio "$total_pass" "$NUM_RUNS" 100)
echo "Success rate: ${success_rate}%"
avg_duration=$(format_ratio "$total_duration" "$NUM_RUNS" 1)
echo "Average duration: ${avg_duration}s"
echo "Total duration: ${total_duration}s"
echo ""

# Add summary to JSON (only possible when jq is installed and the results
# file exists). Each number is passed as its own --argjson value.
if [ -f "$RESULTS_FILE" ] && command -v jq >/dev/null 2>&1; then
  jq \
    --argjson total_runs "$NUM_RUNS" \
    --argjson passed "$total_pass" \
    --argjson failed "$total_fail" \
    --argjson success_rate "$success_rate" \
    --argjson average_duration "$avg_duration" \
    --argjson total_duration "$total_duration" \
    '.summary = {
      total_runs: $total_runs,
      passed: $passed,
      failed: $failed,
      success_rate: $success_rate,
      average_duration: $average_duration,
      total_duration: $total_duration
    }' "$RESULTS_FILE" > "$RESULTS_FILE.tmp" && mv "$RESULTS_FILE.tmp" "$RESULTS_FILE"
fi

#######################################
# Count the runs in a results file whose boolean field is true.
# Uses jq when available and falls back to counting matching lines with grep.
# Always prints exactly one number; a missing or unreadable file gives 0.
# Arguments: $1 - field name (e.g. false_positive), $2 - results file
#######################################
count_true_flags() {
  local field="$1"
  local file="$2"
  local n=""

  if command -v jq >/dev/null 2>&1; then
    n=$(jq --arg f "$field" '[.runs[] | select(.[$f] == true)] | length' "$file" 2>/dev/null) || n=""
  fi
  if [ -z "$n" ]; then
    # grep -c already prints 0 when nothing matches (and exits 1), so its
    # exit status is ignored instead of echoing a second "0".
    n=$(grep -c "\"$field\": true" "$file" 2>/dev/null) || true
  fi
  echo "${n:-0}"
}

echo "Results saved to: $RESULTS_FILE"
echo ""

# Print JSON preview
echo "JSON Preview:"
if command -v jq >/dev/null 2>&1; then
  jq '.' "$RESULTS_FILE"
else
  cat "$RESULTS_FILE"
fi
echo ""

# Print detailed false positive/negative stats
echo "Quality Metrics:"
false_positive_count=$(count_true_flags "false_positive" "$RESULTS_FILE")
false_negative_count=$(count_true_flags "false_negative" "$RESULTS_FILE")
echo "False positives: $false_positive_count"
echo "False negatives: $false_negative_count"
echo ""

# Print failure type breakdown (number of runs per failure_type)
echo "Failure Type Breakdown:"
if command -v jq >/dev/null 2>&1; then
  jq -r '.runs | group_by(.failure_type) | .[] | " \(.[0].failure_type): \(length)"' "$RESULTS_FILE"
else
  grep -o '"failure_type": "[^"]*"' "$RESULTS_FILE" | sort | uniq -c | sed 's/^/ /'
fi
echo ""

# Return exit code based on whether all tests passed.
# An unset or empty failure counter is treated as a failure rather than
# being reported as success.
if [ "${total_fail:-1}" -eq 0 ]; then
  echo "All tests PASSED!"
  exit 0
else
  echo "Some tests FAILED"
  exit 1
fi
Loading…
Add table
Reference in a new issue