zai-proxy/proxy/scripts/load-test-proxy.sh
jedarden e7c24a0c08 feat: initial zai-proxy ecosystem repo
Extracted from ardenone-cluster/containers/zai-proxy and
ardenone-cluster/containers/zai-proxy-dashboard.

- proxy/: OpenAI-compatible ZAI reverse proxy (Go, v1.10.0)
  - Token counting, rate limiting, Prometheus metrics, canary support
- dashboard/: Metrics dashboard backend + React frontend (Go, v1.0.0)
  - Prometheus collector, SQLite storage, SSE live updates
- docs/: Operational notes, research, and plan subdirs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-16 15:53:52 -04:00

255 lines
8.6 KiB
Bash
Executable file

#!/bin/bash
#
# HTTP load test for zai-proxy.
# Measures token counting overhead while the proxy is under concurrent load.
#
# Environment:
#   PROXY_URL    base URL of the proxy (default: http://localhost:8080)
#   ZAI_API_KEY  bearer token sent with every request (default: test-key)
set -euo pipefail

# Run from the parent of the directory that holds this script.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_DIR=$(cd "${SCRIPT_DIR}/.." && pwd)
cd "${PROJECT_DIR}"

# ANSI color codes, kept as literal backslash sequences that are expanded
# only when a message is printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset

# Target proxy and credentials (overridable through the environment).
PROXY_URL=${PROXY_URL:-http://localhost:8080}
API_KEY=${ZAI_API_KEY:-test-key}

# Wrap a user message ($1) in the non-streaming request body used by every
# test payload. The message is inserted verbatim, so it must already be
# JSON-safe.
_chat_payload() {
  printf '{"model":"glm-4","messages":[{"role":"user","content":"%s"}],"stream":false}' "$1"
}

# Request payloads of increasing prompt size.
SMALL_PROMPT=$(_chat_payload 'What is the capital of France?')
MEDIUM_PROMPT=$(_chat_payload 'Explain the history of the Roman Empire in detail, including its founding, major expansion periods, key emperors, political structure, military campaigns, economic system, social hierarchy, cultural achievements, architectural innovations, legal developments, religious evolution, and eventual decline.')
LARGE_PROMPT=$(_chat_payload 'Provide a comprehensive analysis of artificial intelligence covering: 1) Historical development from Turing test to modern deep learning, 2) Machine learning fundamentals including supervised, unsupervised, and reinforcement learning, 3) Neural network architectures from perceptrons to transformers, 4) Natural language processing breakthroughs, 5) Computer vision applications, 6) Ethical considerations and bias mitigation, 7) Future research directions including AGI, 8) Industry applications across healthcare, finance, transportation, and creative fields, 9) Technical challenges in scaling, interpretability, and safety, 10) Societal impacts on employment, privacy, and human-computer interaction.')
#######################################
# Print a message wrapped in a color sequence, followed by a newline.
# Globals:   NC (read) - reset sequence appended after the message
# Arguments: $1 - color sequence; backslash escapes in it are expanded
#            $2 - message text, printed verbatim
# Outputs:   the colored message on stdout
#######################################
print_color() {
  local color=$1
  local text=$2
  # %b expands escapes in the color codes only; %s keeps the message literal,
  # so a backslash sequence inside the text (e.g. "\n") is not interpreted.
  printf '%b%s%b\n' "$color" "$text" "$NC"
}
#######################################
# Confirm the proxy answers on its /health endpoint; abort the script if not.
# Globals:   PROXY_URL, RED, YELLOW (read)
# Outputs:   an error line and a start-up hint on stdout when the check fails
# Returns:   0 when the health request succeeds; otherwise exits with status 1
#######################################
check_proxy() {
  # --fail turns HTTP error responses into a non-zero curl exit status.
  if curl --silent --fail "$PROXY_URL/health" &> /dev/null; then
    return 0
  fi

  print_color "$RED" "Error: Proxy is not running at $PROXY_URL"
  print_color "$YELLOW" "Start the proxy with: go run ."
  exit 1
}
#######################################
# Send one POST request to the proxy and report the outcome as one record.
# Globals:   PROXY_URL, API_KEY (read)
# Arguments: $1 - JSON request body
#            $2 - identifier copied into the record
# Outputs:   "request_id|http_code|total_time|start_time|end_time" on stdout.
#            http_code is 000 and total_time is 0 when curl yields no usable
#            status/timing pair.
# Returns:   0; request failures are reported in the record, not the status
#######################################
make_request() {
  local prompt=$1
  local request_id=$2
  local start_time end_time response
  local http_code='' total_time=''
  local -r code_re='^[0-9]{3}$'
  local -r time_re='^[0-9]+([.][0-9]+)?$'

  start_time=$(date +%s.%N)
  # The response body is never inspected, so it is discarded and only curl's
  # status/timing summary is captured. Assignment is kept separate from the
  # "local" declaration so a curl failure is handled here instead of being
  # masked; the failure then shows up as an invalid or 000 status below.
  response=$(curl -s -o /dev/null -w '%{http_code} %{time_total}' \
    -X POST \
    "$PROXY_URL/v1/messages" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $API_KEY" \
    -d "$prompt" \
    2> /dev/null) || true
  end_time=$(date +%s.%N)

  IFS=' ' read -r http_code total_time _ <<< "$response" || true
  # Never let malformed output reach the "|"-separated record: downstream
  # parsing relies on exactly one well-formed line per request.
  if ! [[ "$http_code" =~ $code_re && "$total_time" =~ $time_re ]]; then
    http_code=000
    total_time=0
  fi

  echo "$request_id|$http_code|$total_time|$start_time|$end_time"
}
#######################################
# Run one load-test scenario: spread the requests over parallel workers,
# wait for all of them, then print aggregate statistics.
# Globals:   BLUE, GREEN, YELLOW (read)
# Arguments: $1 - number of parallel workers (integer >= 1)
#            $2 - total number of requests across all workers (integer >= 0)
#            $3 - JSON request body handed to make_request
#            $4 - scenario name used in the output
# Outputs:   scenario header and statistics on stdout. Response-time figures
#            cover only requests that returned HTTP 200 and are reported as 0
#            when none did.
# Returns:   0 on completion, 1 on invalid arguments or failed aggregation
#######################################
run_load_test() {
  local concurrency=$1
  local total_requests=$2
  local prompt=$3
  local test_name=$4
  local -r uint_re='^[0-9]+$'

  if ! [[ "$concurrency" =~ $uint_re && "$total_requests" =~ $uint_re ]]; then
    echo "run_load_test: concurrency and total requests must be non-negative integers" >&2
    return 1
  fi
  # Force base 10 so values with leading zeros are not parsed as octal.
  concurrency=$((10#$concurrency))
  total_requests=$((10#$total_requests))
  if ((concurrency < 1)); then
    echo "run_load_test: concurrency must be at least 1" >&2
    return 1
  fi

  print_color "$BLUE" "Running: $test_name"
  echo " Concurrency: $concurrency"
  echo " Total requests: $total_requests"
  echo ""

  local requests_per_batch=$((total_requests / concurrency))
  local remaining=$((total_requests % concurrency))
  local pids=()
  local temp_files=()
  local i j batch_size temp_file pid
  local start_time end_time

  start_time=$(date +%s.%N)

  # Launch the workers; each writes one record per request to its own file.
  for ((i = 0; i < concurrency; i++)); do
    batch_size=$requests_per_batch
    # The first "remaining" workers take one extra request each.
    if ((i < remaining)); then
      batch_size=$((batch_size + 1))
    fi
    temp_file=$(mktemp)
    temp_files+=("$temp_file")
    (
      for ((j = 0; j < batch_size; j++)); do
        make_request "$prompt" "$i-$j"
        # Small delay to avoid overwhelming the proxy.
        sleep 0.01
      done
    ) > "$temp_file" &
    pids+=("$!")
  done

  # A failed worker must not abort the run; its completed requests still count.
  for pid in "${pids[@]}"; do
    wait "$pid" 2> /dev/null || true
  done
  end_time=$(date +%s.%N)

  # Aggregate every record in a single awk pass. Counters are never bumped
  # with "((n++))", whose status is 1 when n is 0 and would end the script
  # under "set -e" on the very first record.
  local stats
  if ! stats=$(LC_ALL=C awk -F'|' -v t_start="$start_time" -v t_end="$end_time" '
    {
      completed++
      if ($2 == "200") {
        ok++
        t = $3 + 0
        sum += t
        if (ok == 1 || t < min_v) { min_v = t; min_s = ($3 == "" ? "0" : $3) }
        if (ok == 1 || t > max_v) { max_v = t; max_s = ($3 == "" ? "0" : $3) }
      }
    }
    END {
      elapsed = t_end - t_start
      printf "%d %d %d %.3f %s %s %.2f %.2f\n",
        completed, ok, completed - ok,
        (ok > 0 ? sum / ok : 0),
        (ok > 0 ? min_s : "0"),
        (ok > 0 ? max_s : "0"),
        elapsed,
        (elapsed > 0 ? completed / elapsed : 0)
    }
  ' "${temp_files[@]}"); then
    rm -f -- "${temp_files[@]}"
    echo "run_load_test: failed to aggregate results" >&2
    return 1
  fi
  rm -f -- "${temp_files[@]}"

  local total_requests_completed successful_requests failed_requests
  local avg_time min_time max_time total_test_time requests_per_sec
  IFS=' ' read -r total_requests_completed successful_requests failed_requests \
    avg_time min_time max_time total_test_time requests_per_sec <<< "$stats"

  print_color "$GREEN" "Results: $test_name"
  echo " Total requests: $total_requests_completed"
  echo " Successful: $successful_requests"
  echo " Failed: $failed_requests"
  echo " Total time: $total_test_time seconds"
  echo " Requests/sec: $requests_per_sec"
  echo " Avg response time: ${avg_time}s"
  echo " Min response time: ${min_time}s"
  echo " Max response time: ${max_time}s"
  echo ""

  # Check latency target.
  if LC_ALL=C awk -v avg="$avg_time" 'BEGIN { exit !(avg > 5) }'; then
    print_color "$YELLOW" " WARNING: Avg response time exceeds 5s (consider increasing timeout)"
  fi
}
#######################################
# Print instructions for comparing runs with and without token counting.
# Globals:   BLUE, YELLOW (read)
# Outputs:   the instructions on stdout
#######################################
compare_token_counting() {
  local rule="======================================"

  print_color "$BLUE" "$rule"
  print_color "$BLUE" "Token Counting Overhead Comparison"
  print_color "$BLUE" "$rule"
  printf '%s\n' ""

  print_color "$YELLOW" "This test requires running the proxy with and without token counting."
  print_color "$YELLOW" "Run the following in separate terminals:"
  # One argument per output line; "" yields a blank line.
  printf '%s\n' \
    "" \
    " Terminal 1 (with counting):" \
    " TOKEN_COUNTING_ENABLED=true go run ." \
    "" \
    " Terminal 2 (without counting):" \
    " TOKEN_COUNTING_ENABLED=false go run ." \
    ""

  print_color "$YELLOW" "Then run this script for each configuration:"
  printf '%s\n' \
    " PROXY_URL=http://localhost:8080 ./scripts/load-test-proxy.sh" \
    ""
}
#######################################
# Run the full load-test suite against the proxy.
# Checks that the proxy is reachable, reports whether token counting appears
# enabled (based on the /metrics output), then runs the scenarios in order.
# Globals:   PROXY_URL, SMALL_PROMPT, MEDIUM_PROMPT, LARGE_PROMPT,
#            BLUE, GREEN, YELLOW (read)
# Arguments: none used
# Outputs:   progress and results on stdout
#######################################
main() {
  local -r rule="======================================"

  print_color "$BLUE" "$rule"
  print_color "$BLUE" "zai-proxy HTTP Load Test"
  print_color "$BLUE" "$rule"
  echo ""
  echo "Proxy URL: $PROXY_URL"
  echo ""

  # Check if proxy is running (exits the script when it is not).
  check_proxy
  print_color "$GREEN" "Proxy is running"
  echo ""

  # Check token counting status from metrics; a failed fetch counts as
  # "disabled" rather than aborting the run.
  local metrics
  metrics=$(curl -s "$PROXY_URL/metrics" 2> /dev/null || true)
  # Match inside the shell instead of piping into "grep -q": with pipefail,
  # grep exiting on the first match can kill the writer with SIGPIPE on a
  # large payload, which made the pipeline fail and report DISABLED.
  if [[ "$metrics" == *zai_proxy_tokens_total* ]]; then
    print_color "$GREEN" "Token counting: ENABLED"
  else
    print_color "$YELLOW" "Token counting: DISABLED"
  fi
  echo ""

  # Run load tests with different concurrency levels.
  print_color "$BLUE" "$rule"
  print_color "$BLUE" "Load Test Scenarios"
  print_color "$BLUE" "$rule"
  echo ""

  # Small prompt at low, medium and high concurrency.
  run_load_test 10 50 "$SMALL_PROMPT" "Small prompt, 10 concurrent"
  run_load_test 50 100 "$SMALL_PROMPT" "Small prompt, 50 concurrent"
  run_load_test 100 200 "$SMALL_PROMPT" "Small prompt, 100 concurrent"
  echo ""

  # Medium prompt, medium concurrency.
  run_load_test 50 100 "$MEDIUM_PROMPT" "Medium prompt, 50 concurrent"
  echo ""

  # Large prompt, low concurrency.
  run_load_test 10 20 "$LARGE_PROMPT" "Large prompt, 10 concurrent"
  echo ""

  print_color "$GREEN" "$rule"
  print_color "$GREEN" "Load Test Complete"
  print_color "$GREEN" "$rule"
  echo ""
  echo "To compare with/without token counting, restart proxy with:"
  echo " TOKEN_COUNTING_ENABLED=false"
  echo ""
}
# Entry point: "--compare" as the first argument prints the comparison
# instructions and stops; anything else runs the load tests.
case "${1:-}" in
  --compare)
    compare_token_counting
    exit 0
    ;;
  *)
    main "$@"
    ;;
esac