Extracted from ardenone-cluster/containers/zai-proxy and ardenone-cluster/containers/zai-proxy-dashboard.

- proxy/: OpenAI-compatible ZAI reverse proxy (Go, v1.10.0)
  - Token counting, rate limiting, Prometheus metrics, canary support
- dashboard/: Metrics dashboard backend + React frontend (Go, v1.0.0)
  - Prometheus collector, SQLite storage, SSE live updates
- docs/: Operational notes, research, and plan subdirs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
255 lines
8.6 KiB
Bash
Executable file
255 lines
8.6 KiB
Bash
Executable file
#!/bin/bash
set -euo pipefail

# HTTP Load Test for zai-proxy
# Tests token counting overhead under concurrent load
#
# Usage:
#   ./scripts/load-test-proxy.sh            run every load-test scenario
#   ./scripts/load-test-proxy.sh --compare  print instructions for comparing
#                                           runs with/without token counting
#
# Environment:
#   PROXY_URL    base URL of the proxy under test (default http://localhost:8080)
#   ZAI_API_KEY  bearer token sent with every request (default test-key)

# Resolve the directory above this script and run from there, so the script
# behaves the same no matter where it is invoked from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$PROJECT_DIR"

# Colors (escape sequences are stored unexpanded; the printing helper
# expands them at output time)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
readonly RED GREEN YELLOW BLUE NC

# Configuration
PROXY_URL="${PROXY_URL:-http://localhost:8080}"
API_KEY="${ZAI_API_KEY:-test-key}"
readonly PROXY_URL API_KEY

# Test data sizes: three JSON request bodies of increasing prompt length
SMALL_PROMPT='{"model":"glm-4","messages":[{"role":"user","content":"What is the capital of France?"}],"stream":false}'
MEDIUM_PROMPT='{"model":"glm-4","messages":[{"role":"user","content":"Explain the history of the Roman Empire in detail, including its founding, major expansion periods, key emperors, political structure, military campaigns, economic system, social hierarchy, cultural achievements, architectural innovations, legal developments, religious evolution, and eventual decline."}],"stream":false}'
LARGE_PROMPT='{"model":"glm-4","messages":[{"role":"user","content":"Provide a comprehensive analysis of artificial intelligence covering: 1) Historical development from Turing test to modern deep learning, 2) Machine learning fundamentals including supervised, unsupervised, and reinforcement learning, 3) Neural network architectures from perceptrons to transformers, 4) Natural language processing breakthroughs, 5) Computer vision applications, 6) Ethical considerations and bias mitigation, 7) Future research directions including AGI, 8) Industry applications across healthcare, finance, transportation, and creative fields, 9) Technical challenges in scaling, interpretability, and safety, 10) Societal impacts on employment, privacy, and human-computer interaction."}],"stream":false}'
readonly SMALL_PROMPT MEDIUM_PROMPT LARGE_PROMPT
# Function to print colored output
#
# Globals:   NC (read) - escape sequence that resets the colour
# Arguments: $1 - colour escape sequence (backslash escapes are expanded)
#            $2 - message text, printed literally
# Outputs:   the coloured message followed by a newline on stdout
print_color() {
  local color=$1
  local text=$2
  # %b expands the escape sequences in the colour codes only; %s keeps the
  # message itself literal, so backslashes in the text are never interpreted.
  printf '%b%s%b\n' "$color" "$text" "$NC"
}
# Function to check if proxy is running
#
# Probes "$PROXY_URL/health" and terminates the script when the probe fails.
# Globals:   PROXY_URL, RED, YELLOW (read)
# Outputs:   on failure, an error and a hint on stderr
# Returns:   0 when the probe succeeds; otherwise exits the shell with status 1
check_proxy() {
  # -f turns HTTP error statuses into a failing exit code; the timeouts keep
  # an unresponsive host from hanging the whole script.
  if curl -s -f --connect-timeout 5 --max-time 10 \
    "$PROXY_URL/health" > /dev/null 2>&1; then
    return 0
  fi

  print_color "$RED" "Error: Proxy is not running at $PROXY_URL" >&2
  print_color "$YELLOW" "Start the proxy with: go run ." >&2
  exit 1
}
# Function to make a single request
#
# POSTs one request body to "$PROXY_URL/v1/messages" and prints one result
# record describing the outcome.
# Globals:   PROXY_URL, API_KEY (read)
# Arguments: $1 - JSON request body
#            $2 - request identifier, echoed back in the record
# Outputs:   "<request_id>|<http_code>|<time_total>|<start>|<end>" on stdout
# Returns:   0 even when the request fails, so a failed request is recorded
#            instead of aborting the caller under `set -e`
make_request() {
  local prompt=$1
  local request_id=$2
  local start_time end_time result

  start_time=$(date +%s.%N)
  # The response body is never used, so it is discarded; only the status
  # code and curl's own total-time measurement are captured.
  result=$(curl -s -o /dev/null -w '%{http_code}|%{time_total}' \
    -X POST \
    "$PROXY_URL/v1/messages" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $API_KEY" \
    -d "$prompt" \
    2>/dev/null) || true
  end_time=$(date +%s.%N)

  # Fallback for the case where curl produced no usable output at all
  # (for example, it could not be started).
  if [[ "$result" != *'|'* ]]; then
    result='000|0'
  fi

  local http_code=${result%%|*}
  local total_time=${result#*|}

  echo "$request_id|$http_code|$total_time|$start_time|$end_time"
}
# Function to run concurrent load test
#
# Fans the requests out over background workers, waits for all of them and
# prints aggregate statistics for the scenario.
# Globals:   BLUE, GREEN, YELLOW, RED (read)
# Arguments: $1 - concurrency: number of parallel workers (integer >= 1)
#            $2 - total number of requests, split as evenly as possible
#            $3 - JSON request body handed to make_request
#            $4 - label printed for this scenario
# Outputs:   progress and result summary on stdout
# Returns:   1 when the counts are invalid or no scratch directory can be
#            created, 0 otherwise
run_load_test() {
  local concurrency=$1
  local total_requests=$2
  local prompt=$3
  local test_name=$4

  # Reject values that would otherwise fail later as a division by zero or
  # an arithmetic syntax error.
  local positive_re='^[1-9][0-9]*$'
  local count_re='^(0|[1-9][0-9]*)$'
  if ! [[ "$concurrency" =~ $positive_re && "$total_requests" =~ $count_re ]]; then
    print_color "$RED" "Error: concurrency must be >= 1 and total requests >= 0" >&2
    return 1
  fi

  print_color "$BLUE" "Running: $test_name"
  echo " Concurrency: $concurrency"
  echo " Total requests: $total_requests"
  echo ""

  local requests_per_batch=$((total_requests / concurrency))
  local remaining=$((total_requests % concurrency))

  # One result file per worker, all inside a single scratch directory.
  local results_dir
  results_dir=$(mktemp -d) || return 1

  local pids=()
  local i j batch_size pid
  local start_time end_time
  start_time=$(date +%s.%N)

  # Launch concurrent workers
  for ((i = 0; i < concurrency; i++)); do
    batch_size=$requests_per_batch
    # The first $remaining workers take one extra request each.
    if ((i < remaining)); then
      batch_size=$((batch_size + 1))
    fi

    (
      for ((j = 0; j < batch_size; j++)); do
        make_request "$prompt" "$i-$j"
        # Small delay to avoid overwhelming
        sleep 0.01
      done
    ) > "$results_dir/worker-$i" &

    pids+=("$!")
  done

  # Wait for all workers; a failing worker must not abort the summary.
  for pid in "${pids[@]}"; do
    wait "$pid" 2>/dev/null || true
  done

  end_time=$(date +%s.%N)

  # Collect results and calculate statistics in one awk pass over the
  # "<id>|<http_code>|<time>|<start>|<end>" records. Only HTTP 200 responses
  # count as successful and contribute to the timing figures.
  local stats
  stats=$(LC_ALL=C awk -F'|' -v t_start="$start_time" -v t_end="$end_time" '
    NF == 0 { next }
    {
      total++
      if ($2 == "200") {
        ok++
        t = $3 + 0
        sum += t
        if (ok == 1 || t < min) min = t
        if (ok == 1 || t > max) max = t
      }
    }
    END {
      elapsed = t_end - t_start
      avg = (ok > 0) ? sum / ok : 0
      rps = (elapsed > 0) ? total / elapsed : 0
      printf "%d %d %d %.2f %.2f %.3f %.6f %.6f %d\n",
        total, ok, total - ok, elapsed, rps, avg, min, max, (avg > 5)
    }
  ' "$results_dir"/worker-*) || stats="0 0 0 0.00 0.00 0.000 0.000000 0.000000 0"
  rm -rf -- "$results_dir"

  local total_requests_completed successful_requests failed_requests
  local total_test_time requests_per_sec avg_time min_time max_time too_slow
  read -r total_requests_completed successful_requests failed_requests \
    total_test_time requests_per_sec avg_time min_time max_time too_slow \
    <<< "$stats"

  # Print results
  print_color "$GREEN" "Results: $test_name"
  echo " Total requests: $total_requests_completed"
  echo " Successful: $successful_requests"
  echo " Failed: $failed_requests"
  echo " Total time: ${total_test_time} seconds"
  echo " Requests/sec: ${requests_per_sec}"
  echo " Avg response time: ${avg_time}s"
  echo " Min response time: ${min_time}s"
  echo " Max response time: ${max_time}s"
  echo ""

  # Check latency target
  if ((too_slow)); then
    print_color "$YELLOW" " WARNING: Avg response time exceeds 5s (consider increasing timeout)"
  fi
}
# Function to compare with/without token counting
#
# Prints instructions only; it does not start a proxy or run any test.
# Globals:   BLUE, YELLOW (read)
# Outputs:   the instructions on stdout
compare_token_counting() {
  local rule="======================================"

  print_color "$BLUE" "$rule"
  print_color "$BLUE" "Token Counting Overhead Comparison"
  print_color "$BLUE" "$rule"
  echo ""

  print_color "$YELLOW" "This test requires running the proxy with and without token counting."
  print_color "$YELLOW" "Run the following in separate terminals:"
  printf '%s\n' \
    "" \
    " Terminal 1 (with counting):" \
    " TOKEN_COUNTING_ENABLED=true go run ." \
    "" \
    " Terminal 2 (without counting):" \
    " TOKEN_COUNTING_ENABLED=false go run ." \
    ""

  print_color "$YELLOW" "Then run this script for each configuration:"
  printf '%s\n' \
    " PROXY_URL=http://localhost:8080 ./scripts/load-test-proxy.sh" \
    ""
}
# Main function
#
# Verifies the proxy is reachable, reports whether its metrics mention token
# counting, then runs the fixed list of load-test scenarios.
# Globals:   PROXY_URL, BLUE, GREEN, YELLOW, SMALL_PROMPT, MEDIUM_PROMPT,
#            LARGE_PROMPT (read)
# Arguments: none are used
# Outputs:   banner, status and per-scenario results on stdout
main() {
  local rule="======================================"

  print_color "$BLUE" "$rule"
  print_color "$BLUE" "zai-proxy HTTP Load Test"
  print_color "$BLUE" "$rule"
  echo ""
  echo "Proxy URL: $PROXY_URL"
  echo ""

  # Check if proxy is running (exits the script when it is not)
  check_proxy

  # Get proxy info
  print_color "$GREEN" "Proxy is running"
  echo ""

  # Check token counting status from metrics. A failed fetch is treated the
  # same as metrics that do not mention the counter.
  local metrics
  metrics=$(curl -s --max-time 10 "$PROXY_URL/metrics" 2>/dev/null) || true
  # Match inside the shell rather than piping into `grep -q`: under pipefail
  # grep's early exit can kill the writer with SIGPIPE on a large payload,
  # which would make a successful match look like a failure.
  if [[ "$metrics" == *zai_proxy_tokens_total* ]]; then
    print_color "$GREEN" "Token counting: ENABLED"
  else
    print_color "$YELLOW" "Token counting: DISABLED"
  fi
  echo ""

  # Run load tests with different concurrency levels
  print_color "$BLUE" "$rule"
  print_color "$BLUE" "Load Test Scenarios"
  print_color "$BLUE" "$rule"
  echo ""

  # Small prompt, low concurrency
  run_load_test 10 50 "$SMALL_PROMPT" "Small prompt, 10 concurrent"

  # Small prompt, medium concurrency
  run_load_test 50 100 "$SMALL_PROMPT" "Small prompt, 50 concurrent"

  # Small prompt, high concurrency
  run_load_test 100 200 "$SMALL_PROMPT" "Small prompt, 100 concurrent"

  echo ""

  # Medium prompt, medium concurrency
  run_load_test 50 100 "$MEDIUM_PROMPT" "Medium prompt, 50 concurrent"

  echo ""

  # Large prompt, low concurrency
  run_load_test 10 20 "$LARGE_PROMPT" "Large prompt, 10 concurrent"

  echo ""
  print_color "$GREEN" "$rule"
  print_color "$GREEN" "Load Test Complete"
  print_color "$GREEN" "$rule"
  echo ""
  echo "To compare with/without token counting, restart proxy with:"
  echo " TOKEN_COUNTING_ENABLED=false"
  echo ""
}
# Entry point: `--compare` as the first argument prints the comparison
# instructions and exits; anything else runs the load tests.
case "${1:-}" in
  --compare)
    compare_token_counting
    exit 0
    ;;
esac

# Run main tests
main "$@"