Extracted from ardenone-cluster/containers/zai-proxy and
ardenone-cluster/containers/zai-proxy-dashboard.

- proxy/: OpenAI-compatible ZAI reverse proxy (Go, v1.10.0)
  - Token counting, rate limiting, Prometheus metrics, canary support
- dashboard/: Metrics dashboard backend + React frontend (Go, v1.0.0)
  - Prometheus collector, SQLite storage, SSE live updates
- docs/: Operational notes, research, and plan subdirs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
160 lines
5.1 KiB
Bash
Executable file
160 lines
5.1 KiB
Bash
Executable file
#!/bin/bash
#
# Benchmark script for zai-proxy token counting overhead.
# Measures performance with and without token counting enabled.
#
# The script resolves its own location and changes into the parent of the
# directory that contains it, so it can be started from any working directory.

# Abort on unhandled errors, unset variables and failing pipeline stages.
set -euo pipefail

# Absolute path of the directory holding this script, and of its parent.
# `--` keeps a path that starts with '-' from being parsed as an option.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd -- "$SCRIPT_DIR/.." && pwd)"

# Everything below runs relative to PROJECT_DIR.
cd -- "$PROJECT_DIR"

echo "======================================"
echo "zai-proxy Token Counting Benchmarks"
echo "======================================"
echo "Project dir: $PROJECT_DIR"
echo ""

# Colors for output (ANSI escape sequences, interpreted by `echo -e`).
RED='\033[0;31m'
GREEN='\033[0;32m'
# shellcheck disable=SC2034  # YELLOW is not referenced in the visible script
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Check if Go is installed; the benchmarks below are all run through `go test`.
if ! command -v go > /dev/null 2>&1; then
  # Diagnostics go to stderr so they stay visible when stdout is redirected.
  echo -e "${RED}Error: Go is not installed${NC}" >&2
  exit 1
fi

echo "Go version: $(go version)"
echo ""
#######################################
# Function to run benchmarks and parse results.
#
# Runs `go test -bench` for one benchmark pattern across ./... and prints a
# titled section containing only the Benchmark/PASS/ok/FAIL lines.
#
# Arguments:
#   $1 - regular expression passed to `go test -bench`
#   $2 - human-readable section title
# Outputs:
#   Section header and filtered `go test` output on stdout; a diagnostic on
#   stderr when `go test` exits non-zero.
# Returns:
#   The exit status of `go test` (0 on success). An empty filter result is
#   not treated as an error.
#######################################
run_benchmarks() {
  local bench_pattern=$1
  local description=$2
  local output
  local status=0

  echo "--------------------------------------"
  echo "$description"
  echo "--------------------------------------"

  # -run='^$' matches no test name, so only benchmarks execute instead of the
  # full unit-test suite being repeated for every benchmark group.
  # The status is captured explicitly so that `set -e` does not end the script
  # before the output has been shown.
  output=$(go test -run='^$' -bench="$bench_pattern" -benchmem -benchtime=1s ./... 2>&1) \
    || status=$?

  # grep exits 1 when no line matches; under `set -e`/pipefail that would
  # abort the whole script, although "nothing to show" is not a failure.
  grep -E "^(Benchmark|PASS|ok|FAIL)" <<<"$output" || true
  echo ""

  if (( status != 0 )); then
    echo "go test exited with status $status for -bench=$bench_pattern" >&2
  fi
  return "$status"
}
#######################################
# Function to check if benchmark meets latency target.
#
# Looks up one benchmark in captured `go test -bench` output, converts its
# ns/op value to milliseconds and compares it with a target.
#
# Globals:
#   RED, GREEN, NC (read) - color escape sequences for the verdict line
# Arguments:
#   $1 - captured benchmark output (may span several lines)
#   $2 - latency target in milliseconds
#   $3 - benchmark name to look for
# Outputs:
#   Latency and PASS/FAIL verdict on stdout; a warning on stderr when no
#   ns/op value is found for the benchmark.
# Returns:
#   1 when the latency exceeds the target; 0 when it meets the target or when
#   no result was found.
#######################################
check_latency_target() {
  local output=$1
  local target_ms=$2
  local test_name=$3
  local ns_per_op
  local ms_per_op

  # Extract ns/op from benchmark output.
  # The value is located by its "ns/op" unit rather than a fixed column. A
  # line whose name equals test_name (ignoring the trailing -GOMAXPROCS
  # suffix) wins; otherwise the first line containing test_name is used, so
  # a single value is always produced.
  ns_per_op=$(awk -v name="$test_name" '
    index($0, name) {
      for (i = 2; i <= NF; i++) {
        if ($i != "ns/op") continue
        bench = $1
        sub(/-[0-9]+$/, "", bench)
        if (bench == name) { exact = $(i - 1); exit }
        if (partial == "") partial = $(i - 1)
        break
      }
    }
    END {
      if (exact != "") print exact
      else if (partial != "") print partial
    }
  ' <<<"$output")

  if [[ -z "$ns_per_op" ]]; then
    echo "warning: no ns/op result found for '$test_name'" >&2
    return 0
  fi

  # awk does the floating-point math, so the script does not depend on bc.
  ms_per_op=$(LC_ALL=C awk -v ns="$ns_per_op" 'BEGIN { printf "%.3f", ns / 1000000 }')
  echo " Latency: $ms_per_op ms (target: ${target_ms}ms)"

  # awk exits 0 exactly when the measured latency is above the target.
  if awk -v ns="$ns_per_op" -v target="$target_ms" \
    'BEGIN { exit !(ns / 1000000 > target) }'; then
    echo -e " ${RED}FAIL: Exceeds target${NC}"
    return 1
  fi

  echo -e " ${GREEN}PASS: Meets target${NC}"
  return 0
}
#######################################
# Run one `go test -bench` invocation and print its last benchmark line.
# Arguments:
#   $1 - regular expression passed to -bench
#   $2 - value passed to -benchtime (e.g. 100x)
#   $3 - optional extra `go test` flag (e.g. -benchmem)
# Outputs:
#   The last output line starting with "Benchmark", or nothing if there is
#   none; a warning on stderr when `go test` exits non-zero.
# Returns:
#   0 always, so a missing benchmark cannot abort the summary under `set -e`.
#######################################
_bench_result_line() {
  local pattern=$1
  local benchtime=$2
  local extra_flag=${3:-}
  # -run='^$' matches no test name, so only the benchmark itself executes.
  local -a cmd=(go test -run='^$' -bench="$pattern" -benchtime="$benchtime")
  local output

  if [[ -n "$extra_flag" ]]; then
    cmd+=("$extra_flag")
  fi
  cmd+=(./...)

  output=$("${cmd[@]}" 2>&1) \
    || echo "warning: go test failed for -bench=$pattern" >&2

  # Only real result lines qualify; "ok pkg 0.010s" and similar lines that
  # happen to contain the size or concurrency text are ignored.
  awk '/^Benchmark/ { line = $0 } END { if (line != "") print line }' <<<"$output"
}

#######################################
# Print the value that precedes a given unit in a benchmark result line.
# Arguments:
#   $1 - benchmark result line
#   $2 - unit token to look for (e.g. ns/op, B/op)
# Outputs:
#   The value on stdout, or nothing when the unit is absent.
# Returns:
#   0 always.
#######################################
_bench_metric() {
  local line=$1
  local unit=$2
  local -a fields=()
  local i

  read -ra fields <<<"$line" || true
  for (( i = 1; i < ${#fields[@]}; i++ )); do
    if [[ "${fields[i]}" == "$unit" ]]; then
      printf '%s\n' "${fields[i - 1]}"
      return 0
    fi
  done
  return 0
}

#######################################
# Print one " label: value unit" summary row, or " label: n/a" when the
# value is empty.
# Arguments:
#   $1 - row label, $2 - value (may be empty), $3 - unit
#######################################
_report_metric() {
  local label=$1
  local value=$2
  local unit=$3

  if [[ -n "$value" ]]; then
    echo " $label: $value $unit"
  else
    echo " $label: n/a"
  fi
}

#######################################
# Main benchmark execution.
#
# Runs every benchmark group through run_benchmarks, then prints a compact
# summary: time per input size, latency against the 5 ms target, time under
# concurrent load and bytes allocated per operation. Finishes with hints for
# CPU and memory profiling.
#
# Globals:
#   RED, NC (read) - color escape sequences for the latency warning
# Arguments:
#   None used.
# Outputs:
#   Benchmark report on stdout.
#######################################
main() {
  local -r latency_target_ms=5
  local size
  local concurrency
  local line
  local ns_per_op
  local ms_per_op

  # Run all benchmarks
  echo "Running Go benchmarks..."
  echo ""

  # 1. Basic token counting benchmarks
  run_benchmarks "BenchmarkTikTokenCounter" "TikToken Performance by Input Size"
  run_benchmarks "BenchmarkSimpleTokenCounter" "SimpleTokenCounter Performance (Baseline)"
  run_benchmarks "BenchmarkCountRequestTokens" "Request Token Counting"

  # 2. Concurrent benchmarks
  run_benchmarks "BenchmarkConcurrentTokenCounting" "Concurrent Token Counting (10/50/100)"
  run_benchmarks "BenchmarkTikTokenCounterParallel" "Parallel Token Counting"

  # 3. Streaming and SSE benchmarks
  run_benchmarks "BenchmarkCountSSE" "SSE Token Counting"

  # 4. Memory benchmarks
  run_benchmarks "BenchmarkTikTokenCounterMemory" "Memory Allocation"

  # 5. Full request benchmarks
  run_benchmarks "BenchmarkFullRequest" "Full Request with Token Counting"

  echo "======================================"
  echo "Benchmark Summary"
  echo "======================================"
  echo ""
  echo "Running detailed benchmarks with timing..."

  # Run benchmarks with more detailed output
  echo ""
  echo "1. Input Size Benchmarks"
  echo "------------------------"
  for size in Small Medium Large XLarge; do
    line=$(_bench_result_line "^BenchmarkTikTokenCounter${size}$" 100x)
    _report_metric "$size" "$(_bench_metric "$line" "ns/op")" "ns/op"
  done

  echo ""
  echo "2. Latency Targets (must be <${latency_target_ms}ms)"
  echo "----------------------------------"
  for size in Small Medium Large; do
    line=$(_bench_result_line "^BenchmarkTikTokenCounter${size}$" 100x)
    ns_per_op=$(_bench_metric "$line" "ns/op")
    if [[ -z "$ns_per_op" ]]; then
      echo " $size: n/a"
      continue
    fi

    # awk does the floating-point math, so the script does not depend on bc.
    ms_per_op=$(LC_ALL=C awk -v ns="$ns_per_op" 'BEGIN { printf "%.2f", ns / 1000000 }')
    echo " $size: $ms_per_op ms"

    # awk exits 0 exactly when the latency reaches or exceeds the target.
    if awk -v ns="$ns_per_op" -v target="$latency_target_ms" \
      'BEGIN { exit !(ns / 1000000 >= target) }'; then
      echo -e " ${RED}WARNING: Exceeds ${latency_target_ms}ms target${NC}"
    fi
  done

  echo ""
  echo "3. Concurrent Load Performance"
  echo "------------------------------"
  for concurrency in 10 50 100; do
    line=$(_bench_result_line "^BenchmarkConcurrentTokenCounting${concurrency}$" 10x)
    _report_metric "$concurrency concurrent" "$(_bench_metric "$line" "ns/op")" "ns/op"
  done

  echo ""
  echo "4. Memory per Operation"
  echo "-----------------------"
  for size in Small Medium Large; do
    line=$(_bench_result_line "^BenchmarkTikTokenCounter${size}$" 100x -benchmem)
    # Looked up by unit, so extra columns (e.g. MB/s) cannot shift the value.
    _report_metric "$size" "$(_bench_metric "$line" "B/op")" "B/op"
  done

  echo ""
  echo "======================================"
  echo "Benchmark Complete"
  echo "======================================"
  echo ""
  echo "For detailed CPU profiling, run:"
  echo " go test -cpuprofile=cpu.prof -bench=. ./..."
  echo " go tool pprof cpu.prof"
  echo ""
  echo "For memory profiling, run:"
  echo " go test -memprofile=mem.prof -bench=. ./..."
  echo " go tool pprof mem.prof"
}
# Run main function, forwarding the script's command-line arguments.
main "$@"