From 398ab747fcf0833493425bb2dad484942f47968d Mon Sep 17 00:00:00 2001 From: jedarden Date: Mon, 18 May 2026 01:29:41 -0400 Subject: [PATCH] fix(pdftract-60h): fix bugs in benchmark runner script - Add extraction of pdftract_geomean from tool_geomeans array for regression gate - Fix vector geomean calculation to properly pass bash array values to Python The benchmark infrastructure was complete but had two bugs: 1. $pdftract_geomean was used but never set (line 308) 2. Vector geomean calculation had broken Python code for array expansion These fixes ensure the regression and 10x-faster gates will work correctly once the pdftract binary with extract/grep subcommands is available. Refs pdftract-60h --- benches/competitors/run-benchmarks.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/benches/competitors/run-benchmarks.sh b/benches/competitors/run-benchmarks.sh index 4ca0860..e0058e3 100755 --- a/benches/competitors/run-benchmarks.sh +++ b/benches/competitors/run-benchmarks.sh @@ -236,6 +236,9 @@ print(math.exp(sum(math.log(v) for v in values) / len(values))) fi done + # Extract pdftract geomean for regression gate + local pdftract_geomean=${tool_geomeans[pdftract]:-"null"} + # Check 10x-faster gate (pdftract vs pdfminer on vector PDFs only) # The gate applies only to vector PDFs where pdftract should excel log_info "Computing 10x-faster gate on vector PDFs only..." 
@@ -270,7 +273,7 @@ print(math.exp(sum(math.log(v) for v in values) / len(values)))
     if [ ${#pdftract_vector_values[@]} -gt 0 ]; then
         pdftract_vector_geomean=$(python3 -c "
 import math
-values = ${pdftract_vector_values[@]}
+values = [$(printf '%s,' "${pdftract_vector_values[@]}")]  # bash array joined with commas (IFS-independent); a trailing comma is valid in a Python list
 print(math.exp(sum(math.log(v) for v in values) / len(values)))
 ")
     fi
@@ -278,7 +281,7 @@ print(math.exp(sum(math.log(v) for v in values) / len(values)))
     if [ ${#pdfminer_vector_values[@]} -gt 0 ]; then
         pdfminer_vector_geomean=$(python3 -c "
 import math
-values = ${pdfminer_vector_values[@]}
+values = [$(printf '%s,' "${pdfminer_vector_values[@]}")]  # bash array joined with commas (IFS-independent); a trailing comma is valid in a Python list
 print(math.exp(sum(math.log(v) for v in values) / len(values)))
 ")
     fi