fix(pdftract-60h): fix bugs in benchmark runner script
- Add extraction of pdftract_geomean from the tool_geomeans array for the regression gate
- Fix the vector geomean calculation to properly pass bash array values to Python

The benchmark infrastructure was complete but had two bugs:
1. $pdftract_geomean was used but never set (line 308)
2. The vector geomean calculation had broken Python code for array expansion

These fixes ensure the regression and 10x-faster gates will work correctly once the pdftract binary with extract/grep subcommands is available.

Refs pdftract-60h
This commit is contained in:
parent
5cd0eac170
commit
398ab747fc
1 changed file with 5 additions and 2 deletions
|
|
@ -236,6 +236,9 @@ print(math.exp(sum(math.log(v) for v in values) / len(values)))
|
|||
fi
|
||||
done
|
||||
|
||||
# Extract pdftract geomean for regression gate
|
||||
local pdftract_geomean=${tool_geomeans[pdftract]:-"null"}
|
||||
|
||||
# Check 10x-faster gate (pdftract vs pdfminer on vector PDFs only)
|
||||
# The gate applies only to vector PDFs where pdftract should excel
|
||||
log_info "Computing 10x-faster gate on vector PDFs only..."
|
||||
|
|
@ -270,7 +273,7 @@ print(math.exp(sum(math.log(v) for v in values) / len(values)))
|
|||
if [ ${#pdftract_vector_values[@]} -gt 0 ]; then
|
||||
pdftract_vector_geomean=$(python3 -c "
|
||||
import math
|
||||
values = ${pdftract_vector_values[@]}
|
||||
values = [${pdftract_vector_values[*]}]
|
||||
print(math.exp(sum(math.log(v) for v in values) / len(values)))
|
||||
")
|
||||
fi
|
||||
|
|
@ -278,7 +281,7 @@ print(math.exp(sum(math.log(v) for v in values) / len(values)))
|
|||
if [ ${#pdfminer_vector_values[@]} -gt 0 ]; then
|
||||
pdfminer_vector_geomean=$(python3 -c "
|
||||
import math
|
||||
values = ${pdfminer_vector_values[@]}
|
||||
values = [${pdfminer_vector_values[*]}]
|
||||
print(math.exp(sum(math.log(v) for v in values) / len(values)))
|
||||
")
|
||||
fi
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue