The indent trigger was using .abs(), which fired on both increased indent (non-indented → indented) AND decreased indent (indented → non-indented). This caused drop-cap style paragraphs (indented first line, flush-left continuation) to be incorrectly split into two blocks.

Per plan Phase 4.4 heuristic #2, an indent change should only trigger when the current line is MORE indented (further to the right, larger x0) than the block average, i.e., a new paragraph starting after non-indented text. It should NOT trigger for decreased indent (first line indented, rest flush-left).

Fix: Remove .abs() and only check whether line_x0 - block_avg_x0 > threshold.

Tests:
- test_indented_first_line_new_block: PASS (non-indented → indented splits)
- test_indented_first_line_of_paragraph_not_split: PASS (drop cap stays together)
- All 179 line module tests: PASS
51 lines
1.4 KiB
Python
Executable file
51 lines
1.4 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
"""Generate font fingerprint entry for a TTF/OTF file."""
|
|
|
|
import hashlib
|
|
import json
|
|
import sys
|
|
|
|
def compute_sha256(path):
    """Compute the SHA-256 hash of a file.

    The file is read in fixed-size chunks so that a large input is never
    held in memory all at once.

    Args:
        path: Path of the file to hash.

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        # iter() with a sentinel keeps calling f.read() until it returns
        # b'' (end of file), feeding the hash incrementally.
        for chunk in iter(lambda: f.read(65536), b''):
            h.update(chunk)
    return h.hexdigest()
|
|
|
|
def main():
    """Print a JSON font-fingerprint entry for the font given on the command line.

    Reads the font path from ``sys.argv[1]``, hashes the file with
    ``compute_sha256`` and writes a one-element JSON list to stdout. The
    element has the keys ``sha256_hex``, ``font_name`` and ``entries``,
    where ``entries`` is a list of ``[gid, codepoint]`` pairs.

    Exits with status 1 (message on stderr) when no path is given or when
    the file cannot be read.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <font.ttf>", file=sys.stderr)
        sys.exit(1)

    font_path = sys.argv[1]

    # Compute SHA-256; report an unreadable file the same way as a usage
    # error instead of letting the traceback escape.
    try:
        sha256_hex = compute_sha256(font_path)
    except OSError as exc:
        print(f"Error: cannot read {font_path}: {exc}", file=sys.stderr)
        sys.exit(1)

    # For now, create a minimal entry with common ASCII mappings.
    # In a real implementation, we'd parse the font tables to get the
    # GID->codepoint mappings using fontTools or similar.
    #
    # Placeholder: the printable ASCII range (0x20-0x7E) is assumed to sit
    # at GIDs 1-95, with GID 0 left for .notdef. Nothing here is read from
    # the font itself, so space (0x20) is simply shifted to GID 1.
    entries = [[cp - 0x20 + 1, cp] for cp in range(0x20, 0x7F)]

    # Get font name from path: keep the last component, accepting both
    # '/' and '\\' as separators.
    font_name = font_path.rsplit('/', 1)[-1].rsplit('\\', 1)[-1]

    # Output JSON entry
    result = [{
        "sha256_hex": sha256_hex,
        "font_name": font_name,
        "entries": entries,
    }]

    print(json.dumps(result, indent=2))
|
|
|
|
# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|