pdftract/build/gen_fingerprint_entry.py

#!/usr/bin/env python3
"""Generate font fingerprint entry for a TTF/OTF file."""

import hashlib
import json
import sys

def compute_sha256(path):
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is read in fixed-size chunks so that memory use stays bounded
    regardless of the file size.

    Args:
        path: Filesystem path of the file to hash.

    Returns:
        The digest as a 64-character lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 64 * 1024
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        # iter() with a sentinel stops as soon as read() returns b'' (EOF).
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()

def main():
    """Print a JSON fingerprint entry for the font named on the command line.

    Reads the font path from ``sys.argv[1]``, hashes the file with SHA-256 and
    writes a one-element JSON list to stdout. The element has the keys
    ``sha256_hex``, ``font_name`` (the last path component) and ``entries``
    (a list of ``[gid, codepoint]`` pairs).

    The ``entries`` are a placeholder: the font tables are not parsed, and
    printable ASCII (0x20-0x7E) is simply assumed to occupy GIDs 1-95.

    Exits with status 1 and a message on stderr when no path is given or
    the file cannot be read.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <font.ttf>", file=sys.stderr)
        sys.exit(1)

    font_path = sys.argv[1]

    # Report an unreadable/missing file as a short error, not a traceback.
    try:
        sha256_hex = compute_sha256(font_path)
    except OSError as exc:
        print(f"Error: cannot read {font_path}: {exc}", file=sys.stderr)
        sys.exit(1)

    # Placeholder mapping: GID 0 is assumed to be .notdef, so space (0x20)
    # is shifted to GID 1 and the rest of printable ASCII follows in order.
    # A real implementation would parse the font tables (e.g. with
    # fontTools) to obtain the actual GID -> codepoint mapping.
    entries = [[cp - 0x20 + 1, cp] for cp in range(0x20, 0x7F)]

    # Take the last path component, accepting both '/' and '\\' separators
    # regardless of the platform the script runs on.
    font_name = font_path.rsplit('/', 1)[-1].rsplit('\\', 1)[-1]

    result = [{
        "sha256_hex": sha256_hex,
        "font_name": font_name,
        "entries": entries,
    }]

    print(json.dumps(result, indent=2))

# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()