# - Rewrite profiles/builtin/contract/profile.yaml following Phase 7.10 schema
#   with match predicates, extraction tuning, and field extractors
# - Create tests/fixtures/profiles/contract/ directory with 5 expected outputs
# - Add comprehensive regression tests in tests/profiles/test_contract.rs
# - Profile extracts: parties, effective_date, term, governing_law, signatures
#
# Fixtures cover: NDA, employment agreement, MSA, service agreement,
# real estate purchase
#
# Closes: pdftract-dtpwa
# Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
# File info: 38 lines, 1.3 KiB, YAML
# Contract profile for legal agreements
# Extracts parties, effective date, term, governing law, and signatures from contracts
name: contract
description: Legal contracts and agreements with parties, effective date, term, governing law, and signatures
# NOTE(review): how `priority` orders competing profiles (higher-wins vs.
# lower-wins) is not visible in this file — confirm against the profile loader.
priority: 20

# Matching predicates: identify documents as contracts
# NOTE(review): nesting was restored from a flattened copy of this file —
# confirm that `heading_matches` belongs under `any`, that `structural` is the
# second entry of `all`, and that `none` is a sibling of `all`.
match:
  all:
    - any:
        - text_contains:
            - "AGREEMENT"
            - "CONTRACT"
            - "WHEREAS"
            - "NOW THEREFORE"
            - "In witness whereof"
        # Single-quoted so the regex is taken literally (no escape processing).
        - heading_matches: '^(Agreement|Contract|Memorandum of Understanding)'
    - structural:
        page_count:
          min: 2
          max: 200
  none:
    # "Invoice #" must stay quoted: an unquoted ` #` would start a YAML comment.
    - text_contains: ["Invoice #", "Receipt"]

# Extraction tuning for contracts
extraction:
  reading_order: xy_cut
  # NOTE(review): scale and direction of this threshold are defined by the
  # consumer, not by this file — confirm before changing.
  readability_threshold: 0.5
  include_headers_footers: false

# Field extractors for contract-specific metadata
# Each key under `fields` names one extracted value; its child keys configure
# how that value is located (`near`, `region`) and post-processed
# (`pick`, `parse`, `regex`).
fields:
  parties:
    near: ["between", "party of the first part", "BY AND BETWEEN"]
    pick: nearest_below
  effective_date:
    near: ["Effective Date", "Date of Agreement", "as of"]
    parse: date
  term:
    near: ["Term", "Initial Term", "expires on", "shall remain in effect"]
    # Single-quoted so the backslashes reach the regex engine unchanged.
    regex: '\d+\s+(years?|months?)|expires?\s+\d{4}'
  governing_law:
    near: ["Governing Law", "governed by the laws of"]
    pick: nearest_right
  signatures:
    region: bottom_quarter