fix(pdftract-4iier): correct typo in scientific_paper README and fix xtask path handling

- Fix typo: "scific_paper" -> "scientific_paper" in fixture path
- Fix xtask path resolution: use relative path ".." to access workspace root
- Fix xtask format string: remove unused profile_name placeholder
- Add workspace exclusion to xtask/Cargo.toml for standalone build

These are minor improvements to the existing per-profile README documentation
that was already created in commit 8b5dd4f.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
jedarden 2026-05-17 23:22:39 -04:00
parent 8b5dd4febb
commit 17f581897f
3 changed files with 10 additions and 6 deletions

View file

@ -39,7 +39,7 @@ The classifier looks for academic paper terminology combined with two-column lay
## Sample Input
Example fixtures demonstrating this profile are available in `tests/fixtures/classifier/scific_paper/`.
Example fixtures demonstrating this profile are available in `tests/fixtures/classifier/scientific_paper/`.
The corpus includes 50 scientific paper documents covering various journals and layouts.

View file

@ -1,3 +1,5 @@
[workspace]
[package]
name = "xtask"
version = "0.1.0"

View file

@ -58,8 +58,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
generate_profile_readme(&args[2])?;
}
"doc-profiles" => {
let profiles_dir = Path::new("profiles/builtin");
for entry in fs::read_dir(profiles_dir)? {
let profiles_dir = Path::new("..").join("profiles/builtin");
for entry in fs::read_dir(&profiles_dir)? {
let entry = entry?;
if entry.path().is_dir() {
let profile_name = entry.file_name().to_string_lossy().to_string();
@ -79,8 +79,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
fn generate_profile_readme(profile_name: &str) -> Result<(), Box<dyn std::error::Error>> {
let profile_path = Path::new("profiles/builtin").join(profile_name).join("profile.yaml");
let readme_path = Path::new("profiles/builtin").join(profile_name).join("README.md");
// Treat the parent of the current working directory as the workspace root (assumes xtask is run from the xtask/ directory; no Cargo.toml lookup is performed)
let workspace_root = Path::new("..");
let profile_path = workspace_root.join("profiles/builtin").join(profile_name).join("profile.yaml");
let readme_path = workspace_root.join("profiles/builtin").join(profile_name).join("README.md");
if !profile_path.exists() {
return Err(format!("Profile YAML not found: {}", profile_path.display()).into());
@ -171,7 +173,7 @@ fn generate_profile_readme(profile_name: &str) -> Result<(), Box<dyn std::error:
readme.push_str("```bash\n");
readme.push_str(&format!("pdftract profiles export {} > my-profile.yaml\n", profile_name));
readme.push_str("# Edit my-profile.yaml to customize match criteria, fields, or extraction patterns\n");
readme.push_str(&format!("pdftract extract --profile my-profile.yaml document.pdf\n", profile_name));
readme.push_str("pdftract extract --profile my-profile.yaml document.pdf\n");
readme.push_str("```\n\n");
// Footer