pdftract/tests/fixtures/generate_slide_deck_fixtures.rs
jedarden 21fcd902d1 feat(pdftract-2vajs): implement slide_deck profile with fixtures and tests
Implements the slide_deck document profile for PowerPoint/Keynote/Google
Slides exports as PDF. Includes 5 fixtures, expected outputs, and regression
tests.

Components:
- profiles/builtin/slide_deck/profile.yaml - Profile configuration
- tests/fixtures/profiles/slide_deck/ - 5 PDF fixtures with expected outputs
- crates/pdftract-cli/tests/test_slide_deck.rs - Regression tests (12 PASS)

Fixtures cover:
1. pitch_deck - Sales pitch (10 slides)
2. academic_lecture - Academic lecture (40 slides)
3. corporate_kickoff - Corporate kickoff (15 slides)
4. bilingual_deck - Bilingual EN/ES (12 slides)
5. googleslides_handout - Google Slides handout mode (4 pages, 3 slides/page)

Extracted fields: title, presenter, date, slide_titles

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 21:12:24 -04:00

331 lines
10 KiB
Rust

/// Generate slide deck test fixtures.
///
/// This creates 5 PDF fixtures for slide deck profile testing:
/// 1. pitch_deck - Sales pitch deck (10 slides)
/// 2. academic_lecture - Academic lecture (40 slides)
/// 3. corporate_kickoff - Corporate kickoff (15 slides)
/// 4. bilingual_deck - Bilingual English/Spanish (12 slides)
/// 5. googleslides_handout - Google Slides handout mode (4 pages, 3 slides per page)
///
/// Run with: cargo run --bin generate_slide_deck_fixtures
use std::fs::File;
use std::io::Write;
use std::path::Path;
/// Minimal builder for text-only slide-deck PDF fixtures.
///
/// Every title added with [`SlideDeckBuilder::add_slide`] becomes one
/// 612x792 pt page whose only content is that title, drawn in Helvetica.
struct SlideDeckBuilder {
    /// Page titles, in page order (one page per entry).
    slide_titles: Vec<String>,
    /// Document title written to the Info dictionary.
    title: String,
    /// Author written to the Info dictionary.
    author: String,
}

impl SlideDeckBuilder {
    /// Creates an empty deck carrying the given document `title` and `author`.
    fn new(title: &str, author: &str) -> Self {
        Self {
            slide_titles: Vec::new(),
            title: title.to_string(),
            author: author.to_string(),
        }
    }

    /// Appends one page showing `title`.
    fn add_slide(&mut self, title: &str) {
        self.slide_titles.push(title.to_string());
    }

    /// Serializes the deck into a complete PDF 1.4 file.
    ///
    /// The output is pure ASCII (see [`escape_pdf_string`]), so byte offsets
    /// recorded in the cross-reference table equal `String` lengths.
    fn build(&self) -> Vec<u8> {
        let page_count = self.slide_titles.len();

        // Object numbers are 1-based and must match the order in which the
        // objects are pushed below (N = page_count):
        //   1               catalog
        //   2               page tree root
        //   3 ..= 2+N       one page object per slide
        //   3+N             shared font
        //   4+N ..= 3+2N    one content stream per slide
        //   4+2N            Info dictionary
        let catalog_id: usize = 1;
        let pages_id: usize = 2;
        let first_page_id: usize = 3;
        let font_id = first_page_id + page_count;
        let first_content_id = font_id + 1;
        let info_id = first_content_id + page_count;

        let mut objects: Vec<String> = Vec::with_capacity(info_id);

        // Catalog.
        objects.push(format!("<</Type/Catalog/Pages {} 0 R>>", pages_id));

        // Page tree root; Resources and MediaBox are inherited by every page.
        let kids: Vec<String> = (0..page_count)
            .map(|i| format!("{} 0 R", first_page_id + i))
            .collect();
        objects.push(format!(
            "<</Type/Pages/Count {}/Kids[{}]/Resources<</Font<</F1 {} 0 R>>>>/MediaBox[0 0 612 792]>>",
            page_count,
            kids.join(" "),
            font_id
        ));

        // Page objects: page i draws content stream i.
        for i in 0..page_count {
            objects.push(format!(
                "<</Type/Page/Parent {} 0 R/Contents {} 0 R>>",
                pages_id,
                first_content_id + i
            ));
        }

        // Font. WinAnsiEncoding makes the octal escapes produced by
        // `escape_pdf_string` select the intended accented glyphs.
        objects.push(
            "<</Type/Font/Subtype/Type1/BaseFont/Helvetica/Encoding/WinAnsiEncoding>>".to_string(),
        );

        // Content streams (one per page). `Tf` needs the font resource name
        // as well as the size, and must run before `Tj`.
        for slide_title in &self.slide_titles {
            let content = format!(
                "BT\n/F1 24 Tf\n50 700 Td\n({}) Tj\nET\n",
                escape_pdf_string(slide_title)
            );
            // The newline inserted before `endstream` is not part of the
            // stream data, so it is not counted in /Length.
            objects.push(format!(
                "<</Length {}>>\nstream\n{}\nendstream",
                content.len(),
                content
            ));
        }

        // Info dictionary.
        objects.push(format!(
            "<</Title({})/Author({})/Producer(pdftract-test)>>",
            escape_pdf_string(&self.title),
            escape_pdf_string(&self.author)
        ));
        debug_assert_eq!(objects.len(), info_id);

        // Header, then every object, remembering where each one starts.
        let mut pdf = String::from("%PDF-1.4\n%PDF-Magic-Comment\n");
        let mut offsets = Vec::with_capacity(objects.len());
        for (index, body) in objects.iter().enumerate() {
            offsets.push(pdf.len());
            pdf.push_str(&format!("{} 0 obj\n{}\nendobj\n", index + 1, body));
        }

        // Cross-reference table: the free-list head (object 0) followed by
        // one 20-byte entry per object with a 10-digit *decimal* offset.
        let xref_offset = pdf.len();
        pdf.push_str("xref\n");
        pdf.push_str("0 1\n");
        pdf.push_str("0000000000 65535 f \n");
        pdf.push_str(&format!("1 {}\n", objects.len()));
        for offset in &offsets {
            pdf.push_str(&format!("{:010} 00000 n \n", offset));
        }

        // Trailer. /Size is the highest object number plus one.
        pdf.push_str(&format!(
            "trailer\n<</Size {} /Root {} 0 R /Info {} 0 R>>\nstartxref\n{}\n%%EOF\n",
            objects.len() + 1,
            catalog_id,
            info_id,
            xref_offset
        ));

        pdf.into_bytes()
    }
}

/// Escapes `s` for use inside a PDF literal string `( ... )`.
///
/// * `(`, `)` and `\` are backslash-escaped.
/// * Other ASCII characters are copied unchanged.
/// * U+00A1..=U+00FF are written as `\ddd` octal escapes of their code point,
///   which is the byte WinAnsiEncoding uses for those characters.
/// * Anything else cannot be shown by the single-byte font used here and is
///   replaced by `?`.
///
/// The result is always ASCII.
fn escape_pdf_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '(' | ')' | '\\' => {
                escaped.push('\\');
                escaped.push(c);
            }
            _ if c.is_ascii() => escaped.push(c),
            '\u{a1}'..='\u{ff}' => escaped.push_str(&format!("\\{:03o}", c as u32)),
            _ => escaped.push('?'),
        }
    }
    escaped
}
fn main() -> std::io::Result<()> {
let fixtures_dir = Path::new("tests/fixtures/profiles/slide_deck");
// Ensure directory exists
std::fs::create_dir_all(fixtures_dir)?;
// 1. Pitch deck (10 slides)
let mut builder = SlideDeckBuilder::new("Q3 2024 Product Roadmap", "Jane Smith, VP Product");
let pitch_titles = vec![
"Q3 2024 Product Roadmap",
"Agenda",
"Market Overview",
"Product Vision",
"Key Features",
"Technical Architecture",
"Go-to-Market Strategy",
"Pricing & Packaging",
"Next Steps",
"Q&A",
];
for title in &pitch_titles {
builder.add_slide(title);
}
let pdf_data = builder.build();
let mut file = File::create(fixtures_dir.join("pitch_deck.pdf"))?;
file.write_all(&pdf_data)?;
println!("Created pitch_deck.pdf");
// 2. Academic lecture (40 slides)
let mut builder = SlideDeckBuilder::new("Introduction to Machine Learning", "Prof. Robert Chen, PhD");
let academic_titles = vec![
"Introduction to Machine Learning",
"Overview",
"What is a Neural Network?",
"Perceptrons",
"Multi-Layer Networks",
"Activation Functions",
"Backpropagation",
"Loss Functions",
"Optimization",
"Regularization",
"Convolutional Networks",
"Recurrent Networks",
"Transformer Architecture",
"Attention Mechanisms",
"Training Strategies",
"Hyperparameter Tuning",
"Evaluation Metrics",
"Case Studies",
"Current Research",
"Future Directions",
"Summary",
"References",
"Q1",
"Q2",
"Q3",
"Q4",
"Q5",
"Q6",
"Q7",
"Q8",
"Q9",
"Q10",
"Q11",
"Q12",
"Q13",
"Q14",
"Q15",
"Q16",
"Thank You",
];
for title in &academic_titles {
builder.add_slide(title);
}
let pdf_data = builder.build();
let mut file = File::create(fixtures_dir.join("academic_lecture.pdf"))?;
file.write_all(&pdf_data)?;
println!("Created academic_lecture.pdf");
// 3. Corporate kickoff (15 slides)
let mut builder = SlideDeckBuilder::new("2025 Annual Kickoff", "Michael Johnson, CEO");
let corporate_titles = vec![
"2025 Annual Kickoff",
"Welcome",
"2024 Recap",
"Financial Highlights",
"Customer Success Stories",
"Product Roadmap 2025",
"Market Expansion",
"Team Growth",
"Strategic Priorities",
"OKR Framework",
"Investment Areas",
"Culture & Values",
"Events Calendar",
"Leadership Team",
"Thank You",
];
for title in &corporate_titles {
builder.add_slide(title);
}
let pdf_data = builder.build();
let mut file = File::create(fixtures_dir.join("corporate_kickoff.pdf"))?;
file.write_all(&pdf_data)?;
println!("Created corporate_kickoff.pdf");
// 4. Bilingual deck (12 slides)
let mut builder = SlideDeckBuilder::new("Informe Anual 2024", "Maria Garcia / Director General");
let bilingual_titles = vec![
"Informe Anual 2024",
"Resumen Ejecutivo",
"Logros 2024",
"Crecimiento de Ingresos",
"Expansión Global",
"Productos Nuevos",
"Sostenibilidad",
"Compromiso Social",
"Perspectivas 2025",
"Estrategia",
"Próximos Pasos",
"Gracias",
];
for title in &bilingual_titles {
builder.add_slide(title);
}
let pdf_data = builder.build();
let mut file = File::create(fixtures_dir.join("bilingual_deck.pdf"))?;
file.write_all(&pdf_data)?;
println!("Created bilingual_deck.pdf");
// 5. Google Slides handout (4 pages with multiple titles each)
let mut builder = SlideDeckBuilder::new("Team Onboarding Guide", "HR Department");
let handout_titles = vec![
"Welcome!",
"Company Values",
"Our Mission",
"Tools & Resources",
"Benefits Overview",
"Who's Who",
"First Week Checklist",
"Questions?",
"Contact HR",
"Thank You",
"Insurance",
"401k",
"PTO Policy",
"Remote Work",
"Emergency Contacts",
];
// For handout mode, each page shows multiple slide titles
let handout_pages = vec![
"Welcome! - Company Values - Our Mission",
"Tools & Resources - Benefits Overview - Who's Who",
"First Week Checklist - Questions? - Contact HR",
"Thank You - Insurance - 401k - PTO Policy - Remote Work - Emergency Contacts",
];
for page_title in &handout_pages {
builder.add_slide(page_title);
}
let pdf_data = builder.build();
let mut file = File::create(fixtures_dir.join("googleslides_handout.pdf"))?;
file.write_all(&pdf_data)?;
println!("Created googleslides_handout.pdf");
println!("\nGenerated 5 slide deck fixtures in tests/fixtures/profiles/slide_deck/");
Ok(())
}