pdfrs 0.1.2

A CLI tool to read/write PDFs and convert to/from markdown
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
use pdfrs::builder::PdfBuilder;
use pdfrs::parallel;
use pdfrs::pdf::PdfDocument;
use std::path::Path;

/// Writes a small fixture PDF to `path`.
///
/// The document consists of a level-1 heading holding `title` followed by one
/// paragraph that repeats the title. It is generated on the portrait layout
/// with the `"Helvetica"` / `12.0` arguments (presumably font name and size).
///
/// # Panics
///
/// Panics if the generator returns an error.
fn create_test_pdf(path: &str, title: &str) {
    let heading = pdfrs::elements::Element::Heading {
        text: title.to_string(),
        level: 1,
    };
    let body = pdfrs::elements::Element::Paragraph {
        text: format!("This is a test document: {}", title),
    };
    let layout = pdfrs::pdf_generator::PageLayout::portrait();

    let elements = vec![heading, body];
    pdfrs::pdf_generator::create_pdf_from_elements_with_layout(
        path,
        &elements,
        "Helvetica",
        12.0,
        layout,
    )
    .unwrap();
}

/// Merges two freshly generated PDFs through `parallel::merge_pdfs_parallel`
/// and checks that the merged file exists and loads with at least one object.
#[test]
fn test_parallel_merge_pdfs() {
    std::fs::create_dir_all("tests/output").unwrap();

    let first_input = "tests/output/int_merge1.pdf";
    let second_input = "tests/output/int_merge2.pdf";
    let merged_output = "tests/output/int_merged.pdf";

    // Generate both inputs, then confirm each one is on disk.
    create_test_pdf(first_input, "Document 1");
    create_test_pdf(second_input, "Document 2");
    assert!(Path::new(first_input).exists());
    assert!(Path::new(second_input).exists());

    let merge_outcome =
        parallel::merge_pdfs_parallel(&[first_input, second_input], merged_output);
    assert!(
        merge_outcome.is_ok(),
        "Parallel merge failed: {:?}",
        merge_outcome.err()
    );
    assert!(
        Path::new(merged_output).exists(),
        "Merged PDF was not created"
    );

    // Loading the output is the validity check; page counts are not inspected.
    let merged_doc = PdfDocument::load_from_file(merged_output).unwrap();
    assert!(!merged_doc.objects.is_empty());

    // Best-effort cleanup: removal errors are deliberately ignored.
    for leftover in [first_input, second_input, merged_output].iter() {
        std::fs::remove_file(*leftover).ok();
    }
}

/// Builds a document that touches every builder element used here (heading,
/// paragraph, code block, list items, rule, page break) and checks that the
/// produced bytes look like a PDF.
#[test]
fn test_builder_api_generates_valid_pdf() {
    // The chain is kept in a single statement so that any temporaries created
    // by the builder live until `build_bytes` has run.
    let built = PdfBuilder::new()
        .add_heading("Integration Test", 1)
        .add_paragraph("This tests the builder API.")
        .add_code_block("fn main() {}", "rust")
        .add_list_item("Item 1", 0)
        .add_ordered_item(1, "Ordered item", 0)
        .add_horizontal_rule()
        .add_page_break()
        .build_bytes();
    assert!(built.is_ok());

    let pdf = built.unwrap();
    // More than 100 bytes and the `%PDF` magic at the very start.
    assert!(pdf.len() > 100);
    assert!(pdf.starts_with(b"%PDF"));
}

/// Checks that the builder accepts a landscape layout together with custom
/// margins and still produces bytes successfully.
#[test]
fn test_builder_api_with_layout() {
    let landscape = pdfrs::pdf_generator::PageLayout::landscape();

    let outcome = PdfBuilder::new()
        .with_layout(landscape)
        .with_margins(50.0)
        .add_heading("Landscape", 1)
        .build_bytes();

    assert!(outcome.is_ok());
}

/// Generates the same repetitive paragraph with the `web` and `print`
/// optimization profiles, checks that the web output is not larger than the
/// print output, and confirms both outputs can be parsed back.
#[test]
fn test_optimized_pdf_generator_compression() {
    // Highly repetitive text, so any compression has something to work with.
    let paragraph = pdfrs::elements::Element::Paragraph {
        text: "This is a test document for compression. ".repeat(50),
    };
    let elements = vec![paragraph];

    // Web profile first (expected to compress harder), then the print profile.
    let web_bytes = pdfrs::optimization::OptimizedPdfGenerator::new(
        pdfrs::optimization::OptimizationProfile::web(),
    )
    .generate_bytes(&elements)
    .unwrap();
    let print_bytes = pdfrs::optimization::OptimizedPdfGenerator::new(
        pdfrs::optimization::OptimizationProfile::print(),
    )
    .generate_bytes(&elements)
    .unwrap();

    assert!(web_bytes.starts_with(b"%PDF"));
    assert!(print_bytes.starts_with(b"%PDF"));

    // Equality is tolerated; only a larger web output is a failure.
    assert!(
        web_bytes.len() <= print_bytes.len(),
        "Web-optimized PDF ({}) should not be larger than print ({})",
        web_bytes.len(),
        print_bytes.len()
    );

    // Round-trip both outputs through the filesystem so the loader can parse them.
    std::fs::create_dir_all("tests/output").unwrap();
    let web_path = "tests/output/int_web.pdf";
    let print_path = "tests/output/int_print.pdf";
    std::fs::write(web_path, &web_bytes).unwrap();
    std::fs::write(print_path, &print_bytes).unwrap();

    let parsed_web = pdfrs::pdf::PdfDocument::load_from_file(web_path).unwrap();
    let parsed_print = pdfrs::pdf::PdfDocument::load_from_file(print_path).unwrap();
    assert!(!parsed_web.objects.is_empty());
    assert!(!parsed_print.objects.is_empty());

    // Best-effort cleanup: removal errors are ignored.
    std::fs::remove_file(web_path).ok();
    std::fs::remove_file(print_path).ok();
}

/// Writes a heading and two paragraphs through `StreamingPdfGenerator` and
/// checks that the finished file exists and can be loaded.
#[test]
fn test_streaming_pdf_generator() {
    std::fs::create_dir_all("tests/output").unwrap();
    let output = "tests/output/int_streaming.pdf";

    let layout = pdfrs::pdf_generator::PageLayout::portrait();
    let mut writer = pdfrs::streaming::StreamingPdfGenerator::new(output, layout).unwrap();

    // Content is appended incrementally; `finish` completes the document.
    writer.add_heading("Streaming Test", 1).unwrap();
    writer.add_paragraph("This is paragraph one.").unwrap();
    writer.add_paragraph("This is paragraph two.").unwrap();
    writer.finish().unwrap();

    assert!(std::path::Path::new(output).exists());

    // The file must parse back and contain at least one object.
    let parsed = pdfrs::pdf::PdfDocument::load_from_file(output).unwrap();
    assert!(!parsed.objects.is_empty());

    // Best-effort cleanup: a removal error is ignored.
    std::fs::remove_file(output).ok();
}

/// Signs a PDF with `pdfrs::pdf_ops::sign_pdf` and checks that
/// `pdfrs::pdf_ops::verify_pdf_signature` reports the signer name, reason and
/// location that were supplied.
///
/// The fixture `tests/fixtures/simple.pdf` is used when it exists. Otherwise a
/// stand-in input is generated under `target/test_output` and removed at the
/// end, so the test neither requires the `tests/fixtures/` directory to exist
/// nor leaves generated files in the source tree.
#[test]
fn test_digital_signature_sign_and_verify() {
    let base = env!("CARGO_MANIFEST_DIR");
    let out_dir = format!("{}/target/test_output", base);
    std::fs::create_dir_all(&out_dir).unwrap();

    let fixture_pdf = format!("{}/tests/fixtures/simple.pdf", base);
    let signed_pdf = format!("{}/signed_test.pdf", out_dir);

    // Prefer the fixture; fall back to a throwaway document in the output
    // directory, which is known to exist at this point.
    let fixture_available = std::path::Path::new(&fixture_pdf).exists();
    let input_pdf = if fixture_available {
        fixture_pdf
    } else {
        let generated_pdf = format!("{}/signature_input.pdf", out_dir);
        let elements = vec![
            pdfrs::elements::Element::Heading {
                text: "Test Document".to_string(),
                level: 1,
            },
            pdfrs::elements::Element::Paragraph {
                text: "This is a test document for digital signatures.".to_string(),
            },
        ];
        pdfrs::pdf_generator::create_pdf_from_elements_with_layout(
            &generated_pdf,
            &elements,
            "Helvetica",
            12.0,
            pdfrs::pdf_generator::PageLayout::portrait(),
        )
        .unwrap();
        generated_pdf
    };

    // Sign the PDF
    let sig = pdfrs::security::DigitalSignature::new("Test Signer")
        .with_reason("Testing digital signatures")
        .with_location("Test Location");

    pdfrs::pdf_ops::sign_pdf(&input_pdf, &signed_pdf, &sig).unwrap();

    assert!(std::path::Path::new(&signed_pdf).exists());
    assert!(std::fs::metadata(&signed_pdf).unwrap().len() > 0);

    // Verify the signature
    let signatures = pdfrs::pdf_ops::verify_pdf_signature(&signed_pdf).unwrap();
    assert!(!signatures.is_empty(), "Should find at least one signature");

    // Only the first reported signature is inspected.
    let sig_info = &signatures[0];
    assert_eq!(sig_info.signer_name, "Test Signer");
    assert_eq!(sig_info.reason.as_deref(), Some("Testing digital signatures"));
    assert_eq!(sig_info.location.as_deref(), Some("Test Location"));

    // Best-effort cleanup. The fixture is never deleted; only the generated
    // stand-in is.
    std::fs::remove_file(&signed_pdf).ok();
    if !fixture_available {
        std::fs::remove_file(&input_pdf).ok();
    }
}

/// Generates a PDF from three table rows (one header, two data rows) and
/// checks that `extract_tables_from_pdf` returns at least one table whose
/// text contains the header cell "Name" or the data cell "Alice".
#[test]
fn test_extract_tables_from_pdf() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let out_dir = format!("{}/target/test_output", manifest_dir);
    std::fs::create_dir_all(&out_dir).unwrap();

    let table_pdf = format!("{}/table_test.pdf", out_dir);

    // Turns a row of literals into the owned strings a `TableRow` stores.
    let owned_cells = |row: &[&str]| -> Vec<String> {
        row.iter().map(|cell| cell.to_string()).collect()
    };

    // Only the header row carries explicit per-column alignments.
    let header_row = pdfrs::elements::Element::TableRow {
        cells: owned_cells(&["Name", "Age", "City"]),
        is_separator: false,
        alignments: vec![
            pdfrs::elements::TableAlignment::Left,
            pdfrs::elements::TableAlignment::Center,
            pdfrs::elements::TableAlignment::Right,
        ],
    };
    let alice_row = pdfrs::elements::Element::TableRow {
        cells: owned_cells(&["Alice", "30", "New York"]),
        is_separator: false,
        alignments: Vec::new(),
    };
    let bob_row = pdfrs::elements::Element::TableRow {
        cells: owned_cells(&["Bob", "25", "London"]),
        is_separator: false,
        alignments: Vec::new(),
    };
    let elements = vec![header_row, alice_row, bob_row];

    pdfrs::pdf_generator::create_pdf_from_elements_with_layout(
        &table_pdf,
        &elements,
        "Helvetica",
        12.0,
        pdfrs::pdf_generator::PageLayout::portrait(),
    )
    .unwrap();

    // Run the extraction and inspect the first table it reports.
    let tables = pdfrs::pdf_ops::extract_tables_from_pdf(&table_pdf).unwrap();
    assert!(!tables.is_empty(), "Should find at least one table");

    let first_table = &tables[0];
    let mentions_known_cell = first_table.contains("Name") || first_table.contains("Alice");
    assert!(mentions_known_cell, "CSV should contain table data");

    // Best-effort cleanup: a removal error is ignored.
    std::fs::remove_file(&table_pdf).ok();
}

/// Creates a PDF with two text form fields, detects them, fills both, and
/// checks that the filled values are reported when the output is inspected.
#[test]
fn test_form_field_detect_and_fill() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let out_dir = format!("{}/target/test_output", manifest_dir);
    std::fs::create_dir_all(&out_dir).unwrap();

    let form_pdf = format!("{}/form_detect_test.pdf", out_dir);
    let filled_pdf = format!("{}/form_filled_test.pdf", out_dir);
    let base_pdf = format!("{}/tmp_form_base.pdf", out_dir);

    // `firstName` has a default value and is required; `age` has neither.
    let first_name_field = pdfrs::pdf_ops::FormField {
        name: "firstName".to_string(),
        field_type: pdfrs::pdf_ops::FormFieldType::Text,
        x: 100.0,
        y: 700.0,
        width: 200.0,
        height: 20.0,
        default_value: Some("Default".to_string()),
        options: vec![],
        required: true,
    };
    let age_field = pdfrs::pdf_ops::FormField {
        name: "age".to_string(),
        field_type: pdfrs::pdf_ops::FormFieldType::Text,
        x: 100.0,
        y: 670.0,
        width: 50.0,
        height: 20.0,
        default_value: None,
        options: vec![],
        required: false,
    };
    let form_fields = vec![first_name_field, age_field];

    // A plain base PDF is generated first. Nothing later in this test reads
    // it; it is only removed again during cleanup.
    let elements = vec![pdfrs::elements::Element::Paragraph {
        text: "Please fill out the form.".to_string(),
    }];
    pdfrs::pdf_generator::create_pdf_from_elements_with_layout(
        &base_pdf,
        &elements,
        "Helvetica",
        12.0,
        pdfrs::pdf_generator::PageLayout::portrait(),
    )
    .unwrap();

    // The form PDF itself is produced by `create_pdf_with_form_fields`.
    pdfrs::pdf_ops::create_pdf_with_form_fields(
        &form_pdf,
        "Please fill out the form.",
        &form_fields,
    )
    .unwrap();

    // Detection on the unfilled form: `firstName` must report its default.
    let detected = pdfrs::pdf_ops::detect_form_fields(&form_pdf).unwrap();
    assert!(!detected.is_empty(), "Should detect form fields");

    let first_name = detected.iter().find(|field| field.name == "firstName");
    assert!(first_name.is_some(), "Should find firstName field");
    let first_name = first_name.unwrap();
    assert_eq!(first_name.field_type, "text");
    assert_eq!(first_name.value.as_deref(), Some("Default"));

    // Fill both fields and write the result to a separate file.
    let values: std::collections::HashMap<String, String> =
        [("firstName", "Alice"), ("age", "30")]
            .iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect();
    pdfrs::pdf_ops::fill_form_fields(&form_pdf, &filled_pdf, &values).unwrap();

    // Detection on the filled form must report the new values.
    let filled = pdfrs::pdf_ops::detect_form_fields(&filled_pdf).unwrap();

    let filled_first = filled.iter().find(|field| field.name == "firstName");
    assert!(filled_first.is_some());
    assert_eq!(filled_first.unwrap().value.as_deref(), Some("Alice"));

    let filled_age = filled.iter().find(|field| field.name == "age");
    assert!(filled_age.is_some());
    assert_eq!(filled_age.unwrap().value.as_deref(), Some("30"));

    // Best-effort cleanup: removal errors are ignored.
    std::fs::remove_file(&form_pdf).ok();
    std::fs::remove_file(&filled_pdf).ok();
    std::fs::remove_file(&base_pdf).ok();
}

/// Renders a markdown document with H1/H2 headings to PDF and checks that
/// `detect_document_structure` reports headings (including "Introduction" or
/// "Results") and at least one section.
#[test]
fn test_detect_document_structure() {
    let manifest_dir = env!("CARGO_MANIFEST_DIR");
    let out_dir = format!("{}/target/test_output", manifest_dir);
    std::fs::create_dir_all(&out_dir).unwrap();

    let struct_pdf = format!("{}/structure_test.pdf", out_dir);

    // Two top-level headings; the first one has two second-level headings.
    let markdown = r#"# Introduction

This is the introduction paragraph.

## Background

Some background text here.
More background text.

## Methods

Method description goes here.

# Results

Result paragraph one.
Result paragraph two.
"#;

    let layout = pdfrs::pdf_generator::PageLayout::portrait();
    let elements = pdfrs::elements::parse_markdown(markdown);
    pdfrs::pdf_generator::create_pdf_from_elements_with_layout(
        &struct_pdf,
        &elements,
        "Helvetica",
        12.0,
        layout,
    )
    .unwrap();

    let structure = pdfrs::pdf_ops::detect_document_structure(&struct_pdf).unwrap();

    // At least one heading must be reported at all.
    assert!(
        !structure.headings.is_empty(),
        "Should detect headings in structured PDF"
    );

    // Either top-level heading is enough; both are not required.
    let found_top_level = structure
        .headings
        .iter()
        .any(|heading| heading.text.contains("Introduction") || heading.text.contains("Results"));
    assert!(
        found_top_level,
        "Should detect 'Introduction' or 'Results' heading, got: {:?}",
        structure.headings
    );

    // Only the presence of sections is checked, not their contents.
    assert!(!structure.sections.is_empty(), "Should have sections");

    // Best-effort cleanup: a removal error is ignored.
    std::fs::remove_file(&struct_pdf).ok();
}

/// Runs `optimize_pdf_bytes` with the `Web` profile settings over a generated
/// PDF and checks that the result still loads, still contains the original
/// heading text, and is more than 100 bytes long.
///
/// No size comparison against the source is made: for small text-only PDFs
/// recompression may not reduce the size.
#[test]
fn test_optimize_pdf_recompression() {
    let base = env!("CARGO_MANIFEST_DIR");
    let out_dir = format!("{}/target/test_output", base);
    std::fs::create_dir_all(&out_dir).unwrap();

    let source_pdf = format!("{}/opt_source.pdf", out_dir);
    let optimized_pdf = format!("{}/opt_output.pdf", out_dir);

    // Create a PDF with content
    let markdown = r#"# Optimization Test

This is a test paragraph for PDF optimization.
It contains enough text to create a multi-page document.

## Section Two

More text here to fill the page.
And even more text to ensure we have content streams.
"#;
    let elements = pdfrs::elements::parse_markdown(markdown);
    pdfrs::pdf_generator::create_pdf_from_elements_with_layout(
        &source_pdf,
        &elements,
        "Helvetica",
        12.0,
        pdfrs::pdf_generator::PageLayout::portrait(),
    )
    .unwrap();

    // Optimize the source bytes in memory using the Web profile's settings,
    // then persist the result so it can be loaded from disk.
    let profile = pdfrs::optimization::OptimizationProfile::Web;
    let settings = profile.settings();
    let pdf_bytes = std::fs::read(&source_pdf).unwrap();
    let optimized = pdfrs::optimization::optimize_pdf_bytes(&pdf_bytes, settings).unwrap();
    std::fs::write(&optimized_pdf, &optimized).unwrap();

    // Verify optimized PDF is valid and can be loaded
    let doc = pdfrs::pdf::PdfDocument::load_from_file(&optimized_pdf).unwrap();
    assert!(!doc.objects.is_empty(), "Optimized PDF should have objects");

    // Verify text can still be extracted
    let text = doc.get_text().unwrap();
    assert!(
        text.contains("Optimization Test"),
        "Optimized PDF should still contain original text"
    );

    // For small text-only PDFs, recompression may not always reduce size,
    // but it should not corrupt the document.
    assert!(
        optimized.len() > 100,
        "Optimized PDF should be non-trivial in size"
    );

    // Best-effort cleanup: removal errors are ignored.
    std::fs::remove_file(&source_pdf).ok();
    std::fs::remove_file(&optimized_pdf).ok();
}

/// Embeds a hand-built 1x1 BMP into a PDF with `add_image_to_pdf` and checks
/// that `extract_images_from_pdf` returns at least one image.
///
/// The BMP is assembled byte by byte so the test needs no image fixture. Its
/// header sizes are derived from the section lengths, so the declared file
/// size always matches the bytes actually written.
#[test]
fn test_extract_images_from_pdf() {
    // Section lengths of an uncompressed BMP with a BITMAPINFOHEADER.
    const FILE_HEADER_LEN: u32 = 14;
    const DIB_HEADER_LEN: u32 = 40;
    // One 24-bit pixel (3 bytes, stored blue-green-red) plus 1 padding byte,
    // because each pixel row is padded to a 4-byte boundary.
    const PIXEL_DATA: [u8; 4] = [0xFF, 0x00, 0x00, 0x00];

    let base = env!("CARGO_MANIFEST_DIR");
    let out_dir = format!("{}/target/test_output", base);
    std::fs::create_dir_all(&out_dir).unwrap();

    let pixel_offset = FILE_HEADER_LEN + DIB_HEADER_LEN;
    // The cast cannot truncate: PIXEL_DATA is a 4-byte array.
    let file_size = pixel_offset + PIXEL_DATA.len() as u32;

    // Create a minimal 1x1 BMP file programmatically
    let bmp_path = format!("{}/test_image.bmp", out_dir);
    let mut bmp_data = Vec::with_capacity(file_size as usize);
    // BMP file header (14 bytes)
    bmp_data.extend_from_slice(b"BM"); // signature
    bmp_data.extend_from_slice(&file_size.to_le_bytes()); // file size
    bmp_data.extend_from_slice(&[0, 0]); // reserved
    bmp_data.extend_from_slice(&[0, 0]); // reserved
    bmp_data.extend_from_slice(&pixel_offset.to_le_bytes()); // offset to pixel data
    // DIB header (BITMAPINFOHEADER, 40 bytes)
    bmp_data.extend_from_slice(&DIB_HEADER_LEN.to_le_bytes()); // header size
    bmp_data.extend_from_slice(&1_i32.to_le_bytes()); // width (signed in the format)
    bmp_data.extend_from_slice(&1_i32.to_le_bytes()); // height (signed in the format)
    bmp_data.extend_from_slice(&1_u16.to_le_bytes()); // planes
    bmp_data.extend_from_slice(&24_u16.to_le_bytes()); // bits per pixel
    bmp_data.extend_from_slice(&0_u32.to_le_bytes()); // compression (none)
    bmp_data.extend_from_slice(&0_u32.to_le_bytes()); // image size (0 allowed when uncompressed)
    bmp_data.extend_from_slice(&2835_u32.to_le_bytes()); // X pixels per meter
    bmp_data.extend_from_slice(&2835_u32.to_le_bytes()); // Y pixels per meter
    bmp_data.extend_from_slice(&0_u32.to_le_bytes()); // colors used
    bmp_data.extend_from_slice(&0_u32.to_le_bytes()); // important colors
    // Pixel data
    bmp_data.extend_from_slice(&PIXEL_DATA);

    // Guard against the header and the buffer drifting apart again.
    assert_eq!(
        bmp_data.len(),
        file_size as usize,
        "BMP header file size must match the bytes written"
    );
    std::fs::write(&bmp_path, &bmp_data).unwrap();

    // Embed the image in a PDF
    let image_pdf = format!("{}/image_embed_test.pdf", out_dir);
    pdfrs::image::add_image_to_pdf(&image_pdf, &bmp_path, 100.0, 100.0, 50.0, 50.0).unwrap();

    // Extract images from the PDF
    let extract_dir = format!("{}/extracted_test_images", out_dir);
    let extracted = pdfrs::pdf_ops::extract_images_from_pdf(&image_pdf, &extract_dir).unwrap();

    assert!(!extracted.is_empty(), "Should extract at least one image from the PDF");

    // Best-effort cleanup: removal errors are ignored.
    std::fs::remove_file(&bmp_path).ok();
    std::fs::remove_file(&image_pdf).ok();
    std::fs::remove_dir_all(&extract_dir).ok();
}