Skip to main content

superbook_pdf/
image_extract.rs

1//! Image Extraction module
2//!
3//! Provides functionality to extract page images from PDF files.
4//!
5//! # Features
6//!
//! - Extract pages as PNG, JPEG, BMP, or TIFF
//! - Configurable DPI (72-1200)
//! - Color space conversion (RGB, Grayscale, CMYK)
//! - Progress callbacks (a `parallel` worker count can be configured, but pages are currently extracted sequentially)
//! - Transparent background handling
12//!
13//! # Example
14//!
15//! ```rust,no_run
16//! use superbook_pdf::{ExtractOptions, MagickExtractor, ImageFormat, ColorSpace};
17//! use std::path::Path;
18//!
19//! // Create options
20//! let options = ExtractOptions::builder()
21//!     .dpi(300)
22//!     .format(ImageFormat::Png)
23//!     .colorspace(ColorSpace::Rgb)
24//!     .parallel(4)
25//!     .build();
26//!
27//! // Extract a single page
28//! // let result = MagickExtractor::extract_page(
29//! //     Path::new("input.pdf"),
30//! //     0,
31//! //     Path::new("output/page_0.png"),
32//! //     &options,
33//! // );
34//! ```
35
36use std::path::{Path, PathBuf};
37use std::process::Command;
38use thiserror::Error;
39
40// ============================================================
41// Constants
42// ============================================================
43
/// Standard DPI for document scanning; the `dpi` used by `ExtractOptions::default()`.
const DEFAULT_DPI: u32 = 300;

/// High quality DPI for archival purposes; the `dpi` used by `ExtractOptions::high_quality()`.
const HIGH_QUALITY_DPI: u32 = 600;

/// Low DPI for fast previews; the `dpi` used by `ExtractOptions::fast()`.
const FAST_DPI: u32 = 150;

/// Minimum allowed DPI; lower bound applied by `ExtractOptionsBuilder::dpi`.
const MIN_DPI: u32 = 72;

/// Maximum allowed DPI; upper bound applied by `ExtractOptionsBuilder::dpi`.
const MAX_DPI: u32 = 1200;

/// Default white background color as `[red, green, blue]`; the `background`
/// used by `ExtractOptions::default()`.
const WHITE_BACKGROUND: [u8; 3] = [255, 255, 255];
61
/// Image extraction error types
///
/// All fallible functions in this module return this enum through the
/// module-local [`Result`] alias.
#[derive(Debug, Error)]
pub enum ExtractError {
    /// The input PDF path does not exist; carries the path that was checked.
    #[error("PDF file not found: {0}")]
    PdfNotFound(PathBuf),

    /// A probe file could not be written into the output directory; carries
    /// that directory.
    #[error("Output directory not writable: {0}")]
    OutputNotWritable(PathBuf),

    /// Extraction could not be completed for `page`; `reason` is a
    /// human-readable description (e.g. the message of an underlying decode
    /// or PDF-loading error). Failures not tied to a specific page use page 0.
    #[error("Extraction failed for page {page}: {reason}")]
    ExtractionFailed { page: usize, reason: String },

    /// An external command ran but exited unsuccessfully; carries the text
    /// it wrote to stderr.
    #[error("External tool error: {0}")]
    ExternalToolError(String),

    /// An I/O operation failed. Converted automatically from
    /// `std::io::Error` by the `?` operator.
    #[error("IO error: {0}")]
    IoError(#[from] std::io::Error),
}

/// Result alias used throughout this module, with [`ExtractError`] as the error type.
pub type Result<T> = std::result::Result<T, ExtractError>;
82
/// Image extraction options
///
/// Construct with [`ExtractOptions::default`], one of the presets
/// (`high_quality`, `fast`, `grayscale`) or [`ExtractOptions::builder`].
/// `Debug` is implemented by hand because the callback field is not `Debug`.
pub struct ExtractOptions {
    /// Output DPI. The builder clamps this to `MIN_DPI..=MAX_DPI`; setting the
    /// field directly bypasses that clamp.
    pub dpi: u32,
    /// Output format
    pub format: ImageFormat,
    /// Color space
    pub colorspace: ColorSpace,
    /// Background color (for transparency handling) as `[red, green, blue]`.
    /// `None` leaves transparency untouched.
    pub background: Option<[u8; 3]>,
    /// Number of parallel workers.
    /// NOTE(review): the extractors visible in this file process pages
    /// sequentially and do not read this value.
    pub parallel: usize,
    /// Progress callback, invoked as `callback(pages_done, total_pages)`
    /// after each page is extracted successfully.
    #[allow(clippy::type_complexity)]
    pub progress_callback: Option<Box<dyn Fn(usize, usize) + Send + Sync>>,
}
99
100impl std::fmt::Debug for ExtractOptions {
101    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102        f.debug_struct("ExtractOptions")
103            .field("dpi", &self.dpi)
104            .field("format", &self.format)
105            .field("colorspace", &self.colorspace)
106            .field("background", &self.background)
107            .field("parallel", &self.parallel)
108            .field(
109                "progress_callback",
110                &self.progress_callback.as_ref().map(|_| "<callback>"),
111            )
112            .finish()
113    }
114}
115
116impl Default for ExtractOptions {
117    fn default() -> Self {
118        Self {
119            dpi: DEFAULT_DPI,
120            format: ImageFormat::Png,
121            colorspace: ColorSpace::Rgb,
122            background: Some(WHITE_BACKGROUND),
123            parallel: num_cpus::get(),
124            progress_callback: None,
125        }
126    }
127}
128
129impl ExtractOptions {
130    /// Create a new options builder
131    pub fn builder() -> ExtractOptionsBuilder {
132        ExtractOptionsBuilder::default()
133    }
134
135    /// Create options for high quality extraction
136    pub fn high_quality() -> Self {
137        Self {
138            dpi: HIGH_QUALITY_DPI,
139            format: ImageFormat::Png,
140            ..Default::default()
141        }
142    }
143
144    /// Create options for fast extraction (lower quality)
145    pub fn fast() -> Self {
146        Self {
147            dpi: FAST_DPI,
148            format: ImageFormat::Jpeg { quality: 80 },
149            ..Default::default()
150        }
151    }
152
153    /// Create options for grayscale documents
154    pub fn grayscale() -> Self {
155        Self {
156            colorspace: ColorSpace::Grayscale,
157            ..Default::default()
158        }
159    }
160}
161
/// Builder for ExtractOptions
///
/// The derived `Default` starts from `ExtractOptions::default()`; each setter
/// consumes the builder and returns it, and `build` yields the finished options.
#[derive(Debug, Default)]
pub struct ExtractOptionsBuilder {
    // Options accumulated so far.
    options: ExtractOptions,
}
167
168impl ExtractOptionsBuilder {
169    /// Set output DPI (clamped to MIN_DPI-MAX_DPI)
170    #[must_use]
171    pub fn dpi(mut self, dpi: u32) -> Self {
172        self.options.dpi = dpi.clamp(MIN_DPI, MAX_DPI);
173        self
174    }
175
176    /// Set output format
177    #[must_use]
178    pub fn format(mut self, format: ImageFormat) -> Self {
179        self.options.format = format;
180        self
181    }
182
183    /// Set color space
184    #[must_use]
185    pub fn colorspace(mut self, colorspace: ColorSpace) -> Self {
186        self.options.colorspace = colorspace;
187        self
188    }
189
190    /// Set background color for transparency handling
191    #[must_use]
192    pub fn background(mut self, rgb: [u8; 3]) -> Self {
193        self.options.background = Some(rgb);
194        self
195    }
196
197    /// Disable background (keep transparency)
198    #[must_use]
199    pub fn no_background(mut self) -> Self {
200        self.options.background = None;
201        self
202    }
203
204    /// Set number of parallel workers
205    #[must_use]
206    pub fn parallel(mut self, workers: usize) -> Self {
207        self.options.parallel = workers.max(1);
208        self
209    }
210
211    /// Set progress callback
212    #[must_use]
213    pub fn progress_callback(mut self, callback: Box<dyn Fn(usize, usize) + Send + Sync>) -> Self {
214        self.options.progress_callback = Some(callback);
215        self
216    }
217
218    /// Build the options
219    #[must_use]
220    pub fn build(self) -> ExtractOptions {
221        self.options
222    }
223}
224
/// Output image formats
///
/// `Png` is the default. Derives `PartialEq`/`Eq` (like `ColorSpace`) so that
/// formats can be compared directly.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ImageFormat {
    /// PNG output.
    #[default]
    Png,
    /// JPEG output; `quality` is passed through to the external tool that
    /// performs the encoding.
    Jpeg {
        quality: u8,
    },
    /// BMP output.
    Bmp,
    /// TIFF output.
    Tiff,
}

impl ImageFormat {
    /// Get file extension for format (without a leading dot).
    ///
    /// The returned string is a literal, so it is `'static` and can outlive
    /// the `ImageFormat` value it was obtained from.
    pub fn extension(&self) -> &'static str {
        match self {
            ImageFormat::Png => "png",
            ImageFormat::Jpeg { .. } => "jpg",
            ImageFormat::Bmp => "bmp",
            ImageFormat::Tiff => "tiff",
        }
    }
}
248
/// Color space options
///
/// `Rgb` is the default. How each value is honored depends on the extractor:
/// `MagickExtractor` passes a `-colorspace` argument for every variant, while
/// `PopplerExtractor` only acts on `Grayscale`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum ColorSpace {
    /// RGB output (passed to ImageMagick as `sRGB`).
    #[default]
    Rgb,
    /// Grayscale output.
    Grayscale,
    /// CMYK output.
    Cmyk,
}
257
/// Extracted page information
///
/// Describes one image file written by an extractor.
#[derive(Debug)]
pub struct ExtractedPage {
    /// Zero-based page index for `MagickExtractor` and `PopplerExtractor`.
    /// For `LopdfExtractor` this is the running count of extracted images,
    /// not a page number.
    pub page_index: usize,
    /// Location of the written image file.
    pub path: PathBuf,
    /// Image width: read back from the written file by the tool-based
    /// extractors, or taken from the PDF image dictionary by `LopdfExtractor`.
    pub width: u32,
    /// Image height, obtained the same way as `width`.
    pub height: u32,
    /// Format recorded for the written file.
    pub format: ImageFormat,
}
267
/// Image extractor trait
///
/// Common signature for PDF page extractors. Both functions are associated
/// functions (no `self`).
/// NOTE(review): no `impl ImageExtractor for ...` is visible in this part of
/// the file; the extractors here expose inherent functions with matching
/// signatures instead.
pub trait ImageExtractor {
    /// Extract all pages from PDF
    ///
    /// Writes one image per page into `output_dir` and returns a description
    /// of each written file.
    fn extract_all(
        pdf_path: &Path,
        output_dir: &Path,
        options: &ExtractOptions,
    ) -> Result<Vec<ExtractedPage>>;

    /// Extract a single page
    ///
    /// Writes the page at `page_index` to `output_path` and returns a
    /// description of the written file.
    fn extract_page(
        pdf_path: &Path,
        page_index: usize,
        output_path: &Path,
        options: &ExtractOptions,
    ) -> Result<ExtractedPage>;
}
285
/// ImageMagick-based extractor
///
/// Stateless; all functionality is exposed as associated functions that run
/// the external `magick` command.
pub struct MagickExtractor;
288
289impl MagickExtractor {
290    /// Extract a single page from PDF using ImageMagick
291    pub fn extract_page(
292        pdf_path: &Path,
293        page_index: usize,
294        output_path: &Path,
295        options: &ExtractOptions,
296    ) -> Result<ExtractedPage> {
297        if !pdf_path.exists() {
298            return Err(ExtractError::PdfNotFound(pdf_path.to_path_buf()));
299        }
300
301        // Check if output directory is writable
302        if let Some(parent) = output_path.parent() {
303            if !parent.exists() {
304                std::fs::create_dir_all(parent)?;
305            }
306            // Try to create a test file to verify writability
307            let test_file = parent.join(".write_test");
308            if std::fs::write(&test_file, b"test").is_err() {
309                return Err(ExtractError::OutputNotWritable(parent.to_path_buf()));
310            }
311            let _ = std::fs::remove_file(test_file);
312        }
313
314        let mut cmd = Command::new("magick");
315        cmd.arg("-density").arg(options.dpi.to_string());
316
317        // Set background color for transparency
318        if let Some(bg) = options.background {
319            cmd.arg("-background")
320                .arg(format!("rgb({},{},{})", bg[0], bg[1], bg[2]));
321            cmd.arg("-alpha").arg("remove");
322            cmd.arg("-alpha").arg("off");
323        }
324
325        // Set colorspace
326        match options.colorspace {
327            ColorSpace::Grayscale => {
328                cmd.arg("-colorspace").arg("gray");
329            }
330            ColorSpace::Cmyk => {
331                cmd.arg("-colorspace").arg("CMYK");
332            }
333            ColorSpace::Rgb => {
334                cmd.arg("-colorspace").arg("sRGB");
335            }
336        }
337
338        // Input file with page index
339        cmd.arg(format!("{}[{}]", pdf_path.display(), page_index));
340
341        // Set output quality for JPEG
342        if let ImageFormat::Jpeg { quality } = options.format {
343            cmd.arg("-quality").arg(quality.to_string());
344        }
345
346        // Output file
347        cmd.arg(output_path);
348
349        let output = cmd.output()?;
350
351        if !output.status.success() {
352            let stderr = String::from_utf8_lossy(&output.stderr);
353            return Err(ExtractError::ExternalToolError(stderr.to_string()));
354        }
355
356        // Get image dimensions
357        let img = image::open(output_path).map_err(|e| ExtractError::ExtractionFailed {
358            page: page_index,
359            reason: e.to_string(),
360        })?;
361
362        Ok(ExtractedPage {
363            page_index,
364            path: output_path.to_path_buf(),
365            width: img.width(),
366            height: img.height(),
367            format: options.format,
368        })
369    }
370
371    /// Extract all pages from PDF
372    pub fn extract_all(
373        pdf_path: &Path,
374        output_dir: &Path,
375        options: &ExtractOptions,
376    ) -> Result<Vec<ExtractedPage>> {
377        if !pdf_path.exists() {
378            return Err(ExtractError::PdfNotFound(pdf_path.to_path_buf()));
379        }
380
381        // Create output directory if it doesn't exist
382        if !output_dir.exists() {
383            std::fs::create_dir_all(output_dir)?;
384        }
385
386        // Check writability
387        let test_file = output_dir.join(".write_test");
388        if std::fs::write(&test_file, b"test").is_err() {
389            return Err(ExtractError::OutputNotWritable(output_dir.to_path_buf()));
390        }
391        let _ = std::fs::remove_file(test_file);
392
393        // Get page count using pdfinfo or similar
394        let page_count = Self::get_page_count(pdf_path)?;
395
396        // Extract pages (optionally in parallel)
397        let extension = options.format.extension();
398        let mut results = Vec::with_capacity(page_count);
399
400        // Sequential extraction for now (parallel would require more complex handling)
401        for i in 0..page_count {
402            let output_path = output_dir.join(format!("page_{:05}.{}", i, extension));
403
404            let result = Self::extract_page(pdf_path, i, &output_path, options)?;
405            results.push(result);
406
407            // Call progress callback if provided
408            if let Some(ref callback) = options.progress_callback {
409                callback(i + 1, page_count);
410            }
411        }
412
413        Ok(results)
414    }
415
416    /// Get the number of pages in a PDF
417    fn get_page_count(pdf_path: &Path) -> Result<usize> {
418        // Try using pdfinfo first
419        if let Ok(output) = Command::new("pdfinfo").arg(pdf_path).output() {
420            if output.status.success() {
421                let stdout = String::from_utf8_lossy(&output.stdout);
422                for line in stdout.lines() {
423                    if line.starts_with("Pages:") {
424                        if let Some(count_str) = line.split(':').nth(1) {
425                            if let Ok(count) = count_str.trim().parse() {
426                                return Ok(count);
427                            }
428                        }
429                    }
430                }
431            }
432        }
433
434        // Fallback: use ImageMagick identify
435        let output = Command::new("magick")
436            .args(["identify", "-format", "%n\n"])
437            .arg(pdf_path)
438            .output()?;
439
440        if output.status.success() {
441            let stdout = String::from_utf8_lossy(&output.stdout);
442            if let Some(line) = stdout.lines().next() {
443                if let Ok(count) = line.trim().parse() {
444                    return Ok(count);
445                }
446            }
447        }
448
449        // Last resort: try lopdf
450        let doc = lopdf::Document::load(pdf_path).map_err(|e| ExtractError::ExtractionFailed {
451            page: 0,
452            reason: e.to_string(),
453        })?;
454
455        Ok(doc.get_pages().len())
456    }
457}
458
/// Pure Rust PDF image extractor using lopdf
///
/// This extractor works without external tools by directly extracting
/// embedded JPEG images from the PDF. Works best with scanned PDFs where
/// each page is a single JPEG image (DCTDecode filter).
///
/// Stateless; also hosts the tool-detection helpers and `extract_auto`,
/// which picks an extractor based on which external tools are installed.
pub struct LopdfExtractor;
465
466impl LopdfExtractor {
467    /// Extract all embedded JPEG images from a PDF
468    ///
469    /// This method extracts XObject images with DCTDecode filter (JPEG).
470    /// For scanned PDFs, this typically means one JPEG per page.
471    pub fn extract_all(
472        pdf_path: &Path,
473        output_dir: &Path,
474        _options: &ExtractOptions,
475    ) -> Result<Vec<ExtractedPage>> {
476        if !pdf_path.exists() {
477            return Err(ExtractError::PdfNotFound(pdf_path.to_path_buf()));
478        }
479
480        // Create output directory
481        if !output_dir.exists() {
482            std::fs::create_dir_all(output_dir)?;
483        }
484
485        let doc = lopdf::Document::load(pdf_path).map_err(|e| ExtractError::ExtractionFailed {
486            page: 0,
487            reason: format!("Failed to load PDF: {}", e),
488        })?;
489
490        let mut results = Vec::new();
491        let mut image_count = 0;
492
493        // Iterate through all objects to find images
494        for (obj_id, object) in doc.objects.iter() {
495            if let Ok(stream) = object.as_stream() {
496                // Check if it's an Image XObject
497                if let Ok(subtype) = stream.dict.get(b"Subtype") {
498                    if let Ok(subtype_name) = subtype.as_name_str() {
499                        if subtype_name == "Image" {
500                            if let Ok(extracted) =
501                                Self::extract_image_stream(stream, image_count, obj_id, output_dir)
502                            {
503                                results.push(extracted);
504                                image_count += 1;
505                            }
506                        }
507                    }
508                }
509            }
510        }
511
512        if results.is_empty() {
513            return Err(ExtractError::ExtractionFailed {
514                page: 0,
515                reason: "No extractable images found. This PDF may require ImageMagick (install with: sudo apt install imagemagick).".to_string(),
516            });
517        }
518
519        // Sort by page index for consistent ordering
520        results.sort_by_key(|r| r.page_index);
521
522        Ok(results)
523    }
524
525    /// Extract a single image stream to file
526    fn extract_image_stream(
527        stream: &lopdf::Stream,
528        index: usize,
529        obj_id: &lopdf::ObjectId,
530        output_dir: &Path,
531    ) -> Result<ExtractedPage> {
532        // Get image dimensions
533        let width = stream
534            .dict
535            .get(b"Width")
536            .ok()
537            .and_then(|w| w.as_i64().ok())
538            .unwrap_or(0) as u32;
539        let height = stream
540            .dict
541            .get(b"Height")
542            .ok()
543            .and_then(|h| h.as_i64().ok())
544            .unwrap_or(0) as u32;
545
546        // Determine filter type
547        let filter = stream
548            .dict
549            .get(b"Filter")
550            .ok()
551            .and_then(|f| f.as_name_str().ok())
552            .unwrap_or("");
553
554        // Only extract JPEG images (DCTDecode) - these are most common in scanned PDFs
555        if filter == "DCTDecode" {
556            let output_path = output_dir.join(format!(
557                "page_{:04}_obj{}_{}.jpg",
558                index, obj_id.0, obj_id.1
559            ));
560
561            // JPEG data can be saved directly (no decompression needed)
562            std::fs::write(&output_path, &stream.content)?;
563
564            return Ok(ExtractedPage {
565                page_index: index,
566                path: output_path,
567                width,
568                height,
569                format: ImageFormat::Jpeg { quality: 95 },
570            });
571        }
572
573        // Try to decompress and save other formats
574        if let Ok(decoded) = stream.decompressed_content() {
575            if width > 0 && height > 0 {
576                // Determine channels from ColorSpace
577                let channels = stream
578                    .dict
579                    .get(b"ColorSpace")
580                    .ok()
581                    .and_then(|cs| cs.as_name_str().ok())
582                    .map(|name| match name {
583                        "DeviceGray" | "CalGray" => 1,
584                        "DeviceRGB" | "CalRGB" => 3,
585                        "DeviceCMYK" => 4,
586                        _ => 3,
587                    })
588                    .unwrap_or(3);
589
590                let expected_size = (width as usize) * (height as usize) * channels;
591                if decoded.len() >= expected_size {
592                    let output_path = output_dir.join(format!(
593                        "page_{:04}_obj{}_{}.png",
594                        index, obj_id.0, obj_id.1
595                    ));
596
597                    let img_opt = match channels {
598                        1 => image::GrayImage::from_raw(
599                            width,
600                            height,
601                            decoded[..expected_size].to_vec(),
602                        )
603                        .map(image::DynamicImage::ImageLuma8),
604                        3 => image::RgbImage::from_raw(
605                            width,
606                            height,
607                            decoded[..expected_size].to_vec(),
608                        )
609                        .map(image::DynamicImage::ImageRgb8),
610                        4 => {
611                            // CMYK to RGB conversion
612                            let rgb: Vec<u8> = decoded[..expected_size]
613                                .chunks_exact(4)
614                                .flat_map(|cmyk| {
615                                    let (c, m, y, k) = (
616                                        cmyk[0] as f32 / 255.0,
617                                        cmyk[1] as f32 / 255.0,
618                                        cmyk[2] as f32 / 255.0,
619                                        cmyk[3] as f32 / 255.0,
620                                    );
621                                    [
622                                        ((1.0 - c) * (1.0 - k) * 255.0) as u8,
623                                        ((1.0 - m) * (1.0 - k) * 255.0) as u8,
624                                        ((1.0 - y) * (1.0 - k) * 255.0) as u8,
625                                    ]
626                                })
627                                .collect();
628                            image::RgbImage::from_raw(width, height, rgb)
629                                .map(image::DynamicImage::ImageRgb8)
630                        }
631                        _ => None,
632                    };
633
634                    if let Some(img) = img_opt {
635                        img.save(&output_path)
636                            .map_err(|e| ExtractError::ExtractionFailed {
637                                page: index,
638                                reason: format!("Failed to save: {}", e),
639                            })?;
640                        return Ok(ExtractedPage {
641                            page_index: index,
642                            path: output_path,
643                            width,
644                            height,
645                            format: ImageFormat::Png,
646                        });
647                    }
648                }
649            }
650        }
651
652        Err(ExtractError::ExtractionFailed {
653            page: index,
654            reason: format!("Unsupported image filter: {}", filter),
655        })
656    }
657
658    /// Check if ImageMagick is available
659    pub fn magick_available() -> bool {
660        which::which("magick").is_ok() || which::which("convert").is_ok()
661    }
662
663    /// Check if pdftoppm (poppler-utils) is available
664    pub fn pdftoppm_available() -> bool {
665        which::which("pdftoppm").is_ok()
666    }
667
668    /// Extract using best available method
669    pub fn extract_auto(
670        pdf_path: &Path,
671        output_dir: &Path,
672        options: &ExtractOptions,
673    ) -> Result<Vec<ExtractedPage>> {
674        // Try ImageMagick first if available (better quality for complex PDFs)
675        if Self::magick_available() {
676            return MagickExtractor::extract_all(pdf_path, output_dir, options);
677        }
678
679        // Try pdftoppm (poppler-utils) as second option
680        if Self::pdftoppm_available() {
681            return PopplerExtractor::extract_all(pdf_path, output_dir, options);
682        }
683
684        // Fall back to pure Rust extraction
685        Self::extract_all(pdf_path, output_dir, options)
686    }
687}
688
/// Poppler-based extractor using pdftoppm
///
/// Stateless; all functionality is exposed as associated functions that run
/// the external `pdftoppm` (and, for page counting, `pdfinfo`) commands.
pub struct PopplerExtractor;
691
692impl PopplerExtractor {
693    /// Extract a single page from PDF using pdftoppm
694    pub fn extract_page(
695        pdf_path: &Path,
696        page_index: usize,
697        output_path: &Path,
698        options: &ExtractOptions,
699    ) -> Result<ExtractedPage> {
700        if !pdf_path.exists() {
701            return Err(ExtractError::PdfNotFound(pdf_path.to_path_buf()));
702        }
703
704        // Create output directory if needed
705        if let Some(parent) = output_path.parent() {
706            if !parent.exists() {
707                std::fs::create_dir_all(parent)?;
708            }
709        }
710
711        // pdftoppm uses 1-based page numbers
712        let page_num = page_index + 1;
713
714        // Get output path without extension (pdftoppm adds its own)
715        let output_stem = output_path.with_extension("");
716        let output_stem_str = output_stem.to_string_lossy();
717
718        let mut cmd = Command::new("pdftoppm");
719        cmd.arg("-r").arg(options.dpi.to_string()); // Resolution
720        cmd.arg("-f").arg(page_num.to_string()); // First page
721        cmd.arg("-l").arg(page_num.to_string()); // Last page
722        cmd.arg("-singlefile"); // Single file output (no suffix)
723
724        // Set output format (pdftoppm doesn't support BMP, fallback to PNG)
725        match options.format {
726            ImageFormat::Png | ImageFormat::Bmp => {
727                cmd.arg("-png");
728            }
729            ImageFormat::Jpeg { quality } => {
730                cmd.arg("-jpeg");
731                cmd.arg("-jpegopt")
732                    .arg(format!("quality={}", quality));
733            }
734            ImageFormat::Tiff => {
735                cmd.arg("-tiff");
736            }
737        }
738
739        // Set colorspace
740        match options.colorspace {
741            ColorSpace::Grayscale => {
742                cmd.arg("-gray");
743            }
744            ColorSpace::Rgb | ColorSpace::Cmyk => {
745                // RGB is default for pdftoppm
746            }
747        }
748
749        // Input PDF and output prefix
750        cmd.arg(pdf_path);
751        cmd.arg(&*output_stem_str);
752
753        let output = cmd.output()?;
754
755        if !output.status.success() {
756            let stderr = String::from_utf8_lossy(&output.stderr);
757            return Err(ExtractError::ExternalToolError(format!(
758                "pdftoppm failed: {}",
759                stderr
760            )));
761        }
762
763        // pdftoppm creates file with extension based on format
764        // (BMP outputs as PNG since pdftoppm doesn't support BMP)
765        let actual_output = match options.format {
766            ImageFormat::Png | ImageFormat::Bmp => output_stem.with_extension("png"),
767            ImageFormat::Jpeg { .. } => output_stem.with_extension("jpg"),
768            ImageFormat::Tiff => output_stem.with_extension("tif"),
769        };
770
771        // Rename to expected output path if different
772        if actual_output != output_path {
773            std::fs::rename(&actual_output, output_path)?;
774        }
775
776        // Get image dimensions
777        let img = image::open(output_path).map_err(|e| ExtractError::ExtractionFailed {
778            page: page_index,
779            reason: e.to_string(),
780        })?;
781
782        Ok(ExtractedPage {
783            page_index,
784            path: output_path.to_path_buf(),
785            width: img.width(),
786            height: img.height(),
787            format: options.format,
788        })
789    }
790
791    /// Extract all pages from PDF using pdftoppm
792    pub fn extract_all(
793        pdf_path: &Path,
794        output_dir: &Path,
795        options: &ExtractOptions,
796    ) -> Result<Vec<ExtractedPage>> {
797        if !pdf_path.exists() {
798            return Err(ExtractError::PdfNotFound(pdf_path.to_path_buf()));
799        }
800
801        // Create output directory
802        if !output_dir.exists() {
803            std::fs::create_dir_all(output_dir)?;
804        }
805
806        // Get page count using pdfinfo
807        let page_count = Self::get_page_count(pdf_path)?;
808
809        // Extract pages
810        let extension = options.format.extension();
811        let mut results = Vec::with_capacity(page_count);
812
813        for i in 0..page_count {
814            let output_path = output_dir.join(format!("page_{:05}.{}", i, extension));
815
816            let result = Self::extract_page(pdf_path, i, &output_path, options)?;
817            results.push(result);
818
819            // Call progress callback if provided
820            if let Some(ref callback) = options.progress_callback {
821                callback(i + 1, page_count);
822            }
823        }
824
825        Ok(results)
826    }
827
828    /// Get page count using pdfinfo
829    fn get_page_count(pdf_path: &Path) -> Result<usize> {
830        let output = Command::new("pdfinfo").arg(pdf_path).output()?;
831
832        if output.status.success() {
833            let stdout = String::from_utf8_lossy(&output.stdout);
834            for line in stdout.lines() {
835                if line.starts_with("Pages:") {
836                    if let Some(count_str) = line.split_whitespace().nth(1) {
837                        if let Ok(count) = count_str.parse::<usize>() {
838                            return Ok(count);
839                        }
840                    }
841                }
842            }
843        }
844
845        // Fallback: try lopdf
846        let doc = lopdf::Document::load(pdf_path).map_err(|e| ExtractError::ExtractionFailed {
847            page: 0,
848            reason: format!("Failed to load PDF: {}", e),
849        })?;
850        Ok(doc.get_pages().len())
851    }
852}
853
854#[cfg(test)]
855mod tests {
856    use super::*;
857    use tempfile::tempdir;
858
859    // TC-EXT-009: 存在しないPDFエラー
860    #[test]
861    fn test_nonexistent_pdf_error() {
862        let temp_dir = tempdir().unwrap();
863
864        let result = MagickExtractor::extract_all(
865            Path::new("/nonexistent/file.pdf"),
866            temp_dir.path(),
867            &ExtractOptions::default(),
868        );
869
870        assert!(matches!(result, Err(ExtractError::PdfNotFound(_))));
871    }
872
873    // TC-EXT-003: DPI設定
874    #[test]
875    fn test_default_options() {
876        let opts = ExtractOptions::default();
877
878        assert_eq!(opts.dpi, 300);
879        assert!(matches!(opts.format, ImageFormat::Png));
880        assert!(matches!(opts.colorspace, ColorSpace::Rgb));
881        assert_eq!(opts.background, Some([255, 255, 255]));
882        assert!(opts.parallel > 0);
883    }
884
885    #[test]
886    fn test_image_format_extension() {
887        assert_eq!(ImageFormat::Png.extension(), "png");
888        assert_eq!(ImageFormat::Jpeg { quality: 90 }.extension(), "jpg");
889        assert_eq!(ImageFormat::Bmp.extension(), "bmp");
890        assert_eq!(ImageFormat::Tiff.extension(), "tiff");
891    }
892
893    #[test]
894    fn test_builder_pattern() {
895        let options = ExtractOptions::builder()
896            .dpi(600)
897            .format(ImageFormat::Jpeg { quality: 95 })
898            .colorspace(ColorSpace::Grayscale)
899            .background([0, 0, 0])
900            .parallel(4)
901            .build();
902
903        assert_eq!(options.dpi, 600);
904        assert!(matches!(options.format, ImageFormat::Jpeg { quality: 95 }));
905        assert!(matches!(options.colorspace, ColorSpace::Grayscale));
906        assert_eq!(options.background, Some([0, 0, 0]));
907        assert_eq!(options.parallel, 4);
908    }
909
910    #[test]
911    fn test_builder_dpi_clamping() {
912        // DPI should be clamped to 72-1200
913        let options = ExtractOptions::builder().dpi(50).build();
914        assert_eq!(options.dpi, 72);
915
916        let options = ExtractOptions::builder().dpi(2000).build();
917        assert_eq!(options.dpi, 1200);
918
919        let options = ExtractOptions::builder().dpi(300).build();
920        assert_eq!(options.dpi, 300);
921    }
922
923    #[test]
924    fn test_builder_parallel_minimum() {
925        // Parallel workers should be at least 1
926        let options = ExtractOptions::builder().parallel(0).build();
927        assert_eq!(options.parallel, 1);
928    }
929
930    #[test]
931    fn test_builder_no_background() {
932        let options = ExtractOptions::builder().no_background().build();
933        assert!(options.background.is_none());
934    }
935
936    #[test]
937    fn test_high_quality_preset() {
938        let options = ExtractOptions::high_quality();
939
940        assert_eq!(options.dpi, 600);
941        assert!(matches!(options.format, ImageFormat::Png));
942    }
943
944    #[test]
945    fn test_fast_preset() {
946        let options = ExtractOptions::fast();
947
948        assert_eq!(options.dpi, 150);
949        assert!(matches!(options.format, ImageFormat::Jpeg { quality: 80 }));
950    }
951
952    #[test]
953    fn test_grayscale_preset() {
954        let options = ExtractOptions::grayscale();
955
956        assert!(matches!(options.colorspace, ColorSpace::Grayscale));
957    }
958
959    // Note: The following tests require ImageMagick and actual PDF fixtures
960    // They are marked with #[ignore] until fixtures are available
961
962    // TC-EXT-001: 単一ページ抽出
963    #[test]
964    #[ignore = "requires external tool"]
965    fn test_extract_single_page() {
966        let temp_dir = tempdir().unwrap();
967        let output = temp_dir.path().join("page_0.png");
968
969        let result = MagickExtractor::extract_page(
970            Path::new("tests/fixtures/sample.pdf"),
971            0,
972            &output,
973            &ExtractOptions::default(),
974        )
975        .unwrap();
976
977        assert!(output.exists());
978        assert_eq!(result.page_index, 0);
979        assert!(result.width > 0);
980        assert!(result.height > 0);
981    }
982
983    // TC-EXT-002: 全ページ抽出
984    #[test]
985    #[ignore = "requires external tool"]
986    fn test_extract_all_pages() {
987        let temp_dir = tempdir().unwrap();
988
989        let results = MagickExtractor::extract_all(
990            Path::new("tests/fixtures/10pages.pdf"),
991            temp_dir.path(),
992            &ExtractOptions::default(),
993        )
994        .unwrap();
995
996        assert_eq!(results.len(), 10);
997        for (i, result) in results.iter().enumerate() {
998            assert_eq!(result.page_index, i);
999            assert!(result.path.exists());
1000        }
1001    }
1002
1003    // TC-EXT-003: DPI設定(詳細テスト)
1004    #[test]
1005    #[ignore = "requires external tool"]
1006    fn test_dpi_setting() {
1007        let temp_dir = tempdir().unwrap();
1008
1009        // 72 DPI
1010        let output_72 = temp_dir.path().join("72dpi.png");
1011        let result_72 = MagickExtractor::extract_page(
1012            Path::new("tests/fixtures/a4.pdf"),
1013            0,
1014            &output_72,
1015            &ExtractOptions {
1016                dpi: 72,
1017                ..Default::default()
1018            },
1019        )
1020        .unwrap();
1021
1022        // 300 DPI
1023        let output_300 = temp_dir.path().join("300dpi.png");
1024        let result_300 = MagickExtractor::extract_page(
1025            Path::new("tests/fixtures/a4.pdf"),
1026            0,
1027            &output_300,
1028            &ExtractOptions {
1029                dpi: 300,
1030                ..Default::default()
1031            },
1032        )
1033        .unwrap();
1034
1035        // 300 DPI image should be ~4x larger in each dimension
1036        assert!(result_300.width > result_72.width * 3);
1037        assert!(result_300.height > result_72.height * 3);
1038    }
1039
1040    // TC-EXT-004: JPEG出力
1041    #[test]
1042    #[ignore = "requires external tool"]
1043    fn test_jpeg_output() {
1044        let temp_dir = tempdir().unwrap();
1045        let output = temp_dir.path().join("page_0.jpg");
1046
1047        MagickExtractor::extract_page(
1048            Path::new("tests/fixtures/sample.pdf"),
1049            0,
1050            &output,
1051            &ExtractOptions {
1052                format: ImageFormat::Jpeg { quality: 85 },
1053                ..Default::default()
1054            },
1055        )
1056        .unwrap();
1057
1058        assert!(output.exists());
1059
1060        // Check JPEG magic bytes
1061        let bytes = std::fs::read(&output).unwrap();
1062        assert_eq!(&bytes[0..2], &[0xFF, 0xD8]);
1063    }
1064
1065    // TC-EXT-005: グレースケール変換
1066    #[test]
1067    #[ignore = "requires external tool"]
1068    fn test_grayscale_extraction() {
1069        let temp_dir = tempdir().unwrap();
1070        let output = temp_dir.path().join("gray.png");
1071
1072        MagickExtractor::extract_page(
1073            Path::new("tests/fixtures/color.pdf"),
1074            0,
1075            &output,
1076            &ExtractOptions {
1077                colorspace: ColorSpace::Grayscale,
1078                ..Default::default()
1079            },
1080        )
1081        .unwrap();
1082
1083        // Verify image is grayscale
1084        let img = image::open(&output).unwrap();
1085        let rgb = img.to_rgb8();
1086
1087        // Check that R=G=B for each pixel (grayscale property)
1088        for pixel in rgb.pixels() {
1089            assert_eq!(pixel[0], pixel[1]);
1090            assert_eq!(pixel[1], pixel[2]);
1091        }
1092    }
1093
1094    // Additional structure tests
1095
1096    #[test]
1097    fn test_extracted_page_construction() {
1098        let page = ExtractedPage {
1099            page_index: 5,
1100            path: PathBuf::from("/test/page_5.png"),
1101            width: 2480,
1102            height: 3508,
1103            format: ImageFormat::Png,
1104        };
1105
1106        assert_eq!(page.page_index, 5);
1107        assert_eq!(page.path, PathBuf::from("/test/page_5.png"));
1108        assert_eq!(page.width, 2480);
1109        assert_eq!(page.height, 3508);
1110        assert!(matches!(page.format, ImageFormat::Png));
1111    }
1112
1113    #[test]
1114    fn test_all_image_formats() {
1115        let formats = [
1116            ImageFormat::Png,
1117            ImageFormat::Jpeg { quality: 90 },
1118            ImageFormat::Bmp,
1119            ImageFormat::Tiff,
1120        ];
1121
1122        let expected_ext = ["png", "jpg", "bmp", "tiff"];
1123
1124        for (format, ext) in formats.iter().zip(expected_ext.iter()) {
1125            assert_eq!(format.extension(), *ext);
1126        }
1127    }
1128
1129    #[test]
1130    fn test_all_colorspaces() {
1131        let colorspaces = vec![ColorSpace::Rgb, ColorSpace::Grayscale, ColorSpace::Cmyk];
1132
1133        // Verify all colorspaces can be constructed and roundtrip through builder
1134        for cs in colorspaces {
1135            let options = ExtractOptions::builder().colorspace(cs).build();
1136            match (cs, options.colorspace) {
1137                (ColorSpace::Rgb, ColorSpace::Rgb) => {}
1138                (ColorSpace::Grayscale, ColorSpace::Grayscale) => {}
1139                (ColorSpace::Cmyk, ColorSpace::Cmyk) => {}
1140                _ => panic!("Colorspace mismatch"),
1141            }
1142        }
1143    }
1144
1145    #[test]
1146    fn test_error_types() {
1147        // Test all error variants can be constructed
1148        let _err1 = ExtractError::PdfNotFound(PathBuf::from("/test/path"));
1149        let _err2 = ExtractError::OutputNotWritable(PathBuf::from("/readonly/dir"));
1150        let _err3 = ExtractError::ExternalToolError("ImageMagick not found".to_string());
1151        let _err4 = ExtractError::ExtractionFailed {
1152            page: 3,
1153            reason: "Test error".to_string(),
1154        };
1155        let _err5: ExtractError = std::io::Error::new(std::io::ErrorKind::NotFound, "test").into();
1156    }
1157
1158    #[test]
1159    fn test_output_not_writable_error_message() {
1160        let err = ExtractError::OutputNotWritable(PathBuf::from("/readonly/output"));
1161
1162        let msg = err.to_string();
1163        assert!(msg.contains("/readonly/output"));
1164        assert!(msg.contains("not writable"));
1165    }
1166
1167    #[test]
1168    fn test_extraction_failed_error_message() {
1169        let err = ExtractError::ExtractionFailed {
1170            page: 5,
1171            reason: "ImageMagick crashed".to_string(),
1172        };
1173
1174        let msg = err.to_string();
1175        assert!(msg.contains("5"));
1176        assert!(msg.contains("ImageMagick"));
1177    }
1178
1179    #[test]
1180    fn test_format_builder() {
1181        // Test different JPEG qualities
1182        let options_low = ExtractOptions::builder()
1183            .format(ImageFormat::Jpeg { quality: 50 })
1184            .build();
1185        let options_high = ExtractOptions::builder()
1186            .format(ImageFormat::Jpeg { quality: 95 })
1187            .build();
1188
1189        match (options_low.format, options_high.format) {
1190            (ImageFormat::Jpeg { quality: q1 }, ImageFormat::Jpeg { quality: q2 }) => {
1191                assert_eq!(q1, 50);
1192                assert_eq!(q2, 95);
1193            }
1194            _ => panic!("Expected JPEG format"),
1195        }
1196    }
1197
1198    // TC-EXT-006: 透明部分の処理
1199    #[test]
1200    fn test_background_color_setting() {
1201        // White background (default)
1202        let options = ExtractOptions::builder()
1203            .background([255, 255, 255])
1204            .build();
1205        assert_eq!(options.background, Some([255, 255, 255]));
1206
1207        // Black background
1208        let options = ExtractOptions::builder().background([0, 0, 0]).build();
1209        assert_eq!(options.background, Some([0, 0, 0]));
1210
1211        // Transparent (no background)
1212        let options = ExtractOptions::builder().no_background().build();
1213        assert!(options.background.is_none());
1214    }
1215
1216    // TC-EXT-007: 並列抽出
1217    #[test]
1218    fn test_parallel_extraction_config() {
1219        // Test parallel thread configuration
1220        let options_single = ExtractOptions::builder().parallel(1).build();
1221        assert_eq!(options_single.parallel, 1);
1222
1223        let options_multi = ExtractOptions::builder().parallel(4).build();
1224        assert_eq!(options_multi.parallel, 4);
1225
1226        let options_max = ExtractOptions::builder().parallel(16).build();
1227        assert_eq!(options_max.parallel, 16);
1228
1229        // parallel(0) should be clamped to 1
1230        let options_zero = ExtractOptions::builder().parallel(0).build();
1231        assert_eq!(options_zero.parallel, 1);
1232    }
1233
1234    // TC-EXT-008: 進捗コールバック
1235    #[test]
1236    fn test_progress_callback_structure() {
1237        use std::sync::atomic::{AtomicUsize, Ordering};
1238        use std::sync::Arc;
1239
1240        // Test that progress callback can be set
1241        let progress_count = Arc::new(AtomicUsize::new(0));
1242        let progress_clone = progress_count.clone();
1243
1244        let options = ExtractOptions::builder()
1245            .progress_callback(Box::new(move |current, total| {
1246                progress_clone.fetch_add(1, Ordering::SeqCst);
1247                assert!(current <= total);
1248            }))
1249            .build();
1250
1251        // Verify callback is set
1252        assert!(options.progress_callback.is_some());
1253
1254        // Call the callback to verify it works
1255        if let Some(callback) = &options.progress_callback {
1256            callback(1, 10);
1257            callback(5, 10);
1258            callback(10, 10);
1259        }
1260
1261        assert_eq!(progress_count.load(Ordering::SeqCst), 3);
1262    }
1263
1264    // TC-EXT-010: 書き込み不可ディレクトリエラー追加テスト
1265    #[test]
1266    fn test_output_not_writable_error_display() {
1267        let err = ExtractError::OutputNotWritable(std::path::PathBuf::from("/root/protected"));
1268        let display = format!("{}", err);
1269        assert!(display.contains("/root/protected"));
1270        assert!(display.contains("not writable") || display.contains("writable"));
1271    }
1272
1273    #[test]
1274    fn test_extracted_page_display() {
1275        let page = ExtractedPage {
1276            page_index: 0,
1277            path: std::path::PathBuf::from("/tmp/page_001.png"),
1278            width: 2480,
1279            height: 3508,
1280            format: ImageFormat::Png,
1281        };
1282
1283        assert_eq!(page.page_index, 0);
1284        assert_eq!(page.width, 2480);
1285        assert_eq!(page.height, 3508);
1286        assert!(page.path.to_string_lossy().contains("page_001"));
1287    }
1288
1289    #[test]
1290    fn test_colorspace_all_variants() {
1291        // Test all colorspace variants exist and can be used
1292        let colorspaces = [ColorSpace::Rgb, ColorSpace::Grayscale, ColorSpace::Cmyk];
1293
1294        for cs in &colorspaces {
1295            let options = ExtractOptions::builder().colorspace(*cs).build();
1296            assert_eq!(options.colorspace, *cs);
1297        }
1298    }
1299
1300    #[test]
1301    fn test_image_format_all_variants() {
1302        // Test all image format variants
1303        let formats = [
1304            ImageFormat::Png,
1305            ImageFormat::Jpeg { quality: 85 },
1306            ImageFormat::Bmp,
1307            ImageFormat::Tiff,
1308        ];
1309
1310        for fmt in &formats {
1311            let options = ExtractOptions::builder().format(*fmt).build();
1312            match (&options.format, fmt) {
1313                (ImageFormat::Png, ImageFormat::Png) => {}
1314                (ImageFormat::Tiff, ImageFormat::Tiff) => {}
1315                (ImageFormat::Bmp, ImageFormat::Bmp) => {}
1316                (ImageFormat::Jpeg { quality: q1 }, ImageFormat::Jpeg { quality: q2 }) => {
1317                    assert_eq!(q1, q2);
1318                }
1319                _ => panic!("Format mismatch"),
1320            }
1321        }
1322    }
1323
1324    // Additional comprehensive tests
1325
1326    #[test]
1327    fn test_dpi_boundary_values() {
1328        // Minimum boundary (should clamp to 72)
1329        let options_below = ExtractOptions::builder().dpi(50).build();
1330        assert_eq!(options_below.dpi, 72);
1331
1332        // Exact minimum
1333        let options_min = ExtractOptions::builder().dpi(72).build();
1334        assert_eq!(options_min.dpi, 72);
1335
1336        // Normal range
1337        let options_normal = ExtractOptions::builder().dpi(300).build();
1338        assert_eq!(options_normal.dpi, 300);
1339
1340        // Maximum boundary
1341        let options_max = ExtractOptions::builder().dpi(1200).build();
1342        assert_eq!(options_max.dpi, 1200);
1343
1344        // Above maximum (should clamp to 1200)
1345        let options_above = ExtractOptions::builder().dpi(2400).build();
1346        assert_eq!(options_above.dpi, 1200);
1347    }
1348
1349    #[test]
1350    fn test_jpeg_quality_edge_cases() {
1351        // Minimum quality
1352        let opts_min = ExtractOptions::builder()
1353            .format(ImageFormat::Jpeg { quality: 0 })
1354            .build();
1355        if let ImageFormat::Jpeg { quality } = opts_min.format {
1356            assert_eq!(quality, 0);
1357        } else {
1358            panic!("Expected JPEG format");
1359        }
1360
1361        // Maximum quality
1362        let opts_max = ExtractOptions::builder()
1363            .format(ImageFormat::Jpeg { quality: 100 })
1364            .build();
1365        if let ImageFormat::Jpeg { quality } = opts_max.format {
1366            assert_eq!(quality, 100);
1367        } else {
1368            panic!("Expected JPEG format");
1369        }
1370
1371        // Typical quality values
1372        for q in [1, 50, 75, 85, 99] {
1373            let opts = ExtractOptions::builder()
1374                .format(ImageFormat::Jpeg { quality: q })
1375                .build();
1376            if let ImageFormat::Jpeg { quality } = opts.format {
1377                assert_eq!(quality, q);
1378            }
1379        }
1380    }
1381
1382    #[test]
1383    fn test_builder_method_chaining() {
1384        // Test that all builder methods can be chained
1385        let options = ExtractOptions::builder()
1386            .dpi(400)
1387            .format(ImageFormat::Tiff)
1388            .colorspace(ColorSpace::Cmyk)
1389            .background([128, 128, 128])
1390            .parallel(8)
1391            .build();
1392
1393        assert_eq!(options.dpi, 400);
1394        assert!(matches!(options.format, ImageFormat::Tiff));
1395        assert_eq!(options.colorspace, ColorSpace::Cmyk);
1396        assert_eq!(options.background, Some([128, 128, 128]));
1397        assert_eq!(options.parallel, 8);
1398    }
1399
1400    #[test]
1401    fn test_extracted_page_various_sizes() {
1402        // Standard A4 at 300 DPI
1403        let a4_page = ExtractedPage {
1404            page_index: 0,
1405            path: PathBuf::from("/tmp/a4.png"),
1406            width: 2480,
1407            height: 3508,
1408            format: ImageFormat::Png,
1409        };
1410        assert_eq!(a4_page.width, 2480);
1411        assert_eq!(a4_page.height, 3508);
1412
1413        // Letter size at 300 DPI
1414        let letter_page = ExtractedPage {
1415            page_index: 1,
1416            path: PathBuf::from("/tmp/letter.png"),
1417            width: 2550,
1418            height: 3300,
1419            format: ImageFormat::Png,
1420        };
1421        assert_eq!(letter_page.width, 2550);
1422        assert_eq!(letter_page.height, 3300);
1423
1424        // Square thumbnail
1425        let thumb = ExtractedPage {
1426            page_index: 99,
1427            path: PathBuf::from("/tmp/thumb.jpg"),
1428            width: 150,
1429            height: 150,
1430            format: ImageFormat::Jpeg { quality: 70 },
1431        };
1432        assert_eq!(thumb.width, thumb.height);
1433    }
1434
1435    #[test]
1436    fn test_error_from_io_error() {
1437        // Test From<std::io::Error> conversion
1438        let io_err = std::io::Error::new(std::io::ErrorKind::NotFound, "file not found");
1439        let extract_err: ExtractError = io_err.into();
1440        let msg = extract_err.to_string();
1441        assert!(msg.contains("file not found") || msg.contains("IO error"));
1442    }
1443
1444    #[test]
1445    fn test_preset_high_quality_details() {
1446        let hq = ExtractOptions::high_quality();
1447        assert_eq!(hq.dpi, 600);
1448        assert!(matches!(hq.format, ImageFormat::Png));
1449        // Should inherit other defaults
1450        assert_eq!(hq.colorspace, ColorSpace::Rgb);
1451        assert!(hq.background.is_some());
1452    }
1453
1454    #[test]
1455    fn test_preset_fast_details() {
1456        let fast = ExtractOptions::fast();
1457        assert_eq!(fast.dpi, 150);
1458        if let ImageFormat::Jpeg { quality } = fast.format {
1459            assert_eq!(quality, 80);
1460        } else {
1461            panic!("Fast preset should use JPEG");
1462        }
1463    }
1464
1465    #[test]
1466    fn test_preset_grayscale_details() {
1467        let gray = ExtractOptions::grayscale();
1468        assert_eq!(gray.colorspace, ColorSpace::Grayscale);
1469        // Should have default DPI
1470        assert_eq!(gray.dpi, 300);
1471    }
1472
1473    #[test]
1474    fn test_background_color_extremes() {
1475        // Pure black
1476        let black = ExtractOptions::builder().background([0, 0, 0]).build();
1477        assert_eq!(black.background, Some([0, 0, 0]));
1478
1479        // Pure white
1480        let white = ExtractOptions::builder()
1481            .background([255, 255, 255])
1482            .build();
1483        assert_eq!(white.background, Some([255, 255, 255]));
1484
1485        // Gray
1486        let gray = ExtractOptions::builder()
1487            .background([128, 128, 128])
1488            .build();
1489        assert_eq!(gray.background, Some([128, 128, 128]));
1490
1491        // Primary colors
1492        let red = ExtractOptions::builder().background([255, 0, 0]).build();
1493        assert_eq!(red.background, Some([255, 0, 0]));
1494
1495        let green = ExtractOptions::builder().background([0, 255, 0]).build();
1496        assert_eq!(green.background, Some([0, 255, 0]));
1497
1498        let blue = ExtractOptions::builder().background([0, 0, 255]).build();
1499        assert_eq!(blue.background, Some([0, 0, 255]));
1500    }
1501
1502    #[test]
1503    fn test_extract_options_debug_impl() {
1504        let options = ExtractOptions::builder()
1505            .dpi(300)
1506            .format(ImageFormat::Png)
1507            .build();
1508
1509        let debug_str = format!("{:?}", options);
1510        assert!(debug_str.contains("ExtractOptions"));
1511        assert!(debug_str.contains("dpi"));
1512        assert!(debug_str.contains("300"));
1513    }
1514
1515    #[test]
1516    fn test_extract_options_with_callback_debug() {
1517        let options = ExtractOptions::builder()
1518            .progress_callback(Box::new(|_, _| {}))
1519            .build();
1520
1521        let debug_str = format!("{:?}", options);
1522        assert!(debug_str.contains("<callback>"));
1523    }
1524
1525    #[test]
1526    fn test_image_format_default() {
1527        let format: ImageFormat = Default::default();
1528        assert!(matches!(format, ImageFormat::Png));
1529    }
1530
1531    #[test]
1532    fn test_colorspace_default() {
1533        let cs: ColorSpace = Default::default();
1534        assert_eq!(cs, ColorSpace::Rgb);
1535    }
1536
1537    #[test]
1538    fn test_parallel_workers_various_values() {
1539        // Test various worker counts
1540        for workers in [1, 2, 4, 8, 16, 32, 64] {
1541            let options = ExtractOptions::builder().parallel(workers).build();
1542            assert_eq!(options.parallel, workers);
1543        }
1544    }
1545
1546    #[test]
1547    fn test_extracted_page_with_all_formats() {
1548        let formats = [
1549            (ImageFormat::Png, "png"),
1550            (ImageFormat::Jpeg { quality: 85 }, "jpg"),
1551            (ImageFormat::Bmp, "bmp"),
1552            (ImageFormat::Tiff, "tiff"),
1553        ];
1554
1555        for (idx, (format, ext)) in formats.iter().enumerate() {
1556            let page = ExtractedPage {
1557                page_index: idx,
1558                path: PathBuf::from(format!("/tmp/page.{}", ext)),
1559                width: 1000,
1560                height: 1500,
1561                format: *format,
1562            };
1563            assert_eq!(page.page_index, idx);
1564            assert!(page.path.to_string_lossy().ends_with(ext));
1565        }
1566    }
1567
1568    #[test]
1569    fn test_error_display_all_variants() {
1570        let errors = [
1571            ExtractError::PdfNotFound(PathBuf::from("/test.pdf")),
1572            ExtractError::OutputNotWritable(PathBuf::from("/output")),
1573            ExtractError::ExtractionFailed {
1574                page: 1,
1575                reason: "test reason".to_string(),
1576            },
1577            ExtractError::ExternalToolError("tool error".to_string()),
1578        ];
1579
1580        for err in &errors {
1581            let display = format!("{}", err);
1582            assert!(!display.is_empty());
1583        }
1584    }
1585
1586    #[test]
1587    fn test_options_builder_default_state() {
1588        let builder = ExtractOptionsBuilder::default();
1589        let options = builder.build();
1590
1591        // Should have default values
1592        assert_eq!(options.dpi, 300);
1593        assert!(matches!(options.format, ImageFormat::Png));
1594        assert_eq!(options.colorspace, ColorSpace::Rgb);
1595    }
1596
1597    #[test]
1598    fn test_colorspace_partial_eq() {
1599        assert_eq!(ColorSpace::Rgb, ColorSpace::Rgb);
1600        assert_eq!(ColorSpace::Grayscale, ColorSpace::Grayscale);
1601        assert_eq!(ColorSpace::Cmyk, ColorSpace::Cmyk);
1602        assert_ne!(ColorSpace::Rgb, ColorSpace::Grayscale);
1603        assert_ne!(ColorSpace::Rgb, ColorSpace::Cmyk);
1604        assert_ne!(ColorSpace::Grayscale, ColorSpace::Cmyk);
1605    }
1606
1607    // Additional comprehensive Debug/Clone tests
1608
1609    #[test]
1610    fn test_image_format_debug_impl() {
1611        let png = ImageFormat::Png;
1612        let debug_str = format!("{:?}", png);
1613        assert!(debug_str.contains("Png"));
1614
1615        let jpeg = ImageFormat::Jpeg { quality: 85 };
1616        let debug_str = format!("{:?}", jpeg);
1617        assert!(debug_str.contains("Jpeg"));
1618        assert!(debug_str.contains("85"));
1619
1620        let tiff = ImageFormat::Tiff;
1621        let debug_str = format!("{:?}", tiff);
1622        assert!(debug_str.contains("Tiff"));
1623    }
1624
1625    #[test]
1626    fn test_image_format_clone() {
1627        let original = ImageFormat::Jpeg { quality: 92 };
1628        let cloned = original;
1629        if let ImageFormat::Jpeg { quality } = cloned {
1630            assert_eq!(quality, 92);
1631        } else {
1632            panic!("Clone should preserve JPEG format");
1633        }
1634
1635        let original_png = ImageFormat::Png;
1636        let cloned_png = original_png;
1637        assert!(matches!(cloned_png, ImageFormat::Png));
1638    }
1639
1640    #[test]
1641    fn test_image_format_copy() {
1642        let original = ImageFormat::Bmp;
1643        let copied = original; // Copy, not move
1644        let _still_valid = original; // Original still valid
1645        assert!(matches!(copied, ImageFormat::Bmp));
1646    }
1647
1648    #[test]
1649    fn test_colorspace_debug_impl() {
1650        let rgb = ColorSpace::Rgb;
1651        let debug_str = format!("{:?}", rgb);
1652        assert!(debug_str.contains("Rgb"));
1653
1654        let gray = ColorSpace::Grayscale;
1655        let debug_str = format!("{:?}", gray);
1656        assert!(debug_str.contains("Grayscale"));
1657
1658        let cmyk = ColorSpace::Cmyk;
1659        let debug_str = format!("{:?}", cmyk);
1660        assert!(debug_str.contains("Cmyk"));
1661    }
1662
1663    #[test]
1664    fn test_colorspace_clone() {
1665        let original = ColorSpace::Cmyk;
1666        let cloned = original;
1667        assert_eq!(cloned, ColorSpace::Cmyk);
1668    }
1669
1670    #[test]
1671    fn test_colorspace_copy() {
1672        let original = ColorSpace::Grayscale;
1673        let copied = original; // Copy
1674        let _still_valid = original; // Original still valid
1675        assert_eq!(copied, ColorSpace::Grayscale);
1676    }
1677
1678    #[test]
1679    fn test_extracted_page_debug_impl() {
1680        let page = ExtractedPage {
1681            page_index: 3,
1682            path: PathBuf::from("/tmp/test.png"),
1683            width: 1920,
1684            height: 1080,
1685            format: ImageFormat::Png,
1686        };
1687        let debug_str = format!("{:?}", page);
1688        assert!(debug_str.contains("ExtractedPage"));
1689        assert!(debug_str.contains("3"));
1690        assert!(debug_str.contains("1920"));
1691        assert!(debug_str.contains("1080"));
1692    }
1693
1694    #[test]
1695    fn test_error_debug_impl() {
1696        let err = ExtractError::PdfNotFound(PathBuf::from("/test.pdf"));
1697        let debug_str = format!("{:?}", err);
1698        assert!(debug_str.contains("PdfNotFound"));
1699
1700        let err2 = ExtractError::ExtractionFailed {
1701            page: 5,
1702            reason: "test reason".to_string(),
1703        };
1704        let debug_str2 = format!("{:?}", err2);
1705        assert!(debug_str2.contains("ExtractionFailed"));
1706    }
1707
1708    #[test]
1709    fn test_extract_options_builder_debug_impl() {
1710        let builder = ExtractOptionsBuilder::default();
1711        let debug_str = format!("{:?}", builder);
1712        assert!(debug_str.contains("ExtractOptionsBuilder"));
1713    }
1714
1715    #[test]
1716    fn test_error_path_extraction() {
1717        let path = PathBuf::from("/some/pdf/file.pdf");
1718        let err = ExtractError::PdfNotFound(path.clone());
1719
1720        if let ExtractError::PdfNotFound(p) = err {
1721            assert_eq!(p, path);
1722        } else {
1723            panic!("Wrong error variant");
1724        }
1725    }
1726
1727    #[test]
1728    fn test_error_page_extraction() {
1729        let err = ExtractError::ExtractionFailed {
1730            page: 42,
1731            reason: "Out of memory".to_string(),
1732        };
1733
1734        if let ExtractError::ExtractionFailed { page, reason } = err {
1735            assert_eq!(page, 42);
1736            assert!(reason.contains("memory"));
1737        } else {
1738            panic!("Wrong error variant");
1739        }
1740    }
1741
1742    #[test]
1743    fn test_page_index_sequential() {
1744        let pages: Vec<ExtractedPage> = (0..100)
1745            .map(|i| ExtractedPage {
1746                page_index: i,
1747                path: PathBuf::from(format!("/tmp/page_{:05}.png", i)),
1748                width: 1000,
1749                height: 1500,
1750                format: ImageFormat::Png,
1751            })
1752            .collect();
1753
1754        for (i, page) in pages.iter().enumerate() {
1755            assert_eq!(page.page_index, i);
1756        }
1757    }
1758
1759    #[test]
1760    fn test_large_page_dimensions() {
1761        // A0 at 600 DPI
1762        let large_page = ExtractedPage {
1763            page_index: 0,
1764            path: PathBuf::from("/tmp/a0.png"),
1765            width: 19842,
1766            height: 28067,
1767            format: ImageFormat::Png,
1768        };
1769        assert!(large_page.width > 10000);
1770        assert!(large_page.height > 20000);
1771    }
1772
1773    #[test]
1774    fn test_small_page_dimensions() {
1775        // Tiny thumbnail
1776        let tiny_page = ExtractedPage {
1777            page_index: 0,
1778            path: PathBuf::from("/tmp/tiny.png"),
1779            width: 16,
1780            height: 16,
1781            format: ImageFormat::Png,
1782        };
1783        assert!(tiny_page.width <= 100);
1784        assert!(tiny_page.height <= 100);
1785    }
1786
1787    #[test]
1788    fn test_preset_consistency() {
1789        let high = ExtractOptions::high_quality();
1790        let fast = ExtractOptions::fast();
1791        let gray = ExtractOptions::grayscale();
1792
1793        // High quality should have higher DPI than fast
1794        assert!(high.dpi > fast.dpi);
1795
1796        // Grayscale should have grayscale colorspace
1797        assert_eq!(gray.colorspace, ColorSpace::Grayscale);
1798
1799        // All presets should have valid backgrounds
1800        assert!(high.background.is_some() || high.background.is_none()); // Either is valid
1801    }
1802
1803    #[test]
1804    fn test_output_path_types() {
1805        // Absolute path
1806        let abs_page = ExtractedPage {
1807            page_index: 0,
1808            path: PathBuf::from("/absolute/path/page.png"),
1809            width: 100,
1810            height: 100,
1811            format: ImageFormat::Png,
1812        };
1813        assert!(abs_page.path.is_absolute());
1814
1815        // Relative path
1816        let rel_page = ExtractedPage {
1817            page_index: 0,
1818            path: PathBuf::from("relative/path/page.png"),
1819            width: 100,
1820            height: 100,
1821            format: ImageFormat::Png,
1822        };
1823        assert!(rel_page.path.is_relative());
1824    }
1825
1826    #[test]
1827    fn test_jpeg_quality_boundary() {
1828        // Test quality 1 (minimum realistic)
1829        let opts_1 = ExtractOptions::builder()
1830            .format(ImageFormat::Jpeg { quality: 1 })
1831            .build();
1832        if let ImageFormat::Jpeg { quality } = opts_1.format {
1833            assert_eq!(quality, 1);
1834        }
1835
1836        // Test quality values across range
1837        for q in (0..=100).step_by(10) {
1838            let opts = ExtractOptions::builder()
1839                .format(ImageFormat::Jpeg { quality: q })
1840                .build();
1841            if let ImageFormat::Jpeg { quality } = opts.format {
1842                assert_eq!(quality, q);
1843            }
1844        }
1845    }
1846
1847    #[test]
1848    fn test_magick_extractor_marker() {
1849        // Verify MagickExtractor type exists
1850        let _ = std::any::type_name::<MagickExtractor>();
1851    }
1852
1853    #[test]
1854    fn test_error_io_details_preserved() {
1855        let io_err = std::io::Error::new(
1856            std::io::ErrorKind::PermissionDenied,
1857            "access denied to file",
1858        );
1859        let extract_err: ExtractError = io_err.into();
1860
1861        let msg = extract_err.to_string().to_lowercase();
1862        // The error message should contain IO-related info
1863        assert!(msg.contains("io") || msg.contains("error") || msg.contains("access"));
1864    }
1865
1866    #[test]
1867    fn test_all_dpi_presets() {
1868        let dpi_values = [72, 96, 150, 200, 300, 400, 600, 1200];
1869
1870        for dpi in dpi_values {
1871            let opts = ExtractOptions::builder().dpi(dpi).build();
1872            assert_eq!(opts.dpi, dpi);
1873        }
1874    }
1875
1876    #[test]
1877    fn test_background_none_vs_some() {
1878        let with_bg = ExtractOptions::builder()
1879            .background([255, 255, 255])
1880            .build();
1881        assert!(with_bg.background.is_some());
1882
1883        let without_bg = ExtractOptions::builder().no_background().build();
1884        assert!(without_bg.background.is_none());
1885    }
1886
1887    // ============ Concurrency Tests ============
1888
1889    #[test]
1890    fn test_image_extract_types_send_sync() {
1891        fn assert_send_sync<T: Send + Sync>() {}
1892        assert_send_sync::<ExtractOptions>();
1893        assert_send_sync::<ExtractedPage>();
1894        assert_send_sync::<ImageFormat>();
1895        assert_send_sync::<ColorSpace>();
1896    }
1897
1898    #[test]
1899    fn test_concurrent_options_building() {
1900        use std::thread;
1901        let handles: Vec<_> = (0..8)
1902            .map(|i| {
1903                thread::spawn(move || {
1904                    ExtractOptions::builder()
1905                        .dpi(150 + (i as u32 * 50))
1906                        .colorspace(if i % 2 == 0 {
1907                            ColorSpace::Rgb
1908                        } else {
1909                            ColorSpace::Grayscale
1910                        })
1911                        .build()
1912                })
1913            })
1914            .collect();
1915
1916        let results: Vec<_> = handles
1917            .into_iter()
1918            .map(|h: std::thread::JoinHandle<ExtractOptions>| h.join().unwrap())
1919            .collect();
1920        assert_eq!(results.len(), 8);
1921        for (i, opt) in results.iter().enumerate() {
1922            assert_eq!(opt.dpi, 150 + (i as u32 * 50));
1923        }
1924    }
1925
1926    #[test]
1927    fn test_parallel_extracted_page_creation() {
1928        use rayon::prelude::*;
1929
1930        let pages: Vec<_> = (0..100)
1931            .into_par_iter()
1932            .map(|i| ExtractedPage {
1933                page_index: i,
1934                path: PathBuf::from(format!("page_{:04}.png", i)),
1935                width: 1000 + i as u32,
1936                height: 1500 + i as u32,
1937                format: ImageFormat::Png,
1938            })
1939            .collect();
1940
1941        assert_eq!(pages.len(), 100);
1942        for (i, page) in pages.iter().enumerate() {
1943            assert_eq!(page.page_index, i);
1944            assert_eq!(page.width, 1000 + i as u32);
1945        }
1946    }
1947
1948    #[test]
1949    fn test_extracted_page_thread_transfer() {
1950        use std::thread;
1951
1952        let page = ExtractedPage {
1953            page_index: 42,
1954            path: PathBuf::from("/tmp/test_page.png"),
1955            width: 2480,
1956            height: 3508,
1957            format: ImageFormat::Jpeg { quality: 95 },
1958        };
1959
1960        let handle = thread::spawn(move || {
1961            assert_eq!(page.page_index, 42);
1962            assert_eq!(page.width, 2480);
1963            page.path.to_string_lossy().to_string()
1964        });
1965
1966        let result = handle.join().unwrap();
1967        assert!(result.contains("test_page"));
1968    }
1969
1970    #[test]
1971    fn test_options_shared_across_threads() {
1972        use std::sync::Arc;
1973        use std::thread;
1974
1975        let options = Arc::new(
1976            ExtractOptions::builder()
1977                .dpi(600)
1978                .format(ImageFormat::Png)
1979                .colorspace(ColorSpace::Rgb)
1980                .build(),
1981        );
1982
1983        let handles: Vec<_> = (0..4)
1984            .map(|_| {
1985                let opts = Arc::clone(&options);
1986                thread::spawn(move || {
1987                    assert_eq!(opts.dpi, 600);
1988                    opts.dpi
1989                })
1990            })
1991            .collect();
1992
1993        for handle in handles {
1994            let result: u32 = handle.join().unwrap();
1995            assert_eq!(result, 600);
1996        }
1997    }
1998
1999    #[test]
2000    fn test_image_format_thread_safe() {
2001        use std::thread;
2002
2003        let formats = vec![
2004            ImageFormat::Png,
2005            ImageFormat::Jpeg { quality: 90 },
2006            ImageFormat::Tiff,
2007            ImageFormat::Bmp,
2008        ];
2009
2010        let handles: Vec<_> = formats
2011            .into_iter()
2012            .map(|format| {
2013                thread::spawn(move || {
2014                    let ext = format.extension();
2015                    ext.to_string()
2016                })
2017            })
2018            .collect();
2019
2020        let extensions: Vec<_> = handles.into_iter().map(|h| h.join().unwrap()).collect();
2021        assert_eq!(extensions.len(), 4);
2022        assert!(extensions.contains(&"png".to_string()));
2023        assert!(extensions.contains(&"jpg".to_string()));
2024    }
2025
2026    // ============ Additional Boundary Tests ============
2027
2028    #[test]
2029    fn test_dpi_boundary_minimum() {
2030        // Values below MIN_DPI (72) should be clamped to MIN_DPI
2031        let opts = ExtractOptions::builder().dpi(1).build();
2032        assert_eq!(opts.dpi, MIN_DPI);
2033    }
2034
2035    #[test]
2036    fn test_dpi_boundary_maximum() {
2037        // Values above MAX_DPI (1200) should be clamped to MAX_DPI
2038        let opts = ExtractOptions::builder().dpi(2400).build();
2039        assert_eq!(opts.dpi, MAX_DPI);
2040    }
2041
2042    #[test]
2043    fn test_page_index_zero() {
2044        let page = ExtractedPage {
2045            page_index: 0,
2046            path: PathBuf::from("first.png"),
2047            width: 100,
2048            height: 100,
2049            format: ImageFormat::Png,
2050        };
2051        assert_eq!(page.page_index, 0);
2052    }
2053
2054    #[test]
2055    fn test_page_index_large() {
2056        let page = ExtractedPage {
2057            page_index: 10000,
2058            path: PathBuf::from("page_10000.png"),
2059            width: 100,
2060            height: 100,
2061            format: ImageFormat::Png,
2062        };
2063        assert_eq!(page.page_index, 10000);
2064    }
2065
2066    #[test]
2067    fn test_page_dimensions_zero() {
2068        let page = ExtractedPage {
2069            page_index: 0,
2070            path: PathBuf::from("empty.png"),
2071            width: 0,
2072            height: 0,
2073            format: ImageFormat::Png,
2074        };
2075        assert_eq!(page.width, 0);
2076        assert_eq!(page.height, 0);
2077    }
2078
2079    #[test]
2080    fn test_page_dimensions_large() {
2081        let page = ExtractedPage {
2082            page_index: 0,
2083            path: PathBuf::from("huge.png"),
2084            width: 32768,
2085            height: 32768,
2086            format: ImageFormat::Png,
2087        };
2088        assert_eq!(page.width, 32768);
2089        assert_eq!(page.height, 32768);
2090    }
2091
2092    #[test]
2093    fn test_background_color_black() {
2094        let opts = ExtractOptions::builder().background([0, 0, 0]).build();
2095        assert_eq!(opts.background, Some([0, 0, 0]));
2096    }
2097
2098    #[test]
2099    fn test_background_color_white() {
2100        let opts = ExtractOptions::builder()
2101            .background([255, 255, 255])
2102            .build();
2103        assert_eq!(opts.background, Some([255, 255, 255]));
2104    }
2105
2106    #[test]
2107    fn test_all_color_spaces() {
2108        let spaces = [ColorSpace::Rgb, ColorSpace::Grayscale, ColorSpace::Cmyk];
2109        for space in spaces {
2110            let opts = ExtractOptions::builder().colorspace(space).build();
2111            assert_eq!(opts.colorspace, space);
2112        }
2113    }
2114}