transmutation 0.3.2

High-performance document conversion engine for AI/LLM embeddings - 27 formats supported
//! Document processing pipeline (inspired by Docling architecture)
//!
//! Separates extraction from export for maximum flexibility:
//! Input → Parser → DocumentStructure → Exporter → Output(s)

#![allow(missing_docs)]

pub mod document_structure;
pub mod exporters;

use std::path::Path;

pub use document_structure::DocumentStructure;
pub use exporters::{ChunkingExporter, Exporter, ImageExporter, JsonExporter, MarkdownExporter};

use crate::error::Result;

/// Pipeline for document processing.
///
/// A pipeline parses an input file into a [`DocumentStructure`], which can then be
/// handed to any number of exporters without re-parsing the source document.
///
/// # Examples
///
/// ```rust,ignore
/// use std::path::Path;
///
/// use transmutation::pipeline::*;
///
/// let pipeline = DocumentPipeline::new();
/// // `parse` takes a path, so wrap string literals with `Path::new`.
/// let doc = pipeline.parse(Path::new("input.pdf")).await?;
///
/// // Export to multiple formats from the same parsed document
/// let md = MarkdownExporter::new().export(&doc)?;
/// let json = JsonExporter::new().export(&doc)?;
/// let images = ImageExporter::new().export_pages(&doc)?;
/// let chunks = ChunkingExporter::new(512).export(&doc)?;
/// ```
#[derive(Debug)]
pub struct DocumentPipeline {
    // No fields yet: this is the place where parsing configuration is meant to live.
}

impl DocumentPipeline {
    /// Create a new document pipeline.
    ///
    /// The pipeline currently carries no configuration, so every instance is identical.
    #[must_use]
    pub fn new() -> Self {
        Self {}
    }

    /// Parse a document into the intermediate [`DocumentStructure`] representation.
    ///
    /// `path` may be anything that can be viewed as a [`Path`], e.g. `&Path`,
    /// `PathBuf`, `&str` or `String`.
    ///
    /// Format detection is not implemented yet: every input is currently handed to
    /// [`DocumentStructure::from_pdf`], whatever the file actually contains.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by [`DocumentStructure::from_pdf`].
    pub async fn parse(&self, path: impl AsRef<Path>) -> Result<DocumentStructure> {
        // Borrow once as `&Path` so the parser sees the same type as before.
        let path: &Path = path.as_ref();

        // TODO: Detect format and route to appropriate parser
        DocumentStructure::from_pdf(path).await
    }
}

impl Default for DocumentPipeline {
    fn default() -> Self {
        Self::new()
    }
}