// Reject `unsafe` code within the scope this inner attribute applies to
// (the whole crate if this file is the crate root — TODO confirm).
// The level is `deny`, not `forbid`, so a nested `#[allow(unsafe_code)]`
// can still opt out locally.
#![deny(unsafe_code)]
// Modules declared without a `#[cfg]` gate: they are compiled regardless of
// which Cargo features are enabled.
pub mod cache;
// Crate-private module: not part of the public API.
pub(crate) mod cache_dir;
// NOTE: this local module is named `core`; the `core::...` re-export paths
// further down in this file refer to it.
pub mod core;
pub mod error;
pub mod extraction;
pub mod extractors;
pub mod model_cache;
pub mod panic_context;
pub mod plugins;
pub mod rendering;
pub mod telemetry;
pub mod text;
pub mod types;
pub mod utils;
pub mod table_core;
// Feature-gated modules: each declaration below is compiled only when the
// Cargo feature(s) named in its `#[cfg]` attribute are enabled.
#[cfg(feature = "tower-service")]
pub mod service;
#[cfg(feature = "api")]
pub mod api;
#[cfg(feature = "mcp")]
pub mod mcp;
#[cfg(feature = "chunking")]
pub mod chunking;
// Note the feature name (`liter-llm`) differs from the module name (`llm`).
#[cfg(feature = "liter-llm")]
pub mod llm;
#[cfg(feature = "embeddings")]
pub mod embeddings;
// `image` is gated on the `ocr` feature, the same feature that gates the
// `ocr` module declared further down.
#[cfg(feature = "ocr")]
pub mod image;
#[cfg(feature = "language-detection")]
pub mod language_detection;
#[cfg(feature = "stopwords")]
pub mod stopwords;
// Compiled when at least one of the two keyword features is enabled.
#[cfg(any(feature = "keywords-yake", feature = "keywords-rake"))]
pub mod keywords;
#[cfg(feature = "ocr")]
pub mod ocr;
// Compiled when at least one of the four listed features is enabled.
#[cfg(any(
feature = "paddle-ocr",
feature = "embeddings",
feature = "layout-detection",
feature = "auto-rotate"
))]
pub mod ort_discovery;
// Crate-private. Same feature set as `ort_discovery` above except that
// `embeddings` alone does not enable it.
#[cfg(any(feature = "paddle-ocr", feature = "layout-detection", feature = "auto-rotate"))]
pub(crate) mod model_download;
#[cfg(feature = "paddle-ocr")]
pub mod paddle_ocr;
#[cfg(feature = "auto-rotate")]
pub mod doc_orientation;
#[cfg(feature = "layout-detection")]
pub mod layout;
#[cfg(feature = "pdf")]
pub mod pdf;
// Public re-exports: these make selected items reachable directly from this
// module's path instead of through their defining modules.

// Error type and `Result` used by the functions in this file.
pub use error::{KreuzbergError, Result};
// Glob re-export: every public item of `types` is exposed here.
pub use types::*;
// Extraction entry points from `core::extractor`. The batch variants and the
// file-based `*_sync` variants are only re-exported with the `tokio-runtime`
// feature; `extract_bytes`, `extract_file` and the bytes-based `*_sync`
// variants are re-exported unconditionally.
#[cfg(feature = "tokio-runtime")]
pub use core::extractor::{batch_extract_bytes, batch_extract_file};
pub use core::extractor::{extract_bytes, extract_file};
pub use core::extractor::{batch_extract_bytes_sync, extract_bytes_sync};
#[cfg(feature = "tokio-runtime")]
pub use core::extractor::{batch_extract_file_sync, extract_file_sync};
// Configuration types that are re-exported regardless of enabled features.
pub use core::config::{
AccelerationConfig, ChunkSizing, ChunkerType, ChunkingConfig, ContentFilterConfig, EmailConfig, EmbeddingConfig,
EmbeddingModelType, ExecutionProviderType, ExtractionConfig, FileExtractionConfig, ImageExtractionConfig,
LanguageDetectionConfig, LlmConfig, OcrConfig, OutputFormat, PageConfig, PostProcessorConfig,
StructuredExtractionConfig, TokenReductionConfig,
};
// Feature-specific re-exports: each is present only when its feature is on.
#[cfg(feature = "api")]
pub use core::server_config::ServerConfig;
#[cfg(feature = "pdf")]
pub use core::config::{HierarchyConfig, PdfConfig};
#[cfg(feature = "paddle-ocr")]
pub use paddle_ocr::{CacheStats, ModelManager, ModelPaths, PaddleLanguage, PaddleOcrBackend, PaddleOcrConfig};
#[cfg(feature = "layout-detection")]
pub use core::config::{LayoutDetectionConfig, TableModel};
#[cfg(feature = "tree-sitter")]
pub use core::config::{CodeContentMode, TreeSitterConfig, TreeSitterProcessConfig};
// Items re-exported from `tree_sitter_language_pack` (no such module is
// declared in this file, so presumably an external crate — TODO confirm).
// `process` is re-exported under the name `process_code`.
#[cfg(feature = "tree-sitter")]
pub use tree_sitter_language_pack::{
ChunkContext, CodeChunk, CommentInfo, CommentKind, Diagnostic, DiagnosticSeverity, DocstringFormat, DocstringInfo,
ExportInfo, ExportKind, FileMetrics, ImportInfo, ProcessConfig, ProcessResult, Span, StructureItem, StructureKind,
SymbolInfo, SymbolKind, process as process_code,
};
// MIME type constants and detection/validation helpers from `core::mime`.
pub use core::mime::{
DOCX_MIME_TYPE, EXCEL_MIME_TYPE, HTML_MIME_TYPE, JSON_MIME_TYPE, MARKDOWN_MIME_TYPE, PDF_MIME_TYPE,
PLAIN_TEXT_MIME_TYPE, POWER_POINT_MIME_TYPE, SupportedFormat, XML_MIME_TYPE, detect_mime_type,
detect_mime_type_from_bytes, detect_or_validate, get_extensions_for_mime, list_supported_formats,
validate_mime_type,
};
pub use core::formats::{KNOWN_FORMATS, is_valid_format_field};
// Accessor functions for the plugin registries defined in `plugins::registry`.
pub use plugins::registry::{
get_document_extractor_registry, get_ocr_backend_registry, get_post_processor_registry, get_renderer_registry,
get_validator_registry,
};
#[cfg(feature = "embeddings")]
pub use embeddings::{
EMBEDDING_PRESETS, EmbeddingPreset, download_model, embed_texts, get_preset, list_presets, warm_model,
};
// The async embedding entry point needs both `embeddings` and `tokio-runtime`.
#[cfg(all(feature = "embeddings", feature = "tokio-runtime"))]
pub use embeddings::embed_texts_async;
/// Serializes an [`ExtractionResult`] into a TOON string.
///
/// The encoding is delegated to `serde_toon::to_string`.
///
/// # Errors
///
/// If the encoder fails, returns an error built with
/// `KreuzbergError::serialization` whose message is
/// `"TOON serialization failed: "` followed by the encoder's error text.
pub fn serialize_to_toon(result: &ExtractionResult) -> Result<String> {
    match serde_toon::to_string(result) {
        Ok(encoded) => Ok(encoded),
        Err(cause) => Err(KreuzbergError::serialization(format!(
            "TOON serialization failed: {cause}"
        ))),
    }
}
/// Serializes an [`ExtractionResult`] into a pretty-printed JSON string.
///
/// The encoding is delegated to `serde_json::to_string_pretty`.
///
/// # Errors
///
/// If the encoder fails, returns an error built with
/// `KreuzbergError::serialization` whose message is
/// `"JSON serialization failed: "` followed by the encoder's error text.
pub fn serialize_to_json(result: &ExtractionResult) -> Result<String> {
    let rendered = serde_json::to_string_pretty(result);
    rendered.map_err(|cause| {
        let message = format!("JSON serialization failed: {cause}");
        KreuzbergError::serialization(message)
    })
}