marque_extract/lib.rs
1// SPDX-FileCopyrightText: 2026 Knitli Inc.
2//
3// SPDX-License-Identifier: LicenseRef-MarqueLicense-1.0
4
5#![forbid(unsafe_code)]
6#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
7
8//! marque-extract — document text and metadata extraction.
9//!
10//! Wraps Kreuzberg (<https://github.com/kreuzberg-dev/kreuzberg>):
11//! Rust-core, SIMD-optimized, streaming, 75+ formats, OCR for scanned documents.
12//!
13//! NOT included in the marque-wasm build. In a WASM context, the calling application
14//! is responsible for providing pre-extracted text to the engine.
15//!
16//! # Metadata
17//! Metadata extraction runs in the same pipeline pass as text extraction.
18//! Metadata issues are surfaced as `MetadataWarning` and are always reported;
19//! stripping is opt-in via `ExtractionOptions::strip_metadata`.
20
21pub mod extractor;
22pub mod metadata;
23
24pub use extractor::{ExtractedDocument, ExtractionOptions, Extractor};
25pub use metadata::{MetadataField, MetadataReport, MetadataWarning};