Re-exports§
pub use archive::ArchiveEngine;
pub use csv::CsvEngine;
pub use engine::ExtractionEngine;
pub use engine::PlainTextEngine;
pub use error::DonglerError;
pub use error::Result;
pub use format::ExtractionStatus;
pub use format::InputFormat;
pub use image::ImageEngine;
pub use ir::Asset;
pub use ir::BBox;
pub use ir::BatchResult;
pub use ir::Block;
pub use ir::BlockKind;
pub use ir::Confidence;
pub use ir::Document;
pub use ir::ExtractOptions;
pub use ir::FigureBlock;
pub use ir::ImageObject;
pub use ir::Line;
pub use ir::Metadata;
pub use ir::Page;
pub use ir::Provenance;
pub use ir::Route;
pub use ir::SourceAnchor;
pub use ir::Span;
pub use ir::TableBlock;
pub use ir::TableCell;
pub use ir::TextBlock;
pub use ir::TextSource;
pub use ir::Warning;
pub use json::JsonEngine;
pub use openxml::OpenXmlEngine;
pub use pdf::PdfEngine;
pub use render::JsonRenderer;
pub use render::LatexRenderer;
pub use render::MarkdownRenderer;
pub use render::Renderer;
pub use source::FormatSourceLoader;
pub use source::ImageSourceLoader;
pub use source::PdfSourceLoader;
pub use source::Source;
pub use source::SourceLoader;
pub use source::TextSourceLoader;
pub use textual::EmailEngine;
pub use textual::HtmlEngine;
pub use textual::XmlEngine;
Modules§
Functions§
- detect_format
- extract_bytes - Extract a document from in-memory bytes, detecting the format from `filename` (its extension only — the file is never read from disk).
- extract_bytes_with_options
- load_many
- load_path
- load_path_with_options
- parse_text
- to_json
- to_latex
- to_markdown