parser_core/lib.rs
1//! Document parsing library for extracting text from various file formats.
2//!
3//! This crate parses documents and extracts their text content. Supported
4//! formats include PDFs, Office documents (DOCX, XLSX, PPTX), text files,
5//! and images (using OCR).
6//!
7//! # Features
8//!
9//! * Automatic file format detection based on content
10//! * Support for various document types:
11//!   * PDF documents
12//!   * Microsoft Office formats (DOCX, XLSX, PPTX)
13//!   * Plain text and structured text (TXT, CSV, JSON)
14//!   * Images with text content via OCR (PNG, JPEG, WebP)
15//! * Memory-efficient processing with minimal temporary file usage
16//! * Consolidated error handling with descriptive error messages
17//!
18//! # Examples
19//!
20//! ```no_run
21//! use parser_core::parse;
22//! use std::fs;
23//!
24//! # fn main() -> Result<(), Box<dyn std::error::Error>> {
25//! // Read a file
26//! let data = fs::read("document.pdf")?;
27//!
28//! // Parse it to extract text
29//! let text = parse(&data)?;
30//! println!("{}", text);
31//! # Ok(())
32//! # }
33//! ```
34
35mod constants;
36mod errors;
37mod parsers;
38
39pub use errors::ParserError;
40pub use parsers::parse;