docx_lite/
extractor.rs

1use std::fs::File;
2use std::io::{BufReader, Cursor};
3use std::path::Path;
4
5use crate::error::Result;
6use crate::parser::DocxParser;
7use crate::types::Document;
8
9/// Extract text from a DOCX file at the given path
10pub fn extract_text<P: AsRef<Path>>(path: P) -> Result<String> {
11    let file = File::open(path)?;
12    let reader = BufReader::new(file);
13    extract_text_from_reader(reader)
14}
15
16/// Extract text from DOCX bytes
17pub fn extract_text_from_bytes(bytes: &[u8]) -> Result<String> {
18    let cursor = Cursor::new(bytes);
19    extract_text_from_reader(cursor)
20}
21
22/// Extract text from any reader containing DOCX data
23pub fn extract_text_from_reader<R: std::io::Read + std::io::Seek>(reader: R) -> Result<String> {
24    let document = parse_document(reader)?;
25    Ok(document.extract_text())
26}
27
28/// Parse a DOCX file and return the structured Document
29pub fn parse_document<R: std::io::Read + std::io::Seek>(reader: R) -> Result<Document> {
30    let parser = DocxParser::new(reader)?;
31    parser.parse()
32}
33
34/// Parse a DOCX file from a path and return the structured Document
35pub fn parse_document_from_path<P: AsRef<Path>>(path: P) -> Result<Document> {
36    let file = File::open(path)?;
37    let reader = BufReader::new(file);
38    parse_document(reader)
39}