1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
// Copyright (c) 2026 vectorless developers
// SPDX-License-Identifier: Apache-2.0
//! DOCX document parsing module.
//!
//! This module provides functionality to parse DOCX (Microsoft Word) documents:
//! - **DocxParser** — Extract structured content from DOCX files
//! - **StyleResolver** — Resolve heading styles from style definitions
//!
//! # DOCX Structure
//!
//! A DOCX file is a ZIP archive containing XML files:
//!
//! ```text
//! document.docx
//! ├── word/
//! │   ├── document.xml  # Main content
//! │   └── styles.xml    # Style definitions (optional)
//! ```
//!
//! # Example
//!
//! ```rust,no_run
//! use vectorless::parser::docx::DocxParser;
//! use vectorless::DocumentParser;
//! use std::path::Path;
//!
//! # #[tokio::main]
//! # async fn main() -> vectorless::Result<()> {
//! let parser = DocxParser::new();
//! let result = parser.parse_file(Path::new("document.docx")).await?;
//!
//! println!("Extracted {} nodes", result.node_count());
//! for node in &result.nodes {
//!     println!(" - {} (level {})", node.title, node.level);
//! }
//! # Ok(())
//! # }
//! ```
pub use DocxParser;
pub use StyleResolver;
pub use ;