Skip to main content

dm2xcod/
lib.rs

//! # dm2xcod
//!
//! DOCX to Markdown converter using `rs_docx`.
//!
//! ## Example
//!
//! ```no_run
//! use dm2xcod::{DocxToMarkdown, ConvertOptions, ImageHandling};
//!
//! let options = ConvertOptions {
//!     image_handling: ImageHandling::SaveToDir("./images".into()),
//!     ..Default::default()
//! };
//!
//! let converter = DocxToMarkdown::new(options);
//! let markdown = converter.convert("document.docx").unwrap();
//! println!("{}", markdown);
//! ```
19
20pub mod converter;
21pub mod error;
22pub mod localization;
23
24pub use converter::DocxToMarkdown;
25pub use error::{Error, Result};
26pub use localization::{DefaultLocalization, KoreanLocalization, LocalizationStrategy};
27
28use std::path::PathBuf;
29
/// Options for DOCX to Markdown conversion.
///
/// Start from [`ConvertOptions::default`] and override individual fields with
/// struct-update syntax (`ConvertOptions { field: value, ..Default::default() }`).
///
/// The type is comparable with `==`, so option sets can be checked in tests
/// and deduplicated by callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConvertOptions {
    /// How to handle images in the document.
    pub image_handling: ImageHandling,
    /// Whether to preserve exact whitespace.
    pub preserve_whitespace: bool,
    /// Whether to use HTML for underlined text.
    pub html_underline: bool,
    /// Whether to use HTML for strikethrough text.
    pub html_strikethrough: bool,
}

impl Default for ConvertOptions {
    /// Returns the default option set:
    ///
    /// * `image_handling`: [`ImageHandling::Inline`]
    /// * `preserve_whitespace`: `false`
    /// * `html_underline`: `true`
    /// * `html_strikethrough`: `false`
    fn default() -> Self {
        Self {
            image_handling: ImageHandling::Inline,
            preserve_whitespace: false,
            html_underline: true,
            html_strikethrough: false,
        }
    }
}

/// Specifies how images should be handled during conversion.
///
/// Variants are comparable with `==`, which lets callers branch on or assert
/// the configured mode without pattern matching.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ImageHandling {
    /// Save images to a directory and reference them by path.
    SaveToDir(PathBuf),
    /// Embed images as base64 data URIs.
    Inline,
    /// Skip images entirely.
    Skip,
}
64
// Python bindings (only when 'python' feature is enabled)
#[cfg(feature = "python")]
mod python_bindings {
    use super::*;
    use pyo3::exceptions::{PyRuntimeError, PyTypeError};
    use pyo3::prelude::*;
    use pyo3::types::PyBytes;

    /// Wraps a conversion failure in a Python `RuntimeError` carrying the
    /// error's string form as its message.
    fn to_runtime_error<E: ToString>(err: E) -> PyErr {
        PyRuntimeError::new_err(err.to_string())
    }

    /// Converts a DOCX file to Markdown.
    ///
    /// Argument can be a file path (str) or file content (bytes).
    ///
    /// Conversion always uses `ConvertOptions::default()`. A conversion
    /// failure is raised as `RuntimeError`; any other argument type is
    /// rejected with `TypeError`.
    #[pyfunction]
    fn convert_docx(input: &Bound<'_, PyAny>) -> PyResult<String> {
        let converter = DocxToMarkdown::new(ConvertOptions::default());

        // A Python `str` is treated as a path to the document on disk.
        if let Ok(docx_path) = input.extract::<String>() {
            return converter.convert(&docx_path).map_err(to_runtime_error);
        }

        // A Python `bytes` object is treated as the raw document content.
        if let Ok(docx_bytes) = input.downcast::<PyBytes>() {
            return converter
                .convert_from_bytes(docx_bytes.as_bytes())
                .map_err(to_runtime_error);
        }

        Err(PyTypeError::new_err("Expected string path or bytes"))
    }

    /// A Python module implemented in Rust.
    ///
    /// Registers `convert_docx` as the module's only function.
    #[pymodule]
    pub fn dm2xcod(m: &Bound<'_, PyModule>) -> PyResult<()> {
        let convert_fn = wrap_pyfunction!(convert_docx, m)?;
        m.add_function(convert_fn)?;
        Ok(())
    }
}