Skip to main content

Crate dm2xcod

Crate dm2xcod 

Source
Expand description

§dm2xcod

A DOCX-to-Markdown converter built on the `rs_docx` crate.

§Example

use dm2xcod::{DocxToMarkdown, ConvertOptions, ImageHandling};

// Pick the `SaveToDir` image-handling variant with "./images" as its payload;
// every other option keeps its `Default` value.
let image_handling = ImageHandling::SaveToDir("./images".into());
let options = ConvertOptions {
    image_handling,
    ..Default::default()
};

// Build the converter from the options, convert the file, and print the result.
// `unwrap()` keeps the example short: a failed conversion panics here.
let converter = DocxToMarkdown::new(options);
let markdown = converter.convert("document.docx").unwrap();
println!("{}", markdown);

§Advanced Example (Custom Extractor/Renderer)

use dm2xcod::adapters::docx::AstExtractor;
use dm2xcod::converter::ConversionContext;
use dm2xcod::core::ast::{BlockNode, DocumentAst};
use dm2xcod::render::Renderer;
use dm2xcod::{ConvertOptions, DocxToMarkdown, Result};
use rs_docx::document::BodyContent;

/// Example extractor that ignores its inputs and always produces the same
/// single-paragraph AST.
#[derive(Debug, Default, Clone, Copy)]
struct MyExtractor;

impl AstExtractor for MyExtractor {
    /// Returns a `DocumentAst` holding one `BlockNode::Paragraph` with the text
    /// "custom pipeline" and default `references`.
    ///
    /// Both `_body` and `_context` are intentionally unused.
    fn extract<'a>(
        &self,
        _body: &[BodyContent<'a>],
        _context: &mut ConversionContext<'a>,
    ) -> Result<DocumentAst> {
        let blocks = vec![BlockNode::Paragraph(String::from("custom pipeline"))];
        let ast = DocumentAst {
            blocks,
            references: Default::default(),
        };
        Ok(ast)
    }
}

/// Example renderer that reports how many blocks the AST contains instead of
/// rendering their contents.
#[derive(Debug, Default, Clone, Copy)]
struct MyRenderer;

impl Renderer for MyRenderer {
    /// Produces the string `blocks=<N>`, where `<N>` is the length of
    /// `document.blocks`. This implementation never returns an error.
    fn render(&self, document: &DocumentAst) -> Result<String> {
        let block_count = document.blocks.len();
        Ok(format!("blocks={}", block_count))
    }
}

/// Wires the custom extractor and renderer into a converter, converts
/// `document.docx`, and prints the rendered output.
///
/// Any error from `convert` is propagated to the caller via `?`.
fn main() -> Result<()> {
    // Default options combined with the custom pipeline components.
    let options = ConvertOptions::default();
    let converter = DocxToMarkdown::with_components(options, MyExtractor, MyRenderer);

    let rendered = converter.convert("document.docx")?;
    println!("{}", rendered);
    Ok(())
}

Re-exports§

pub use converter::DocxToMarkdown;
pub use error::Error;
pub use error::Result;
pub use localization::parse_heading_style;

Modules§

adapters
converter
Converter modules for DOCX to Markdown transformation.
core
error
Error types for the dm2xcod crate.
localization
Heading style parsing utilities.
render

Structs§

ConvertOptions
Options for DOCX to Markdown conversion.

Enums§

ImageHandling
Specifies how images should be handled during conversion.