html_to_markdown_rs/
lib.rs1use std::borrow::Cow;
11
12pub mod converter;
13pub mod error;
14pub mod hocr;
15#[cfg(feature = "inline-images")]
16mod inline_images;
17pub mod options;
18pub mod safety;
19pub mod text;
20pub mod wrapper;
21
22pub use error::{ConversionError, Result};
23#[cfg(feature = "inline-images")]
24pub use inline_images::{
25 HtmlExtraction, InlineImage, InlineImageConfig, InlineImageFormat, InlineImageSource, InlineImageWarning,
26};
27pub use options::{
28 CodeBlockStyle, ConversionOptions, HeadingStyle, HighlightStyle, ListIndentType, NewlineStyle,
29 PreprocessingOptions, PreprocessingPreset, WhitespaceMode,
30};
31
32pub fn convert(html: &str, options: Option<ConversionOptions>) -> Result<String> {
49 let options = options.unwrap_or_default();
50
51 let normalized_html = if html.contains('\r') {
52 Cow::Owned(html.replace("\r\n", "\n").replace('\r', "\n"))
53 } else {
54 Cow::Borrowed(html)
55 };
56
57 let markdown = converter::convert_html(normalized_html.as_ref(), &options)?;
58
59 if options.wrap {
60 Ok(wrapper::wrap_markdown(&markdown, &options))
61 } else {
62 Ok(markdown)
63 }
64}
65
/// Converts an HTML document into Markdown while collecting inline images.
///
/// When `options` is `None`, `ConversionOptions::default()` is used.
/// `image_cfg` is handed to `InlineImageCollector::new` to build the collector
/// that is shared with the converter for the duration of the conversion.
///
/// Carriage returns are normalised before conversion (`\r\n` and lone `\r`
/// both become `\n`), and the Markdown is passed through
/// `wrapper::wrap_markdown` when `options.wrap` is set.
///
/// Returns an `HtmlExtraction` holding the Markdown together with the inline
/// images and warnings produced by the collector's `finish()`.
///
/// # Errors
///
/// Returns an error if the collector cannot be constructed, if
/// `converter::convert_html_with_inline_collector` fails, or if the collector
/// is still shared after conversion (reported as `ConversionError::Other`).
#[cfg(feature = "inline-images")]
pub fn convert_with_inline_images(
    html: &str,
    options: Option<ConversionOptions>,
    image_cfg: InlineImageConfig,
) -> Result<HtmlExtraction> {
    use std::cell::RefCell;
    use std::rc::Rc;

    let options = options.unwrap_or_default();

    // Only allocate when there is actually a carriage return to rewrite.
    let normalized_html: Cow<'_, str> = if html.contains('\r') {
        // CRLF pairs must be collapsed first so they do not become two newlines.
        let without_crlf = html.replace("\r\n", "\n");
        Cow::Owned(without_crlf.replace('\r', "\n"))
    } else {
        Cow::Borrowed(html)
    };

    let image_collector = inline_images::InlineImageCollector::new(image_cfg)?;
    let shared_collector = Rc::new(RefCell::new(image_collector));

    let mut markdown = converter::convert_html_with_inline_collector(
        &*normalized_html,
        &options,
        Rc::clone(&shared_collector),
    )?;
    if options.wrap {
        markdown = wrapper::wrap_markdown(&markdown, &options);
    }

    // The handle can only be unwrapped once every other clone of it has been dropped.
    let image_collector = match Rc::try_unwrap(shared_collector) {
        Ok(cell) => cell.into_inner(),
        Err(_) => {
            return Err(ConversionError::Other(
                "failed to recover inline image state".to_string(),
            ));
        }
    };
    let (inline_images, warnings) = image_collector.finish();

    Ok(HtmlExtraction {
        markdown,
        inline_images,
        warnings,
    })
}