1pub mod consts;
2pub(crate) mod convert;
3pub(crate) mod entities;
4pub(crate) mod helpers;
5pub mod splitter;
6pub(crate) mod tags;
7pub mod types;
8
9use convert::ConvertState;
10use types::{HTMLToMarkdownOptions, MdreamResult};
11
12pub fn html_to_markdown(html: &str, options: HTMLToMarkdownOptions) -> String {
14 html_to_markdown_result(html, options).markdown
15}
16
17pub fn html_to_markdown_result(html: &str, options: HTMLToMarkdownOptions) -> MdreamResult {
19 let capacity = (html.len() / 3).clamp(1024, 256 * 1024);
20 let mut state = ConvertState::new(options, capacity);
21 state.process_html(html);
22
23 let extracted = if state.has_extraction {
24 let results = std::mem::take(&mut state.extraction_results);
25 if results.is_empty() { None } else { Some(results) }
26 } else {
27 None
28 };
29
30 let frontmatter = if state.has_frontmatter {
31 let mut entries: Vec<(String, String)> = Vec::new();
32 if let Some(title) = &state.frontmatter_title {
33 entries.push(("title".to_string(), title.clone()));
34 }
35 for (k, v) in &state.frontmatter_meta {
36 entries.push((k.clone(), v.clone()));
37 }
38 if let Some(add) = state.options.plugins.as_ref()
39 .and_then(|p| p.frontmatter.as_ref())
40 .and_then(|f| f.additional_fields.as_ref()) {
41 for (k, v) in add {
42 entries.push((k.clone(), v.clone()));
43 }
44 }
45 Some(entries)
46 } else {
47 None
48 };
49
50 MdreamResult {
51 markdown: state.get_markdown(),
52 extracted,
53 frontmatter,
54 }
55}
56
/// Incremental HTML-to-Markdown converter that is fed HTML one chunk at a time.
///
/// Feed input with `process_chunk` and call `finish` once the input is
/// exhausted; each call returns the Markdown chunk reported by the converter
/// state at that point.
pub struct MarkdownStreamProcessor {
    /// Conversion state shared across all chunks.
    state: ConvertState,
    /// Text returned by the last `process_html` call; it is prepended to the
    /// next chunk before processing.
    // NOTE(review): presumably the unparsed tail of the previous chunk —
    // confirm against `ConvertState::process_html`.
    buffer: String,
}
64
65impl MarkdownStreamProcessor {
66 pub fn new(options: HTMLToMarkdownOptions) -> Self {
67 Self {
68 state: ConvertState::new(options, 4096),
69 buffer: String::new(),
70 }
71 }
72
73 pub fn process_chunk(&mut self, chunk: &str) -> String {
74 if self.buffer.is_empty() {
75 self.buffer = self.state.process_html(chunk);
76 } else {
77 self.buffer.push_str(chunk);
78 let full = std::mem::take(&mut self.buffer);
79 self.buffer = self.state.process_html(&full);
80 }
81 self.state.get_markdown_chunk()
82 }
83
84 pub fn finish(&mut self) -> String {
85 if !self.buffer.is_empty() {
86 let chunk = std::mem::take(&mut self.buffer);
87 self.state.process_html(&chunk);
88 }
89 self.state.get_markdown_chunk()
90 }
91}