1use extended::sifter::{WhitespaceSifter, WhitespaceSifterBytes};
2
3pub mod extended;
5
6#[cfg(feature = "scraper")]
7pub use markup5ever_rcdom::{Handle, NodeData, RcDom};
8
9#[cfg(feature = "rewriter")]
10pub mod rewriter;
11#[cfg(feature = "scraper")]
12pub mod scraper;
13#[cfg(feature = "scraper")]
14pub use scraper::{
15 ignore, parse_html, parse_html_custom, parse_html_custom_base, parse_html_custom_with_url,
16 parse_html_extended,
17};
18
#[cfg(feature = "scraper")]
lazy_static::lazy_static! {
    /// Regex matching any single one of the characters `<`, `>`, `*`, `\`,
    /// `_` or `~`.
    ///
    /// Only compiled in when the `scraper` feature is enabled. The pattern is
    /// a fixed literal, so the `expect` below is only reachable if the literal
    /// itself is edited into something invalid.
    pub(crate) static ref MARKDOWN_MIDDLE_KEYCHARS: regex::Regex =
        regex::Regex::new(r"[<>*\\_~]").expect("valid regex pattern");
}
25
/// Converts `html` to Markdown using the rewriter backend.
///
/// Calls `rewriter::writer::convert_html_to_markdown` with no custom set and
/// no URL (`&None` for both); `commonmark` is forwarded unchanged.
///
/// Returns the converter's output, or an empty `String` when the converter
/// does not yield one.
#[cfg(feature = "rewriter")]
pub fn rewrite_html(html: &str, commonmark: bool) -> String {
    let converted = rewriter::writer::convert_html_to_markdown(html, &None, commonmark, &None);
    converted.unwrap_or_default()
}
34
/// Async counterpart of the plain rewriter conversion.
///
/// Awaits `rewriter::writer::convert_html_to_markdown_send` with no custom
/// set and no URL (`&None` for both); `commonmark` is forwarded unchanged.
///
/// Returns the converter's output, or an empty `String` when the converter
/// does not yield one.
#[cfg(all(feature = "stream", feature = "rewriter"))]
pub async fn rewrite_html_streaming(html: &str, commonmark: bool) -> String {
    let converted =
        rewriter::writer::convert_html_to_markdown_send(html, &None, commonmark, &None).await;
    converted.unwrap_or_default()
}
45
/// Converts `html` to Markdown using the rewriter backend, forwarding the
/// caller's `custom` set and `url` to the converter.
///
/// All four arguments are passed straight through to
/// `rewriter::writer::convert_html_to_markdown`.
///
/// Returns the converter's output, or an empty `String` when the converter
/// does not yield one.
// NOTE(review): this synchronous function is gated on the `stream` feature
// even though its body only calls the non-streaming converter — confirm the
// gate is intentional.
#[cfg(all(feature = "stream", feature = "rewriter"))]
pub fn rewrite_html_custom_with_url(
    html: &str,
    custom: &Option<std::collections::HashSet<String>>,
    commonmark: bool,
    url: &Option<url::Url>,
) -> String {
    // `custom` is already a reference, so it is handed over as-is.
    let converted = rewriter::writer::convert_html_to_markdown(html, custom, commonmark, url);
    converted.unwrap_or_default()
}
63
/// Async rewriter conversion that forwards `custom`, `url` and an explicit
/// `chunk_size` to the converter.
///
/// All five arguments are passed straight through to
/// `rewriter::writer::convert_html_to_markdown_send_with_size`; how
/// `chunk_size` is interpreted is decided there (presumably the size of the
/// pieces the input is fed in — verify against that function).
///
/// Returns the converter's output, or an empty `String` when the converter
/// does not yield one.
#[cfg(all(feature = "stream", feature = "rewriter"))]
pub async fn rewrite_html_custom_with_url_and_chunk(
    html: &str,
    custom: &Option<std::collections::HashSet<String>>,
    commonmark: bool,
    url: &Option<url::Url>,
    chunk_size: usize,
) -> String {
    // `custom` is already a reference, so it is handed over as-is.
    let pending = rewriter::writer::convert_html_to_markdown_send_with_size(
        html, custom, commonmark, url, chunk_size,
    );
    let converted = pending.await;
    converted.unwrap_or_default()
}
87
/// Async rewriter conversion that forwards the caller's `custom` set and
/// `url` to the converter.
///
/// All four arguments are passed straight through to
/// `rewriter::writer::convert_html_to_markdown_send`.
///
/// Returns the converter's output, or an empty `String` when the converter
/// does not yield one.
#[cfg(all(feature = "stream", feature = "rewriter"))]
pub async fn rewrite_html_custom_with_url_streaming(
    html: &str,
    custom: &Option<std::collections::HashSet<String>>,
    commonmark: bool,
    url: &Option<url::Url>,
) -> String {
    // `custom` is already a reference, so it is handed over as-is.
    let converted =
        rewriter::writer::convert_html_to_markdown_send(html, custom, commonmark, url).await;
    converted.unwrap_or_default()
}
107
108pub fn clean_markdown(input: &str) -> String {
112 input.sift_preserve_newlines()
113}
114
115pub fn clean_markdown_bytes(input: &Vec<u8>) -> String {
119 input.sift_bytes_preserve_newlines()
120}
121
/// Reports whether `b` is one of the six ASCII bytes `<`, `>`, `*`, `\`, `_`
/// or `~`.
#[inline]
const fn needs_escape(b: u8) -> bool {
    match b {
        b'<' | b'>' | b'*' | b'\\' | b'_' | b'~' => true,
        _ => false,
    }
}
127
128#[inline]
130const fn is_special_byte(b: u8) -> bool {
131 needs_escape(b) || b == b'&'
132}
133
134#[inline]
137pub fn contains_markdown_chars(input: &str) -> bool {
138 input.as_bytes().iter().any(|&b| is_special_byte(b))
139}
140
141#[inline]
145pub fn replace_markdown_chars_opt(input: &str) -> Option<String> {
146 let bytes = input.as_bytes();
147
148 let first_special = bytes.iter().position(|&b| is_special_byte(b));
150
151 match first_special {
152 None => None,
153 Some(first_pos) => {
154 let mut output = String::with_capacity(input.len() + input.len() / 8);
156
157 output.push_str(&input[..first_pos]);
159
160 let mut i = first_pos;
162 while i < bytes.len() {
163 let b = bytes[i];
164
165 if needs_escape(b) {
166 output.push('\\');
167 output.push(b as char);
168 i += 1;
169 } else if b == b'&' {
170 if i + 5 < bytes.len() && &bytes[i..i + 6] == b" " {
172 i += 6;
174 } else {
175 output.push('&');
176 i += 1;
177 }
178 } else {
179 let segment_start = i;
181 i += 1;
182 while i < bytes.len() && !is_special_byte(bytes[i]) {
183 i += 1;
184 }
185 output.push_str(&input[segment_start..i]);
186 }
187 }
188
189 Some(output)
190 }
191 }
192}
193
194#[inline]
197pub fn replace_markdown_chars(input: &str) -> String {
198 replace_markdown_chars_opt(input).unwrap_or_else(|| input.to_string())
199}