ndg_commonmark/utils/
mod.rs1use std::{collections::HashMap, sync::OnceLock};
2pub mod codeblock;
3
4use comrak::{
5 Arena,
6 nodes::{AstNode, NodeHeading, NodeValue},
7 options::Options,
8 parse_document,
9};
10use regex::Regex;
11
12#[derive(Debug, thiserror::Error)]
14pub enum UtilError {
15 #[error("Regex compilation failed: {0}")]
16 RegexError(#[from] regex::Error),
17}
18
19pub type UtilResult<T> = Result<T, UtilError>;
21
/// Turn arbitrary text into a slug.
///
/// The input is lowercased, every character that is not alphanumeric, `-`
/// or `_` becomes a `-`, and leading/trailing `-` characters are removed.
/// Runs of replaced characters are *not* collapsed, so `"a, b"` yields
/// `"a--b"`.
#[must_use]
pub fn slugify(text: &str) -> String {
  // Lowercase the whole string first (not char-by-char) so context-sensitive
  // mappings behave exactly like `str::to_lowercase`.
  let dashed: String = text
    .to_lowercase()
    .chars()
    .map(|ch| {
      if ch.is_alphanumeric() || ch == '-' || ch == '_' {
        ch
      } else {
        '-'
      }
    })
    .collect();

  dashed.trim_matches('-').to_owned()
}
33
34#[must_use]
37pub fn extract_markdown_title(content: &str) -> Option<String> {
38 let arena = Arena::new();
39 let mut options = Options::default();
40 options.extension.table = true;
41 options.extension.footnotes = true;
42 options.extension.strikethrough = true;
43 options.extension.tasklist = true;
44 options.extension.superscript = true;
45 options.render.r#unsafe = true;
46
47 let root = parse_document(&arena, content, &options);
48
49 for node in root.descendants() {
50 if let NodeValue::Heading(_) = &node.data.borrow().value {
51 let mut text = String::new();
52 for child in node.children() {
53 if let NodeValue::Text(t) = &child.data.borrow().value {
54 text.push_str(t);
55 }
56 if let NodeValue::Code(t) = &child.data.borrow().value {
58 text.push_str(&t.literal);
59 }
60 }
61 if !text.trim().is_empty() {
62 return Some(text.trim().to_string());
63 }
64 }
65 }
66 None
67}
68
69#[must_use]
81pub fn extract_title_from_markdown(content: &str) -> Option<String> {
82 let arena = Arena::new();
83 let mut options = Options::default();
84 options.extension.table = true;
85 options.extension.footnotes = true;
86 options.extension.strikethrough = true;
87 options.extension.tasklist = true;
88 options.render.r#unsafe = true;
89
90 let root = parse_document(&arena, content, &options);
91
92 #[allow(
94 clippy::items_after_statements,
95 reason = "Static is Scoped to function for clarity"
96 )]
97 static ANCHOR_RE: OnceLock<Regex> = OnceLock::new();
98 let anchor_re = ANCHOR_RE.get_or_init(|| {
99 Regex::new(r"(\[\]\{#.*?\}|\{#.*?\})").unwrap_or_else(|e| {
100 log::error!(
101 "Failed to compile ANCHOR_RE regex in extract_h1_title: {e}\n Falling \
102 back to never matching regex."
103 );
104 never_matching_regex().unwrap_or_else(|_| {
105 #[allow(
107 clippy::expect_used,
108 reason = "This pattern is guaranteed to be valid"
109 )]
110 Regex::new(r"[^\s\S]")
111 .expect("regex pattern [^\\s\\S] should always compile")
112 })
113 })
114 });
115
116 for node in root.descendants() {
117 if let NodeValue::Heading(NodeHeading { level, .. }) =
118 &node.data.borrow().value
119 {
120 if *level == 1 {
121 let mut text = String::new();
122 for child in node.children() {
123 if let NodeValue::Text(ref t) = child.data.borrow().value {
124 text.push_str(t);
125 }
126 }
127 let clean_title = anchor_re.replace_all(&text, "").trim().to_string();
129 if !clean_title.is_empty() {
130 return Some(clean_title);
131 }
132 }
133 }
134 }
135 None
136}
137
138#[must_use]
146pub fn clean_anchor_patterns(text: &str) -> String {
147 static ANCHOR_PATTERN: OnceLock<Regex> = OnceLock::new();
148 let anchor_pattern = ANCHOR_PATTERN.get_or_init(|| {
149 Regex::new(r"\s*\{#[a-zA-Z0-9_-]+\}\s*$").unwrap_or_else(|e| {
150 log::error!(
151 "Failed to compile ANCHOR_PATTERN regex in clean_anchor_patterns: \
152 {e}\n Falling back to never matching regex."
153 );
154 never_matching_regex().unwrap_or_else(|_| {
155 #[allow(
157 clippy::expect_used,
158 reason = "This pattern is guaranteed to be valid"
159 )]
160 Regex::new(r"[^\s\S]")
161 .expect("regex pattern [^\\s\\S] should always compile")
162 })
163 })
164 });
165 anchor_pattern.replace_all(text.trim(), "").to_string()
166}
167
168pub fn process_html_elements<F>(
171 html: &str,
172 regex: &Regex,
173 transform: F,
174) -> String
175where
176 F: Fn(®ex::Captures) -> String,
177{
178 match regex.replace_all(html, transform) {
179 std::borrow::Cow::Borrowed(_) => html.to_string(),
180 std::borrow::Cow::Owned(s) => s,
181 }
182}
183
184#[must_use]
189pub fn strip_markdown(content: &str) -> String {
190 let arena = Arena::new();
191 let mut options = Options::default();
192 options.extension.table = true;
193 options.extension.footnotes = true;
194 options.extension.strikethrough = true;
195 options.extension.tasklist = true;
196 options.render.r#unsafe = true;
197
198 let root = parse_document(&arena, content, &options);
199
200 let mut plain_text = String::new();
201 #[allow(clippy::items_after_statements, reason = "Helper scoped for clarity")]
202 fn extract_text<'a>(
203 node: &'a AstNode<'a>,
204 plain_text: &mut String,
205 in_code_block: &mut bool,
206 ) {
207 match &node.data.borrow().value {
208 NodeValue::Text(t) => {
209 if !*in_code_block {
210 plain_text.push_str(t);
211 plain_text.push(' ');
212 }
213 },
214 NodeValue::CodeBlock(_) => {
215 *in_code_block = true;
216 },
217 NodeValue::SoftBreak => {
218 plain_text.push(' ');
219 },
220 NodeValue::LineBreak => {
221 plain_text.push('\n');
222 },
223 _ => {},
224 }
225 for child in node.children() {
226 extract_text(child, plain_text, in_code_block);
227 }
228 if let NodeValue::CodeBlock(_) = &node.data.borrow().value {
229 *in_code_block = false;
230 }
231 }
232 let mut in_code_block = false;
233 extract_text(root, &mut plain_text, &mut in_code_block);
234 plain_text
235}
236
/// Return `s` with its first character converted to uppercase.
///
/// The remainder of the string is copied unchanged. An empty input yields an
/// empty string. Uppercasing may expand one character into several (for
/// example `ß` becomes `SS`).
pub fn capitalize_first(s: &str) -> String {
  let mut rest = s.chars();
  match rest.next() {
    None => String::new(),
    Some(first) => {
      let mut out: String = first.to_uppercase().collect();
      // `rest` has already advanced past the first char.
      out.push_str(rest.as_str());
      out
    },
  }
}
244
/// Report whether `line`, ignoring leading whitespace, begins with `#`.
///
/// This is a purely lexical check: nothing after the first `#` is inspected.
#[must_use]
pub fn is_markdown_header(line: &str) -> bool {
  let first_visible = line.trim_start().chars().next();
  first_visible == Some('#')
}
250
251pub fn load_manpage_urls(
257 path: &str,
258) -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
259 let content = std::fs::read_to_string(path)?;
260 let mappings: HashMap<String, String> = serde_json::from_str(&content)?;
261 Ok(mappings)
262}
263
264pub fn never_matching_regex() -> Result<regex::Regex, regex::Error> {
275 regex::Regex::new(r"[^\s\S]").or_else(|_| {
278 regex::Regex::new(r"^\b$")
281 })
282}