ndg_commonmark/utils/
mod.rs1use std::{
2 collections::HashMap,
3 sync::{LazyLock, OnceLock, RwLock},
4};
5pub mod codeblock;
6
7use comrak::{
8 Arena,
9 nodes::{AstNode, NodeHeading, NodeValue},
10 options::Options,
11 parse_document,
12};
13use regex::Regex;
14
15#[derive(Debug, thiserror::Error)]
17pub enum UtilError {
18 #[error("Regex compilation failed: {0}")]
19 RegexError(#[from] regex::Error),
20}
21
22pub type UtilResult<T> = Result<T, UtilError>;
24
/// Turns arbitrary text into a lowercase, dash-separated slug.
///
/// The text is lowercased, every character that is neither alphanumeric nor
/// `-`/`_` becomes a `-`, and leading/trailing dashes are trimmed. Runs of
/// dashes in the middle are kept as-is.
///
/// Results are memoised in a process-wide cache keyed by the input text. Once
/// the cache holds 2048 entries no further entries are added, but slugs are
/// still computed normally.
#[must_use]
pub fn slugify(text: &str) -> String {
    const MAX_CACHED_SLUGS: usize = 2048;
    static SLUG_CACHE: LazyLock<RwLock<HashMap<String, String>>> =
        LazyLock::new(|| RwLock::new(HashMap::new()));

    // Fast path: look the text up under a short-lived read lock. A poisoned
    // lock is still usable, so recover the guard instead of panicking.
    let cached = SLUG_CACHE
        .read()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .get(text)
        .cloned();
    if let Some(slug) = cached {
        return slug;
    }

    let lowered = text.to_lowercase();
    let dashed: String = lowered
        .chars()
        .map(|ch| {
            if ch.is_alphanumeric() || ch == '-' || ch == '_' {
                ch
            } else {
                '-'
            }
        })
        .collect();
    let slug = dashed.trim_matches('-').to_owned();

    let mut slugs = SLUG_CACHE
        .write()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    if slugs.len() < MAX_CACHED_SLUGS {
        slugs.insert(text.to_owned(), slug.clone());
    }

    slug
}
56
57#[must_use]
60pub fn extract_markdown_title(content: &str) -> Option<String> {
61 let arena = Arena::new();
62 let mut options = Options::default();
63 options.extension.table = true;
64 options.extension.footnotes = true;
65 options.extension.strikethrough = true;
66 options.extension.tasklist = true;
67 options.extension.superscript = true;
68 options.render.r#unsafe = true;
69
70 let root = parse_document(&arena, content, &options);
71
72 for node in root.descendants() {
73 if let NodeValue::Heading(NodeHeading { level, .. }) =
74 &node.data.borrow().value
75 && *level == 1
76 {
77 let text = extract_inline_text_from_node(node);
78 if !text.trim().is_empty() {
79 return Some(text.trim().to_string());
80 }
81 }
82 }
83 None
84}
85
86fn extract_inline_text_from_node<'a>(node: &'a AstNode<'a>) -> String {
89 let mut text = String::new();
90 for child in node.children() {
91 match &child.data.borrow().value {
92 NodeValue::Text(t) => text.push_str(t),
93 NodeValue::Code(t) => text.push_str(&t.literal),
94 NodeValue::Link(..)
95 | NodeValue::Emph
96 | NodeValue::Strong
97 | NodeValue::Strikethrough
98 | NodeValue::Superscript
99 | NodeValue::FootnoteReference(..) => {
100 text.push_str(&extract_inline_text_from_node(child));
101 },
102 #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
103 NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
104 _ => {},
105 }
106 }
107 text
108}
109
110#[must_use]
125pub fn extract_markdown_title_and_id(
126 content: &str,
127) -> Option<(String, Option<String>)> {
128 let arena = Arena::new();
129 let mut options = Options::default();
130 options.extension.table = true;
131 options.extension.footnotes = true;
132 options.extension.strikethrough = true;
133 options.extension.tasklist = true;
134 options.render.r#unsafe = true;
135
136 let root = parse_document(&arena, content, &options);
137
138 #[allow(
140 clippy::items_after_statements,
141 reason = "Static is Scoped to function for clarity"
142 )]
143 static ANCHOR_RE: OnceLock<Regex> = OnceLock::new();
144 let anchor_re = ANCHOR_RE.get_or_init(|| {
145 Regex::new(r"(\[\])?\{#(.*?)\}").unwrap_or_else(|e| {
146 log::error!(
147 "Failed to compile ANCHOR_RE regex in extract_h1_title: {e}\n Falling \
148 back to never matching regex."
149 );
150 never_matching_regex().unwrap_or_else(|_| {
151 #[allow(
153 clippy::expect_used,
154 reason = "This pattern is guaranteed to be valid"
155 )]
156 Regex::new(r"[^\s\S]")
157 .expect("regex pattern [^\\s\\S] should always compile")
158 })
159 })
160 });
161
162 for node in root.descendants() {
163 if let NodeValue::Heading(NodeHeading { level, .. }) =
164 &node.data.borrow().value
165 && *level == 1
166 {
167 let text = extract_inline_text_from_node(node);
168 let anchor_id = anchor_re
170 .captures(&text)
171 .and_then(|caps| caps.get(2).map(|m| m.as_str().to_string()));
172 let clean_title = anchor_re.replace_all(&text, "").trim().to_string();
173 if !clean_title.is_empty() {
174 return Some((clean_title, anchor_id));
175 }
176 }
177 }
178 None
179}
180
181#[must_use]
189pub fn clean_anchor_patterns(text: &str) -> String {
190 static ANCHOR_PATTERN: OnceLock<Regex> = OnceLock::new();
191 let anchor_pattern = ANCHOR_PATTERN.get_or_init(|| {
192 Regex::new(r"\s*\{#[a-zA-Z0-9_-]+\}\s*$").unwrap_or_else(|e| {
193 log::error!(
194 "Failed to compile ANCHOR_PATTERN regex in clean_anchor_patterns: \
195 {e}\n Falling back to never matching regex."
196 );
197 never_matching_regex().unwrap_or_else(|_| {
198 #[allow(
200 clippy::expect_used,
201 reason = "This pattern is guaranteed to be valid"
202 )]
203 Regex::new(r"[^\s\S]")
204 .expect("regex pattern [^\\s\\S] should always compile")
205 })
206 })
207 });
208 anchor_pattern.replace_all(text.trim(), "").to_string()
209}
210
211#[must_use]
216pub fn strip_markdown(content: &str) -> String {
217 let arena = Arena::new();
218 let mut options = Options::default();
219 options.extension.table = true;
220 options.extension.footnotes = true;
221 options.extension.strikethrough = true;
222 options.extension.tasklist = true;
223 options.render.r#unsafe = true;
224
225 let root = parse_document(&arena, content, &options);
226
227 let mut plain_text = String::new();
228
229 #[allow(clippy::items_after_statements, reason = "Helper scoped for clarity")]
230 fn extract_text<'a>(
231 node: &'a AstNode<'a>,
232 plain_text: &mut String,
233 in_code_block: &mut bool,
234 ) {
235 match &node.data.borrow().value {
236 NodeValue::Document => {
237 for child in node.children() {
238 extract_text(child, plain_text, in_code_block);
239 }
240 },
241 NodeValue::Paragraph => {
242 for child in node.children() {
243 extract_text(child, plain_text, in_code_block);
244 }
245 plain_text.push('\n');
247 },
248 NodeValue::Heading(_) => {
249 for child in node.children() {
250 extract_text(child, plain_text, in_code_block);
251 }
252 plain_text.push('\n');
254 },
255 NodeValue::Text(t) => {
256 if !*in_code_block {
257 plain_text.push_str(t);
258 }
259 },
260 NodeValue::CodeBlock(_) => {
261 *in_code_block = true;
262 for child in node.children() {
263 extract_text(child, plain_text, in_code_block);
264 }
265 *in_code_block = false;
266 },
267 NodeValue::SoftBreak => {
268 plain_text.push(' ');
269 },
270 NodeValue::LineBreak => {
271 plain_text.push('\n');
272 },
273 NodeValue::List(_) => {
274 for child in node.children() {
275 extract_text(child, plain_text, in_code_block);
276 }
277 plain_text.push('\n');
278 },
279 NodeValue::Item(_) => {
280 for child in node.children() {
281 extract_text(child, plain_text, in_code_block);
282 }
283 },
284 NodeValue::Code(c) => {
285 if !*in_code_block {
286 plain_text.push_str(&c.literal);
287 }
288 },
289 _ => {
290 for child in node.children() {
291 extract_text(child, plain_text, in_code_block);
292 }
293 },
294 }
295 }
296
297 let mut in_code_block = false;
298 extract_text(root, &mut plain_text, &mut in_code_block);
299 plain_text
300}
301
/// Returns `s` with its first character uppercased and the rest unchanged.
///
/// An empty input yields an empty string. The uppercase form may be longer
/// than one character (for example `ß` becomes `SS`).
pub fn capitalize_first(s: &str) -> String {
    match s.chars().next() {
        None => String::new(),
        Some(first) => {
            // Everything after the first character is copied verbatim.
            let rest = &s[first.len_utf8()..];
            let mut capitalized = String::with_capacity(s.len());
            capitalized.extend(first.to_uppercase());
            capitalized.push_str(rest);
            capitalized
        },
    }
}
309
/// Reports whether `line`, ignoring leading whitespace, begins with `#`.
///
/// This is a purely lexical check: it does not verify the number of `#`
/// characters or that a space follows them.
#[must_use]
pub fn is_markdown_header(line: &str) -> bool {
    let unindented = line.trim_start();
    unindented.strip_prefix('#').is_some()
}
315
316pub fn load_manpage_urls(
322 path: &str,
323) -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
324 let content = std::fs::read_to_string(path)?;
325 let mappings: HashMap<String, String> = serde_json::from_str(&content)?;
326 Ok(mappings)
327}
328
329pub fn never_matching_regex() -> Result<regex::Regex, regex::Error> {
340 regex::Regex::new(r"[^\s\S]").or_else(|_| {
343 regex::Regex::new(r"^\b$")
346 })
347}