ndg_commonmark/utils/
mod.rs1use std::{collections::HashMap, sync::OnceLock};
2pub mod codeblock;
3
4use comrak::{
5 Arena,
6 nodes::{AstNode, NodeHeading, NodeValue},
7 options::Options,
8 parse_document,
9};
10use regex::Regex;
11
12#[derive(Debug, thiserror::Error)]
14pub enum UtilError {
15 #[error("Regex compilation failed: {0}")]
16 RegexError(#[from] regex::Error),
17}
18
19pub type UtilResult<T> = Result<T, UtilError>;
21
/// Turns arbitrary text into a lowercase, dash-separated slug.
///
/// The input is lowercased, then every character that is not alphanumeric,
/// `-`, or `_` is replaced by a `-`. Replacement is one dash per character,
/// so runs of punctuation yield runs of dashes. Leading and trailing dashes
/// are stripped from the result.
#[must_use]
pub fn slugify(text: &str) -> String {
  let dashed: String = text
    .to_lowercase()
    .chars()
    .map(|ch| {
      if ch.is_alphanumeric() || ch == '-' || ch == '_' {
        ch
      } else {
        '-'
      }
    })
    .collect();

  dashed.trim_matches('-').to_owned()
}
33
34#[must_use]
37pub fn extract_markdown_title(content: &str) -> Option<String> {
38 let arena = Arena::new();
39 let mut options = Options::default();
40 options.extension.table = true;
41 options.extension.footnotes = true;
42 options.extension.strikethrough = true;
43 options.extension.tasklist = true;
44 options.extension.superscript = true;
45 options.render.r#unsafe = true;
46
47 let root = parse_document(&arena, content, &options);
48
49 for node in root.descendants() {
50 if let NodeValue::Heading(_) = &node.data.borrow().value {
51 let text = extract_inline_text_from_node(node);
52 if !text.trim().is_empty() {
53 return Some(text.trim().to_string());
54 }
55 }
56 }
57 None
58}
59
60fn extract_inline_text_from_node<'a>(node: &'a AstNode<'a>) -> String {
63 let mut text = String::new();
64 for child in node.children() {
65 match &child.data.borrow().value {
66 NodeValue::Text(t) => text.push_str(t),
67 NodeValue::Code(t) => text.push_str(&t.literal),
68 NodeValue::Link(..)
69 | NodeValue::Emph
70 | NodeValue::Strong
71 | NodeValue::Strikethrough
72 | NodeValue::Superscript
73 | NodeValue::FootnoteReference(..) => {
74 text.push_str(&extract_inline_text_from_node(child));
75 },
76 #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
77 NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
78 _ => {},
79 }
80 }
81 text
82}
83
84#[must_use]
99pub fn extract_markdown_title_and_id(
100 content: &str,
101) -> Option<(String, Option<String>)> {
102 let arena = Arena::new();
103 let mut options = Options::default();
104 options.extension.table = true;
105 options.extension.footnotes = true;
106 options.extension.strikethrough = true;
107 options.extension.tasklist = true;
108 options.render.r#unsafe = true;
109
110 let root = parse_document(&arena, content, &options);
111
112 #[allow(
114 clippy::items_after_statements,
115 reason = "Static is Scoped to function for clarity"
116 )]
117 static ANCHOR_RE: OnceLock<Regex> = OnceLock::new();
118 let anchor_re = ANCHOR_RE.get_or_init(|| {
119 Regex::new(r"(\[\])?\{#(.*?)\}").unwrap_or_else(|e| {
120 log::error!(
121 "Failed to compile ANCHOR_RE regex in extract_h1_title: {e}\n Falling \
122 back to never matching regex."
123 );
124 never_matching_regex().unwrap_or_else(|_| {
125 #[allow(
127 clippy::expect_used,
128 reason = "This pattern is guaranteed to be valid"
129 )]
130 Regex::new(r"[^\s\S]")
131 .expect("regex pattern [^\\s\\S] should always compile")
132 })
133 })
134 });
135
136 for node in root.descendants() {
137 if let NodeValue::Heading(NodeHeading { level, .. }) =
138 &node.data.borrow().value
139 && *level == 1
140 {
141 let text = extract_inline_text_from_node(node);
142 let anchor_id = anchor_re
144 .captures(&text)
145 .and_then(|caps| caps.get(2).map(|m| m.as_str().to_string()));
146 let clean_title = anchor_re.replace_all(&text, "").trim().to_string();
147 if !clean_title.is_empty() {
148 return Some((clean_title, anchor_id));
149 }
150 }
151 }
152 None
153}
154
155#[must_use]
163pub fn clean_anchor_patterns(text: &str) -> String {
164 static ANCHOR_PATTERN: OnceLock<Regex> = OnceLock::new();
165 let anchor_pattern = ANCHOR_PATTERN.get_or_init(|| {
166 Regex::new(r"\s*\{#[a-zA-Z0-9_-]+\}\s*$").unwrap_or_else(|e| {
167 log::error!(
168 "Failed to compile ANCHOR_PATTERN regex in clean_anchor_patterns: \
169 {e}\n Falling back to never matching regex."
170 );
171 never_matching_regex().unwrap_or_else(|_| {
172 #[allow(
174 clippy::expect_used,
175 reason = "This pattern is guaranteed to be valid"
176 )]
177 Regex::new(r"[^\s\S]")
178 .expect("regex pattern [^\\s\\S] should always compile")
179 })
180 })
181 });
182 anchor_pattern.replace_all(text.trim(), "").to_string()
183}
184
185pub fn process_html_elements<F>(
188 html: &str,
189 regex: &Regex,
190 transform: F,
191) -> String
192where
193 F: Fn(®ex::Captures) -> String,
194{
195 match regex.replace_all(html, transform) {
196 std::borrow::Cow::Borrowed(_) => html.to_string(),
197 std::borrow::Cow::Owned(s) => s,
198 }
199}
200
201#[must_use]
206pub fn strip_markdown(content: &str) -> String {
207 let arena = Arena::new();
208 let mut options = Options::default();
209 options.extension.table = true;
210 options.extension.footnotes = true;
211 options.extension.strikethrough = true;
212 options.extension.tasklist = true;
213 options.render.r#unsafe = true;
214
215 let root = parse_document(&arena, content, &options);
216
217 let mut plain_text = String::new();
218 #[allow(clippy::items_after_statements, reason = "Helper scoped for clarity")]
219 fn extract_text<'a>(
220 node: &'a AstNode<'a>,
221 plain_text: &mut String,
222 in_code_block: &mut bool,
223 ) {
224 match &node.data.borrow().value {
225 NodeValue::Text(t) => {
226 if !*in_code_block {
227 plain_text.push_str(t);
228 plain_text.push(' ');
229 }
230 },
231 NodeValue::CodeBlock(_) => {
232 *in_code_block = true;
233 },
234 NodeValue::SoftBreak => {
235 plain_text.push(' ');
236 },
237 NodeValue::LineBreak => {
238 plain_text.push('\n');
239 },
240 _ => {},
241 }
242 for child in node.children() {
243 extract_text(child, plain_text, in_code_block);
244 }
245 if let NodeValue::CodeBlock(_) = &node.data.borrow().value {
246 *in_code_block = false;
247 }
248 }
249 let mut in_code_block = false;
250 extract_text(root, &mut plain_text, &mut in_code_block);
251 plain_text
252}
253
/// Returns `s` with its first character converted to uppercase.
///
/// The rest of the string is copied unchanged. An empty input yields an
/// empty string. The uppercase form may be longer than one character
/// (for example `ß` becomes `SS`).
pub fn capitalize_first(s: &str) -> String {
  let mut rest = s.chars();
  let Some(first) = rest.next() else {
    return String::new();
  };

  let mut capitalized = String::with_capacity(s.len());
  capitalized.extend(first.to_uppercase());
  capitalized.push_str(rest.as_str());
  capitalized
}
261
/// Reports whether `line` looks like a Markdown ATX header.
///
/// The check is purely textual: after leading whitespace is skipped, the
/// line must begin with `#`. Nothing after that first `#` is inspected.
#[must_use]
pub fn is_markdown_header(line: &str) -> bool {
  let unindented = line.trim_start();
  unindented.strip_prefix('#').is_some()
}
267
268pub fn load_manpage_urls(
274 path: &str,
275) -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
276 let content = std::fs::read_to_string(path)?;
277 let mappings: HashMap<String, String> = serde_json::from_str(&content)?;
278 Ok(mappings)
279}
280
281pub fn never_matching_regex() -> Result<regex::Regex, regex::Error> {
292 regex::Regex::new(r"[^\s\S]").or_else(|_| {
295 regex::Regex::new(r"^\b$")
298 })
299}