1use std::collections::HashMap;
2
3use comrak::{
4 Arena, ComrakOptions,
5 nodes::{AstNode, NodeHeading, NodeValue},
6 parse_document,
7};
8use regex::Regex;
9
10pub use crate::processor::collect_markdown_files;
12
/// Turns arbitrary text into a lowercase slug.
///
/// The input is lowercased, then every character that is neither
/// alphanumeric nor `-` nor `_` is replaced by a single `-`. Leading and
/// trailing dashes are stripped from the result. Runs of replaced characters
/// are *not* collapsed, so `"a, b"` becomes `"a--b"`.
#[must_use]
pub fn slugify(text: &str) -> String {
    let dashed: String = text
        .to_lowercase()
        .chars()
        .map(|ch| {
            if ch.is_alphanumeric() || ch == '-' || ch == '_' {
                ch
            } else {
                '-'
            }
        })
        .collect();

    dashed.trim_matches('-').to_owned()
}
23
24#[must_use]
27pub fn extract_markdown_title(content: &str) -> Option<String> {
28 let arena = Arena::new();
29 let mut options = ComrakOptions::default();
30 options.extension.table = true;
31 options.extension.footnotes = true;
32 options.extension.strikethrough = true;
33 options.extension.tasklist = true;
34 options.extension.superscript = true;
35 options.render.unsafe_ = true;
36
37 let root = parse_document(&arena, content, &options);
38
39 for node in root.descendants() {
40 if let NodeValue::Heading(_) = &node.data.borrow().value {
41 let mut text = String::new();
42 for child in node.children() {
43 if let NodeValue::Text(t) = &child.data.borrow().value {
44 text.push_str(t);
45 }
46 if let NodeValue::Code(t) = &child.data.borrow().value {
48 text.push_str(&t.literal);
49 }
50 }
51 if !text.trim().is_empty() {
52 return Some(text.trim().to_string());
53 }
54 }
55 }
56 None
57}
58
59#[must_use]
66pub fn extract_title_from_markdown(content: &str) -> Option<String> {
67 let arena = Arena::new();
68 let mut options = ComrakOptions::default();
69 options.extension.table = true;
70 options.extension.footnotes = true;
71 options.extension.strikethrough = true;
72 options.extension.tasklist = true;
73 options.render.unsafe_ = true;
74
75 let root = parse_document(&arena, content, &options);
76
77 let anchor_re = Regex::new(r"(\[\]\{#.*?\}|\{#.*?\})").unwrap();
79
80 for node in root.descendants() {
81 if let NodeValue::Heading(NodeHeading { level, .. }) = &node.data.borrow().value {
82 if *level == 1 {
83 let mut text = String::new();
84 for child in node.children() {
85 if let NodeValue::Text(ref t) = child.data.borrow().value {
86 text.push_str(t);
87 }
88 }
89 let clean_title = anchor_re.replace_all(&text, "").trim().to_string();
91 if !clean_title.is_empty() {
92 return Some(clean_title);
93 }
94 }
95 }
96 }
97 None
98}
99
100#[must_use]
103pub fn clean_anchor_patterns(text: &str) -> String {
104 let anchor_pattern = Regex::new(r"\s*\{#[a-zA-Z0-9_-]+\}\s*$").unwrap();
105 anchor_pattern.replace_all(text.trim(), "").to_string()
106}
107
108pub fn process_html_elements<F>(html: &str, regex: &Regex, transform: F) -> String
111where
112 F: Fn(®ex::Captures) -> String,
113{
114 match regex.replace_all(html, transform) {
115 std::borrow::Cow::Borrowed(_) => html.to_string(),
116 std::borrow::Cow::Owned(s) => s,
117 }
118}
119
120#[must_use]
124pub fn strip_markdown(content: &str) -> String {
125 let arena = Arena::new();
126 let mut options = ComrakOptions::default();
127 options.extension.table = true;
128 options.extension.footnotes = true;
129 options.extension.strikethrough = true;
130 options.extension.tasklist = true;
131 options.render.unsafe_ = true;
132
133 let root = parse_document(&arena, content, &options);
134
135 let mut plain_text = String::new();
136 fn extract_text<'a>(node: &'a AstNode<'a>, plain_text: &mut String, in_code_block: &mut bool) {
137 match &node.data.borrow().value {
138 NodeValue::Text(t) => {
139 if !*in_code_block {
140 plain_text.push_str(t);
141 plain_text.push(' ');
142 }
143 }
144 NodeValue::CodeBlock(_) => {
145 *in_code_block = true;
146 }
147 NodeValue::SoftBreak => {
148 plain_text.push(' ');
149 }
150 NodeValue::LineBreak => {
151 plain_text.push('\n');
152 }
153 _ => {}
154 }
155 for child in node.children() {
156 extract_text(child, plain_text, in_code_block);
157 }
158 if let NodeValue::CodeBlock(_) = &node.data.borrow().value {
159 *in_code_block = false;
160 }
161 }
162 let mut in_code_block = false;
163 extract_text(root, &mut plain_text, &mut in_code_block);
164 plain_text
165}
166
/// Returns `s` with its first character uppercased.
///
/// Uppercasing is Unicode-aware, so one character may expand to several
/// (for example `ß` becomes `SS`). The rest of the string is copied
/// unchanged. An empty input yields an empty string.
pub fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    match rest.next() {
        None => String::new(),
        Some(first) => {
            let mut out: String = first.to_uppercase().collect();
            out.push_str(rest.as_str());
            out
        }
    }
}
174
/// Reports whether `line` starts with `#` once leading whitespace is ignored.
///
/// This is a purely textual check: any line whose first non-whitespace
/// character is `#` counts, whatever follows it.
#[must_use]
pub fn is_markdown_header(line: &str) -> bool {
    let unindented = line.trim_start();
    unindented.strip_prefix('#').is_some()
}
180
181pub fn load_manpage_urls(
183 path: &str,
184) -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
185 let content = std::fs::read_to_string(path)?;
186 let mappings: HashMap<String, String> = serde_json::from_str(&content)?;
187 Ok(mappings)
188}
189
190pub fn safely_process_markup<F>(text: &str, process_fn: F, default_on_error: &str) -> String
196where
197 F: FnOnce(&str) -> String,
198{
199 if text.is_empty() {
201 return String::new();
202 }
203
204 let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| process_fn(text)));
206
207 match result {
208 Ok(processed_text) => processed_text,
209 Err(e) => {
210 if let Some(error_msg) = e.downcast_ref::<String>() {
212 log::error!("Error processing markup: {error_msg}");
213 } else if let Some(error_msg) = e.downcast_ref::<&str>() {
214 log::error!("Error processing markup: {error_msg}");
215 } else {
216 log::error!("Unknown error occurred while processing markup");
217 }
218
219 if default_on_error.is_empty() {
221 text.to_string()
222 } else {
223 default_on_error.to_string()
224 }
225 }
226 }
227}
228
229#[must_use]
235pub fn never_matching_regex() -> regex::Regex {
236 regex::Regex::new(r"[^\s\S]").expect("Failed to compile never-matching regex")
238}
239
240pub fn process_with_error_recovery<F, T>(
245 operation_name: &str,
246 input: T,
247 process_fn: F,
248) -> Result<String, String>
249where
250 F: FnOnce(T) -> Result<String, Box<dyn std::error::Error>>,
251{
252 match process_fn(input) {
253 Ok(result) => Ok(result),
254 Err(e) => {
255 let error_msg = format!("Error in {operation_name}: {e}");
256 log::error!("{error_msg}");
257 Err(error_msg)
258 }
259 }
260}