use std::{collections::HashMap, sync::OnceLock};
pub mod codeblock;
use comrak::{
Arena,
nodes::{AstNode, NodeHeading, NodeValue},
options::Options,
parse_document,
};
use regex::Regex;
/// Error type for this utilities module.
#[derive(Debug, thiserror::Error)]
pub enum UtilError {
/// A regular expression failed to compile. Wraps the underlying
/// [`regex::Error`], which converts into this variant via `#[from]`.
#[error("Regex compilation failed: {0}")]
RegexError(#[from] regex::Error),
}
/// Convenience alias for a `Result` whose error type is [`UtilError`].
pub type UtilResult<T> = Result<T, UtilError>;
/// Converts `text` into a lowercase, dash-separated slug.
///
/// The input is lowercased, then every character that is neither
/// alphanumeric nor one of `-` / `_` is replaced by a `-`. Replacement is
/// one dash per character, so runs of separators are *not* collapsed.
/// Leading and trailing dashes are removed from the result.
#[must_use]
pub fn slugify(text: &str) -> String {
    let dashed: String = text
        .to_lowercase()
        .chars()
        .map(|ch| {
            if ch.is_alphanumeric() || ch == '-' || ch == '_' {
                ch
            } else {
                '-'
            }
        })
        .collect();
    dashed.trim_matches('-').to_owned()
}
/// Returns the text of the first non-empty heading in `content`.
///
/// The markdown is parsed with the table, footnotes, strikethrough, tasklist
/// and superscript extensions enabled. Headings of any level are considered,
/// in document order; a heading whose inline text is blank after trimming is
/// skipped. Returns `None` when no heading yields any text.
#[must_use]
pub fn extract_markdown_title(content: &str) -> Option<String> {
    let mut options = Options::default();
    options.extension.table = true;
    options.extension.footnotes = true;
    options.extension.strikethrough = true;
    options.extension.tasklist = true;
    options.extension.superscript = true;
    options.render.r#unsafe = true;

    let arena = Arena::new();
    let root = parse_document(&arena, content, &options);

    root.descendants()
        .filter(|node| matches!(&node.data.borrow().value, NodeValue::Heading(_)))
        .find_map(|heading| {
            let text = extract_inline_text_from_node(heading);
            let trimmed = text.trim();
            (!trimmed.is_empty()).then(|| trimmed.to_string())
        })
}
/// Collects the plain text carried by the inline children of `node`.
///
/// Text nodes and inline code contribute their literal content. Links,
/// emphasis, strong, strikethrough, superscript and footnote references are
/// descended into recursively. Every other child kind (including inline HTML
/// and images) contributes nothing.
fn extract_inline_text_from_node<'a>(node: &'a AstNode<'a>) -> String {
    /// Appends the inline text found under `parent` to `out`, in order.
    fn gather<'n>(parent: &'n AstNode<'n>, out: &mut String) {
        for child in parent.children() {
            match &child.data.borrow().value {
                NodeValue::Text(t) => out.push_str(t),
                NodeValue::Code(code) => out.push_str(&code.literal),
                NodeValue::Link(..)
                | NodeValue::Emph
                | NodeValue::Strong
                | NodeValue::Strikethrough
                | NodeValue::Superscript
                | NodeValue::FootnoteReference(..) => gather(child, out),
                #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
                NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
                _ => {},
            }
        }
    }

    let mut collected = String::new();
    gather(node, &mut collected);
    collected
}
/// Extracts the first non-empty level-1 heading of `content` together with
/// its optional `{#id}` anchor.
///
/// The markdown is parsed with the table, footnotes, strikethrough and
/// tasklist extensions enabled. For each level-1 heading, in document order,
/// the inline text is collected and every `{#...}` anchor marker (optionally
/// preceded by `[]`) is removed from it. The first heading whose remaining
/// text is non-empty after trimming is returned.
///
/// # Returns
///
/// `Some((title, anchor_id))` where `anchor_id` is the content between `{#`
/// and `}` of the first anchor marker found in the heading, or `None` inside
/// the tuple when the heading has no marker. Returns `None` when no level-1
/// heading yields a non-empty title.
#[must_use]
pub fn extract_markdown_title_and_id(
    content: &str,
) -> Option<(String, Option<String>)> {
    // Compiled once per process. Group 1 is the optional `[]`, group 2 the id.
    static ANCHOR_RE: OnceLock<Regex> = OnceLock::new();
    let anchor_re = ANCHOR_RE.get_or_init(|| {
        Regex::new(r"(\[\])?\{#(.*?)\}").unwrap_or_else(|e| {
            log::error!(
                "Failed to compile ANCHOR_RE regex in \
                 extract_markdown_title_and_id: {e}\n Falling back to never \
                 matching regex."
            );
            never_matching_regex().unwrap_or_else(|_| {
                #[allow(
                    clippy::expect_used,
                    reason = "This pattern is guaranteed to be valid"
                )]
                Regex::new(r"[^\s\S]")
                    .expect("regex pattern [^\\s\\S] should always compile")
            })
        })
    });

    let mut options = Options::default();
    options.extension.table = true;
    options.extension.footnotes = true;
    options.extension.strikethrough = true;
    options.extension.tasklist = true;
    options.render.r#unsafe = true;

    let arena = Arena::new();
    let root = parse_document(&arena, content, &options);

    for node in root.descendants() {
        let is_h1 = matches!(
            &node.data.borrow().value,
            NodeValue::Heading(NodeHeading { level: 1, .. })
        );
        if !is_h1 {
            continue;
        }

        let text = extract_inline_text_from_node(node);
        let anchor_id = anchor_re
            .captures(&text)
            .and_then(|caps| caps.get(2).map(|m| m.as_str().to_string()));
        let clean_title = anchor_re.replace_all(&text, "").trim().to_string();

        // An H1 that consists solely of an anchor marker is skipped so a later
        // H1 with real text can still provide the title.
        if !clean_title.is_empty() {
            return Some((clean_title, anchor_id));
        }
    }
    None
}
/// Removes a trailing `{#id}` anchor marker from `text`.
///
/// `text` is trimmed first. If what remains ends with `{#id}` — where `id`
/// is one or more ASCII letters, digits, `_` or `-` — that marker and any
/// whitespace directly before it are removed. Input without such a suffix is
/// returned trimmed but otherwise unchanged.
#[must_use]
pub fn clean_anchor_patterns(text: &str) -> String {
    /// Builds the anchor-suffix regex, falling back to a regex that never
    /// matches (and logging the failure) if compilation fails.
    fn build_pattern() -> Regex {
        let compiled = Regex::new(r"\s*\{#[a-zA-Z0-9_-]+\}\s*$");
        compiled.unwrap_or_else(|e| {
            log::error!(
                "Failed to compile ANCHOR_PATTERN regex in clean_anchor_patterns: \
                 {e}\n Falling back to never matching regex."
            );
            never_matching_regex().unwrap_or_else(|_| {
                #[allow(
                    clippy::expect_used,
                    reason = "This pattern is guaranteed to be valid"
                )]
                Regex::new(r"[^\s\S]")
                    .expect("regex pattern [^\\s\\S] should always compile")
            })
        })
    }

    // Compiled on first use and reused by every later call.
    static ANCHOR_PATTERN: OnceLock<Regex> = OnceLock::new();

    let trimmed = text.trim();
    ANCHOR_PATTERN
        .get_or_init(build_pattern)
        .replace_all(trimmed, "")
        .into_owned()
}
pub fn process_html_elements<F>(
html: &str,
regex: &Regex,
transform: F,
) -> String
where
F: Fn(®ex::Captures) -> String,
{
match regex.replace_all(html, transform) {
std::borrow::Cow::Borrowed(_) => html.to_string(),
std::borrow::Cow::Owned(s) => s,
}
}
/// Renders `content` as plain text by walking its parsed markdown tree.
///
/// The markdown is parsed with the table, footnotes, strikethrough and
/// tasklist extensions enabled. Text and inline-code literals are emitted
/// as-is; soft breaks become a space and hard line breaks a newline.
/// Paragraphs, headings and lists are each followed by a newline. Text and
/// inline-code nodes located beneath a code block node are skipped. All other
/// node kinds are transparent: only their children are visited.
#[must_use]
pub fn strip_markdown(content: &str) -> String {
    /// Visits every child of `node` in order.
    fn walk_children<'a>(
        node: &'a AstNode<'a>,
        out: &mut String,
        in_code_block: &mut bool,
    ) {
        for child in node.children() {
            walk(child, out, in_code_block);
        }
    }

    /// Emits the plain-text contribution of `node` and its subtree.
    fn walk<'a>(
        node: &'a AstNode<'a>,
        out: &mut String,
        in_code_block: &mut bool,
    ) {
        match &node.data.borrow().value {
            NodeValue::Text(t) => {
                if !*in_code_block {
                    out.push_str(t);
                }
            },
            NodeValue::Code(c) => {
                if !*in_code_block {
                    out.push_str(&c.literal);
                }
            },
            NodeValue::SoftBreak => out.push(' '),
            NodeValue::LineBreak => out.push('\n'),
            NodeValue::CodeBlock(_) => {
                // The flag suppresses text output for the whole subtree.
                *in_code_block = true;
                walk_children(node, out, in_code_block);
                *in_code_block = false;
            },
            // Block-level containers are terminated by a newline.
            NodeValue::Paragraph | NodeValue::Heading(_) | NodeValue::List(_) => {
                walk_children(node, out, in_code_block);
                out.push('\n');
            },
            // Document, list items and everything else: just descend.
            _ => walk_children(node, out, in_code_block),
        }
    }

    let mut options = Options::default();
    options.extension.table = true;
    options.extension.footnotes = true;
    options.extension.strikethrough = true;
    options.extension.tasklist = true;
    options.render.r#unsafe = true;

    let arena = Arena::new();
    let root = parse_document(&arena, content, &options);

    let mut plain_text = String::new();
    let mut inside_code = false;
    walk(root, &mut plain_text, &mut inside_code);
    plain_text
}
/// Returns `s` with its first character converted to uppercase.
///
/// The remainder of the string is left untouched. Uppercasing follows
/// Unicode rules, so a single character may expand to several (for example
/// `ß` becomes `SS`). An empty input yields an empty string.
pub fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    match rest.next() {
        Some(first) => {
            let mut out: String = first.to_uppercase().collect();
            out.push_str(rest.as_str());
            out
        }
        None => String::new(),
    }
}
/// Reports whether `line` looks like a markdown header line.
///
/// A line qualifies when its first non-whitespace character is `#`. No
/// further validation (such as a space after the hashes) is performed.
#[must_use]
pub fn is_markdown_header(line: &str) -> bool {
    let first_visible = line.chars().find(|ch| !ch.is_whitespace());
    first_visible == Some('#')
}
/// Loads a string-to-string mapping from the JSON file at `path`.
///
/// The file must contain a single JSON object whose values are all strings.
///
/// # Errors
///
/// Returns an error if the file cannot be read, or if its content cannot be
/// deserialized as a JSON object mapping strings to strings.
pub fn load_manpage_urls(
    path: &str,
) -> Result<HashMap<String, String>, Box<dyn std::error::Error>> {
    let raw = std::fs::read_to_string(path)?;
    serde_json::from_str::<HashMap<String, String>>(&raw).map_err(Into::into)
}
/// Builds a regex that cannot match any input.
///
/// The primary pattern is `[^\s\S]`, a character class that excludes both
/// whitespace and non-whitespace and is therefore empty. If that pattern
/// fails to compile, `^\b$` is tried instead.
///
/// # Errors
///
/// Returns the compilation error of the second pattern if both fail.
pub fn never_matching_regex() -> Result<regex::Regex, regex::Error> {
    let primary = regex::Regex::new(r"[^\s\S]");
    if primary.is_ok() {
        return primary;
    }
    regex::Regex::new(r"^\b$")
}