use std::{
collections::HashMap,
path::{Path, PathBuf},
};
use comrak::{
Arena,
nodes::{AstNode, NodeHeading, NodeValue},
options::Options,
parse_document,
};
use log::trace;
use markup5ever::local_name;
use walkdir::WalkDir;
/// Errors that can occur while post-processing rendered HTML as a DOM.
#[derive(Debug, thiserror::Error)]
pub enum DomError {
/// A CSS selector could not be applied to the document.
#[error("CSS selector failed: {0}")]
SelectorError(String),
/// Serializing the (possibly modified) DOM back to an HTML string failed.
#[error("DOM serialization failed: {0}")]
SerializationError(String),
}
/// Convenience alias for DOM-operation results.
pub type DomResult<T> = Result<T, DomError>;
/// Run a CSS `selector` against `document` and return all matching nodes.
///
/// Selector failures are logged and treated as "no matches" instead of
/// being propagated, so callers never need to handle a selector error.
fn safe_select(
    document: &kuchikikiki::NodeRef,
    selector: &str,
) -> Vec<kuchikikiki::NodeRef> {
    document.select(selector).map_or_else(
        |e| {
            log::warn!("DOM selector '{selector}' failed: {e:?}");
            Vec::new()
        },
        |selections| selections.map(|sel| sel.as_node().clone()).collect(),
    )
}
use super::{
process::process_safe,
types::{
AstTransformer,
MarkdownOptions,
MarkdownProcessor,
PromptTransformer,
},
};
use crate::{
syntax::create_default_manager,
types::{Header, MarkdownResult},
utils,
};
impl MarkdownProcessor {
/// Create a processor from `options`.
///
/// Construction-time side effects:
/// - Attempts to load the manpage URL map from `options.manpage_urls_path`
///   when set; a load failure silently leaves the map as `None`.
/// - When `options.highlight_code` is enabled, initializes the syntax
///   manager; on failure the error is logged and highlighting is simply
///   disabled rather than aborting construction.
///
/// The base directory for resolving file includes defaults to `"."`; see
/// [`Self::with_base_dir`].
#[must_use]
pub fn new(options: MarkdownOptions) -> Self {
let manpage_urls = options
.manpage_urls_path
.as_ref()
.and_then(|path| crate::utils::load_manpage_urls(path).ok());
let syntax_manager = if options.highlight_code {
match create_default_manager() {
Ok(manager) => {
log::info!("Syntax highlighting initialized successfully");
Some(manager)
},
Err(e) => {
// Degrade gracefully: highlighting becomes a no-op downstream.
log::error!("Failed to initialize syntax highlighting: {e}");
log::warn!(
"Continuing without syntax highlighting - code blocks will not be \
highlighted"
);
None
},
}
} else {
None
};
Self {
options,
manpage_urls,
syntax_manager,
base_dir: std::path::PathBuf::from("."),
}
}
/// Borrow the options this processor was constructed with.
#[must_use]
pub const fn options(&self) -> &MarkdownOptions {
&self.options
}
/// Builder-style setter: use `base_dir` as the root for resolving
/// relative file-include paths during preprocessing.
#[must_use]
pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
self.base_dir = base_dir.to_path_buf();
self
}
/// Report whether an optional capability is active for this processor.
#[must_use]
pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
match feature {
ProcessorFeature::Gfm => self.options.gfm,
ProcessorFeature::Nixpkgs => self.options.nixpkgs,
ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
// Only "on" when the map actually loaded successfully in `new`.
ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
}
}
/// The manpage-name → URL map, if one was successfully loaded.
#[must_use]
pub const fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
self.manpage_urls.as_ref()
}
/// Replace `<pre><code>` blocks in `html` with syntax-highlighted markup.
///
/// Returns the input unchanged when highlighting is disabled, when no
/// syntax manager is available, or when DOM serialization fails.
#[must_use]
pub fn highlight_codeblocks(&self, html: &str) -> String {
use kuchikikiki::parse_html;
use tendril::TendrilSink;
if !self.options.highlight_code || self.syntax_manager.is_none() {
return html.to_string();
}
let document = parse_html().one(html);
// Phase 1: collect candidates first so we never mutate the DOM while
// iterating over a selection of it.
let mut code_blocks = Vec::new();
for pre_node in safe_select(&document, "pre > code") {
let code_node = pre_node;
if let Some(element) = code_node.as_element() {
// Language comes from a `language-*` class; defaults to "text".
let language = element
.attributes
.borrow()
.get("class")
.and_then(|class| class.strip_prefix("language-"))
.unwrap_or("text")
.to_string();
let code_text = code_node.text_contents();
// The parent is the enclosing <pre>, which is what gets replaced.
if let Some(pre_parent) = code_node.parent() {
code_blocks.push((
pre_parent.clone(),
code_node.clone(),
code_text,
language,
));
}
}
}
// Phase 2: swap each <pre> for the highlighted fragment. Blocks whose
// highlighting fails are left untouched.
for (pre_element, _code_node, code_text, language) in code_blocks {
if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
{
let wrapped_html = format!(
r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
);
let fragment = parse_html().one(wrapped_html.as_str());
pre_element.insert_after(fragment);
pre_element.detach();
}
}
let mut buf = Vec::new();
if let Err(e) = document.serialize(&mut buf) {
log::warn!("DOM serialization failed: {e:?}");
// Fall back to the unmodified input rather than emitting partial HTML.
return html.to_string(); }
String::from_utf8(buf).unwrap_or_else(|_| html.to_string())
}
/// Apply the configured [`TabStyle`] policy to a single line/snippet of
/// code: keep tabs, warn about them, or replace each with four spaces.
fn handle_hardtabs(&self, code: &str) -> String {
use super::types::TabStyle;
// Fast path: nothing to do for tab-free input.
if !code.contains('\t') {
return code.to_string();
}
match self.options.tab_style {
TabStyle::None => code.to_string(),
TabStyle::Warn => {
log::warn!(
"Hard tabs detected in code block. Consider using spaces for \
consistency. Tools like editorconfig may help you normalize spaces \
in your documents."
);
code.to_string()
},
TabStyle::Normalize => {
log::debug!("Replacing hard tabs with spaces");
code.replace('\t', "    ")
},
}
}
/// Walk `markdown` line by line, tracking fenced code blocks, and apply
/// the tab policy to lines inside code fences.
///
/// Fence tracking follows CommonMark-ish rules: a fence opens with 3+
/// repeated backticks or tildes and closes only on a fence of the same
/// character with at least as many repeats.
/// NOTE(review): only lines *inside* fences are tab-processed; tabs in
/// ordinary prose are deliberately left alone — confirm this is intended.
fn process_hardtabs(&self, markdown: &str) -> String {
use super::types::TabStyle;
if self.options.tab_style == TabStyle::None {
return markdown.to_string();
}
let mut result = String::with_capacity(markdown.len());
let mut lines = markdown.lines().peekable();
// Fence state: which character opened the current fence and how many
// repeats it had (a closing fence must match both).
let mut in_code_block = false;
let mut code_fence_char = None;
let mut code_fence_count = 0;
while let Some(line) = lines.next() {
let trimmed = line.trim_start();
if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
let Some(fence_char) = trimmed.chars().next() else {
result.push_str(line);
result.push('\n');
continue;
};
let fence_count =
trimmed.chars().take_while(|&c| c == fence_char).count();
if fence_count >= 3 {
if !in_code_block {
in_code_block = true;
code_fence_char = Some(fence_char);
code_fence_count = fence_count;
} else if code_fence_char == Some(fence_char)
&& fence_count >= code_fence_count
{
// Matching closing fence: reset state.
in_code_block = false;
code_fence_char = None;
code_fence_count = 0;
}
}
}
// Note: the opening fence line itself is processed (state was just
// set above), the closing fence line is not.
let processed_line = if in_code_block && line.contains('\t') {
self.handle_hardtabs(line)
} else {
line.to_string()
};
result.push_str(&processed_line);
// Preserve the original "no trailing newline" shape of the input.
if lines.peek().is_some() {
result.push('\n');
}
}
result
}
/// Highlight `code` as `language`, returning the HTML fragment, or `None`
/// when highlighting is disabled, unavailable, or fails.
fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
if !self.options.highlight_code {
return None;
}
let syntax_manager = self.syntax_manager.as_ref()?;
// Errors are swallowed: the caller leaves the block unhighlighted.
syntax_manager
.highlight_code(code, language, self.options.highlight_theme.as_deref())
.ok()
}
/// Render `markdown` to HTML plus extracted metadata (headers, title,
/// included files).
///
/// Pipeline: markdown-level preprocessing → header/title extraction from
/// the preprocessed text (so includes contribute headers) → HTML
/// conversion and DOM post-processing.
#[must_use]
pub fn render(&self, markdown: &str) -> MarkdownResult {
let (preprocessed, included_files) = self.preprocess(markdown);
let (headers, title) = self.extract_headers(&preprocessed);
let html = self.process_html_pipeline(&preprocessed);
MarkdownResult {
html,
headers,
title,
included_files,
}
}
/// Run the HTML post-processing pipeline over preprocessed markdown.
///
/// Stages, in order: comrak conversion → option-reference linking (only
/// with the `ndg-flavored` feature) → manpage references (nixpkgs mode)
/// → syntax highlighting → DOM-level fix-ups.
fn process_html_pipeline(&self, content: &str) -> String {
    let mut html = self.convert_to_html(content);
    // The `#[cfg]` attribute alone gates this at compile time; the
    // previous redundant runtime `if cfg!(...)` wrapper around it has
    // been removed.
    #[cfg(feature = "ndg-flavored")]
    {
        html = super::extensions::process_option_references(
            &html,
            self.options.valid_options.as_ref(),
        );
    }
    if self.options.nixpkgs {
        html = self.process_manpage_references_html(&html);
    }
    if self.options.highlight_code {
        html = self.highlight_codeblocks(&html);
    }
    self.kuchiki_postprocess(&html)
}
/// Run the markdown-level (pre-HTML) transformations.
///
/// Returns the transformed markdown together with any files pulled in by
/// include directives (populated only in nixpkgs mode).
fn preprocess(
&self,
content: &str,
) -> (String, Vec<crate::types::IncludedFile>) {
let mut processed = content.to_string();
let mut included_files = Vec::new();
// MyST-style autolinks are handled unconditionally.
processed = super::extensions::process_myst_autolinks(&processed);
processed = self.process_hardtabs(&processed);
if self.options.nixpkgs {
let (content, files) = self.apply_nixpkgs_preprocessing(&processed);
processed = content;
included_files = files;
}
// Role markup applies in nixpkgs mode or when the `ndg-flavored`
// feature is compiled in.
if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
processed = super::extensions::process_role_markup(
&processed,
self.manpage_urls.as_ref(),
self.options.auto_link_options,
self.options.valid_options.as_ref(),
);
}
(processed, included_files)
}
/// Nixpkgs-specific preprocessing: file includes, block elements, then
/// inline anchors (compiled only with the `nixpkgs` feature).
#[cfg(feature = "nixpkgs")]
fn apply_nixpkgs_preprocessing(
&self,
content: &str,
) -> (String, Vec<crate::types::IncludedFile>) {
// Include failures degrade gracefully: the original content is kept
// and no included files are reported.
let (with_includes, included_files) =
match super::extensions::process_file_includes(content, &self.base_dir, 0)
{
Ok(result) => result,
Err(e) => {
log::warn!(
"File include processing failed: {e}. Continuing without includes."
);
(content.to_string(), Vec::new())
},
};
let with_blocks = super::extensions::process_block_elements(&with_includes);
let processed = super::extensions::process_inline_anchors(&with_blocks);
(processed, included_files)
}
/// Fallback when the `nixpkgs` feature is disabled: pass content through
/// unchanged with no included files.
#[cfg(not(feature = "nixpkgs"))]
fn apply_nixpkgs_preprocessing(
&self,
content: &str,
) -> (String, Vec<crate::types::IncludedFile>) {
(content.to_string(), Vec::new())
}
/// Parse `content` and extract all headings (with their anchor ids) plus
/// the document title (the first level-1 heading's text).
///
/// Anchor id resolution order, per heading: a trailing `{#id}` in the
/// heading text wins; otherwise an id found in inline HTML (`{#id}`
/// inside an HTML comment/tag) is used; otherwise the heading text is
/// slugified.
#[must_use]
pub fn extract_headers(
&self,
content: &str,
) -> (Vec<Header>, Option<String>) {
use std::fmt::Write;
let arena = Arena::new();
let options = self.comrak_options();
// Normalization pass: non-heading lines carrying a `{#id}` marker are
// rewritten as level-2 headings so their anchors appear in the header
// list.
// NOTE(review): this matches ANY non-`#` line containing `{#...}`, not
// just standalone anchor lines — confirm that is intended.
let mut normalized = String::with_capacity(content.len());
for line in content.lines() {
let trimmed = line.trim_end();
if !trimmed.starts_with('#')
&& let Some(anchor_start) = trimmed.rfind("{#")
&& let Some(anchor_end) = trimmed[anchor_start..].find('}')
{
let text = trimmed[..anchor_start].trim_end();
// `anchor_end` is relative to `anchor_start`; skip the "{#" prefix.
let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
let _ = writeln!(normalized, "## {text} {{#{id}}}");
continue;
}
normalized.push_str(line);
normalized.push('\n');
}
let root = parse_document(&arena, &normalized, &options);
let mut headers = Vec::new();
let mut found_title = None;
for node in root.descendants() {
if let NodeValue::Heading(NodeHeading { level, .. }) =
&node.data.borrow().value
{
// Flatten the heading's inline children into plain text, and pick
// up any explicit `{#id}` hidden in inline HTML along the way.
let mut text = String::new();
let mut explicit_id = None;
for child in node.children() {
match &child.data.borrow().value {
NodeValue::Text(t) => text.push_str(t),
NodeValue::Code(t) => text.push_str(&t.literal),
NodeValue::Link(..)
| NodeValue::Emph
| NodeValue::Strong
| NodeValue::Subscript
| NodeValue::Strikethrough
| NodeValue::Superscript
| NodeValue::FootnoteReference(..) => {
text.push_str(&extract_inline_text(child));
},
NodeValue::HtmlInline(html) => {
let html_str = html.as_str();
if let Some(start) = html_str.find("{#")
&& let Some(end) = html_str[start..].find('}')
{
let anchor = &html_str[start + 2..start + end];
explicit_id = Some(anchor.to_string());
}
},
#[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
NodeValue::Image(..) => {},
_ => {},
}
}
let trimmed = text.trim_end();
// A `{#id}` marker at the end of the heading text itself takes
// precedence over `explicit_id`/slugification.
#[allow(clippy::option_if_let_else)]
let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
if let Some(end) = trimmed[start..].find('}') {
let anchor = &trimmed[start + 2..start + end];
(trimmed[..start].trim_end().to_string(), anchor.to_string())
} else {
// Unterminated `{#` — fall back to explicit/slugified id.
(
text.clone(),
explicit_id.unwrap_or_else(|| utils::slugify(&text)),
)
}
} else {
(
text.clone(),
explicit_id.unwrap_or_else(|| utils::slugify(&text)),
)
};
// First h1 becomes the document title.
if *level == 1 && found_title.is_none() {
found_title = Some(final_text.clone());
}
headers.push(Header {
text: final_text,
level: *level,
id,
});
}
}
(headers, found_title)
}
/// Convert preprocessed markdown to HTML via comrak, applying the prompt
/// AST transformer first and header-anchor fix-ups afterwards.
fn convert_to_html(&self, content: &str) -> String {
let arena = Arena::new();
let options = self.comrak_options();
let root = parse_document(&arena, content, &options);
let prompt_transformer = PromptTransformer;
prompt_transformer.transform(root);
let mut html_output = String::new();
// Formatting into a String should not fail; an empty document is the
// silent fallback if it somehow does.
comrak::format_html(root, &options, &mut html_output).unwrap_or_default();
Self::process_header_anchors_html(&html_output)
}
fn process_header_anchors_html(html: &str) -> String {
use std::sync::LazyLock;
use regex::Regex;
static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>")
.unwrap_or_else(|e| {
log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
utils::never_matching_regex().unwrap_or_else(|_| {
#[allow(
clippy::expect_used,
reason = "This pattern is guaranteed to be valid"
)]
Regex::new(r"[^\s\S]")
.expect("regex pattern [^\\s\\S] should always compile")
})
})
});
static HEADER_NO_ID_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"<h([1-6])>(.*?)</h[1-6]>").unwrap_or_else(|e| {
log::error!("Failed to compile HEADER_NO_ID_RE regex: {e}");
utils::never_matching_regex().unwrap_or_else(|_| {
#[allow(
clippy::expect_used,
reason = "This pattern is guaranteed to be valid"
)]
Regex::new(r"[^\s\S]")
.expect("regex pattern [^\\s\\S] should always compile")
})
})
});
static HTML_TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"<[^>]+>").unwrap_or_else(|e| {
log::error!("Failed to compile HTML_TAG_RE regex: {e}");
utils::never_matching_regex().unwrap_or_else(|_| {
#[allow(
clippy::expect_used,
reason = "This pattern is guaranteed to be valid"
)]
Regex::new(r"[^\s\S]")
.expect("regex pattern [^\\s\\S] should always compile")
})
})
});
let result = HEADER_ANCHOR_RE
.replace_all(html, |caps: ®ex::Captures| {
let level = &caps[1];
let prefix = &caps[2];
let id = &caps[3];
let suffix = &caps[4];
format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
})
.to_string();
HEADER_NO_ID_RE
.replace_all(&result, |caps: ®ex::Captures| {
let level = &caps[1];
let content = &caps[2];
let text_only = HTML_TAG_RE.replace_all(content, "");
let id = utils::slugify(&text_only);
if id.is_empty() {
format!("<h{level}>{content}</h{level}>")
} else {
format!("<h{level} id=\"{id}\">{content}</h{level}>")
}
})
.to_string()
}
/// Build the comrak options used for both header extraction and HTML
/// conversion, derived from this processor's settings.
fn comrak_options(&self) -> Options<'_> {
let mut options = Options::default();
if self.options.gfm {
options.extension.table = true;
options.extension.footnotes = true;
options.extension.strikethrough = true;
options.extension.tasklist = true;
options.extension.superscript = true;
options.extension.autolink = true;
}
// Raw HTML passthrough is enabled; later DOM passes operate on it.
options.render.r#unsafe = true;
// Header ids are assigned by our own post-processing, not by comrak.
options.extension.header_ids = None;
options.extension.description_lists = true;
options
}
/// Rewrite manpage references in `html` using the loaded URL map
/// (compiled only with the `nixpkgs` feature).
#[cfg(feature = "nixpkgs")]
fn process_manpage_references_html(&self, html: &str) -> String {
super::extensions::process_manpage_references(
html,
self.manpage_urls.as_ref(),
)
}
/// Fallback when the `nixpkgs` feature is disabled: pass HTML through.
#[cfg(not(feature = "nixpkgs"))]
fn process_manpage_references_html(&self, html: &str) -> String {
html.to_string()
}
/// Apply all DOM transformation passes to rendered HTML.
#[allow(
clippy::unused_self,
reason = "Method signature matches processor pattern"
)]
fn kuchiki_postprocess(&self, html: &str) -> String {
kuchiki_postprocess_html(html, |document| {
Self::apply_dom_transformations(document);
})
}
/// Run every DOM pass in order: comment-based anchor markers first, then
/// inline `[]{#id}` anchors (most specific containers before the generic
/// text-node sweep), then link fix-ups.
fn apply_dom_transformations(document: &kuchikikiki::NodeRef) {
Self::process_list_item_id_markers(document);
Self::process_header_anchor_comments(document);
Self::process_list_item_inline_anchors(document);
Self::process_paragraph_inline_anchors(document);
Self::process_remaining_inline_anchors(document);
Self::process_option_anchor_links(document);
Self::process_empty_auto_links(document);
Self::process_empty_html_links(document);
}
/// Replace `<!-- nixos-anchor-id:ID -->` comments inside `<li>` elements
/// with `<span id="ID" class="nixos-anchor">` anchor spans.
///
/// Ids are accepted only when non-empty and limited to alphanumerics,
/// `-`, and `_`. Mutations are collected first and applied afterwards so
/// the DOM is never modified mid-traversal.
fn process_list_item_id_markers(document: &kuchikikiki::NodeRef) {
let mut to_modify = Vec::new();
for comment in document.inclusive_descendants() {
if let Some(comment_node) = comment.as_comment() {
let comment_text = comment_node.borrow();
if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
// 16 == "nixos-anchor-id:".len()
let id = comment_text[id_start + 16..].trim();
if !id.is_empty()
&& id
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
{
// Only comments directly inside <li> are rewritten.
if let Some(parent) = comment.parent()
&& let Some(element) = parent.as_element()
&& element.name.local.as_ref() == "li"
{
to_modify.push((comment.clone(), id.to_string()));
}
}
}
}
}
for (comment_node, id) in to_modify {
let span = kuchikikiki::NodeRef::new_element(
markup5ever::QualName::new(
None,
markup5ever::ns!(html),
local_name!("span"),
),
vec![
(
kuchikikiki::ExpandedName::new("", "id"),
kuchikikiki::Attribute {
prefix: None,
value: id,
},
),
(
kuchikikiki::ExpandedName::new("", "class"),
kuchikikiki::Attribute {
prefix: None,
value: "nixos-anchor".into(),
},
),
],
);
comment_node.insert_after(span);
comment_node.detach();
}
}
/// Turn `<!-- anchor:ID -->` comments inside `<h1>`-`<h6>` elements into
/// an `id="ID"` attribute on the heading, removing the comment.
///
/// Ids are accepted only when non-empty and limited to alphanumerics,
/// `-`, and `_`. Collect-then-mutate, as in the other comment pass.
fn process_header_anchor_comments(document: &kuchikikiki::NodeRef) {
let mut to_modify = Vec::new();
for comment in document.inclusive_descendants() {
if let Some(comment_node) = comment.as_comment() {
let comment_text = comment_node.borrow();
if let Some(anchor_start) = comment_text.find("anchor:") {
// 7 == "anchor:".len()
let id = comment_text[anchor_start + 7..].trim();
if !id.is_empty()
&& id
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
{
if let Some(parent) = comment.parent()
&& let Some(element) = parent.as_element()
{
let tag_name = element.name.local.as_ref();
if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
to_modify.push((
parent.clone(),
comment.clone(),
id.to_string(),
));
}
}
}
}
}
}
for (header_element, comment_node, id) in to_modify {
if let Some(element) = header_element.as_element() {
element
.attributes
.borrow_mut()
.insert(local_name!("id"), id);
comment_node.detach();
}
}
}
fn process_list_item_inline_anchors(document: &kuchikikiki::NodeRef) {
for li_node in safe_select(document, "li") {
let li_element = li_node;
let has_code = !safe_select(&li_element, "code, pre").is_empty();
if has_code {
continue; }
let text_content = li_element.text_contents();
if let Some(anchor_start) = text_content.find("[]{#")
&& let Some(anchor_end) = text_content[anchor_start..].find('}')
{
let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
if !id.is_empty()
&& id
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
{
let remaining_content =
&text_content[anchor_start + anchor_end + 1..];
for child in li_element.children() {
child.detach();
}
let span = kuchikikiki::NodeRef::new_element(
markup5ever::QualName::new(
None,
markup5ever::ns!(html),
local_name!("span"),
),
vec![
(
kuchikikiki::ExpandedName::new("", "id"),
kuchikikiki::Attribute {
prefix: None,
value: id.into(),
},
),
(
kuchikikiki::ExpandedName::new("", "class"),
kuchikikiki::Attribute {
prefix: None,
value: "nixos-anchor".into(),
},
),
],
);
li_element.append(span);
if !remaining_content.is_empty() {
li_element
.append(kuchikikiki::NodeRef::new_text(remaining_content));
}
}
}
}
}
fn process_paragraph_inline_anchors(document: &kuchikikiki::NodeRef) {
for p_node in safe_select(document, "p") {
let p_element = p_node;
let has_code = !safe_select(&p_element, "code, pre").is_empty();
if has_code {
continue; }
let text_content = p_element.text_contents();
if let Some(anchor_start) = text_content.find("[]{#")
&& let Some(anchor_end) = text_content[anchor_start..].find('}')
{
let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
if !id.is_empty()
&& id
.chars()
.all(|c| c.is_alphanumeric() || c == '-' || c == '_')
{
let remaining_content =
&text_content[anchor_start + anchor_end + 1..];
for child in p_element.children() {
child.detach();
}
let span = kuchikikiki::NodeRef::new_element(
markup5ever::QualName::new(
None,
markup5ever::ns!(html),
local_name!("span"),
),
vec![
(
kuchikikiki::ExpandedName::new("", "id"),
kuchikikiki::Attribute {
prefix: None,
value: id.into(),
},
),
(
kuchikikiki::ExpandedName::new("", "class"),
kuchikikiki::Attribute {
prefix: None,
value: "nixos-anchor".into(),
},
),
],
);
p_element.append(span);
if !remaining_content.is_empty() {
p_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
}
}
}
}
}
/// Final sweep for `[]{#id}` markers: rewrite them inside arbitrary text
/// nodes (outside `code`/`pre`) into anchor `<span>`s, splitting the
/// text node around each marker.
///
/// Unlike the `<li>`/`<p>` passes, this preserves surrounding text and
/// handles multiple markers per text node.
fn process_remaining_inline_anchors(document: &kuchikikiki::NodeRef) {
// Phase 1: collect candidate text nodes (never mutate mid-traversal).
let mut text_nodes_to_process = Vec::new();
for node in document.inclusive_descendants() {
if let Some(text_node) = node.as_text() {
// Walk ancestors to exclude text inside <code>/<pre>.
let mut parent = node.parent();
let mut in_code = false;
while let Some(p) = parent {
if let Some(element) = p.as_element()
&& (element.name.local == local_name!("code")
|| element.name.local == local_name!("pre"))
{
in_code = true;
break;
}
parent = p.parent();
}
if !in_code {
let text_content = text_node.borrow().clone();
if text_content.contains("[]{#") {
text_nodes_to_process.push((node.clone(), text_content));
}
}
}
}
// Phase 2: split each text node into text fragments and anchor spans.
// The scan works on a char vector, so all indices below are char
// indices, not byte indices.
for (text_node, text_content) in text_nodes_to_process {
let mut last_end = 0;
let mut new_children = Vec::new();
let chars = text_content.chars().collect::<Vec<_>>();
let mut i = 0;
while i < chars.len() {
// NOTE(review): `i + 4 < len` requires at least one char after
// "[]{#"; a valid marker needs '}' there anyway, so this is safe,
// just slightly conservative.
if i + 4 < chars.len()
&& chars[i] == '['
&& chars[i + 1] == ']'
&& chars[i + 2] == '{'
&& chars[i + 3] == '#'
{
let anchor_start = i;
i += 4;
// Accumulate the id; stop at '}' or any disallowed character.
let mut id = String::new();
while i < chars.len() && chars[i] != '}' {
if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_'
{
id.push(chars[i]);
i += 1;
} else {
break;
}
}
if i < chars.len() && chars[i] == '}' && !id.is_empty() {
let anchor_end = i + 1;
// Emit any plain text between the previous marker and this one.
if anchor_start > last_end {
let before_text: String =
chars[last_end..anchor_start].iter().collect();
if !before_text.is_empty() {
new_children.push(kuchikikiki::NodeRef::new_text(before_text));
}
}
let span = kuchikikiki::NodeRef::new_element(
markup5ever::QualName::new(
None,
markup5ever::ns!(html),
local_name!("span"),
),
vec![
(
kuchikikiki::ExpandedName::new("", "id"),
kuchikikiki::Attribute {
prefix: None,
value: id,
},
),
(
kuchikikiki::ExpandedName::new("", "class"),
kuchikikiki::Attribute {
prefix: None,
value: "nixos-anchor".into(),
},
),
],
);
new_children.push(span);
last_end = anchor_end;
i = anchor_end;
} else {
// Malformed marker: resume scanning one char past '['.
i += 1;
}
} else {
i += 1;
}
}
// Trailing text after the last marker.
if last_end < chars.len() {
let after_text: String = chars[last_end..].iter().collect();
if !after_text.is_empty() {
new_children.push(kuchikikiki::NodeRef::new_text(after_text));
}
}
// Swap the original text node for the new fragment sequence.
if !new_children.is_empty() {
for child in new_children {
text_node.insert_before(child);
}
text_node.detach();
}
}
}
/// Give empty (or `{{ANCHOR}}` placeholder) same-page links a readable
/// label derived from their fragment id.
fn process_empty_auto_links(document: &kuchikikiki::NodeRef) {
for link_node in safe_select(document, "a") {
let link_element = link_node;
if let Some(element) = link_element.as_element() {
// Copy the href out so the attribute borrow ends before mutation.
let href = element
.attributes
.borrow()
.get(local_name!("href"))
.map(std::string::ToString::to_string);
let text_content = link_element.text_contents();
if let Some(href_value) = href
&& href_value.starts_with('#')
&& (text_content.trim().is_empty()
|| text_content.trim() == "{{ANCHOR}}")
{
// Drop the placeholder children before appending real text.
if text_content.trim() == "{{ANCHOR}}" {
for child in link_element.children() {
child.detach();
}
}
let display_text = Self::humanize_anchor_id(&href_value);
link_element.append(kuchikikiki::NodeRef::new_text(display_text));
}
}
}
}
/// Same fix-up as [`Self::process_empty_auto_links`] but driven by an
/// attribute selector.
/// NOTE(review): this pass appears to target the same links as the one
/// above (which runs first and fills them in, making this a no-op in the
/// common case) — confirm whether both passes are still needed.
fn process_empty_html_links(document: &kuchikikiki::NodeRef) {
for link_node in safe_select(document, "a[href^='#']") {
let link_element = link_node;
let text_content = link_element.text_contents();
if text_content.trim().is_empty() || text_content.trim() == "{{ANCHOR}}" {
if text_content.trim() == "{{ANCHOR}}" {
for child in link_element.children() {
child.detach();
}
}
if let Some(element) = link_element.as_element()
&& let Some(href) =
element.attributes.borrow().get(local_name!("href"))
{
let display_text = Self::humanize_anchor_id(href);
link_element.append(kuchikikiki::NodeRef::new_text(display_text));
}
}
}
}
/// Rewrite `#opt-*` links to point at `options.html#opt-*`, and give
/// empty/placeholder ones a label derived from the option path
/// (`opt-a-b` → `a.b`).
fn process_option_anchor_links(document: &kuchikikiki::NodeRef) {
// Collect first, mutate after: attribute borrows must not overlap.
let mut to_modify = Vec::new();
for link_node in safe_select(document, "a[href^='#opt-']") {
let link_element = link_node;
if let Some(element) = link_element.as_element() {
let href = element
.attributes
.borrow()
.get(local_name!("href"))
.map(std::string::ToString::to_string);
let text_content = link_element.text_contents();
if let Some(href_value) = href
&& href_value.starts_with("#opt-")
{
// Strip the leading '#'; remember whether the link needs a label.
let option_anchor = href_value[1..].to_string(); let needs_text_replacement = text_content.trim().is_empty()
|| text_content.trim() == "{{ANCHOR}}";
to_modify.push((
link_element.clone(),
option_anchor,
needs_text_replacement,
));
}
}
}
for (link_element, option_anchor, needs_text_replacement) in to_modify {
if let Some(element) = link_element.as_element() {
let new_href = format!("options.html#{option_anchor}");
element
.attributes
.borrow_mut()
.insert(local_name!("href"), new_href);
if needs_text_replacement {
for child in link_element.children() {
child.detach();
}
// "opt-services-nginx" → "services.nginx"
if let Some(option_path) = option_anchor.strip_prefix("opt-") {
let option_name = option_path.replace('-', ".");
link_element.append(kuchikikiki::NodeRef::new_text(option_name));
}
}
}
}
}
/// Turn an anchor id like `#sec-quick-start` into human-readable text
/// ("Quick Start"): strip `#` and the `sec-`/`ssec-`/`opt-` prefixes,
/// treat `-`/`_` as word separators, and title-case each word.
fn humanize_anchor_id(anchor: &str) -> String {
    let stripped = anchor
        .trim_start_matches('#')
        .trim_start_matches("sec-")
        .trim_start_matches("ssec-")
        .trim_start_matches("opt-");
    stripped
        .replace(['-', '_'], " ")
        .split_whitespace()
        .map(|word| {
            let mut rest = word.chars();
            match rest.next() {
                Some(first) => first.to_uppercase().chain(rest).collect(),
                None => String::new(),
            }
        })
        .collect::<Vec<String>>()
        .join(" ")
}
}
/// Recursively collect the plain-text content of an inline AST node.
///
/// Text and inline-code literals are appended directly; container inline
/// nodes (links, emphasis, etc.) are recursed into; inline HTML and
/// images contribute nothing.
pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
let mut text = String::new();
for child in node.children() {
match &child.data.borrow().value {
NodeValue::Text(t) => text.push_str(t),
NodeValue::Code(t) => text.push_str(&t.literal),
NodeValue::Link(..)
| NodeValue::Emph
| NodeValue::Strong
| NodeValue::Strikethrough
| NodeValue::Superscript
| NodeValue::Subscript
| NodeValue::FootnoteReference(..) => {
text.push_str(&extract_inline_text(child));
},
#[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
_ => {},
}
}
text
}
/// Recursively collect every `.md` file under `input_dir`, following
/// symlinks. Unreadable directory entries are silently skipped.
pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
    let files: Vec<PathBuf> = WalkDir::new(input_dir)
        .follow_links(true)
        .into_iter()
        .filter_map(Result::ok)
        .map(|entry| entry.path().to_owned())
        .filter(|path| {
            path.is_file() && path.extension().is_some_and(|ext| ext == "md")
        })
        .collect();
    trace!("Found {} markdown files to process", files.len());
    files
}
/// Optional processor capabilities queryable via
/// `MarkdownProcessor::has_feature`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessorFeature {
/// GitHub-flavored markdown extensions (tables, footnotes, tasklists…).
Gfm,
/// Nixpkgs-specific preprocessing and reference handling.
Nixpkgs,
/// Code-block syntax highlighting.
SyntaxHighlighting,
/// A manpage-name → URL map was successfully loaded.
ManpageUrls,
}
/// Parse `html` into a DOM, apply `transform_fn`, and serialize back.
///
/// Runs through `process_safe` with the original `html` as the fallback
/// value. NOTE(review): if serialization itself errors, the inner closure
/// returns an empty string (`unwrap_or_default`), not the original HTML —
/// confirm that is the intended fallback.
fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
where
F: FnOnce(&kuchikikiki::NodeRef),
{
process_safe(
html,
|html| {
use tendril::TendrilSink;
let document = kuchikikiki::parse_html().one(html);
transform_fn(&document);
let mut out = Vec::new();
let _ = document.serialize(&mut out);
String::from_utf8(out).unwrap_or_default()
},
html,
)
}