1#[expect(
6 clippy::disallowed_types,
7 reason = "Required for generic hasher abstraction"
8)]
9use std::collections::HashMap;
10use std::{
11 path::{Component, Path, PathBuf},
12 sync::LazyLock,
13};
14
15use comrak::{
16 Arena,
17 nodes::{AstNode, NodeHeading, NodeValue},
18 options::Options,
19 parse_document,
20};
21use log::trace;
22use markup5ever::local_name;
23use regex::Regex;
24use rustc_hash::FxHashMap;
25use walkdir::WalkDir;
26
27use super::{
28 dom::safe_select,
29 process::process_safe,
30 types::{
31 AstTransformer,
32 MarkdownOptions,
33 MarkdownProcessor,
34 PromptTransformer,
35 },
36};
37use crate::{
38 syntax::create_default_manager,
39 types::{Header, MarkdownResult},
40 utils,
41};
42
43static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
44 Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_.-]+)\}(.*?)</h[1-6]>")
45 .unwrap_or_else(|e| {
46 log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
47 utils::never_matching_regex().unwrap_or_else(|_| {
48 #[expect(
49 clippy::expect_used,
50 reason = "This pattern is guaranteed to be valid"
51 )]
52 Regex::new(r"[^\s\S]")
53 .expect("regex pattern [^\\s\\S] should always compile")
54 })
55 })
56});
57
58static HEADER_NO_ID_RE: LazyLock<Regex> = LazyLock::new(|| {
59 Regex::new(r"<h([1-6])>(.*?)</h[1-6]>").unwrap_or_else(|e| {
60 log::error!("Failed to compile HEADER_NO_ID_RE regex: {e}");
61 utils::never_matching_regex().unwrap_or_else(|_| {
62 #[expect(
63 clippy::expect_used,
64 reason = "This pattern is guaranteed to be valid"
65 )]
66 Regex::new(r"[^\s\S]")
67 .expect("regex pattern [^\\s\\S] should always compile")
68 })
69 })
70});
71
72static HTML_TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
73 Regex::new(r"<[^>]+>").unwrap_or_else(|e| {
74 log::error!("Failed to compile HTML_TAG_RE regex: {e}");
75 utils::never_matching_regex().unwrap_or_else(|_| {
76 #[expect(
77 clippy::expect_used,
78 reason = "This pattern is guaranteed to be valid"
79 )]
80 Regex::new(r"[^\s\S]")
81 .expect("regex pattern [^\\s\\S] should always compile")
82 })
83 })
84});
85
86impl MarkdownProcessor {
87 #[must_use]
89 pub fn new(options: MarkdownOptions) -> Self {
90 let manpage_urls = options
91 .manpage_urls_path
92 .as_ref()
93 .and_then(|path| crate::utils::load_manpage_urls(path).ok());
94
95 let syntax_manager = if options.highlight_code {
96 match create_default_manager(
97 options
98 .syntax_queries_path
99 .as_deref()
100 .map(std::path::Path::new),
101 ) {
102 Ok(manager) => {
103 log::info!("Syntax highlighting initialized successfully");
104 Some(manager)
105 },
106 Err(e) => {
107 log::error!("Failed to initialize syntax highlighting: {e}");
108 log::warn!(
109 "Continuing without syntax highlighting - code blocks will not be \
110 highlighted"
111 );
112 None
113 },
114 }
115 } else {
116 None
117 };
118
119 Self {
120 options,
121 manpage_urls,
122 syntax_manager,
123 base_dir: std::path::PathBuf::from("."),
124 }
125 }
126
  /// Returns the options this processor was configured with.
  #[must_use]
  pub const fn options(&self) -> &MarkdownOptions {
    &self.options
  }
132
  /// Consumes the processor and returns it with `base_dir` replaced by an
  /// owned copy of the given path.
  ///
  /// The base directory is what the nixpkgs preprocessing step passes to
  /// file-include processing.
  #[must_use]
  pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
    self.base_dir = base_dir.to_path_buf();
    self
  }
139
  /// Reports whether `feature` is enabled for this processor.
  ///
  /// `Gfm`, `Nixpkgs` and `SyntaxHighlighting` mirror the corresponding
  /// option flags; `ManpageUrls` is true only when a manpage URL map was
  /// actually loaded.
  #[must_use]
  pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
    match feature {
      ProcessorFeature::Gfm => self.options.gfm,
      ProcessorFeature::Nixpkgs => self.options.nixpkgs,
      // NOTE(review): this reflects the option flag only, not whether the
      // syntax manager was successfully initialised.
      ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
      ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
    }
  }
150
  /// Returns the loaded manpage URL map, or `None` when none was loaded.
  #[must_use]
  pub const fn manpage_urls(&self) -> Option<&FxHashMap<String, String>> {
    self.manpage_urls.as_ref()
  }
156
157 #[must_use]
159 pub fn highlight_codeblocks(&self, html: &str) -> String {
160 use kuchikikiki::parse_html;
161 use tendril::TendrilSink;
162
163 if !self.options.highlight_code || self.syntax_manager.is_none() {
164 return html.to_string();
165 }
166
167 let document = parse_html().one(html);
168
169 let mut code_blocks = Vec::new();
171 for pre_node in safe_select(&document, "pre > code") {
172 let code_node = pre_node;
173 if let Some(element) = code_node.as_element() {
174 let language = element
175 .attributes
176 .borrow()
177 .get("class")
178 .and_then(|class| class.strip_prefix("language-"))
179 .unwrap_or("text")
180 .to_string();
181 let code_text = code_node.text_contents();
182
183 if let Some(pre_parent) = code_node.parent() {
184 code_blocks.push((
185 pre_parent.clone(),
186 code_node.clone(),
187 code_text,
188 language,
189 ));
190 }
191 }
192 }
193
194 for (pre_element, _code_node, code_text, language) in code_blocks {
196 if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
197 {
198 let wrapped_html = format!(
200 r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
201 );
202 let fragment = parse_html().one(wrapped_html.as_str());
203 pre_element.insert_after(fragment);
204 pre_element.detach();
205 }
206 }
208
209 let mut buf = Vec::new();
210 if let Err(e) = document.serialize(&mut buf) {
211 log::warn!("DOM serialization failed: {e:?}");
212 return html.to_string(); }
214 String::from_utf8(buf).unwrap_or_else(|_| html.to_string())
215 }
216
217 fn handle_hardtabs(&self, code: &str) -> String {
219 use super::types::TabStyle;
220
221 if !code.contains('\t') {
223 return code.to_string();
224 }
225
226 match self.options.tab_style {
227 TabStyle::None => code.to_string(),
229
230 TabStyle::Warn => {
232 log::warn!(
233 "Hard tabs detected in code block. Consider using spaces for \
234 consistency. Tools like editorconfig may help you normalize spaces \
235 in your documents."
236 );
237 code.to_string()
238 },
239
240 TabStyle::Normalize => {
243 log::debug!("Replacing hard tabs with spaces");
244 code.replace('\t', " ")
245 },
246 }
247 }
248
249 fn process_hardtabs(&self, markdown: &str) -> String {
251 use super::types::TabStyle;
252 use crate::utils::codeblock::FenceTracker;
253
254 if self.options.tab_style == TabStyle::None {
256 return markdown.to_string();
257 }
258
259 let mut result = String::with_capacity(markdown.len());
260 let mut lines = markdown.lines().peekable();
261 let mut tracker = FenceTracker::new();
262
263 while let Some(line) = lines.next() {
264 tracker = tracker.process_line(line);
265
266 let processed_line = if tracker.in_code_block() && line.contains('\t') {
268 self.handle_hardtabs(line)
269 } else {
270 line.to_string()
271 };
272
273 result.push_str(&processed_line);
274
275 if lines.peek().is_some() {
277 result.push('\n');
278 }
279 }
280
281 result
282 }
283
284 fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
287 if !self.options.highlight_code {
288 return None;
289 }
290
291 let syntax_manager = self.syntax_manager.as_ref()?;
292
293 syntax_manager
294 .highlight_code(code, language, self.options.highlight_theme.as_deref())
295 .ok()
296 }
297
298 #[must_use]
300 pub fn render(&self, markdown: &str) -> MarkdownResult {
301 let (preprocessed, included_files) = self.preprocess(markdown);
302 let (headers, title) = self.extract_headers(&preprocessed);
303 let html = self.process_html_pipeline(&preprocessed);
304
305 MarkdownResult {
306 html,
307 headers,
308 title,
309 included_files,
310 }
311 }
312
313 fn process_html_pipeline(&self, content: &str) -> String {
315 let mut html = self.convert_to_html(content);
316
317 if cfg!(feature = "ndg-flavored") {
319 #[cfg(feature = "ndg-flavored")]
320 {
321 html = super::extensions::process_option_references(
322 &html,
323 self.options.valid_options.as_ref(),
324 );
325 }
326 }
327
328 if self.options.nixpkgs {
329 html = self.process_manpage_references_html(&html);
330 }
331
332 if self.options.highlight_code {
333 html = self.highlight_codeblocks(&html);
334 }
335
336 self.kuchiki_postprocess(&html)
337 }
338
339 fn preprocess(
341 &self,
342 content: &str,
343 ) -> (String, Vec<crate::types::IncludedFile>) {
344 let mut processed = content.to_string();
345 let mut included_files = Vec::new();
346
347 processed = super::extensions::process_myst_autolinks(&processed);
349
350 processed = self.process_hardtabs(&processed);
352
353 if self.options.nixpkgs {
354 let (content, files) = self.apply_nixpkgs_preprocessing(&processed);
355 processed = content;
356 included_files = files;
357 }
358
359 if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
360 processed = super::extensions::process_role_markup(
361 &processed,
362 self.manpage_urls.as_ref(),
363 self.options.auto_link_options,
364 self.options.valid_options.as_ref(),
365 );
366 }
367
368 #[cfg(feature = "wiki")]
369 {
370 processed = super::extensions::process_wikilinks(&processed);
371 }
372
373 (processed, included_files)
374 }
375
376 #[cfg(feature = "nixpkgs")]
378 fn apply_nixpkgs_preprocessing(
379 &self,
380 content: &str,
381 ) -> (String, Vec<crate::types::IncludedFile>) {
382 let (with_includes, included_files) =
383 match super::extensions::process_file_includes(content, &self.base_dir, 0)
384 {
385 Ok(result) => result,
386 Err(e) => {
387 log::warn!(
388 "File include processing failed: {e}. Continuing without includes."
389 );
390 (content.to_string(), Vec::new())
391 },
392 };
393 let with_blocks = super::extensions::process_block_elements(&with_includes);
394 let with_spans = super::extensions::process_bracketed_spans(&with_blocks);
395 let processed = super::extensions::process_inline_anchors(&with_spans);
396 (processed, included_files)
397 }
398
  /// Fallback used when the `nixpkgs` feature is compiled out: returns the
  /// content unchanged and reports no included files.
  #[cfg(not(feature = "nixpkgs"))]
  fn apply_nixpkgs_preprocessing(
    &self,
    content: &str,
  ) -> (String, Vec<crate::types::IncludedFile>) {
    (content.to_string(), Vec::new())
  }
407
408 #[must_use]
410 pub fn extract_headers(
411 &self,
412 content: &str,
413 ) -> (Vec<Header>, Option<String>) {
414 use std::fmt::Write;
415
416 let arena = Arena::new();
417 let options = self.comrak_options();
418
419 let content = remove_admonition_blocks_for_headers(content);
420
421 let mut normalized = String::with_capacity(content.len());
423 let mut lines = content.lines().peekable();
424 while let Some(line) = lines.next() {
425 let trimmed = line.trim();
426 if !trimmed.starts_with('#')
427 && !lines
428 .peek()
429 .is_some_and(|next| is_setext_heading_underline(next.trim()))
430 && let Some(anchor_start) = trimmed.rfind("{#")
431 && let Some(anchor_end) = trimmed[anchor_start..].find('}')
432 {
433 let text = trimmed[..anchor_start].trim_end();
434 let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
435 let _ = writeln!(normalized, "## {text} {{#{id}}}");
436 continue;
437 }
438 normalized.push_str(line);
439 normalized.push('\n');
440 }
441
442 let root = parse_document(&arena, &normalized, &options);
443
444 let mut headers = Vec::new();
445 let mut found_title = None;
446
447 for node in root.descendants() {
448 if let NodeValue::Heading(NodeHeading { level, .. }) =
449 &node.data.borrow().value
450 {
451 let mut text = String::new();
452 let mut explicit_id = None;
453
454 for child in node.children() {
455 match &child.data.borrow().value {
456 NodeValue::Text(t) => text.push_str(t),
457 NodeValue::Code(t) => text.push_str(&t.literal),
458 NodeValue::Link(..)
459 | NodeValue::Emph
460 | NodeValue::Strong
461 | NodeValue::Subscript
462 | NodeValue::Strikethrough
463 | NodeValue::Superscript
464 | NodeValue::FootnoteReference(..) => {
465 text.push_str(&extract_inline_text(child));
466 },
467 NodeValue::HtmlInline(html) => {
468 let html_str = html.as_str();
470 if let Some(start) = html_str.find("{#")
471 && let Some(end) = html_str[start..].find('}')
472 {
473 let anchor = &html_str[start + 2..start + end];
474 explicit_id = Some(anchor.to_string());
475 }
476 },
477 #[expect(clippy::match_same_arms, reason = "Explicit for clarity")]
478 NodeValue::Image(..) => {},
479 _ => {},
480 }
481 }
482
483 let trimmed = text.trim_end();
485 #[expect(
486 clippy::option_if_let_else,
487 reason = "nested options clearer with if-let"
488 )]
489 let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
490 if let Some(end) = trimmed[start..].find('}') {
491 let anchor = &trimmed[start + 2..start + end];
492 (trimmed[..start].trim_end().to_string(), anchor.to_string())
493 } else {
494 (
495 text.clone(),
496 explicit_id.unwrap_or_else(|| slugify_heading(&text)),
497 )
498 }
499 } else {
500 (
501 text.clone(),
502 explicit_id.unwrap_or_else(|| slugify_heading(&text)),
503 )
504 };
505 if *level == 1 && found_title.is_none() {
506 found_title = Some(final_text.clone());
507 }
508 headers.push(Header {
509 text: final_text,
510 level: *level,
511 id,
512 });
513 }
514 }
515
516 (headers, found_title)
517 }
518
519 fn convert_to_html(&self, content: &str) -> String {
521 let arena = Arena::new();
523 let options = self.comrak_options();
524 let root = parse_document(&arena, content, &options);
525
526 let prompt_transformer = PromptTransformer;
528 prompt_transformer.transform(root);
529
530 let mut html_output = String::new();
531 if let Err(e) = comrak::format_html(root, &options, &mut html_output) {
532 log::error!("Failed to format HTML: {e}");
533 }
534
535 Self::process_header_anchors_html(&html_output)
537 }
538
539 fn process_header_anchors_html(html: &str) -> String {
543 let result = HEADER_ANCHOR_RE
545 .replace_all(html, |caps: ®ex::Captures| {
546 let level = &caps[1];
547 let prefix = &caps[2];
548 let id = &caps[3];
549 let suffix = &caps[4];
550 format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
551 })
552 .to_string();
553
554 HEADER_NO_ID_RE
556 .replace_all(&result, |caps: ®ex::Captures| {
557 let level = &caps[1];
558 let content = &caps[2];
559 let text_only = HTML_TAG_RE.replace_all(content, "");
561 let id = utils::slugify(&text_only);
562 if id.is_empty() {
563 format!("<h{level}>{content}</h{level}>")
565 } else {
566 format!("<h{level} id=\"{id}\">{content}</h{level}>")
567 }
568 })
569 .to_string()
570 }
571
572 fn comrak_options(&self) -> Options<'_> {
574 let mut options = Options::default();
575 if self.options.gfm {
577 options.extension.table = true;
578 options.extension.footnotes = true;
579 options.extension.strikethrough = true;
580 options.extension.tasklist = true;
581 options.extension.superscript = true;
582 options.extension.autolink = true;
583 }
584
585 options.render.r#unsafe = true;
588
589 options.extension.header_id_prefix = None;
591 options.extension.description_lists = true;
592 options
593 }
594
  /// Delegates to the manpage-reference extension, passing the loaded
  /// manpage URL map if there is one.
  #[cfg(feature = "nixpkgs")]
  fn process_manpage_references_html(&self, html: &str) -> String {
    super::extensions::process_manpage_references(
      html,
      self.manpage_urls.as_ref(),
    )
  }
603
  /// Fallback used when the `nixpkgs` feature is compiled out: returns the
  /// HTML unchanged.
  #[cfg(not(feature = "nixpkgs"))]
  fn process_manpage_references_html(&self, html: &str) -> String {
    html.to_string()
  }
610
611 #[expect(
613 clippy::unused_self,
614 reason = "Method signature matches processor pattern"
615 )]
616 fn kuchiki_postprocess(&self, html: &str) -> String {
617 kuchiki_postprocess_html(html, |document| {
619 Self::apply_dom_transformations(document);
620 })
621 }
622
623 fn apply_dom_transformations(document: &kuchikikiki::NodeRef) {
625 Self::process_list_item_id_markers(document);
626 Self::process_header_anchor_comments(document);
627 Self::process_list_item_inline_anchors(document);
628 Self::process_paragraph_inline_anchors(document);
629 Self::process_remaining_inline_anchors(document);
630 Self::process_markdown_links(document);
631 Self::process_option_anchor_links(document);
632 Self::process_empty_auto_links(document);
633 }
634
635 fn process_list_item_id_markers(document: &kuchikikiki::NodeRef) {
637 let mut to_modify = Vec::new();
638
639 for comment in document.inclusive_descendants() {
640 if let Some(comment_node) = comment.as_comment() {
641 let comment_text = comment_node.borrow();
642 if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
643 let id = comment_text[id_start + 16..].trim();
644 if !id.is_empty()
645 && id
646 .chars()
647 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
648 {
649 if let Some(parent) = comment.parent()
651 && let Some(element) = parent.as_element()
652 && element.name.local.as_ref() == "li"
653 {
654 to_modify.push((comment.clone(), id.to_string()));
655 }
656 }
657 }
658 }
659 }
660
661 for (comment_node, id) in to_modify {
662 let span = kuchikikiki::NodeRef::new_element(
663 markup5ever::QualName::new(
664 None,
665 markup5ever::ns!(html),
666 local_name!("span"),
667 ),
668 vec![
669 (
670 kuchikikiki::ExpandedName::new("", "id"),
671 kuchikikiki::Attribute {
672 prefix: None,
673 value: id,
674 },
675 ),
676 (
677 kuchikikiki::ExpandedName::new("", "class"),
678 kuchikikiki::Attribute {
679 prefix: None,
680 value: "nixos-anchor".into(),
681 },
682 ),
683 ],
684 );
685 comment_node.insert_after(span);
686 comment_node.detach();
687 }
688 }
689
690 fn process_header_anchor_comments(document: &kuchikikiki::NodeRef) {
692 let mut to_modify = Vec::new();
693
694 for comment in document.inclusive_descendants() {
695 if let Some(comment_node) = comment.as_comment() {
696 let comment_text = comment_node.borrow();
697 if let Some(anchor_start) = comment_text.find("anchor:") {
698 let id = comment_text[anchor_start + 7..].trim();
699 if !id.is_empty()
700 && id
701 .chars()
702 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
703 {
704 if let Some(parent) = comment.parent()
706 && let Some(element) = parent.as_element()
707 && matches!(
708 element.name.local.as_ref(),
709 "h1" | "h2" | "h3" | "h4" | "h5" | "h6"
710 )
711 {
712 to_modify.push((parent.clone(), comment.clone(), id.to_string()));
713 }
714 }
715 }
716 }
717 }
718
719 for (header_element, comment_node, id) in to_modify {
720 if let Some(element) = header_element.as_element() {
721 element
722 .attributes
723 .borrow_mut()
724 .insert(local_name!("id"), id);
725 comment_node.detach();
726 }
727 }
728 }
729
730 fn process_list_item_inline_anchors(document: &kuchikikiki::NodeRef) {
732 for li_node in safe_select(document, "li") {
733 let li_element = li_node;
734
735 let has_code = !safe_select(&li_element, "code, pre").is_empty();
737 if has_code {
738 continue; }
740
741 let text_content = li_element.text_contents();
742
743 if let Some(anchor_start) = text_content.find("[]{#")
744 && let Some(anchor_end) = text_content[anchor_start..].find('}')
745 {
746 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
747 if !id.is_empty()
748 && id
749 .chars()
750 .all(|c| c.is_alphanumeric() || c == '-' || c == '_' || c == '.')
751 {
752 let remaining_content =
753 &text_content[anchor_start + anchor_end + 1..];
754
755 for child in li_element.children() {
757 child.detach();
758 }
759
760 let span = kuchikikiki::NodeRef::new_element(
761 markup5ever::QualName::new(
762 None,
763 markup5ever::ns!(html),
764 local_name!("span"),
765 ),
766 vec![
767 (
768 kuchikikiki::ExpandedName::new("", "id"),
769 kuchikikiki::Attribute {
770 prefix: None,
771 value: id.into(),
772 },
773 ),
774 (
775 kuchikikiki::ExpandedName::new("", "class"),
776 kuchikikiki::Attribute {
777 prefix: None,
778 value: "nixos-anchor".into(),
779 },
780 ),
781 ],
782 );
783 li_element.append(span);
784 if !remaining_content.is_empty() {
785 li_element
786 .append(kuchikikiki::NodeRef::new_text(remaining_content));
787 }
788 }
789 }
790 }
791 }
792
793 fn process_paragraph_inline_anchors(document: &kuchikikiki::NodeRef) {
795 for p_node in safe_select(document, "p") {
796 let p_element = p_node;
797
798 let has_code = !safe_select(&p_element, "code, pre").is_empty();
800 if has_code {
801 continue; }
803
804 let text_content = p_element.text_contents();
805
806 if let Some(anchor_start) = text_content.find("[]{#")
807 && let Some(anchor_end) = text_content[anchor_start..].find('}')
808 {
809 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
810 if !id.is_empty()
811 && id
812 .chars()
813 .all(|c| c.is_alphanumeric() || c == '-' || c == '_' || c == '.')
814 {
815 let remaining_content =
816 &text_content[anchor_start + anchor_end + 1..];
817
818 for child in p_element.children() {
820 child.detach();
821 }
822
823 let span = kuchikikiki::NodeRef::new_element(
824 markup5ever::QualName::new(
825 None,
826 markup5ever::ns!(html),
827 local_name!("span"),
828 ),
829 vec![
830 (
831 kuchikikiki::ExpandedName::new("", "id"),
832 kuchikikiki::Attribute {
833 prefix: None,
834 value: id.into(),
835 },
836 ),
837 (
838 kuchikikiki::ExpandedName::new("", "class"),
839 kuchikikiki::Attribute {
840 prefix: None,
841 value: "nixos-anchor".into(),
842 },
843 ),
844 ],
845 );
846 p_element.append(span);
847 if !remaining_content.is_empty() {
848 p_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
849 }
850 }
851 }
852 }
853 }
854
855 fn process_remaining_inline_anchors(document: &kuchikikiki::NodeRef) {
857 let mut text_nodes_to_process = Vec::new();
858
859 for node in document.inclusive_descendants() {
860 if let Some(text_node) = node.as_text() {
861 let mut parent = node.parent();
863 let mut in_code = false;
864 while let Some(p) = parent {
865 if let Some(element) = p.as_element()
866 && (element.name.local == local_name!("code")
867 || element.name.local == local_name!("pre"))
868 {
869 in_code = true;
870 break;
871 }
872 parent = p.parent();
873 }
874
875 if !in_code {
877 let text_content = text_node.borrow().clone();
878 if text_content.contains("[]{#") {
879 text_nodes_to_process.push((node.clone(), text_content));
880 }
881 }
882 }
883 }
884
885 for (text_node, text_content) in text_nodes_to_process {
886 let mut last_end = 0;
887 let mut new_children = Vec::new();
888
889 let chars = text_content.chars().collect::<Vec<_>>();
891 let mut i = 0;
892 while i < chars.len() {
893 if i + 4 < chars.len()
894 && chars[i] == '['
895 && chars[i + 1] == ']'
896 && chars[i + 2] == '{'
897 && chars[i + 3] == '#'
898 {
899 let anchor_start = i;
901 i += 4; let mut id = String::new();
904 while i < chars.len() && chars[i] != '}' {
905 if chars[i].is_alphanumeric()
906 || chars[i] == '-'
907 || chars[i] == '_'
908 || chars[i] == '.'
909 {
910 id.push(chars[i]);
911 i += 1;
912 } else {
913 break;
914 }
915 }
916
917 if i < chars.len() && chars[i] == '}' && !id.is_empty() {
918 let anchor_end = i + 1;
920
921 if anchor_start > last_end {
923 new_children.push(kuchikikiki::NodeRef::new_text(
924 chars[last_end..anchor_start].iter().collect::<String>(),
925 ));
926 }
927
928 let span = kuchikikiki::NodeRef::new_element(
930 markup5ever::QualName::new(
931 None,
932 markup5ever::ns!(html),
933 local_name!("span"),
934 ),
935 vec![
936 (
937 kuchikikiki::ExpandedName::new("", "id"),
938 kuchikikiki::Attribute {
939 prefix: None,
940 value: id,
941 },
942 ),
943 (
944 kuchikikiki::ExpandedName::new("", "class"),
945 kuchikikiki::Attribute {
946 prefix: None,
947 value: "nixos-anchor".into(),
948 },
949 ),
950 ],
951 );
952 new_children.push(span);
953
954 last_end = anchor_end;
955 i = anchor_end;
956 } else {
957 i += 1;
958 }
959 } else {
960 i += 1;
961 }
962 }
963
964 if last_end < chars.len() {
966 let after_text: String = chars[last_end..].iter().collect();
967 if !after_text.is_empty() {
968 new_children.push(kuchikikiki::NodeRef::new_text(after_text));
969 }
970 }
971
972 if !new_children.is_empty() {
974 for child in new_children {
975 text_node.insert_before(child);
976 }
977 text_node.detach();
978 }
979 }
980 }
981
982 fn process_empty_auto_links(document: &kuchikikiki::NodeRef) {
984 for link_node in safe_select(document, "a") {
985 let link_element = link_node;
986 if let Some(element) = link_element.as_element() {
987 let href = element
988 .attributes
989 .borrow()
990 .get(local_name!("href"))
991 .map(std::string::ToString::to_string);
992 let text_content = link_element.text_contents();
993
994 if let Some(href_value) = href
995 && href_value.starts_with('#')
996 && (text_content.trim().is_empty()
997 || text_content.trim() == "{{ANCHOR}}")
998 {
999 if text_content.trim() == "{{ANCHOR}}" {
1001 for child in link_element.children() {
1002 child.detach();
1003 }
1004 }
1005 let display_text = humanize_anchor(&href_value);
1007 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
1008 }
1009 }
1010 }
1011 }
1012
1013 fn process_option_anchor_links(document: &kuchikikiki::NodeRef) {
1015 let mut to_modify = Vec::new();
1016
1017 for link_node in safe_select(document, "a[href^='#opt-']") {
1019 let link_element = link_node;
1020 if let Some(element) = link_element.as_element() {
1021 let href = element
1022 .attributes
1023 .borrow()
1024 .get(local_name!("href"))
1025 .map(std::string::ToString::to_string);
1026 let text_content = link_element.text_contents();
1027
1028 if let Some(href_value) = href
1029 && href_value.starts_with("#opt-")
1030 {
1031 let option_anchor = href_value[1..].to_string(); let needs_text_replacement = text_content.trim().is_empty()
1033 || text_content.trim() == "{{ANCHOR}}";
1034 to_modify.push((
1035 link_element.clone(),
1036 option_anchor,
1037 needs_text_replacement,
1038 ));
1039 }
1040 }
1041 }
1042
1043 for (link_element, option_anchor, needs_text_replacement) in to_modify {
1045 if let Some(element) = link_element.as_element() {
1046 let new_href = format!("options.html#{option_anchor}");
1047 element
1048 .attributes
1049 .borrow_mut()
1050 .insert(local_name!("href"), new_href);
1051
1052 if needs_text_replacement {
1053 for child in link_element.children() {
1055 child.detach();
1056 }
1057
1058 if let Some(option_path) = option_anchor.strip_prefix("opt-") {
1061 let option_name = option_path.replace('-', ".");
1062 link_element.append(kuchikikiki::NodeRef::new_text(option_name));
1063 }
1064 }
1065 }
1066 }
1067 }
1068
1069 fn process_markdown_links(document: &kuchikikiki::NodeRef) {
1071 for link_node in safe_select(document, "a") {
1072 let link_element = link_node;
1073 if let Some(element) = link_element.as_element() {
1074 let href = element
1075 .attributes
1076 .borrow()
1077 .get(local_name!("href"))
1078 .map(std::string::ToString::to_string);
1079
1080 if let Some(href_value) = href {
1081 if !href_value.starts_with("http://")
1084 && !href_value.starts_with("https://")
1085 && !href_value.starts_with('#')
1086 && !href_value.starts_with("mailto:")
1087 {
1088 let (path_part, suffix) = href_value
1090 .find(['#', '?'])
1091 .map_or((href_value.as_str(), ""), |idx| {
1092 href_value.split_at(idx)
1093 });
1094
1095 if std::path::Path::new(path_part)
1096 .extension()
1097 .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
1098 {
1099 let new_href =
1100 format!("{}.html{}", &path_part[..path_part.len() - 3], suffix);
1101 element
1102 .attributes
1103 .borrow_mut()
1104 .insert(local_name!("href"), new_href);
1105 }
1106 }
1107 }
1108 }
1109 }
1110 }
1111}
1112
/// Turns an anchor such as `#sec-getting-started` into display text such as
/// `Getting Started`.
///
/// Leading `#` characters are dropped, then at most one of the known prefixes
/// `sec-`, `ssec-` or `opt-` is removed. The rest is split on `-`, `_` and
/// whitespace, and each word gets its first character upper-cased.
///
/// Only a single prefix is stripped so that ids whose real content starts
/// with a prefix-like word (`#sec-opt-in-features`) keep that word.
fn humanize_anchor(anchor: &str) -> String {
  const KNOWN_PREFIXES: [&str; 3] = ["sec-", "ssec-", "opt-"];

  let cleaned = anchor.trim_start_matches('#');
  let without_prefix = KNOWN_PREFIXES
    .iter()
    .find_map(|prefix| cleaned.strip_prefix(*prefix))
    .unwrap_or(cleaned);

  without_prefix
    .replace(['-', '_'], " ")
    .split_whitespace()
    .map(|word| {
      let mut chars = word.chars();
      chars.next().map_or_else(String::new, |first| {
        first.to_uppercase().collect::<String>() + chars.as_str()
      })
    })
    .collect::<Vec<String>>()
    .join(" ")
}
1132
/// Computes the relative path that leads from the directory of `from_page`
/// to `to_page`.
///
/// `.` components are ignored. The shared leading components are dropped,
/// one `..` is emitted for every remaining component of `from_page`'s
/// directory, and the rest of `to_page` is appended. If that yields an empty
/// path, `to_page` is returned as given.
fn relative_page_path(from_page: &str, to_page: &str) -> String {
  /// Components of `path` with `.` entries removed.
  fn meaningful_parts(path: &Path) -> Vec<Component<'_>> {
    path
      .components()
      .filter(|part| !matches!(part, Component::CurDir))
      .collect()
  }

  let from_dir = Path::new(from_page)
    .parent()
    .unwrap_or_else(|| Path::new(""));
  let from_parts = meaningful_parts(from_dir);
  let to_parts = meaningful_parts(Path::new(to_page));

  let shared = from_parts
    .iter()
    .zip(&to_parts)
    .take_while(|(a, b)| a == b)
    .count();

  // One `..` per directory level that is not shared, then the unshared tail
  // of the target.
  let relative: PathBuf = (shared..from_parts.len())
    .map(|_| Component::ParentDir)
    .chain(to_parts[shared..].iter().copied())
    .collect();

  let rendered = relative.to_string_lossy().into_owned();
  if rendered.is_empty() {
    to_page.to_string()
  } else {
    rendered
  }
}
1172
1173#[must_use]
1185#[expect(
1186 clippy::disallowed_types,
1187 reason = "Uses generic HashMap for hasher flexibility"
1188)]
1189pub fn rewrite_cross_page_anchor_links<S: std::hash::BuildHasher>(
1190 html: &str,
1191 current_page: &str,
1192 registry: &HashMap<String, (String, String), S>,
1193) -> String {
1194 if registry.is_empty() {
1195 return html.to_string();
1196 }
1197
1198 kuchiki_postprocess_html(html, |document| {
1199 let mut modifications: Vec<(kuchikikiki::NodeRef, String, Option<String>)> =
1201 Vec::new();
1202
1203 for link_node in safe_select(document, "a[href^='#']") {
1204 let Some(element) = link_node.as_element() else {
1205 continue;
1206 };
1207
1208 let href = element
1209 .attributes
1210 .borrow()
1211 .get(local_name!("href"))
1212 .map(std::string::ToString::to_string);
1213 let Some(href_val) = href else { continue };
1214
1215 let anchor_id = href_val.trim_start_matches('#');
1216 let Some((target_page, target_title)) = registry.get(anchor_id) else {
1217 continue;
1218 };
1219
1220 if target_page == current_page {
1221 continue;
1222 }
1223
1224 let rel = relative_page_path(current_page, target_page);
1225 let new_href = format!("{rel}#{anchor_id}");
1226
1227 let current_text = link_node.text_contents();
1228 let humanized = humanize_anchor(&href_val);
1229 let replace_text = current_text.trim().is_empty()
1230 || current_text.trim() == "{{ANCHOR}}"
1231 || current_text.trim() == humanized.trim();
1232
1233 let new_text = if replace_text {
1234 Some(target_title.clone())
1235 } else {
1236 None
1237 };
1238
1239 modifications.push((link_node, new_href, new_text));
1240 }
1241
1242 for (link_node, new_href, new_text) in modifications {
1243 if let Some(element) = link_node.as_element() {
1244 element
1245 .attributes
1246 .borrow_mut()
1247 .insert(local_name!("href"), new_href);
1248 }
1249 if let Some(text) = new_text {
1250 for child in link_node.children() {
1251 child.detach();
1252 }
1253 link_node.append(kuchikikiki::NodeRef::new_text(text));
1254 }
1255 }
1256 })
1257}
1258
1259pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
1261 fn inner<'a>(node: &'a AstNode<'a>) -> String {
1262 let mut text = String::new();
1263 for child in node.children() {
1264 match &child.data.borrow().value {
1265 NodeValue::Text(t) => text.push_str(t),
1266 NodeValue::Code(t) => text.push_str(&t.literal),
1267 NodeValue::Link(..)
1268 | NodeValue::Emph
1269 | NodeValue::Strong
1270 | NodeValue::Strikethrough
1271 | NodeValue::Superscript
1272 | NodeValue::Subscript
1273 | NodeValue::FootnoteReference(..) => {
1274 text.push_str(&inner(child));
1275 },
1276 #[expect(clippy::match_same_arms, reason = "Explicit for clarity")]
1277 NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
1278 _ => {},
1279 }
1280 }
1281 text
1282 }
1283 inner(node)
1284}
1285
/// Produces the fallback id for a heading: the heading text is HTML-escaped
/// with `html_escape::encode_text` and the result is passed to
/// `utils::slugify`.
// NOTE(review): escaping first presumably keeps these ids in line with the
// ones derived from rendered heading HTML — confirm against `utils::slugify`.
#[must_use]
pub(crate) fn slugify_heading(text: &str) -> String {
  utils::slugify(&html_escape::encode_text(text))
}
1303
1304pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
1306 let mut files = Vec::with_capacity(100);
1307
1308 for entry in WalkDir::new(input_dir)
1309 .follow_links(true)
1310 .into_iter()
1311 .filter_map(Result::ok)
1312 {
1313 let path = entry.path();
1314 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
1315 files.push(path.to_owned());
1316 }
1317 }
1318
1319 trace!("Found {} markdown files to process", files.len());
1320 files
1321}
1322
/// Features that can be queried through `MarkdownProcessor::has_feature`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessorFeature {
  /// GitHub Flavored Markdown extensions (the `gfm` option).
  Gfm,
  /// Nixpkgs-flavoured processing (the `nixpkgs` option).
  Nixpkgs,
  /// Code block highlighting (the `highlight_code` option).
  SyntaxHighlighting,
  /// Manpage URL mappings; reported as enabled only when a map was loaded.
  ManpageUrls,
}
1335
/// Blanks out admonition `<div>` blocks so that headings inside them are not
/// picked up during header extraction.
///
/// A line starting (after leading whitespace) with `<div class="admonition `
/// opens a block and a line equal to `</div>` (after leading whitespace)
/// closes the innermost open one. Every line belonging to a block is replaced
/// by an empty line, so the line count is preserved; all other lines are
/// copied through, each terminated by `\n`.
fn remove_admonition_blocks_for_headers(content: &str) -> String {
  let mut kept = String::with_capacity(content.len());
  let mut open_blocks = 0usize;

  for raw in content.lines() {
    let unindented = raw.trim_start();

    if unindented.starts_with("<div class=\"admonition ") {
      open_blocks += 1;
    } else if open_blocks > 0 {
      if unindented == "</div>" {
        open_blocks -= 1;
      }
    } else {
      kept.push_str(raw);
    }

    // Every input line yields exactly one output line, blank or not.
    kept.push('\n');
  }

  kept
}
1362
/// Returns whether `line` looks like a setext heading underline: non-empty
/// and made up solely of `=` (plus whitespace) or solely of `-` (plus
/// whitespace).
fn is_setext_heading_underline(line: &str) -> bool {
  let consists_of = |marker: char| {
    line
      .chars()
      .all(|ch| ch == marker || ch.is_whitespace())
  };

  !line.is_empty() && (consists_of('=') || consists_of('-'))
}
1368
1369fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
1371where
1372 F: FnOnce(&kuchikikiki::NodeRef),
1373{
1374 process_safe(
1375 html,
1376 |html| {
1377 use tendril::TendrilSink;
1378
1379 let document = kuchikikiki::parse_html().one(html);
1380 transform_fn(&document);
1381
1382 let mut out = Vec::new();
1383 let _ = document.serialize(&mut out);
1384 String::from_utf8_lossy(&out).into_owned()
1385 },
1386 html,
1387 )
1388}