1use std::{
6 collections::HashMap,
7 path::{Path, PathBuf},
8 sync::LazyLock,
9};
10
11use comrak::{
12 Arena,
13 nodes::{AstNode, NodeHeading, NodeValue},
14 options::Options,
15 parse_document,
16};
17use log::trace;
18use markup5ever::local_name;
19use regex::Regex;
20use walkdir::WalkDir;
21
22use super::{
23 dom::safe_select,
24 process::process_safe,
25 types::{
26 AstTransformer,
27 MarkdownOptions,
28 MarkdownProcessor,
29 PromptTransformer,
30 },
31};
32use crate::{
33 syntax::create_default_manager,
34 types::{Header, MarkdownResult},
35 utils,
36};
37
38static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
39 Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>")
40 .unwrap_or_else(|e| {
41 log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
42 utils::never_matching_regex().unwrap_or_else(|_| {
43 #[allow(
44 clippy::expect_used,
45 reason = "This pattern is guaranteed to be valid"
46 )]
47 Regex::new(r"[^\s\S]")
48 .expect("regex pattern [^\\s\\S] should always compile")
49 })
50 })
51});
52
53static HEADER_NO_ID_RE: LazyLock<Regex> = LazyLock::new(|| {
54 Regex::new(r"<h([1-6])>(.*?)</h[1-6]>").unwrap_or_else(|e| {
55 log::error!("Failed to compile HEADER_NO_ID_RE regex: {e}");
56 utils::never_matching_regex().unwrap_or_else(|_| {
57 #[allow(
58 clippy::expect_used,
59 reason = "This pattern is guaranteed to be valid"
60 )]
61 Regex::new(r"[^\s\S]")
62 .expect("regex pattern [^\\s\\S] should always compile")
63 })
64 })
65});
66
67static HTML_TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
68 Regex::new(r"<[^>]+>").unwrap_or_else(|e| {
69 log::error!("Failed to compile HTML_TAG_RE regex: {e}");
70 utils::never_matching_regex().unwrap_or_else(|_| {
71 #[allow(
72 clippy::expect_used,
73 reason = "This pattern is guaranteed to be valid"
74 )]
75 Regex::new(r"[^\s\S]")
76 .expect("regex pattern [^\\s\\S] should always compile")
77 })
78 })
79});
80
81impl MarkdownProcessor {
82 #[must_use]
84 pub fn new(options: MarkdownOptions) -> Self {
85 let manpage_urls = options
86 .manpage_urls_path
87 .as_ref()
88 .and_then(|path| crate::utils::load_manpage_urls(path).ok());
89
90 let syntax_manager = if options.highlight_code {
91 match create_default_manager(
92 options
93 .syntax_queries_path
94 .as_deref()
95 .map(std::path::Path::new),
96 ) {
97 Ok(manager) => {
98 log::info!("Syntax highlighting initialized successfully");
99 Some(manager)
100 },
101 Err(e) => {
102 log::error!("Failed to initialize syntax highlighting: {e}");
103 log::warn!(
104 "Continuing without syntax highlighting - code blocks will not be \
105 highlighted"
106 );
107 None
108 },
109 }
110 } else {
111 None
112 };
113
114 Self {
115 options,
116 manpage_urls,
117 syntax_manager,
118 base_dir: std::path::PathBuf::from("."),
119 }
120 }
121
  /// Returns the options this processor was constructed with.
  #[must_use]
  pub const fn options(&self) -> &MarkdownOptions {
    &self.options
  }
127
  /// Builder-style setter: stores a copy of `base_dir` on the processor and
  /// returns the processor. The base directory is the one handed to file
  /// include processing during nixpkgs preprocessing.
  #[must_use]
  pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
    self.base_dir = base_dir.to_path_buf();
    self
  }
134
  /// Reports whether `feature` is active on this processor.
  ///
  /// `Gfm`, `Nixpkgs` and `SyntaxHighlighting` mirror the corresponding
  /// option flags; `ManpageUrls` is true only when a manpage URL map was
  /// actually loaded. Note that `SyntaxHighlighting` reflects the requested
  /// option, not whether the syntax manager initialised successfully.
  #[must_use]
  pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
    match feature {
      ProcessorFeature::Gfm => self.options.gfm,
      ProcessorFeature::Nixpkgs => self.options.nixpkgs,
      ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
      ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
    }
  }
145
  /// Returns the loaded manpage URL map, or `None` when no map was loaded.
  #[must_use]
  pub const fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
    self.manpage_urls.as_ref()
  }
151
152 #[must_use]
154 pub fn highlight_codeblocks(&self, html: &str) -> String {
155 use kuchikikiki::parse_html;
156 use tendril::TendrilSink;
157
158 if !self.options.highlight_code || self.syntax_manager.is_none() {
159 return html.to_string();
160 }
161
162 let document = parse_html().one(html);
163
164 let mut code_blocks = Vec::new();
166 for pre_node in safe_select(&document, "pre > code") {
167 let code_node = pre_node;
168 if let Some(element) = code_node.as_element() {
169 let language = element
170 .attributes
171 .borrow()
172 .get("class")
173 .and_then(|class| class.strip_prefix("language-"))
174 .unwrap_or("text")
175 .to_string();
176 let code_text = code_node.text_contents();
177
178 if let Some(pre_parent) = code_node.parent() {
179 code_blocks.push((
180 pre_parent.clone(),
181 code_node.clone(),
182 code_text,
183 language,
184 ));
185 }
186 }
187 }
188
189 for (pre_element, _code_node, code_text, language) in code_blocks {
191 if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
192 {
193 let wrapped_html = format!(
195 r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
196 );
197 let fragment = parse_html().one(wrapped_html.as_str());
198 pre_element.insert_after(fragment);
199 pre_element.detach();
200 }
201 }
203
204 let mut buf = Vec::new();
205 if let Err(e) = document.serialize(&mut buf) {
206 log::warn!("DOM serialization failed: {e:?}");
207 return html.to_string(); }
209 String::from_utf8(buf).unwrap_or_else(|_| html.to_string())
210 }
211
212 fn handle_hardtabs(&self, code: &str) -> String {
214 use super::types::TabStyle;
215
216 if !code.contains('\t') {
218 return code.to_string();
219 }
220
221 match self.options.tab_style {
222 TabStyle::None => code.to_string(),
224
225 TabStyle::Warn => {
227 log::warn!(
228 "Hard tabs detected in code block. Consider using spaces for \
229 consistency. Tools like editorconfig may help you normalize spaces \
230 in your documents."
231 );
232 code.to_string()
233 },
234
235 TabStyle::Normalize => {
238 log::debug!("Replacing hard tabs with spaces");
239 code.replace('\t', " ")
240 },
241 }
242 }
243
244 fn process_hardtabs(&self, markdown: &str) -> String {
246 use super::types::TabStyle;
247 use crate::utils::codeblock::FenceTracker;
248
249 if self.options.tab_style == TabStyle::None {
251 return markdown.to_string();
252 }
253
254 let mut result = String::with_capacity(markdown.len());
255 let mut lines = markdown.lines().peekable();
256 let mut tracker = FenceTracker::new();
257
258 while let Some(line) = lines.next() {
259 tracker = tracker.process_line(line);
260
261 let processed_line = if tracker.in_code_block() && line.contains('\t') {
263 self.handle_hardtabs(line)
264 } else {
265 line.to_string()
266 };
267
268 result.push_str(&processed_line);
269
270 if lines.peek().is_some() {
272 result.push('\n');
273 }
274 }
275
276 result
277 }
278
279 fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
282 if !self.options.highlight_code {
283 return None;
284 }
285
286 let syntax_manager = self.syntax_manager.as_ref()?;
287
288 syntax_manager
289 .highlight_code(code, language, self.options.highlight_theme.as_deref())
290 .ok()
291 }
292
293 #[must_use]
295 pub fn render(&self, markdown: &str) -> MarkdownResult {
296 let (preprocessed, included_files) = self.preprocess(markdown);
297 let (headers, title) = self.extract_headers(&preprocessed);
298 let html = self.process_html_pipeline(&preprocessed);
299
300 MarkdownResult {
301 html,
302 headers,
303 title,
304 included_files,
305 }
306 }
307
308 fn process_html_pipeline(&self, content: &str) -> String {
310 let mut html = self.convert_to_html(content);
311
312 if cfg!(feature = "ndg-flavored") {
314 #[cfg(feature = "ndg-flavored")]
315 {
316 html = super::extensions::process_option_references(
317 &html,
318 self.options.valid_options.as_ref(),
319 );
320 }
321 }
322
323 if self.options.nixpkgs {
324 html = self.process_manpage_references_html(&html);
325 }
326
327 if self.options.highlight_code {
328 html = self.highlight_codeblocks(&html);
329 }
330
331 self.kuchiki_postprocess(&html)
332 }
333
334 fn preprocess(
336 &self,
337 content: &str,
338 ) -> (String, Vec<crate::types::IncludedFile>) {
339 let mut processed = content.to_string();
340 let mut included_files = Vec::new();
341
342 processed = super::extensions::process_myst_autolinks(&processed);
344
345 processed = self.process_hardtabs(&processed);
347
348 if self.options.nixpkgs {
349 let (content, files) = self.apply_nixpkgs_preprocessing(&processed);
350 processed = content;
351 included_files = files;
352 }
353
354 if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
355 processed = super::extensions::process_role_markup(
356 &processed,
357 self.manpage_urls.as_ref(),
358 self.options.auto_link_options,
359 self.options.valid_options.as_ref(),
360 );
361 }
362
363 if cfg!(feature = "wiki") {
364 processed = super::extensions::process_wikilinks(&processed);
365 }
366
367 (processed, included_files)
368 }
369
370 #[cfg(feature = "nixpkgs")]
372 fn apply_nixpkgs_preprocessing(
373 &self,
374 content: &str,
375 ) -> (String, Vec<crate::types::IncludedFile>) {
376 let (with_includes, included_files) =
377 match super::extensions::process_file_includes(content, &self.base_dir, 0)
378 {
379 Ok(result) => result,
380 Err(e) => {
381 log::warn!(
382 "File include processing failed: {e}. Continuing without includes."
383 );
384 (content.to_string(), Vec::new())
385 },
386 };
387 let with_blocks = super::extensions::process_block_elements(&with_includes);
388 let processed = super::extensions::process_inline_anchors(&with_blocks);
389 (processed, included_files)
390 }
391
  /// Fallback used when the crate is built without the `nixpkgs` feature:
  /// returns `content` unchanged and reports no included files.
  #[cfg(not(feature = "nixpkgs"))]
  fn apply_nixpkgs_preprocessing(
    &self,
    content: &str,
  ) -> (String, Vec<crate::types::IncludedFile>) {
    (content.to_string(), Vec::new())
  }
400
401 #[must_use]
403 pub fn extract_headers(
404 &self,
405 content: &str,
406 ) -> (Vec<Header>, Option<String>) {
407 use std::fmt::Write;
408
409 let arena = Arena::new();
410 let options = self.comrak_options();
411
412 let mut normalized = String::with_capacity(content.len());
414 for line in content.lines() {
415 let trimmed = line.trim();
416 if !trimmed.starts_with('#')
417 && let Some(anchor_start) = trimmed.rfind("{#")
418 && let Some(anchor_end) = trimmed[anchor_start..].find('}')
419 {
420 let text = trimmed[..anchor_start].trim_end();
421 let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
422 let _ = writeln!(normalized, "## {text} {{#{id}}}");
423 continue;
424 }
425 normalized.push_str(line);
426 normalized.push('\n');
427 }
428
429 let root = parse_document(&arena, &normalized, &options);
430
431 let mut headers = Vec::new();
432 let mut found_title = None;
433
434 for node in root.descendants() {
435 if let NodeValue::Heading(NodeHeading { level, .. }) =
436 &node.data.borrow().value
437 {
438 let mut text = String::new();
439 let mut explicit_id = None;
440
441 for child in node.children() {
442 match &child.data.borrow().value {
443 NodeValue::Text(t) => text.push_str(t),
444 NodeValue::Code(t) => text.push_str(&t.literal),
445 NodeValue::Link(..)
446 | NodeValue::Emph
447 | NodeValue::Strong
448 | NodeValue::Subscript
449 | NodeValue::Strikethrough
450 | NodeValue::Superscript
451 | NodeValue::FootnoteReference(..) => {
452 text.push_str(&extract_inline_text(child));
453 },
454 NodeValue::HtmlInline(html) => {
455 let html_str = html.as_str();
457 if let Some(start) = html_str.find("{#")
458 && let Some(end) = html_str[start..].find('}')
459 {
460 let anchor = &html_str[start + 2..start + end];
461 explicit_id = Some(anchor.to_string());
462 }
463 },
464 #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
465 NodeValue::Image(..) => {},
466 _ => {},
467 }
468 }
469
470 let trimmed = text.trim_end();
472 #[allow(clippy::option_if_let_else)]
473 let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
475 if let Some(end) = trimmed[start..].find('}') {
476 let anchor = &trimmed[start + 2..start + end];
477 (trimmed[..start].trim_end().to_string(), anchor.to_string())
478 } else {
479 (
480 text.clone(),
481 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
482 )
483 }
484 } else {
485 (
486 text.clone(),
487 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
488 )
489 };
490 if *level == 1 && found_title.is_none() {
491 found_title = Some(final_text.clone());
492 }
493 headers.push(Header {
494 text: final_text,
495 level: *level,
496 id,
497 });
498 }
499 }
500
501 (headers, found_title)
502 }
503
504 fn convert_to_html(&self, content: &str) -> String {
506 let arena = Arena::new();
508 let options = self.comrak_options();
509 let root = parse_document(&arena, content, &options);
510
511 let prompt_transformer = PromptTransformer;
513 prompt_transformer.transform(root);
514
515 let mut html_output = String::new();
516 if let Err(e) = comrak::format_html(root, &options, &mut html_output) {
517 log::error!("Failed to format HTML: {e}");
518 }
519
520 Self::process_header_anchors_html(&html_output)
522 }
523
524 fn process_header_anchors_html(html: &str) -> String {
528 let result = HEADER_ANCHOR_RE
530 .replace_all(html, |caps: ®ex::Captures| {
531 let level = &caps[1];
532 let prefix = &caps[2];
533 let id = &caps[3];
534 let suffix = &caps[4];
535 format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
536 })
537 .to_string();
538
539 HEADER_NO_ID_RE
541 .replace_all(&result, |caps: ®ex::Captures| {
542 let level = &caps[1];
543 let content = &caps[2];
544 let text_only = HTML_TAG_RE.replace_all(content, "");
546 let id = utils::slugify(&text_only);
547 if id.is_empty() {
548 format!("<h{level}>{content}</h{level}>")
550 } else {
551 format!("<h{level} id=\"{id}\">{content}</h{level}>")
552 }
553 })
554 .to_string()
555 }
556
557 fn comrak_options(&self) -> Options<'_> {
559 let mut options = Options::default();
560 if self.options.gfm {
562 options.extension.table = true;
563 options.extension.footnotes = true;
564 options.extension.strikethrough = true;
565 options.extension.tasklist = true;
566 options.extension.superscript = true;
567 options.extension.autolink = true;
568 }
569
570 options.render.r#unsafe = true;
573
574 options.extension.header_id_prefix = None;
576 options.extension.description_lists = true;
577 options
578 }
579
  /// Delegates manpage reference processing of `html` to the extensions
  /// module, passing along the loaded manpage URL map (if any).
  #[cfg(feature = "nixpkgs")]
  fn process_manpage_references_html(&self, html: &str) -> String {
    super::extensions::process_manpage_references(
      html,
      self.manpage_urls.as_ref(),
    )
  }
588
  /// Fallback used when the crate is built without the `nixpkgs` feature:
  /// returns `html` unchanged.
  #[cfg(not(feature = "nixpkgs"))]
  fn process_manpage_references_html(&self, html: &str) -> String {
    html.to_string()
  }
595
596 #[allow(
598 clippy::unused_self,
599 reason = "Method signature matches processor pattern"
600 )]
601 fn kuchiki_postprocess(&self, html: &str) -> String {
602 kuchiki_postprocess_html(html, |document| {
604 Self::apply_dom_transformations(document);
605 })
606 }
607
608 fn apply_dom_transformations(document: &kuchikikiki::NodeRef) {
610 Self::process_list_item_id_markers(document);
611 Self::process_header_anchor_comments(document);
612 Self::process_list_item_inline_anchors(document);
613 Self::process_paragraph_inline_anchors(document);
614 Self::process_remaining_inline_anchors(document);
615 Self::process_markdown_links(document);
616 Self::process_option_anchor_links(document);
617 Self::process_empty_auto_links(document);
618 Self::process_empty_html_links(document);
619 }
620
621 fn process_list_item_id_markers(document: &kuchikikiki::NodeRef) {
623 let mut to_modify = Vec::new();
624
625 for comment in document.inclusive_descendants() {
626 if let Some(comment_node) = comment.as_comment() {
627 let comment_text = comment_node.borrow();
628 if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
629 let id = comment_text[id_start + 16..].trim();
630 if !id.is_empty()
631 && id
632 .chars()
633 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
634 {
635 if let Some(parent) = comment.parent()
637 && let Some(element) = parent.as_element()
638 && element.name.local.as_ref() == "li"
639 {
640 to_modify.push((comment.clone(), id.to_string()));
641 }
642 }
643 }
644 }
645 }
646
647 for (comment_node, id) in to_modify {
648 let span = kuchikikiki::NodeRef::new_element(
649 markup5ever::QualName::new(
650 None,
651 markup5ever::ns!(html),
652 local_name!("span"),
653 ),
654 vec![
655 (
656 kuchikikiki::ExpandedName::new("", "id"),
657 kuchikikiki::Attribute {
658 prefix: None,
659 value: id,
660 },
661 ),
662 (
663 kuchikikiki::ExpandedName::new("", "class"),
664 kuchikikiki::Attribute {
665 prefix: None,
666 value: "nixos-anchor".into(),
667 },
668 ),
669 ],
670 );
671 comment_node.insert_after(span);
672 comment_node.detach();
673 }
674 }
675
676 fn process_header_anchor_comments(document: &kuchikikiki::NodeRef) {
678 let mut to_modify = Vec::new();
679
680 for comment in document.inclusive_descendants() {
681 if let Some(comment_node) = comment.as_comment() {
682 let comment_text = comment_node.borrow();
683 if let Some(anchor_start) = comment_text.find("anchor:") {
684 let id = comment_text[anchor_start + 7..].trim();
685 if !id.is_empty()
686 && id
687 .chars()
688 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
689 {
690 if let Some(parent) = comment.parent()
692 && let Some(element) = parent.as_element()
693 {
694 let tag_name = element.name.local.as_ref();
695 if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
696 to_modify.push((
697 parent.clone(),
698 comment.clone(),
699 id.to_string(),
700 ));
701 }
702 }
703 }
704 }
705 }
706 }
707
708 for (header_element, comment_node, id) in to_modify {
709 if let Some(element) = header_element.as_element() {
710 element
711 .attributes
712 .borrow_mut()
713 .insert(local_name!("id"), id);
714 comment_node.detach();
715 }
716 }
717 }
718
719 fn process_list_item_inline_anchors(document: &kuchikikiki::NodeRef) {
721 for li_node in safe_select(document, "li") {
722 let li_element = li_node;
723
724 let has_code = !safe_select(&li_element, "code, pre").is_empty();
726 if has_code {
727 continue; }
729
730 let text_content = li_element.text_contents();
731
732 if let Some(anchor_start) = text_content.find("[]{#")
733 && let Some(anchor_end) = text_content[anchor_start..].find('}')
734 {
735 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
736 if !id.is_empty()
737 && id
738 .chars()
739 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
740 {
741 let remaining_content =
742 &text_content[anchor_start + anchor_end + 1..];
743
744 for child in li_element.children() {
746 child.detach();
747 }
748
749 let span = kuchikikiki::NodeRef::new_element(
750 markup5ever::QualName::new(
751 None,
752 markup5ever::ns!(html),
753 local_name!("span"),
754 ),
755 vec![
756 (
757 kuchikikiki::ExpandedName::new("", "id"),
758 kuchikikiki::Attribute {
759 prefix: None,
760 value: id.into(),
761 },
762 ),
763 (
764 kuchikikiki::ExpandedName::new("", "class"),
765 kuchikikiki::Attribute {
766 prefix: None,
767 value: "nixos-anchor".into(),
768 },
769 ),
770 ],
771 );
772 li_element.append(span);
773 if !remaining_content.is_empty() {
774 li_element
775 .append(kuchikikiki::NodeRef::new_text(remaining_content));
776 }
777 }
778 }
779 }
780 }
781
782 fn process_paragraph_inline_anchors(document: &kuchikikiki::NodeRef) {
784 for p_node in safe_select(document, "p") {
785 let p_element = p_node;
786
787 let has_code = !safe_select(&p_element, "code, pre").is_empty();
789 if has_code {
790 continue; }
792
793 let text_content = p_element.text_contents();
794
795 if let Some(anchor_start) = text_content.find("[]{#")
796 && let Some(anchor_end) = text_content[anchor_start..].find('}')
797 {
798 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
799 if !id.is_empty()
800 && id
801 .chars()
802 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
803 {
804 let remaining_content =
805 &text_content[anchor_start + anchor_end + 1..];
806
807 for child in p_element.children() {
809 child.detach();
810 }
811
812 let span = kuchikikiki::NodeRef::new_element(
813 markup5ever::QualName::new(
814 None,
815 markup5ever::ns!(html),
816 local_name!("span"),
817 ),
818 vec![
819 (
820 kuchikikiki::ExpandedName::new("", "id"),
821 kuchikikiki::Attribute {
822 prefix: None,
823 value: id.into(),
824 },
825 ),
826 (
827 kuchikikiki::ExpandedName::new("", "class"),
828 kuchikikiki::Attribute {
829 prefix: None,
830 value: "nixos-anchor".into(),
831 },
832 ),
833 ],
834 );
835 p_element.append(span);
836 if !remaining_content.is_empty() {
837 p_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
838 }
839 }
840 }
841 }
842 }
843
844 fn process_remaining_inline_anchors(document: &kuchikikiki::NodeRef) {
846 let mut text_nodes_to_process = Vec::new();
847
848 for node in document.inclusive_descendants() {
849 if let Some(text_node) = node.as_text() {
850 let mut parent = node.parent();
852 let mut in_code = false;
853 while let Some(p) = parent {
854 if let Some(element) = p.as_element()
855 && (element.name.local == local_name!("code")
856 || element.name.local == local_name!("pre"))
857 {
858 in_code = true;
859 break;
860 }
861 parent = p.parent();
862 }
863
864 if !in_code {
866 let text_content = text_node.borrow().clone();
867 if text_content.contains("[]{#") {
868 text_nodes_to_process.push((node.clone(), text_content));
869 }
870 }
871 }
872 }
873
874 for (text_node, text_content) in text_nodes_to_process {
875 let mut last_end = 0;
876 let mut new_children = Vec::new();
877
878 let chars = text_content.chars().collect::<Vec<_>>();
880 let mut i = 0;
881 while i < chars.len() {
882 if i + 4 < chars.len()
883 && chars[i] == '['
884 && chars[i + 1] == ']'
885 && chars[i + 2] == '{'
886 && chars[i + 3] == '#'
887 {
888 let anchor_start = i;
890 i += 4; let mut id = String::new();
893 while i < chars.len() && chars[i] != '}' {
894 if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_'
895 {
896 id.push(chars[i]);
897 i += 1;
898 } else {
899 break;
900 }
901 }
902
903 if i < chars.len() && chars[i] == '}' && !id.is_empty() {
904 let anchor_end = i + 1;
906
907 if anchor_start > last_end {
909 let before_text: String =
910 chars[last_end..anchor_start].iter().collect();
911 if !before_text.is_empty() {
912 new_children.push(kuchikikiki::NodeRef::new_text(before_text));
913 }
914 }
915
916 let span = kuchikikiki::NodeRef::new_element(
918 markup5ever::QualName::new(
919 None,
920 markup5ever::ns!(html),
921 local_name!("span"),
922 ),
923 vec![
924 (
925 kuchikikiki::ExpandedName::new("", "id"),
926 kuchikikiki::Attribute {
927 prefix: None,
928 value: id,
929 },
930 ),
931 (
932 kuchikikiki::ExpandedName::new("", "class"),
933 kuchikikiki::Attribute {
934 prefix: None,
935 value: "nixos-anchor".into(),
936 },
937 ),
938 ],
939 );
940 new_children.push(span);
941
942 last_end = anchor_end;
943 i = anchor_end;
944 } else {
945 i += 1;
946 }
947 } else {
948 i += 1;
949 }
950 }
951
952 if last_end < chars.len() {
954 let after_text: String = chars[last_end..].iter().collect();
955 if !after_text.is_empty() {
956 new_children.push(kuchikikiki::NodeRef::new_text(after_text));
957 }
958 }
959
960 if !new_children.is_empty() {
962 for child in new_children {
963 text_node.insert_before(child);
964 }
965 text_node.detach();
966 }
967 }
968 }
969
970 fn process_empty_auto_links(document: &kuchikikiki::NodeRef) {
972 for link_node in safe_select(document, "a") {
973 let link_element = link_node;
974 if let Some(element) = link_element.as_element() {
975 let href = element
976 .attributes
977 .borrow()
978 .get(local_name!("href"))
979 .map(std::string::ToString::to_string);
980 let text_content = link_element.text_contents();
981
982 if let Some(href_value) = href
983 && href_value.starts_with('#')
984 && (text_content.trim().is_empty()
985 || text_content.trim() == "{{ANCHOR}}")
986 {
987 if text_content.trim() == "{{ANCHOR}}" {
989 for child in link_element.children() {
990 child.detach();
991 }
992 }
993 let display_text = Self::humanize_anchor_id(&href_value);
995 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
996 }
997 }
998 }
999 }
1000
1001 fn process_empty_html_links(document: &kuchikikiki::NodeRef) {
1003 for link_node in safe_select(document, "a[href^='#']") {
1004 let link_element = link_node;
1005 let text_content = link_element.text_contents();
1006
1007 if text_content.trim().is_empty() || text_content.trim() == "{{ANCHOR}}" {
1008 if text_content.trim() == "{{ANCHOR}}" {
1010 for child in link_element.children() {
1011 child.detach();
1012 }
1013 }
1014 if let Some(element) = link_element.as_element()
1015 && let Some(href) =
1016 element.attributes.borrow().get(local_name!("href"))
1017 {
1018 let display_text = Self::humanize_anchor_id(href);
1019 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
1020 }
1021 }
1022 }
1023 }
1024
1025 fn process_option_anchor_links(document: &kuchikikiki::NodeRef) {
1027 let mut to_modify = Vec::new();
1028
1029 for link_node in safe_select(document, "a[href^='#opt-']") {
1031 let link_element = link_node;
1032 if let Some(element) = link_element.as_element() {
1033 let href = element
1034 .attributes
1035 .borrow()
1036 .get(local_name!("href"))
1037 .map(std::string::ToString::to_string);
1038 let text_content = link_element.text_contents();
1039
1040 if let Some(href_value) = href
1041 && href_value.starts_with("#opt-")
1042 {
1043 let option_anchor = href_value[1..].to_string(); let needs_text_replacement = text_content.trim().is_empty()
1045 || text_content.trim() == "{{ANCHOR}}";
1046 to_modify.push((
1047 link_element.clone(),
1048 option_anchor,
1049 needs_text_replacement,
1050 ));
1051 }
1052 }
1053 }
1054
1055 for (link_element, option_anchor, needs_text_replacement) in to_modify {
1057 if let Some(element) = link_element.as_element() {
1058 let new_href = format!("options.html#{option_anchor}");
1059 element
1060 .attributes
1061 .borrow_mut()
1062 .insert(local_name!("href"), new_href);
1063
1064 if needs_text_replacement {
1065 for child in link_element.children() {
1067 child.detach();
1068 }
1069
1070 if let Some(option_path) = option_anchor.strip_prefix("opt-") {
1073 let option_name = option_path.replace('-', ".");
1074 link_element.append(kuchikikiki::NodeRef::new_text(option_name));
1075 }
1076 }
1077 }
1078 }
1079 }
1080
1081 fn process_markdown_links(document: &kuchikikiki::NodeRef) {
1083 for link_node in safe_select(document, "a") {
1084 let link_element = link_node;
1085 if let Some(element) = link_element.as_element() {
1086 let href = element
1087 .attributes
1088 .borrow()
1089 .get(local_name!("href"))
1090 .map(std::string::ToString::to_string);
1091
1092 if let Some(href_value) = href {
1093 if !href_value.starts_with("http://")
1096 && !href_value.starts_with("https://")
1097 && !href_value.starts_with('#')
1098 && !href_value.starts_with("mailto:")
1099 {
1100 let (path_part, suffix) = href_value
1102 .find(|c| c == '#' || c == '?')
1103 .map_or((href_value.as_str(), ""), |idx| {
1104 href_value.split_at(idx)
1105 });
1106
1107 if std::path::Path::new(path_part)
1108 .extension()
1109 .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
1110 {
1111 let new_href =
1112 format!("{}.html{}", &path_part[..path_part.len() - 3], suffix);
1113 element
1114 .attributes
1115 .borrow_mut()
1116 .insert(local_name!("href"), new_href);
1117 }
1118 }
1119 }
1120 }
1121 }
1122 }
1123
1124 fn humanize_anchor_id(anchor: &str) -> String {
1126 let cleaned = anchor.trim_start_matches('#');
1128
1129 let without_prefix = cleaned
1131 .trim_start_matches("sec-")
1132 .trim_start_matches("ssec-")
1133 .trim_start_matches("opt-");
1134
1135 let spaced = without_prefix.replace(['-', '_'], " ");
1137
1138 spaced
1140 .split_whitespace()
1141 .map(|word| {
1142 let mut chars = word.chars();
1143 chars.next().map_or_else(String::new, |c| {
1144 c.to_uppercase().collect::<String>() + chars.as_str()
1145 })
1146 })
1147 .collect::<Vec<String>>()
1148 .join(" ")
1149 }
1150}
1151
1152pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
1154 fn inner<'a>(node: &'a AstNode<'a>) -> String {
1155 let mut text = String::new();
1156 for child in node.children() {
1157 match &child.data.borrow().value {
1158 NodeValue::Text(t) => text.push_str(t),
1159 NodeValue::Code(t) => text.push_str(&t.literal),
1160 NodeValue::Link(..)
1161 | NodeValue::Emph
1162 | NodeValue::Strong
1163 | NodeValue::Strikethrough
1164 | NodeValue::Superscript
1165 | NodeValue::Subscript
1166 | NodeValue::FootnoteReference(..) => {
1167 text.push_str(&inner(child));
1168 },
1169 #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
1170 NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
1171 _ => {},
1172 }
1173 }
1174 text
1175 }
1176 inner(node)
1177}
1178
1179pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
1181 let mut files = Vec::with_capacity(100);
1182
1183 for entry in WalkDir::new(input_dir)
1184 .follow_links(true)
1185 .into_iter()
1186 .filter_map(Result::ok)
1187 {
1188 let path = entry.path();
1189 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
1190 files.push(path.to_owned());
1191 }
1192 }
1193
1194 trace!("Found {} markdown files to process", files.len());
1195 files
1196}
1197
/// Features that can be queried through `MarkdownProcessor::has_feature`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessorFeature {
  /// Reported as enabled when the `gfm` option is set.
  Gfm,
  /// Reported as enabled when the `nixpkgs` option is set.
  Nixpkgs,
  /// Reported as enabled when the `highlight_code` option is set.
  SyntaxHighlighting,
  /// Reported as enabled when a manpage URL map has been loaded.
  ManpageUrls,
}
1210
1211fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
1213where
1214 F: FnOnce(&kuchikikiki::NodeRef),
1215{
1216 process_safe(
1217 html,
1218 |html| {
1219 use tendril::TendrilSink;
1220
1221 let document = kuchikikiki::parse_html().one(html);
1222 transform_fn(&document);
1223
1224 let mut out = Vec::new();
1225 let _ = document.serialize(&mut out);
1226 String::from_utf8_lossy(&out).into_owned()
1227 },
1228 html,
1229 )
1230}