1use std::{
7 collections::HashMap,
8 path::{Path, PathBuf},
9};
10
11use comrak::{
12 Arena,
13 nodes::{AstNode, NodeHeading, NodeValue},
14 options::Options,
15 parse_document,
16};
17use log::trace;
18use markup5ever::local_name;
19use walkdir::WalkDir;
20
/// Errors raised by the DOM post-processing helpers in this module.
#[derive(Debug, thiserror::Error)]
pub enum DomError {
    /// A CSS selector could not be parsed or applied.
    #[error("CSS selector failed: {0}")]
    SelectorError(String),
    /// The in-memory DOM could not be serialized back to HTML.
    #[error("DOM serialization failed: {0}")]
    SerializationError(String),
}

/// Convenience alias for results of DOM operations in this module.
pub type DomResult<T> = Result<T, DomError>;
32
33fn safe_select(
35 document: &kuchikikiki::NodeRef,
36 selector: &str,
37) -> Vec<kuchikikiki::NodeRef> {
38 match document.select(selector) {
39 Ok(selections) => selections.map(|sel| sel.as_node().clone()).collect(),
40 Err(e) => {
41 log::warn!("DOM selector '{selector}' failed: {e:?}");
42 Vec::new()
43 },
44 }
45}
46
47use super::{
48 process::process_safe,
49 types::{
50 AstTransformer,
51 MarkdownOptions,
52 MarkdownProcessor,
53 PromptTransformer,
54 },
55};
56use crate::{
57 syntax::create_default_manager,
58 types::{Header, MarkdownResult},
59 utils,
60};
61
62impl MarkdownProcessor {
63 #[must_use]
65 pub fn new(options: MarkdownOptions) -> Self {
66 let manpage_urls = options
67 .manpage_urls_path
68 .as_ref()
69 .and_then(|path| crate::utils::load_manpage_urls(path).ok());
70
71 let syntax_manager = if options.highlight_code {
72 match create_default_manager() {
73 Ok(manager) => {
74 log::info!("Syntax highlighting initialized successfully");
75 Some(manager)
76 },
77 Err(e) => {
78 log::error!("Failed to initialize syntax highlighting: {e}");
79 log::warn!(
80 "Continuing without syntax highlighting - code blocks will not be \
81 highlighted"
82 );
83 None
84 },
85 }
86 } else {
87 None
88 };
89
90 Self {
91 options,
92 manpage_urls,
93 syntax_manager,
94 base_dir: std::path::PathBuf::from("."),
95 }
96 }
97
/// Returns the options this processor was constructed with.
#[must_use]
pub const fn options(&self) -> &MarkdownOptions {
    &self.options
}
103
/// Sets the directory against which relative include paths are resolved.
///
/// Builder-style: consumes and returns the processor.
#[must_use]
pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
    self.base_dir = base_dir.to_path_buf();
    self
}
110
/// Reports whether the given processor feature is currently active.
///
/// `ManpageUrls` is true only when a URL map was actually loaded, not
/// merely when a path was configured.
#[must_use]
pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
    match feature {
        ProcessorFeature::Gfm => self.options.gfm,
        ProcessorFeature::Nixpkgs => self.options.nixpkgs,
        ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
        ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
    }
}
121
/// Returns the loaded manpage-name → URL map, if one was loaded at
/// construction time.
#[must_use]
pub const fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
    self.manpage_urls.as_ref()
}
127
/// Replaces `<pre><code class="language-…">` blocks in `html` with
/// syntax-highlighted markup.
///
/// Returns the input unchanged when highlighting is disabled, no syntax
/// manager is available, or the final DOM serialization fails.
#[must_use]
pub fn highlight_codeblocks(&self, html: &str) -> String {
    use kuchikikiki::parse_html;
    use tendril::TendrilSink;

    if !self.options.highlight_code || self.syntax_manager.is_none() {
        return html.to_string();
    }

    let document = parse_html().one(html);

    // First pass: collect (parent <pre>, <code>, text, language) tuples so
    // we don't mutate the DOM while iterating over a selection.
    let mut code_blocks = Vec::new();
    for pre_node in safe_select(&document, "pre > code") {
        let code_node = pre_node;
        if let Some(element) = code_node.as_element() {
            // Language comes from a `language-xyz` class; anything else
            // falls back to plain "text".
            let language = element
                .attributes
                .borrow()
                .get("class")
                .and_then(|class| class.strip_prefix("language-"))
                .unwrap_or("text")
                .to_string();
            let code_text = code_node.text_contents();

            if let Some(pre_parent) = code_node.parent() {
                code_blocks.push((
                    pre_parent.clone(),
                    code_node.clone(),
                    code_text,
                    language,
                ));
            }
        }
    }

    // Second pass: swap each <pre> for a highlighted replacement. Blocks
    // the highlighter rejects are left untouched.
    for (pre_element, _code_node, code_text, language) in code_blocks {
        if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
        {
            let wrapped_html = format!(
                r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
            );
            let fragment = parse_html().one(wrapped_html.as_str());
            pre_element.insert_after(fragment);
            pre_element.detach();
        }
    }

    let mut buf = Vec::new();
    if let Err(e) = document.serialize(&mut buf) {
        log::warn!("DOM serialization failed: {e:?}");
        // Fall back to the unhighlighted input rather than emitting
        // partial output.
        return html.to_string();
    }
    String::from_utf8(buf).unwrap_or_else(|_| html.to_string())
}
187
/// Applies the configured `TabStyle` policy to a single line of code that
/// may contain hard tabs.
///
/// `None` keeps tabs, `Warn` keeps them but logs, and `Normalize`
/// rewrites each tab to space(s).
fn handle_hardtabs(&self, code: &str) -> String {
    use super::types::TabStyle;

    // Fast path: nothing to do for tab-free input.
    if !code.contains('\t') {
        return code.to_string();
    }

    match self.options.tab_style {
        TabStyle::None => code.to_string(),

        TabStyle::Warn => {
            log::warn!(
                "Hard tabs detected in code block. Consider using spaces for \
                 consistency. Tools like editorconfig may help you normalize spaces \
                 in your documents."
            );
            code.to_string()
        },

        TabStyle::Normalize => {
            log::debug!("Replacing hard tabs with spaces");
            code.replace('\t', " ")
        },
    }
}
219
/// Walks `markdown` line by line and applies the hard-tab policy to lines
/// inside fenced code blocks (``` or ~~~).
///
/// Tracks the fence character and length so a block is only closed by a
/// matching fence of at least the opening length (CommonMark-style).
/// NOTE(review): a trailing newline on the input is dropped (`lines()` +
/// peek), and the opening fence line itself is tab-processed while the
/// closing fence line is not — confirm both are intentional.
fn process_hardtabs(&self, markdown: &str) -> String {
    use super::types::TabStyle;

    if self.options.tab_style == TabStyle::None {
        return markdown.to_string();
    }

    let mut result = String::with_capacity(markdown.len());
    let mut lines = markdown.lines().peekable();
    let mut in_code_block = false;
    // Character ('`' or '~') and length of the currently open fence.
    let mut code_fence_char = None;
    let mut code_fence_count = 0;

    while let Some(line) = lines.next() {
        let trimmed = line.trim_start();

        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
            let Some(fence_char) = trimmed.chars().next() else {
                // Unreachable in practice (trimmed starts with a fence),
                // but copy the line through defensively.
                result.push_str(line);
                result.push('\n');
                continue;
            };
            let fence_count =
                trimmed.chars().take_while(|&c| c == fence_char).count();

            if fence_count >= 3 {
                if !in_code_block {
                    // Opening fence: remember its character and length.
                    in_code_block = true;
                    code_fence_char = Some(fence_char);
                    code_fence_count = fence_count;
                } else if code_fence_char == Some(fence_char)
                    && fence_count >= code_fence_count
                {
                    // Matching closing fence: leave the code block.
                    in_code_block = false;
                    code_fence_char = None;
                    code_fence_count = 0;
                }
            }
        }

        // Only lines inside a fenced block are subject to the tab policy.
        let processed_line = if in_code_block && line.contains('\t') {
            self.handle_hardtabs(line)
        } else {
            line.to_string()
        };

        result.push_str(&processed_line);

        // Re-insert newlines between lines, but not after the last one.
        if lines.peek().is_some() {
            result.push('\n');
        }
    }

    result
}
284
285 fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
288 if !self.options.highlight_code {
289 return None;
290 }
291
292 let syntax_manager = self.syntax_manager.as_ref()?;
293
294 syntax_manager
295 .highlight_code(code, language, self.options.highlight_theme.as_deref())
296 .ok()
297 }
298
299 #[must_use]
301 pub fn render(&self, markdown: &str) -> MarkdownResult {
302 let (preprocessed, included_files) = self.preprocess(markdown);
303 let (headers, title) = self.extract_headers(&preprocessed);
304 let html = self.process_html_pipeline(&preprocessed);
305
306 MarkdownResult {
307 html,
308 headers,
309 title,
310 included_files,
311 }
312 }
313
314 fn process_html_pipeline(&self, content: &str) -> String {
316 let mut html = self.convert_to_html(content);
317
318 if cfg!(feature = "ndg-flavored") {
320 #[cfg(feature = "ndg-flavored")]
321 {
322 html = super::extensions::process_option_references(
323 &html,
324 self.options.valid_options.as_ref(),
325 );
326 }
327 }
328
329 if self.options.nixpkgs {
330 html = self.process_manpage_references_html(&html);
331 }
332
333 if self.options.highlight_code {
334 html = self.highlight_codeblocks(&html);
335 }
336
337 self.kuchiki_postprocess(&html)
338 }
339
/// Runs the Markdown-level (pre-HTML) transformations.
///
/// Returns the rewritten source plus any files pulled in by include
/// directives (only populated when nixpkgs mode is on).
fn preprocess(
    &self,
    content: &str,
) -> (String, Vec<crate::types::IncludedFile>) {
    let mut processed = content.to_string();
    let mut included_files = Vec::new();

    // MyST-style autolinks are handled unconditionally.
    processed = super::extensions::process_myst_autolinks(&processed);

    // Hard-tab policy applies before any structural rewriting.
    processed = self.process_hardtabs(&processed);

    if self.options.nixpkgs {
        let (content, files) = self.apply_nixpkgs_preprocessing(&processed);
        processed = content;
        included_files = files;
    }

    // Role markup ({command}`…` etc.) runs for nixpkgs mode or whenever
    // the ndg-flavored feature is compiled in.
    if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
        processed = super::extensions::process_role_markup(
            &processed,
            self.manpage_urls.as_ref(),
            self.options.auto_link_options,
            self.options.valid_options.as_ref(),
        );
    }

    (processed, included_files)
}
371
/// Nixpkgs-mode preprocessing: file includes, block elements, and inline
/// anchors (compiled only with the `nixpkgs` feature).
///
/// Include failures are logged and processing continues without includes.
#[cfg(feature = "nixpkgs")]
fn apply_nixpkgs_preprocessing(
    &self,
    content: &str,
) -> (String, Vec<crate::types::IncludedFile>) {
    // Depth starts at 0; process_file_includes presumably bounds
    // recursive includes internally.
    let (with_includes, included_files) =
        match super::extensions::process_file_includes(content, &self.base_dir, 0)
        {
            Ok(result) => result,
            Err(e) => {
                log::warn!(
                    "File include processing failed: {e}. Continuing without includes."
                );
                (content.to_string(), Vec::new())
            },
        };
    let with_blocks = super::extensions::process_block_elements(&with_includes);
    let processed = super::extensions::process_inline_anchors(&with_blocks);
    (processed, included_files)
}
393
/// No-op stand-in used when the `nixpkgs` feature is compiled out:
/// returns the content unchanged with no included files.
#[cfg(not(feature = "nixpkgs"))]
fn apply_nixpkgs_preprocessing(
    &self,
    content: &str,
) -> (String, Vec<crate::types::IncludedFile>) {
    (content.to_string(), Vec::new())
}
402
/// Parses `content` and extracts all headings plus the document title.
///
/// The title is the text of the first level-1 heading, if any. Explicit
/// `{#anchor}` ids (either trailing the heading text or embedded as
/// inline HTML) win over slugified ids.
#[must_use]
pub fn extract_headers(
    &self,
    content: &str,
) -> (Vec<Header>, Option<String>) {
    use std::fmt::Write;

    let arena = Arena::new();
    let options = self.comrak_options();

    // Pre-pass: a non-heading line that ends in `{#id}` is promoted to a
    // level-2 heading so its anchor is picked up by the AST walk below.
    // NOTE(review): this promotes *any* such line, not just ones that were
    // headings upstream — confirm that is intended.
    let mut normalized = String::with_capacity(content.len());
    for line in content.lines() {
        let trimmed = line.trim_end();
        if !trimmed.starts_with('#') {
            if let Some(anchor_start) = trimmed.rfind("{#") {
                if let Some(anchor_end) = trimmed[anchor_start..].find('}') {
                    let text = trimmed[..anchor_start].trim_end();
                    let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
                    let _ = writeln!(normalized, "## {text} {{#{id}}}");
                    continue;
                }
            }
        }
        normalized.push_str(line);
        normalized.push('\n');
    }

    let root = parse_document(&arena, &normalized, &options);

    let mut headers = Vec::new();
    let mut found_title = None;

    for node in root.descendants() {
        if let NodeValue::Heading(NodeHeading { level, .. }) =
            &node.data.borrow().value
        {
            // Flatten the heading's inline children into plain text, and
            // capture an explicit `{#id}` from any inline HTML child.
            let mut text = String::new();
            let mut explicit_id = None;

            for child in node.children() {
                match &child.data.borrow().value {
                    NodeValue::Text(t) => text.push_str(t),
                    NodeValue::Code(t) => text.push_str(&t.literal),
                    NodeValue::Link(..)
                    | NodeValue::Emph
                    | NodeValue::Strong
                    | NodeValue::Subscript
                    | NodeValue::Strikethrough
                    | NodeValue::Superscript
                    | NodeValue::FootnoteReference(..) => {
                        // Formatted inlines contribute their nested text.
                        text.push_str(&extract_inline_text(child));
                    },
                    NodeValue::HtmlInline(html) => {
                        let html_str = html.as_str();
                        if let Some(start) = html_str.find("{#") {
                            if let Some(end) = html_str[start..].find('}') {
                                let anchor = &html_str[start + 2..start + end];
                                explicit_id = Some(anchor.to_string());
                            }
                        }
                    },
                    // Images contribute no text to the heading.
                    #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
                    NodeValue::Image(..) => {},
                    _ => {},
                }
            }

            // Prefer a trailing `{#id}` inside the collected text; fall
            // back to an inline-HTML id, then to a slug of the text.
            let trimmed = text.trim_end();
            #[allow(clippy::option_if_let_else)]
            let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
                if let Some(end) = trimmed[start..].find('}') {
                    let anchor = &trimmed[start + 2..start + end];
                    (trimmed[..start].trim_end().to_string(), anchor.to_string())
                } else {
                    (
                        text.clone(),
                        explicit_id.unwrap_or_else(|| utils::slugify(&text)),
                    )
                }
            } else {
                (
                    text.clone(),
                    explicit_id.unwrap_or_else(|| utils::slugify(&text)),
                )
            };
            // First H1 wins as the document title.
            if *level == 1 && found_title.is_none() {
                found_title = Some(final_text.clone());
            }
            headers.push(Header {
                text: final_text,
                level: *level,
                id,
            });
        }
    }

    (headers, found_title)
}
506
/// Converts Markdown `content` to HTML via comrak, applying the prompt
/// AST transformer before rendering and header-anchor rewriting after.
fn convert_to_html(&self, content: &str) -> String {
    let arena = Arena::new();
    let options = self.comrak_options();
    let root = parse_document(&arena, content, &options);

    // Mutates the AST in place (see PromptTransformer).
    let prompt_transformer = PromptTransformer;
    prompt_transformer.transform(root);

    let mut html_output = String::new();
    // On a formatting error this falls back to an empty string.
    comrak::format_html(root, &options, &mut html_output).unwrap_or_default();

    Self::process_header_anchors_html(&html_output)
}
524
525 fn process_header_anchors_html(html: &str) -> String {
528 use std::sync::LazyLock;
529
530 use regex::Regex;
531
532 static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
533 Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>")
534 .unwrap_or_else(|e| {
535 log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
536 utils::never_matching_regex().unwrap_or_else(|_| {
537 #[allow(
539 clippy::expect_used,
540 reason = "This pattern is guaranteed to be valid"
541 )]
542 Regex::new(r"[^\s\S]")
543 .expect("regex pattern [^\\s\\S] should always compile")
544 })
545 })
546 });
547
548 HEADER_ANCHOR_RE
549 .replace_all(html, |caps: ®ex::Captures| {
550 let level = &caps[1];
551 let prefix = &caps[2];
552 let id = &caps[3];
553 let suffix = &caps[4];
554 format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
555 })
556 .to_string()
557 }
558
/// Builds the comrak options used for both parsing and rendering.
///
/// GFM extensions are gated on `options.gfm`; raw HTML passthrough is
/// always enabled, and comrak's own header-id generation is disabled
/// because this module assigns ids itself.
fn comrak_options(&self) -> Options<'_> {
    let mut options = Options::default();
    if self.options.gfm {
        options.extension.table = true;
        options.extension.footnotes = true;
        options.extension.strikethrough = true;
        options.extension.tasklist = true;
        options.extension.superscript = true;
        options.extension.autolink = true;
    }
    // Allow raw HTML through; downstream DOM passes depend on it.
    options.render.r#unsafe = true;
    // Ids are assigned by process_header_anchors_html and the DOM passes.
    options.extension.header_ids = None;
    options.extension.description_lists = true;
    options
}
576
/// Rewrites manpage references in `html` using the loaded URL map
/// (compiled only with the `nixpkgs` feature).
#[cfg(feature = "nixpkgs")]
fn process_manpage_references_html(&self, html: &str) -> String {
    super::extensions::process_manpage_references(
        html,
        self.manpage_urls.as_ref(),
    )
}
585
/// No-op stand-in used when the `nixpkgs` feature is compiled out.
#[cfg(not(feature = "nixpkgs"))]
fn process_manpage_references_html(&self, html: &str) -> String {
    html.to_string()
}
592
/// Runs the full set of DOM transformations over `html` and returns the
/// re-serialized document (or the input unchanged on failure).
#[allow(
    clippy::unused_self,
    reason = "Method signature matches processor pattern"
)]
fn kuchiki_postprocess(&self, html: &str) -> String {
    kuchiki_postprocess_html(html, |document| {
        Self::apply_dom_transformations(document);
    })
}
604
/// Applies every DOM-level transformation, in order.
///
/// NOTE(review): ordering looks significant — anchor markers must become
/// spans before the empty-link passes fill in display text. Keep the
/// sequence stable unless verified otherwise.
fn apply_dom_transformations(document: &kuchikikiki::NodeRef) {
    Self::process_list_item_id_markers(document);
    Self::process_header_anchor_comments(document);
    Self::process_list_item_inline_anchors(document);
    Self::process_paragraph_inline_anchors(document);
    Self::process_remaining_inline_anchors(document);
    Self::process_option_anchor_links(document);
    Self::process_empty_auto_links(document);
    Self::process_empty_html_links(document);
}
616
/// Replaces `<!-- nixos-anchor-id:ID -->` comments that sit directly
/// inside `<li>` elements with `<span id="ID" class="nixos-anchor">`.
fn process_list_item_id_markers(document: &kuchikikiki::NodeRef) {
    // Collect first, mutate later: detaching nodes while walking
    // inclusive_descendants would invalidate the traversal.
    let mut to_modify = Vec::new();

    for comment in document.inclusive_descendants() {
        if let Some(comment_node) = comment.as_comment() {
            let comment_text = comment_node.borrow();
            if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
                // 16 == "nixos-anchor-id:".len()
                let id = comment_text[id_start + 16..].trim();
                // Only accept ids made of [alnum-_] to avoid injecting
                // arbitrary attribute values.
                if !id.is_empty()
                    && id
                        .chars()
                        .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
                {
                    if let Some(parent) = comment.parent() {
                        if let Some(element) = parent.as_element() {
                            if element.name.local.as_ref() == "li" {
                                to_modify.push((comment.clone(), id.to_string()));
                            }
                        }
                    }
                }
            }
        }
    }

    // Swap each marker comment for an anchor span in place.
    for (comment_node, id) in to_modify {
        let span = kuchikikiki::NodeRef::new_element(
            markup5ever::QualName::new(
                None,
                markup5ever::ns!(html),
                local_name!("span"),
            ),
            vec![
                (
                    kuchikikiki::ExpandedName::new("", "id"),
                    kuchikikiki::Attribute {
                        prefix: None,
                        value: id,
                    },
                ),
                (
                    kuchikikiki::ExpandedName::new("", "class"),
                    kuchikikiki::Attribute {
                        prefix: None,
                        value: "nixos-anchor".into(),
                    },
                ),
            ],
        );
        comment_node.insert_after(span);
        comment_node.detach();
    }
}
672
/// Turns `<!-- anchor:ID -->` comments inside `<h1>`–`<h6>` elements into
/// an `id="ID"` attribute on the heading, removing the comment.
fn process_header_anchor_comments(document: &kuchikikiki::NodeRef) {
    // Collect first, mutate later (same pattern as the li-marker pass).
    let mut to_modify = Vec::new();

    for comment in document.inclusive_descendants() {
        if let Some(comment_node) = comment.as_comment() {
            let comment_text = comment_node.borrow();
            if let Some(anchor_start) = comment_text.find("anchor:") {
                // 7 == "anchor:".len()
                let id = comment_text[anchor_start + 7..].trim();
                // Restrict ids to [alnum-_] for safety.
                if !id.is_empty()
                    && id
                        .chars()
                        .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
                {
                    if let Some(parent) = comment.parent() {
                        if let Some(element) = parent.as_element() {
                            let tag_name = element.name.local.as_ref();
                            if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
                                to_modify.push((
                                    parent.clone(),
                                    comment.clone(),
                                    id.to_string(),
                                ));
                            }
                        }
                    }
                }
            }
        }
    }

    for (header_element, comment_node, id) in to_modify {
        if let Some(element) = header_element.as_element() {
            element
                .attributes
                .borrow_mut()
                .insert(local_name!("id"), id);
            comment_node.detach();
        }
    }
}
715
716 fn process_list_item_inline_anchors(document: &kuchikikiki::NodeRef) {
718 for li_node in safe_select(document, "li") {
719 let li_element = li_node;
720
721 let has_code = !safe_select(&li_element, "code, pre").is_empty();
723 if has_code {
724 continue; }
726
727 let text_content = li_element.text_contents();
728
729 if let Some(anchor_start) = text_content.find("[]{#") {
730 if let Some(anchor_end) = text_content[anchor_start..].find('}') {
731 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
732 if !id.is_empty()
733 && id
734 .chars()
735 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
736 {
737 let remaining_content =
738 &text_content[anchor_start + anchor_end + 1..];
739
740 for child in li_element.children() {
742 child.detach();
743 }
744
745 let span = kuchikikiki::NodeRef::new_element(
746 markup5ever::QualName::new(
747 None,
748 markup5ever::ns!(html),
749 local_name!("span"),
750 ),
751 vec![
752 (
753 kuchikikiki::ExpandedName::new("", "id"),
754 kuchikikiki::Attribute {
755 prefix: None,
756 value: id.into(),
757 },
758 ),
759 (
760 kuchikikiki::ExpandedName::new("", "class"),
761 kuchikikiki::Attribute {
762 prefix: None,
763 value: "nixos-anchor".into(),
764 },
765 ),
766 ],
767 );
768 li_element.append(span);
769 if !remaining_content.is_empty() {
770 li_element
771 .append(kuchikikiki::NodeRef::new_text(remaining_content));
772 }
773 }
774 }
775 }
776 }
777 }
778
779 fn process_paragraph_inline_anchors(document: &kuchikikiki::NodeRef) {
781 for p_node in safe_select(document, "p") {
782 let p_element = p_node;
783
784 let has_code = !safe_select(&p_element, "code, pre").is_empty();
786 if has_code {
787 continue; }
789
790 let text_content = p_element.text_contents();
791
792 if let Some(anchor_start) = text_content.find("[]{#") {
793 if let Some(anchor_end) = text_content[anchor_start..].find('}') {
794 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
795 if !id.is_empty()
796 && id
797 .chars()
798 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
799 {
800 let remaining_content =
801 &text_content[anchor_start + anchor_end + 1..];
802
803 for child in p_element.children() {
805 child.detach();
806 }
807
808 let span = kuchikikiki::NodeRef::new_element(
809 markup5ever::QualName::new(
810 None,
811 markup5ever::ns!(html),
812 local_name!("span"),
813 ),
814 vec![
815 (
816 kuchikikiki::ExpandedName::new("", "id"),
817 kuchikikiki::Attribute {
818 prefix: None,
819 value: id.into(),
820 },
821 ),
822 (
823 kuchikikiki::ExpandedName::new("", "class"),
824 kuchikikiki::Attribute {
825 prefix: None,
826 value: "nixos-anchor".into(),
827 },
828 ),
829 ],
830 );
831 p_element.append(span);
832 if !remaining_content.is_empty() {
833 p_element
834 .append(kuchikikiki::NodeRef::new_text(remaining_content));
835 }
836 }
837 }
838 }
839 }
840 }
841
/// Catch-all pass: replaces every `[]{#id}` marker found in bare text
/// nodes (outside `<code>`/`<pre>`) with an anchor span, splitting the
/// text node around each marker.
fn process_remaining_inline_anchors(document: &kuchikikiki::NodeRef) {
    // Collect candidate text nodes first; DOM mutation happens afterwards.
    let mut text_nodes_to_process = Vec::new();

    for node in document.inclusive_descendants() {
        if let Some(text_node) = node.as_text() {
            // Walk up the ancestor chain: text inside code/pre is left
            // alone so literal markers survive in code samples.
            let mut parent = node.parent();
            let mut in_code = false;
            while let Some(p) = parent {
                if let Some(element) = p.as_element() {
                    if element.name.local == local_name!("code")
                        || element.name.local == local_name!("pre")
                    {
                        in_code = true;
                        break;
                    }
                }
                parent = p.parent();
            }

            if !in_code {
                let text_content = text_node.borrow().clone();
                if text_content.contains("[]{#") {
                    text_nodes_to_process.push((node.clone(), text_content));
                }
            }
        }
    }

    for (text_node, text_content) in text_nodes_to_process {
        // `last_end` tracks how much of the text has been emitted; the
        // char-index scan below finds each well-formed `[]{#id}` marker.
        let mut last_end = 0;
        let mut new_children = Vec::new();

        let chars = text_content.chars().collect::<Vec<_>>();
        let mut i = 0;
        while i < chars.len() {
            if i + 4 < chars.len()
                && chars[i] == '['
                && chars[i + 1] == ']'
                && chars[i + 2] == '{'
                && chars[i + 3] == '#'
            {
                let anchor_start = i;
                i += 4;
                // Accumulate the id up to '}'; stop early on any char
                // outside [alnum-_] (which invalidates the marker).
                let mut id = String::new();
                while i < chars.len() && chars[i] != '}' {
                    if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_'
                    {
                        id.push(chars[i]);
                        i += 1;
                    } else {
                        break;
                    }
                }

                if i < chars.len() && chars[i] == '}' && !id.is_empty() {
                    let anchor_end = i + 1;

                    // Text between the previous marker and this one is
                    // preserved as a plain text node.
                    if anchor_start > last_end {
                        let before_text: String =
                            chars[last_end..anchor_start].iter().collect();
                        if !before_text.is_empty() {
                            new_children.push(kuchikikiki::NodeRef::new_text(before_text));
                        }
                    }

                    let span = kuchikikiki::NodeRef::new_element(
                        markup5ever::QualName::new(
                            None,
                            markup5ever::ns!(html),
                            local_name!("span"),
                        ),
                        vec![
                            (
                                kuchikikiki::ExpandedName::new("", "id"),
                                kuchikikiki::Attribute {
                                    prefix: None,
                                    value: id,
                                },
                            ),
                            (
                                kuchikikiki::ExpandedName::new("", "class"),
                                kuchikikiki::Attribute {
                                    prefix: None,
                                    value: "nixos-anchor".into(),
                                },
                            ),
                        ],
                    );
                    new_children.push(span);

                    last_end = anchor_end;
                    i = anchor_end;
                } else {
                    // Malformed marker: resume scanning one char further.
                    i += 1;
                }
            } else {
                i += 1;
            }
        }

        // Emit whatever text trails the final marker.
        if last_end < chars.len() {
            let after_text: String = chars[last_end..].iter().collect();
            if !after_text.is_empty() {
                new_children.push(kuchikikiki::NodeRef::new_text(after_text));
            }
        }

        // Replace the original text node with the rebuilt sequence.
        if !new_children.is_empty() {
            for child in new_children {
                text_node.insert_before(child);
            }
            text_node.detach();
        }
    }
}
968
/// Fills in display text for fragment links (`href="#…"`) whose content
/// is empty or the `{{ANCHOR}}` placeholder, using a humanized form of
/// the anchor id.
fn process_empty_auto_links(document: &kuchikikiki::NodeRef) {
    for link_node in safe_select(document, "a") {
        let link_element = link_node;
        if let Some(element) = link_element.as_element() {
            // Copy href out so the attributes borrow ends before mutation.
            let href = element
                .attributes
                .borrow()
                .get(local_name!("href"))
                .map(std::string::ToString::to_string);
            let text_content = link_element.text_contents();

            if let Some(href_value) = href {
                if href_value.starts_with('#')
                    && (text_content.trim().is_empty()
                        || text_content.trim() == "{{ANCHOR}}")
                {
                    // The placeholder must be removed before appending the
                    // generated text.
                    if text_content.trim() == "{{ANCHOR}}" {
                        for child in link_element.children() {
                            child.detach();
                        }
                    }
                    let display_text = Self::humanize_anchor_id(&href_value);
                    link_element.append(kuchikikiki::NodeRef::new_text(display_text));
                }
            }
        }
    }
}
1000
/// Same repair as `process_empty_auto_links`, but driven by an attribute
/// selector (`a[href^='#']`) so it also catches links introduced by raw
/// HTML passthrough.
fn process_empty_html_links(document: &kuchikikiki::NodeRef) {
    for link_node in safe_select(document, "a[href^='#']") {
        let link_element = link_node;
        let text_content = link_element.text_contents();

        if text_content.trim().is_empty() || text_content.trim() == "{{ANCHOR}}" {
            // Drop the placeholder before appending generated text.
            if text_content.trim() == "{{ANCHOR}}" {
                for child in link_element.children() {
                    child.detach();
                }
            }
            if let Some(element) = link_element.as_element() {
                if let Some(href) =
                    element.attributes.borrow().get(local_name!("href"))
                {
                    let display_text = Self::humanize_anchor_id(href);
                    link_element.append(kuchikikiki::NodeRef::new_text(display_text));
                }
            }
        }
    }
}
1025
/// Rewrites `#opt-…` fragment links to point at `options.html#opt-…`,
/// and fills in empty / `{{ANCHOR}}` link text with the option name.
fn process_option_anchor_links(document: &kuchikikiki::NodeRef) {
    // Collect first, then mutate, so attribute borrows don't overlap.
    let mut to_modify = Vec::new();

    for link_node in safe_select(document, "a[href^='#opt-']") {
        let link_element = link_node;
        if let Some(element) = link_element.as_element() {
            let href = element
                .attributes
                .borrow()
                .get(local_name!("href"))
                .map(std::string::ToString::to_string);
            let text_content = link_element.text_contents();

            if let Some(href_value) = href {
                // Re-check the prefix defensively even though the selector
                // already filtered on it.
                if href_value.starts_with("#opt-") {
                    // Strip the leading '#'.
                    let option_anchor = href_value[1..].to_string();
                    let needs_text_replacement = text_content.trim().is_empty()
                        || text_content.trim() == "{{ANCHOR}}";
                    to_modify.push((
                        link_element.clone(),
                        option_anchor,
                        needs_text_replacement,
                    ));
                }
            }
        }
    }

    for (link_element, option_anchor, needs_text_replacement) in to_modify {
        if let Some(element) = link_element.as_element() {
            let new_href = format!("options.html#{option_anchor}");
            element
                .attributes
                .borrow_mut()
                .insert(local_name!("href"), new_href);

            if needs_text_replacement {
                for child in link_element.children() {
                    child.detach();
                }

                // Derive a display name from the anchor, e.g.
                // "opt-services-nginx" -> "services.nginx".
                // NOTE(review): this turns *every* dash into a dot, which
                // would mangle option segments that themselves contain
                // dashes — confirm anchors never encode such segments.
                if let Some(option_path) = option_anchor.strip_prefix("opt-") {
                    let option_name = option_path.replace('-', ".");
                    link_element.append(kuchikikiki::NodeRef::new_text(option_name));
                }
            }
        }
    }
}
1081
/// Produces human-readable link text from an anchor id: strips a leading
/// `#` and any `sec-`/`ssec-`/`opt-` prefix, treats `-`/`_` as word
/// separators, and capitalizes the first letter of each word.
fn humanize_anchor_id(anchor: &str) -> String {
    let key = anchor
        .trim_start_matches('#')
        .trim_start_matches("sec-")
        .trim_start_matches("ssec-")
        .trim_start_matches("opt-");

    // Separators become spaces, then each whitespace-delimited word is
    // title-cased.
    let spaced = key.replace(['-', '_'], " ");

    let capitalized: Vec<String> = spaced
        .split_whitespace()
        .map(|word| {
            let mut rest = word.chars();
            rest.next().map_or_else(String::new, |first| {
                format!("{}{}", first.to_uppercase(), rest.as_str())
            })
        })
        .collect();

    capitalized.join(" ")
}
1108}
1109
/// Recursively flattens the inline children of a comrak AST `node` into
/// plain text.
///
/// Text and code literals are copied directly; formatted inlines (links,
/// emphasis, etc.) contribute their nested text; inline HTML and images
/// contribute nothing.
pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
    let mut text = String::new();
    for child in node.children() {
        match &child.data.borrow().value {
            NodeValue::Text(t) => text.push_str(t),
            NodeValue::Code(t) => text.push_str(&t.literal),
            NodeValue::Link(..)
            | NodeValue::Emph
            | NodeValue::Strong
            | NodeValue::Strikethrough
            | NodeValue::Superscript
            | NodeValue::Subscript
            | NodeValue::FootnoteReference(..) => {
                // Recurse into formatted inline containers.
                text.push_str(&extract_inline_text(child));
            },
            #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
            NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
            _ => {},
        }
    }
    text
}
1133
1134pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
1136 let mut files = Vec::with_capacity(100);
1137
1138 for entry in WalkDir::new(input_dir)
1139 .follow_links(true)
1140 .into_iter()
1141 .filter_map(Result::ok)
1142 {
1143 let path = entry.path();
1144 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
1145 files.push(path.to_owned());
1146 }
1147 }
1148
1149 trace!("Found {} markdown files to process", files.len());
1150 files
1151}
1152
/// Feature flags queryable via `MarkdownProcessor::has_feature`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessorFeature {
    /// GitHub-Flavored Markdown extensions (tables, footnotes, …).
    Gfm,
    /// Nixpkgs-specific preprocessing (includes, roles, anchors).
    Nixpkgs,
    /// Code-block syntax highlighting.
    SyntaxHighlighting,
    /// A manpage-name → URL map was successfully loaded.
    ManpageUrls,
}
1165
/// Parses `html` into a DOM, applies `transform_fn`, and serializes the
/// result back to a string.
///
/// Runs through `process_safe`, which (judging by the trailing `html`
/// argument) falls back to the original input if the closure
/// fails/panics — see `process_safe` for the exact contract.
fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
where
    F: FnOnce(&kuchikikiki::NodeRef),
{
    process_safe(
        html,
        |html| {
            use tendril::TendrilSink;

            let document = kuchikikiki::parse_html().one(html);
            transform_fn(&document);

            // Serialization errors are swallowed here; an empty string is
            // returned and process_safe presumably handles that case.
            let mut out = Vec::new();
            document.serialize(&mut out).ok();
            String::from_utf8(out).unwrap_or_default()
        },
        html,
    )
}