1use std::{
7 collections::HashMap,
8 path::{Path, PathBuf},
9};
10
11use comrak::{
12 Arena,
13 nodes::{AstNode, NodeHeading, NodeValue},
14 options::Options,
15 parse_document,
16};
17use log::trace;
18use markup5ever::local_name;
19use walkdir::WalkDir;
20
/// Errors produced while post-processing rendered HTML with the DOM helpers.
#[derive(Debug, thiserror::Error)]
pub enum DomError {
  /// A CSS selector passed to the DOM query layer could not be evaluated.
  #[error("CSS selector failed: {0}")]
  SelectorError(String),
  /// Serializing the (possibly mutated) DOM back to an HTML string failed.
  #[error("DOM serialization failed: {0}")]
  SerializationError(String),
}
29
/// Convenience alias for fallible DOM operations.
pub type DomResult<T> = Result<T, DomError>;
32
33fn safe_select(
35 document: &kuchikikiki::NodeRef,
36 selector: &str,
37) -> Vec<kuchikikiki::NodeRef> {
38 match document.select(selector) {
39 Ok(selections) => selections.map(|sel| sel.as_node().clone()).collect(),
40 Err(e) => {
41 log::warn!("DOM selector '{selector}' failed: {e:?}");
42 Vec::new()
43 },
44 }
45}
46
47use super::{
48 process::process_safe,
49 types::{
50 AstTransformer,
51 MarkdownOptions,
52 MarkdownProcessor,
53 PromptTransformer,
54 },
55};
56use crate::{
57 syntax::create_default_manager,
58 types::{Header, MarkdownResult},
59 utils,
60};
61
62impl MarkdownProcessor {
63 #[must_use]
65 pub fn new(options: MarkdownOptions) -> Self {
66 let manpage_urls = options
67 .manpage_urls_path
68 .as_ref()
69 .and_then(|path| crate::utils::load_manpage_urls(path).ok());
70
71 let syntax_manager = if options.highlight_code {
72 match create_default_manager() {
73 Ok(manager) => {
74 log::info!("Syntax highlighting initialized successfully");
75 Some(manager)
76 },
77 Err(e) => {
78 log::error!("Failed to initialize syntax highlighting: {e}");
79 log::warn!(
80 "Continuing without syntax highlighting - code blocks will not be \
81 highlighted"
82 );
83 None
84 },
85 }
86 } else {
87 None
88 };
89
90 Self {
91 options,
92 manpage_urls,
93 syntax_manager,
94 base_dir: std::path::PathBuf::from("."),
95 }
96 }
97
  /// Returns a reference to the options this processor was built with.
  #[must_use]
  pub const fn options(&self) -> &MarkdownOptions {
    &self.options
  }
103
  /// Sets the directory against which relative file includes are resolved
  /// and returns the processor (builder style).
  #[must_use]
  pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
    self.base_dir = base_dir.to_path_buf();
    self
  }
110
  /// Reports whether a given processor feature is currently active.
  ///
  /// `ManpageUrls` counts as active only when a mapping was actually
  /// loaded, not merely when a path was configured.
  #[must_use]
  pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
    match feature {
      ProcessorFeature::Gfm => self.options.gfm,
      ProcessorFeature::Nixpkgs => self.options.nixpkgs,
      ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
      ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
    }
  }
121
  /// Returns the loaded manpage-name → URL mapping, if one is available.
  #[must_use]
  pub const fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
    self.manpage_urls.as_ref()
  }
127
  /// Rewrites `<pre><code>` blocks in `html` with syntax-highlighted markup.
  ///
  /// Returns the input unchanged when highlighting is disabled or the
  /// syntax manager failed to initialize; serialization failures likewise
  /// fall back to the original HTML rather than erroring.
  #[must_use]
  pub fn highlight_codeblocks(&self, html: &str) -> String {
    use kuchikikiki::parse_html;
    use tendril::TendrilSink;

    if !self.options.highlight_code || self.syntax_manager.is_none() {
      return html.to_string();
    }

    let document = parse_html().one(html);

    // Phase 1: collect the blocks to replace. Mutation is deferred so the
    // DOM is not edited while the selection is being iterated.
    let mut code_blocks = Vec::new();
    for pre_node in safe_select(&document, "pre > code") {
      let code_node = pre_node;
      if let Some(element) = code_node.as_element() {
        // Language comes from a `language-*` class; anything else (or no
        // class) is highlighted as plain "text".
        let language = element
          .attributes
          .borrow()
          .get("class")
          .and_then(|class| class.strip_prefix("language-"))
          .unwrap_or("text")
          .to_string();
        let code_text = code_node.text_contents();

        if let Some(pre_parent) = code_node.parent() {
          code_blocks.push((
            pre_parent.clone(),
            code_node.clone(),
            code_text,
            language,
          ));
        }
      }
    }

    // Phase 2: swap each recorded <pre> for its highlighted replacement.
    // Blocks the highlighter cannot handle are left untouched.
    for (pre_element, _code_node, code_text, language) in code_blocks {
      if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
      {
        let wrapped_html = format!(
          r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
        );
        let fragment = parse_html().one(wrapped_html.as_str());
        pre_element.insert_after(fragment);
        pre_element.detach();
      }
    }

    let mut buf = Vec::new();
    if let Err(e) = document.serialize(&mut buf) {
      log::warn!("DOM serialization failed: {e:?}");
      return html.to_string();
    }
    // Non-UTF-8 output should not occur, but fall back to the input if so.
    String::from_utf8(buf).unwrap_or_else(|_| html.to_string())
  }
187
188 fn handle_hardtabs(&self, code: &str) -> String {
190 use super::types::TabStyle;
191
192 if !code.contains('\t') {
194 return code.to_string();
195 }
196
197 match self.options.tab_style {
198 TabStyle::None => code.to_string(),
200
201 TabStyle::Warn => {
203 log::warn!(
204 "Hard tabs detected in code block. Consider using spaces for \
205 consistency. Tools like editorconfig may help you normalize spaces \
206 in your documents."
207 );
208 code.to_string()
209 },
210
211 TabStyle::Normalize => {
214 log::debug!("Replacing hard tabs with spaces");
215 code.replace('\t', " ")
216 },
217 }
218 }
219
  /// Applies tab handling to `markdown`, touching only lines inside fenced
  /// code blocks (``` or ~~~ fences of at least three characters).
  ///
  /// The opening fence's character and length are tracked so a block is
  /// only closed by a matching fence of equal or greater length.
  fn process_hardtabs(&self, markdown: &str) -> String {
    use super::types::TabStyle;

    // Fast path: policy disabled means nothing to rewrite.
    if self.options.tab_style == TabStyle::None {
      return markdown.to_string();
    }

    let mut result = String::with_capacity(markdown.len());
    let mut lines = markdown.lines().peekable();
    let mut in_code_block = false;
    let mut code_fence_char = None;
    let mut code_fence_count = 0;

    while let Some(line) = lines.next() {
      let trimmed = line.trim_start();

      if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
        let Some(fence_char) = trimmed.chars().next() else {
          // Unreachable in practice (the line starts with a fence), but
          // degrade gracefully by copying the line through.
          result.push_str(line);
          result.push('\n');
          continue;
        };
        let fence_count =
          trimmed.chars().take_while(|&c| c == fence_char).count();

        if fence_count >= 3 {
          if !in_code_block {
            // Opening fence: remember its character and length.
            in_code_block = true;
            code_fence_char = Some(fence_char);
            code_fence_count = fence_count;
          } else if code_fence_char == Some(fence_char)
            && fence_count >= code_fence_count
          {
            // Matching closing fence ends the block.
            in_code_block = false;
            code_fence_char = None;
            code_fence_count = 0;
          }
        }
      }

      // NOTE: state is updated before this check, so the opening fence line
      // itself is treated as inside the block while the closing fence line
      // is treated as outside it.
      let processed_line = if in_code_block && line.contains('\t') {
        self.handle_hardtabs(line)
      } else {
        line.to_string()
      };

      result.push_str(&processed_line);

      // Avoid appending a trailing newline the input did not have.
      if lines.peek().is_some() {
        result.push('\n');
      }
    }

    result
  }
284
285 fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
288 if !self.options.highlight_code {
289 return None;
290 }
291
292 let syntax_manager = self.syntax_manager.as_ref()?;
293
294 syntax_manager
295 .highlight_code(code, language, self.options.highlight_theme.as_deref())
296 .ok()
297 }
298
  /// Renders `markdown` to HTML, returning the HTML together with the
  /// extracted header outline, the document title, and any files pulled in
  /// by include directives during preprocessing.
  #[must_use]
  pub fn render(&self, markdown: &str) -> MarkdownResult {
    let (preprocessed, included_files) = self.preprocess(markdown);
    // Headers are extracted from the preprocessed markdown (not the HTML)
    // so explicit `{#anchor}` ids survive intact.
    let (headers, title) = self.extract_headers(&preprocessed);
    let html = self.process_html_pipeline(&preprocessed);

    MarkdownResult {
      html,
      headers,
      title,
      included_files,
    }
  }
313
314 fn process_html_pipeline(&self, content: &str) -> String {
316 let mut html = self.convert_to_html(content);
317
318 if cfg!(feature = "ndg-flavored") {
320 #[cfg(feature = "ndg-flavored")]
321 {
322 html = super::extensions::process_option_references(
323 &html,
324 self.options.valid_options.as_ref(),
325 );
326 }
327 }
328
329 if self.options.nixpkgs {
330 html = self.process_manpage_references_html(&html);
331 }
332
333 if self.options.highlight_code {
334 html = self.highlight_codeblocks(&html);
335 }
336
337 self.kuchiki_postprocess(&html)
338 }
339
  /// Runs all markdown-level (pre-HTML) transformations.
  ///
  /// Returns the rewritten markdown plus the files pulled in by include
  /// directives (empty unless nixpkgs preprocessing ran).
  fn preprocess(
    &self,
    content: &str,
  ) -> (String, Vec<crate::types::IncludedFile>) {
    let mut processed = content.to_string();
    let mut included_files = Vec::new();

    // MyST-style autolinks are normalized first.
    processed = super::extensions::process_myst_autolinks(&processed);

    // Tab policy runs before any structural rewriting.
    processed = self.process_hardtabs(&processed);

    if self.options.nixpkgs {
      let (content, files) = self.apply_nixpkgs_preprocessing(&processed);
      processed = content;
      included_files = files;
    }

    // Role markup applies to nixpkgs documents and to builds with the
    // ndg-flavored feature enabled.
    if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
      processed = super::extensions::process_role_markup(
        &processed,
        self.manpage_urls.as_ref(),
        self.options.auto_link_options,
        self.options.valid_options.as_ref(),
      );
    }

    (processed, included_files)
  }
371
  /// Nixpkgs-specific markdown preprocessing: file includes, then block
  /// elements, then inline anchors.
  ///
  /// Include failures are logged and processing continues with the
  /// original content instead of aborting the render.
  #[cfg(feature = "nixpkgs")]
  fn apply_nixpkgs_preprocessing(
    &self,
    content: &str,
  ) -> (String, Vec<crate::types::IncludedFile>) {
    // The 0 marks this document as the top level for recursion limiting.
    let (with_includes, included_files) =
      match super::extensions::process_file_includes(content, &self.base_dir, 0)
      {
        Ok(result) => result,
        Err(e) => {
          log::warn!(
            "File include processing failed: {e}. Continuing without includes."
          );
          (content.to_string(), Vec::new())
        },
      };
    let with_blocks = super::extensions::process_block_elements(&with_includes);
    let processed = super::extensions::process_inline_anchors(&with_blocks);
    (processed, included_files)
  }
393
  /// Stand-in used when the `nixpkgs` feature is disabled: content passes
  /// through unchanged and no files are reported as included.
  #[cfg(not(feature = "nixpkgs"))]
  fn apply_nixpkgs_preprocessing(
    &self,
    content: &str,
  ) -> (String, Vec<crate::types::IncludedFile>) {
    (content.to_string(), Vec::new())
  }
402
  /// Parses `content` and extracts all headers plus the document title.
  ///
  /// The title is the text of the first level-1 heading, if any. Header ids
  /// come from an explicit `{#anchor}` marker when present, otherwise from
  /// the slugified header text.
  #[must_use]
  pub fn extract_headers(
    &self,
    content: &str,
  ) -> (Vec<Header>, Option<String>) {
    use std::fmt::Write;

    let arena = Arena::new();
    let options = self.comrak_options();

    // Normalize standalone "text {#id}" lines into level-2 headings so
    // comrak parses them as headers and their anchors are collected below.
    let mut normalized = String::with_capacity(content.len());
    for line in content.lines() {
      let trimmed = line.trim_end();
      if !trimmed.starts_with('#')
        && let Some(anchor_start) = trimmed.rfind("{#")
        && let Some(anchor_end) = trimmed[anchor_start..].find('}')
      {
        let text = trimmed[..anchor_start].trim_end();
        let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
        let _ = writeln!(normalized, "## {text} {{#{id}}}");
        continue;
      }
      normalized.push_str(line);
      normalized.push('\n');
    }

    let root = parse_document(&arena, &normalized, &options);

    let mut headers = Vec::new();
    let mut found_title = None;

    for node in root.descendants() {
      if let NodeValue::Heading(NodeHeading { level, .. }) =
        &node.data.borrow().value
      {
        let mut text = String::new();
        let mut explicit_id = None;

        // Flatten the heading's inline children into plain text, picking
        // up an explicit anchor from any inline HTML along the way.
        for child in node.children() {
          match &child.data.borrow().value {
            NodeValue::Text(t) => text.push_str(t),
            NodeValue::Code(t) => text.push_str(&t.literal),
            NodeValue::Link(..)
            | NodeValue::Emph
            | NodeValue::Strong
            | NodeValue::Subscript
            | NodeValue::Strikethrough
            | NodeValue::Superscript
            | NodeValue::FootnoteReference(..) => {
              text.push_str(&extract_inline_text(child));
            },
            NodeValue::HtmlInline(html) => {
              // Inline HTML may carry a `{#anchor}` marker.
              let html_str = html.as_str();
              if let Some(start) = html_str.find("{#")
                && let Some(end) = html_str[start..].find('}')
              {
                let anchor = &html_str[start + 2..start + end];
                explicit_id = Some(anchor.to_string());
              }
            },
            #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
            NodeValue::Image(..) => {},
            _ => {},
          }
        }

        // A complete trailing "{#id}" in the collected text wins; otherwise
        // fall back to the inline-HTML id, then to a slug of the text.
        let trimmed = text.trim_end();
        #[allow(clippy::option_if_let_else)]
        let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
          if let Some(end) = trimmed[start..].find('}') {
            let anchor = &trimmed[start + 2..start + end];
            (trimmed[..start].trim_end().to_string(), anchor.to_string())
          } else {
            (
              text.clone(),
              explicit_id.unwrap_or_else(|| utils::slugify(&text)),
            )
          }
        } else {
          (
            text.clone(),
            explicit_id.unwrap_or_else(|| utils::slugify(&text)),
          )
        };
        if *level == 1 && found_title.is_none() {
          found_title = Some(final_text.clone());
        }
        headers.push(Header {
          text: final_text,
          level: *level,
          id,
        });
      }
    }

    (headers, found_title)
  }
505
506 fn convert_to_html(&self, content: &str) -> String {
508 let arena = Arena::new();
510 let options = self.comrak_options();
511 let root = parse_document(&arena, content, &options);
512
513 let prompt_transformer = PromptTransformer;
515 prompt_transformer.transform(root);
516
517 let mut html_output = String::new();
518 comrak::format_html(root, &options, &mut html_output).unwrap_or_default();
519
520 Self::process_header_anchors_html(&html_output)
522 }
523
524 fn process_header_anchors_html(html: &str) -> String {
528 use std::sync::LazyLock;
529
530 use regex::Regex;
531
532 static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
534 Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>")
535 .unwrap_or_else(|e| {
536 log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
537 utils::never_matching_regex().unwrap_or_else(|_| {
538 #[allow(
539 clippy::expect_used,
540 reason = "This pattern is guaranteed to be valid"
541 )]
542 Regex::new(r"[^\s\S]")
543 .expect("regex pattern [^\\s\\S] should always compile")
544 })
545 })
546 });
547
548 static HEADER_NO_ID_RE: LazyLock<Regex> = LazyLock::new(|| {
551 Regex::new(r"<h([1-6])>(.*?)</h[1-6]>").unwrap_or_else(|e| {
552 log::error!("Failed to compile HEADER_NO_ID_RE regex: {e}");
553 utils::never_matching_regex().unwrap_or_else(|_| {
554 #[allow(
555 clippy::expect_used,
556 reason = "This pattern is guaranteed to be valid"
557 )]
558 Regex::new(r"[^\s\S]")
559 .expect("regex pattern [^\\s\\S] should always compile")
560 })
561 })
562 });
563
564 static HTML_TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
566 Regex::new(r"<[^>]+>").unwrap_or_else(|e| {
567 log::error!("Failed to compile HTML_TAG_RE regex: {e}");
568 utils::never_matching_regex().unwrap_or_else(|_| {
569 #[allow(
570 clippy::expect_used,
571 reason = "This pattern is guaranteed to be valid"
572 )]
573 Regex::new(r"[^\s\S]")
574 .expect("regex pattern [^\\s\\S] should always compile")
575 })
576 })
577 });
578
579 let result = HEADER_ANCHOR_RE
581 .replace_all(html, |caps: ®ex::Captures| {
582 let level = &caps[1];
583 let prefix = &caps[2];
584 let id = &caps[3];
585 let suffix = &caps[4];
586 format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
587 })
588 .to_string();
589
590 HEADER_NO_ID_RE
592 .replace_all(&result, |caps: ®ex::Captures| {
593 let level = &caps[1];
594 let content = &caps[2];
595 let text_only = HTML_TAG_RE.replace_all(content, "");
597 let id = utils::slugify(&text_only);
598 if id.is_empty() {
599 format!("<h{level}>{content}</h{level}>")
601 } else {
602 format!("<h{level} id=\"{id}\">{content}</h{level}>")
603 }
604 })
605 .to_string()
606 }
607
  /// Builds the comrak parser/renderer options for this processor.
  ///
  /// GFM extensions are gated on `options.gfm`. Raw HTML is always allowed
  /// (`render.unsafe`) because later passes inject and rewrite HTML.
  /// Comrak's own header ids are disabled; ids are assigned by
  /// `process_header_anchors_html` instead.
  fn comrak_options(&self) -> Options<'_> {
    let mut options = Options::default();
    if self.options.gfm {
      options.extension.table = true;
      options.extension.footnotes = true;
      options.extension.strikethrough = true;
      options.extension.tasklist = true;
      options.extension.superscript = true;
      options.extension.autolink = true;
    }
    options.render.r#unsafe = true;
    options.extension.header_ids = None;
    options.extension.description_lists = true;
    options
  }
625
  /// Rewrites manpage references in `html` into links using the loaded
  /// manpage URL mapping (nixpkgs builds only).
  #[cfg(feature = "nixpkgs")]
  fn process_manpage_references_html(&self, html: &str) -> String {
    super::extensions::process_manpage_references(
      html,
      self.manpage_urls.as_ref(),
    )
  }
634
  /// Pass-through used when the `nixpkgs` feature is disabled.
  #[cfg(not(feature = "nixpkgs"))]
  fn process_manpage_references_html(&self, html: &str) -> String {
    html.to_string()
  }
641
  /// Applies the DOM transformation passes to `html` via the safe
  /// parse/transform/serialize wrapper.
  #[allow(
    clippy::unused_self,
    reason = "Method signature matches processor pattern"
  )]
  fn kuchiki_postprocess(&self, html: &str) -> String {
    kuchiki_postprocess_html(html, |document| {
      Self::apply_dom_transformations(document);
    })
  }
653
  /// Runs every DOM transformation pass in a fixed order.
  ///
  /// Order is deliberate: anchor comments/markers become real elements
  /// before the link-fixing passes that may depend on them run.
  fn apply_dom_transformations(document: &kuchikikiki::NodeRef) {
    Self::process_list_item_id_markers(document);
    Self::process_header_anchor_comments(document);
    Self::process_list_item_inline_anchors(document);
    Self::process_paragraph_inline_anchors(document);
    Self::process_remaining_inline_anchors(document);
    Self::process_option_anchor_links(document);
    Self::process_empty_auto_links(document);
    Self::process_empty_html_links(document);
  }
665
  /// Replaces `<!-- nixos-anchor-id:ID -->` comments that sit directly
  /// inside `<li>` elements with `<span id="ID" class="nixos-anchor">`.
  fn process_list_item_id_markers(document: &kuchikikiki::NodeRef) {
    // Collect first, mutate after: editing the tree while iterating over
    // `inclusive_descendants` would disturb the traversal.
    let mut to_modify = Vec::new();

    for comment in document.inclusive_descendants() {
      if let Some(comment_node) = comment.as_comment() {
        let comment_text = comment_node.borrow();
        if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
          // 16 == "nixos-anchor-id:".len()
          let id = comment_text[id_start + 16..].trim();
          // Only accept safe id characters to avoid injecting markup.
          if !id.is_empty()
            && id
              .chars()
              .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
          {
            // Only rewrite markers whose direct parent is a list item.
            if let Some(parent) = comment.parent()
              && let Some(element) = parent.as_element()
              && element.name.local.as_ref() == "li"
            {
              to_modify.push((comment.clone(), id.to_string()));
            }
          }
        }
      }
    }

    for (comment_node, id) in to_modify {
      let span = kuchikikiki::NodeRef::new_element(
        markup5ever::QualName::new(
          None,
          markup5ever::ns!(html),
          local_name!("span"),
        ),
        vec![
          (
            kuchikikiki::ExpandedName::new("", "id"),
            kuchikikiki::Attribute {
              prefix: None,
              value: id,
            },
          ),
          (
            kuchikikiki::ExpandedName::new("", "class"),
            kuchikikiki::Attribute {
              prefix: None,
              value: "nixos-anchor".into(),
            },
          ),
        ],
      );
      comment_node.insert_after(span);
      comment_node.detach();
    }
  }
720
  /// Converts `<!-- anchor:ID -->` comments found directly inside
  /// `<h1>`–`<h6>` elements into an `id` attribute on the header itself.
  fn process_header_anchor_comments(document: &kuchikikiki::NodeRef) {
    // Two-phase collect-then-mutate, as in the sibling DOM passes.
    let mut to_modify = Vec::new();

    for comment in document.inclusive_descendants() {
      if let Some(comment_node) = comment.as_comment() {
        let comment_text = comment_node.borrow();
        if let Some(anchor_start) = comment_text.find("anchor:") {
          // 7 == "anchor:".len()
          let id = comment_text[anchor_start + 7..].trim();
          // Restrict ids to safe characters.
          if !id.is_empty()
            && id
              .chars()
              .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
          {
            if let Some(parent) = comment.parent()
              && let Some(element) = parent.as_element()
            {
              let tag_name = element.name.local.as_ref();
              if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
                to_modify.push((
                  parent.clone(),
                  comment.clone(),
                  id.to_string(),
                ));
              }
            }
          }
        }
      }
    }

    for (header_element, comment_node, id) in to_modify {
      if let Some(element) = header_element.as_element() {
        element
          .attributes
          .borrow_mut()
          .insert(local_name!("id"), id);
        comment_node.detach();
      }
    }
  }
763
  /// Rewrites `[]{#id}` inline-anchor syntax inside `<li>` elements into a
  /// `<span id class="nixos-anchor">` followed by the trailing text.
  ///
  /// Items containing `code`/`pre` are skipped so literal anchor syntax in
  /// code samples survives.
  ///
  /// NOTE(review): this rebuilds the item from its *text* contents, so any
  /// inline markup inside the `<li>` (and any text before the marker) is
  /// dropped — presumably acceptable for anchor-bearing items; confirm
  /// against real documents.
  fn process_list_item_inline_anchors(document: &kuchikikiki::NodeRef) {
    for li_node in safe_select(document, "li") {
      let li_element = li_node;

      let has_code = !safe_select(&li_element, "code, pre").is_empty();
      if has_code {
        continue;
      }

      let text_content = li_element.text_contents();

      if let Some(anchor_start) = text_content.find("[]{#")
        && let Some(anchor_end) = text_content[anchor_start..].find('}')
      {
        // 4 == "[]{#".len(); anchor_end is relative to anchor_start.
        let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
        if !id.is_empty()
          && id
            .chars()
            .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
        {
          // Text after the closing '}' is re-appended after the span.
          let remaining_content =
            &text_content[anchor_start + anchor_end + 1..];

          for child in li_element.children() {
            child.detach();
          }

          let span = kuchikikiki::NodeRef::new_element(
            markup5ever::QualName::new(
              None,
              markup5ever::ns!(html),
              local_name!("span"),
            ),
            vec![
              (
                kuchikikiki::ExpandedName::new("", "id"),
                kuchikikiki::Attribute {
                  prefix: None,
                  value: id.into(),
                },
              ),
              (
                kuchikikiki::ExpandedName::new("", "class"),
                kuchikikiki::Attribute {
                  prefix: None,
                  value: "nixos-anchor".into(),
                },
              ),
            ],
          );
          li_element.append(span);
          if !remaining_content.is_empty() {
            li_element
              .append(kuchikikiki::NodeRef::new_text(remaining_content));
          }
        }
      }
    }
  }
826
  /// Rewrites `[]{#id}` inline-anchor syntax inside `<p>` elements — same
  /// scheme as `process_list_item_inline_anchors`, applied to paragraphs.
  ///
  /// NOTE(review): like the list-item pass, this rebuilds the paragraph
  /// from its text contents, dropping inline markup and any text before
  /// the marker — confirm this is intended for anchored paragraphs.
  fn process_paragraph_inline_anchors(document: &kuchikikiki::NodeRef) {
    for p_node in safe_select(document, "p") {
      let p_element = p_node;

      // Skip paragraphs containing code samples.
      let has_code = !safe_select(&p_element, "code, pre").is_empty();
      if has_code {
        continue;
      }

      let text_content = p_element.text_contents();

      if let Some(anchor_start) = text_content.find("[]{#")
        && let Some(anchor_end) = text_content[anchor_start..].find('}')
      {
        // 4 == "[]{#".len(); anchor_end is relative to anchor_start.
        let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
        if !id.is_empty()
          && id
            .chars()
            .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
        {
          // Text after the closing '}' is re-appended after the span.
          let remaining_content =
            &text_content[anchor_start + anchor_end + 1..];

          for child in p_element.children() {
            child.detach();
          }

          let span = kuchikikiki::NodeRef::new_element(
            markup5ever::QualName::new(
              None,
              markup5ever::ns!(html),
              local_name!("span"),
            ),
            vec![
              (
                kuchikikiki::ExpandedName::new("", "id"),
                kuchikikiki::Attribute {
                  prefix: None,
                  value: id.into(),
                },
              ),
              (
                kuchikikiki::ExpandedName::new("", "class"),
                kuchikikiki::Attribute {
                  prefix: None,
                  value: "nixos-anchor".into(),
                },
              ),
            ],
          );
          p_element.append(span);
          if !remaining_content.is_empty() {
            p_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
          }
        }
      }
    }
  }
888
  /// Final `[]{#id}` sweep: rewrites anchors in any text node that is not
  /// inside `code`/`pre`, splitting the text node around each generated
  /// span so surrounding text is preserved.
  fn process_remaining_inline_anchors(document: &kuchikikiki::NodeRef) {
    let mut text_nodes_to_process = Vec::new();

    for node in document.inclusive_descendants() {
      if let Some(text_node) = node.as_text() {
        // Walk the ancestor chain to exclude text inside code/pre.
        let mut parent = node.parent();
        let mut in_code = false;
        while let Some(p) = parent {
          if let Some(element) = p.as_element()
            && (element.name.local == local_name!("code")
              || element.name.local == local_name!("pre"))
          {
            in_code = true;
            break;
          }
          parent = p.parent();
        }

        if !in_code {
          let text_content = text_node.borrow().clone();
          if text_content.contains("[]{#") {
            text_nodes_to_process.push((node.clone(), text_content));
          }
        }
      }
    }

    for (text_node, text_content) in text_nodes_to_process {
      // `last_end` and `i` are indices into `chars`, not byte offsets.
      let mut last_end = 0;
      let mut new_children = Vec::new();

      let chars = text_content.chars().collect::<Vec<_>>();
      let mut i = 0;
      while i < chars.len() {
        // Look for the literal "[]{#" opener.
        if i + 4 < chars.len()
          && chars[i] == '['
          && chars[i + 1] == ']'
          && chars[i + 2] == '{'
          && chars[i + 3] == '#'
        {
          let anchor_start = i;
          // Read the id up to the closing '}', safe characters only.
          i += 4;
          let mut id = String::new();
          while i < chars.len() && chars[i] != '}' {
            if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_'
            {
              id.push(chars[i]);
              i += 1;
            } else {
              break;
            }
          }

          if i < chars.len() && chars[i] == '}' && !id.is_empty() {
            let anchor_end = i + 1;

            // Preserve the text preceding this anchor.
            if anchor_start > last_end {
              let before_text: String =
                chars[last_end..anchor_start].iter().collect();
              if !before_text.is_empty() {
                new_children.push(kuchikikiki::NodeRef::new_text(before_text));
              }
            }

            let span = kuchikikiki::NodeRef::new_element(
              markup5ever::QualName::new(
                None,
                markup5ever::ns!(html),
                local_name!("span"),
              ),
              vec![
                (
                  kuchikikiki::ExpandedName::new("", "id"),
                  kuchikikiki::Attribute {
                    prefix: None,
                    value: id,
                  },
                ),
                (
                  kuchikikiki::ExpandedName::new("", "class"),
                  kuchikikiki::Attribute {
                    prefix: None,
                    value: "nixos-anchor".into(),
                  },
                ),
              ],
            );
            new_children.push(span);

            last_end = anchor_end;
            i = anchor_end;
          } else {
            // Malformed anchor: advance one char and keep scanning.
            i += 1;
          }
        } else {
          i += 1;
        }
      }

      // Preserve any trailing text after the last anchor.
      if last_end < chars.len() {
        let after_text: String = chars[last_end..].iter().collect();
        if !after_text.is_empty() {
          new_children.push(kuchikikiki::NodeRef::new_text(after_text));
        }
      }

      // Only replace the original text node when something was rewritten.
      if !new_children.is_empty() {
        for child in new_children {
          text_node.insert_before(child);
        }
        text_node.detach();
      }
    }
  }
1014
  /// Fills in display text for same-page links (`href="#..."`) whose text
  /// is empty or still the `{{ANCHOR}}` placeholder, deriving the text from
  /// the anchor id.
  fn process_empty_auto_links(document: &kuchikikiki::NodeRef) {
    for link_node in safe_select(document, "a") {
      let link_element = link_node;
      if let Some(element) = link_element.as_element() {
        let href = element
          .attributes
          .borrow()
          .get(local_name!("href"))
          .map(std::string::ToString::to_string);
        let text_content = link_element.text_contents();

        if let Some(href_value) = href
          && href_value.starts_with('#')
          && (text_content.trim().is_empty()
            || text_content.trim() == "{{ANCHOR}}")
        {
          // Drop the placeholder children before appending real text.
          if text_content.trim() == "{{ANCHOR}}" {
            for child in link_element.children() {
              child.detach();
            }
          }
          let display_text = Self::humanize_anchor_id(&href_value);
          link_element.append(kuchikikiki::NodeRef::new_text(display_text));
        }
      }
    }
  }
1045
  /// Same fixup as `process_empty_auto_links`, but driven by an attribute
  /// selector (`a[href^='#']`), catching links parsed from raw HTML.
  fn process_empty_html_links(document: &kuchikikiki::NodeRef) {
    for link_node in safe_select(document, "a[href^='#']") {
      let link_element = link_node;
      let text_content = link_element.text_contents();

      if text_content.trim().is_empty() || text_content.trim() == "{{ANCHOR}}" {
        // Remove the placeholder children before appending derived text.
        if text_content.trim() == "{{ANCHOR}}" {
          for child in link_element.children() {
            child.detach();
          }
        }
        if let Some(element) = link_element.as_element()
          && let Some(href) =
            element.attributes.borrow().get(local_name!("href"))
        {
          let display_text = Self::humanize_anchor_id(href);
          link_element.append(kuchikikiki::NodeRef::new_text(display_text));
        }
      }
    }
  }
1069
  /// Rewrites `#opt-*` links to point at `options.html#opt-*` and, when the
  /// link has no usable text, synthesizes an option name from the anchor.
  fn process_option_anchor_links(document: &kuchikikiki::NodeRef) {
    // Two-phase collect-then-mutate, as with the other DOM passes.
    let mut to_modify = Vec::new();

    for link_node in safe_select(document, "a[href^='#opt-']") {
      let link_element = link_node;
      if let Some(element) = link_element.as_element() {
        let href = element
          .attributes
          .borrow()
          .get(local_name!("href"))
          .map(std::string::ToString::to_string);
        let text_content = link_element.text_contents();

        if let Some(href_value) = href
          && href_value.starts_with("#opt-")
        {
          // Strip the leading '#'; keep the "opt-..." anchor itself.
          let option_anchor = href_value[1..].to_string();
          let needs_text_replacement = text_content.trim().is_empty()
            || text_content.trim() == "{{ANCHOR}}";
          to_modify.push((
            link_element.clone(),
            option_anchor,
            needs_text_replacement,
          ));
        }
      }
    }

    for (link_element, option_anchor, needs_text_replacement) in to_modify {
      if let Some(element) = link_element.as_element() {
        let new_href = format!("options.html#{option_anchor}");
        element
          .attributes
          .borrow_mut()
          .insert(local_name!("href"), new_href);

        if needs_text_replacement {
          for child in link_element.children() {
            child.detach();
          }

          // "opt-services-foo" → "services.foo" (dashes become dots).
          if let Some(option_path) = option_anchor.strip_prefix("opt-") {
            let option_name = option_path.replace('-', ".");
            link_element.append(kuchikikiki::NodeRef::new_text(option_name));
          }
        }
      }
    }
  }
1125
1126 fn humanize_anchor_id(anchor: &str) -> String {
1128 let cleaned = anchor.trim_start_matches('#');
1130
1131 let without_prefix = cleaned
1133 .trim_start_matches("sec-")
1134 .trim_start_matches("ssec-")
1135 .trim_start_matches("opt-");
1136
1137 let spaced = without_prefix.replace(['-', '_'], " ");
1139
1140 spaced
1142 .split_whitespace()
1143 .map(|word| {
1144 let mut chars = word.chars();
1145 chars.next().map_or_else(String::new, |c| {
1146 c.to_uppercase().collect::<String>() + chars.as_str()
1147 })
1148 })
1149 .collect::<Vec<String>>()
1150 .join(" ")
1151 }
1152}
1153
/// Recursively flattens the inline children of `node` into plain text.
///
/// Text and code literals are appended directly; container inlines (links,
/// emphasis, strikethrough, sub/superscript, footnote references) are
/// recursed into; inline HTML and images contribute nothing.
pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
  let mut text = String::new();
  for child in node.children() {
    match &child.data.borrow().value {
      NodeValue::Text(t) => text.push_str(t),
      NodeValue::Code(t) => text.push_str(&t.literal),
      NodeValue::Link(..)
      | NodeValue::Emph
      | NodeValue::Strong
      | NodeValue::Strikethrough
      | NodeValue::Superscript
      | NodeValue::Subscript
      | NodeValue::FootnoteReference(..) => {
        text.push_str(&extract_inline_text(child));
      },
      #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
      NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
      _ => {},
    }
  }
  text
}
1177
1178pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
1180 let mut files = Vec::with_capacity(100);
1181
1182 for entry in WalkDir::new(input_dir)
1183 .follow_links(true)
1184 .into_iter()
1185 .filter_map(Result::ok)
1186 {
1187 let path = entry.path();
1188 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
1189 files.push(path.to_owned());
1190 }
1191 }
1192
1193 trace!("Found {} markdown files to process", files.len());
1194 files
1195}
1196
/// Feature flags queryable via `MarkdownProcessor::has_feature`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessorFeature {
  /// GitHub-flavored markdown extensions.
  Gfm,
  /// Nixpkgs-specific preprocessing and manpage handling.
  Nixpkgs,
  /// Code-block syntax highlighting.
  SyntaxHighlighting,
  /// A manpage-name → URL mapping was successfully loaded.
  ManpageUrls,
}
1209
/// Parses `html` into a DOM, applies `transform_fn`, and serializes back.
///
/// The work is wrapped in `process_safe` with the original `html` as the
/// fallback value, so a failure inside the transform yields the input
/// unchanged.
fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
where
  F: FnOnce(&kuchikikiki::NodeRef),
{
  process_safe(
    html,
    |html| {
      use tendril::TendrilSink;

      let document = kuchikikiki::parse_html().one(html);
      transform_fn(&document);

      let mut out = Vec::new();
      // A serialization error is ignored; whatever was written (possibly
      // nothing) is returned, with invalid UTF-8 mapped to "".
      let _ = document.serialize(&mut out);
      String::from_utf8(out).unwrap_or_default()
    },
    html,
  )
}