1use std::{
6 collections::HashMap,
7 path::{Path, PathBuf},
8};
9
10use comrak::{
11 Arena,
12 nodes::{AstNode, NodeHeading, NodeValue},
13 options::Options,
14 parse_document,
15};
16use log::trace;
17use markup5ever::local_name;
18use walkdir::WalkDir;
19
20use super::{
21 dom::safe_select,
22 process::process_safe,
23 types::{
24 AstTransformer,
25 MarkdownOptions,
26 MarkdownProcessor,
27 PromptTransformer,
28 },
29};
30use crate::{
31 syntax::create_default_manager,
32 types::{Header, MarkdownResult},
33 utils,
34};
35
36impl MarkdownProcessor {
37 #[must_use]
39 pub fn new(options: MarkdownOptions) -> Self {
40 let manpage_urls = options
41 .manpage_urls_path
42 .as_ref()
43 .and_then(|path| crate::utils::load_manpage_urls(path).ok());
44
45 let syntax_manager = if options.highlight_code {
46 match create_default_manager() {
47 Ok(manager) => {
48 log::info!("Syntax highlighting initialized successfully");
49 Some(manager)
50 },
51 Err(e) => {
52 log::error!("Failed to initialize syntax highlighting: {e}");
53 log::warn!(
54 "Continuing without syntax highlighting - code blocks will not be \
55 highlighted"
56 );
57 None
58 },
59 }
60 } else {
61 None
62 };
63
64 Self {
65 options,
66 manpage_urls,
67 syntax_manager,
68 base_dir: std::path::PathBuf::from("."),
69 }
70 }
71
/// Returns the options this processor was constructed with.
#[must_use]
pub const fn options(&self) -> &MarkdownOptions {
  &self.options
}
77
78 #[must_use]
80 pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
81 self.base_dir = base_dir.to_path_buf();
82 self
83 }
84
85 #[must_use]
87 pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
88 match feature {
89 ProcessorFeature::Gfm => self.options.gfm,
90 ProcessorFeature::Nixpkgs => self.options.nixpkgs,
91 ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
92 ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
93 }
94 }
95
/// Returns the loaded manpage URL mappings, or `None` when none were loaded.
#[must_use]
pub const fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
  self.manpage_urls.as_ref()
}
101
102 #[must_use]
104 pub fn highlight_codeblocks(&self, html: &str) -> String {
105 use kuchikikiki::parse_html;
106 use tendril::TendrilSink;
107
108 if !self.options.highlight_code || self.syntax_manager.is_none() {
109 return html.to_string();
110 }
111
112 let document = parse_html().one(html);
113
114 let mut code_blocks = Vec::new();
116 for pre_node in safe_select(&document, "pre > code") {
117 let code_node = pre_node;
118 if let Some(element) = code_node.as_element() {
119 let language = element
120 .attributes
121 .borrow()
122 .get("class")
123 .and_then(|class| class.strip_prefix("language-"))
124 .unwrap_or("text")
125 .to_string();
126 let code_text = code_node.text_contents();
127
128 if let Some(pre_parent) = code_node.parent() {
129 code_blocks.push((
130 pre_parent.clone(),
131 code_node.clone(),
132 code_text,
133 language,
134 ));
135 }
136 }
137 }
138
139 for (pre_element, _code_node, code_text, language) in code_blocks {
141 if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
142 {
143 let wrapped_html = format!(
145 r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
146 );
147 let fragment = parse_html().one(wrapped_html.as_str());
148 pre_element.insert_after(fragment);
149 pre_element.detach();
150 }
151 }
153
154 let mut buf = Vec::new();
155 if let Err(e) = document.serialize(&mut buf) {
156 log::warn!("DOM serialization failed: {e:?}");
157 return html.to_string(); }
159 String::from_utf8(buf).unwrap_or_else(|_| html.to_string())
160 }
161
162 fn handle_hardtabs(&self, code: &str) -> String {
164 use super::types::TabStyle;
165
166 if !code.contains('\t') {
168 return code.to_string();
169 }
170
171 match self.options.tab_style {
172 TabStyle::None => code.to_string(),
174
175 TabStyle::Warn => {
177 log::warn!(
178 "Hard tabs detected in code block. Consider using spaces for \
179 consistency. Tools like editorconfig may help you normalize spaces \
180 in your documents."
181 );
182 code.to_string()
183 },
184
185 TabStyle::Normalize => {
188 log::debug!("Replacing hard tabs with spaces");
189 code.replace('\t', " ")
190 },
191 }
192 }
193
194 fn process_hardtabs(&self, markdown: &str) -> String {
196 use super::types::TabStyle;
197 use crate::utils::codeblock::FenceTracker;
198
199 if self.options.tab_style == TabStyle::None {
201 return markdown.to_string();
202 }
203
204 let mut result = String::with_capacity(markdown.len());
205 let mut lines = markdown.lines().peekable();
206 let mut tracker = FenceTracker::new();
207
208 while let Some(line) = lines.next() {
209 tracker = tracker.process_line(line);
210
211 let processed_line = if tracker.in_code_block() && line.contains('\t') {
213 self.handle_hardtabs(line)
214 } else {
215 line.to_string()
216 };
217
218 result.push_str(&processed_line);
219
220 if lines.peek().is_some() {
222 result.push('\n');
223 }
224 }
225
226 result
227 }
228
229 fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
232 if !self.options.highlight_code {
233 return None;
234 }
235
236 let syntax_manager = self.syntax_manager.as_ref()?;
237
238 syntax_manager
239 .highlight_code(code, language, self.options.highlight_theme.as_deref())
240 .ok()
241 }
242
243 #[must_use]
245 pub fn render(&self, markdown: &str) -> MarkdownResult {
246 let (preprocessed, included_files) = self.preprocess(markdown);
247 let (headers, title) = self.extract_headers(&preprocessed);
248 let html = self.process_html_pipeline(&preprocessed);
249
250 MarkdownResult {
251 html,
252 headers,
253 title,
254 included_files,
255 }
256 }
257
258 fn process_html_pipeline(&self, content: &str) -> String {
260 let mut html = self.convert_to_html(content);
261
262 if cfg!(feature = "ndg-flavored") {
264 #[cfg(feature = "ndg-flavored")]
265 {
266 html = super::extensions::process_option_references(
267 &html,
268 self.options.valid_options.as_ref(),
269 );
270 }
271 }
272
273 if self.options.nixpkgs {
274 html = self.process_manpage_references_html(&html);
275 }
276
277 if self.options.highlight_code {
278 html = self.highlight_codeblocks(&html);
279 }
280
281 self.kuchiki_postprocess(&html)
282 }
283
284 fn preprocess(
286 &self,
287 content: &str,
288 ) -> (String, Vec<crate::types::IncludedFile>) {
289 let mut processed = content.to_string();
290 let mut included_files = Vec::new();
291
292 processed = super::extensions::process_myst_autolinks(&processed);
294
295 processed = self.process_hardtabs(&processed);
297
298 if self.options.nixpkgs {
299 let (content, files) = self.apply_nixpkgs_preprocessing(&processed);
300 processed = content;
301 included_files = files;
302 }
303
304 if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
305 processed = super::extensions::process_role_markup(
306 &processed,
307 self.manpage_urls.as_ref(),
308 self.options.auto_link_options,
309 self.options.valid_options.as_ref(),
310 );
311 }
312
313 if cfg!(feature = "wiki") {
314 processed = super::extensions::process_wikilinks(&processed);
315 }
316
317 (processed, included_files)
318 }
319
/// Runs the nixpkgs-specific text passes: file includes (resolved against
/// the base directory), block elements, then inline anchors.
///
/// A failing include pass is logged and the original content is used with
/// an empty list of included files.
#[cfg(feature = "nixpkgs")]
fn apply_nixpkgs_preprocessing(
  &self,
  content: &str,
) -> (String, Vec<crate::types::IncludedFile>) {
  let (with_includes, included_files) =
    super::extensions::process_file_includes(content, &self.base_dir, 0)
      .unwrap_or_else(|e| {
        log::warn!(
          "File include processing failed: {e}. Continuing without includes."
        );
        (content.to_string(), Vec::new())
      });

  let with_blocks = super::extensions::process_block_elements(&with_includes);
  (
    super::extensions::process_inline_anchors(&with_blocks),
    included_files,
  )
}
341
342 #[cfg(not(feature = "nixpkgs"))]
344 fn apply_nixpkgs_preprocessing(
345 &self,
346 content: &str,
347 ) -> (String, Vec<crate::types::IncludedFile>) {
348 (content.to_string(), Vec::new())
349 }
350
351 #[must_use]
353 pub fn extract_headers(
354 &self,
355 content: &str,
356 ) -> (Vec<Header>, Option<String>) {
357 use std::fmt::Write;
358
359 let arena = Arena::new();
360 let options = self.comrak_options();
361
362 let mut normalized = String::with_capacity(content.len());
364 for line in content.lines() {
365 let trimmed = line.trim_end();
366 if !trimmed.starts_with('#')
367 && let Some(anchor_start) = trimmed.rfind("{#")
368 && let Some(anchor_end) = trimmed[anchor_start..].find('}')
369 {
370 let text = trimmed[..anchor_start].trim_end();
371 let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
372 let _ = writeln!(normalized, "## {text} {{#{id}}}");
373 continue;
374 }
375 normalized.push_str(line);
376 normalized.push('\n');
377 }
378
379 let root = parse_document(&arena, &normalized, &options);
380
381 let mut headers = Vec::new();
382 let mut found_title = None;
383
384 for node in root.descendants() {
385 if let NodeValue::Heading(NodeHeading { level, .. }) =
386 &node.data.borrow().value
387 {
388 let mut text = String::new();
389 let mut explicit_id = None;
390
391 for child in node.children() {
392 match &child.data.borrow().value {
393 NodeValue::Text(t) => text.push_str(t),
394 NodeValue::Code(t) => text.push_str(&t.literal),
395 NodeValue::Link(..)
396 | NodeValue::Emph
397 | NodeValue::Strong
398 | NodeValue::Subscript
399 | NodeValue::Strikethrough
400 | NodeValue::Superscript
401 | NodeValue::FootnoteReference(..) => {
402 text.push_str(&extract_inline_text(child));
403 },
404 NodeValue::HtmlInline(html) => {
405 let html_str = html.as_str();
407 if let Some(start) = html_str.find("{#")
408 && let Some(end) = html_str[start..].find('}')
409 {
410 let anchor = &html_str[start + 2..start + end];
411 explicit_id = Some(anchor.to_string());
412 }
413 },
414 #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
415 NodeValue::Image(..) => {},
416 _ => {},
417 }
418 }
419
420 let trimmed = text.trim_end();
422 #[allow(clippy::option_if_let_else)]
423 let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
425 if let Some(end) = trimmed[start..].find('}') {
426 let anchor = &trimmed[start + 2..start + end];
427 (trimmed[..start].trim_end().to_string(), anchor.to_string())
428 } else {
429 (
430 text.clone(),
431 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
432 )
433 }
434 } else {
435 (
436 text.clone(),
437 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
438 )
439 };
440 if *level == 1 && found_title.is_none() {
441 found_title = Some(final_text.clone());
442 }
443 headers.push(Header {
444 text: final_text,
445 level: *level,
446 id,
447 });
448 }
449 }
450
451 (headers, found_title)
452 }
453
454 fn convert_to_html(&self, content: &str) -> String {
456 let arena = Arena::new();
458 let options = self.comrak_options();
459 let root = parse_document(&arena, content, &options);
460
461 let prompt_transformer = PromptTransformer;
463 prompt_transformer.transform(root);
464
465 let mut html_output = String::new();
466 if let Err(e) = comrak::format_html(root, &options, &mut html_output) {
467 log::error!("Failed to format HTML: {e}");
468 }
469
470 Self::process_header_anchors_html(&html_output)
472 }
473
474 fn process_header_anchors_html(html: &str) -> String {
478 use std::sync::LazyLock;
479
480 use regex::Regex;
481
482 static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
484 Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>")
485 .unwrap_or_else(|e| {
486 log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
487 utils::never_matching_regex().unwrap_or_else(|_| {
488 #[allow(
489 clippy::expect_used,
490 reason = "This pattern is guaranteed to be valid"
491 )]
492 Regex::new(r"[^\s\S]")
493 .expect("regex pattern [^\\s\\S] should always compile")
494 })
495 })
496 });
497
498 static HEADER_NO_ID_RE: LazyLock<Regex> = LazyLock::new(|| {
501 Regex::new(r"<h([1-6])>(.*?)</h[1-6]>").unwrap_or_else(|e| {
502 log::error!("Failed to compile HEADER_NO_ID_RE regex: {e}");
503 utils::never_matching_regex().unwrap_or_else(|_| {
504 #[allow(
505 clippy::expect_used,
506 reason = "This pattern is guaranteed to be valid"
507 )]
508 Regex::new(r"[^\s\S]")
509 .expect("regex pattern [^\\s\\S] should always compile")
510 })
511 })
512 });
513
514 static HTML_TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
516 Regex::new(r"<[^>]+>").unwrap_or_else(|e| {
517 log::error!("Failed to compile HTML_TAG_RE regex: {e}");
518 utils::never_matching_regex().unwrap_or_else(|_| {
519 #[allow(
520 clippy::expect_used,
521 reason = "This pattern is guaranteed to be valid"
522 )]
523 Regex::new(r"[^\s\S]")
524 .expect("regex pattern [^\\s\\S] should always compile")
525 })
526 })
527 });
528
529 let result = HEADER_ANCHOR_RE
531 .replace_all(html, |caps: ®ex::Captures| {
532 let level = &caps[1];
533 let prefix = &caps[2];
534 let id = &caps[3];
535 let suffix = &caps[4];
536 format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
537 })
538 .to_string();
539
540 HEADER_NO_ID_RE
542 .replace_all(&result, |caps: ®ex::Captures| {
543 let level = &caps[1];
544 let content = &caps[2];
545 let text_only = HTML_TAG_RE.replace_all(content, "");
547 let id = utils::slugify(&text_only);
548 if id.is_empty() {
549 format!("<h{level}>{content}</h{level}>")
551 } else {
552 format!("<h{level} id=\"{id}\">{content}</h{level}>")
553 }
554 })
555 .to_string()
556 }
557
558 fn comrak_options(&self) -> Options<'_> {
560 let mut options = Options::default();
561 if self.options.gfm {
563 options.extension.table = true;
564 options.extension.footnotes = true;
565 options.extension.strikethrough = true;
566 options.extension.tasklist = true;
567 options.extension.superscript = true;
568 options.extension.autolink = true;
569 }
570
571 options.render.r#unsafe = true;
574
575 options.extension.header_id_prefix = None;
577 options.extension.description_lists = true;
578 options
579 }
580
/// Hands `html` and the loaded manpage URL mappings (if any) to the
/// manpage-reference extension and returns its result.
#[cfg(feature = "nixpkgs")]
fn process_manpage_references_html(&self, html: &str) -> String {
  super::extensions::process_manpage_references(
    html,
    self.manpage_urls.as_ref(),
  )
}
589
590 #[cfg(not(feature = "nixpkgs"))]
593 fn process_manpage_references_html(&self, html: &str) -> String {
594 html.to_string()
595 }
596
597 #[allow(
599 clippy::unused_self,
600 reason = "Method signature matches processor pattern"
601 )]
602 fn kuchiki_postprocess(&self, html: &str) -> String {
603 kuchiki_postprocess_html(html, |document| {
605 Self::apply_dom_transformations(document);
606 })
607 }
608
609 fn apply_dom_transformations(document: &kuchikikiki::NodeRef) {
611 Self::process_list_item_id_markers(document);
612 Self::process_header_anchor_comments(document);
613 Self::process_list_item_inline_anchors(document);
614 Self::process_paragraph_inline_anchors(document);
615 Self::process_remaining_inline_anchors(document);
616 Self::process_markdown_links(document);
617 Self::process_option_anchor_links(document);
618 Self::process_empty_auto_links(document);
619 Self::process_empty_html_links(document);
620 }
621
622 fn process_list_item_id_markers(document: &kuchikikiki::NodeRef) {
624 let mut to_modify = Vec::new();
625
626 for comment in document.inclusive_descendants() {
627 if let Some(comment_node) = comment.as_comment() {
628 let comment_text = comment_node.borrow();
629 if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
630 let id = comment_text[id_start + 16..].trim();
631 if !id.is_empty()
632 && id
633 .chars()
634 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
635 {
636 if let Some(parent) = comment.parent()
638 && let Some(element) = parent.as_element()
639 && element.name.local.as_ref() == "li"
640 {
641 to_modify.push((comment.clone(), id.to_string()));
642 }
643 }
644 }
645 }
646 }
647
648 for (comment_node, id) in to_modify {
649 let span = kuchikikiki::NodeRef::new_element(
650 markup5ever::QualName::new(
651 None,
652 markup5ever::ns!(html),
653 local_name!("span"),
654 ),
655 vec![
656 (
657 kuchikikiki::ExpandedName::new("", "id"),
658 kuchikikiki::Attribute {
659 prefix: None,
660 value: id,
661 },
662 ),
663 (
664 kuchikikiki::ExpandedName::new("", "class"),
665 kuchikikiki::Attribute {
666 prefix: None,
667 value: "nixos-anchor".into(),
668 },
669 ),
670 ],
671 );
672 comment_node.insert_after(span);
673 comment_node.detach();
674 }
675 }
676
677 fn process_header_anchor_comments(document: &kuchikikiki::NodeRef) {
679 let mut to_modify = Vec::new();
680
681 for comment in document.inclusive_descendants() {
682 if let Some(comment_node) = comment.as_comment() {
683 let comment_text = comment_node.borrow();
684 if let Some(anchor_start) = comment_text.find("anchor:") {
685 let id = comment_text[anchor_start + 7..].trim();
686 if !id.is_empty()
687 && id
688 .chars()
689 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
690 {
691 if let Some(parent) = comment.parent()
693 && let Some(element) = parent.as_element()
694 {
695 let tag_name = element.name.local.as_ref();
696 if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
697 to_modify.push((
698 parent.clone(),
699 comment.clone(),
700 id.to_string(),
701 ));
702 }
703 }
704 }
705 }
706 }
707 }
708
709 for (header_element, comment_node, id) in to_modify {
710 if let Some(element) = header_element.as_element() {
711 element
712 .attributes
713 .borrow_mut()
714 .insert(local_name!("id"), id);
715 comment_node.detach();
716 }
717 }
718 }
719
720 fn process_list_item_inline_anchors(document: &kuchikikiki::NodeRef) {
722 for li_node in safe_select(document, "li") {
723 let li_element = li_node;
724
725 let has_code = !safe_select(&li_element, "code, pre").is_empty();
727 if has_code {
728 continue; }
730
731 let text_content = li_element.text_contents();
732
733 if let Some(anchor_start) = text_content.find("[]{#")
734 && let Some(anchor_end) = text_content[anchor_start..].find('}')
735 {
736 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
737 if !id.is_empty()
738 && id
739 .chars()
740 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
741 {
742 let remaining_content =
743 &text_content[anchor_start + anchor_end + 1..];
744
745 for child in li_element.children() {
747 child.detach();
748 }
749
750 let span = kuchikikiki::NodeRef::new_element(
751 markup5ever::QualName::new(
752 None,
753 markup5ever::ns!(html),
754 local_name!("span"),
755 ),
756 vec![
757 (
758 kuchikikiki::ExpandedName::new("", "id"),
759 kuchikikiki::Attribute {
760 prefix: None,
761 value: id.into(),
762 },
763 ),
764 (
765 kuchikikiki::ExpandedName::new("", "class"),
766 kuchikikiki::Attribute {
767 prefix: None,
768 value: "nixos-anchor".into(),
769 },
770 ),
771 ],
772 );
773 li_element.append(span);
774 if !remaining_content.is_empty() {
775 li_element
776 .append(kuchikikiki::NodeRef::new_text(remaining_content));
777 }
778 }
779 }
780 }
781 }
782
783 fn process_paragraph_inline_anchors(document: &kuchikikiki::NodeRef) {
785 for p_node in safe_select(document, "p") {
786 let p_element = p_node;
787
788 let has_code = !safe_select(&p_element, "code, pre").is_empty();
790 if has_code {
791 continue; }
793
794 let text_content = p_element.text_contents();
795
796 if let Some(anchor_start) = text_content.find("[]{#")
797 && let Some(anchor_end) = text_content[anchor_start..].find('}')
798 {
799 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
800 if !id.is_empty()
801 && id
802 .chars()
803 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
804 {
805 let remaining_content =
806 &text_content[anchor_start + anchor_end + 1..];
807
808 for child in p_element.children() {
810 child.detach();
811 }
812
813 let span = kuchikikiki::NodeRef::new_element(
814 markup5ever::QualName::new(
815 None,
816 markup5ever::ns!(html),
817 local_name!("span"),
818 ),
819 vec![
820 (
821 kuchikikiki::ExpandedName::new("", "id"),
822 kuchikikiki::Attribute {
823 prefix: None,
824 value: id.into(),
825 },
826 ),
827 (
828 kuchikikiki::ExpandedName::new("", "class"),
829 kuchikikiki::Attribute {
830 prefix: None,
831 value: "nixos-anchor".into(),
832 },
833 ),
834 ],
835 );
836 p_element.append(span);
837 if !remaining_content.is_empty() {
838 p_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
839 }
840 }
841 }
842 }
843 }
844
845 fn process_remaining_inline_anchors(document: &kuchikikiki::NodeRef) {
847 let mut text_nodes_to_process = Vec::new();
848
849 for node in document.inclusive_descendants() {
850 if let Some(text_node) = node.as_text() {
851 let mut parent = node.parent();
853 let mut in_code = false;
854 while let Some(p) = parent {
855 if let Some(element) = p.as_element()
856 && (element.name.local == local_name!("code")
857 || element.name.local == local_name!("pre"))
858 {
859 in_code = true;
860 break;
861 }
862 parent = p.parent();
863 }
864
865 if !in_code {
867 let text_content = text_node.borrow().clone();
868 if text_content.contains("[]{#") {
869 text_nodes_to_process.push((node.clone(), text_content));
870 }
871 }
872 }
873 }
874
875 for (text_node, text_content) in text_nodes_to_process {
876 let mut last_end = 0;
877 let mut new_children = Vec::new();
878
879 let chars = text_content.chars().collect::<Vec<_>>();
881 let mut i = 0;
882 while i < chars.len() {
883 if i + 4 < chars.len()
884 && chars[i] == '['
885 && chars[i + 1] == ']'
886 && chars[i + 2] == '{'
887 && chars[i + 3] == '#'
888 {
889 let anchor_start = i;
891 i += 4; let mut id = String::new();
894 while i < chars.len() && chars[i] != '}' {
895 if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_'
896 {
897 id.push(chars[i]);
898 i += 1;
899 } else {
900 break;
901 }
902 }
903
904 if i < chars.len() && chars[i] == '}' && !id.is_empty() {
905 let anchor_end = i + 1;
907
908 if anchor_start > last_end {
910 let before_text: String =
911 chars[last_end..anchor_start].iter().collect();
912 if !before_text.is_empty() {
913 new_children.push(kuchikikiki::NodeRef::new_text(before_text));
914 }
915 }
916
917 let span = kuchikikiki::NodeRef::new_element(
919 markup5ever::QualName::new(
920 None,
921 markup5ever::ns!(html),
922 local_name!("span"),
923 ),
924 vec![
925 (
926 kuchikikiki::ExpandedName::new("", "id"),
927 kuchikikiki::Attribute {
928 prefix: None,
929 value: id,
930 },
931 ),
932 (
933 kuchikikiki::ExpandedName::new("", "class"),
934 kuchikikiki::Attribute {
935 prefix: None,
936 value: "nixos-anchor".into(),
937 },
938 ),
939 ],
940 );
941 new_children.push(span);
942
943 last_end = anchor_end;
944 i = anchor_end;
945 } else {
946 i += 1;
947 }
948 } else {
949 i += 1;
950 }
951 }
952
953 if last_end < chars.len() {
955 let after_text: String = chars[last_end..].iter().collect();
956 if !after_text.is_empty() {
957 new_children.push(kuchikikiki::NodeRef::new_text(after_text));
958 }
959 }
960
961 if !new_children.is_empty() {
963 for child in new_children {
964 text_node.insert_before(child);
965 }
966 text_node.detach();
967 }
968 }
969 }
970
971 fn process_empty_auto_links(document: &kuchikikiki::NodeRef) {
973 for link_node in safe_select(document, "a") {
974 let link_element = link_node;
975 if let Some(element) = link_element.as_element() {
976 let href = element
977 .attributes
978 .borrow()
979 .get(local_name!("href"))
980 .map(std::string::ToString::to_string);
981 let text_content = link_element.text_contents();
982
983 if let Some(href_value) = href
984 && href_value.starts_with('#')
985 && (text_content.trim().is_empty()
986 || text_content.trim() == "{{ANCHOR}}")
987 {
988 if text_content.trim() == "{{ANCHOR}}" {
990 for child in link_element.children() {
991 child.detach();
992 }
993 }
994 let display_text = Self::humanize_anchor_id(&href_value);
996 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
997 }
998 }
999 }
1000 }
1001
1002 fn process_empty_html_links(document: &kuchikikiki::NodeRef) {
1004 for link_node in safe_select(document, "a[href^='#']") {
1005 let link_element = link_node;
1006 let text_content = link_element.text_contents();
1007
1008 if text_content.trim().is_empty() || text_content.trim() == "{{ANCHOR}}" {
1009 if text_content.trim() == "{{ANCHOR}}" {
1011 for child in link_element.children() {
1012 child.detach();
1013 }
1014 }
1015 if let Some(element) = link_element.as_element()
1016 && let Some(href) =
1017 element.attributes.borrow().get(local_name!("href"))
1018 {
1019 let display_text = Self::humanize_anchor_id(href);
1020 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
1021 }
1022 }
1023 }
1024 }
1025
1026 fn process_option_anchor_links(document: &kuchikikiki::NodeRef) {
1028 let mut to_modify = Vec::new();
1029
1030 for link_node in safe_select(document, "a[href^='#opt-']") {
1032 let link_element = link_node;
1033 if let Some(element) = link_element.as_element() {
1034 let href = element
1035 .attributes
1036 .borrow()
1037 .get(local_name!("href"))
1038 .map(std::string::ToString::to_string);
1039 let text_content = link_element.text_contents();
1040
1041 if let Some(href_value) = href
1042 && href_value.starts_with("#opt-")
1043 {
1044 let option_anchor = href_value[1..].to_string(); let needs_text_replacement = text_content.trim().is_empty()
1046 || text_content.trim() == "{{ANCHOR}}";
1047 to_modify.push((
1048 link_element.clone(),
1049 option_anchor,
1050 needs_text_replacement,
1051 ));
1052 }
1053 }
1054 }
1055
1056 for (link_element, option_anchor, needs_text_replacement) in to_modify {
1058 if let Some(element) = link_element.as_element() {
1059 let new_href = format!("options.html#{option_anchor}");
1060 element
1061 .attributes
1062 .borrow_mut()
1063 .insert(local_name!("href"), new_href);
1064
1065 if needs_text_replacement {
1066 for child in link_element.children() {
1068 child.detach();
1069 }
1070
1071 if let Some(option_path) = option_anchor.strip_prefix("opt-") {
1074 let option_name = option_path.replace('-', ".");
1075 link_element.append(kuchikikiki::NodeRef::new_text(option_name));
1076 }
1077 }
1078 }
1079 }
1080 }
1081
1082 fn process_markdown_links(document: &kuchikikiki::NodeRef) {
1084 for link_node in safe_select(document, "a") {
1085 let link_element = link_node;
1086 if let Some(element) = link_element.as_element() {
1087 let href = element
1088 .attributes
1089 .borrow()
1090 .get(local_name!("href"))
1091 .map(std::string::ToString::to_string);
1092
1093 if let Some(href_value) = href {
1094 if !href_value.starts_with("http://")
1096 && !href_value.starts_with("https://")
1097 && !href_value.starts_with('#')
1098 && !href_value.starts_with("mailto:")
1099 {
1100 let (path_part, suffix) = href_value
1102 .find(|c| c == '#' || c == '?')
1103 .map_or((href_value.as_str(), ""), |idx| href_value.split_at(idx));
1104
1105 if std::path::Path::new(path_part)
1106 .extension()
1107 .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
1108 {
1109 let new_href = format!("{}.html{}", &path_part[..path_part.len() - 3], suffix);
1110 element
1111 .attributes
1112 .borrow_mut()
1113 .insert(local_name!("href"), new_href);
1114 }
1115 }
1116 }
1117 }
1118 }
1119 }
1120
/// Turns an anchor such as `#sec-getting-started` into a display label such
/// as `Getting Started`.
///
/// Leading `#` characters are removed, then at most one of the prefixes
/// `ssec-`, `sec-` or `opt-` is stripped, exactly once, so an id like
/// `opt-opt-level` keeps its second `opt`. `-` and `_` become word
/// separators and the first character of every word is upper-cased.
fn humanize_anchor_id(anchor: &str) -> String {
  const KNOWN_PREFIXES: [&str; 3] = ["ssec-", "sec-", "opt-"];

  let cleaned = anchor.trim_start_matches('#');

  // `strip_prefix` removes a single occurrence, unlike
  // `trim_start_matches`, which would keep eating repeated prefixes.
  let without_prefix = KNOWN_PREFIXES
    .iter()
    .find_map(|prefix| cleaned.strip_prefix(*prefix))
    .unwrap_or(cleaned);

  let spaced = without_prefix.replace(['-', '_'], " ");

  spaced
    .split_whitespace()
    .map(|word| {
      let mut chars = word.chars();
      chars.next().map_or_else(String::new, |first| {
        first.to_uppercase().collect::<String>() + chars.as_str()
      })
    })
    .collect::<Vec<String>>()
    .join(" ")
}
1147}
1148
1149pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
1151 let mut text = String::new();
1152 for child in node.children() {
1153 match &child.data.borrow().value {
1154 NodeValue::Text(t) => text.push_str(t),
1155 NodeValue::Code(t) => text.push_str(&t.literal),
1156 NodeValue::Link(..)
1157 | NodeValue::Emph
1158 | NodeValue::Strong
1159 | NodeValue::Strikethrough
1160 | NodeValue::Superscript
1161 | NodeValue::Subscript
1162 | NodeValue::FootnoteReference(..) => {
1163 text.push_str(&extract_inline_text(child));
1164 },
1165 #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
1166 NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
1167 _ => {},
1168 }
1169 }
1170 text
1171}
1172
1173pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
1175 let mut files = Vec::with_capacity(100);
1176
1177 for entry in WalkDir::new(input_dir)
1178 .follow_links(true)
1179 .into_iter()
1180 .filter_map(Result::ok)
1181 {
1182 let path = entry.path();
1183 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
1184 files.push(path.to_owned());
1185 }
1186 }
1187
1188 trace!("Found {} markdown files to process", files.len());
1189 files
1190}
1191
/// Capabilities that can be queried with `MarkdownProcessor::has_feature`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessorFeature {
  /// Reported from the `gfm` option, which switches on the table, footnote,
  /// strikethrough, task list, superscript and autolink extensions.
  Gfm,
  /// Reported from the `nixpkgs` option.
  Nixpkgs,
  /// Reported from the `highlight_code` option.
  SyntaxHighlighting,
  /// Reported when manpage URL mappings have been loaded.
  ManpageUrls,
}
1204
1205fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
1207where
1208 F: FnOnce(&kuchikikiki::NodeRef),
1209{
1210 process_safe(
1211 html,
1212 |html| {
1213 use tendril::TendrilSink;
1214
1215 let document = kuchikikiki::parse_html().one(html);
1216 transform_fn(&document);
1217
1218 let mut out = Vec::new();
1219 let _ = document.serialize(&mut out);
1220 String::from_utf8_lossy(&out).into_owned()
1221 },
1222 html,
1223 )
1224}