use std::{
  collections::HashMap,
  path::{Path, PathBuf},
};

use comrak::{
  Arena,
  nodes::{AstNode, NodeHeading, NodeValue},
  options::Options,
  parse_document,
};
use log::trace;
use markup5ever::local_name;
use walkdir::WalkDir;

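/// Errors that can occur while post-processing rendered HTML with the DOM
/// helpers in this module.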
#[derive(Debug, thiserror::Error)]
pub enum DomError {
  #[error("CSS selector failed: {0}")]
  SelectorError(String),
  #[error("DOM serialization failed: {0}")]
  SerializationError(String),
}

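/// Convenience alias for results of DOM post-processing operations.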
pub type DomResult<T> = Result<T, DomError>;

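/// Runs a CSS selector against `document` and returns the matching nodes.
/// Selector failures are logged and produce an empty `Vec` instead of an
/// error.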
fn safe_select(
  document: &kuchikikiki::NodeRef,
  selector: &str,
) -> Vec<kuchikikiki::NodeRef> {
  match document.select(selector) {
    Ok(selections) => selections.map(|sel| sel.as_node().clone()).collect(),
    Err(e) => {
      log::warn!("DOM selector '{selector}' failed: {e:?}");
      Vec::new()
    },
  }
}

use super::{
  process::process_safe,
  types::{
    AstTransformer,
    MarkdownOptions,
    MarkdownProcessor,
    PromptTransformer,
  },
};
use crate::{
  syntax::create_default_manager,
  types::{Header, MarkdownResult},
  utils,
};

impl MarkdownProcessor {
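  /// Creates a processor from `options`, loading the manpage URL mapping and
  /// initializing syntax highlighting when the corresponding options request
  /// them. Initialization failures are logged and the processor degrades
  /// gracefully instead of failing.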
  #[must_use]
  pub fn new(options: MarkdownOptions) -> Self {
    let manpage_urls = options
      .manpage_urls_path
      .as_ref()
      .and_then(|path| crate::utils::load_manpage_urls(path).ok());

    let syntax_manager = if options.highlight_code {
      match create_default_manager() {
        Ok(manager) => {
          log::info!("Syntax highlighting initialized successfully");
          Some(manager)
        },
        Err(e) => {
          log::error!("Failed to initialize syntax highlighting: {e}");
          log::warn!(
            "Continuing without syntax highlighting - code blocks will not be \
             highlighted"
          );
          None
        },
      }
    } else {
      None
    };

    Self {
      options,
      manpage_urls,
      syntax_manager,
      base_dir: std::path::PathBuf::from("."),
    }
  }

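  /// Returns the processor's configured options.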
  #[must_use]
  pub const fn options(&self) -> &MarkdownOptions {
    &self.options
  }

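  /// Sets the base directory against which relative file includes are
  /// resolved.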
  #[must_use]
  pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
    self.base_dir = base_dir.to_path_buf();
    self
  }

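  /// Reports whether the given [`ProcessorFeature`] is enabled for this
  /// processor.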
  #[must_use]
  pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
    match feature {
      ProcessorFeature::Gfm => self.options.gfm,
      ProcessorFeature::Nixpkgs => self.options.nixpkgs,
      ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
      ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
    }
  }

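  /// Returns the loaded manpage URL mappings, if any.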
  #[must_use]
  pub const fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
    self.manpage_urls.as_ref()
  }

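  /// Applies syntax highlighting to `<pre><code>` blocks in already rendered
  /// HTML. Returns the input unchanged when highlighting is disabled,
  /// unavailable, or the result cannot be serialized.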
  #[must_use]
  pub fn highlight_codeblocks(&self, html: &str) -> String {
    use kuchikikiki::parse_html;
    use tendril::TendrilSink;

    if !self.options.highlight_code || self.syntax_manager.is_none() {
      return html.to_string();
    }

    let document = parse_html().one(html);

    let mut code_blocks = Vec::new();
    for pre_node in safe_select(&document, "pre > code") {
      let code_node = pre_node;
      if let Some(element) = code_node.as_element() {
        let language = element
          .attributes
          .borrow()
          .get("class")
          .and_then(|class| class.strip_prefix("language-"))
          .unwrap_or("text")
          .to_string();
        let code_text = code_node.text_contents();

        if let Some(pre_parent) = code_node.parent() {
          code_blocks.push((
            pre_parent.clone(),
            code_node.clone(),
            code_text,
            language,
          ));
        }
      }
    }

    for (pre_element, _code_node, code_text, language) in code_blocks {
      if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
      {
        let wrapped_html = format!(
          r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
        );
        let fragment = parse_html().one(wrapped_html.as_str());
        pre_element.insert_after(fragment);
        pre_element.detach();
      }
    }

    let mut buf = Vec::new();
    if let Err(e) = document.serialize(&mut buf) {
      log::warn!("DOM serialization failed: {e:?}");
      return html.to_string();
    }

    String::from_utf8(buf).unwrap_or_else(|_| html.to_string())
  }

  fn handle_hardtabs(&self, code: &str) -> String {
    use super::types::TabStyle;

    if !code.contains('\t') {
      return code.to_string();
    }

    match self.options.tab_style {
      TabStyle::None => code.to_string(),

      TabStyle::Warn => {
        log::warn!(
          "Hard tabs detected in code block. Consider using spaces for \
           consistency. Tools like editorconfig may help you normalize spaces \
           in your documents."
        );
        code.to_string()
      },

      TabStyle::Normalize => {
        log::debug!("Replacing hard tabs with spaces");
        code.replace('\t', " ")
      },
    }
  }

  fn process_hardtabs(&self, markdown: &str) -> String {
    use super::types::TabStyle;

    if self.options.tab_style == TabStyle::None {
      return markdown.to_string();
    }

    let mut result = String::with_capacity(markdown.len());
    let mut lines = markdown.lines().peekable();
    let mut in_code_block = false;
    let mut code_fence_char = None;
    let mut code_fence_count = 0;

    while let Some(line) = lines.next() {
      let trimmed = line.trim_start();

      if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
        let Some(fence_char) = trimmed.chars().next() else {
          result.push_str(line);
          result.push('\n');
          continue;
        };
        let fence_count =
          trimmed.chars().take_while(|&c| c == fence_char).count();

        if fence_count >= 3 {
          if !in_code_block {
            in_code_block = true;
            code_fence_char = Some(fence_char);
            code_fence_count = fence_count;
          } else if code_fence_char == Some(fence_char)
            && fence_count >= code_fence_count
          {
            in_code_block = false;
            code_fence_char = None;
            code_fence_count = 0;
          }
        }
      }

      let processed_line = if in_code_block && line.contains('\t') {
        self.handle_hardtabs(line)
      } else {
        line.to_string()
      };

      result.push_str(&processed_line);

      if lines.peek().is_some() {
        result.push('\n');
      }
    }

    result
  }

  fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
    if !self.options.highlight_code {
      return None;
    }

    let syntax_manager = self.syntax_manager.as_ref()?;

    syntax_manager
      .highlight_code(code, language, self.options.highlight_theme.as_deref())
      .ok()
  }

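  /// Renders Markdown to HTML, returning the rendered output together with
  /// the extracted headers, the document title, and any files pulled in via
  /// includes.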
  #[must_use]
  pub fn render(&self, markdown: &str) -> MarkdownResult {
    let (preprocessed, included_files) = self.preprocess(markdown);
    let (headers, title) = self.extract_headers(&preprocessed);
    let html = self.process_html_pipeline(&preprocessed);

    MarkdownResult {
      html,
      headers,
      title,
      included_files,
    }
  }

  fn process_html_pipeline(&self, content: &str) -> String {
    let mut html = self.convert_to_html(content);

    if cfg!(feature = "ndg-flavored") {
      #[cfg(feature = "ndg-flavored")]
      {
        html = super::extensions::process_option_references(
          &html,
          self.options.valid_options.as_ref(),
        );
      }
    }

    if self.options.nixpkgs {
      html = self.process_manpage_references_html(&html);
    }

    if self.options.highlight_code {
      html = self.highlight_codeblocks(&html);
    }

    self.kuchiki_postprocess(&html)
  }

  fn preprocess(
    &self,
    content: &str,
  ) -> (String, Vec<crate::types::IncludedFile>) {
    let mut processed = content.to_string();
    let mut included_files = Vec::new();

    processed = super::extensions::process_myst_autolinks(&processed);

    processed = self.process_hardtabs(&processed);

    if self.options.nixpkgs {
      let (content, files) = self.apply_nixpkgs_preprocessing(&processed);
      processed = content;
      included_files = files;
    }

    if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
      processed = super::extensions::process_role_markup(
        &processed,
        self.manpage_urls.as_ref(),
        self.options.auto_link_options,
        self.options.valid_options.as_ref(),
      );
    }

    (processed, included_files)
  }

  #[cfg(feature = "nixpkgs")]
  fn apply_nixpkgs_preprocessing(
    &self,
    content: &str,
  ) -> (String, Vec<crate::types::IncludedFile>) {
    let (with_includes, included_files) =
      match super::extensions::process_file_includes(content, &self.base_dir, 0)
      {
        Ok(result) => result,
        Err(e) => {
          log::warn!(
            "File include processing failed: {e}. Continuing without includes."
          );
          (content.to_string(), Vec::new())
        },
      };
    let with_blocks = super::extensions::process_block_elements(&with_includes);
    let processed = super::extensions::process_inline_anchors(&with_blocks);
    (processed, included_files)
  }

  #[cfg(not(feature = "nixpkgs"))]
  fn apply_nixpkgs_preprocessing(
    &self,
    content: &str,
  ) -> (String, Vec<crate::types::IncludedFile>) {
    (content.to_string(), Vec::new())
  }

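  /// Extracts headers (text, level, and anchor id) from the Markdown source
  /// and returns them along with the first level-1 heading, which is used as
  /// the document title.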
  #[must_use]
  pub fn extract_headers(
    &self,
    content: &str,
  ) -> (Vec<Header>, Option<String>) {
    use std::fmt::Write;

    let arena = Arena::new();
    let options = self.comrak_options();

    let mut normalized = String::with_capacity(content.len());
    for line in content.lines() {
      let trimmed = line.trim_end();
      if !trimmed.starts_with('#')
        && let Some(anchor_start) = trimmed.rfind("{#")
        && let Some(anchor_end) = trimmed[anchor_start..].find('}')
      {
        let text = trimmed[..anchor_start].trim_end();
        let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
        let _ = writeln!(normalized, "## {text} {{#{id}}}");
        continue;
      }
      normalized.push_str(line);
      normalized.push('\n');
    }

    let root = parse_document(&arena, &normalized, &options);

    let mut headers = Vec::new();
    let mut found_title = None;

    for node in root.descendants() {
      if let NodeValue::Heading(NodeHeading { level, .. }) =
        &node.data.borrow().value
      {
        let mut text = String::new();
        let mut explicit_id = None;

        for child in node.children() {
          match &child.data.borrow().value {
            NodeValue::Text(t) => text.push_str(t),
            NodeValue::Code(t) => text.push_str(&t.literal),
            NodeValue::Link(..)
            | NodeValue::Emph
            | NodeValue::Strong
            | NodeValue::Subscript
            | NodeValue::Strikethrough
            | NodeValue::Superscript
            | NodeValue::FootnoteReference(..) => {
              text.push_str(&extract_inline_text(child));
            },
            NodeValue::HtmlInline(html) => {
              let html_str = html.as_str();
              if let Some(start) = html_str.find("{#")
                && let Some(end) = html_str[start..].find('}')
              {
                let anchor = &html_str[start + 2..start + end];
                explicit_id = Some(anchor.to_string());
              }
            },
            #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
            NodeValue::Image(..) => {},
            _ => {},
          }
        }

        let trimmed = text.trim_end();
        #[allow(clippy::option_if_let_else)]
        let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
          if let Some(end) = trimmed[start..].find('}') {
            let anchor = &trimmed[start + 2..start + end];
            (trimmed[..start].trim_end().to_string(), anchor.to_string())
          } else {
            (
              text.clone(),
              explicit_id.unwrap_or_else(|| utils::slugify(&text)),
            )
          }
        } else {
          (
            text.clone(),
            explicit_id.unwrap_or_else(|| utils::slugify(&text)),
          )
        };
        if *level == 1 && found_title.is_none() {
          found_title = Some(final_text.clone());
        }
        headers.push(Header {
          text: final_text,
          level: *level,
          id,
        });
      }
    }

    (headers, found_title)
  }

  fn convert_to_html(&self, content: &str) -> String {
    let arena = Arena::new();
    let options = self.comrak_options();
    let root = parse_document(&arena, content, &options);

    let prompt_transformer = PromptTransformer;
    prompt_transformer.transform(root);

    let mut html_output = String::new();
    comrak::format_html(root, &options, &mut html_output).unwrap_or_default();

    Self::process_header_anchors_html(&html_output)
  }

  fn process_header_anchors_html(html: &str) -> String {
    use std::sync::LazyLock;

    use regex::Regex;

    static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
      Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>")
        .unwrap_or_else(|e| {
          log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
          utils::never_matching_regex().unwrap_or_else(|_| {
            #[allow(
              clippy::expect_used,
              reason = "This pattern is guaranteed to be valid"
            )]
            Regex::new(r"[^\s\S]")
              .expect("regex pattern [^\\s\\S] should always compile")
          })
        })
    });

    HEADER_ANCHOR_RE
      .replace_all(html, |caps: &regex::Captures| {
        let level = &caps[1];
        let prefix = &caps[2];
        let id = &caps[3];
        let suffix = &caps[4];
        format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
      })
      .to_string()
  }

  fn comrak_options(&self) -> Options<'_> {
    let mut options = Options::default();
    if self.options.gfm {
      options.extension.table = true;
      options.extension.footnotes = true;
      options.extension.strikethrough = true;
      options.extension.tasklist = true;
      options.extension.superscript = true;
      options.extension.autolink = true;
    }
    options.render.r#unsafe = true;
    options.extension.header_ids = None;
    options.extension.description_lists = true;
    options
  }

  #[cfg(feature = "nixpkgs")]
  fn process_manpage_references_html(&self, html: &str) -> String {
    super::extensions::process_manpage_references(
      html,
      self.manpage_urls.as_ref(),
    )
  }

  #[cfg(not(feature = "nixpkgs"))]
  fn process_manpage_references_html(&self, html: &str) -> String {
    html.to_string()
  }

  #[allow(
    clippy::unused_self,
    reason = "Method signature matches processor pattern"
  )]
  fn kuchiki_postprocess(&self, html: &str) -> String {
    kuchiki_postprocess_html(html, |document| {
      Self::apply_dom_transformations(document);
    })
  }

  fn apply_dom_transformations(document: &kuchikikiki::NodeRef) {
    Self::process_list_item_id_markers(document);
    Self::process_header_anchor_comments(document);
    Self::process_list_item_inline_anchors(document);
    Self::process_paragraph_inline_anchors(document);
    Self::process_remaining_inline_anchors(document);
    Self::process_option_anchor_links(document);
    Self::process_empty_auto_links(document);
    Self::process_empty_html_links(document);
  }

  fn process_list_item_id_markers(document: &kuchikikiki::NodeRef) {
    let mut to_modify = Vec::new();

    for comment in document.inclusive_descendants() {
      if let Some(comment_node) = comment.as_comment() {
        let comment_text = comment_node.borrow();
        if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
          let id = comment_text[id_start + 16..].trim();
          if !id.is_empty()
            && id
              .chars()
              .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
          {
            if let Some(parent) = comment.parent()
              && let Some(element) = parent.as_element()
              && element.name.local.as_ref() == "li"
            {
              to_modify.push((comment.clone(), id.to_string()));
            }
          }
        }
      }
    }

    for (comment_node, id) in to_modify {
      let span = kuchikikiki::NodeRef::new_element(
        markup5ever::QualName::new(
          None,
          markup5ever::ns!(html),
          local_name!("span"),
        ),
        vec![
          (
            kuchikikiki::ExpandedName::new("", "id"),
            kuchikikiki::Attribute {
              prefix: None,
              value: id,
            },
          ),
          (
            kuchikikiki::ExpandedName::new("", "class"),
            kuchikikiki::Attribute {
              prefix: None,
              value: "nixos-anchor".into(),
            },
          ),
        ],
      );
      comment_node.insert_after(span);
      comment_node.detach();
    }
  }

  fn process_header_anchor_comments(document: &kuchikikiki::NodeRef) {
    let mut to_modify = Vec::new();

    for comment in document.inclusive_descendants() {
      if let Some(comment_node) = comment.as_comment() {
        let comment_text = comment_node.borrow();
        if let Some(anchor_start) = comment_text.find("anchor:") {
          let id = comment_text[anchor_start + 7..].trim();
          if !id.is_empty()
            && id
              .chars()
              .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
          {
            if let Some(parent) = comment.parent()
              && let Some(element) = parent.as_element()
            {
              let tag_name = element.name.local.as_ref();
              if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
                to_modify.push((
                  parent.clone(),
                  comment.clone(),
                  id.to_string(),
                ));
              }
            }
          }
        }
      }
    }

    for (header_element, comment_node, id) in to_modify {
      if let Some(element) = header_element.as_element() {
        element
          .attributes
          .borrow_mut()
          .insert(local_name!("id"), id);
        comment_node.detach();
      }
    }
  }

  fn process_list_item_inline_anchors(document: &kuchikikiki::NodeRef) {
    for li_node in safe_select(document, "li") {
      let li_element = li_node;

      let has_code = !safe_select(&li_element, "code, pre").is_empty();
      if has_code {
        continue;
      }

      let text_content = li_element.text_contents();

      if let Some(anchor_start) = text_content.find("[]{#")
        && let Some(anchor_end) = text_content[anchor_start..].find('}')
      {
        let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
        if !id.is_empty()
          && id
            .chars()
            .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
        {
          let remaining_content =
            &text_content[anchor_start + anchor_end + 1..];

          for child in li_element.children() {
            child.detach();
          }

          let span = kuchikikiki::NodeRef::new_element(
            markup5ever::QualName::new(
              None,
              markup5ever::ns!(html),
              local_name!("span"),
            ),
            vec![
              (
                kuchikikiki::ExpandedName::new("", "id"),
                kuchikikiki::Attribute {
                  prefix: None,
                  value: id.into(),
                },
              ),
              (
                kuchikikiki::ExpandedName::new("", "class"),
                kuchikikiki::Attribute {
                  prefix: None,
                  value: "nixos-anchor".into(),
                },
              ),
            ],
          );
          li_element.append(span);
          if !remaining_content.is_empty() {
            li_element
              .append(kuchikikiki::NodeRef::new_text(remaining_content));
          }
        }
      }
    }
  }

  fn process_paragraph_inline_anchors(document: &kuchikikiki::NodeRef) {
    for p_node in safe_select(document, "p") {
      let p_element = p_node;

      let has_code = !safe_select(&p_element, "code, pre").is_empty();
      if has_code {
        continue;
      }

      let text_content = p_element.text_contents();

      if let Some(anchor_start) = text_content.find("[]{#")
        && let Some(anchor_end) = text_content[anchor_start..].find('}')
      {
        let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
        if !id.is_empty()
          && id
            .chars()
            .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
        {
          let remaining_content =
            &text_content[anchor_start + anchor_end + 1..];

          for child in p_element.children() {
            child.detach();
          }

          let span = kuchikikiki::NodeRef::new_element(
            markup5ever::QualName::new(
              None,
              markup5ever::ns!(html),
              local_name!("span"),
            ),
            vec![
              (
                kuchikikiki::ExpandedName::new("", "id"),
                kuchikikiki::Attribute {
                  prefix: None,
                  value: id.into(),
                },
              ),
              (
                kuchikikiki::ExpandedName::new("", "class"),
                kuchikikiki::Attribute {
                  prefix: None,
                  value: "nixos-anchor".into(),
                },
              ),
            ],
          );
          p_element.append(span);
          if !remaining_content.is_empty() {
            p_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
          }
        }
      }
    }
  }

  fn process_remaining_inline_anchors(document: &kuchikikiki::NodeRef) {
    let mut text_nodes_to_process = Vec::new();

    for node in document.inclusive_descendants() {
      if let Some(text_node) = node.as_text() {
        let mut parent = node.parent();
        let mut in_code = false;
        while let Some(p) = parent {
          if let Some(element) = p.as_element()
            && (element.name.local == local_name!("code")
              || element.name.local == local_name!("pre"))
          {
            in_code = true;
            break;
          }
          parent = p.parent();
        }

        if !in_code {
          let text_content = text_node.borrow().clone();
          if text_content.contains("[]{#") {
            text_nodes_to_process.push((node.clone(), text_content));
          }
        }
      }
    }

    for (text_node, text_content) in text_nodes_to_process {
      let mut last_end = 0;
      let mut new_children = Vec::new();

      let chars = text_content.chars().collect::<Vec<_>>();
      let mut i = 0;
      while i < chars.len() {
        if i + 4 < chars.len()
          && chars[i] == '['
          && chars[i + 1] == ']'
          && chars[i + 2] == '{'
          && chars[i + 3] == '#'
        {
          let anchor_start = i;
          i += 4;
          let mut id = String::new();
          while i < chars.len() && chars[i] != '}' {
            if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_'
            {
              id.push(chars[i]);
              i += 1;
            } else {
              break;
            }
          }

          if i < chars.len() && chars[i] == '}' && !id.is_empty() {
            let anchor_end = i + 1;

            if anchor_start > last_end {
              let before_text: String =
                chars[last_end..anchor_start].iter().collect();
              if !before_text.is_empty() {
                new_children.push(kuchikikiki::NodeRef::new_text(before_text));
              }
            }

            let span = kuchikikiki::NodeRef::new_element(
              markup5ever::QualName::new(
                None,
                markup5ever::ns!(html),
                local_name!("span"),
              ),
              vec![
                (
                  kuchikikiki::ExpandedName::new("", "id"),
                  kuchikikiki::Attribute {
                    prefix: None,
                    value: id,
                  },
                ),
                (
                  kuchikikiki::ExpandedName::new("", "class"),
                  kuchikikiki::Attribute {
                    prefix: None,
                    value: "nixos-anchor".into(),
                  },
                ),
              ],
            );
            new_children.push(span);

            last_end = anchor_end;
            i = anchor_end;
          } else {
            i += 1;
          }
        } else {
          i += 1;
        }
      }

      if last_end < chars.len() {
        let after_text: String = chars[last_end..].iter().collect();
        if !after_text.is_empty() {
          new_children.push(kuchikikiki::NodeRef::new_text(after_text));
        }
      }

      if !new_children.is_empty() {
        for child in new_children {
          text_node.insert_before(child);
        }
        text_node.detach();
      }
    }
  }

  fn process_empty_auto_links(document: &kuchikikiki::NodeRef) {
    for link_node in safe_select(document, "a") {
      let link_element = link_node;
      if let Some(element) = link_element.as_element() {
        let href = element
          .attributes
          .borrow()
          .get(local_name!("href"))
          .map(std::string::ToString::to_string);
        let text_content = link_element.text_contents();

        if let Some(href_value) = href
          && href_value.starts_with('#')
          && (text_content.trim().is_empty()
            || text_content.trim() == "{{ANCHOR}}")
        {
          if text_content.trim() == "{{ANCHOR}}" {
            for child in link_element.children() {
              child.detach();
            }
          }
          let display_text = Self::humanize_anchor_id(&href_value);
          link_element.append(kuchikikiki::NodeRef::new_text(display_text));
        }
      }
    }
  }

  fn process_empty_html_links(document: &kuchikikiki::NodeRef) {
    for link_node in safe_select(document, "a[href^='#']") {
      let link_element = link_node;
      let text_content = link_element.text_contents();

      if text_content.trim().is_empty() || text_content.trim() == "{{ANCHOR}}" {
        if text_content.trim() == "{{ANCHOR}}" {
          for child in link_element.children() {
            child.detach();
          }
        }
        if let Some(element) = link_element.as_element()
          && let Some(href) =
            element.attributes.borrow().get(local_name!("href"))
        {
          let display_text = Self::humanize_anchor_id(href);
          link_element.append(kuchikikiki::NodeRef::new_text(display_text));
        }
      }
    }
  }

  fn process_option_anchor_links(document: &kuchikikiki::NodeRef) {
    let mut to_modify = Vec::new();

    for link_node in safe_select(document, "a[href^='#opt-']") {
      let link_element = link_node;
      if let Some(element) = link_element.as_element() {
        let href = element
          .attributes
          .borrow()
          .get(local_name!("href"))
          .map(std::string::ToString::to_string);
        let text_content = link_element.text_contents();

        if let Some(href_value) = href
          && href_value.starts_with("#opt-")
        {
          let option_anchor = href_value[1..].to_string();
          let needs_text_replacement = text_content.trim().is_empty()
            || text_content.trim() == "{{ANCHOR}}";
          to_modify.push((
            link_element.clone(),
            option_anchor,
            needs_text_replacement,
          ));
        }
      }
    }

    for (link_element, option_anchor, needs_text_replacement) in to_modify {
      if let Some(element) = link_element.as_element() {
        let new_href = format!("options.html#{option_anchor}");
        element
          .attributes
          .borrow_mut()
          .insert(local_name!("href"), new_href);

        if needs_text_replacement {
          for child in link_element.children() {
            child.detach();
          }

          if let Some(option_path) = option_anchor.strip_prefix("opt-") {
            let option_name = option_path.replace('-', ".");
            link_element.append(kuchikikiki::NodeRef::new_text(option_name));
          }
        }
      }
    }
  }

  fn humanize_anchor_id(anchor: &str) -> String {
    let cleaned = anchor.trim_start_matches('#');

    let without_prefix = cleaned
      .trim_start_matches("sec-")
      .trim_start_matches("ssec-")
      .trim_start_matches("opt-");

    let spaced = without_prefix.replace(['-', '_'], " ");

    spaced
      .split_whitespace()
      .map(|word| {
        let mut chars = word.chars();
        chars.next().map_or_else(String::new, |c| {
          c.to_uppercase().collect::<String>() + chars.as_str()
        })
      })
      .collect::<Vec<String>>()
      .join(" ")
  }
}

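/// Recursively collects the plain text of a node's inline children, skipping
/// images and inline HTML.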
pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
  let mut text = String::new();
  for child in node.children() {
    match &child.data.borrow().value {
      NodeValue::Text(t) => text.push_str(t),
      NodeValue::Code(t) => text.push_str(&t.literal),
      NodeValue::Link(..)
      | NodeValue::Emph
      | NodeValue::Strong
      | NodeValue::Strikethrough
      | NodeValue::Superscript
      | NodeValue::Subscript
      | NodeValue::FootnoteReference(..) => {
        text.push_str(&extract_inline_text(child));
      },
      #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
      NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
      _ => {},
    }
  }
  text
}

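/// Recursively collects all Markdown (`.md`) files under `input_dir`,
/// following symlinks.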
pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
  let mut files = Vec::with_capacity(100);

  for entry in WalkDir::new(input_dir)
    .follow_links(true)
    .into_iter()
    .filter_map(Result::ok)
  {
    let path = entry.path();
    if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
      files.push(path.to_owned());
    }
  }

  trace!("Found {} markdown files to process", files.len());
  files
}

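/// Features that a [`MarkdownProcessor`] may have enabled, as queried through
/// [`MarkdownProcessor::has_feature`].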
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessorFeature {
  Gfm,
  Nixpkgs,
  SyntaxHighlighting,
  ManpageUrls,
}

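/// Parses `html` into a DOM, applies `transform_fn`, and serializes the
/// result, falling back to the original input if processing fails.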
fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
where
  F: FnOnce(&kuchikikiki::NodeRef),
{
  process_safe(
    html,
    |html| {
      use tendril::TendrilSink;

      let document = kuchikikiki::parse_html().one(html);
      transform_fn(&document);

      let mut out = Vec::new();
      let _ = document.serialize(&mut out);
      String::from_utf8(out).unwrap_or_default()
    },
    html,
  )
}