1use std::{
6 collections::HashMap,
7 path::{Path, PathBuf},
8};
9
10use comrak::{
11 Arena,
12 nodes::{AstNode, NodeHeading, NodeValue},
13 options::Options,
14 parse_document,
15};
16use log::trace;
17use markup5ever::local_name;
18use walkdir::WalkDir;
19
20use super::{
21 dom::safe_select,
22 process::process_safe,
23 types::{
24 AstTransformer,
25 MarkdownOptions,
26 MarkdownProcessor,
27 PromptTransformer,
28 },
29};
30use crate::{
31 syntax::create_default_manager,
32 types::{Header, MarkdownResult},
33 utils,
34};
35
36impl MarkdownProcessor {
37 #[must_use]
39 pub fn new(options: MarkdownOptions) -> Self {
40 let manpage_urls = options
41 .manpage_urls_path
42 .as_ref()
43 .and_then(|path| crate::utils::load_manpage_urls(path).ok());
44
45 let syntax_manager = if options.highlight_code {
46 match create_default_manager() {
47 Ok(manager) => {
48 log::info!("Syntax highlighting initialized successfully");
49 Some(manager)
50 },
51 Err(e) => {
52 log::error!("Failed to initialize syntax highlighting: {e}");
53 log::warn!(
54 "Continuing without syntax highlighting - code blocks will not be \
55 highlighted"
56 );
57 None
58 },
59 }
60 } else {
61 None
62 };
63
64 Self {
65 options,
66 manpage_urls,
67 syntax_manager,
68 base_dir: std::path::PathBuf::from("."),
69 }
70 }
71
  /// Returns the options this processor was constructed with.
  #[must_use]
  pub const fn options(&self) -> &MarkdownOptions {
    &self.options
  }
77
78 #[must_use]
80 pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
81 self.base_dir = base_dir.to_path_buf();
82 self
83 }
84
85 #[must_use]
87 pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
88 match feature {
89 ProcessorFeature::Gfm => self.options.gfm,
90 ProcessorFeature::Nixpkgs => self.options.nixpkgs,
91 ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
92 ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
93 }
94 }
95
  /// Returns the loaded manpage URL mappings, or `None` when none were
  /// loaded.
  #[must_use]
  pub const fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
    self.manpage_urls.as_ref()
  }
101
102 #[must_use]
104 pub fn highlight_codeblocks(&self, html: &str) -> String {
105 use kuchikikiki::parse_html;
106 use tendril::TendrilSink;
107
108 if !self.options.highlight_code || self.syntax_manager.is_none() {
109 return html.to_string();
110 }
111
112 let document = parse_html().one(html);
113
114 let mut code_blocks = Vec::new();
116 for pre_node in safe_select(&document, "pre > code") {
117 let code_node = pre_node;
118 if let Some(element) = code_node.as_element() {
119 let language = element
120 .attributes
121 .borrow()
122 .get("class")
123 .and_then(|class| class.strip_prefix("language-"))
124 .unwrap_or("text")
125 .to_string();
126 let code_text = code_node.text_contents();
127
128 if let Some(pre_parent) = code_node.parent() {
129 code_blocks.push((
130 pre_parent.clone(),
131 code_node.clone(),
132 code_text,
133 language,
134 ));
135 }
136 }
137 }
138
139 for (pre_element, _code_node, code_text, language) in code_blocks {
141 if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
142 {
143 let wrapped_html = format!(
145 r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
146 );
147 let fragment = parse_html().one(wrapped_html.as_str());
148 pre_element.insert_after(fragment);
149 pre_element.detach();
150 }
151 }
153
154 let mut buf = Vec::new();
155 if let Err(e) = document.serialize(&mut buf) {
156 log::warn!("DOM serialization failed: {e:?}");
157 return html.to_string(); }
159 String::from_utf8(buf).unwrap_or_else(|_| html.to_string())
160 }
161
162 fn handle_hardtabs(&self, code: &str) -> String {
164 use super::types::TabStyle;
165
166 if !code.contains('\t') {
168 return code.to_string();
169 }
170
171 match self.options.tab_style {
172 TabStyle::None => code.to_string(),
174
175 TabStyle::Warn => {
177 log::warn!(
178 "Hard tabs detected in code block. Consider using spaces for \
179 consistency. Tools like editorconfig may help you normalize spaces \
180 in your documents."
181 );
182 code.to_string()
183 },
184
185 TabStyle::Normalize => {
188 log::debug!("Replacing hard tabs with spaces");
189 code.replace('\t', " ")
190 },
191 }
192 }
193
194 fn process_hardtabs(&self, markdown: &str) -> String {
196 use super::types::TabStyle;
197 use crate::utils::codeblock::FenceTracker;
198
199 if self.options.tab_style == TabStyle::None {
201 return markdown.to_string();
202 }
203
204 let mut result = String::with_capacity(markdown.len());
205 let mut lines = markdown.lines().peekable();
206 let mut tracker = FenceTracker::new();
207
208 while let Some(line) = lines.next() {
209 tracker = tracker.process_line(line);
210
211 let processed_line = if tracker.in_code_block() && line.contains('\t') {
213 self.handle_hardtabs(line)
214 } else {
215 line.to_string()
216 };
217
218 result.push_str(&processed_line);
219
220 if lines.peek().is_some() {
222 result.push('\n');
223 }
224 }
225
226 result
227 }
228
229 fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
232 if !self.options.highlight_code {
233 return None;
234 }
235
236 let syntax_manager = self.syntax_manager.as_ref()?;
237
238 syntax_manager
239 .highlight_code(code, language, self.options.highlight_theme.as_deref())
240 .ok()
241 }
242
243 #[must_use]
245 pub fn render(&self, markdown: &str) -> MarkdownResult {
246 let (preprocessed, included_files) = self.preprocess(markdown);
247 let (headers, title) = self.extract_headers(&preprocessed);
248 let html = self.process_html_pipeline(&preprocessed);
249
250 MarkdownResult {
251 html,
252 headers,
253 title,
254 included_files,
255 }
256 }
257
258 fn process_html_pipeline(&self, content: &str) -> String {
260 let mut html = self.convert_to_html(content);
261
262 if cfg!(feature = "ndg-flavored") {
264 #[cfg(feature = "ndg-flavored")]
265 {
266 html = super::extensions::process_option_references(
267 &html,
268 self.options.valid_options.as_ref(),
269 );
270 }
271 }
272
273 if self.options.nixpkgs {
274 html = self.process_manpage_references_html(&html);
275 }
276
277 if self.options.highlight_code {
278 html = self.highlight_codeblocks(&html);
279 }
280
281 self.kuchiki_postprocess(&html)
282 }
283
284 fn preprocess(
286 &self,
287 content: &str,
288 ) -> (String, Vec<crate::types::IncludedFile>) {
289 let mut processed = content.to_string();
290 let mut included_files = Vec::new();
291
292 processed = super::extensions::process_myst_autolinks(&processed);
294
295 processed = self.process_hardtabs(&processed);
297
298 if self.options.nixpkgs {
299 let (content, files) = self.apply_nixpkgs_preprocessing(&processed);
300 processed = content;
301 included_files = files;
302 }
303
304 if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
305 processed = super::extensions::process_role_markup(
306 &processed,
307 self.manpage_urls.as_ref(),
308 self.options.auto_link_options,
309 self.options.valid_options.as_ref(),
310 );
311 }
312
313 if cfg!(feature = "wiki") {
314 processed = super::extensions::process_wikilinks(&processed);
315 }
316
317 (processed, included_files)
318 }
319
320 #[cfg(feature = "nixpkgs")]
322 fn apply_nixpkgs_preprocessing(
323 &self,
324 content: &str,
325 ) -> (String, Vec<crate::types::IncludedFile>) {
326 let (with_includes, included_files) =
327 match super::extensions::process_file_includes(content, &self.base_dir, 0)
328 {
329 Ok(result) => result,
330 Err(e) => {
331 log::warn!(
332 "File include processing failed: {e}. Continuing without includes."
333 );
334 (content.to_string(), Vec::new())
335 },
336 };
337 let with_blocks = super::extensions::process_block_elements(&with_includes);
338 let processed = super::extensions::process_inline_anchors(&with_blocks);
339 (processed, included_files)
340 }
341
342 #[cfg(not(feature = "nixpkgs"))]
344 fn apply_nixpkgs_preprocessing(
345 &self,
346 content: &str,
347 ) -> (String, Vec<crate::types::IncludedFile>) {
348 (content.to_string(), Vec::new())
349 }
350
351 #[must_use]
353 pub fn extract_headers(
354 &self,
355 content: &str,
356 ) -> (Vec<Header>, Option<String>) {
357 use std::fmt::Write;
358
359 let arena = Arena::new();
360 let options = self.comrak_options();
361
362 let mut normalized = String::with_capacity(content.len());
364 for line in content.lines() {
365 let trimmed = line.trim_end();
366 if !trimmed.starts_with('#')
367 && let Some(anchor_start) = trimmed.rfind("{#")
368 && let Some(anchor_end) = trimmed[anchor_start..].find('}')
369 {
370 let text = trimmed[..anchor_start].trim_end();
371 let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
372 let _ = writeln!(normalized, "## {text} {{#{id}}}");
373 continue;
374 }
375 normalized.push_str(line);
376 normalized.push('\n');
377 }
378
379 let root = parse_document(&arena, &normalized, &options);
380
381 let mut headers = Vec::new();
382 let mut found_title = None;
383
384 for node in root.descendants() {
385 if let NodeValue::Heading(NodeHeading { level, .. }) =
386 &node.data.borrow().value
387 {
388 let mut text = String::new();
389 let mut explicit_id = None;
390
391 for child in node.children() {
392 match &child.data.borrow().value {
393 NodeValue::Text(t) => text.push_str(t),
394 NodeValue::Code(t) => text.push_str(&t.literal),
395 NodeValue::Link(..)
396 | NodeValue::Emph
397 | NodeValue::Strong
398 | NodeValue::Subscript
399 | NodeValue::Strikethrough
400 | NodeValue::Superscript
401 | NodeValue::FootnoteReference(..) => {
402 text.push_str(&extract_inline_text(child));
403 },
404 NodeValue::HtmlInline(html) => {
405 let html_str = html.as_str();
407 if let Some(start) = html_str.find("{#")
408 && let Some(end) = html_str[start..].find('}')
409 {
410 let anchor = &html_str[start + 2..start + end];
411 explicit_id = Some(anchor.to_string());
412 }
413 },
414 #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
415 NodeValue::Image(..) => {},
416 _ => {},
417 }
418 }
419
420 let trimmed = text.trim_end();
422 #[allow(clippy::option_if_let_else)]
423 let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
425 if let Some(end) = trimmed[start..].find('}') {
426 let anchor = &trimmed[start + 2..start + end];
427 (trimmed[..start].trim_end().to_string(), anchor.to_string())
428 } else {
429 (
430 text.clone(),
431 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
432 )
433 }
434 } else {
435 (
436 text.clone(),
437 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
438 )
439 };
440 if *level == 1 && found_title.is_none() {
441 found_title = Some(final_text.clone());
442 }
443 headers.push(Header {
444 text: final_text,
445 level: *level,
446 id,
447 });
448 }
449 }
450
451 (headers, found_title)
452 }
453
454 fn convert_to_html(&self, content: &str) -> String {
456 let arena = Arena::new();
458 let options = self.comrak_options();
459 let root = parse_document(&arena, content, &options);
460
461 let prompt_transformer = PromptTransformer;
463 prompt_transformer.transform(root);
464
465 let mut html_output = String::new();
466 if let Err(e) = comrak::format_html(root, &options, &mut html_output) {
467 log::error!("Failed to format HTML: {e}");
468 }
469
470 Self::process_header_anchors_html(&html_output)
472 }
473
474 fn process_header_anchors_html(html: &str) -> String {
478 use std::sync::LazyLock;
479
480 use regex::Regex;
481
482 static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
484 Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>")
485 .unwrap_or_else(|e| {
486 log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
487 utils::never_matching_regex().unwrap_or_else(|_| {
488 #[allow(
489 clippy::expect_used,
490 reason = "This pattern is guaranteed to be valid"
491 )]
492 Regex::new(r"[^\s\S]")
493 .expect("regex pattern [^\\s\\S] should always compile")
494 })
495 })
496 });
497
498 static HEADER_NO_ID_RE: LazyLock<Regex> = LazyLock::new(|| {
501 Regex::new(r"<h([1-6])>(.*?)</h[1-6]>").unwrap_or_else(|e| {
502 log::error!("Failed to compile HEADER_NO_ID_RE regex: {e}");
503 utils::never_matching_regex().unwrap_or_else(|_| {
504 #[allow(
505 clippy::expect_used,
506 reason = "This pattern is guaranteed to be valid"
507 )]
508 Regex::new(r"[^\s\S]")
509 .expect("regex pattern [^\\s\\S] should always compile")
510 })
511 })
512 });
513
514 static HTML_TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
516 Regex::new(r"<[^>]+>").unwrap_or_else(|e| {
517 log::error!("Failed to compile HTML_TAG_RE regex: {e}");
518 utils::never_matching_regex().unwrap_or_else(|_| {
519 #[allow(
520 clippy::expect_used,
521 reason = "This pattern is guaranteed to be valid"
522 )]
523 Regex::new(r"[^\s\S]")
524 .expect("regex pattern [^\\s\\S] should always compile")
525 })
526 })
527 });
528
529 let result = HEADER_ANCHOR_RE
531 .replace_all(html, |caps: ®ex::Captures| {
532 let level = &caps[1];
533 let prefix = &caps[2];
534 let id = &caps[3];
535 let suffix = &caps[4];
536 format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
537 })
538 .to_string();
539
540 HEADER_NO_ID_RE
542 .replace_all(&result, |caps: ®ex::Captures| {
543 let level = &caps[1];
544 let content = &caps[2];
545 let text_only = HTML_TAG_RE.replace_all(content, "");
547 let id = utils::slugify(&text_only);
548 if id.is_empty() {
549 format!("<h{level}>{content}</h{level}>")
551 } else {
552 format!("<h{level} id=\"{id}\">{content}</h{level}>")
553 }
554 })
555 .to_string()
556 }
557
558 fn comrak_options(&self) -> Options<'_> {
560 let mut options = Options::default();
561 if self.options.gfm {
563 options.extension.table = true;
564 options.extension.footnotes = true;
565 options.extension.strikethrough = true;
566 options.extension.tasklist = true;
567 options.extension.superscript = true;
568 options.extension.autolink = true;
569 }
570
571 options.render.r#unsafe = true;
574
575 options.extension.header_id_prefix = None;
577 options.extension.description_lists = true;
578 options
579 }
580
581 #[cfg(feature = "nixpkgs")]
583 fn process_manpage_references_html(&self, html: &str) -> String {
584 super::extensions::process_manpage_references(
585 html,
586 self.manpage_urls.as_ref(),
587 )
588 }
589
590 #[cfg(not(feature = "nixpkgs"))]
593 fn process_manpage_references_html(&self, html: &str) -> String {
594 html.to_string()
595 }
596
597 #[allow(
599 clippy::unused_self,
600 reason = "Method signature matches processor pattern"
601 )]
602 fn kuchiki_postprocess(&self, html: &str) -> String {
603 kuchiki_postprocess_html(html, |document| {
605 Self::apply_dom_transformations(document);
606 })
607 }
608
609 fn apply_dom_transformations(document: &kuchikikiki::NodeRef) {
611 Self::process_list_item_id_markers(document);
612 Self::process_header_anchor_comments(document);
613 Self::process_list_item_inline_anchors(document);
614 Self::process_paragraph_inline_anchors(document);
615 Self::process_remaining_inline_anchors(document);
616 Self::process_option_anchor_links(document);
617 Self::process_empty_auto_links(document);
618 Self::process_empty_html_links(document);
619 }
620
621 fn process_list_item_id_markers(document: &kuchikikiki::NodeRef) {
623 let mut to_modify = Vec::new();
624
625 for comment in document.inclusive_descendants() {
626 if let Some(comment_node) = comment.as_comment() {
627 let comment_text = comment_node.borrow();
628 if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
629 let id = comment_text[id_start + 16..].trim();
630 if !id.is_empty()
631 && id
632 .chars()
633 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
634 {
635 if let Some(parent) = comment.parent()
637 && let Some(element) = parent.as_element()
638 && element.name.local.as_ref() == "li"
639 {
640 to_modify.push((comment.clone(), id.to_string()));
641 }
642 }
643 }
644 }
645 }
646
647 for (comment_node, id) in to_modify {
648 let span = kuchikikiki::NodeRef::new_element(
649 markup5ever::QualName::new(
650 None,
651 markup5ever::ns!(html),
652 local_name!("span"),
653 ),
654 vec![
655 (
656 kuchikikiki::ExpandedName::new("", "id"),
657 kuchikikiki::Attribute {
658 prefix: None,
659 value: id,
660 },
661 ),
662 (
663 kuchikikiki::ExpandedName::new("", "class"),
664 kuchikikiki::Attribute {
665 prefix: None,
666 value: "nixos-anchor".into(),
667 },
668 ),
669 ],
670 );
671 comment_node.insert_after(span);
672 comment_node.detach();
673 }
674 }
675
676 fn process_header_anchor_comments(document: &kuchikikiki::NodeRef) {
678 let mut to_modify = Vec::new();
679
680 for comment in document.inclusive_descendants() {
681 if let Some(comment_node) = comment.as_comment() {
682 let comment_text = comment_node.borrow();
683 if let Some(anchor_start) = comment_text.find("anchor:") {
684 let id = comment_text[anchor_start + 7..].trim();
685 if !id.is_empty()
686 && id
687 .chars()
688 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
689 {
690 if let Some(parent) = comment.parent()
692 && let Some(element) = parent.as_element()
693 {
694 let tag_name = element.name.local.as_ref();
695 if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
696 to_modify.push((
697 parent.clone(),
698 comment.clone(),
699 id.to_string(),
700 ));
701 }
702 }
703 }
704 }
705 }
706 }
707
708 for (header_element, comment_node, id) in to_modify {
709 if let Some(element) = header_element.as_element() {
710 element
711 .attributes
712 .borrow_mut()
713 .insert(local_name!("id"), id);
714 comment_node.detach();
715 }
716 }
717 }
718
719 fn process_list_item_inline_anchors(document: &kuchikikiki::NodeRef) {
721 for li_node in safe_select(document, "li") {
722 let li_element = li_node;
723
724 let has_code = !safe_select(&li_element, "code, pre").is_empty();
726 if has_code {
727 continue; }
729
730 let text_content = li_element.text_contents();
731
732 if let Some(anchor_start) = text_content.find("[]{#")
733 && let Some(anchor_end) = text_content[anchor_start..].find('}')
734 {
735 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
736 if !id.is_empty()
737 && id
738 .chars()
739 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
740 {
741 let remaining_content =
742 &text_content[anchor_start + anchor_end + 1..];
743
744 for child in li_element.children() {
746 child.detach();
747 }
748
749 let span = kuchikikiki::NodeRef::new_element(
750 markup5ever::QualName::new(
751 None,
752 markup5ever::ns!(html),
753 local_name!("span"),
754 ),
755 vec![
756 (
757 kuchikikiki::ExpandedName::new("", "id"),
758 kuchikikiki::Attribute {
759 prefix: None,
760 value: id.into(),
761 },
762 ),
763 (
764 kuchikikiki::ExpandedName::new("", "class"),
765 kuchikikiki::Attribute {
766 prefix: None,
767 value: "nixos-anchor".into(),
768 },
769 ),
770 ],
771 );
772 li_element.append(span);
773 if !remaining_content.is_empty() {
774 li_element
775 .append(kuchikikiki::NodeRef::new_text(remaining_content));
776 }
777 }
778 }
779 }
780 }
781
782 fn process_paragraph_inline_anchors(document: &kuchikikiki::NodeRef) {
784 for p_node in safe_select(document, "p") {
785 let p_element = p_node;
786
787 let has_code = !safe_select(&p_element, "code, pre").is_empty();
789 if has_code {
790 continue; }
792
793 let text_content = p_element.text_contents();
794
795 if let Some(anchor_start) = text_content.find("[]{#")
796 && let Some(anchor_end) = text_content[anchor_start..].find('}')
797 {
798 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
799 if !id.is_empty()
800 && id
801 .chars()
802 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
803 {
804 let remaining_content =
805 &text_content[anchor_start + anchor_end + 1..];
806
807 for child in p_element.children() {
809 child.detach();
810 }
811
812 let span = kuchikikiki::NodeRef::new_element(
813 markup5ever::QualName::new(
814 None,
815 markup5ever::ns!(html),
816 local_name!("span"),
817 ),
818 vec![
819 (
820 kuchikikiki::ExpandedName::new("", "id"),
821 kuchikikiki::Attribute {
822 prefix: None,
823 value: id.into(),
824 },
825 ),
826 (
827 kuchikikiki::ExpandedName::new("", "class"),
828 kuchikikiki::Attribute {
829 prefix: None,
830 value: "nixos-anchor".into(),
831 },
832 ),
833 ],
834 );
835 p_element.append(span);
836 if !remaining_content.is_empty() {
837 p_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
838 }
839 }
840 }
841 }
842 }
843
844 fn process_remaining_inline_anchors(document: &kuchikikiki::NodeRef) {
846 let mut text_nodes_to_process = Vec::new();
847
848 for node in document.inclusive_descendants() {
849 if let Some(text_node) = node.as_text() {
850 let mut parent = node.parent();
852 let mut in_code = false;
853 while let Some(p) = parent {
854 if let Some(element) = p.as_element()
855 && (element.name.local == local_name!("code")
856 || element.name.local == local_name!("pre"))
857 {
858 in_code = true;
859 break;
860 }
861 parent = p.parent();
862 }
863
864 if !in_code {
866 let text_content = text_node.borrow().clone();
867 if text_content.contains("[]{#") {
868 text_nodes_to_process.push((node.clone(), text_content));
869 }
870 }
871 }
872 }
873
874 for (text_node, text_content) in text_nodes_to_process {
875 let mut last_end = 0;
876 let mut new_children = Vec::new();
877
878 let chars = text_content.chars().collect::<Vec<_>>();
880 let mut i = 0;
881 while i < chars.len() {
882 if i + 4 < chars.len()
883 && chars[i] == '['
884 && chars[i + 1] == ']'
885 && chars[i + 2] == '{'
886 && chars[i + 3] == '#'
887 {
888 let anchor_start = i;
890 i += 4; let mut id = String::new();
893 while i < chars.len() && chars[i] != '}' {
894 if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_'
895 {
896 id.push(chars[i]);
897 i += 1;
898 } else {
899 break;
900 }
901 }
902
903 if i < chars.len() && chars[i] == '}' && !id.is_empty() {
904 let anchor_end = i + 1;
906
907 if anchor_start > last_end {
909 let before_text: String =
910 chars[last_end..anchor_start].iter().collect();
911 if !before_text.is_empty() {
912 new_children.push(kuchikikiki::NodeRef::new_text(before_text));
913 }
914 }
915
916 let span = kuchikikiki::NodeRef::new_element(
918 markup5ever::QualName::new(
919 None,
920 markup5ever::ns!(html),
921 local_name!("span"),
922 ),
923 vec![
924 (
925 kuchikikiki::ExpandedName::new("", "id"),
926 kuchikikiki::Attribute {
927 prefix: None,
928 value: id,
929 },
930 ),
931 (
932 kuchikikiki::ExpandedName::new("", "class"),
933 kuchikikiki::Attribute {
934 prefix: None,
935 value: "nixos-anchor".into(),
936 },
937 ),
938 ],
939 );
940 new_children.push(span);
941
942 last_end = anchor_end;
943 i = anchor_end;
944 } else {
945 i += 1;
946 }
947 } else {
948 i += 1;
949 }
950 }
951
952 if last_end < chars.len() {
954 let after_text: String = chars[last_end..].iter().collect();
955 if !after_text.is_empty() {
956 new_children.push(kuchikikiki::NodeRef::new_text(after_text));
957 }
958 }
959
960 if !new_children.is_empty() {
962 for child in new_children {
963 text_node.insert_before(child);
964 }
965 text_node.detach();
966 }
967 }
968 }
969
970 fn process_empty_auto_links(document: &kuchikikiki::NodeRef) {
972 for link_node in safe_select(document, "a") {
973 let link_element = link_node;
974 if let Some(element) = link_element.as_element() {
975 let href = element
976 .attributes
977 .borrow()
978 .get(local_name!("href"))
979 .map(std::string::ToString::to_string);
980 let text_content = link_element.text_contents();
981
982 if let Some(href_value) = href
983 && href_value.starts_with('#')
984 && (text_content.trim().is_empty()
985 || text_content.trim() == "{{ANCHOR}}")
986 {
987 if text_content.trim() == "{{ANCHOR}}" {
989 for child in link_element.children() {
990 child.detach();
991 }
992 }
993 let display_text = Self::humanize_anchor_id(&href_value);
995 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
996 }
997 }
998 }
999 }
1000
1001 fn process_empty_html_links(document: &kuchikikiki::NodeRef) {
1003 for link_node in safe_select(document, "a[href^='#']") {
1004 let link_element = link_node;
1005 let text_content = link_element.text_contents();
1006
1007 if text_content.trim().is_empty() || text_content.trim() == "{{ANCHOR}}" {
1008 if text_content.trim() == "{{ANCHOR}}" {
1010 for child in link_element.children() {
1011 child.detach();
1012 }
1013 }
1014 if let Some(element) = link_element.as_element()
1015 && let Some(href) =
1016 element.attributes.borrow().get(local_name!("href"))
1017 {
1018 let display_text = Self::humanize_anchor_id(href);
1019 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
1020 }
1021 }
1022 }
1023 }
1024
1025 fn process_option_anchor_links(document: &kuchikikiki::NodeRef) {
1027 let mut to_modify = Vec::new();
1028
1029 for link_node in safe_select(document, "a[href^='#opt-']") {
1031 let link_element = link_node;
1032 if let Some(element) = link_element.as_element() {
1033 let href = element
1034 .attributes
1035 .borrow()
1036 .get(local_name!("href"))
1037 .map(std::string::ToString::to_string);
1038 let text_content = link_element.text_contents();
1039
1040 if let Some(href_value) = href
1041 && href_value.starts_with("#opt-")
1042 {
1043 let option_anchor = href_value[1..].to_string(); let needs_text_replacement = text_content.trim().is_empty()
1045 || text_content.trim() == "{{ANCHOR}}";
1046 to_modify.push((
1047 link_element.clone(),
1048 option_anchor,
1049 needs_text_replacement,
1050 ));
1051 }
1052 }
1053 }
1054
1055 for (link_element, option_anchor, needs_text_replacement) in to_modify {
1057 if let Some(element) = link_element.as_element() {
1058 let new_href = format!("options.html#{option_anchor}");
1059 element
1060 .attributes
1061 .borrow_mut()
1062 .insert(local_name!("href"), new_href);
1063
1064 if needs_text_replacement {
1065 for child in link_element.children() {
1067 child.detach();
1068 }
1069
1070 if let Some(option_path) = option_anchor.strip_prefix("opt-") {
1073 let option_name = option_path.replace('-', ".");
1074 link_element.append(kuchikikiki::NodeRef::new_text(option_name));
1075 }
1076 }
1077 }
1078 }
1079 }
1080
1081 fn humanize_anchor_id(anchor: &str) -> String {
1083 let cleaned = anchor.trim_start_matches('#');
1085
1086 let without_prefix = cleaned
1088 .trim_start_matches("sec-")
1089 .trim_start_matches("ssec-")
1090 .trim_start_matches("opt-");
1091
1092 let spaced = without_prefix.replace(['-', '_'], " ");
1094
1095 spaced
1097 .split_whitespace()
1098 .map(|word| {
1099 let mut chars = word.chars();
1100 chars.next().map_or_else(String::new, |c| {
1101 c.to_uppercase().collect::<String>() + chars.as_str()
1102 })
1103 })
1104 .collect::<Vec<String>>()
1105 .join(" ")
1106 }
1107}
1108
1109pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
1111 let mut text = String::new();
1112 for child in node.children() {
1113 match &child.data.borrow().value {
1114 NodeValue::Text(t) => text.push_str(t),
1115 NodeValue::Code(t) => text.push_str(&t.literal),
1116 NodeValue::Link(..)
1117 | NodeValue::Emph
1118 | NodeValue::Strong
1119 | NodeValue::Strikethrough
1120 | NodeValue::Superscript
1121 | NodeValue::Subscript
1122 | NodeValue::FootnoteReference(..) => {
1123 text.push_str(&extract_inline_text(child));
1124 },
1125 #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
1126 NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
1127 _ => {},
1128 }
1129 }
1130 text
1131}
1132
1133pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
1135 let mut files = Vec::with_capacity(100);
1136
1137 for entry in WalkDir::new(input_dir)
1138 .follow_links(true)
1139 .into_iter()
1140 .filter_map(Result::ok)
1141 {
1142 let path = entry.path();
1143 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
1144 files.push(path.to_owned());
1145 }
1146 }
1147
1148 trace!("Found {} markdown files to process", files.len());
1149 files
1150}
1151
/// Features that can be queried through `MarkdownProcessor::has_feature`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessorFeature {
  /// Reported as enabled when the `gfm` option is set.
  Gfm,
  /// Reported as enabled when the `nixpkgs` option is set.
  Nixpkgs,
  /// Reported as enabled when the `highlight_code` option is set.
  SyntaxHighlighting,
  /// Reported as enabled when manpage URL mappings have been loaded.
  ManpageUrls,
}
1164
1165fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
1167where
1168 F: FnOnce(&kuchikikiki::NodeRef),
1169{
1170 process_safe(
1171 html,
1172 |html| {
1173 use tendril::TendrilSink;
1174
1175 let document = kuchikikiki::parse_html().one(html);
1176 transform_fn(&document);
1177
1178 let mut out = Vec::new();
1179 let _ = document.serialize(&mut out);
1180 String::from_utf8_lossy(&out).into_owned()
1181 },
1182 html,
1183 )
1184}