1use std::{
2 collections::HashMap,
3 path::{Path, PathBuf},
4};
5
6use comrak::{
7 Arena, ComrakOptions,
8 nodes::{AstNode, NodeHeading, NodeValue},
9 parse_document,
10};
11use log::trace;
12use markup5ever::{local_name, ns};
13use walkdir::WalkDir;
14
15use crate::{
16 syntax::{SyntaxManager, create_default_manager},
17 types::{Header, MarkdownResult},
18 utils::{self, safely_process_markup},
19};
20
/// Options that control how [`MarkdownProcessor`] renders Markdown.
#[derive(Debug, Clone)]
pub struct MarkdownOptions {
    /// Enables the GFM handling: the comrak table/footnote/strikethrough/
    /// tasklist/superscript extensions and the autolink pass.
    pub gfm: bool,

    /// Enables the nixpkgs-flavoured passes (file includes, admonition and
    /// figure blocks, inline anchors, role markup, manpage references).
    pub nixpkgs: bool,

    /// Whether code blocks are passed through the syntax highlighter.
    pub highlight_code: bool,

    /// Theme name forwarded to the syntax highlighter, if any.
    pub highlight_theme: Option<String>,

    /// Path given to `utils::load_manpage_urls` when the processor is created.
    pub manpage_urls_path: Option<String>,
}
39
40impl Default for MarkdownOptions {
41 fn default() -> Self {
42 Self {
43 gfm: cfg!(feature = "gfm"),
44 nixpkgs: cfg!(feature = "nixpkgs"),
45 highlight_code: cfg!(feature = "syntastica"),
46 manpage_urls_path: None,
47 highlight_theme: None,
48 }
49 }
50}
51
/// Renders Markdown to HTML according to a set of [`MarkdownOptions`].
pub struct MarkdownProcessor {
    /// Options supplied at construction time.
    options: MarkdownOptions,
    /// Manpage URL table; `None` when no path was configured or loading it
    /// failed.
    manpage_urls: Option<HashMap<String, String>>,
    /// Syntax highlighter; `None` when highlighting is disabled or the
    /// default manager could not be created.
    syntax_manager: Option<SyntaxManager>,
}
58
59impl MarkdownProcessor {
60 #[must_use]
62 pub fn new(options: MarkdownOptions) -> Self {
63 let manpage_urls = options
64 .manpage_urls_path
65 .as_ref()
66 .and_then(|path| utils::load_manpage_urls(path).ok());
67
68 let syntax_manager = if options.highlight_code {
69 create_default_manager().ok()
70 } else {
71 None
72 };
73
74 Self {
75 options,
76 manpage_urls,
77 syntax_manager,
78 }
79 }
80
81 #[must_use]
83 pub fn highlight_codeblocks(&self, html: &str) -> String {
84 if !self.options.highlight_code || self.syntax_manager.is_none() {
85 return html.to_string();
86 }
87
88 use kuchikikiki::parse_html;
89 use tendril::TendrilSink;
90
91 let document = parse_html().one(html);
92
93 let mut code_blocks = Vec::new();
95 for pre_node in document.select("pre > code").unwrap() {
96 let code_node = pre_node.as_node();
97 if let Some(element) = code_node.as_element() {
98 let class_attr = element
99 .attributes
100 .borrow()
101 .get("class")
102 .map(std::string::ToString::to_string);
103 let language = class_attr
104 .as_deref()
105 .and_then(|s| s.strip_prefix("language-"))
106 .unwrap_or("text");
107 let code_text = code_node.text_contents();
108
109 if let Some(pre_parent) = code_node.parent() {
110 code_blocks.push((pre_parent.clone(), code_text, language.to_string()));
111 }
112 }
113 }
114
115 for (pre_element, code_text, language) in code_blocks {
117 if let Some(highlighted) = self.highlight_code_html(&code_text, &language) {
118 let fragment = parse_html().one(highlighted.as_str());
120 pre_element.insert_after(fragment);
121 pre_element.detach();
122 }
123 }
124
125 let mut buf = Vec::new();
126 document.serialize(&mut buf).unwrap();
127 String::from_utf8(buf).unwrap_or_default()
128 }
129
130 fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
132 if !self.options.highlight_code {
133 return None;
134 }
135
136 let syntax_manager = self.syntax_manager.as_ref()?;
137
138 syntax_manager
139 .highlight_code(code, language, self.options.highlight_theme.as_deref())
140 .ok()
141 }
142
143 #[must_use]
145 pub fn render(&self, markdown: &str) -> MarkdownResult {
146 let preprocessed = self.preprocess(markdown);
148
149 let (headers, title) = self.extract_headers(&preprocessed);
151
152 let html = self.convert_to_html(&preprocessed);
154
155 let html = if cfg!(feature = "ndg-flavored") {
157 #[cfg(feature = "ndg-flavored")]
158 {
159 process_option_references(&html)
160 }
161 #[cfg(not(feature = "ndg-flavored"))]
162 {
163 html
164 }
165 } else {
166 html
167 };
168
169 let html = if self.options.gfm {
171 self.process_autolinks(&html)
172 } else {
173 html
174 };
175
176 let html = if self.options.nixpkgs {
178 self.process_manpage_references_html(&html)
179 } else {
180 html
181 };
182
183 let html = if self.options.highlight_code {
185 self.highlight_codeblocks(&html)
186 } else {
187 html
188 };
189
190 let html = self.kuchiki_postprocess(&html);
192
193 MarkdownResult {
194 html,
195 headers,
196 title,
197 }
198 }
199
200 fn preprocess(&self, content: &str) -> String {
202 let with_includes = if self.options.nixpkgs {
204 #[cfg(feature = "nixpkgs")]
205 {
206 process_file_includes(content, std::path::Path::new("."))
207 }
208 #[cfg(not(feature = "nixpkgs"))]
209 {
210 content.to_string()
211 }
212 } else {
213 content.to_string()
214 };
215
216 let preprocessed = if self.options.nixpkgs {
218 self.process_block_elements(&with_includes)
219 } else {
220 with_includes
221 };
222
223 let with_inline_anchors = if self.options.nixpkgs {
225 self.process_inline_anchors(&preprocessed)
226 } else {
227 preprocessed
228 };
229
230 if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
232 self.process_role_markup(&with_inline_anchors)
233 } else {
234 with_inline_anchors
235 }
236 }
237
238 fn process_inline_anchors(&self, content: &str) -> String {
242 if !self.options.nixpkgs {
243 return content.to_string();
244 }
245 let mut result = String::with_capacity(content.len() + 100);
246 let mut in_code_block = false;
247 let mut code_fence_char = None;
248 let mut code_fence_count = 0;
249
250 for line in content.lines() {
251 let trimmed = line.trim_start();
252
253 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
255 let fence_char = trimmed.chars().next().unwrap();
256 let fence_count = trimmed.chars().take_while(|&c| c == fence_char).count();
257
258 if fence_count >= 3 {
259 if !in_code_block {
260 in_code_block = true;
262 code_fence_char = Some(fence_char);
263 code_fence_count = fence_count;
264 } else if code_fence_char == Some(fence_char) && fence_count >= code_fence_count
265 {
266 in_code_block = false;
268 code_fence_char = None;
269 code_fence_count = 0;
270 }
271 }
272 }
273
274 if in_code_block {
276 result.push_str(line);
278 result.push('\n');
279 } else {
280 if let Some(anchor_start) = Self::find_list_item_anchor(trimmed) {
283 if let Some(processed_line) = Self::process_list_item_anchor(line, anchor_start)
284 {
285 result.push_str(&processed_line);
286 result.push('\n');
287 continue;
288 }
289 }
290
291 result.push_str(&Self::process_line_anchors(line));
293 result.push('\n');
294 }
295 }
296
297 result
298 }
299
300 fn find_list_item_anchor(trimmed: &str) -> Option<usize> {
302 if (trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ "))
304 && trimmed.len() > 2
305 {
306 let after_marker = &trimmed[2..];
307 if after_marker.starts_with("[]{#") {
308 return Some(2);
309 }
310 }
311
312 let mut i = 0;
314 while i < trimmed.len() && trimmed.chars().nth(i).unwrap_or(' ').is_ascii_digit() {
315 i += 1;
316 }
317 if i > 0 && i < trimmed.len() - 1 && trimmed.chars().nth(i) == Some('.') {
318 let after_marker = &trimmed[i + 1..];
319 if after_marker.starts_with(" []{#") {
320 return Some(i + 2);
321 }
322 }
323
324 None
325 }
326
327 fn process_list_item_anchor(line: &str, anchor_start: usize) -> Option<String> {
329 let before_anchor = &line[..anchor_start];
330 let after_marker = &line[anchor_start..];
331
332 if !after_marker.starts_with("[]{#") {
333 return None;
334 }
335
336 if let Some(anchor_end) = after_marker.find('}') {
338 let id = &after_marker[4..anchor_end]; let remaining_content = &after_marker[anchor_end + 1..]; if id
343 .chars()
344 .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
345 && !id.is_empty()
346 {
347 return Some(format!(
348 "{before_anchor}<span id=\"{id}\" class=\"nixos-anchor\"></span>{remaining_content}"
349 ));
350 }
351 }
352
353 None
354 }
355
356 fn process_line_anchors(line: &str) -> String {
358 let mut result = String::with_capacity(line.len());
359 let mut chars = line.chars().peekable();
360
361 while let Some(ch) = chars.next() {
362 if ch == '[' && chars.peek() == Some(&']') {
363 chars.next(); if chars.peek() == Some(&'{') {
367 chars.next(); if chars.peek() == Some(&'#') {
369 chars.next(); let mut id = String::new();
373 while let Some(&next_ch) = chars.peek() {
374 if next_ch == '}' {
375 chars.next(); if !id.is_empty()
379 && id
380 .chars()
381 .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
382 {
383 result.push_str(&format!(
384 "<span id=\"{id}\" class=\"nixos-anchor\"></span>"
385 ));
386 } else {
387 result.push_str(&format!("[]{{{{#{id}}}}}"));
389 }
390 break;
391 } else if next_ch.is_ascii_alphanumeric()
392 || next_ch == '-'
393 || next_ch == '_'
394 {
395 id.push(next_ch);
396 chars.next();
397 } else {
398 result.push_str(&format!("[]{{{{#{id}"));
400 break;
401 }
402 }
403 } else {
404 result.push_str("]{");
406 }
407 } else {
408 result.push(']');
410 }
411 } else {
412 result.push(ch);
413 }
414 }
415
416 result
417 }
418
419 fn process_block_elements(&self, content: &str) -> String {
422 if !self.options.nixpkgs {
423 return content.to_string();
424 }
425 let mut result = Vec::new();
426 let mut lines = content.lines().peekable();
427 let mut in_code_block = false;
428 let mut code_fence_char = None;
429 let mut code_fence_count = 0;
430
431 while let Some(line) = lines.next() {
432 let trimmed = line.trim_start();
434 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
435 let fence_char = trimmed.chars().next().unwrap();
436 let fence_count = trimmed.chars().take_while(|&c| c == fence_char).count();
437
438 if fence_count >= 3 {
439 if !in_code_block {
440 in_code_block = true;
442 code_fence_char = Some(fence_char);
443 code_fence_count = fence_count;
444 } else if code_fence_char == Some(fence_char) && fence_count >= code_fence_count
445 {
446 in_code_block = false;
448 code_fence_char = None;
449 code_fence_count = 0;
450 }
451 }
452 }
453
454 if !in_code_block {
456 if let Some((callout_type, initial_content)) = Self::parse_github_callout(line) {
458 let content = self.collect_github_callout_content(&mut lines, initial_content);
459 let admonition = Self::render_admonition(&callout_type, None, &content);
460 result.push(admonition);
461 continue;
462 }
463
464 if let Some((adm_type, id)) = Self::parse_fenced_admonition_start(line) {
466 let content = self.collect_fenced_content(&mut lines);
467 let admonition = Self::render_admonition(&adm_type, id.as_deref(), &content);
468 result.push(admonition);
469 continue;
470 }
471
472 if let Some((id, title, content)) = Self::parse_figure_block(line, &mut lines) {
474 let figure = Self::render_figure(id.as_deref(), &title, &content);
475 result.push(figure);
476 continue;
477 }
478
479 if !line.is_empty() && !line.starts_with(':') {
481 if let Some(next_line) = lines.peek() {
482 if next_line.starts_with(": ") {
483 let term = line;
484 let def_line = lines.next().unwrap();
485 let definition = &def_line[4..]; let dl = format!("<dl>\n<dt>{term}</dt>\n<dd>{definition}</dd>\n</dl>");
487 result.push(dl);
488 continue;
489 }
490 }
491 }
492 }
493
494 result.push(line.to_string());
496 }
497
498 result.join("\n")
499 }
500
501 fn parse_github_callout(line: &str) -> Option<(String, String)> {
503 let trimmed = line.trim_start();
504 if !trimmed.starts_with("> [!") {
505 return None;
506 }
507
508 if let Some(close_bracket) = trimmed.find(']') {
510 if close_bracket > 4 {
511 let callout_type = &trimmed[4..close_bracket];
512
513 match callout_type {
515 "NOTE" | "TIP" | "IMPORTANT" | "WARNING" | "CAUTION" | "DANGER" => {
516 let content = trimmed[close_bracket + 1..].trim();
517 return Some((callout_type.to_lowercase(), content.to_string()));
518 }
519 _ => return None,
520 }
521 }
522 }
523
524 None
525 }
526
527 fn collect_github_callout_content(
529 &self,
530 lines: &mut std::iter::Peekable<std::str::Lines>,
531 initial_content: String,
532 ) -> String {
533 let mut content = String::new();
534
535 if !initial_content.is_empty() {
536 content.push_str(&initial_content);
537 content.push('\n');
538 }
539
540 while let Some(line) = lines.peek() {
541 let trimmed = line.trim_start();
542 if trimmed.starts_with('>') {
543 let content_part = trimmed.strip_prefix('>').unwrap_or("").trim_start();
544 content.push_str(content_part);
545 content.push('\n');
546 lines.next(); } else {
548 break;
549 }
550 }
551
552 content.trim().to_string()
553 }
554
555 fn parse_fenced_admonition_start(line: &str) -> Option<(String, Option<String>)> {
557 let trimmed = line.trim();
558 if !trimmed.starts_with(":::") {
559 return None;
560 }
561
562 let after_colons = trimmed[3..].trim_start();
563 if !after_colons.starts_with("{.") {
564 return None;
565 }
566
567 if let Some(close_brace) = after_colons.find('}') {
569 let content = &after_colons[2..close_brace]; let parts: Vec<&str> = content.split_whitespace().collect();
573 if let Some(&adm_type) = parts.first() {
574 let id = parts
575 .iter()
576 .find(|part| part.starts_with('#'))
577 .map(|id_part| id_part[1..].to_string()); return Some((adm_type.to_string(), id));
580 }
581 }
582
583 None
584 }
585
586 fn collect_fenced_content(&self, lines: &mut std::iter::Peekable<std::str::Lines>) -> String {
588 let mut content = String::new();
589
590 for line in lines.by_ref() {
591 if line.trim().starts_with(":::") {
592 break;
593 }
594 content.push_str(line);
595 content.push('\n');
596 }
597
598 content.trim().to_string()
599 }
600
601 fn parse_figure_block(
603 line: &str,
604 lines: &mut std::iter::Peekable<std::str::Lines>,
605 ) -> Option<(Option<String>, String, String)> {
606 let trimmed = line.trim();
607 if !trimmed.starts_with(":::") {
608 return None;
609 }
610
611 let after_colons = trimmed[3..].trim_start();
612 if !after_colons.starts_with("{.figure") {
613 return None;
614 }
615
616 let id = if let Some(hash_pos) = after_colons.find('#') {
618 if let Some(close_brace) = after_colons.find('}') {
619 if hash_pos < close_brace {
620 Some(after_colons[hash_pos + 1..close_brace].trim().to_string())
621 } else {
622 None
623 }
624 } else {
625 None
626 }
627 } else {
628 None
629 };
630
631 let title = if let Some(title_line) = lines.next() {
633 let trimmed_title = title_line.trim();
634 if let Some(this) = trimmed_title.strip_prefix('#') {
635 { this.trim_matches(char::is_whitespace) }.to_string()
636 } else {
637 return None;
639 }
640 } else {
641 return None;
642 };
643
644 let mut content = String::new();
646 for line in lines.by_ref() {
647 if line.trim().starts_with(":::") {
648 break;
649 }
650 content.push_str(line);
651 content.push('\n');
652 }
653
654 Some((id, title, content.trim().to_string()))
655 }
656
657 fn render_admonition(adm_type: &str, id: Option<&str>, content: &str) -> String {
659 let capitalized_type = crate::utils::capitalize_first(adm_type);
660 let id_attr = id.map_or(String::new(), |id| format!(" id=\"{id}\""));
661
662 format!(
663 "<div class=\"admonition {adm_type}\"{id_attr}>\n<p class=\"admonition-title\">{capitalized_type}</p>\n\n{content}\n\n</div>"
664 )
665 }
666
667 fn render_figure(id: Option<&str>, title: &str, content: &str) -> String {
669 let id_attr = id.map_or(String::new(), |id| format!(" id=\"{id}\""));
670
671 format!("<figure{id_attr}>\n<figcaption>{title}</figcaption>\n{content}\n</figure>")
672 }
673
674 #[must_use]
676 pub fn extract_headers(&self, content: &str) -> (Vec<Header>, Option<String>) {
677 let arena = Arena::new();
678 let options = self.comrak_options();
679
680 let mut normalized = String::with_capacity(content.len());
682 for line in content.lines() {
683 let trimmed = line.trim_end();
684 if !trimmed.starts_with('#') {
685 if let Some(anchor_start) = trimmed.rfind("{#") {
686 if let Some(anchor_end) = trimmed[anchor_start..].find('}') {
687 let text = trimmed[..anchor_start].trim_end();
688 let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
689 normalized.push_str(&format!("## {text} {{#{id}}}\n"));
690 continue;
691 }
692 }
693 }
694 normalized.push_str(line);
695 normalized.push('\n');
696 }
697
698 let root = parse_document(&arena, &normalized, &options);
699
700 let mut headers = Vec::new();
701 let mut found_title = None;
702
703 for node in root.descendants() {
704 if let NodeValue::Heading(NodeHeading { level, .. }) = &node.data.borrow().value {
705 let mut text = String::new();
706 let mut explicit_id = None;
707
708 for child in node.children() {
709 match &child.data.borrow().value {
710 NodeValue::Text(t) => text.push_str(t),
711 NodeValue::Code(t) => text.push_str(&t.literal),
712 NodeValue::Link(..) => text.push_str(&extract_inline_text(child)),
713 NodeValue::Emph => text.push_str(&extract_inline_text(child)),
714 NodeValue::Strong => text.push_str(&extract_inline_text(child)),
715 NodeValue::Strikethrough => text.push_str(&extract_inline_text(child)),
716 NodeValue::Superscript => text.push_str(&extract_inline_text(child)),
717 NodeValue::Subscript => text.push_str(&extract_inline_text(child)),
718 NodeValue::FootnoteReference(..) => {
719 text.push_str(&extract_inline_text(child));
720 }
721 NodeValue::HtmlInline(html) => {
722 let html_str = html.as_str();
724 if let Some(start) = html_str.find("{#") {
725 if let Some(end) = html_str[start..].find('}') {
726 let anchor = &html_str[start + 2..start + end];
727 explicit_id = Some(anchor.to_string());
728 }
729 }
730 }
731 NodeValue::Image(..) => {}
732 _ => {}
733 }
734 }
735
736 let trimmed = text.trim_end();
738 let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
739 if let Some(end) = trimmed[start..].find('}') {
740 let anchor = &trimmed[start + 2..start + end];
741 (trimmed[..start].trim_end().to_string(), anchor.to_string())
742 } else {
743 (
744 text.clone(),
745 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
746 )
747 }
748 } else {
749 (
750 text.clone(),
751 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
752 )
753 };
754 if *level == 1 && found_title.is_none() {
755 found_title = Some(final_text.clone());
756 }
757 headers.push(Header {
758 text: final_text,
759 level: *level,
760 id,
761 });
762 }
763 }
764
765 (headers, found_title)
766 }
767
768 fn convert_to_html(&self, content: &str) -> String {
770 safely_process_markup(
771 content,
772 |content| {
773 let arena = Arena::new();
774 let options = self.comrak_options();
775 let root = parse_document(&arena, content, &options);
776
777 let prompt_transformer = PromptTransformer;
779 prompt_transformer.transform(root);
780
781 let mut html_output = Vec::new();
782 comrak::format_html(root, &options, &mut html_output).unwrap_or_default();
783 let html = String::from_utf8(html_output).unwrap_or_default();
784
785 self.process_header_anchors_html(&html)
787 },
788 "<div class=\"error\">Error processing markdown content</div>",
789 )
790 }
791
792 fn process_header_anchors_html(&self, html: &str) -> String {
794 use std::sync::LazyLock;
795
796 use regex::Regex;
797
798 static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
799 Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>").unwrap_or_else(
800 |e| {
801 log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
802 utils::never_matching_regex()
803 },
804 )
805 });
806
807 HEADER_ANCHOR_RE
808 .replace_all(html, |caps: ®ex::Captures| {
809 let level = &caps[1];
810 let prefix = &caps[2];
811 let id = &caps[3];
812 let suffix = &caps[4];
813 format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
814 })
815 .to_string()
816 }
817
818 fn comrak_options(&self) -> ComrakOptions<'_> {
820 let mut options = ComrakOptions::default();
821 if self.options.gfm {
822 options.extension.table = true;
823 options.extension.footnotes = true;
824 options.extension.strikethrough = true;
825 options.extension.tasklist = true;
826 options.extension.superscript = true;
827 }
828 options.render.unsafe_ = true;
829 options.extension.header_ids = None;
831 options
832 }
833
    /// Returns the manpage URL table, or `None` when none was loaded.
    #[must_use]
    pub fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
        self.manpage_urls.as_ref()
    }
841
    /// Runs the free function `process_role_markup` on `content`, passing
    /// this processor's manpage URL table.
    ///
    /// Only compiled with the `nixpkgs` or `ndg-flavored` feature.
    #[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
    #[must_use]
    pub fn process_role_markup(&self, content: &str) -> String {
        process_role_markup(content, self.manpage_urls.as_ref())
    }
849
    /// Runs the free function `process_autolinks` on `html`.
    ///
    /// Only compiled with the `gfm` feature.
    #[cfg(feature = "gfm")]
    fn process_autolinks(&self, html: &str) -> String {
        process_autolinks(html)
    }
856
    /// Runs the free function `process_manpage_references` on `html`, passing
    /// this processor's manpage URL table.
    ///
    /// Only compiled with the `nixpkgs` feature.
    #[cfg(feature = "nixpkgs")]
    fn process_manpage_references_html(&self, html: &str) -> String {
        process_manpage_references(html, self.manpage_urls.as_ref())
    }
863
    /// Parses `html` into a DOM, applies the anchor and link clean-up passes
    /// in a fixed order, and serializes the result.
    ///
    /// The work is wrapped in `safely_process_markup` with an empty string as
    /// the fallback value.
    fn kuchiki_postprocess(&self, html: &str) -> String {
        safely_process_markup(
            html,
            |html| {
                use tendril::TendrilSink;

                let document = kuchikikiki::parse_html().one(html);

                // Comment markers are resolved first.
                self.process_list_item_id_markers(&document);

                self.process_header_anchor_comments(&document);

                // Then literal `[]{#id}` text: list items, paragraphs, and
                // finally any text node that still contains one.
                self.process_list_item_inline_anchors(&document);

                self.process_paragraph_inline_anchors(&document);

                self.process_remaining_inline_anchors(&document);

                // Finally give empty `#fragment` links a visible label.
                self.process_empty_auto_links(&document);

                self.process_empty_html_links(&document);

                let mut out = Vec::new();
                // A serialization error is ignored; whatever was written is returned.
                document.serialize(&mut out).ok();
                String::from_utf8(out).unwrap_or_default()
            },
            "",
        )
    }
902
903 fn process_list_item_id_markers(&self, document: &kuchikikiki::NodeRef) {
905 let mut to_modify = Vec::new();
906
907 for comment in document.inclusive_descendants() {
908 if let Some(comment_node) = comment.as_comment() {
909 let comment_text = comment_node.borrow();
910 if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
911 let id = comment_text[id_start + 16..].trim();
912 if !id.is_empty()
913 && id
914 .chars()
915 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
916 {
917 if let Some(parent) = comment.parent() {
919 if let Some(element) = parent.as_element() {
920 if element.name.local.as_ref() == "li" {
921 to_modify.push((comment.clone(), id.to_string()));
922 }
923 }
924 }
925 }
926 }
927 }
928 }
929
930 for (comment_node, id) in to_modify {
931 let span = kuchikikiki::NodeRef::new_element(
932 markup5ever::QualName::new(None, ns!(html), local_name!("span")),
933 vec![
934 (
935 kuchikikiki::ExpandedName::new("", "id"),
936 kuchikikiki::Attribute {
937 prefix: None,
938 value: id,
939 },
940 ),
941 (
942 kuchikikiki::ExpandedName::new("", "class"),
943 kuchikikiki::Attribute {
944 prefix: None,
945 value: "nixos-anchor".into(),
946 },
947 ),
948 ],
949 );
950 comment_node.insert_after(span);
951 comment_node.detach();
952 }
953 }
954
955 fn process_header_anchor_comments(&self, document: &kuchikikiki::NodeRef) {
957 let mut to_modify = Vec::new();
958
959 for comment in document.inclusive_descendants() {
960 if let Some(comment_node) = comment.as_comment() {
961 let comment_text = comment_node.borrow();
962 if let Some(anchor_start) = comment_text.find("anchor:") {
963 let id = comment_text[anchor_start + 7..].trim();
964 if !id.is_empty()
965 && id
966 .chars()
967 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
968 {
969 if let Some(parent) = comment.parent() {
971 if let Some(element) = parent.as_element() {
972 let tag_name = element.name.local.as_ref();
973 if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
974 to_modify.push((
975 parent.clone(),
976 comment.clone(),
977 id.to_string(),
978 ));
979 }
980 }
981 }
982 }
983 }
984 }
985 }
986
987 for (header_element, comment_node, id) in to_modify {
988 if let Some(element) = header_element.as_element() {
989 element
990 .attributes
991 .borrow_mut()
992 .insert(local_name!("id"), id);
993 comment_node.detach();
994 }
995 }
996 }
997
998 fn process_list_item_inline_anchors(&self, document: &kuchikikiki::NodeRef) {
1000 for li_node in document.select("li").unwrap() {
1001 let li_element = li_node.as_node();
1002
1003 let has_code = li_element.select("code, pre").is_ok()
1005 && li_element.select("code, pre").unwrap().next().is_some();
1006 if has_code {
1007 continue; }
1009
1010 let text_content = li_element.text_contents();
1011
1012 if let Some(anchor_start) = text_content.find("[]{#") {
1013 if let Some(anchor_end) = text_content[anchor_start..].find('}') {
1014 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
1015 if !id.is_empty()
1016 && id
1017 .chars()
1018 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
1019 {
1020 let remaining_content = &text_content[anchor_start + anchor_end + 1..];
1021
1022 for child in li_element.children() {
1024 child.detach();
1025 }
1026
1027 let span = kuchikikiki::NodeRef::new_element(
1028 markup5ever::QualName::new(None, ns!(html), local_name!("span")),
1029 vec![
1030 (
1031 kuchikikiki::ExpandedName::new("", "id"),
1032 kuchikikiki::Attribute {
1033 prefix: None,
1034 value: id.into(),
1035 },
1036 ),
1037 (
1038 kuchikikiki::ExpandedName::new("", "class"),
1039 kuchikikiki::Attribute {
1040 prefix: None,
1041 value: "nixos-anchor".into(),
1042 },
1043 ),
1044 ],
1045 );
1046 li_element.append(span);
1047 if !remaining_content.is_empty() {
1048 li_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
1049 }
1050 }
1051 }
1052 }
1053 }
1054 }
1055
1056 fn process_paragraph_inline_anchors(&self, document: &kuchikikiki::NodeRef) {
1058 for p_node in document.select("p").unwrap() {
1059 let p_element = p_node.as_node();
1060
1061 let has_code = p_element.select("code, pre").is_ok()
1063 && p_element.select("code, pre").unwrap().next().is_some();
1064 if has_code {
1065 continue; }
1067
1068 let text_content = p_element.text_contents();
1069
1070 if let Some(anchor_start) = text_content.find("[]{#") {
1071 if let Some(anchor_end) = text_content[anchor_start..].find('}') {
1072 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
1073 if !id.is_empty()
1074 && id
1075 .chars()
1076 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
1077 {
1078 let remaining_content = &text_content[anchor_start + anchor_end + 1..];
1079
1080 for child in p_element.children() {
1082 child.detach();
1083 }
1084
1085 let span = kuchikikiki::NodeRef::new_element(
1086 markup5ever::QualName::new(None, ns!(html), local_name!("span")),
1087 vec![
1088 (
1089 kuchikikiki::ExpandedName::new("", "id"),
1090 kuchikikiki::Attribute {
1091 prefix: None,
1092 value: id.into(),
1093 },
1094 ),
1095 (
1096 kuchikikiki::ExpandedName::new("", "class"),
1097 kuchikikiki::Attribute {
1098 prefix: None,
1099 value: "nixos-anchor".into(),
1100 },
1101 ),
1102 ],
1103 );
1104 p_element.append(span);
1105 if !remaining_content.is_empty() {
1106 p_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
1107 }
1108 }
1109 }
1110 }
1111 }
1112 }
1113
    /// Converts every remaining `[]{#id}` occurrence inside text nodes into
    /// `<span id="id" class="nixos-anchor"></span>`, splitting the text node
    /// around each anchor.
    ///
    /// Text nodes that have a `<code>` or `<pre>` ancestor are skipped. An id
    /// is a non-empty run of alphanumerics, `-` and `_` terminated by `}`.
    fn process_remaining_inline_anchors(&self, document: &kuchikikiki::NodeRef) {
        let mut text_nodes_to_process = Vec::new();

        // Pass 1: collect candidate text nodes without touching the tree.
        for node in document.inclusive_descendants() {
            if let Some(text_node) = node.as_text() {
                let mut parent = node.parent();
                // Walk up the ancestors looking for a code context.
                let mut in_code = false;
                while let Some(p) = parent {
                    if let Some(element) = p.as_element() {
                        if element.name.local == local_name!("code")
                            || element.name.local == local_name!("pre")
                        {
                            in_code = true;
                            break;
                        }
                    }
                    parent = p.parent();
                }

                if !in_code {
                    let text_content = text_node.borrow().clone();
                    if text_content.contains("[]{#") {
                        text_nodes_to_process.push((node.clone(), text_content));
                    }
                }
            }
        }

        // Pass 2: rebuild each collected text node as text/span/text pieces.
        for (text_node, text_content) in text_nodes_to_process {
            // Char index just past the last anchor emitted so far.
            let mut last_end = 0;
            let mut new_children = Vec::new();

            // Work on chars so the indices below are char positions.
            let chars = text_content.chars().collect::<Vec<_>>();
            let mut i = 0;
            while i < chars.len() {
                // Needs `[]{#` plus at least one more character.
                if i + 4 < chars.len()
                    && chars[i] == '['
                    && chars[i + 1] == ']'
                    && chars[i + 2] == '{'
                    && chars[i + 3] == '#'
                {
                    let anchor_start = i;
                    i += 4;
                    let mut id = String::new();
                    // Read id characters up to `}` or the first invalid character.
                    while i < chars.len() && chars[i] != '}' {
                        if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_' {
                            id.push(chars[i]);
                            i += 1;
                        } else {
                            break;
                        }
                    }

                    if i < chars.len() && chars[i] == '}' && !id.is_empty() {
                        let anchor_end = i + 1;

                        // Emit the text between the previous anchor and this one.
                        if anchor_start > last_end {
                            let before_text: String =
                                chars[last_end..anchor_start].iter().collect();
                            if !before_text.is_empty() {
                                new_children.push(kuchikikiki::NodeRef::new_text(before_text));
                            }
                        }

                        let span = kuchikikiki::NodeRef::new_element(
                            markup5ever::QualName::new(None, ns!(html), local_name!("span")),
                            vec![
                                (
                                    kuchikikiki::ExpandedName::new("", "id"),
                                    kuchikikiki::Attribute {
                                        prefix: None,
                                        value: id,
                                    },
                                ),
                                (
                                    kuchikikiki::ExpandedName::new("", "class"),
                                    kuchikikiki::Attribute {
                                        prefix: None,
                                        value: "nixos-anchor".into(),
                                    },
                                ),
                            ],
                        );
                        new_children.push(span);

                        last_end = anchor_end;
                        i = anchor_end;
                    } else {
                        // Malformed anchor: the scanned characters stay part
                        // of the surrounding text.
                        i += 1;
                    }
                } else {
                    i += 1;
                }
            }

            // Trailing text after the last anchor (or the whole text when no
            // anchor was valid).
            if last_end < chars.len() {
                let after_text: String = chars[last_end..].iter().collect();
                if !after_text.is_empty() {
                    new_children.push(kuchikikiki::NodeRef::new_text(after_text));
                }
            }

            // Splice the pieces in before the original node, then drop it.
            if !new_children.is_empty() {
                for child in new_children {
                    text_node.insert_before(child);
                }
                text_node.detach();
            }
        }
    }
1235
1236 fn process_empty_auto_links(&self, document: &kuchikikiki::NodeRef) {
1238 for link_node in document.select("a").unwrap() {
1239 let link_element = link_node.as_node();
1240 if let Some(element) = link_element.as_element() {
1241 let href = element
1242 .attributes
1243 .borrow()
1244 .get(local_name!("href"))
1245 .map(std::string::ToString::to_string);
1246 let text_content = link_element.text_contents();
1247
1248 if let Some(href_value) = href {
1249 if href_value.starts_with('#') && text_content.trim().is_empty() {
1250 let display_text = self.humanize_anchor_id(&href_value);
1252 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
1253 }
1254 }
1255 }
1256 }
1257 }
1258
1259 fn process_empty_html_links(&self, document: &kuchikikiki::NodeRef) {
1261 for link_node in document.select("a[href^='#']").unwrap() {
1262 let link_element = link_node.as_node();
1263 let text_content = link_element.text_contents();
1264
1265 if text_content.trim().is_empty() {
1266 if let Some(element) = link_element.as_element() {
1267 if let Some(href) = element.attributes.borrow().get(local_name!("href")) {
1268 let display_text = self.humanize_anchor_id(href);
1269 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
1270 }
1271 }
1272 }
1273 }
1274 }
1275
1276 fn humanize_anchor_id(&self, anchor: &str) -> String {
1278 let cleaned = anchor.trim_start_matches('#');
1280
1281 let without_prefix = cleaned
1283 .trim_start_matches("sec-")
1284 .trim_start_matches("ssec-")
1285 .trim_start_matches("opt-");
1286
1287 let spaced = without_prefix.replace(['-', '_'], " ");
1289
1290 spaced
1292 .split_whitespace()
1293 .map(|word| {
1294 let mut chars = word.chars();
1295 chars.next().map_or_else(String::new, |c| {
1296 c.to_uppercase().collect::<String>() + chars.as_str()
1297 })
1298 })
1299 .collect::<Vec<String>>()
1300 .join(" ")
1301 }
1302}
1303
/// An in-place rewrite of a comrak AST.
pub trait AstTransformer {
    /// Applies the transformation to the tree rooted at `node`.
    fn transform<'a>(&self, node: &'a AstNode<'a>);
}
1308
/// [`AstTransformer`] that turns inline code spans beginning with a shell
/// (`$ `) or `nix-repl>` prompt into styled inline HTML.
pub struct PromptTransformer;
1312
1313impl AstTransformer for PromptTransformer {
1314 fn transform<'a>(&self, node: &'a AstNode<'a>) {
1315 use comrak::nodes::NodeValue;
1316 use regex::Regex;
1317
1318 let command_prompt_re = Regex::new(r"^\s*\$\s+(.+)$").unwrap();
1319 let repl_prompt_re = Regex::new(r"^nix-repl>\s*(.*)$").unwrap();
1320
1321 for child in node.children() {
1322 {
1323 let mut data = child.data.borrow_mut();
1324 if let NodeValue::Code(ref code) = data.value {
1325 let literal = code.literal.trim();
1326
1327 if let Some(caps) = command_prompt_re.captures(literal) {
1329 if !literal.starts_with("\\$") && !literal.starts_with("$$") {
1331 let command = caps[1].trim();
1332 let html = format!(
1333 "<code class=\"terminal\"><span class=\"prompt\">$</span> {command}</code>"
1334 );
1335 data.value = NodeValue::HtmlInline(html);
1336 }
1337 } else if let Some(caps) = repl_prompt_re.captures(literal) {
1338 if !literal.starts_with("nix-repl>>") {
1340 let expression = caps[1].trim();
1341 let html = format!(
1342 "<code class=\"nix-repl\"><span class=\"prompt\">nix-repl></span> {expression}</code>"
1343 );
1344 data.value = NodeValue::HtmlInline(html);
1345 }
1346 }
1347 }
1348 }
1349 self.transform(child);
1350 }
1351 }
1352}
1353
1354fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
1355 let mut text = String::new();
1356 for child in node.children() {
1357 match &child.data.borrow().value {
1358 NodeValue::Text(t) => text.push_str(t),
1359 NodeValue::Code(t) => text.push_str(&t.literal),
1360 NodeValue::Link(..) => text.push_str(&extract_inline_text(child)),
1361 NodeValue::Emph => text.push_str(&extract_inline_text(child)),
1362 NodeValue::Strong => text.push_str(&extract_inline_text(child)),
1363 NodeValue::Strikethrough => text.push_str(&extract_inline_text(child)),
1364 NodeValue::Superscript => text.push_str(&extract_inline_text(child)),
1365 NodeValue::Subscript => text.push_str(&extract_inline_text(child)),
1366 NodeValue::FootnoteReference(..) => text.push_str(&extract_inline_text(child)),
1367 NodeValue::HtmlInline(_) => {}
1368 NodeValue::Image(..) => {}
1369 _ => {}
1370 }
1371 }
1372 text
1373}
1374
1375#[cfg(feature = "gfm")]
/// Hook for GFM-specific preprocessing of Markdown source.
///
/// Currently a pass-through: the input is returned unchanged as an owned
/// `String`.
#[must_use]
pub fn apply_gfm_extensions(markdown: &str) -> String {
    String::from(markdown)
}
1393
1394#[cfg(feature = "nixpkgs")]
1415#[must_use]
1416pub fn process_file_includes(markdown: &str, base_dir: &std::path::Path) -> String {
1417 use std::{fs, path::Path};
1418
1419 fn is_safe_path(path: &str) -> bool {
1421 let p = Path::new(path);
1422 !p.is_absolute() && !path.contains("..") && !path.contains('\\')
1423 }
1424
1425 fn read_includes(listing: &str, base_dir: &Path) -> String {
1427 let mut result = String::new();
1428 for line in listing.lines() {
1429 let trimmed = line.trim();
1430 if trimmed.is_empty() || !is_safe_path(trimmed) {
1431 continue;
1432 }
1433 let full_path = base_dir.join(trimmed);
1434 log::info!("Including file: {}", full_path.display());
1435 match fs::read_to_string(&full_path) {
1436 Ok(content) => {
1437 result.push_str(&content);
1438 if !content.ends_with('\n') {
1439 result.push('\n');
1440 }
1441 }
1442 Err(_) => {
1443 result.push_str(&format!(
1445 "<!-- ndg: could not include file: {} -->\n",
1446 full_path.display()
1447 ));
1448 }
1449 }
1450 }
1451 result
1452 }
1453
1454 let mut output = String::new();
1456 let mut lines = markdown.lines().peekable();
1457 let mut in_code_block = false;
1458 let mut code_fence_char = None;
1459 let mut code_fence_count = 0;
1460
1461 while let Some(line) = lines.next() {
1462 let trimmed = line.trim_start();
1463
1464 if !in_code_block && trimmed.starts_with("```{=include=}") {
1466 let mut include_listing = String::new();
1468 for next_line in lines.by_ref() {
1469 if next_line.trim_start().starts_with("```") {
1470 break;
1471 }
1472 include_listing.push_str(next_line);
1473 include_listing.push('\n');
1474 }
1475
1476 let included = read_includes(&include_listing, base_dir);
1477 output.push_str(&included);
1478 continue;
1479 }
1480
1481 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1483 let fence_char = trimmed.chars().next().unwrap();
1484 let fence_count = trimmed.chars().take_while(|&c| c == fence_char).count();
1485
1486 if fence_count >= 3 {
1487 if !in_code_block {
1488 in_code_block = true;
1490 code_fence_char = Some(fence_char);
1491 code_fence_count = fence_count;
1492 } else if code_fence_char == Some(fence_char) && fence_count >= code_fence_count {
1493 in_code_block = false;
1495 code_fence_char = None;
1496 code_fence_count = 0;
1497 }
1498 }
1499 }
1500
1501 output.push_str(line);
1503 output.push('\n');
1504 }
1505
1506 output
1507}
1508
1509#[cfg(any(feature = "nixpkgs", feature = "ndg-flavored"))]
1520#[must_use]
1521pub fn process_role_markup(
1522 content: &str,
1523 manpage_urls: Option<&std::collections::HashMap<String, String>>,
1524) -> String {
1525 let mut result = String::new();
1526 let mut chars = content.chars().peekable();
1527 let mut in_code_block = false;
1528 let mut in_inline_code = false;
1529 let mut code_fence_char = None;
1530 let mut code_fence_count = 0;
1531
1532 while let Some(ch) = chars.next() {
1533 if ch == '`' {
1535 let mut tick_count = 1;
1536 while chars.peek() == Some(&'`') {
1537 chars.next();
1538 tick_count += 1;
1539 }
1540
1541 if tick_count >= 3 {
1542 if !in_code_block {
1544 in_code_block = true;
1546 code_fence_char = Some('`');
1547 code_fence_count = tick_count;
1548 } else if code_fence_char == Some('`') && tick_count >= code_fence_count {
1549 in_code_block = false;
1551 code_fence_char = None;
1552 code_fence_count = 0;
1553 }
1554 } else if tick_count == 1 && !in_code_block {
1555 in_inline_code = !in_inline_code;
1557 }
1558
1559 result.push_str(&"`".repeat(tick_count));
1561 continue;
1562 }
1563
1564 if ch == '~' && chars.peek() == Some(&'~') {
1566 let mut tilde_count = 1;
1567 while chars.peek() == Some(&'~') {
1568 chars.next();
1569 tilde_count += 1;
1570 }
1571
1572 if tilde_count >= 3 {
1573 if !in_code_block {
1574 in_code_block = true;
1575 code_fence_char = Some('~');
1576 code_fence_count = tilde_count;
1577 } else if code_fence_char == Some('~') && tilde_count >= code_fence_count {
1578 in_code_block = false;
1579 code_fence_char = None;
1580 code_fence_count = 0;
1581 }
1582 }
1583
1584 result.push_str(&"~".repeat(tilde_count));
1585 continue;
1586 }
1587
1588 if ch == '\n' {
1590 in_inline_code = false;
1591 result.push(ch);
1592 continue;
1593 }
1594
1595 if ch == '{' && !in_code_block && !in_inline_code {
1597 let remaining: Vec<char> = chars.clone().collect();
1599 let remaining_str: String = remaining.iter().collect();
1600 let mut temp_chars = remaining_str.chars().peekable();
1601
1602 if let Some(role_markup) = parse_role_markup(&mut temp_chars, manpage_urls) {
1603 let remaining_after_parse: String = temp_chars.collect();
1605 let consumed = remaining_str.len() - remaining_after_parse.len();
1606 for _ in 0..consumed {
1607 chars.next();
1608 }
1609 result.push_str(&role_markup);
1610 } else {
1611 result.push(ch);
1613 }
1614 } else {
1615 result.push(ch);
1616 }
1617 }
1618
1619 result
1620}
1621
1622fn parse_role_markup(
1625 chars: &mut std::iter::Peekable<std::str::Chars>,
1626 manpage_urls: Option<&std::collections::HashMap<String, String>>,
1627) -> Option<String> {
1628 let mut role_name = String::new();
1629
1630 while let Some(&ch) = chars.peek() {
1632 if ch.is_ascii_lowercase() {
1633 role_name.push(ch);
1634 chars.next();
1635 } else {
1636 break;
1637 }
1638 }
1639
1640 if role_name.is_empty() {
1642 return None;
1643 }
1644
1645 if chars.peek() != Some(&'}') {
1647 return None;
1648 }
1649 chars.next(); if chars.peek() != Some(&'`') {
1653 return None;
1654 }
1655 chars.next(); let mut content = String::new();
1659 for ch in chars.by_ref() {
1660 if ch == '`' {
1661 if content.is_empty() && !matches!(role_name.as_str(), "manpage") {
1664 return None; }
1666 return Some(format_role_markup(&role_name, &content, manpage_urls));
1667 }
1668 content.push(ch);
1669 }
1670
1671 None
1673}
1674
1675fn format_role_markup(
1677 role_type: &str,
1678 content: &str,
1679 manpage_urls: Option<&std::collections::HashMap<String, String>>,
1680) -> String {
1681 match role_type {
1682 "manpage" => {
1683 if let Some(urls) = manpage_urls {
1684 if let Some(url) = urls.get(content) {
1685 let clean_url = extract_url_from_html(url);
1686 format!("<a href=\"{clean_url}\" class=\"manpage-reference\">{content}</a>")
1687 } else {
1688 format!("<span class=\"manpage-reference\">{content}</span>")
1689 }
1690 } else {
1691 format!("<span class=\"manpage-reference\">{content}</span>")
1692 }
1693 }
1694 "command" => format!("<code class=\"command\">{content}</code>"),
1695 "env" => format!("<code class=\"env-var\">{content}</code>"),
1696 "file" => format!("<code class=\"file-path\">{content}</code>"),
1697 "option" => {
1698 if cfg!(feature = "ndg-flavored") {
1699 let option_id = format!("option-{}", content.replace('.', "-"));
1700 format!(
1701 "<a class=\"option-reference\" href=\"options.html#{option_id}\"><code>{content}</code></a>"
1702 )
1703 } else {
1704 format!("<code>{content}</code>")
1705 }
1706 }
1707 "var" => format!("<code class=\"nix-var\">{content}</code>"),
1708 _ => format!("<span class=\"{role_type}-markup\">{content}</span>"),
1709 }
1710}
1711
1712#[cfg(feature = "nixpkgs")]
1723#[must_use]
1724pub fn process_inline_anchors(content: &str) -> String {
1725 let mut result = String::with_capacity(content.len() + 100);
1726 let mut in_code_block = false;
1727 let mut code_fence_char = None;
1728 let mut code_fence_count = 0;
1729
1730 for line in content.lines() {
1731 let trimmed = line.trim_start();
1732
1733 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1735 let fence_char = trimmed.chars().next().unwrap();
1736 let fence_count = trimmed.chars().take_while(|&c| c == fence_char).count();
1737
1738 if fence_count >= 3 {
1739 if !in_code_block {
1740 in_code_block = true;
1742 code_fence_char = Some(fence_char);
1743 code_fence_count = fence_count;
1744 } else if code_fence_char == Some(fence_char) && fence_count >= code_fence_count {
1745 in_code_block = false;
1747 code_fence_char = None;
1748 code_fence_count = 0;
1749 }
1750 }
1751 }
1752
1753 if in_code_block {
1755 result.push_str(line);
1757 result.push('\n');
1758 } else {
1759 if let Some(anchor_start) = find_list_item_anchor(trimmed) {
1762 if let Some(processed_line) = process_list_item_anchor(line, anchor_start) {
1763 result.push_str(&processed_line);
1764 result.push('\n');
1765 continue;
1766 }
1767 }
1768
1769 result.push_str(&process_line_anchors(line));
1771 result.push('\n');
1772 }
1773 }
1774
1775 result
1776}
1777
/// Detects a list item whose content starts with an `[]{#…}` anchor.
///
/// `trimmed` is a line without leading whitespace. For bullet items (`- `,
/// `* `, `+ `) and ordered items (`<digits>. `) followed directly by `[]{#`,
/// returns the byte offset of the `[` within `trimmed`; otherwise `None`.
fn find_list_item_anchor(trimmed: &str) -> Option<usize> {
    const ANCHOR_OPEN: &str = "[]{#";

    for marker in ["- ", "* ", "+ "] {
        let starts_anchor = trimmed
            .strip_prefix(marker)
            .is_some_and(|rest| rest.starts_with(ANCHOR_OPEN));
        if starts_anchor {
            return Some(marker.len());
        }
    }

    // ASCII digits are one byte each, so the count doubles as a byte offset.
    let digits = trimmed.bytes().take_while(u8::is_ascii_digit).count();
    if digits == 0 {
        return None;
    }

    trimmed[digits..]
        .strip_prefix('.')
        .filter(|rest| rest.starts_with(" []{#"))
        // Skip the digits, the dot and the single space.
        .map(|_| digits + 2)
}
1804
/// Replaces the `[]{#id}` anchor that starts at byte `anchor_start` of `line`
/// with `<span id="id" class="nixos-anchor"></span>`.
///
/// Text before the anchor and after its closing `}` is kept as is. Returns
/// `None` when there is no well-formed anchor at that position: no `[]{#`
/// there, no closing `}`, an empty id, or an id containing anything other than
/// ASCII alphanumerics, `-` and `_`. It also returns `None` when
/// `anchor_start` is past the end of `line` or not on a character boundary,
/// where plain slicing would panic.
fn process_list_item_anchor(line: &str, anchor_start: usize) -> Option<String> {
    // Checked slicing: the offset comes from a caller and is not guaranteed
    // to be a valid boundary of `line`.
    let before_anchor = line.get(..anchor_start)?;
    let after_opener = line.get(anchor_start..)?.strip_prefix("[]{#")?;
    let (id, remaining_content) = after_opener.split_once('}')?;

    let is_valid_id = !id.is_empty()
        && id
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_');

    is_valid_id.then(|| {
        format!("{before_anchor}<span id=\"{id}\" class=\"nixos-anchor\"></span>{remaining_content}")
    })
}
1833
/// Replaces every `[]{#id}` in `line` with
/// `<span id="id" class="nixos-anchor"></span>`.
///
/// An id is a non-empty run of ASCII alphanumerics, `-` and `_` that is
/// directly followed by `}`. Anything that does not form a complete anchor is
/// copied through exactly as written. Earlier revisions lost the leading `[`
/// of `[]` and `[]{`, doubled the braces of rejected anchors, and discarded an
/// unterminated `[]{#id` at the end of the line.
fn process_line_anchors(line: &str) -> String {
    const OPENER: &str = "[]{#";

    let mut result = String::with_capacity(line.len());
    let mut rest = line;

    while let Some(start) = rest.find(OPENER) {
        result.push_str(&rest[..start]);

        let after_opener = &rest[start + OPENER.len()..];
        // Id characters are ASCII, so the count is also a valid byte offset.
        let id_len = after_opener
            .bytes()
            .take_while(|&b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_')
            .count();
        let (id, tail) = after_opener.split_at(id_len);

        if !id.is_empty() && tail.starts_with('}') {
            result.push_str("<span id=\"");
            result.push_str(id);
            result.push_str("\" class=\"nixos-anchor\"></span>");
            rest = &tail[1..];
        } else {
            // Not an anchor: put back what was scanned and keep looking in
            // the remainder.
            result.push_str(OPENER);
            result.push_str(id);
            rest = tail;
        }
    }

    result.push_str(rest);
    result
}
1896
1897#[cfg(feature = "nixpkgs")]
1908#[must_use]
1909pub fn process_block_elements(content: &str) -> String {
1910 let mut result = Vec::new();
1911 let mut lines = content.lines().peekable();
1912 let mut in_code_block = false;
1913 let mut code_fence_char = None;
1914 let mut code_fence_count = 0;
1915
1916 while let Some(line) = lines.next() {
1917 let trimmed = line.trim_start();
1919 if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
1920 let fence_char = trimmed.chars().next().unwrap();
1921 let fence_count = trimmed.chars().take_while(|&c| c == fence_char).count();
1922
1923 if fence_count >= 3 {
1924 if !in_code_block {
1925 in_code_block = true;
1927 code_fence_char = Some(fence_char);
1928 code_fence_count = fence_count;
1929 } else if code_fence_char == Some(fence_char) && fence_count >= code_fence_count {
1930 in_code_block = false;
1932 code_fence_char = None;
1933 code_fence_count = 0;
1934 }
1935 }
1936 }
1937
1938 if !in_code_block {
1940 if let Some((callout_type, initial_content)) = parse_github_callout(line) {
1942 let content = collect_github_callout_content(&mut lines, initial_content);
1943 let admonition = render_admonition(&callout_type, None, &content);
1944 result.push(admonition);
1945 continue;
1946 }
1947
1948 if let Some((adm_type, id)) = parse_fenced_admonition_start(line) {
1950 let content = collect_fenced_content(&mut lines);
1951 let admonition = render_admonition(&adm_type, id.as_deref(), &content);
1952 result.push(admonition);
1953 continue;
1954 }
1955
1956 if let Some((id, title, content)) = parse_figure_block(line, &mut lines) {
1958 let figure = render_figure(id.as_deref(), &title, &content);
1959 result.push(figure);
1960 continue;
1961 }
1962
1963 if !line.is_empty() && !line.starts_with(':') {
1965 if let Some(next_line) = lines.peek() {
1966 if next_line.starts_with(": ") {
1967 let term = line;
1968 let def_line = lines.next().unwrap();
1969 let definition = &def_line[4..]; let dl = format!("<dl>\n<dt>{term}</dt>\n<dd>{definition}</dd>\n</dl>");
1971 result.push(dl);
1972 continue;
1973 }
1974 }
1975 }
1976 }
1977
1978 result.push(line.to_string());
1980 }
1981
1982 result.join("\n")
1983}
1984
/// Recognises the first line of a GitHub-style callout, `> [!TYPE] text`.
///
/// `TYPE` must be one of `NOTE`, `TIP`, `IMPORTANT`, `WARNING`, `CAUTION` or
/// `DANGER` (upper case). Returns the lower-cased type together with the
/// trimmed text that follows the closing bracket.
fn parse_github_callout(line: &str) -> Option<(String, String)> {
    let marker = line.trim_start().strip_prefix("> [!")?;
    let (kind, text) = marker.split_once(']')?;

    let is_known = matches!(
        kind,
        "NOTE" | "TIP" | "IMPORTANT" | "WARNING" | "CAUTION" | "DANGER"
    );

    is_known.then(|| (kind.to_lowercase(), text.trim().to_string()))
}
2010
/// Gathers the body of a GitHub-style callout.
///
/// Starts from `initial_content` (when non-empty) and then consumes every
/// directly following line that begins with `>` after optional indentation,
/// with the `>` and the whitespace after it removed. The first line that is
/// not quoted is left in `lines`. Pieces are joined with newlines and the
/// result is trimmed.
fn collect_github_callout_content(
    lines: &mut std::iter::Peekable<std::str::Lines>,
    initial_content: String,
) -> String {
    let mut pieces: Vec<&str> = Vec::new();

    if !initial_content.is_empty() {
        pieces.push(&initial_content);
    }

    while let Some(quoted) = lines.next_if(|line| line.trim_start().starts_with('>')) {
        // The line is known to start with `>` once indentation is removed.
        pieces.push(quoted.trim_start()[1..].trim_start());
    }

    pieces.join("\n").trim().to_string()
}
2037
/// Recognises the opening line of a fenced admonition, `::: {.type #id}`.
///
/// The attribute list must start with `{.` and contain a closing `}`. The
/// first whitespace-separated token inside it is the admonition type; the
/// first token that starts with `#` (if any) supplies the id, without the
/// `#`.
fn parse_fenced_admonition_start(line: &str) -> Option<(String, Option<String>)> {
    let attrs = line
        .trim()
        .strip_prefix(":::")?
        .trim_start()
        .strip_prefix("{.")?;
    let (inner, _) = attrs.split_once('}')?;

    let mut tokens = inner.split_whitespace().peekable();
    // Peek rather than consume: the first token also takes part in the id
    // search below.
    let adm_type = *tokens.peek()?;
    let id = tokens
        .find_map(|token| token.strip_prefix('#'))
        .map(str::to_string);

    Some((adm_type.to_string(), id))
}
2068
/// Consumes lines up to and including the next line that starts with `:::`
/// (ignoring surrounding whitespace) and returns the lines before it, joined
/// with newlines and trimmed. Without a closing line, the rest of the input
/// is consumed.
fn collect_fenced_content(lines: &mut std::iter::Peekable<std::str::Lines>) -> String {
    let body: Vec<&str> = lines
        .by_ref()
        .take_while(|line| !line.trim().starts_with(":::"))
        .collect();

    body.join("\n").trim().to_string()
}
2083
/// Parses a figure block:
///
/// ```text
/// ::: {.figure #id}
/// # Title
/// content…
/// :::
/// ```
///
/// `line` is the opening line and `lines` yields what follows it. Returns the
/// optional id (the text between `#` and `}` on the opening line), the title
/// (the next line without its leading `#`) and the trimmed content up to the
/// closing `:::`.
///
/// Returns `None`, without consuming anything from `lines`, when `line` does
/// not open a figure or the following line is not a `#` title. Previously the
/// would-be title line was consumed, and therefore lost, even on failure.
fn parse_figure_block(
    line: &str,
    lines: &mut std::iter::Peekable<std::str::Lines>,
) -> Option<(Option<String>, String, String)> {
    let attrs = line.trim().strip_prefix(":::")?.trim_start();
    if !attrs.starts_with("{.figure") {
        return None;
    }

    // The id is only taken when the `#` sits inside the attribute braces.
    let id = attrs.find('#').and_then(|hash_pos| {
        let close_brace = attrs.find('}')?;
        (hash_pos < close_brace).then(|| attrs[hash_pos + 1..close_brace].trim().to_string())
    });

    // Inspect the next line before committing to it.
    let candidate: &str = lines.peek().copied()?;
    let title = candidate.trim().strip_prefix('#')?.trim().to_string();
    lines.next();

    let body: Vec<&str> = lines
        .by_ref()
        .take_while(|body_line| !body_line.trim().starts_with(":::"))
        .collect();

    Some((id, title, body.join("\n").trim().to_string()))
}
2139
2140fn render_admonition(adm_type: &str, id: Option<&str>, content: &str) -> String {
2142 let capitalized_type = crate::utils::capitalize_first(adm_type);
2143 let id_attr = id.map_or(String::new(), |id| format!(" id=\"{id}\""));
2144
2145 format!(
2146 "<div class=\"admonition {adm_type}\"{id_attr}>\n<p class=\"admonition-title\">{capitalized_type}</p>\n\n{content}\n\n</div>"
2147 )
2148}
2149
/// Renders a `<figure>` with `title` as its `<figcaption>` and `content` as
/// its body. `id`, when present, becomes the element's `id` attribute. All
/// three values are inserted verbatim.
fn render_figure(id: Option<&str>, title: &str, content: &str) -> String {
    let mut html = String::from("<figure");
    if let Some(value) = id {
        html.push_str(" id=\"");
        html.push_str(value);
        html.push('"');
    }
    html.push_str(">\n<figcaption>");
    html.push_str(title);
    html.push_str("</figcaption>\n");
    html.push_str(content);
    html.push_str("\n</figure>");
    html
}
2156
2157#[cfg(feature = "gfm")]
2168#[must_use]
2169pub fn process_autolinks(html: &str) -> String {
2170 safely_process_markup(
2171 html,
2172 |html| {
2173 use std::sync::LazyLock;
2174
2175 use kuchikikiki::NodeRef;
2176 use regex::Regex;
2177 use tendril::TendrilSink;
2178
2179 static AUTOLINK_RE: LazyLock<Regex> = LazyLock::new(|| {
2180 Regex::new(r#"(https?://[^\s<>"')\}]+)"#).unwrap_or_else(|e| {
2181 log::error!("Failed to compile AUTOLINK_RE regex: {e}");
2182 utils::never_matching_regex()
2183 })
2184 });
2185
2186 let document = kuchikikiki::parse_html().one(html);
2187
2188 let mut text_nodes_to_process = Vec::new();
2190
2191 for node in document.inclusive_descendants() {
2192 if let Some(text_node) = node.as_text() {
2193 let text_content = text_node.borrow().clone();
2194
2195 let mut is_inside_link = false;
2197 let mut is_inside_code = false;
2198 let mut current = Some(node.clone());
2199 while let Some(parent) = current.and_then(|n| n.parent()) {
2200 if let Some(element) = parent.as_element() {
2201 if element.name.local.as_ref() == "a" {
2202 is_inside_link = true;
2203 break;
2204 }
2205 if element.name.local.as_ref() == "code"
2206 || element.name.local.as_ref() == "pre"
2207 {
2208 is_inside_code = true;
2209 break;
2210 }
2211 }
2212 current = parent.parent();
2213 }
2214
2215 if !is_inside_link && !is_inside_code && AUTOLINK_RE.is_match(&text_content) {
2216 text_nodes_to_process.push((node.clone(), text_content));
2217 }
2218 }
2219 }
2220
2221 for (text_node, text_content) in text_nodes_to_process {
2223 let mut last_end = 0;
2224 let mut new_children = Vec::new();
2225
2226 for url_match in AUTOLINK_RE.find_iter(&text_content) {
2227 if url_match.start() > last_end {
2229 let before_text = &text_content[last_end..url_match.start()];
2230 if !before_text.is_empty() {
2231 new_children.push(NodeRef::new_text(before_text));
2232 }
2233 }
2234
2235 let mut url = url_match.as_str();
2237
2238 while let Some(last_char) = url.chars().last() {
2240 if matches!(last_char, '.' | '!' | '?' | ';' | ',' | ')' | ']' | '}') {
2241 url = &url[..url.len() - last_char.len_utf8()];
2242 } else {
2243 break;
2244 }
2245 }
2246
2247 let link = NodeRef::new_element(
2248 markup5ever::QualName::new(None, ns!(html), local_name!("a")),
2249 vec![(
2250 kuchikikiki::ExpandedName::new("", "href"),
2251 kuchikikiki::Attribute {
2252 prefix: None,
2253 value: url.into(),
2254 },
2255 )],
2256 );
2257 link.append(NodeRef::new_text(url));
2258 new_children.push(link);
2259
2260 let original_url = url_match.as_str();
2262 if url.len() < original_url.len() {
2263 let punctuation = &original_url[url.len()..];
2264 new_children.push(NodeRef::new_text(punctuation));
2265 }
2266
2267 last_end = url_match.end();
2268 }
2269
2270 if last_end < text_content.len() {
2272 let after_text = &text_content[last_end..];
2273 if !after_text.is_empty() {
2274 new_children.push(NodeRef::new_text(after_text));
2275 }
2276 }
2277
2278 if !new_children.is_empty() {
2280 for child in new_children {
2281 text_node.insert_before(child);
2282 }
2283 text_node.detach();
2284 }
2285 }
2286
2287 let mut out = Vec::new();
2288 document.serialize(&mut out).ok();
2289 String::from_utf8(out).unwrap_or_default()
2290 },
2291 "",
2293 )
2294}
2295
2296#[cfg(feature = "nixpkgs")]
2308#[must_use]
2309pub fn process_manpage_references(
2310 html: &str,
2311 manpage_urls: Option<&std::collections::HashMap<String, String>>,
2312) -> String {
2313 safely_process_markup(
2314 html,
2315 |html| {
2316 use kuchikikiki::NodeRef;
2317 use tendril::TendrilSink;
2318
2319 let document = kuchikikiki::parse_html().one(html);
2320 let mut to_replace = Vec::new();
2321
2322 for span_node in document.select("span.manpage-reference").unwrap() {
2324 let span_el = span_node.as_node();
2325 let span_text = span_el.text_contents();
2326
2327 if let Some(urls) = manpage_urls {
2328 if let Some(url) = urls.get(&span_text) {
2330 let clean_url = extract_url_from_html(url);
2331 let link = NodeRef::new_element(
2332 markup5ever::QualName::new(None, ns!(html), local_name!("a")),
2333 vec![
2334 (
2335 kuchikikiki::ExpandedName::new("", "href"),
2336 kuchikikiki::Attribute {
2337 prefix: None,
2338 value: clean_url.into(),
2339 },
2340 ),
2341 (
2342 kuchikikiki::ExpandedName::new("", "class"),
2343 kuchikikiki::Attribute {
2344 prefix: None,
2345 value: "manpage-reference".into(),
2346 },
2347 ),
2348 ],
2349 );
2350 link.append(NodeRef::new_text(span_text.clone()));
2351 to_replace.push((span_el.clone(), link));
2352 }
2353 }
2354 }
2355
2356 for (old, new) in to_replace {
2358 old.insert_before(new);
2359 old.detach();
2360 }
2361
2362 let mut out = Vec::new();
2363 document.serialize(&mut out).ok();
2364 String::from_utf8(out).unwrap_or_default()
2365 },
2366 "",
2368 )
2369}
2370
2371#[cfg(feature = "ndg-flavored")]
2386#[must_use]
2387pub fn process_option_references(html: &str) -> String {
2388 use kuchikikiki::{Attribute, ExpandedName, NodeRef};
2389 use markup5ever::{QualName, local_name, ns};
2390 use tendril::TendrilSink;
2391
2392 safely_process_markup(
2393 html,
2394 |html| {
2395 let document = kuchikikiki::parse_html().one(html);
2396
2397 let mut to_replace = vec![];
2398
2399 for code_node in document.select("code").unwrap() {
2400 let code_el = code_node.as_node();
2401 let code_text = code_el.text_contents();
2402
2403 if let Some(element) = code_el.as_element() {
2405 if let Some(class_attr) = element.attributes.borrow().get(local_name!("class"))
2406 {
2407 if class_attr.contains("command")
2408 || class_attr.contains("env-var")
2409 || class_attr.contains("file-path")
2410 || class_attr.contains("nixos-option")
2411 || class_attr.contains("nix-var")
2412 {
2413 continue;
2414 }
2415 }
2416 }
2417
2418 let mut is_already_option_ref = false;
2420 let mut current = code_el.parent();
2421 while let Some(parent) = current {
2422 if let Some(element) = parent.as_element() {
2423 if element.name.local == local_name!("a") {
2424 if let Some(class_attr) =
2425 element.attributes.borrow().get(local_name!("class"))
2426 {
2427 if class_attr.contains("option-reference") {
2428 is_already_option_ref = true;
2429 break;
2430 }
2431 }
2432 }
2433 }
2434 current = parent.parent();
2435 }
2436
2437 if !is_already_option_ref && is_nixos_option_reference(&code_text) {
2438 let option_id = format!("option-{}", code_text.replace('.', "-"));
2439 let attrs = vec![
2440 (
2441 ExpandedName::new("", "href"),
2442 Attribute {
2443 prefix: None,
2444 value: format!("options.html#{option_id}"),
2445 },
2446 ),
2447 (
2448 ExpandedName::new("", "class"),
2449 Attribute {
2450 prefix: None,
2451 value: "option-reference".into(),
2452 },
2453 ),
2454 ];
2455 let a = NodeRef::new_element(
2456 QualName::new(None, ns!(html), local_name!("a")),
2457 attrs,
2458 );
2459 let code = NodeRef::new_element(
2460 QualName::new(None, ns!(html), local_name!("code")),
2461 vec![],
2462 );
2463 code.append(NodeRef::new_text(code_text.clone()));
2464 a.append(code);
2465 to_replace.push((code_el.clone(), a));
2466 }
2467 }
2468
2469 for (old, new) in to_replace {
2470 old.insert_before(new);
2471 old.detach();
2472 }
2473
2474 let mut out = Vec::new();
2475 document.serialize(&mut out).ok();
2476 String::from_utf8(out).unwrap_or_default()
2477 },
2478 "",
2480 )
2481}
2482
/// Heuristic for text that looks like an option path such as
/// `services.nginx.enable`.
///
/// The text must start with an alphabetic character, consist solely of
/// alphanumerics, `.`, `-` and `_`, and contain at least two dots.
fn is_nixos_option_reference(text: &str) -> bool {
    let Some(first) = text.chars().next() else {
        return false;
    };
    if !first.is_alphabetic() {
        return false;
    }

    let mut dots = 0usize;
    for c in text.chars() {
        match c {
            '.' => dots += 1,
            '-' | '_' => {}
            other if other.is_alphanumeric() => {}
            // Whitespace, `<`, `>`, `$`, `/` and every other character.
            _ => return false,
        }
    }

    dots >= 2
}
2505
2506pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
2508 let mut files = Vec::with_capacity(100);
2509
2510 for entry in WalkDir::new(input_dir)
2511 .follow_links(true)
2512 .into_iter()
2513 .filter_map(Result::ok)
2514 {
2515 let path = entry.path();
2516 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
2517 files.push(path.to_owned());
2518 }
2519 }
2520
2521 trace!("Found {} markdown files to process", files.len());
2522 files
2523}
2524
/// Returns the `href` value when the input starts with `<a href="`, and the
/// input itself otherwise (also when the attribute's closing quote is
/// missing).
fn extract_url_from_html(url_or_html: &str) -> &str {
    url_or_html
        .strip_prefix("<a href=\"")
        .and_then(|rest| rest.split_once('"'))
        .map_or(url_or_html, |(url, _)| url)
}