1use std::{
6 collections::HashMap,
7 path::{Path, PathBuf},
8 sync::LazyLock,
9};
10
11use comrak::{
12 Arena,
13 nodes::{AstNode, NodeHeading, NodeValue},
14 options::Options,
15 parse_document,
16};
17use log::trace;
18use markup5ever::local_name;
19use regex::Regex;
20use walkdir::WalkDir;
21
22use super::{
23 dom::safe_select,
24 process::process_safe,
25 types::{
26 AstTransformer,
27 MarkdownOptions,
28 MarkdownProcessor,
29 PromptTransformer,
30 },
31};
32use crate::{
33 syntax::create_default_manager,
34 types::{Header, MarkdownResult},
35 utils,
36};
37
38static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
39 Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>")
40 .unwrap_or_else(|e| {
41 log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
42 utils::never_matching_regex().unwrap_or_else(|_| {
43 #[allow(
44 clippy::expect_used,
45 reason = "This pattern is guaranteed to be valid"
46 )]
47 Regex::new(r"[^\s\S]")
48 .expect("regex pattern [^\\s\\S] should always compile")
49 })
50 })
51});
52
53static HEADER_NO_ID_RE: LazyLock<Regex> = LazyLock::new(|| {
54 Regex::new(r"<h([1-6])>(.*?)</h[1-6]>").unwrap_or_else(|e| {
55 log::error!("Failed to compile HEADER_NO_ID_RE regex: {e}");
56 utils::never_matching_regex().unwrap_or_else(|_| {
57 #[allow(
58 clippy::expect_used,
59 reason = "This pattern is guaranteed to be valid"
60 )]
61 Regex::new(r"[^\s\S]")
62 .expect("regex pattern [^\\s\\S] should always compile")
63 })
64 })
65});
66
67static HTML_TAG_RE: LazyLock<Regex> = LazyLock::new(|| {
68 Regex::new(r"<[^>]+>").unwrap_or_else(|e| {
69 log::error!("Failed to compile HTML_TAG_RE regex: {e}");
70 utils::never_matching_regex().unwrap_or_else(|_| {
71 #[allow(
72 clippy::expect_used,
73 reason = "This pattern is guaranteed to be valid"
74 )]
75 Regex::new(r"[^\s\S]")
76 .expect("regex pattern [^\\s\\S] should always compile")
77 })
78 })
79});
80
81impl MarkdownProcessor {
82 #[must_use]
84 pub fn new(options: MarkdownOptions) -> Self {
85 let manpage_urls = options
86 .manpage_urls_path
87 .as_ref()
88 .and_then(|path| crate::utils::load_manpage_urls(path).ok());
89
90 let syntax_manager = if options.highlight_code {
91 match create_default_manager(
92 options
93 .syntax_queries_path
94 .as_deref()
95 .map(std::path::Path::new),
96 ) {
97 Ok(manager) => {
98 log::info!("Syntax highlighting initialized successfully");
99 Some(manager)
100 },
101 Err(e) => {
102 log::error!("Failed to initialize syntax highlighting: {e}");
103 log::warn!(
104 "Continuing without syntax highlighting - code blocks will not be \
105 highlighted"
106 );
107 None
108 },
109 }
110 } else {
111 None
112 };
113
114 Self {
115 options,
116 manpage_urls,
117 syntax_manager,
118 base_dir: std::path::PathBuf::from("."),
119 }
120 }
121
  /// Returns the options this processor was configured with.
  #[must_use]
  pub const fn options(&self) -> &MarkdownOptions {
    &self.options
  }
127
  /// Replaces the processor's base directory with `base_dir` and returns the
  /// processor, builder-style.
  ///
  /// The base directory is what file-include processing receives when
  /// nixpkgs preprocessing runs.
  #[must_use]
  pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
    self.base_dir = base_dir.to_path_buf();
    self
  }
134
  /// Reports whether `feature` is active on this processor.
  ///
  /// `Gfm`, `Nixpkgs` and `SyntaxHighlighting` mirror the corresponding
  /// option flags; `ManpageUrls` is true only when a manpage URL mapping was
  /// actually loaded.
  #[must_use]
  pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
    match feature {
      ProcessorFeature::Gfm => self.options.gfm,
      ProcessorFeature::Nixpkgs => self.options.nixpkgs,
      // Reflects the option flag, not whether a syntax manager was created.
      ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
      ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
    }
  }
145
  /// Returns the loaded manpage URL mapping, or `None` when no mapping is
  /// available.
  #[must_use]
  pub const fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
    self.manpage_urls.as_ref()
  }
151
152 #[must_use]
154 pub fn highlight_codeblocks(&self, html: &str) -> String {
155 use kuchikikiki::parse_html;
156 use tendril::TendrilSink;
157
158 if !self.options.highlight_code || self.syntax_manager.is_none() {
159 return html.to_string();
160 }
161
162 let document = parse_html().one(html);
163
164 let mut code_blocks = Vec::new();
166 for pre_node in safe_select(&document, "pre > code") {
167 let code_node = pre_node;
168 if let Some(element) = code_node.as_element() {
169 let language = element
170 .attributes
171 .borrow()
172 .get("class")
173 .and_then(|class| class.strip_prefix("language-"))
174 .unwrap_or("text")
175 .to_string();
176 let code_text = code_node.text_contents();
177
178 if let Some(pre_parent) = code_node.parent() {
179 code_blocks.push((
180 pre_parent.clone(),
181 code_node.clone(),
182 code_text,
183 language,
184 ));
185 }
186 }
187 }
188
189 for (pre_element, _code_node, code_text, language) in code_blocks {
191 if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
192 {
193 let wrapped_html = format!(
195 r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
196 );
197 let fragment = parse_html().one(wrapped_html.as_str());
198 pre_element.insert_after(fragment);
199 pre_element.detach();
200 }
201 }
203
204 let mut buf = Vec::new();
205 if let Err(e) = document.serialize(&mut buf) {
206 log::warn!("DOM serialization failed: {e:?}");
207 return html.to_string(); }
209 String::from_utf8(buf).unwrap_or_else(|_| html.to_string())
210 }
211
212 fn handle_hardtabs(&self, code: &str) -> String {
214 use super::types::TabStyle;
215
216 if !code.contains('\t') {
218 return code.to_string();
219 }
220
221 match self.options.tab_style {
222 TabStyle::None => code.to_string(),
224
225 TabStyle::Warn => {
227 log::warn!(
228 "Hard tabs detected in code block. Consider using spaces for \
229 consistency. Tools like editorconfig may help you normalize spaces \
230 in your documents."
231 );
232 code.to_string()
233 },
234
235 TabStyle::Normalize => {
238 log::debug!("Replacing hard tabs with spaces");
239 code.replace('\t', " ")
240 },
241 }
242 }
243
244 fn process_hardtabs(&self, markdown: &str) -> String {
246 use super::types::TabStyle;
247 use crate::utils::codeblock::FenceTracker;
248
249 if self.options.tab_style == TabStyle::None {
251 return markdown.to_string();
252 }
253
254 let mut result = String::with_capacity(markdown.len());
255 let mut lines = markdown.lines().peekable();
256 let mut tracker = FenceTracker::new();
257
258 while let Some(line) = lines.next() {
259 tracker = tracker.process_line(line);
260
261 let processed_line = if tracker.in_code_block() && line.contains('\t') {
263 self.handle_hardtabs(line)
264 } else {
265 line.to_string()
266 };
267
268 result.push_str(&processed_line);
269
270 if lines.peek().is_some() {
272 result.push('\n');
273 }
274 }
275
276 result
277 }
278
279 fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
282 if !self.options.highlight_code {
283 return None;
284 }
285
286 let syntax_manager = self.syntax_manager.as_ref()?;
287
288 syntax_manager
289 .highlight_code(code, language, self.options.highlight_theme.as_deref())
290 .ok()
291 }
292
293 #[must_use]
295 pub fn render(&self, markdown: &str) -> MarkdownResult {
296 let (preprocessed, included_files) = self.preprocess(markdown);
297 let (headers, title) = self.extract_headers(&preprocessed);
298 let html = self.process_html_pipeline(&preprocessed);
299
300 MarkdownResult {
301 html,
302 headers,
303 title,
304 included_files,
305 }
306 }
307
308 fn process_html_pipeline(&self, content: &str) -> String {
310 let mut html = self.convert_to_html(content);
311
312 if cfg!(feature = "ndg-flavored") {
314 #[cfg(feature = "ndg-flavored")]
315 {
316 html = super::extensions::process_option_references(
317 &html,
318 self.options.valid_options.as_ref(),
319 );
320 }
321 }
322
323 if self.options.nixpkgs {
324 html = self.process_manpage_references_html(&html);
325 }
326
327 if self.options.highlight_code {
328 html = self.highlight_codeblocks(&html);
329 }
330
331 self.kuchiki_postprocess(&html)
332 }
333
334 fn preprocess(
336 &self,
337 content: &str,
338 ) -> (String, Vec<crate::types::IncludedFile>) {
339 let mut processed = content.to_string();
340 let mut included_files = Vec::new();
341
342 processed = super::extensions::process_myst_autolinks(&processed);
344
345 processed = self.process_hardtabs(&processed);
347
348 if self.options.nixpkgs {
349 let (content, files) = self.apply_nixpkgs_preprocessing(&processed);
350 processed = content;
351 included_files = files;
352 }
353
354 if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
355 processed = super::extensions::process_role_markup(
356 &processed,
357 self.manpage_urls.as_ref(),
358 self.options.auto_link_options,
359 self.options.valid_options.as_ref(),
360 );
361 }
362
363 #[cfg(feature = "wiki")]
364 {
365 processed = super::extensions::process_wikilinks(&processed);
366 }
367
368 (processed, included_files)
369 }
370
371 #[cfg(feature = "nixpkgs")]
373 fn apply_nixpkgs_preprocessing(
374 &self,
375 content: &str,
376 ) -> (String, Vec<crate::types::IncludedFile>) {
377 let (with_includes, included_files) =
378 match super::extensions::process_file_includes(content, &self.base_dir, 0)
379 {
380 Ok(result) => result,
381 Err(e) => {
382 log::warn!(
383 "File include processing failed: {e}. Continuing without includes."
384 );
385 (content.to_string(), Vec::new())
386 },
387 };
388 let with_blocks = super::extensions::process_block_elements(&with_includes);
389 let with_spans = super::extensions::process_bracketed_spans(&with_blocks);
390 let processed = super::extensions::process_inline_anchors(&with_spans);
391 (processed, included_files)
392 }
393
  /// Fallback used when the crate is built without the `nixpkgs` feature:
  /// returns the content unchanged and reports no included files.
  #[cfg(not(feature = "nixpkgs"))]
  fn apply_nixpkgs_preprocessing(
    &self,
    content: &str,
  ) -> (String, Vec<crate::types::IncludedFile>) {
    (content.to_string(), Vec::new())
  }
402
  /// Extracts the headings of `content` and the document title.
  ///
  /// Returns every heading found (text, level and id) in traversal order,
  /// together with the text of the first level-1 heading, if there is one.
  ///
  /// Lines inside admonition `<div>` blocks are blanked out beforehand so
  /// headings within them are not reported. A heading's id is taken from a
  /// `{#id}` marker in the heading text when present, otherwise from a
  /// `{#id}` found in inline HTML inside the heading, otherwise from
  /// `slugify_heading`.
  #[must_use]
  pub fn extract_headers(
    &self,
    content: &str,
  ) -> (Vec<Header>, Option<String>) {
    use std::fmt::Write;

    let arena = Arena::new();
    let options = self.comrak_options();

    // Blank out admonition blocks so their contents are not scanned.
    let content = remove_admonition_blocks_for_headers(content);

    // Rewrite lines that carry a `{#id}` marker but are neither ATX headings
    // (leading `#`) nor followed by a setext underline into `## text {#id}`.
    let mut normalized = String::with_capacity(content.len());
    let mut lines = content.lines().peekable();
    while let Some(line) = lines.next() {
      let trimmed = line.trim();
      if !trimmed.starts_with('#')
        && !lines
          .peek()
          .is_some_and(|next| is_setext_heading_underline(next.trim()))
        && let Some(anchor_start) = trimmed.rfind("{#")
        && let Some(anchor_end) = trimmed[anchor_start..].find('}')
      {
        // `anchor_end` is relative to `anchor_start`.
        let text = trimmed[..anchor_start].trim_end();
        let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
        let _ = writeln!(normalized, "## {text} {{#{id}}}");
        continue;
      }
      normalized.push_str(line);
      normalized.push('\n');
    }

    let root = parse_document(&arena, &normalized, &options);

    let mut headers = Vec::new();
    let mut found_title = None;

    for node in root.descendants() {
      if let NodeValue::Heading(NodeHeading { level, .. }) =
        &node.data.borrow().value
      {
        let mut text = String::new();
        let mut explicit_id = None;

        // Build the heading text from its direct children.
        for child in node.children() {
          match &child.data.borrow().value {
            NodeValue::Text(t) => text.push_str(t),
            NodeValue::Code(t) => text.push_str(&t.literal),
            NodeValue::Link(..)
            | NodeValue::Emph
            | NodeValue::Strong
            | NodeValue::Subscript
            | NodeValue::Strikethrough
            | NodeValue::Superscript
            | NodeValue::FootnoteReference(..) => {
              text.push_str(&extract_inline_text(child));
            },
            NodeValue::HtmlInline(html) => {
              // Inline HTML adds no text, but may carry a `{#id}` marker.
              let html_str = html.as_str();
              if let Some(start) = html_str.find("{#")
                && let Some(end) = html_str[start..].find('}')
              {
                let anchor = &html_str[start + 2..start + end];
                explicit_id = Some(anchor.to_string());
              }
            },
            #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
            NodeValue::Image(..) => {},
            _ => {},
          }
        }

        // A complete `{#id}` marker in the text takes precedence and is cut
        // from the reported text; otherwise fall back to the id found in
        // inline HTML, then to a slug of the text.
        let trimmed = text.trim_end();
        #[allow(clippy::option_if_let_else)]
        let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
          if let Some(end) = trimmed[start..].find('}') {
            let anchor = &trimmed[start + 2..start + end];
            (trimmed[..start].trim_end().to_string(), anchor.to_string())
          } else {
            (text.clone(), explicit_id.unwrap_or_else(|| slugify_heading(&text)))
          }
        } else {
          (text.clone(), explicit_id.unwrap_or_else(|| slugify_heading(&text)))
        };
        // Only the first level-1 heading becomes the title.
        if *level == 1 && found_title.is_none() {
          found_title = Some(final_text.clone());
        }
        headers.push(Header {
          text: final_text,
          level: *level,
          id,
        });
      }
    }

    (headers, found_title)
  }
505
506 fn convert_to_html(&self, content: &str) -> String {
508 let arena = Arena::new();
510 let options = self.comrak_options();
511 let root = parse_document(&arena, content, &options);
512
513 let prompt_transformer = PromptTransformer;
515 prompt_transformer.transform(root);
516
517 let mut html_output = String::new();
518 if let Err(e) = comrak::format_html(root, &options, &mut html_output) {
519 log::error!("Failed to format HTML: {e}");
520 }
521
522 Self::process_header_anchors_html(&html_output)
524 }
525
526 fn process_header_anchors_html(html: &str) -> String {
530 let result = HEADER_ANCHOR_RE
532 .replace_all(html, |caps: ®ex::Captures| {
533 let level = &caps[1];
534 let prefix = &caps[2];
535 let id = &caps[3];
536 let suffix = &caps[4];
537 format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
538 })
539 .to_string();
540
541 HEADER_NO_ID_RE
543 .replace_all(&result, |caps: ®ex::Captures| {
544 let level = &caps[1];
545 let content = &caps[2];
546 let text_only = HTML_TAG_RE.replace_all(content, "");
548 let id = utils::slugify(&text_only);
549 if id.is_empty() {
550 format!("<h{level}>{content}</h{level}>")
552 } else {
553 format!("<h{level} id=\"{id}\">{content}</h{level}>")
554 }
555 })
556 .to_string()
557 }
558
559 fn comrak_options(&self) -> Options<'_> {
561 let mut options = Options::default();
562 if self.options.gfm {
564 options.extension.table = true;
565 options.extension.footnotes = true;
566 options.extension.strikethrough = true;
567 options.extension.tasklist = true;
568 options.extension.superscript = true;
569 options.extension.autolink = true;
570 }
571
572 options.render.r#unsafe = true;
575
576 options.extension.header_id_prefix = None;
578 options.extension.description_lists = true;
579 options
580 }
581
  /// Processes manpage references in `html` (built with the `nixpkgs`
  /// feature), passing along the loaded manpage URL mapping, if any.
  #[cfg(feature = "nixpkgs")]
  fn process_manpage_references_html(&self, html: &str) -> String {
    super::extensions::process_manpage_references(
      html,
      self.manpage_urls.as_ref(),
    )
  }
590
  /// Fallback used when the crate is built without the `nixpkgs` feature:
  /// returns `html` unchanged.
  #[cfg(not(feature = "nixpkgs"))]
  fn process_manpage_references_html(&self, html: &str) -> String {
    html.to_string()
  }
597
598 #[allow(
600 clippy::unused_self,
601 reason = "Method signature matches processor pattern"
602 )]
603 fn kuchiki_postprocess(&self, html: &str) -> String {
604 kuchiki_postprocess_html(html, |document| {
606 Self::apply_dom_transformations(document);
607 })
608 }
609
610 fn apply_dom_transformations(document: &kuchikikiki::NodeRef) {
612 Self::process_list_item_id_markers(document);
613 Self::process_header_anchor_comments(document);
614 Self::process_list_item_inline_anchors(document);
615 Self::process_paragraph_inline_anchors(document);
616 Self::process_remaining_inline_anchors(document);
617 Self::process_markdown_links(document);
618 Self::process_option_anchor_links(document);
619 Self::process_empty_auto_links(document);
620 Self::process_empty_html_links(document);
621 }
622
623 fn process_list_item_id_markers(document: &kuchikikiki::NodeRef) {
625 let mut to_modify = Vec::new();
626
627 for comment in document.inclusive_descendants() {
628 if let Some(comment_node) = comment.as_comment() {
629 let comment_text = comment_node.borrow();
630 if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
631 let id = comment_text[id_start + 16..].trim();
632 if !id.is_empty()
633 && id
634 .chars()
635 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
636 {
637 if let Some(parent) = comment.parent()
639 && let Some(element) = parent.as_element()
640 && element.name.local.as_ref() == "li"
641 {
642 to_modify.push((comment.clone(), id.to_string()));
643 }
644 }
645 }
646 }
647 }
648
649 for (comment_node, id) in to_modify {
650 let span = kuchikikiki::NodeRef::new_element(
651 markup5ever::QualName::new(
652 None,
653 markup5ever::ns!(html),
654 local_name!("span"),
655 ),
656 vec![
657 (
658 kuchikikiki::ExpandedName::new("", "id"),
659 kuchikikiki::Attribute {
660 prefix: None,
661 value: id,
662 },
663 ),
664 (
665 kuchikikiki::ExpandedName::new("", "class"),
666 kuchikikiki::Attribute {
667 prefix: None,
668 value: "nixos-anchor".into(),
669 },
670 ),
671 ],
672 );
673 comment_node.insert_after(span);
674 comment_node.detach();
675 }
676 }
677
678 fn process_header_anchor_comments(document: &kuchikikiki::NodeRef) {
680 let mut to_modify = Vec::new();
681
682 for comment in document.inclusive_descendants() {
683 if let Some(comment_node) = comment.as_comment() {
684 let comment_text = comment_node.borrow();
685 if let Some(anchor_start) = comment_text.find("anchor:") {
686 let id = comment_text[anchor_start + 7..].trim();
687 if !id.is_empty()
688 && id
689 .chars()
690 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
691 {
692 if let Some(parent) = comment.parent()
694 && let Some(element) = parent.as_element()
695 {
696 let tag_name = element.name.local.as_ref();
697 if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
698 to_modify.push((
699 parent.clone(),
700 comment.clone(),
701 id.to_string(),
702 ));
703 }
704 }
705 }
706 }
707 }
708 }
709
710 for (header_element, comment_node, id) in to_modify {
711 if let Some(element) = header_element.as_element() {
712 element
713 .attributes
714 .borrow_mut()
715 .insert(local_name!("id"), id);
716 comment_node.detach();
717 }
718 }
719 }
720
721 fn process_list_item_inline_anchors(document: &kuchikikiki::NodeRef) {
723 for li_node in safe_select(document, "li") {
724 let li_element = li_node;
725
726 let has_code = !safe_select(&li_element, "code, pre").is_empty();
728 if has_code {
729 continue; }
731
732 let text_content = li_element.text_contents();
733
734 if let Some(anchor_start) = text_content.find("[]{#")
735 && let Some(anchor_end) = text_content[anchor_start..].find('}')
736 {
737 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
738 if !id.is_empty()
739 && id
740 .chars()
741 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
742 {
743 let remaining_content =
744 &text_content[anchor_start + anchor_end + 1..];
745
746 for child in li_element.children() {
748 child.detach();
749 }
750
751 let span = kuchikikiki::NodeRef::new_element(
752 markup5ever::QualName::new(
753 None,
754 markup5ever::ns!(html),
755 local_name!("span"),
756 ),
757 vec![
758 (
759 kuchikikiki::ExpandedName::new("", "id"),
760 kuchikikiki::Attribute {
761 prefix: None,
762 value: id.into(),
763 },
764 ),
765 (
766 kuchikikiki::ExpandedName::new("", "class"),
767 kuchikikiki::Attribute {
768 prefix: None,
769 value: "nixos-anchor".into(),
770 },
771 ),
772 ],
773 );
774 li_element.append(span);
775 if !remaining_content.is_empty() {
776 li_element
777 .append(kuchikikiki::NodeRef::new_text(remaining_content));
778 }
779 }
780 }
781 }
782 }
783
784 fn process_paragraph_inline_anchors(document: &kuchikikiki::NodeRef) {
786 for p_node in safe_select(document, "p") {
787 let p_element = p_node;
788
789 let has_code = !safe_select(&p_element, "code, pre").is_empty();
791 if has_code {
792 continue; }
794
795 let text_content = p_element.text_contents();
796
797 if let Some(anchor_start) = text_content.find("[]{#")
798 && let Some(anchor_end) = text_content[anchor_start..].find('}')
799 {
800 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
801 if !id.is_empty()
802 && id
803 .chars()
804 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
805 {
806 let remaining_content =
807 &text_content[anchor_start + anchor_end + 1..];
808
809 for child in p_element.children() {
811 child.detach();
812 }
813
814 let span = kuchikikiki::NodeRef::new_element(
815 markup5ever::QualName::new(
816 None,
817 markup5ever::ns!(html),
818 local_name!("span"),
819 ),
820 vec![
821 (
822 kuchikikiki::ExpandedName::new("", "id"),
823 kuchikikiki::Attribute {
824 prefix: None,
825 value: id.into(),
826 },
827 ),
828 (
829 kuchikikiki::ExpandedName::new("", "class"),
830 kuchikikiki::Attribute {
831 prefix: None,
832 value: "nixos-anchor".into(),
833 },
834 ),
835 ],
836 );
837 p_element.append(span);
838 if !remaining_content.is_empty() {
839 p_element.append(kuchikikiki::NodeRef::new_text(remaining_content));
840 }
841 }
842 }
843 }
844 }
845
  /// Converts `[]{#id}` markers in any remaining text node into anchor spans.
  ///
  /// Text nodes with a `code` or `pre` ancestor are left alone. Every other
  /// text node containing `[]{#` is split: the text around each valid marker
  /// is kept as text nodes and each marker becomes
  /// `<span id="<id>" class="nixos-anchor">`. A marker is valid when its id
  /// is non-empty, consists of alphanumerics, `-` or `_`, and is directly
  /// followed by `}`; invalid markers stay in the text verbatim.
  fn process_remaining_inline_anchors(document: &kuchikikiki::NodeRef) {
    // Pass 1: collect candidate text nodes without touching the tree.
    let mut text_nodes_to_process = Vec::new();

    for node in document.inclusive_descendants() {
      if let Some(text_node) = node.as_text() {
        // Walk up the ancestors looking for <code> or <pre>.
        let mut parent = node.parent();
        let mut in_code = false;
        while let Some(p) = parent {
          if let Some(element) = p.as_element()
            && (element.name.local == local_name!("code")
              || element.name.local == local_name!("pre"))
          {
            in_code = true;
            break;
          }
          parent = p.parent();
        }

        if !in_code {
          let text_content = text_node.borrow().clone();
          if text_content.contains("[]{#") {
            text_nodes_to_process.push((node.clone(), text_content));
          }
        }
      }
    }

    // Pass 2: rebuild each candidate as a sequence of text and span nodes.
    for (text_node, text_content) in text_nodes_to_process {
      // Index (in chars) just past the last marker that was converted.
      let mut last_end = 0;
      let mut new_children = Vec::new();

      // Work on chars so that indices never fall inside a multi-byte
      // character.
      let chars = text_content.chars().collect::<Vec<_>>();
      let mut i = 0;
      while i < chars.len() {
        // Needs the four-character `[]{#` opener plus at least one more
        // character after it.
        if i + 4 < chars.len()
          && chars[i] == '['
          && chars[i + 1] == ']'
          && chars[i + 2] == '{'
          && chars[i + 3] == '#'
        {
          let anchor_start = i;
          i += 4;
          // Read id characters until `}` or the first invalid character.
          let mut id = String::new();
          while i < chars.len() && chars[i] != '}' {
            if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_'
            {
              id.push(chars[i]);
              i += 1;
            } else {
              break;
            }
          }

          if i < chars.len() && chars[i] == '}' && !id.is_empty() {
            let anchor_end = i + 1;

            // Keep the text between the previous marker and this one.
            if anchor_start > last_end {
              let before_text: String =
                chars[last_end..anchor_start].iter().collect();
              if !before_text.is_empty() {
                new_children.push(kuchikikiki::NodeRef::new_text(before_text));
              }
            }

            let span = kuchikikiki::NodeRef::new_element(
              markup5ever::QualName::new(
                None,
                markup5ever::ns!(html),
                local_name!("span"),
              ),
              vec![
                (
                  kuchikikiki::ExpandedName::new("", "id"),
                  kuchikikiki::Attribute {
                    prefix: None,
                    value: id,
                  },
                ),
                (
                  kuchikikiki::ExpandedName::new("", "class"),
                  kuchikikiki::Attribute {
                    prefix: None,
                    value: "nixos-anchor".into(),
                  },
                ),
              ],
            );
            new_children.push(span);

            last_end = anchor_end;
            i = anchor_end;
          } else {
            // Not a valid marker: leave it in the text and keep scanning.
            i += 1;
          }
        } else {
          i += 1;
        }
      }

      // Keep whatever text follows the last converted marker.
      if last_end < chars.len() {
        let after_text: String = chars[last_end..].iter().collect();
        if !after_text.is_empty() {
          new_children.push(kuchikikiki::NodeRef::new_text(after_text));
        }
      }

      // Replace the original text node with the rebuilt sequence.
      if !new_children.is_empty() {
        for child in new_children {
          text_node.insert_before(child);
        }
        text_node.detach();
      }
    }
  }
971
972 fn process_empty_auto_links(document: &kuchikikiki::NodeRef) {
974 for link_node in safe_select(document, "a") {
975 let link_element = link_node;
976 if let Some(element) = link_element.as_element() {
977 let href = element
978 .attributes
979 .borrow()
980 .get(local_name!("href"))
981 .map(std::string::ToString::to_string);
982 let text_content = link_element.text_contents();
983
984 if let Some(href_value) = href
985 && href_value.starts_with('#')
986 && (text_content.trim().is_empty()
987 || text_content.trim() == "{{ANCHOR}}")
988 {
989 if text_content.trim() == "{{ANCHOR}}" {
991 for child in link_element.children() {
992 child.detach();
993 }
994 }
995 let display_text = Self::humanize_anchor_id(&href_value);
997 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
998 }
999 }
1000 }
1001 }
1002
1003 fn process_empty_html_links(document: &kuchikikiki::NodeRef) {
1005 for link_node in safe_select(document, "a[href^='#']") {
1006 let link_element = link_node;
1007 let text_content = link_element.text_contents();
1008
1009 if text_content.trim().is_empty() || text_content.trim() == "{{ANCHOR}}" {
1010 if text_content.trim() == "{{ANCHOR}}" {
1012 for child in link_element.children() {
1013 child.detach();
1014 }
1015 }
1016 if let Some(element) = link_element.as_element()
1017 && let Some(href) =
1018 element.attributes.borrow().get(local_name!("href"))
1019 {
1020 let display_text = Self::humanize_anchor_id(href);
1021 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
1022 }
1023 }
1024 }
1025 }
1026
1027 fn process_option_anchor_links(document: &kuchikikiki::NodeRef) {
1029 let mut to_modify = Vec::new();
1030
1031 for link_node in safe_select(document, "a[href^='#opt-']") {
1033 let link_element = link_node;
1034 if let Some(element) = link_element.as_element() {
1035 let href = element
1036 .attributes
1037 .borrow()
1038 .get(local_name!("href"))
1039 .map(std::string::ToString::to_string);
1040 let text_content = link_element.text_contents();
1041
1042 if let Some(href_value) = href
1043 && href_value.starts_with("#opt-")
1044 {
1045 let option_anchor = href_value[1..].to_string(); let needs_text_replacement = text_content.trim().is_empty()
1047 || text_content.trim() == "{{ANCHOR}}";
1048 to_modify.push((
1049 link_element.clone(),
1050 option_anchor,
1051 needs_text_replacement,
1052 ));
1053 }
1054 }
1055 }
1056
1057 for (link_element, option_anchor, needs_text_replacement) in to_modify {
1059 if let Some(element) = link_element.as_element() {
1060 let new_href = format!("options.html#{option_anchor}");
1061 element
1062 .attributes
1063 .borrow_mut()
1064 .insert(local_name!("href"), new_href);
1065
1066 if needs_text_replacement {
1067 for child in link_element.children() {
1069 child.detach();
1070 }
1071
1072 if let Some(option_path) = option_anchor.strip_prefix("opt-") {
1075 let option_name = option_path.replace('-', ".");
1076 link_element.append(kuchikikiki::NodeRef::new_text(option_name));
1077 }
1078 }
1079 }
1080 }
1081 }
1082
1083 fn process_markdown_links(document: &kuchikikiki::NodeRef) {
1085 for link_node in safe_select(document, "a") {
1086 let link_element = link_node;
1087 if let Some(element) = link_element.as_element() {
1088 let href = element
1089 .attributes
1090 .borrow()
1091 .get(local_name!("href"))
1092 .map(std::string::ToString::to_string);
1093
1094 if let Some(href_value) = href {
1095 if !href_value.starts_with("http://")
1098 && !href_value.starts_with("https://")
1099 && !href_value.starts_with('#')
1100 && !href_value.starts_with("mailto:")
1101 {
1102 let (path_part, suffix) = href_value
1104 .find(['#', '?'])
1105 .map_or((href_value.as_str(), ""), |idx| {
1106 href_value.split_at(idx)
1107 });
1108
1109 if std::path::Path::new(path_part)
1110 .extension()
1111 .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
1112 {
1113 let new_href =
1114 format!("{}.html{}", &path_part[..path_part.len() - 3], suffix);
1115 element
1116 .attributes
1117 .borrow_mut()
1118 .insert(local_name!("href"), new_href);
1119 }
1120 }
1121 }
1122 }
1123 }
1124 }
1125
1126 fn humanize_anchor_id(anchor: &str) -> String {
1128 let cleaned = anchor.trim_start_matches('#');
1130
1131 let without_prefix = cleaned
1133 .trim_start_matches("sec-")
1134 .trim_start_matches("ssec-")
1135 .trim_start_matches("opt-");
1136
1137 let spaced = without_prefix.replace(['-', '_'], " ");
1139
1140 spaced
1142 .split_whitespace()
1143 .map(|word| {
1144 let mut chars = word.chars();
1145 chars.next().map_or_else(String::new, |c| {
1146 c.to_uppercase().collect::<String>() + chars.as_str()
1147 })
1148 })
1149 .collect::<Vec<String>>()
1150 .join(" ")
1151 }
1152}
1153
1154pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
1156 fn inner<'a>(node: &'a AstNode<'a>) -> String {
1157 let mut text = String::new();
1158 for child in node.children() {
1159 match &child.data.borrow().value {
1160 NodeValue::Text(t) => text.push_str(t),
1161 NodeValue::Code(t) => text.push_str(&t.literal),
1162 NodeValue::Link(..)
1163 | NodeValue::Emph
1164 | NodeValue::Strong
1165 | NodeValue::Strikethrough
1166 | NodeValue::Superscript
1167 | NodeValue::Subscript
1168 | NodeValue::FootnoteReference(..) => {
1169 text.push_str(&inner(child));
1170 },
1171 #[allow(clippy::match_same_arms, reason = "Explicit for clarity")]
1172 NodeValue::HtmlInline(_) | NodeValue::Image(..) => {},
1173 _ => {},
1174 }
1175 }
1176 text
1177 }
1178 inner(node)
1179}
1180
1181#[must_use]
1195pub(crate) fn slugify_heading(text: &str) -> String {
1196 utils::slugify(&html_escape::encode_text(text))
1197}
1198
1199pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
1201 let mut files = Vec::with_capacity(100);
1202
1203 for entry in WalkDir::new(input_dir)
1204 .follow_links(true)
1205 .into_iter()
1206 .filter_map(Result::ok)
1207 {
1208 let path = entry.path();
1209 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
1210 files.push(path.to_owned());
1211 }
1212 }
1213
1214 trace!("Found {} markdown files to process", files.len());
1215 files
1216}
1217
/// Features that can be queried with `MarkdownProcessor::has_feature`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProcessorFeature {
  /// Reported from the processor's `gfm` option.
  Gfm,
  /// Reported from the processor's `nixpkgs` option.
  Nixpkgs,
  /// Reported from the processor's `highlight_code` option.
  SyntaxHighlighting,
  /// Reported as active when a manpage URL mapping has been loaded.
  ManpageUrls,
}
1230
/// Blanks out admonition blocks so that their contents are ignored by header
/// extraction.
///
/// A line that (ignoring leading whitespace) starts with
/// `<div class="admonition ` opens a block; while at least one block is open,
/// a line that is exactly `</div>` after removing leading whitespace closes
/// one. Every line belonging to a block, including the opening and closing
/// lines, is replaced by an empty line so the line count is preserved. All
/// other lines are copied through, each terminated by `\n`.
fn remove_admonition_blocks_for_headers(content: &str) -> String {
  let mut open_admonitions = 0usize;
  let mut kept = String::with_capacity(content.len());

  for line in content.lines() {
    let unindented = line.trim_start();

    if unindented.starts_with("<div class=\"admonition ") {
      open_admonitions += 1;
    } else if open_admonitions > 0 {
      if unindented == "</div>" {
        open_admonitions -= 1;
      }
    } else {
      kept.push_str(line);
    }

    // Every input line yields exactly one output line.
    kept.push('\n');
  }

  kept
}
1257
/// Tells whether `line` looks like a setext heading underline.
///
/// True for a non-empty line consisting solely of `=` and whitespace, or
/// solely of `-` and whitespace.
fn is_setext_heading_underline(line: &str) -> bool {
  if line.is_empty() {
    return false;
  }

  let made_of = |marker: char| {
    line
      .chars()
      .all(|ch| ch == marker || ch.is_whitespace())
  };

  made_of('=') || made_of('-')
}
1263
1264fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
1266where
1267 F: FnOnce(&kuchikikiki::NodeRef),
1268{
1269 process_safe(
1270 html,
1271 |html| {
1272 use tendril::TendrilSink;
1273
1274 let document = kuchikikiki::parse_html().one(html);
1275 transform_fn(&document);
1276
1277 let mut out = Vec::new();
1278 let _ = document.serialize(&mut out);
1279 String::from_utf8_lossy(&out).into_owned()
1280 },
1281 html,
1282 )
1283}