1use std::{
7 collections::HashMap,
8 path::{Path, PathBuf},
9};
10
11use comrak::{
12 Arena,
13 ComrakOptions,
14 nodes::{AstNode, NodeHeading, NodeValue},
15 parse_document,
16};
17use log::trace;
18use markup5ever::local_name;
19use walkdir::WalkDir;
20
21use super::{
22 process::process_safe,
23 types::{
24 AstTransformer,
25 MarkdownOptions,
26 MarkdownProcessor,
27 PromptTransformer,
28 },
29};
30use crate::{
31 syntax::create_default_manager,
32 types::{Header, MarkdownResult},
33 utils,
34};
35
36impl MarkdownProcessor {
37 #[must_use]
39 pub fn new(options: MarkdownOptions) -> Self {
40 let manpage_urls = options
41 .manpage_urls_path
42 .as_ref()
43 .and_then(|path| crate::utils::load_manpage_urls(path).ok());
44
45 let syntax_manager = if options.highlight_code {
46 create_default_manager().ok()
47 } else {
48 None
49 };
50
51 Self {
52 options,
53 manpage_urls,
54 syntax_manager,
55 included_files: std::cell::RefCell::new(Vec::new()),
56 base_dir: std::path::PathBuf::from("."),
57 }
58 }
59
60 #[must_use]
62 pub const fn options(&self) -> &MarkdownOptions {
63 &self.options
64 }
65
66 pub fn with_base_dir(mut self, base_dir: &std::path::Path) -> Self {
68 self.base_dir = base_dir.to_path_buf();
69 self
70 }
71
72 #[must_use]
74 pub const fn has_feature(&self, feature: ProcessorFeature) -> bool {
75 match feature {
76 ProcessorFeature::Gfm => self.options.gfm,
77 ProcessorFeature::Nixpkgs => self.options.nixpkgs,
78 ProcessorFeature::SyntaxHighlighting => self.options.highlight_code,
79 ProcessorFeature::ManpageUrls => self.manpage_urls.is_some(),
80 }
81 }
82
83 #[must_use]
85 pub const fn manpage_urls(&self) -> Option<&HashMap<String, String>> {
86 self.manpage_urls.as_ref()
87 }
88
89 #[must_use]
91 pub fn highlight_codeblocks(&self, html: &str) -> String {
92 if !self.options.highlight_code || self.syntax_manager.is_none() {
93 return html.to_string();
94 }
95
96 use kuchikikiki::parse_html;
97 use tendril::TendrilSink;
98
99 let document = parse_html().one(html);
100
101 let mut code_blocks = Vec::new();
103 for pre_node in document.select("pre > code").unwrap() {
104 let code_node = pre_node.as_node();
105 if let Some(element) = code_node.as_element() {
106 let class_attr = element
107 .attributes
108 .borrow()
109 .get("class")
110 .map(std::string::ToString::to_string);
111 let language = class_attr
112 .as_deref()
113 .and_then(|s| s.strip_prefix("language-"))
114 .unwrap_or("text");
115 let code_text = code_node.text_contents();
116
117 if let Some(pre_parent) = code_node.parent() {
118 code_blocks.push((
119 pre_parent.clone(),
120 code_node.clone(),
121 code_text,
122 language.to_string(),
123 ));
124 }
125 }
126 }
127
128 for (pre_element, _code_node, code_text, language) in code_blocks {
130 if let Some(highlighted) = self.highlight_code_html(&code_text, &language)
131 {
132 let wrapped_html = format!(
134 r#"<pre class="highlight"><code class="language-{language}">{highlighted}</code></pre>"#
135 );
136 let fragment = parse_html().one(wrapped_html.as_str());
137 pre_element.insert_after(fragment);
138 pre_element.detach();
139 }
140 }
142
143 let mut buf = Vec::new();
144 document.serialize(&mut buf).unwrap();
145 String::from_utf8(buf).unwrap_or_default()
146 }
147
148 fn highlight_code_html(&self, code: &str, language: &str) -> Option<String> {
151 if !self.options.highlight_code {
152 return None;
153 }
154
155 let syntax_manager = self.syntax_manager.as_ref()?;
156
157 syntax_manager
158 .highlight_code(code, language, self.options.highlight_theme.as_deref())
159 .ok()
160 }
161
162 #[must_use]
164 pub fn render(&self, markdown: &str) -> MarkdownResult {
165 self.included_files.borrow_mut().clear();
167
168 let preprocessed = self.preprocess(markdown);
169 let (headers, title) = self.extract_headers(&preprocessed);
170 let html = self.process_html_pipeline(&preprocessed);
171
172 MarkdownResult {
173 html,
174 headers,
175 title,
176 included_files: self.included_files.borrow().clone(),
177 }
178 }
179
180 fn process_html_pipeline(&self, content: &str) -> String {
182 let mut html = self.convert_to_html(content);
183
184 if cfg!(feature = "ndg-flavored") {
186 #[cfg(feature = "ndg-flavored")]
187 {
188 html = super::extensions::process_option_references(&html);
189 }
190 }
191
192 if self.options.nixpkgs {
193 html = self.process_manpage_references_html(&html);
194 }
195
196 if self.options.highlight_code {
197 html = self.highlight_codeblocks(&html);
198 }
199
200 self.kuchiki_postprocess(&html)
201 }
202
203 fn preprocess(&self, content: &str) -> String {
205 let mut processed = content.to_string();
206
207 processed = super::extensions::process_myst_autolinks(&processed);
209
210 if self.options.nixpkgs {
211 processed = self.apply_nixpkgs_preprocessing(&processed);
212 }
213
214 if self.options.nixpkgs || cfg!(feature = "ndg-flavored") {
215 processed = super::extensions::process_role_markup(
216 &processed,
217 self.manpage_urls.as_ref(),
218 self.options.auto_link_options,
219 );
220 }
221
222 processed
223 }
224
225 #[cfg(feature = "nixpkgs")]
227 fn apply_nixpkgs_preprocessing(&self, content: &str) -> String {
228 let (with_includes, included_files) =
229 super::extensions::process_file_includes(content, &self.base_dir);
230 self.included_files.borrow_mut().extend(included_files);
231 let with_blocks = super::extensions::process_block_elements(&with_includes);
232 super::extensions::process_inline_anchors(&with_blocks)
233 }
234
235 #[cfg(not(feature = "nixpkgs"))]
237 fn apply_nixpkgs_preprocessing(&self, content: &str) -> String {
238 content.to_string()
239 }
240
241 #[must_use]
243 pub fn extract_headers(
244 &self,
245 content: &str,
246 ) -> (Vec<Header>, Option<String>) {
247 let arena = Arena::new();
248 let options = self.comrak_options();
249
250 let mut normalized = String::with_capacity(content.len());
252 for line in content.lines() {
253 let trimmed = line.trim_end();
254 if !trimmed.starts_with('#') {
255 if let Some(anchor_start) = trimmed.rfind("{#") {
256 if let Some(anchor_end) = trimmed[anchor_start..].find('}') {
257 let text = trimmed[..anchor_start].trim_end();
258 let id = &trimmed[anchor_start + 2..anchor_start + anchor_end];
259 normalized.push_str(&format!("## {text} {{#{id}}}\n"));
260 continue;
261 }
262 }
263 }
264 normalized.push_str(line);
265 normalized.push('\n');
266 }
267
268 let root = parse_document(&arena, &normalized, &options);
269
270 let mut headers = Vec::new();
271 let mut found_title = None;
272
273 for node in root.descendants() {
274 if let NodeValue::Heading(NodeHeading { level, .. }) =
275 &node.data.borrow().value
276 {
277 let mut text = String::new();
278 let mut explicit_id = None;
279
280 for child in node.children() {
281 match &child.data.borrow().value {
282 NodeValue::Text(t) => text.push_str(t),
283 NodeValue::Code(t) => text.push_str(&t.literal),
284 NodeValue::Link(..) => text.push_str(&extract_inline_text(child)),
285 NodeValue::Emph => text.push_str(&extract_inline_text(child)),
286 NodeValue::Strong => text.push_str(&extract_inline_text(child)),
287 NodeValue::Strikethrough => {
288 text.push_str(&extract_inline_text(child));
289 },
290 NodeValue::Superscript => {
291 text.push_str(&extract_inline_text(child));
292 },
293 NodeValue::Subscript => text.push_str(&extract_inline_text(child)),
294 NodeValue::FootnoteReference(..) => {
295 text.push_str(&extract_inline_text(child));
296 },
297 NodeValue::HtmlInline(html) => {
298 let html_str = html.as_str();
300 if let Some(start) = html_str.find("{#") {
301 if let Some(end) = html_str[start..].find('}') {
302 let anchor = &html_str[start + 2..start + end];
303 explicit_id = Some(anchor.to_string());
304 }
305 }
306 },
307 NodeValue::Image(..) => {},
308 _ => {},
309 }
310 }
311
312 let trimmed = text.trim_end();
314 let (final_text, id) = if let Some(start) = trimmed.rfind("{#") {
315 if let Some(end) = trimmed[start..].find('}') {
316 let anchor = &trimmed[start + 2..start + end];
317 (trimmed[..start].trim_end().to_string(), anchor.to_string())
318 } else {
319 (
320 text.clone(),
321 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
322 )
323 }
324 } else {
325 (
326 text.clone(),
327 explicit_id.unwrap_or_else(|| utils::slugify(&text)),
328 )
329 };
330 if *level == 1 && found_title.is_none() {
331 found_title = Some(final_text.clone());
332 }
333 headers.push(Header {
334 text: final_text,
335 level: *level,
336 id,
337 });
338 }
339 }
340
341 (headers, found_title)
342 }
343
344 fn convert_to_html(&self, content: &str) -> String {
346 let arena = Arena::new();
348 let options = self.comrak_options();
349 let root = parse_document(&arena, content, &options);
350
351 let prompt_transformer = PromptTransformer;
353 prompt_transformer.transform(root);
354
355 let mut html_output = String::new();
356 comrak::format_html(root, &options, &mut html_output).unwrap_or_default();
357
358 self.process_header_anchors_html(&html_output)
360 }
361
362 fn process_header_anchors_html(&self, html: &str) -> String {
365 use std::sync::LazyLock;
366
367 use regex::Regex;
368
369 static HEADER_ANCHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
370 Regex::new(r"<h([1-6])>(.*?)\s*\{#([a-zA-Z0-9_-]+)\}(.*?)</h[1-6]>")
371 .unwrap_or_else(|e| {
372 log::error!("Failed to compile HEADER_ANCHOR_RE regex: {e}");
373 utils::never_matching_regex()
374 })
375 });
376
377 HEADER_ANCHOR_RE
378 .replace_all(html, |caps: ®ex::Captures| {
379 let level = &caps[1];
380 let prefix = &caps[2];
381 let id = &caps[3];
382 let suffix = &caps[4];
383 format!("<h{level} id=\"{id}\">{prefix}{suffix}</h{level}>")
384 })
385 .to_string()
386 }
387
388 fn comrak_options(&self) -> ComrakOptions<'_> {
390 let mut options = ComrakOptions::default();
391 if self.options.gfm {
392 options.extension.table = true;
393 options.extension.footnotes = true;
394 options.extension.strikethrough = true;
395 options.extension.tasklist = true;
396 options.extension.superscript = true;
397 options.extension.autolink = true;
398 }
399 options.render.unsafe_ = true;
400 options.extension.header_ids = None;
402 options.extension.description_lists = true;
403 options
404 }
405
406 #[cfg(feature = "nixpkgs")]
408 fn process_manpage_references_html(&self, html: &str) -> String {
409 super::extensions::process_manpage_references(
410 html,
411 self.manpage_urls.as_ref(),
412 )
413 }
414
415 #[cfg(not(feature = "nixpkgs"))]
418 fn process_manpage_references_html(&self, html: &str) -> String {
419 html.to_string()
420 }
421
422 fn kuchiki_postprocess(&self, html: &str) -> String {
424 kuchiki_postprocess_html(html, |document| {
426 self.apply_dom_transformations(document);
427 })
428 }
429
430 fn apply_dom_transformations(&self, document: &kuchikikiki::NodeRef) {
432 self.process_list_item_id_markers(document);
433 self.process_header_anchor_comments(document);
434 self.process_list_item_inline_anchors(document);
435 self.process_paragraph_inline_anchors(document);
436 self.process_remaining_inline_anchors(document);
437 self.process_option_anchor_links(document);
438 self.process_empty_auto_links(document);
439 self.process_empty_html_links(document);
440 }
441
442 fn process_list_item_id_markers(&self, document: &kuchikikiki::NodeRef) {
444 let mut to_modify = Vec::new();
445
446 for comment in document.inclusive_descendants() {
447 if let Some(comment_node) = comment.as_comment() {
448 let comment_text = comment_node.borrow();
449 if let Some(id_start) = comment_text.find("nixos-anchor-id:") {
450 let id = comment_text[id_start + 16..].trim();
451 if !id.is_empty()
452 && id
453 .chars()
454 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
455 {
456 if let Some(parent) = comment.parent() {
458 if let Some(element) = parent.as_element() {
459 if element.name.local.as_ref() == "li" {
460 to_modify.push((comment.clone(), id.to_string()));
461 }
462 }
463 }
464 }
465 }
466 }
467 }
468
469 for (comment_node, id) in to_modify {
470 let span = kuchikikiki::NodeRef::new_element(
471 markup5ever::QualName::new(
472 None,
473 markup5ever::ns!(html),
474 local_name!("span"),
475 ),
476 vec![
477 (
478 kuchikikiki::ExpandedName::new("", "id"),
479 kuchikikiki::Attribute {
480 prefix: None,
481 value: id,
482 },
483 ),
484 (
485 kuchikikiki::ExpandedName::new("", "class"),
486 kuchikikiki::Attribute {
487 prefix: None,
488 value: "nixos-anchor".into(),
489 },
490 ),
491 ],
492 );
493 comment_node.insert_after(span);
494 comment_node.detach();
495 }
496 }
497
498 fn process_header_anchor_comments(&self, document: &kuchikikiki::NodeRef) {
500 let mut to_modify = Vec::new();
501
502 for comment in document.inclusive_descendants() {
503 if let Some(comment_node) = comment.as_comment() {
504 let comment_text = comment_node.borrow();
505 if let Some(anchor_start) = comment_text.find("anchor:") {
506 let id = comment_text[anchor_start + 7..].trim();
507 if !id.is_empty()
508 && id
509 .chars()
510 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
511 {
512 if let Some(parent) = comment.parent() {
514 if let Some(element) = parent.as_element() {
515 let tag_name = element.name.local.as_ref();
516 if matches!(tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") {
517 to_modify.push((
518 parent.clone(),
519 comment.clone(),
520 id.to_string(),
521 ));
522 }
523 }
524 }
525 }
526 }
527 }
528 }
529
530 for (header_element, comment_node, id) in to_modify {
531 if let Some(element) = header_element.as_element() {
532 element
533 .attributes
534 .borrow_mut()
535 .insert(local_name!("id"), id);
536 comment_node.detach();
537 }
538 }
539 }
540
541 fn process_list_item_inline_anchors(&self, document: &kuchikikiki::NodeRef) {
543 for li_node in document.select("li").unwrap() {
544 let li_element = li_node.as_node();
545
546 let has_code = li_element.select("code, pre").is_ok()
548 && li_element.select("code, pre").unwrap().next().is_some();
549 if has_code {
550 continue; }
552
553 let text_content = li_element.text_contents();
554
555 if let Some(anchor_start) = text_content.find("[]{#") {
556 if let Some(anchor_end) = text_content[anchor_start..].find('}') {
557 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
558 if !id.is_empty()
559 && id
560 .chars()
561 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
562 {
563 let remaining_content =
564 &text_content[anchor_start + anchor_end + 1..];
565
566 for child in li_element.children() {
568 child.detach();
569 }
570
571 let span = kuchikikiki::NodeRef::new_element(
572 markup5ever::QualName::new(
573 None,
574 markup5ever::ns!(html),
575 local_name!("span"),
576 ),
577 vec![
578 (
579 kuchikikiki::ExpandedName::new("", "id"),
580 kuchikikiki::Attribute {
581 prefix: None,
582 value: id.into(),
583 },
584 ),
585 (
586 kuchikikiki::ExpandedName::new("", "class"),
587 kuchikikiki::Attribute {
588 prefix: None,
589 value: "nixos-anchor".into(),
590 },
591 ),
592 ],
593 );
594 li_element.append(span);
595 if !remaining_content.is_empty() {
596 li_element
597 .append(kuchikikiki::NodeRef::new_text(remaining_content));
598 }
599 }
600 }
601 }
602 }
603 }
604
605 fn process_paragraph_inline_anchors(&self, document: &kuchikikiki::NodeRef) {
607 for p_node in document.select("p").unwrap() {
608 let p_element = p_node.as_node();
609
610 let has_code = p_element.select("code, pre").is_ok()
612 && p_element.select("code, pre").unwrap().next().is_some();
613 if has_code {
614 continue; }
616
617 let text_content = p_element.text_contents();
618
619 if let Some(anchor_start) = text_content.find("[]{#") {
620 if let Some(anchor_end) = text_content[anchor_start..].find('}') {
621 let id = &text_content[anchor_start + 4..anchor_start + anchor_end];
622 if !id.is_empty()
623 && id
624 .chars()
625 .all(|c| c.is_alphanumeric() || c == '-' || c == '_')
626 {
627 let remaining_content =
628 &text_content[anchor_start + anchor_end + 1..];
629
630 for child in p_element.children() {
632 child.detach();
633 }
634
635 let span = kuchikikiki::NodeRef::new_element(
636 markup5ever::QualName::new(
637 None,
638 markup5ever::ns!(html),
639 local_name!("span"),
640 ),
641 vec![
642 (
643 kuchikikiki::ExpandedName::new("", "id"),
644 kuchikikiki::Attribute {
645 prefix: None,
646 value: id.into(),
647 },
648 ),
649 (
650 kuchikikiki::ExpandedName::new("", "class"),
651 kuchikikiki::Attribute {
652 prefix: None,
653 value: "nixos-anchor".into(),
654 },
655 ),
656 ],
657 );
658 p_element.append(span);
659 if !remaining_content.is_empty() {
660 p_element
661 .append(kuchikikiki::NodeRef::new_text(remaining_content));
662 }
663 }
664 }
665 }
666 }
667 }
668
669 fn process_remaining_inline_anchors(&self, document: &kuchikikiki::NodeRef) {
671 let mut text_nodes_to_process = Vec::new();
672
673 for node in document.inclusive_descendants() {
674 if let Some(text_node) = node.as_text() {
675 let mut parent = node.parent();
677 let mut in_code = false;
678 while let Some(p) = parent {
679 if let Some(element) = p.as_element() {
680 if element.name.local == local_name!("code")
681 || element.name.local == local_name!("pre")
682 {
683 in_code = true;
684 break;
685 }
686 }
687 parent = p.parent();
688 }
689
690 if !in_code {
692 let text_content = text_node.borrow().clone();
693 if text_content.contains("[]{#") {
694 text_nodes_to_process.push((node.clone(), text_content));
695 }
696 }
697 }
698 }
699
700 for (text_node, text_content) in text_nodes_to_process {
701 let mut last_end = 0;
702 let mut new_children = Vec::new();
703
704 let chars = text_content.chars().collect::<Vec<_>>();
706 let mut i = 0;
707 while i < chars.len() {
708 if i + 4 < chars.len()
709 && chars[i] == '['
710 && chars[i + 1] == ']'
711 && chars[i + 2] == '{'
712 && chars[i + 3] == '#'
713 {
714 let anchor_start = i;
716 i += 4; let mut id = String::new();
719 while i < chars.len() && chars[i] != '}' {
720 if chars[i].is_alphanumeric() || chars[i] == '-' || chars[i] == '_'
721 {
722 id.push(chars[i]);
723 i += 1;
724 } else {
725 break;
726 }
727 }
728
729 if i < chars.len() && chars[i] == '}' && !id.is_empty() {
730 let anchor_end = i + 1;
732
733 if anchor_start > last_end {
735 let before_text: String =
736 chars[last_end..anchor_start].iter().collect();
737 if !before_text.is_empty() {
738 new_children.push(kuchikikiki::NodeRef::new_text(before_text));
739 }
740 }
741
742 let span = kuchikikiki::NodeRef::new_element(
744 markup5ever::QualName::new(
745 None,
746 markup5ever::ns!(html),
747 local_name!("span"),
748 ),
749 vec![
750 (
751 kuchikikiki::ExpandedName::new("", "id"),
752 kuchikikiki::Attribute {
753 prefix: None,
754 value: id,
755 },
756 ),
757 (
758 kuchikikiki::ExpandedName::new("", "class"),
759 kuchikikiki::Attribute {
760 prefix: None,
761 value: "nixos-anchor".into(),
762 },
763 ),
764 ],
765 );
766 new_children.push(span);
767
768 last_end = anchor_end;
769 i = anchor_end;
770 } else {
771 i += 1;
772 }
773 } else {
774 i += 1;
775 }
776 }
777
778 if last_end < chars.len() {
780 let after_text: String = chars[last_end..].iter().collect();
781 if !after_text.is_empty() {
782 new_children.push(kuchikikiki::NodeRef::new_text(after_text));
783 }
784 }
785
786 if !new_children.is_empty() {
788 for child in new_children {
789 text_node.insert_before(child);
790 }
791 text_node.detach();
792 }
793 }
794 }
795
796 fn process_empty_auto_links(&self, document: &kuchikikiki::NodeRef) {
798 for link_node in document.select("a").unwrap() {
799 let link_element = link_node.as_node();
800 if let Some(element) = link_element.as_element() {
801 let href = element
802 .attributes
803 .borrow()
804 .get(local_name!("href"))
805 .map(std::string::ToString::to_string);
806 let text_content = link_element.text_contents();
807
808 if let Some(href_value) = href {
809 if href_value.starts_with('#')
810 && (text_content.trim().is_empty()
811 || text_content.trim() == "{{ANCHOR}}")
812 {
813 if text_content.trim() == "{{ANCHOR}}" {
815 for child in link_element.children() {
816 child.detach();
817 }
818 }
819 let display_text = self.humanize_anchor_id(&href_value);
821 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
822 }
823 }
824 }
825 }
826 }
827
828 fn process_empty_html_links(&self, document: &kuchikikiki::NodeRef) {
830 for link_node in document.select("a[href^='#']").unwrap() {
831 let link_element = link_node.as_node();
832 let text_content = link_element.text_contents();
833
834 if text_content.trim().is_empty() || text_content.trim() == "{{ANCHOR}}" {
835 if text_content.trim() == "{{ANCHOR}}" {
837 for child in link_element.children() {
838 child.detach();
839 }
840 }
841 if let Some(element) = link_element.as_element() {
842 if let Some(href) =
843 element.attributes.borrow().get(local_name!("href"))
844 {
845 let display_text = self.humanize_anchor_id(href);
846 link_element.append(kuchikikiki::NodeRef::new_text(display_text));
847 }
848 }
849 }
850 }
851 }
852
853 fn process_option_anchor_links(&self, document: &kuchikikiki::NodeRef) {
855 let mut to_modify = Vec::new();
856
857 for link_node in document.select("a[href^='#opt-']").unwrap() {
859 let link_element = link_node.as_node();
860 if let Some(element) = link_element.as_element() {
861 let href = element
862 .attributes
863 .borrow()
864 .get(local_name!("href"))
865 .map(std::string::ToString::to_string);
866 let text_content = link_element.text_contents();
867
868 if let Some(href_value) = href {
869 if href_value.starts_with("#opt-") {
870 let option_anchor = href_value[1..].to_string(); let needs_text_replacement = text_content.trim().is_empty()
872 || text_content.trim() == "{{ANCHOR}}";
873 to_modify.push((
874 link_element.clone(),
875 option_anchor,
876 needs_text_replacement,
877 ));
878 }
879 }
880 }
881 }
882
883 for (link_element, option_anchor, needs_text_replacement) in to_modify {
885 if let Some(element) = link_element.as_element() {
886 let new_href = format!("options.html#{option_anchor}");
887 element
888 .attributes
889 .borrow_mut()
890 .insert(local_name!("href"), new_href);
891
892 if needs_text_replacement {
893 for child in link_element.children() {
895 child.detach();
896 }
897
898 if let Some(option_path) = option_anchor.strip_prefix("opt-") {
901 let option_name = option_path.replace('-', ".");
902 link_element.append(kuchikikiki::NodeRef::new_text(option_name));
903 }
904 }
905 }
906 }
907 }
908
909 fn humanize_anchor_id(&self, anchor: &str) -> String {
911 let cleaned = anchor.trim_start_matches('#');
913
914 let without_prefix = cleaned
916 .trim_start_matches("sec-")
917 .trim_start_matches("ssec-")
918 .trim_start_matches("opt-");
919
920 let spaced = without_prefix.replace(['-', '_'], " ");
922
923 spaced
925 .split_whitespace()
926 .map(|word| {
927 let mut chars = word.chars();
928 chars.next().map_or_else(String::new, |c| {
929 c.to_uppercase().collect::<String>() + chars.as_str()
930 })
931 })
932 .collect::<Vec<String>>()
933 .join(" ")
934 }
935}
936
937pub fn extract_inline_text<'a>(node: &'a AstNode<'a>) -> String {
939 let mut text = String::new();
940 for child in node.children() {
941 match &child.data.borrow().value {
942 NodeValue::Text(t) => text.push_str(t),
943 NodeValue::Code(t) => text.push_str(&t.literal),
944 NodeValue::Link(..) => text.push_str(&extract_inline_text(child)),
945 NodeValue::Emph => text.push_str(&extract_inline_text(child)),
946 NodeValue::Strong => text.push_str(&extract_inline_text(child)),
947 NodeValue::Strikethrough => text.push_str(&extract_inline_text(child)),
948 NodeValue::Superscript => text.push_str(&extract_inline_text(child)),
949 NodeValue::Subscript => text.push_str(&extract_inline_text(child)),
950 NodeValue::FootnoteReference(..) => {
951 text.push_str(&extract_inline_text(child));
952 },
953 NodeValue::HtmlInline(_) => {},
954 NodeValue::Image(..) => {},
955 _ => {},
956 }
957 }
958 text
959}
960
961pub fn collect_markdown_files(input_dir: &Path) -> Vec<PathBuf> {
963 let mut files = Vec::with_capacity(100);
964
965 for entry in WalkDir::new(input_dir)
966 .follow_links(true)
967 .into_iter()
968 .filter_map(Result::ok)
969 {
970 let path = entry.path();
971 if path.is_file() && path.extension().is_some_and(|ext| ext == "md") {
972 files.push(path.to_owned());
973 }
974 }
975
976 trace!("Found {} markdown files to process", files.len());
977 files
978}
979
/// Capabilities that can be queried through `MarkdownProcessor::has_feature`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ProcessorFeature {
  /// Reported from the processor's `gfm` option.
  Gfm,
  /// Reported from the processor's `nixpkgs` option.
  Nixpkgs,
  /// Reported from the processor's `highlight_code` option.
  SyntaxHighlighting,
  /// Reported when a manpage URL map is present.
  ManpageUrls,
}
992
993fn kuchiki_postprocess_html<F>(html: &str, transform_fn: F) -> String
995where
996 F: FnOnce(&kuchikikiki::NodeRef),
997{
998 process_safe(
999 html,
1000 |html| {
1001 use tendril::TendrilSink;
1002
1003 let document = kuchikikiki::parse_html().one(html);
1004 transform_fn(&document);
1005
1006 let mut out = Vec::new();
1007 document.serialize(&mut out).ok();
1008 String::from_utf8(out).unwrap_or_default()
1009 },
1010 html,
1011 )
1012}