Skip to main content

mdx_gen/
extensions.rs

1//! Extension functionality for the MDX Gen library.
2//!
3//! This module provides utilities for enhancing Markdown processing,
4//! including custom block handling and table formatting.
5//! Syntax highlighting has moved to [`crate::highlight`].
6
7use crate::error::MarkdownError;
8use comrak::nodes::{NodeHtmlBlock, NodeValue};
9use regex::Regex;
10use std::cell::RefCell;
11use std::collections::HashMap;
12use std::str::FromStr;
13use std::sync::LazyLock;
14
15// ── Headings / table-of-contents ────────────────────────────────────
16
/// One heading found while scanning a parsed Markdown document.
///
/// Values of this type are produced by [`collect_headings`] and by
/// [`crate::process_markdown_with_toc`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Heading {
    /// Nesting depth of the heading: 1-6 for ATX headings, 1-2 for
    /// setext headings.
    pub level: u8,
    /// The heading's content as plain text, with markup removed.
    pub text: String,
    /// Anchor id for this heading. It is generated through
    /// [`comrak::Anchorizer`] so that it lines up with the `id="…"`
    /// attribute comrak writes when `MarkdownOptions::header_ids` is
    /// enabled.
    pub id: String,
}
32
33/// Walks the AST and returns one [`Heading`] per heading node, in
34/// document order.
35///
36/// `prefix` mirrors `MarkdownOptions::header_ids`:
37/// * `None` or `Some("")` → bare slug (`"introduction"`).
38/// * `Some(p)` → `format!("{p}{slug}")` (`"user-content-introduction"`).
39///
40/// Uses comrak's own `Anchorizer` so dedup behaviour (`-1`, `-2`
41/// suffixes for repeated headings) matches the rendered HTML.
42pub fn collect_headings<'a>(
43    root: comrak::nodes::Node<'a>,
44    prefix: Option<&str>,
45) -> Vec<Heading> {
46    let mut anchorizer = comrak::Anchorizer::new();
47    let mut out = Vec::new();
48    for node in root.descendants() {
49        let level = match node.data.borrow().value {
50            NodeValue::Heading(h) => h.level,
51            _ => continue,
52        };
53        let text = extract_text(node);
54        let slug = anchorizer.anchorize(&text);
55        let id = match prefix {
56            Some(p) if !p.is_empty() => format!("{p}{slug}"),
57            _ => slug,
58        };
59        out.push(Heading { level, text, id });
60    }
61    out
62}
63
64/// Recursively concatenates the plain-text content of a node's subtree,
65/// stripping all Markdown and HTML markup. Blocks are separated by
66/// whitespace to prevent words from merging.
67///
68/// Captures both inline code (`` `foo` ``) and fenced code blocks —
69/// useful for search indexing where readers may query terms that
70/// only appear inside code samples.
71pub fn collect_all_text<'a>(root: comrak::nodes::Node<'a>) -> String {
72    let mut buf = String::new();
73    for d in root.descendants() {
74        match &d.data.borrow().value {
75            NodeValue::Text(t) => buf.push_str(t),
76            NodeValue::Code(c) => buf.push_str(&c.literal),
77            NodeValue::CodeBlock(cb) => {
78                if !buf.is_empty() && !buf.ends_with(' ') {
79                    buf.push(' ');
80                }
81                buf.push_str(&cb.literal);
82            }
83            NodeValue::SoftBreak | NodeValue::LineBreak
84                if !buf.is_empty() && !buf.ends_with(' ') =>
85            {
86                buf.push(' ');
87            }
88            // Ensure space between structural elements.
89            NodeValue::Paragraph
90            | NodeValue::Heading(_)
91            | NodeValue::Item(_)
92            | NodeValue::BlockQuote
93            | NodeValue::Table(_)
94            | NodeValue::TableRow(_)
95            | NodeValue::TableCell
96                if !buf.is_empty() && !buf.ends_with(' ') =>
97            {
98                buf.push(' ');
99            }
100            _ => {}
101        }
102    }
103    buf.trim().to_string()
104}
105
106/// Recursively concatenates the text content of a node's subtree,
107/// matching what comrak renders inside `<h*>` tags (text, inline
108/// code, image alt text). Raw inline HTML is skipped.
109fn extract_text<'a>(node: comrak::nodes::Node<'a>) -> String {
110    let mut buf = String::new();
111    for d in node.descendants() {
112        match &d.data.borrow().value {
113            NodeValue::Text(t) => buf.push_str(t),
114            NodeValue::Code(c) => buf.push_str(&c.literal),
115            NodeValue::Image(img) => buf.push_str(&img.title),
116            _ => {}
117        }
118    }
119    buf
120}
121
122// ── Table regexes (cached, for legacy process_tables) ───────────────
123//
124// Opening and closing `<table>` tags are literal substrings, handled
125// by `str::replace` in `process_tables` below — no regex needed. The
126// `<td …>` rewrite does need a pattern to capture the attribute run,
127// so it stays as a cached `Regex`.
128
129static TABLE_CELL_RE: LazyLock<Regex> =
130    LazyLock::new(|| Regex::new(r"<td([^>]*)>").unwrap());
131
132/// Regex matching known custom block div elements inside HTML blocks.
133static CUSTOM_BLOCK_RE: LazyLock<Regex> = LazyLock::new(|| {
134    Regex::new(
135        r#"(?si)<div\s+class=["']?(note|warning|tip|info|important|caution)["']?>(.*?)</div>"#,
136    )
137    .unwrap()
138});
139
140// ── Column alignment ────────────────────────────────────────────────
141
/// Horizontal alignment of a table column.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so that values
/// can be used as `HashMap` / `HashSet` keys, consistent with
/// [`CustomBlockType`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ColumnAlignment {
    /// Align the column to the left.
    Left,
    /// Align the column to the center.
    Center,
    /// Align the column to the right.
    Right,
}
152
153// ── Custom block types ──────────────────────────────────────────────
154
/// The kinds of custom (admonition-style) blocks this module
/// recognises.
///
/// The type is `Copy` and hashable, so it can serve as a map key (see
/// [`CustomBlockConfig`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CustomBlockType {
    /// A `note` block.
    Note,
    /// A `warning` block.
    Warning,
    /// A `tip` block.
    Tip,
    /// An `info` block.
    Info,
    /// An `important` block.
    Important,
    /// A `caution` block.
    Caution,
}
171
172impl CustomBlockType {
173    /// Returns the default Bootstrap alert class.
174    pub fn default_alert_class(&self) -> &'static str {
175        match self {
176            Self::Note => "alert-info",
177            Self::Warning => "alert-warning",
178            Self::Tip => "alert-success",
179            Self::Info => "alert-primary",
180            Self::Important => "alert-danger",
181            Self::Caution => "alert-secondary",
182        }
183    }
184
185    /// Returns the default human-readable title.
186    pub fn default_title(&self) -> &'static str {
187        match self {
188            Self::Note => "Note",
189            Self::Warning => "Warning",
190            Self::Tip => "Tip",
191            Self::Info => "Info",
192            Self::Important => "Important",
193            Self::Caution => "Caution",
194        }
195    }
196
197    /// Returns the default Bootstrap alert class for this block type.
198    pub fn get_alert_class(&self) -> &'static str {
199        self.default_alert_class()
200    }
201
202    /// Returns the default title for this block type.
203    pub fn get_title(&self) -> &'static str {
204        self.default_title()
205    }
206
207    /// Returns the alert class, respecting config overrides.
208    pub fn alert_class_with<'a>(
209        &self,
210        config: &'a CustomBlockConfig,
211    ) -> &'a str {
212        config
213            .class_overrides
214            .get(self)
215            .map(|s| s.as_str())
216            .unwrap_or_else(move || self.default_alert_class())
217    }
218
219    /// Returns the title, respecting config overrides.
220    pub fn title_with<'a>(
221        &self,
222        config: &'a CustomBlockConfig,
223    ) -> &'a str {
224        config
225            .title_overrides
226            .get(self)
227            .map(|s| s.as_str())
228            .unwrap_or_else(move || self.default_title())
229    }
230}
231
232impl FromStr for CustomBlockType {
233    type Err = MarkdownError;
234
235    fn from_str(block_type: &str) -> Result<Self, Self::Err> {
236        match block_type.to_lowercase().as_str() {
237            "note" => Ok(Self::Note),
238            "warning" => Ok(Self::Warning),
239            "tip" => Ok(Self::Tip),
240            "info" => Ok(Self::Info),
241            "important" => Ok(Self::Important),
242            "caution" => Ok(Self::Caution),
243            _ => Err(MarkdownError::CustomBlockError(format!(
244                "Unknown block type: {block_type}"
245            ))),
246        }
247    }
248}
249
250// ── Custom block configuration ──────────────────────────────────────
251
252/// Configuration for custom block rendering.
253///
254/// Allows overriding the default CSS class and title for each
255/// block type, enabling use with CSS frameworks other than Bootstrap.
256#[derive(Debug, Clone, Default)]
257pub struct CustomBlockConfig {
258    /// Override the CSS alert class per block type.
259    pub class_overrides: HashMap<CustomBlockType, String>,
260    /// Override the display title per block type.
261    pub title_overrides: HashMap<CustomBlockType, String>,
262}
263
264impl CustomBlockConfig {
265    /// Creates a new empty configuration (uses all defaults).
266    pub fn new() -> Self {
267        Self::default()
268    }
269
270    /// Overrides the CSS class for a specific block type.
271    pub fn with_class(
272        mut self,
273        block_type: CustomBlockType,
274        class: impl Into<String>,
275    ) -> Self {
276        self.class_overrides.insert(block_type, class.into());
277        self
278    }
279
280    /// Overrides the display title for a specific block type.
281    pub fn with_title(
282        mut self,
283        block_type: CustomBlockType,
284        title: impl Into<String>,
285    ) -> Self {
286        self.title_overrides.insert(block_type, title.into());
287        self
288    }
289}
290
291// ── AST-level custom block processing ───────────────────────────────
292
293/// Walks the comrak AST and transforms `HtmlBlock` nodes that contain
294/// known custom block divs into styled alert HTML.
295///
296/// This is safer than regex on rendered HTML because it only touches
297/// nodes the parser explicitly identified as raw HTML blocks.
298pub fn process_custom_block_nodes<'a>(
299    root: comrak::nodes::Node<'a>,
300    config: &CustomBlockConfig,
301) {
302    for node in root.descendants() {
303        let mut ast = node.data.borrow_mut();
304        if let NodeValue::HtmlBlock(ref mut block) = ast.value {
305            block.literal =
306                transform_custom_blocks(&block.literal, config);
307        }
308    }
309}
310
311/// Transforms custom block divs in a raw HTML string.
312fn transform_custom_blocks(
313    html: &str,
314    config: &CustomBlockConfig,
315) -> String {
316    CUSTOM_BLOCK_RE
317        .replace_all(html, |caps: &regex::Captures| {
318            let block_type = CustomBlockType::from_str(
319                caps.get(1).unwrap().as_str(),
320            )
321            .expect("regex only matches known block types");
322            generate_custom_block_html(block_type, &caps[2], config)
323        })
324        .to_string()
325}
326
327/// Generates the HTML for a custom block.
328fn generate_custom_block_html(
329    block_type: CustomBlockType,
330    content: &str,
331    config: &CustomBlockConfig,
332) -> String {
333    format!(
334        r#"<div class="alert {}" role="alert"><strong>{}:</strong> {}</div>"#,
335        block_type.alert_class_with(config),
336        block_type.title_with(config),
337        content
338    )
339}
340
341// ── AST-level table enhancement ─────────────────────────────────────
342
343/// Walks the comrak AST and replaces `Table` nodes with `HtmlBlock`
344/// nodes containing responsive-wrapped, class-enhanced table HTML.
345///
346/// This eliminates the last regex pass over rendered HTML.
347pub fn enhance_table_nodes<'a>(
348    root: comrak::nodes::Node<'a>,
349    arena: &'a comrak::Arena<'a>,
350    options: &comrak::Options,
351) {
352    // Collect table nodes first to avoid borrow issues during mutation
353    let table_nodes: Vec<comrak::nodes::Node<'a>> = root
354        .descendants()
355        .filter(|node| {
356            matches!(node.data.borrow().value, NodeValue::Table(_))
357        })
358        .collect();
359
360    for table_node in table_nodes {
361        // Render this table subtree to HTML
362        let mut table_html = String::new();
363        if comrak::format_html(table_node, options, &mut table_html)
364            .is_err()
365        {
366            continue;
367        }
368
369        // Apply the responsive wrapper and alignment classes
370        let enhanced = process_tables(&table_html);
371
372        // Create a replacement HtmlBlock node
373        let start = comrak::nodes::LineColumn { line: 0, column: 0 };
374        let replacement = arena.alloc(comrak::nodes::AstNode::new(
375            RefCell::new(comrak::nodes::Ast::new(
376                NodeValue::HtmlBlock(NodeHtmlBlock {
377                    block_type: 6, // generic block
378                    literal: enhanced,
379                }),
380                start,
381            )),
382        ));
383
384        // Insert replacement and remove original
385        table_node.insert_before(replacement);
386        table_node.detach();
387    }
388}
389
390// ── Legacy string-level custom block processing ─────────────────────
391
392/// Processes custom blocks in an HTML string.
393///
394/// Provided for backward compatibility. Prefer
395/// [`process_custom_block_nodes`] for AST-level processing.
396pub fn process_custom_blocks(content: &str) -> String {
397    transform_custom_blocks(content, &CustomBlockConfig::default())
398}
399
400// ── Table post-processing ───────────────────────────────────────────
401
402/// Processes tables, enhancing them with responsive design and alignment classes.
403pub fn process_tables(table_html: &str) -> String {
404    let table_html = table_html.replace(
405        "<table>",
406        r#"<div class="table-responsive"><table class="table">"#,
407    );
408    let table_html = table_html.replace("</table>", "</table></div>");
409
410    TABLE_CELL_RE
411        .replace_all(&table_html, |caps: &regex::Captures| {
412            let attrs = &caps[1];
413            if attrs.contains("align=\"center\"") {
414                format!(r#"<td{attrs} class="text-center">"#)
415            } else if attrs.contains("align=\"right\"") {
416                format!(r#"<td{attrs} class="text-right">"#)
417            } else {
418                format!(r#"<td{attrs} class="text-left">"#)
419            }
420        })
421        .to_string()
422}
423
#[cfg(test)]
mod tests {
    use super::*;

    /// Known block classes are matched regardless of case and mapped
    /// to their default alert classes.
    #[test]
    fn test_process_custom_blocks_default_config() {
        let input = r#"
            <div class="note">This is a note.</div>
            <div class="WARNING">This is a warning.</div>
            <div class="Tip">This is a tip.</div>
        "#;
        let processed = process_custom_blocks(input);
        for expected in [
            r#"alert alert-info"#,
            r#"alert alert-warning"#,
            r#"alert alert-success"#,
        ] {
            assert!(processed.contains(expected));
        }
    }

    /// Class and title overrides take precedence over the defaults.
    #[test]
    fn test_custom_block_config_overrides() {
        let config = CustomBlockConfig::new()
            .with_class(CustomBlockType::Note, "callout-info")
            .with_title(CustomBlockType::Note, "Did you know?");

        let html = generate_custom_block_html(
            CustomBlockType::Note,
            "test content",
            &config,
        );
        assert!(html.contains("callout-info"));
        assert!(html.contains("Did you know?:"));
    }

    /// A div with an unrecognised class is returned unchanged.
    #[test]
    fn test_unknown_block_passthrough() {
        let input =
            r#"<div class="unknown">Should pass through.</div>"#;
        assert_eq!(process_custom_blocks(input), input);
    }

    /// The wrapper and all three alignment classes are emitted.
    #[test]
    fn test_process_tables() {
        let input = r#"<table><tr><td align="center">Center</td><td align="right">Right</td><td>Left</td></tr></table>"#;
        let processed = process_tables(input);
        for expected in [
            r#"table-responsive"#,
            r#"text-center"#,
            r#"text-right"#,
            r#"text-left"#,
        ] {
            assert!(processed.contains(expected));
        }
    }

    /// Each table in the input gets its own responsive wrapper.
    #[test]
    fn test_process_multiple_tables() {
        let input = "<table><tr><td>A</td></tr></table>\n<table><tr><td>B</td></tr></table>";
        let wrappers =
            process_tables(input).matches("table-responsive").count();
        assert_eq!(wrappers, 2);
    }

    /// Parsing an unknown name fails and reports the offending input.
    #[test]
    fn test_unknown_block_type_from_str() {
        let result = CustomBlockType::from_str("unknown");
        assert!(result.is_err());
        let message = result.unwrap_err().to_string();
        assert!(
            message.contains("Unknown block type: unknown"),
            "Error message should contain the unknown type"
        );
    }

    /// Several non-block names, including the empty string, are rejected.
    #[test]
    fn test_unknown_block_type_from_str_various() {
        for name in ["foobar", "alert", "danger", "success", ""] {
            assert!(
                CustomBlockType::from_str(name).is_err(),
                "'{name}' should not parse as a valid block type"
            );
        }
    }
}