Skip to main content

rdx_transform/transforms/
auto_number.rs

1use std::cell::RefCell;
2use std::collections::HashMap;
3
4use rdx_ast::*;
5
6use crate::{Transform, synthetic_pos};
7
8// ---------------------------------------------------------------------------
9// Public registry types
10// ---------------------------------------------------------------------------
11
/// Metadata recorded for one numbered element.
///
/// Entries compare by value (`PartialEq`/`Eq`), so two entries are equal when
/// their kind, number and title all match. This allows whole entries to be
/// checked with `assert_eq!` instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NumberEntry {
    /// Conceptual kind: "Figure", "Table", "Listing", "Theorem", "Lemma",
    /// "Corollary", "Proposition", "Conjecture", "Definition", "Example",
    /// "Remark", "Equation", "Section".
    pub kind: String,
    /// Display number string: "1", "2", "1.1", etc.
    pub number: String,
    /// Optional caption or theorem title extracted from the element.
    /// `None` when the element carried no title text.
    pub title: Option<String>,
}
24
25/// Maps label strings to their numbered entries.
26///
27/// Built as a side-effect of running [`AutoNumber`] and accessible via
28/// [`AutoNumber::registry`] after the transform has been applied.
29#[derive(Debug, Default)]
30pub struct NumberRegistry {
31    /// label -> entry
32    pub entries: HashMap<String, NumberEntry>,
33}
34
35// ---------------------------------------------------------------------------
36// Internal counter state
37// ---------------------------------------------------------------------------
38
/// Running counters used while walking one document.
#[derive(Debug, Default)]
struct Counters {
    /// Number of `Figure` components seen so far.
    figure: u32,
    /// Number of `TableFigure` components seen so far.
    table: u32,
    /// Number of `Listing` components seen so far.
    listing: u32,
    /// Shared counter for: Theorem, Lemma, Corollary, Proposition, Conjecture.
    theorem_group: u32,
    /// Shared counter for: Definition, Example, Remark.
    definition_group: u32,
    /// Number of labelled display equations seen so far.
    equation: u32,
    /// Section hierarchy stack: [h1_count, h2_count, h3_count, ...]
    sections: Vec<u32>,
}

impl Counters {
    /// Advance the section counter at the 1-based heading `depth` and return
    /// the resulting dotted number ("1", "1.2", "1.2.3", ...).
    ///
    /// Counters deeper than `depth` are discarded. Levels that were skipped on
    /// the way down (e.g. an h3 directly under an h1) appear as `0`. A `depth`
    /// of 0 is treated the same as a depth of 1.
    fn next_section(&mut self, depth: u8) -> String {
        let level = usize::from(depth).saturating_sub(1);

        // One resize both drops every deeper counter and zero-fills any
        // level that has not been visited yet.
        self.sections.resize(level + 1, 0);
        if let Some(current) = self.sections.last_mut() {
            *current += 1;
        }

        let mut label = String::new();
        for (position, count) in self.sections.iter().enumerate() {
            if position > 0 {
                label.push('.');
            }
            label.push_str(&count.to_string());
        }
        label
    }
}
74
75// ---------------------------------------------------------------------------
76// Transform
77// ---------------------------------------------------------------------------
78
79/// Walks the AST and assigns sequential numbers to figures, tables, listings,
80/// theorem-group components, definition-group components, display math
81/// equations, and headings.
82///
83/// Numbers are injected as a `number` string attribute on `Component` nodes.
84/// For `MathDisplay` nodes (which are not components), the numbers are only
85/// stored in the registry.
86///
87/// After calling [`Transform::transform`], retrieve the full registry via
88/// [`AutoNumber::registry`].
89///
90/// # Example
91///
92/// ```rust
93/// use rdx_transform::{AutoNumber, Transform, parse};
94///
95/// let mut root = parse("<Figure id=\"fig:arch\">\n</Figure>\n");
96/// let numberer = AutoNumber::new();
97/// numberer.transform(&mut root, "");
98/// let reg = numberer.registry();
99/// assert_eq!(reg.entries["fig:arch"].number, "1");
100/// ```
101pub struct AutoNumber {
102    /// If true, number headings hierarchically (1, 1.1, 1.1.2).
103    pub number_headings: bool,
104    /// If true, prefix figure/table numbers with chapter (Figure 2.3 vs Figure 7).
105    /// When enabled, the chapter counter is the h1 section counter.
106    pub per_chapter: bool,
107    /// Interior mutability via `RefCell` is required because the [`Transform`]
108    /// trait takes `&self` (not `&mut self`), yet we need to mutate the registry
109    /// during `transform()`. Transforms run single-threaded, so `RefCell` is
110    /// the appropriate lightweight primitive here (no locking overhead).
111    registry: RefCell<NumberRegistry>,
112}
113
114impl AutoNumber {
115    pub fn new() -> Self {
116        AutoNumber {
117            number_headings: true,
118            per_chapter: false,
119            registry: RefCell::new(NumberRegistry::default()),
120        }
121    }
122
123    /// Return a shared borrow of the registry built during the last call to
124    /// [`Transform::transform`].
125    ///
126    /// # Panics
127    ///
128    /// Panics if the registry is already mutably borrowed elsewhere. This
129    /// should never happen because transforms run single-threaded.
130    pub fn registry(&self) -> std::cell::Ref<'_, NumberRegistry> {
131        self.registry.borrow()
132    }
133}
134
135impl Default for AutoNumber {
136    fn default() -> Self {
137        Self::new()
138    }
139}
140
141impl Transform for AutoNumber {
142    fn name(&self) -> &str {
143        "auto-number"
144    }
145
146    fn transform(&self, root: &mut Root, _source: &str) {
147        // Clear any previous run.
148        self.registry.borrow_mut().entries.clear();
149
150        let mut counters = Counters::default();
151        process_nodes(
152            &mut root.children,
153            &mut counters,
154            self.number_headings,
155            self.per_chapter,
156            &self.registry,
157        );
158    }
159}
160
161// ---------------------------------------------------------------------------
162// Core recursive walker
163// ---------------------------------------------------------------------------
164
165/// Helper: look up the string value of an attribute by name.
166fn attr_str<'a>(comp: &'a ComponentNode, name: &str) -> Option<&'a str> {
167    comp.attributes.iter().find_map(|a| {
168        if a.name == name {
169            if let AttributeValue::String(s) = &a.value {
170                Some(s.as_str())
171            } else {
172                None
173            }
174        } else {
175            None
176        }
177    })
178}
179
180/// Helper: extract plain text from a node's children (best-effort caption).
181fn children_text(nodes: &[Node]) -> String {
182    let mut out = String::new();
183    crate::walk(nodes, &mut |n| {
184        if let Node::Text(t) = n {
185            out.push_str(&t.value);
186        }
187    });
188    out
189}
190
191/// Inject or replace a `number` attribute on a component.
192fn inject_number(comp: &mut ComponentNode, number: &str) {
193    // Remove any existing `number` attribute first.
194    comp.attributes.retain(|a| a.name != "number");
195    comp.attributes.push(AttributeNode {
196        name: "number".to_string(),
197        value: AttributeValue::String(number.to_string()),
198        position: synthetic_pos(),
199    });
200}
201
/// Render `counter` as a display number.
///
/// Without `per_chapter` this is just the counter ("7"). With `per_chapter`
/// the number is prefixed by the chapter, i.e. the first element of
/// `sections` (the h1 counter), giving "2.7"; an empty `sections` slice
/// counts as chapter 0.
fn format_number(counter: u32, sections: &[u32], per_chapter: bool) -> String {
    if !per_chapter {
        return counter.to_string();
    }

    let chapter = match sections.first() {
        Some(&h1) => h1,
        None => 0,
    };
    format!("{}.{}", chapter, counter)
}
212
213fn process_nodes(
214    nodes: &mut [Node],
215    counters: &mut Counters,
216    number_headings: bool,
217    per_chapter: bool,
218    registry_mutex: &RefCell<NumberRegistry>,
219) {
220    for node in nodes.iter_mut() {
221        match node {
222            // ---------------------------------------------------------------
223            // Headings
224            // ---------------------------------------------------------------
225            Node::Heading(h) => {
226                if number_headings {
227                    if let Some(depth) = h.depth {
228                        let num = counters.next_section(depth);
229                        if let Some(ref id) = h.id.clone() {
230                            let title = children_text(&h.children);
231                            let mut reg = registry_mutex.borrow_mut();
232                            reg.entries.insert(
233                                id.clone(),
234                                NumberEntry {
235                                    kind: "Section".to_string(),
236                                    number: num.clone(),
237                                    title: if title.is_empty() { None } else { Some(title) },
238                                },
239                            );
240                        }
241                        // Recurse into heading children.
242                        process_nodes(
243                            &mut h.children,
244                            counters,
245                            number_headings,
246                            per_chapter,
247                            registry_mutex,
248                        );
249                    }
250                } else {
251                    process_nodes(
252                        &mut h.children,
253                        counters,
254                        number_headings,
255                        per_chapter,
256                        registry_mutex,
257                    );
258                }
259            }
260
261            // ---------------------------------------------------------------
262            // MathDisplay
263            // ---------------------------------------------------------------
264            Node::MathDisplay(m) => {
265                if let Some(ref label) = m.label.clone() {
266                    counters.equation += 1;
267                    let num = counters.equation.to_string();
268                    let mut reg = registry_mutex.borrow_mut();
269                    reg.entries.insert(
270                        label.clone(),
271                        NumberEntry {
272                            kind: "Equation".to_string(),
273                            number: num,
274                            title: None,
275                        },
276                    );
277                }
278            }
279
280            // ---------------------------------------------------------------
281            // Components
282            // ---------------------------------------------------------------
283            Node::Component(comp) => {
284                let name = comp.name.clone();
285                match name.as_str() {
286                    "Figure" => {
287                        counters.figure += 1;
288                        let num = format_number(counters.figure, &counters.sections, per_chapter);
289                        inject_number(comp, &num);
290                        if let Some(id) = attr_str(comp, "id").map(str::to_string) {
291                            let title = children_text(&comp.children);
292                            let mut reg = registry_mutex.borrow_mut();
293                            reg.entries.insert(
294                                id,
295                                NumberEntry {
296                                    kind: "Figure".to_string(),
297                                    number: num,
298                                    title: if title.is_empty() { None } else { Some(title) },
299                                },
300                            );
301                        }
302                    }
303                    "TableFigure" => {
304                        counters.table += 1;
305                        let num = format_number(counters.table, &counters.sections, per_chapter);
306                        inject_number(comp, &num);
307                        if let Some(id) = attr_str(comp, "id").map(str::to_string) {
308                            let title = children_text(&comp.children);
309                            let mut reg = registry_mutex.borrow_mut();
310                            reg.entries.insert(
311                                id,
312                                NumberEntry {
313                                    kind: "Table".to_string(),
314                                    number: num,
315                                    title: if title.is_empty() { None } else { Some(title) },
316                                },
317                            );
318                        }
319                    }
320                    "Listing" => {
321                        counters.listing += 1;
322                        let num = format_number(counters.listing, &counters.sections, per_chapter);
323                        inject_number(comp, &num);
324                        if let Some(id) = attr_str(comp, "id").map(str::to_string) {
325                            let title = children_text(&comp.children);
326                            let mut reg = registry_mutex.borrow_mut();
327                            reg.entries.insert(
328                                id,
329                                NumberEntry {
330                                    kind: "Listing".to_string(),
331                                    number: num,
332                                    title: if title.is_empty() { None } else { Some(title) },
333                                },
334                            );
335                        }
336                    }
337                    "Theorem" | "Lemma" | "Corollary" | "Proposition" | "Conjecture" => {
338                        counters.theorem_group += 1;
339                        let num = counters.theorem_group.to_string();
340                        inject_number(comp, &num);
341                        if let Some(id) = attr_str(comp, "id").map(str::to_string) {
342                            let title = attr_str(comp, "title").map(str::to_string);
343                            let mut reg = registry_mutex.borrow_mut();
344                            reg.entries.insert(
345                                id,
346                                NumberEntry {
347                                    kind: name.clone(),
348                                    number: num,
349                                    title,
350                                },
351                            );
352                        }
353                    }
354                    "Definition" | "Example" | "Remark" => {
355                        counters.definition_group += 1;
356                        let num = counters.definition_group.to_string();
357                        inject_number(comp, &num);
358                        if let Some(id) = attr_str(comp, "id").map(str::to_string) {
359                            let title = attr_str(comp, "title").map(str::to_string);
360                            let mut reg = registry_mutex.borrow_mut();
361                            reg.entries.insert(
362                                id,
363                                NumberEntry {
364                                    kind: name.clone(),
365                                    number: num,
366                                    title,
367                                },
368                            );
369                        }
370                    }
371                    _ => {}
372                }
373                // Always recurse into component children.
374                process_nodes(
375                    &mut comp.children,
376                    counters,
377                    number_headings,
378                    per_chapter,
379                    registry_mutex,
380                );
381            }
382
383            // ---------------------------------------------------------------
384            // All other container nodes — recurse
385            // ---------------------------------------------------------------
386            Node::Paragraph(b)
387            | Node::List(b)
388            | Node::ListItem(b)
389            | Node::Blockquote(b)
390            | Node::Html(b)
391            | Node::Table(b)
392            | Node::TableRow(b)
393            | Node::TableCell(b)
394            | Node::Emphasis(b)
395            | Node::Strong(b)
396            | Node::Strikethrough(b)
397            | Node::ThematicBreak(b)
398            | Node::DefinitionList(b)
399            | Node::DefinitionTerm(b)
400            | Node::DefinitionDescription(b) => {
401                process_nodes(
402                    &mut b.children,
403                    counters,
404                    number_headings,
405                    per_chapter,
406                    registry_mutex,
407                );
408            }
409            Node::Link(l) => {
410                process_nodes(
411                    &mut l.children,
412                    counters,
413                    number_headings,
414                    per_chapter,
415                    registry_mutex,
416                );
417            }
418            Node::Image(i) => {
419                process_nodes(
420                    &mut i.children,
421                    counters,
422                    number_headings,
423                    per_chapter,
424                    registry_mutex,
425                );
426            }
427            Node::FootnoteDefinition(f) => {
428                process_nodes(
429                    &mut f.children,
430                    counters,
431                    number_headings,
432                    per_chapter,
433                    registry_mutex,
434                );
435            }
436            _ => {}
437        }
438    }
439}
440
441// ---------------------------------------------------------------------------
442// Tests
443// ---------------------------------------------------------------------------
444
#[cfg(test)]
mod tests {
    use super::*;
    use rdx_parser::parse;

    /// Parse `source` and run a default-configured [`AutoNumber`] over it.
    fn number_source(source: &str) -> (Root, AutoNumber) {
        let mut root = parse(source);
        let numberer = AutoNumber::new();
        numberer.transform(&mut root, "");
        (root, numberer)
    }

    /// Assert that `label` was registered with display number `expected`.
    fn expect_number(numberer: &AutoNumber, label: &str, expected: &str) {
        assert_eq!(numberer.registry().entries[label].number, expected);
    }

    /// Assert that `label` was registered with kind `expected`.
    fn expect_kind(numberer: &AutoNumber, label: &str, expected: &str) {
        assert_eq!(numberer.registry().entries[label].kind, expected);
    }

    /// A dummy position at line 1, column 1, offset 0 for hand-built nodes.
    fn origin() -> Position {
        Position {
            start: Point {
                line: 1,
                column: 1,
                offset: 0,
            },
            end: Point {
                line: 1,
                column: 1,
                offset: 0,
            },
        }
    }

    /// Build a Heading node with a preset `id` (since ENABLE_HEADING_ATTRIBUTES is
    /// not active in the parser, IDs must be injected programmatically in tests).
    fn make_heading(depth: u8, text: &str, id: &str) -> Node {
        let label = Node::Text(TextNode {
            value: text.to_string(),
            position: origin(),
        });
        Node::Heading(StandardBlockNode {
            depth: Some(depth),
            ordered: None,
            checked: None,
            id: Some(id.to_string()),
            children: vec![label],
            position: origin(),
        })
    }

    #[test]
    fn figures_numbered_sequentially() {
        let (_root, numberer) = number_source(
            "<Figure id=\"fig:a\">\n</Figure>\n\
             <Figure id=\"fig:b\">\n</Figure>\n\
             <Figure id=\"fig:c\">\n</Figure>\n",
        );
        expect_number(&numberer, "fig:a", "1");
        expect_number(&numberer, "fig:b", "2");
        expect_number(&numberer, "fig:c", "3");
        expect_kind(&numberer, "fig:a", "Figure");
    }

    #[test]
    fn tables_numbered_sequentially() {
        let (_root, numberer) = number_source(
            "<TableFigure id=\"tbl:a\">\n</TableFigure>\n\
             <TableFigure id=\"tbl:b\">\n</TableFigure>\n",
        );
        expect_number(&numberer, "tbl:a", "1");
        expect_number(&numberer, "tbl:b", "2");
        expect_kind(&numberer, "tbl:a", "Table");
    }

    #[test]
    fn theorem_group_shared_counter() {
        // Theorem, Lemma and Corollary all draw from one counter.
        let (_root, numberer) = number_source(
            "<Theorem id=\"thm:one\">\n</Theorem>\n\
             <Lemma id=\"lem:two\">\n</Lemma>\n\
             <Corollary id=\"cor:three\">\n</Corollary>\n",
        );
        expect_number(&numberer, "thm:one", "1");
        expect_number(&numberer, "lem:two", "2");
        expect_number(&numberer, "cor:three", "3");
    }

    #[test]
    fn definition_group_shared_counter() {
        let (_root, numberer) = number_source(
            "<Definition id=\"def:one\">\n</Definition>\n\
             <Example id=\"ex:two\">\n</Example>\n\
             <Remark id=\"rem:three\">\n</Remark>\n",
        );
        expect_number(&numberer, "def:one", "1");
        expect_number(&numberer, "ex:two", "2");
        expect_number(&numberer, "rem:three", "3");
    }

    #[test]
    fn equations_numbered_from_label() {
        let (_root, numberer) = number_source(
            "$$ {#eq:first}\nE = mc^2\n$$\n\
             $$ {#eq:second}\na^2 + b^2 = c^2\n$$\n",
        );
        expect_number(&numberer, "eq:first", "1");
        expect_number(&numberer, "eq:second", "2");
        expect_kind(&numberer, "eq:first", "Equation");
    }

    #[test]
    fn number_attribute_injected_on_figure() {
        let (root, _numberer) = number_source("<Figure id=\"fig:x\">\n</Figure>\n");
        if let Node::Component(c) = &root.children[0] {
            let num_attr = c.attributes.iter().find(|a| a.name == "number");
            assert!(num_attr.is_some(), "Expected 'number' attribute");
            assert_eq!(
                num_attr.unwrap().value,
                AttributeValue::String("1".to_string())
            );
        } else {
            panic!("Expected component, got {:?}", &root.children[0]);
        }
    }

    #[test]
    fn headings_get_section_numbers() {
        // ENABLE_HEADING_ATTRIBUTES is not active in the parser, so the
        // heading nodes are constructed by hand with their ids already set.
        let mut root = Root {
            node_type: RootType::Root,
            frontmatter: None,
            children: vec![
                make_heading(1, "Chapter One", "ch1"),
                make_heading(2, "Section A", "sec:a"),
                make_heading(2, "Section B", "sec:b"),
            ],
            position: origin(),
        };
        let numberer = AutoNumber {
            number_headings: true,
            ..AutoNumber::new()
        };
        numberer.transform(&mut root, "");
        expect_number(&numberer, "ch1", "1");
        expect_number(&numberer, "sec:a", "1.1");
        expect_number(&numberer, "sec:b", "1.2");
    }

    #[test]
    fn per_chapter_prefixes_figure_numbers() {
        // Parse the Figure component, then prepend a hand-crafted heading
        // (the parser cannot attach ids to headings itself).
        let mut root = parse("<Figure id=\"fig:a\">\n</Figure>\n");
        root.children.insert(0, make_heading(1, "Chapter", "ch1"));
        let numberer = AutoNumber {
            number_headings: true,
            per_chapter: true,
            ..AutoNumber::new()
        };
        numberer.transform(&mut root, "");
        expect_number(&numberer, "fig:a", "1.1");
    }

    #[test]
    fn re_run_clears_previous_registry() {
        let (mut root, numberer) = number_source("<Figure id=\"fig:a\">\n</Figure>\n");
        numberer.transform(&mut root, "");
        // After the second run the registry should still have just one entry.
        assert_eq!(numberer.registry().entries.len(), 1);
    }

    #[test]
    fn figures_and_tables_have_separate_counters() {
        let (_root, numberer) = number_source(
            "<Figure id=\"fig:a\">\n</Figure>\n\
             <TableFigure id=\"tbl:a\">\n</TableFigure>\n\
             <Figure id=\"fig:b\">\n</Figure>\n",
        );
        expect_number(&numberer, "fig:a", "1");
        expect_number(&numberer, "fig:b", "2");
        expect_number(&numberer, "tbl:a", "1");
    }
}
648}