Skip to main content

lex_babel/
transforms.rs

1//! Transform integration for lex-babel formats
2//!
3//! This module provides transform-style interfaces for format conversions.
4//! While lex-parser provides the core transform infrastructure, lex-babel
5//! adds serialization transforms that operate on AST nodes.
6
7use crate::format::Format;
8use crate::formats::lex::formatting_rules::FormattingRules;
9use crate::formats::lex::LexFormat;
10use lex_core::lex::ast::elements::typed_content::ContentElement;
11use lex_core::lex::ast::{ContentItem, Document, List, ListItem, Session};
12
13/// Serialize a Document to Lex format with default formatting rules
14///
15/// This provides a simple functional interface that can be used
16/// in transform-style pipelines outside the standard lex-parser transforms.
17///
18/// # Example
19///
20/// ```
21/// use lex_babel::transforms::serialize_to_lex;
22/// use lex_core::lex::transforms::standard::STRING_TO_AST;
23///
24/// let source = "Hello world\n";
25/// let doc = STRING_TO_AST.run(source.to_string()).unwrap();
26/// let formatted = serialize_to_lex(&doc).unwrap();
27/// assert_eq!(formatted, "Hello world\n");
28/// ```
29pub fn serialize_to_lex(doc: &Document) -> Result<String, String> {
30    let format = LexFormat::default();
31    format.serialize(doc).map_err(|e| e.to_string())
32}
33
34/// Serialize a Document to Lex format with custom formatting rules
35///
36/// # Example
37///
38/// ```
39/// use lex_babel::transforms::serialize_to_lex_with_rules;
40/// use lex_babel::formats::lex::formatting_rules::FormattingRules;
41/// use lex_core::lex::transforms::standard::STRING_TO_AST;
42///
43/// let source = "Hello world\n";
44/// let doc = STRING_TO_AST.run(source.to_string()).unwrap();
45///
46/// let mut rules = FormattingRules::default();
47/// rules.indent_string = "  ".to_string(); // 2-space indent
48///
49/// let formatted = serialize_to_lex_with_rules(&doc, rules).unwrap();
50/// ```
51pub fn serialize_to_lex_with_rules(
52    doc: &Document,
53    rules: FormattingRules,
54) -> Result<String, String> {
55    let format = LexFormat::new(rules);
56    format.serialize(doc).map_err(|e| e.to_string())
57}
58
59/// Round-trip transformation: parse and re-serialize
60///
61/// Useful for formatting operations and testing.
62///
63/// # Example
64///
65/// ```
66/// use lex_babel::transforms::format_lex_source;
67///
68/// let source = "Hello world\n";
69/// let formatted = format_lex_source(source).unwrap();
70/// assert_eq!(formatted, "Hello world\n");
71/// ```
72pub fn format_lex_source(source: &str) -> Result<String, String> {
73    use lex_core::lex::transforms::standard::STRING_TO_AST;
74
75    let mut doc = STRING_TO_AST
76        .run(source.to_string())
77        .map_err(|e| e.to_string())?;
78
79    normalize_footnotes(&mut doc);
80
81    serialize_to_lex(&doc)
82}
83
84/// Normalizes footnote definitions in a document from session-based format to list-based format.
85///
86/// Lex supports two formats for footnotes:
87/// 1. **Session-based** (legacy): Each note is a child session with title "1. Note content"
88/// 2. **List-based** (preferred): Notes are list items within a Notes/Footnotes session
89///
90/// This function converts session-based footnotes to list-based format during formatting,
91/// producing cleaner, more compact output.
92fn normalize_footnotes(doc: &mut Document) {
93    if let Some(ContentItem::Session(last_session)) = doc.root.children.as_mut_vec().last_mut() {
94        let title = last_session.title.as_string();
95        if title.trim().eq_ignore_ascii_case("Notes")
96            || title.trim().eq_ignore_ascii_case("Footnotes")
97        {
98            convert_session_notes_to_list(last_session);
99        }
100    }
101}
102
103/// Converts session-based footnote children to a single list.
104///
105/// Handles three content types within a Notes session:
106/// - **Numbered sessions** (e.g., "1. Note"): Converted to list items
107/// - **Existing lists**: Items are merged into the output list
108/// - **Blank lines**: Removed to compact the output
109fn convert_session_notes_to_list(session: &mut Session) {
110    let has_legacy_content = session.children.iter().any(|c| match c {
111        ContentItem::Session(s) => split_numbered_title(s.title.as_string()).is_some(),
112        ContentItem::List(_) | ContentItem::BlankLineGroup(_) => true,
113        _ => false,
114    });
115
116    if !has_legacy_content {
117        return;
118    }
119
120    let mut new_children = Vec::new();
121    let mut current_list_items = Vec::new();
122
123    // Drain children from the session
124    let children_vec = session.children.as_mut_vec();
125    let old_children = std::mem::take(children_vec);
126
127    for mut child in old_children {
128        // handle Session -> ListItem
129        let mut handled = false;
130
131        if let ContentItem::Session(inner_session) = &child {
132            let title = inner_session.title.as_string();
133            if let Some((number_part, content_part)) = split_numbered_title(title) {
134                handled = true;
135
136                let mut children_elements = Vec::new();
137                for inner_child in inner_session.children.iter().cloned() {
138                    if let Ok(el) = ContentElement::try_from(inner_child) {
139                        children_elements.push(el);
140                    }
141                }
142
143                let list_item = ListItem::with_content(
144                    number_part.to_string(),
145                    content_part.trim().to_string(),
146                    children_elements,
147                );
148                current_list_items.push(list_item);
149            }
150        } else if let ContentItem::List(l) = &mut child {
151            // Merge list items
152            handled = true;
153            // We need to extract items. ListContainer wraps generic content but typically ListContent::ListItem.
154            // We'll iterate and filter/map.
155            let items = std::mem::take(l.items.as_mut_vec());
156            for item in items {
157                if let ContentItem::ListItem(li) = item {
158                    current_list_items.push(li);
159                }
160                // If it's not a ListItem (e.g. comment), we drop it for now as per refactoring goal "Clean List".
161            }
162        } else if let ContentItem::BlankLineGroup(_) = child {
163            // Skip blank lines in Notes session to compact them
164            handled = true;
165        }
166
167        if !handled {
168            // If we encounter something else (e.g. Paragraph), we assume it breaks the list or is a preamble.
169            // Flush current items first.
170            if !current_list_items.is_empty() {
171                new_children.push(ContentItem::List(List::new(std::mem::take(
172                    &mut current_list_items,
173                ))));
174            }
175            new_children.push(child);
176        }
177    }
178
179    // Flush remaining
180    if !current_list_items.is_empty() {
181        new_children.push(ContentItem::List(List::new(current_list_items)));
182    }
183
184    *session.children.as_mut_vec() = new_children;
185}
186
/// Separates a numbered title such as "1. Note Title" into marker and remainder.
///
/// The title is trimmed first. If it then starts with one or more ASCII digits
/// immediately followed by a dot, the result is `Some((marker, rest))`, e.g.
/// `Some(("1.", " Note Title"))`. The marker keeps its trailing dot so it can be
/// used unchanged as a list item marker; `rest` is everything after the dot.
/// Any other title yields `None`.
fn split_numbered_title(title: &str) -> Option<(&str, &str)> {
    let trimmed = title.trim();

    // ASCII digits are one byte each, so this count is also a byte offset.
    let digit_count = trimmed.bytes().take_while(u8::is_ascii_digit).count();
    let dot_follows = trimmed.as_bytes().get(digit_count) == Some(&b'.');

    if digit_count == 0 || !dot_follows {
        return None;
    }

    // Split right after the dot so that the marker includes it.
    Some(trimmed.split_at(digit_count + 1))
}
200
#[cfg(test)]
mod tests {
    use super::*;
    use lex_core::lex::ast::Paragraph;
    use lex_core::lex::transforms::standard::STRING_TO_AST;

    /// Builds a document whose only content is a one-line paragraph.
    fn single_paragraph_doc(line: &str) -> Document {
        let paragraph = Paragraph::from_line(line.to_string());
        Document::with_content(vec![ContentItem::Paragraph(paragraph)])
    }

    /// Default serialization of a single paragraph yields the line plus a newline.
    #[test]
    fn test_serialize_to_lex() {
        let doc = single_paragraph_doc("Test");

        assert_eq!(serialize_to_lex(&doc), Ok("Test\n".to_string()));
    }

    /// Serialization with a custom indent string succeeds.
    #[test]
    fn test_serialize_with_custom_rules() {
        let doc = single_paragraph_doc("Test");
        let two_space_rules = FormattingRules {
            indent_string: "  ".to_string(),
            ..Default::default()
        };

        assert!(serialize_to_lex_with_rules(&doc, two_space_rules).is_ok());
    }

    /// Formatting already-formatted source returns it unchanged.
    #[test]
    fn test_format_lex_source() {
        assert_eq!(
            format_lex_source("Hello world\n"),
            Ok("Hello world\n".to_string())
        );
    }

    /// Formatting keeps the number of top-level children (structural equivalence).
    #[test]
    fn test_round_trip_simple() {
        let original = "Introduction\n\n    This is a session.\n";
        let formatted = format_lex_source(original).unwrap();

        // Both the original and the formatted text must parse successfully.
        let before = STRING_TO_AST.run(original.to_string()).unwrap();
        let after = STRING_TO_AST.run(formatted).unwrap();

        assert_eq!(before.root.children.len(), after.root.children.len());
    }

    /// Session-based footnotes are rewritten into a single list under "Notes".
    #[test]
    fn test_normalize_footnotes() {
        // This parses as Session("Notes") -> [Session("1. Note One"), Session("2. Note Two")]
        // normally, but formatting should turn the notes into a List.
        let original = "Title\n\n    Content\n\nNotes\n\n    1. Note One\n\n    2. Note Two\n";
        let formatted = format_lex_source(original).unwrap();

        // Re-parse the formatted output and inspect the trailing session.
        let doc = STRING_TO_AST.run(formatted).unwrap();
        let notes = match doc.root.children.last().unwrap() {
            ContentItem::Session(session) => session,
            _ => panic!("Expected Session"),
        };
        assert_eq!(notes.title.as_string().trim(), "Notes");
        assert_eq!(notes.children.len(), 1);

        let list = match &notes.children[0] {
            ContentItem::List(list) => list,
            other => panic!("Expected List, found {:?}", other),
        };
        assert_eq!(list.items.len(), 2);

        match &list.items[0] {
            ContentItem::ListItem(item) => {
                assert_eq!(item.marker().trim(), "1.");
                assert_eq!(item.text().trim(), "Note One");
            }
            other => panic!("Expected ListItem, found {:?}", other),
        }
    }
}
285}