Skip to main content

lex_babel/common/verbatim/
table.rs

1use super::VerbatimHandler;
2use crate::ir::nodes::{
3    DocNode, InlineContent, Paragraph, Table, TableCell, TableCellAlignment, TableRow,
4};
5use lex_core::lex::ast::Verbatim;
6use std::collections::HashMap;
7
8/// Handler for `doc.table` verbatim blocks.
9///
10/// Parses markdown-style pipe tables into `DocNode::Table` and serializes them back.
11pub struct TableHandler;
12
13impl VerbatimHandler for TableHandler {
14    fn label(&self) -> &str {
15        "doc.table"
16    }
17
18    fn to_ir(&self, content: &str, _params: &HashMap<String, String>) -> Option<DocNode> {
19        Some(parse_pipe_table(content))
20    }
21
22    fn convert_from_ir(&self, node: &DocNode) -> Option<(String, HashMap<String, String>)> {
23        if let DocNode::Table(table) = node {
24            Some((serialize_pipe_table(table), HashMap::new()))
25        } else {
26            None
27        }
28    }
29
30    fn format_content(
31        &self,
32        verbatim: &Verbatim,
33    ) -> Result<Option<String>, crate::error::FormatError> {
34        // Reconstruct content from lines
35        let mut content = String::new();
36        for item in &verbatim.children {
37            if let lex_core::lex::ast::ContentItem::VerbatimLine(line) = item {
38                content.push_str(line.content.as_string());
39                content.push('\n');
40            }
41        }
42
43        // Dedent content to ensure markdown parser sees a table, not a code block
44        let lines: Vec<&str> = content.lines().collect();
45        let min_indent = lines
46            .iter()
47            .filter(|line| !line.trim().is_empty())
48            .map(|line| line.len() - line.trim_start().len())
49            .min()
50            .unwrap_or(0);
51
52        let dedented_content = lines
53            .iter()
54            .map(|line| {
55                if line.len() >= min_indent {
56                    &line[min_indent..]
57                } else {
58                    line
59                }
60            })
61            .collect::<Vec<_>>()
62            .join("\n");
63
64        // Parse the dedented content as markdown
65        let doc = match crate::formats::markdown::parser::parse_from_markdown(&dedented_content) {
66            Ok(d) => d,
67            Err(e) => {
68                // We return Ok(None) instead of Err because formatting failure shouldn't break serialization
69                // It just means we can't format it nicely
70                println!("TableHandler: Markdown parse failed: {e:?}");
71                return Ok(None);
72            }
73        };
74
75        // The markdown parser should have identified a table
76        // It converts markdown tables to Lex VerbatimBlock with "table" label (or similar)
77        // with the content already formatted (by serialize_pipe_table during conversion).
78        // We just need to extract it.
79        for child in &doc.root.children {
80            if let lex_core::lex::ast::ContentItem::VerbatimBlock(verbatim) = child {
81                // Reconstruct content from lines
82                let mut formatted = String::new();
83                for item in &verbatim.children {
84                    if let lex_core::lex::ast::ContentItem::VerbatimLine(line) = item {
85                        formatted.push_str(line.content.as_string());
86                        formatted.push('\n');
87                    }
88                }
89                return Ok(Some(formatted));
90            }
91        }
92        Ok(None)
93    }
94}
95
96fn parse_pipe_table(content: &str) -> DocNode {
97    let mut header = Vec::new();
98    let mut rows = Vec::new();
99    let mut alignments = Vec::new();
100
101    let lines: Vec<&str> = content
102        .lines()
103        .map(|l| l.trim())
104        .filter(|l| !l.is_empty())
105        .collect();
106
107    if lines.is_empty() {
108        return DocNode::Table(Table {
109            rows,
110            header,
111            caption: None,
112        });
113    }
114
115    // Parse header
116    if let Some(header_line) = lines.first() {
117        let cells = parse_table_row(header_line);
118        let mut header_row = TableRow { cells: Vec::new() };
119        for cell_content in cells {
120            header_row.cells.push(TableCell {
121                content: vec![DocNode::Paragraph(Paragraph {
122                    content: vec![InlineContent::Text(cell_content)],
123                })],
124                header: true,
125                align: TableCellAlignment::None,
126            });
127        }
128        header.push(header_row);
129    }
130
131    // Parse separator line to determine alignments
132    if lines.len() > 1 {
133        let separator = lines[1];
134        if separator.contains(['-', '|']) {
135            let parts = parse_table_row(separator);
136            for part in parts {
137                let trimmed = part.trim();
138                if trimmed.starts_with(':') && trimmed.ends_with(':') {
139                    alignments.push(TableCellAlignment::Center);
140                } else if trimmed.ends_with(':') {
141                    alignments.push(TableCellAlignment::Right);
142                } else if trimmed.starts_with(':') {
143                    alignments.push(TableCellAlignment::Left);
144                } else {
145                    alignments.push(TableCellAlignment::None);
146                }
147            }
148        }
149    }
150
151    // Parse body rows
152    for line in lines.iter().skip(2) {
153        let cells = parse_table_row(line);
154        let mut row = TableRow { cells: Vec::new() };
155        for (i, cell_content) in cells.into_iter().enumerate() {
156            let align = if i < alignments.len() {
157                alignments[i]
158            } else {
159                TableCellAlignment::None
160            };
161
162            row.cells.push(TableCell {
163                content: vec![DocNode::Paragraph(Paragraph {
164                    content: vec![InlineContent::Text(cell_content)],
165                })],
166                header: false,
167                align,
168            });
169        }
170        rows.push(row);
171    }
172
173    // Apply alignments to header
174    if !header.is_empty() {
175        for (i, cell) in header[0].cells.iter_mut().enumerate() {
176            if i < alignments.len() {
177                cell.align = alignments[i];
178            }
179        }
180    }
181
182    DocNode::Table(Table {
183        rows,
184        header,
185        caption: None,
186    })
187}
188
/// Splits one pipe-table line into trimmed cell strings.
///
/// The line is trimmed, then one optional leading `|` and one optional trailing
/// *unescaped* `|` are dropped, and the remainder is split on unescaped `|`.
/// A backslash escapes the character after it, so `\|` stays inside its cell;
/// escape sequences are kept verbatim in the returned text.
///
/// The result always contains at least one (possibly empty) cell.
fn parse_table_row(line: &str) -> Vec<String> {
    let line = line.trim();
    let line = line.strip_prefix('|').unwrap_or(line);

    let mut cells = Vec::new();
    let mut cell_start = 0;
    let mut escaped = false;
    let mut ends_with_delimiter = false;

    for (idx, ch) in line.char_indices() {
        ends_with_delimiter = false;
        if escaped {
            // This character was escaped by the preceding backslash.
            escaped = false;
        } else if ch == '\\' {
            escaped = true;
        } else if ch == '|' {
            cells.push(line[cell_start..idx].trim().to_string());
            // '|' is a single byte, so idx + 1 is a char boundary.
            cell_start = idx + 1;
            ends_with_delimiter = true;
        }
    }

    // A closing unescaped '|' terminates the last cell rather than opening a new one.
    if !ends_with_delimiter {
        cells.push(line[cell_start..].trim().to_string());
    }
    cells
}
196
197fn serialize_pipe_table(table: &Table) -> String {
198    let mut output = String::new();
199
200    // 1. Calculate column widths
201    let mut col_widths = Vec::new();
202
203    // Check header
204    for row in &table.header {
205        for (i, cell) in row.cells.iter().enumerate() {
206            let width = cell_text_width(cell);
207            if i >= col_widths.len() {
208                col_widths.push(width);
209            } else {
210                col_widths[i] = col_widths[i].max(width);
211            }
212        }
213    }
214
215    // Check body
216    for row in &table.rows {
217        for (i, cell) in row.cells.iter().enumerate() {
218            let width = cell_text_width(cell);
219            if i >= col_widths.len() {
220                col_widths.push(width);
221            } else {
222                col_widths[i] = col_widths[i].max(width);
223            }
224        }
225    }
226
227    // Ensure minimum width of 3 for alignment markers
228    for width in &mut col_widths {
229        *width = (*width).max(3);
230    }
231
232    // 2. Serialize Header
233    for row in &table.header {
234        output.push('|');
235        for (i, cell) in row.cells.iter().enumerate() {
236            let text = cell_text(cell);
237            let width = col_widths.get(i).copied().unwrap_or(text.len());
238            output.push_str(&format!(" {text:width$} |"));
239        }
240        output.push('\n');
241    }
242
243    // 3. Serialize Separator
244    if !col_widths.is_empty() {
245        output.push('|');
246        for (i, width) in col_widths.iter().enumerate() {
247            let align = table
248                .header
249                .first()
250                .and_then(|row| row.cells.get(i))
251                .map(|c| c.align)
252                .unwrap_or(TableCellAlignment::None);
253
254            let dashes = "-".repeat(width.saturating_sub(2));
255            match align {
256                TableCellAlignment::Left => output.push_str(&format!(" :{dashes}- |")),
257                TableCellAlignment::Right => output.push_str(&format!(" -{dashes}: |")),
258                TableCellAlignment::Center => output.push_str(&format!(" :{dashes}: |")),
259                TableCellAlignment::None => output.push_str(&format!(" -{dashes}- |")),
260            }
261        }
262        output.push('\n');
263    }
264
265    // 4. Serialize Body
266    for row in &table.rows {
267        output.push('|');
268        for (i, cell) in row.cells.iter().enumerate() {
269            let text = cell_text(cell);
270            let width = col_widths.get(i).copied().unwrap_or(text.len());
271            output.push_str(&format!(" {text:width$} |"));
272        }
273        output.push('\n');
274    }
275
276    output
277}
278
279fn cell_text(cell: &TableCell) -> String {
280    // Simple extraction for now, similar to existing logic
281    if let Some(DocNode::Paragraph(p)) = cell.content.first() {
282        p.content
283            .iter()
284            .map(|ic| match ic {
285                InlineContent::Text(t) => t.clone(),
286                InlineContent::Bold(c) => format!("*{}*", inline_content_to_text(c)),
287                InlineContent::Italic(c) => format!("_{}_", inline_content_to_text(c)),
288                InlineContent::Code(c) => format!("`{c}`"),
289                InlineContent::Math(c) => format!("${c}$"),
290                InlineContent::Reference(c) => format!("[{c}]"),
291                InlineContent::Marker(c) => c.clone(),
292                InlineContent::Image(image) => {
293                    let mut text = format!("![{}]({})", image.alt, image.src);
294                    if let Some(title) = &image.title {
295                        text.push_str(&format!(" \"{title}\""));
296                    }
297                    text
298                }
299            })
300            .collect()
301    } else {
302        String::new()
303    }
304}
305
306fn cell_text_width(cell: &TableCell) -> usize {
307    cell_text(cell).len()
308}
309
310fn inline_content_to_text(content: &[InlineContent]) -> String {
311    content
312        .iter()
313        .map(|ic| match ic {
314            InlineContent::Text(t) => t.clone(),
315            InlineContent::Bold(c) => format!("*{}*", inline_content_to_text(c)),
316            InlineContent::Italic(c) => format!("_{}_", inline_content_to_text(c)),
317            InlineContent::Code(c) => format!("`{c}`"),
318            InlineContent::Math(c) => format!("${c}$"),
319            InlineContent::Reference(c) => format!("[{c}]"),
320            InlineContent::Marker(c) => c.clone(),
321            InlineContent::Image(image) => {
322                let mut text = format!("![{}]({})", image.alt, image.src);
323                if let Some(title) = &image.title {
324                    text.push_str(&format!(" \"{title}\""));
325                }
326                text
327            }
328        })
329        .collect()
330}