Skip to main content

rushdown/parser/
table.rs

1use core::cell::RefCell;
2
3use alloc::{boxed::Box, rc::Rc, vec::Vec};
4
5use crate::{
6    as_kind_data, as_type_data_mut,
7    ast::{
8        self, Arena, CodeSpan, KindData, NodeRef, Table, TableBody, TableCell, TableCellAlignment,
9        TableHeader, TableRow, WalkStatus,
10    },
11    context::{ContextKey, ContextKeyRegistry, ObjectValue},
12    parser::{self, AstTransformer, Context, ParagraphTransformer},
13    scanner::{
14        scan_table_delim_center, scan_table_delim_left, scan_table_delim_none,
15        scan_table_delim_right,
16    },
17    text::{self, Reader, Segment},
18    util::{indent_width, is_blank, is_punct, is_space, trim_right_space, TinyVec},
19    Result,
20};
21
/// Bookkeeping for one table cell whose source text contains `\|`.
///
/// Entries are recorded while rows are parsed and consumed later by the
/// AST transformer, which rewrites code spans inside the cell.
struct EscapedPipeCell {
    /// The table cell node in which the escaped pipes were found.
    cell: NodeRef,
    /// Source offsets of each backslash that immediately precedes a `|`
    /// (computed as the in-line position plus the row segment's start).
    pos: Vec<usize>,
}
26
27fn get_escaped_pipe_cells(
28    ctx: &mut Context,
29    key: ContextKey<ObjectValue>,
30) -> &mut Vec<EscapedPipeCell> {
31    ctx.get_mut(key)
32        .unwrap()
33        .downcast_mut::<Vec<EscapedPipeCell>>()
34        .unwrap()
35}
36
/// Name under which the context key for the escaped-pipe cell list is
/// registered in the `ContextKeyRegistry`; both transformers in this file
/// look the key up by this name.
const ESCAPED_PIPE_CELL: &str = "_epc";
38
39/// [`ParagraphTransformer`] that transforms table paragraphs into table nodes.
40#[derive(Debug)]
41pub struct TableParagraphTransformer {
42    escaped_pipe_cell: ContextKey<ObjectValue>,
43}
44
45impl TableParagraphTransformer {
46    /// Returns a new [`TableParagraphTransformer`].
47    pub fn new(reg: Rc<RefCell<ContextKeyRegistry>>) -> Self {
48        let escaped_pipe_cell = reg
49            .borrow_mut()
50            .get_or_create::<ObjectValue>(ESCAPED_PIPE_CELL);
51
52        Self { escaped_pipe_cell }
53    }
54
55    fn parse_row(
56        &self,
57        arena: &mut Arena,
58        segment: &Segment,
59        alignments: &[TableCellAlignment],
60        is_header: bool,
61        reader: &text::BasicReader,
62        ctx: &mut Context,
63    ) -> Option<NodeRef> {
64        let source = reader.source();
65        let segment = segment.trim_left_space(source).trim_right_space(source);
66        let node_pos = segment.start();
67        let line = segment.bytes(source);
68        let mut pos = if line.first().is_some_and(|&b| b == b'|') {
69            1
70        } else {
71            0
72        };
73        let limit = if line.last().is_some_and(|&b| b == b'|') {
74            line.len() - 1
75        } else {
76            line.len()
77        };
78
79        let row_ref = arena.new_node(TableRow::new());
80        arena[row_ref].set_pos(node_pos);
81        let mut i = 0;
82        while pos < limit {
83            let alignment = if i >= alignments.len() {
84                if !is_header {
85                    return Some(row_ref);
86                }
87                TableCellAlignment::None
88            } else {
89                alignments[i]
90            };
91            let start = pos;
92            let mut end = 0;
93            let mut escaped_pipe_cell: Option<EscapedPipeCell> = None;
94            let cell_ref = arena.new_node(TableCell::with_alignment(alignment));
95            while pos < limit {
96                if line[pos] == b'\\' && line.get(pos + 1).is_some_and(|&b| is_punct(b)) {
97                    if line[pos + 1] == b'|' {
98                        if escaped_pipe_cell.is_none() {
99                            escaped_pipe_cell = Some(EscapedPipeCell {
100                                cell: cell_ref,
101                                pos: Vec::new(),
102                            });
103                        }
104                        escaped_pipe_cell
105                            .as_mut()
106                            .unwrap()
107                            .pos
108                            .push(pos + segment.start());
109                    }
110                    pos += 2;
111                } else if line[pos] == b'|' {
112                    end = 1;
113                    pos += 1;
114                    break;
115                } else {
116                    pos += 1;
117                }
118            }
119            if let Some(escaped_pipe_cell) = escaped_pipe_cell {
120                if ctx.get(self.escaped_pipe_cell).is_none() {
121                    let lst = Vec::<EscapedPipeCell>::new();
122                    ctx.insert(self.escaped_pipe_cell, Box::new(lst));
123                }
124                let lst = get_escaped_pipe_cells(ctx, self.escaped_pipe_cell);
125                lst.push(escaped_pipe_cell);
126            }
127            let mut col_seg: Segment =
128                (segment.start() + start, segment.start() + pos - end).into();
129            col_seg = col_seg.trim_left_space(source).trim_right_space(source);
130            as_type_data_mut!(arena, cell_ref, Block).append_source_line(col_seg);
131            arena[cell_ref].set_pos((segment.start() + start).saturating_sub(1));
132            row_ref.append_child_fast(arena, cell_ref);
133            i += 1;
134        }
135        while i < alignments.len() {
136            let cell_ref = arena.new_node(TableCell::with_alignment(TableCellAlignment::None));
137            row_ref.append_child_fast(arena, cell_ref);
138            i += 1;
139        }
140        Some(row_ref)
141    }
142}
143
144impl ParagraphTransformer for TableParagraphTransformer {
145    fn transform(
146        &self,
147        arena: &mut Arena,
148        paragraph_ref: NodeRef,
149        reader: &mut text::BasicReader,
150        ctx: &mut Context,
151    ) {
152        let mut i = 1;
153        let mut start = i;
154        let mut lines = as_type_data_mut!(arena, paragraph_ref, Block).take_source();
155        if lines.len() < 2 {
156            as_type_data_mut!(arena, paragraph_ref, Block).put_back_source(lines);
157            return;
158        }
159        let mut alignments_opt: Option<Vec<TableCellAlignment>> = None;
160        let mut header_row_ref_opt: Option<NodeRef> = None;
161        while i < lines.len() {
162            match parse_delimiter(&lines[i], reader) {
163                Some(a) => match self.parse_row(arena, &lines[i - 1], &a, true, reader, ctx) {
164                    Some(n) => {
165                        if arena[n].children(arena).count() != a.len() {
166                            n.delete(arena);
167                            i += 1;
168                            continue;
169                        }
170                        alignments_opt = Some(a);
171                        header_row_ref_opt = Some(n);
172                        i += 1;
173                        start = i - 2;
174                        break;
175                    }
176                    None => {
177                        i += 1;
178                        continue;
179                    }
180                },
181                None => {
182                    i += 1;
183                    continue;
184                }
185            }
186        }
187
188        match (alignments_opt, header_row_ref_opt) {
189            (Some(alignments), Some(header_row_ref)) => {
190                let header_ref = arena.new_node(TableHeader::new());
191                header_ref.append_child_fast(arena, header_row_ref);
192                let table_ref = arena.new_node(Table::new());
193                table_ref.append_child_fast(arena, header_ref);
194                if let Some(pos) = arena[header_row_ref].pos() {
195                    arena[header_ref].set_pos(pos);
196                    arena[table_ref].set_pos(pos);
197                }
198                let body_ref = arena.new_node(TableBody::new());
199                while i < lines.len() {
200                    if let Some(row_ref) =
201                        self.parse_row(arena, &lines[i], &alignments, false, reader, ctx)
202                    {
203                        body_ref.append_child_fast(arena, row_ref);
204                    }
205                    i += 1;
206                }
207                if let Some(fc) = arena[body_ref].first_child() {
208                    table_ref.append_child_fast(arena, body_ref);
209                    if let Some(pos) = arena[fc].pos() {
210                        arena[body_ref].set_pos(pos);
211                    }
212                }
213                lines.drain(start..i);
214                arena[paragraph_ref].parent().unwrap().insert_after(
215                    arena,
216                    paragraph_ref,
217                    table_ref,
218                );
219                if lines.is_empty() {
220                    paragraph_ref.remove(arena);
221                } else {
222                    as_type_data_mut!(arena, paragraph_ref, Block).put_back_source(lines);
223                }
224            }
225            _ => {
226                as_type_data_mut!(arena, paragraph_ref, Block).put_back_source(lines);
227            }
228        }
229    }
230}
231
232fn parse_delimiter(
233    segment: &Segment,
234    reader: &text::BasicReader,
235) -> Option<Vec<TableCellAlignment>> {
236    let line = segment.bytes(reader.source());
237    if !is_table_delim(&line) {
238        return None;
239    }
240    let mut cols = line.split(|&b| b == b'|').collect::<Vec<&[u8]>>();
241    if is_blank(cols[0]) {
242        cols.remove(0);
243    }
244    if !cols.is_empty() && is_blank(cols[cols.len() - 1]) {
245        cols.pop();
246    }
247    let mut alignments = Vec::<TableCellAlignment>::new();
248    for col in cols {
249        if scan_table_delim_left(col).is_some_and(|l| l == col.len()) {
250            alignments.push(TableCellAlignment::Left);
251        } else if scan_table_delim_right(col).is_some_and(|l| l == col.len()) {
252            alignments.push(TableCellAlignment::Right);
253        } else if scan_table_delim_center(col).is_some_and(|l| l == col.len()) {
254            alignments.push(TableCellAlignment::Center);
255        } else if scan_table_delim_none(col).is_some_and(|l| l == col.len()) {
256            alignments.push(TableCellAlignment::None);
257        } else {
258            return None;
259        }
260    }
261    Some(alignments)
262}
263
264fn is_table_delim(bs: &[u8]) -> bool {
265    let (w, _) = indent_width(bs, 0);
266    if w > 3 {
267        return false;
268    }
269    let mut all_sep = true;
270    for &b in trim_right_space(bs) {
271        if b != b'-' {
272            all_sep = false;
273        }
274        if !(is_space(b) || b == b'-' || b == b'|' || b == b':') {
275            return false;
276        }
277    }
278    !all_sep
279}
280
281/// [`AstTransformer`] that transforms escaped pipe cells in tables.
282#[derive(Debug)]
283pub struct TableAstTransformer {
284    escaped_pipe_cell: ContextKey<ObjectValue>,
285}
286
287impl TableAstTransformer {
288    /// Returns a new [`TableAstTransformer`].
289    pub fn new(reg: Rc<RefCell<ContextKeyRegistry>>) -> Self {
290        let escaped_pipe_cell = reg
291            .borrow_mut()
292            .get_or_create::<ObjectValue>(ESCAPED_PIPE_CELL);
293        Self { escaped_pipe_cell }
294    }
295}
296
297impl AstTransformer for TableAstTransformer {
298    fn transform(
299        &self,
300        arena: &mut Arena,
301        _doc_ref: NodeRef,
302        _reader: &mut text::BasicReader,
303        ctx: &mut parser::Context,
304    ) {
305        let Some(mut lstv) = ctx.remove(self.escaped_pipe_cell) else {
306            return;
307        };
308        let lst = lstv.downcast_mut::<Vec<EscapedPipeCell>>().unwrap();
309        let mut code_spans: Vec<(NodeRef, usize)> = Vec::new();
310        for (i, epc) in lst.iter().enumerate() {
311            if arena.get(epc.cell).is_some() {
312                ast::walk(
313                    arena,
314                    epc.cell,
315                    &mut |arena: &Arena, node_ref: NodeRef, entering: bool| -> Result<WalkStatus> {
316                        if entering {
317                            if let Some(n) = arena.get(node_ref) {
318                                if let KindData::CodeSpan(_) = n.kind_data() {
319                                    code_spans.push((node_ref, i));
320                                }
321                            }
322                            return Ok(WalkStatus::Continue);
323                        }
324                        Ok(WalkStatus::SkipChildren)
325                    },
326                )
327                .expect("walk failed");
328            }
329        }
330
331        for (code_span, i) in code_spans {
332            let mut new_indices: TinyVec<text::Index> = TinyVec::empty();
333            let mut modified = false;
334            if let Some(indices) = as_kind_data!(arena, code_span, CodeSpan).indices() {
335                for mut index in indices.iter().copied() {
336                    let mut added = false;
337                    'l: loop {
338                        for (j, &pos) in lst[i].pos.iter().enumerate() {
339                            if index.start() <= pos && pos < index.stop() {
340                                modified = true;
341                                let t1: text::Index = (index.start(), pos).into();
342                                let t2: text::Index = (pos + 1, index.stop()).into();
343                                if j != 0 {
344                                    new_indices.pop();
345                                }
346                                new_indices.push(t1);
347                                new_indices.push(t2);
348                                added = true;
349                                index = (pos + 1, index.stop()).into();
350                                continue 'l;
351                            }
352                        }
353                        break;
354                    }
355                    if !added {
356                        new_indices.push(index);
357                    }
358                }
359            }
360            if modified {
361                let parent = arena[code_span].parent().unwrap();
362                let new_code_span = arena.new_node(CodeSpan::from_indices(new_indices));
363                if let Some(pos) = arena[code_span].pos() {
364                    arena[new_code_span].set_pos(pos);
365                }
366                parent.replace_child(arena, code_span, new_code_span);
367            }
368        }
369    }
370}