Skip to main content

comrak/parser/
table.rs

1use std::borrow::Cow;
2use std::cmp::min;
3use std::mem;
4
5use crate::nodes::{Ast, LineColumn, Node, NodeTable, NodeValue, TableAlignment};
6use crate::parser::Parser;
7use crate::scanners;
8use crate::strings::{count_newlines, is_line_end_char, newlines_of, trim_cow};
9
// Limit to prevent a malicious input from causing a denial of service.
// `try_opening_row` refuses to add further rows to a table once
// `get_num_autocompleted_cells` reports more than this many cells.
const MAX_AUTOCOMPLETED_CELLS: usize = 500_000;
13
14pub fn try_opening_block<'a>(
15    parser: &mut Parser<'a, '_, '_>,
16    container: Node<'a>,
17    line: &str,
18) -> Option<(Node<'a>, bool, bool)> {
19    let aligns = match &container.data().value {
20        NodeValue::Paragraph => None,
21        NodeValue::Table(nt) => Some(nt.alignments.clone()),
22        _ => return None,
23    };
24
25    match aligns {
26        None => try_opening_header(parser, container, line),
27        Some(ref aligns) => try_opening_row(parser, container, aligns, line),
28    }
29}
30
31fn try_opening_header<'a>(
32    parser: &mut Parser<'a, '_, '_>,
33    container: Node<'a>,
34    line: &str,
35) -> Option<(Node<'a>, bool, bool)> {
36    if container.data().table_visited {
37        return Some((container, false, false));
38    }
39
40    if scanners::table_start(&line[parser.first_nonspace..]).is_none() {
41        return Some((container, false, false));
42    }
43
44    let spoiler = parser.options.extension.spoiler;
45
46    let delimiter_row = match row(&line[parser.first_nonspace..], spoiler) {
47        Some(delimiter_row) => delimiter_row,
48        None => return Some((container, false, true)),
49    };
50
51    let mut container_content = mem::take(&mut container.data_mut().content);
52    let mut header_row = match row(&container_content, spoiler) {
53        Some(header_row) => header_row,
54        None => {
55            mem::swap(&mut container.data_mut().content, &mut container_content);
56            return Some((container, false, true));
57        }
58    };
59
60    if header_row.cells.len() != delimiter_row.cells.len() {
61        mem::swap(&mut container.data_mut().content, &mut container_content);
62        return Some((container, false, true));
63    }
64
65    if header_row.paragraph_offset > 0 {
66        try_inserting_table_header_paragraph(
67            parser,
68            container,
69            &container_content,
70            header_row.paragraph_offset,
71        );
72    }
73
74    let mut alignments = vec![];
75    for cell in delimiter_row.cells {
76        let cell_content = cell.content.as_bytes();
77        let left = !cell_content.is_empty() && cell_content[0] == b':';
78        let right = !cell_content.is_empty() && cell_content[cell_content.len() - 1] == b':';
79        alignments.push(if left && right {
80            TableAlignment::Center
81        } else if left {
82            TableAlignment::Left
83        } else if right {
84            TableAlignment::Right
85        } else {
86            TableAlignment::None
87        });
88    }
89
90    let start = container.data().sourcepos.start;
91    let child = Ast::new(
92        NodeValue::Table(Box::new(NodeTable {
93            alignments,
94            num_columns: header_row.cells.len(),
95            num_rows: 0,
96            num_nonempty_cells: 0,
97        })),
98        start,
99    );
100    let table = parser.arena.alloc(child.into());
101    container.append(table);
102
103    let header = parser.add_child(table, NodeValue::TableRow(true), start.column);
104    {
105        let header_ast = &mut header.data_mut();
106        header_ast.sourcepos.start.line = start.line;
107        header_ast.sourcepos.end = start.column_add(
108            (container_content.len()
109                - newlines_of(&container_content)
110                - 1
111                - header_row.paragraph_offset) as isize,
112        );
113    }
114
115    let mut i = 0;
116
117    while i < header_row.cells.len() {
118        let cell = &mut header_row.cells[i];
119        let ast_cell = parser.add_child(
120            header,
121            NodeValue::TableCell,
122            start.column + cell.start_offset - header_row.paragraph_offset,
123        );
124        let ast = &mut ast_cell.data_mut();
125        ast.sourcepos.start.line = start.line;
126        ast.sourcepos.end =
127            start.column_add((cell.end_offset - header_row.paragraph_offset) as isize);
128        mem::swap(&mut ast.content, cell.content.to_mut());
129        ast.line_offsets.push(
130            start.column + cell.start_offset - 1 + cell.internal_offset
131                - header_row.paragraph_offset,
132        );
133
134        i += 1;
135    }
136
137    mem::swap(&mut container.data_mut().content, &mut container_content);
138
139    let offset = line.len() - newlines_of(line) - parser.offset;
140    parser.advance_offset(line, offset, false);
141
142    adjust_table_counters(table, i, (parser.line_number, offset).into());
143
144    Some((table, true, false))
145}
146
147fn try_opening_row<'a>(
148    parser: &mut Parser<'a, '_, '_>,
149    container: Node<'a>,
150    alignments: &[TableAlignment],
151    line: &str,
152) -> Option<(Node<'a>, bool, bool)> {
153    if parser.blank {
154        return None;
155    }
156
157    if get_num_autocompleted_cells(container) > MAX_AUTOCOMPLETED_CELLS {
158        return None;
159    }
160
161    let sourcepos = container.data().sourcepos;
162    let spoiler = parser.options.extension.spoiler;
163    let mut this_row = row(&line[parser.first_nonspace..], spoiler)?;
164
165    let new_row = parser.add_child(
166        container,
167        NodeValue::TableRow(false),
168        sourcepos.start.column,
169    );
170    new_row.data_mut().sourcepos.end.column = parser.curline_end_col;
171
172    let mut i = 0;
173    let mut last_column = sourcepos.start.column;
174
175    while i < min(alignments.len(), this_row.cells.len()) {
176        let cell = &mut this_row.cells[i];
177        let cell_node = parser.add_child(
178            new_row,
179            NodeValue::TableCell,
180            sourcepos.start.column + cell.start_offset,
181        );
182        let cell_ast = &mut cell_node.data_mut();
183        cell_ast.sourcepos.end.column = sourcepos.start.column + cell.end_offset;
184        mem::swap(&mut cell_ast.content, cell.content.to_mut());
185        cell_ast
186            .line_offsets
187            .push(sourcepos.start.column + cell.start_offset - 1 + cell.internal_offset);
188
189        last_column = cell_ast.sourcepos.end.column;
190
191        i += 1;
192    }
193
194    while i < alignments.len() {
195        let cell_node = parser.add_child(new_row, NodeValue::TableCell, last_column + 1);
196        // for autocompleted (empty) cells, set end column equal to start
197        let cell_ast = &mut cell_node.data_mut();
198        cell_ast.sourcepos.end.column = last_column + 1;
199        i += 1;
200    }
201
202    let offset = line.len() - parser.offset - newlines_of(line);
203    parser.advance_offset(line, offset, false);
204
205    adjust_table_counters(container, i, (parser.line_number, offset).into());
206
207    Some((new_row, false, false))
208}
209
// A table row as parsed by `row` from a borrowed string.
struct Row<'t> {
    // Byte offset in the parsed string at which the row itself starts.
    // Non-zero when `row` skipped earlier lines; `try_opening_header` treats
    // the text before this offset as a separate paragraph.
    paragraph_offset: usize,
    // The cells of the row, in source order.
    cells: Vec<Cell<'t>>,
}
214
// A single cell of a parsed `Row`. All offsets are byte offsets into the
// string that was handed to `row`.
struct Cell<'t> {
    // Where the cell starts: `row` walks back from the start of the cell
    // match until the preceding byte is a `|` (or the row start is reached).
    start_offset: usize,
    // Offset of the last byte covered by the cell match.
    end_offset: usize,
    // Number of bytes between `start_offset` and the start of the cell match.
    internal_offset: usize,
    // Cell text with `\|` unescaped and passed through `trim_cow`.
    content: Cow<'t, str>,
}
221
222fn row(string: &str, spoiler: bool) -> Option<Row<'_>> {
223    let bytes = string.as_bytes();
224    let len = string.len();
225    let mut cells: Vec<Cell> = vec![];
226
227    let mut offset = scanners::table_cell_end(string).unwrap_or(0);
228
229    let mut paragraph_offset = 0;
230
231    while offset < len {
232        let cell_matched = scanners::table_cell(&string[offset..], spoiler).unwrap_or(0);
233        let pipe_matched = scanners::table_cell_end(&string[offset + cell_matched..]).unwrap_or(0);
234
235        if cell_matched > 0 || pipe_matched > 0 {
236            let mut cell = unescape_pipes(&string[offset..offset + cell_matched]);
237            trim_cow(&mut cell);
238
239            let mut start_offset = offset;
240            let mut internal_offset = 0;
241
242            while start_offset > paragraph_offset && bytes[start_offset - 1] != b'|' {
243                start_offset -= 1;
244                internal_offset += 1;
245            }
246
247            if cells.len() == u16::MAX as usize {
248                return None;
249            }
250
251            cells.push(Cell {
252                start_offset,
253                end_offset: offset + cell_matched - 1,
254                internal_offset,
255                content: cell,
256            });
257        }
258
259        offset += cell_matched + pipe_matched;
260
261        if pipe_matched == 0 {
262            let row_end_offset = scanners::table_row_end(&string[offset..]).unwrap_or(0);
263            offset += row_end_offset;
264
265            if row_end_offset == 0 || offset == len {
266                break;
267            }
268
269            paragraph_offset = offset;
270            cells.clear();
271            offset += scanners::table_cell_end(&string[offset..]).unwrap_or(0);
272        }
273    }
274
275    if offset != len || cells.is_empty() {
276        None
277    } else {
278        Some(Row {
279            paragraph_offset,
280            cells,
281        })
282    }
283}
284
285fn try_inserting_table_header_paragraph<'a>(
286    parser: &mut Parser<'a, '_, '_>,
287    container: Node<'a>,
288    container_content: &str,
289    paragraph_offset: usize,
290) {
291    if container
292        .parent()
293        .is_some_and(|p| !p.can_contain_type(&NodeValue::Paragraph))
294    {
295        return;
296    }
297
298    let preface = &container_content[..paragraph_offset];
299    let mut paragraph_content = unescape_pipes(preface);
300    let (newlines, _since_newline) = count_newlines(&paragraph_content);
301    trim_cow(&mut paragraph_content);
302    let paragraph_content = paragraph_content.to_string();
303
304    let container_ast = &mut container.data_mut();
305    let start = container_ast.sourcepos.start;
306
307    let mut paragraph = Ast::new(NodeValue::Paragraph, start);
308    paragraph.sourcepos.end.line = start.line + newlines - 1;
309
310    for n in 0..newlines {
311        paragraph.line_offsets.push(container_ast.line_offsets[n]);
312    }
313
314    let last_line_offset = *paragraph.line_offsets.last().unwrap_or(&0);
315    paragraph.sourcepos.end.column = last_line_offset
316        + preface
317            .as_bytes()
318            .iter()
319            .rev()
320            .skip(1)
321            .take_while(|&&c| !is_line_end_char(c))
322            .count();
323
324    container_ast.sourcepos.start.line += newlines;
325    container_ast.sourcepos.start.column = container_ast.line_offsets[newlines] + 1;
326
327    paragraph.content = paragraph_content;
328    let node = parser.arena.alloc(paragraph.into());
329    container.insert_before(node);
330}
331
// Replaces every `\|` in `string` with a plain `|`.
//
// A backslash only escapes the character directly after it, so in `\\|` the
// second backslash is consumed by the first and the pipe is left untouched.
// The input is returned borrowed when nothing had to be replaced.
fn unescape_pipes(string: &str) -> Cow<'_, str> {
    let mut unescaped = String::new();
    // Start of the part of `string` that has not been copied yet.
    let mut pending_from = 0;
    let mut after_backslash = false;

    // `\` and `|` are ASCII, so scanning bytes never splits a UTF-8 sequence.
    for (pos, byte) in string.bytes().enumerate() {
        match (after_backslash, byte) {
            (true, b'|') => {
                // Copy everything up to, but not including, the backslash.
                unescaped.push_str(&string[pending_from..pos - 1]);
                pending_from = pos;
                after_backslash = false;
            }
            (true, _) => after_backslash = false,
            (false, b'\\') => after_backslash = true,
            (false, _) => {}
        }
    }

    if pending_from == 0 {
        Cow::Borrowed(string)
    } else {
        unescaped.push_str(&string[pending_from..]);
        Cow::Owned(unescaped)
    }
}
356
357// Increment the number of rows in the table. Also update num_nonempty_cells,
358// which keeps track of the number of cells which were parsed from the
359// input file. (If one of the rows is too short, then the trailing cells are
360// autocompleted. Autocompleted cells are not counted in num_nonempty_cells.)
361// The purpose of this is to prevent a malicious input from generating a very
362// large number of autocompleted cells, which could cause a denial of service
363// vulnerability.
364fn adjust_table_counters(container: Node<'_>, i: usize, end: LineColumn) {
365    let mut ast = container.data_mut();
366    let NodeValue::Table(ref mut nt) = ast.value else {
367        unreachable!();
368    };
369    nt.num_rows += 1;
370    nt.num_nonempty_cells += i;
371    ast.sourcepos.end = end;
372}
373
374// Calculate the number of autocompleted cells.
375fn get_num_autocompleted_cells(container: Node<'_>) -> usize {
376    return match container.data().value {
377        NodeValue::Table(ref node_table) => {
378            let num_cells = node_table.num_columns * node_table.num_rows;
379
380            if num_cells < node_table.num_nonempty_cells {
381                0
382            } else {
383                (node_table.num_columns * node_table.num_rows) - node_table.num_nonempty_cells
384            }
385        }
386        _ => 0,
387    };
388}
389
/// Returns `true` if `line` parses as a table row, i.e. if `row` accepts it.
///
/// `spoiler` is forwarded unchanged to the cell scanner used by `row`.
pub fn matches(line: &str, spoiler: bool) -> bool {
    row(line, spoiler).is_some()
}