// mdbook_extended_markdown_table/extended_table.rs

use std::collections::BTreeSet;

use anyhow::{bail, Result};
2
/// A single (possibly merged) cell of an extended table, positioned on the table's unit grid.
///
/// The unit grid is the grid the table would have if no cells were merged. Positions are
/// zero-based; `width` and `height` count how many unit columns and rows the cell spans.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ExtendedTableCell {
    /// Zero-based unit column in which the cell starts.
    start_column: usize,
    /// Number of unit columns the cell spans.
    width: usize,
    /// Zero-based unit row in which the cell starts.
    start_row: usize,
    /// Number of unit rows the cell spans.
    height: usize,
    /// Text placed verbatim inside the cell's `<div>`.
    contents: String,
}

impl ExtendedTableCell {
    /// Returns `true` when the cell starts on the first row of the table.
    fn is_header(&self) -> bool {
        self.start_row == 0
    }

    /// Returns `true` when the cell starts on the first column of the table.
    fn is_left_border(&self) -> bool {
        self.start_column == 0
    }

    /// Renders the cell as a `<div>` placed with CSS grid lines.
    ///
    /// CSS grid lines are one-based, so every zero-based position is shifted by one; the end
    /// line is the start line plus the span. The contents are inserted as-is, without any
    /// escaping.
    fn into_html(self) -> String {
        // Keep only the class names whose condition holds, in a fixed order.
        let classes = [
            (self.is_header(), "extended-markdown-header"),
            (self.is_left_border(), "extended-markdown-left-border"),
        ]
        .iter()
        .filter(|(applies, _)| *applies)
        .map(|(_, class)| *class)
        .collect::<Vec<_>>()
        .join(" ");

        let column_start = self.start_column + 1;
        let row_start = self.start_row + 1;

        format!(
            r#"<div class="{}" style="grid-column-start: {}; grid-column-end: {}; grid-row-start: {}; grid-row-end: {}">{}</div>"#,
            classes,
            column_start,
            column_start + self.width,
            row_start,
            row_start + self.height,
            self.contents
        )
    }
}
41
42pub struct ExtendedTable {
43    cells: Vec<ExtendedTableCell>,
44}
45
46impl ExtendedTable {
47    pub fn parse(contents: &str) -> Result<Self> {
48        let lines_count = contents.lines().count();
49        if lines_count == 0 {
50            bail!("Table cannot be empty");
51        } else if lines_count % 2 == 0 {
52            bail!("Table must have an odd number of lines: lines must alternate between grid content and horizontal separators, with no separators on the top or bottom");
53        }
54        let width_chars = contents.lines().next().unwrap().chars().count();
55
56        // Holds all the indices of a '|' character within _any_ of the lines of the table. This is
57        // used to determine the ground truth for locations of column separators.
58        let mut all_column_separators = std::collections::HashSet::<usize>::new();
59
60        let mut has_zero_column_row = false;
61
62        for line in contents.lines() {
63            let line_length = line.chars().count();
64            if !line.starts_with('|') || !line.ends_with('|') || line_length != width_chars {
65                bail!("All lines of a tables must start and end with a '|' character, and be the same length");
66            }
67
68            let mut num_columns = 0;
69            for (i, c) in line.chars().enumerate() {
70                if c == '|' {
71                    all_column_separators.insert(i);
72                    if i != 0 {
73                        num_columns += 1;
74                    }
75                }
76            }
77
78            // The previous error should have precedence in the case of `|` on the first line, so
79            // we defer this until after checking all lines
80            if num_columns < 1 {
81                has_zero_column_row = true;
82            }
83        }
84
85        if has_zero_column_row {
86            bail!("Tables must have at least one column");
87        }
88
89        // Convert the above HashMap into a sorted Vec instead, for easier iteration.
90        let mut all_column_separators: Vec<_> = all_column_separators.into_iter().collect();
91        all_column_separators.sort();
92
93        // Find all the locations of cell boundaries within a base unit grid template.
94        let unit_cell_boundaries = Self::get_cell_boundaries(contents, &all_column_separators);
95
96        // Fun algorithm time!
97        // Iterate through the rows to discover the rightmost boundaries of cells in the output.
98        // Store the row on which each right boundary starts in `last_vertical_separator_indices`.
99        // When a bottom-right corner of a cell is found, record its dimensions and contents, and
100        // store it into `cells`.
101        let mut cells = vec![];
102        let mut last_vertical_separator_indices: Vec<usize> =
103            vec![0; unit_cell_boundaries[0].len()];
104        for row in 0..unit_cell_boundaries.len() {
105            let mut last_horizontal_separator_index = 0;
106            for col in 0..unit_cell_boundaries[0].len() {
107                // Cell's bottom-right corner
108                if unit_cell_boundaries[row][col].right && unit_cell_boundaries[row][col].bottom {
109                    let start_column = last_horizontal_separator_index;
110                    let width = col + 1 - last_horizontal_separator_index;
111                    let start_row = last_vertical_separator_indices[col];
112                    let height = row - last_vertical_separator_indices[col] + 1;
113
114                    let mut cell_contents = String::new();
115                    let content_start = all_column_separators[start_column] + 1;
116                    let content_len = all_column_separators[col + 1] - content_start;
117                    let lines = contents.lines().skip(start_row * 2);
118                    for line in lines.take(height * 2 - 1) {
119                        let additional_text = line
120                            .chars()
121                            .skip(content_start)
122                            .take(content_len)
123                            .collect::<String>()
124                            .trim()
125                            .to_string();
126                        let padding = if cell_contents.is_empty() || additional_text.is_empty() {
127                            ""
128                        } else {
129                            " "
130                        };
131                        cell_contents = format!("{}{}{}", cell_contents, padding, additional_text);
132                    }
133
134                    cells.push(ExtendedTableCell {
135                        start_column,
136                        width,
137                        start_row,
138                        height,
139                        contents: cell_contents,
140                    });
141                }
142                // Cell's right border - cache the start row of the next cell to the right
143                if unit_cell_boundaries[row][col].right {
144                    last_horizontal_separator_index = col + 1;
145                }
146                // Cell's bottom border - cache the start column of the next cell below
147                if unit_cell_boundaries[row][col].bottom {
148                    last_vertical_separator_indices[col] = row + 1;
149                }
150            }
151        }
152
153        Ok(Self { cells })
154    }
155
156    /// Constructs a 2D array with the dimensions of the intended table's base geometry (i.e. as if
157    /// no cells were merged). Each element describes whether or not the corresponding cell
158    /// location in the table has a border separating it from the cell immediately to its right, as
159    /// well as from the cell immediately below it. Each cell in the rightmost column always has a
160    /// right border, and each cell in the bottom row always has a bottom border.
161    fn get_cell_boundaries(
162        contents: &str,
163        all_column_separator_indices: &[usize],
164    ) -> Vec<Vec<CellBoundaries>> {
165        // First, just look for each potential cell's boundary separating it from the cell
166        // immediately to its right.
167        let all_horizontal_separators = contents.lines().step_by(2).map(|line| {
168            // offset by 1 because we skip the first column separator
169            let mut chars: Box<dyn Iterator<Item = char>> = Box::new(line.chars().skip(1));
170            let mut last_index = 0;
171            let mut seps = Vec::with_capacity(all_column_separator_indices.len());
172            for column_separator_index in all_column_separator_indices.iter().skip(1) {
173                chars = Box::new(chars.skip(*column_separator_index - last_index - 1));
174                let sep = chars.next().unwrap();
175                seps.push(sep == '|');
176                last_index = *column_separator_index;
177            }
178            seps
179        });
180
181        // Same as before, but for each potential cell's boundary separating it from the cell
182        // immediately below. **All** of the characters within a separator must be a dash in order
183        // to count as a boundary.
184        let all_vertical_separators = contents
185            .lines()
186            .skip(1)
187            .step_by(2)
188            .map(|line| {
189                let mut chars = line.chars();
190                let mut seps = Vec::with_capacity(all_column_separator_indices.len() - 1);
191                for window in
192                    all_column_separator_indices[0..all_column_separator_indices.len()].windows(2)
193                {
194                    let left_column_separator_index = window[0];
195                    let right_column_separator_index = window[1];
196
197                    chars.by_ref().take(1).for_each(drop);
198
199                    let mut consumed = 0;
200                    let to_consume = right_column_separator_index - left_column_separator_index - 1;
201                    if chars.by_ref().take(to_consume).all(|c| {
202                        consumed += 1;
203                        c == '-'
204                    }) {
205                        seps.push(true);
206                    } else {
207                        chars.by_ref().take(to_consume - consumed).for_each(drop);
208                        seps.push(false);
209                    }
210                }
211                seps
212            })
213            // Add the bottom row, which doesn't actually exist in the source string
214            .chain(std::iter::once(vec![
215                true;
216                all_column_separator_indices.len() - 1
217            ]));
218
219        // Combine the two together.
220        all_horizontal_separators
221            .zip(all_vertical_separators)
222            .map(|(h_line, v_line)| {
223                h_line
224                    .into_iter()
225                    .zip(v_line.into_iter())
226                    .map(|(right, bottom)| CellBoundaries { right, bottom })
227                    .collect()
228            })
229            .collect()
230    }
231
232    pub fn into_html(self) -> String {
233        let mut contents: String = String::new();
234        self.cells
235            .into_iter()
236            .for_each(|cell| contents += &cell.into_html());
237        format!(r#"<div class="extended-markdown-table">{}</div>"#, contents)
238    }
239}
240
/// Records whether or not a unit position in a table grid has a boundary to its right and bottom.
/// A position without a boundary will be merged with the neighboring position.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct CellBoundaries {
    /// Whether or not the position has a boundary between it and the position immediately to the
    /// right.
    right: bool,
    /// Whether or not the position has a boundary between it and the position immediately to the
    /// bottom.
    bottom: bool,
}
251
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain table without merged cells: one header row and one body row of six columns.
    #[test]
    fn test_simple() {
        let table = r###"
| Packet ID |       State        | Bound To | Field Name | Field Type |                    Notes                   |
|-----------|--------------------|----------|------------|------------|--------------------------------------------|
|      0x00 | Decentralized Auth | Client   | Reason     | Chat       | The reason why the player was disconnected |
"###.trim();

        let parsed = ExtendedTable::parse(table).unwrap();

        assert_eq!(parsed.cells.len(), 6 * 2);
    }

    /// Cells merged both vertically and horizontally, including header cells spanning two unit
    /// columns.
    #[test]
    fn test_complex() {
        let table = r###"
| Packet ID |       State        | Bound To |     Field Name       |       Field Type               |                    Notes                        |
|-----------|--------------------|----------|----------------------|--------------------------------|-------------------------------------------------|
|      0x03 | Decentralized Auth | Client   | Has Profile Data     | Boolean                        | Whether or not the remaining fields are present |
|           |                    |          |----------------------|--------------------------------|-------------------------------------------------|
|           |                    |          | UUID                 | UUID                           |                                                 |
|           |                    |          |----------------------|--------------------------------|-------------------------------------------------|
|           |                    |          | Username             | String (16)                    |                                                 |
|           |                    |          |----------------------|--------------------------------|-------------------------------------------------|
|           |                    |          | Number of Properties | VarInt                         | Number of elements in the following array       |
|           |                    |          |----------------------|--------------------------------|-------------------------------------------------|
|           |                    |          | Property | Name      | Array | String(32767)          |                                                 |
|           |                    |          |          |-----------|       |------------------------|-------------------------------------------------|
|           |                    |          |          | Value     |       | String(32767)          |                                                 |
|           |                    |          |          |-----------|       |------------------------|-------------------------------------------------|
|           |                    |          |          | Is Signed |       | Boolean                | Generally false for Decentralized Auth          |
|           |                    |          |          |-----------|       |------------------------|-------------------------------------------------|
|           |                    |          |          | Signature |       | Optional String(32767) | Only if Is Signed is true                       |
"###.trim();

        let parsed = ExtendedTable::parse(table).unwrap();

        assert_eq!(parsed.cells.len(), 6 + 3 + 3 * 4 + 1 + 4 + 1 + 4 * 2);

        {
            let cell = &parsed
                .cells
                .iter()
                .find(|cell| cell.contents == "Field Name")
                .expect("find cell with `Field Name` as content");
            assert_eq!(cell.start_column, 3);
            assert_eq!(cell.width, 2);
            assert_eq!(cell.start_row, 0);
            assert_eq!(cell.height, 1);
        }

        {
            let cell = &parsed
                .cells
                .iter()
                .find(|cell| cell.contents == "0x03")
                .expect("find cell with `0x03` as content");
            assert_eq!(cell.start_column, 0);
            assert_eq!(cell.width, 1);
            assert_eq!(cell.start_row, 1);
            assert_eq!(cell.height, 8);
        }

        {
            let cell = &parsed
                .cells
                .iter()
                .find(|cell| cell.contents == "Array")
                .expect("find cell with `Array` as content");
            assert_eq!(cell.start_column, 5);
            assert_eq!(cell.width, 1);
            assert_eq!(cell.start_row, 5);
            assert_eq!(cell.height, 4);
        }
    }

    /// Column separators that do not line up between rows still produce one cell per visible
    /// box.
    #[test]
    fn test_misaligned() {
        let table = r###"
| One col spanning multiple rows | Info | Another | More information |
|-------------------------------------------------|------------------|
|   ?   |  Another col overlapping the other one  |       null       |
"###
        .trim();

        let parsed = ExtendedTable::parse(table).unwrap();

        assert_eq!(parsed.cells.len(), 4 + 3);
    }

    /// Text placed on separator lines inside a merged cell is joined into the cell's contents.
    #[test]
    fn test_multiline_wrapping() {
        let table = r###"
| One col spanning multiple rows | Info | Another | More information |
|-------------------------------------------------| could be         |
|   ?   |                                         |      appreciated |
|-------|   Another col overlapping the other one,|------------------|
|   no  |but this time,   it has a lot of text!   |       null       |
"###
        .trim();

        let parsed = ExtendedTable::parse(table).unwrap();

        assert_eq!(parsed.cells.len(), 3 + 2 + 1 + 2);

        {
            let cell = &parsed
                .cells
                .iter()
                .find(|cell| cell.contents.starts_with("More information"))
                .expect("find cell starting with `More information` as content");
            assert_eq!(cell.start_column, 4);
            assert_eq!(cell.width, 1);
            assert_eq!(cell.start_row, 0);
            assert_eq!(cell.height, 2);
            assert_eq!(cell.contents, "More information could be appreciated");
        }

        {
            let cell = &parsed
                .cells
                .iter()
                .find(|cell| cell.contents.starts_with("Another col"))
                .expect("find cell starting with `Another col` as content");
            assert_eq!(cell.start_column, 1);
            assert_eq!(cell.width, 3);
            assert_eq!(cell.start_row, 1);
            assert_eq!(cell.height, 2);
            assert_eq!(
                cell.contents,
                "Another col overlapping the other one, but this time,   it has a lot of text!"
            );
        }
    }

    /// A dense grid with a large merged block in the middle and merged cells on the last row.
    #[test]
    fn test_large() {
        let table = r###"
|a|b|c|d|e|f|g|h|
|-|-|-------|-|-|
|i|j| chonk |k|l|
|-|-|       |-|-|
|m|n|       |o|p|
|-|-|       |-|-|
|q|r|       |s|t|
|-|-----------|-|
|u|  v  |  w  |x|
"###
        .trim();

        let parsed = ExtendedTable::parse(table).unwrap();

        assert_eq!(parsed.cells.len(), 8 + 2 * 3 + 1 + 2 * 3 + 4);

        {
            let cell = &parsed
                .cells
                .iter()
                .find(|cell| cell.contents == "o")
                .expect("find cell with `o` as content");
            assert_eq!(cell.start_column, 6);
            assert_eq!(cell.width, 1);
            assert_eq!(cell.start_row, 2);
            assert_eq!(cell.height, 1);
        }

        {
            let cell = &parsed
                .cells
                .iter()
                .find(|cell| cell.contents == "chonk")
                .expect("find cell with `chonk` as content");
            assert_eq!(cell.start_column, 2);
            assert_eq!(cell.width, 4);
            assert_eq!(cell.start_row, 1);
            assert_eq!(cell.height, 3);
        }

        {
            let cell = &parsed
                .cells
                .iter()
                .find(|cell| cell.contents == "v")
                .expect("find cell with `v` as content");
            assert_eq!(cell.start_column, 1);
            assert_eq!(cell.width, 3);
            assert_eq!(cell.start_row, 4);
            assert_eq!(cell.height, 1);
        }
    }

    /// Checks the raw unit-grid boundaries computed for the table used in `test_large`.
    #[test]
    fn test_separators() {
        let table = r###"
|a|b|c|d|e|f|g|h|
|-|-|-------|-|-|
|i|j| chonk |k|l|
|-|-|       |-|-|
|m|n|       |o|p|
|-|-|       |-|-|
|q|r|       |s|t|
|-|-----------|-|
|u|  v  |  w  |x|
"###
        .trim();

        let separators =
            ExtendedTable::get_cell_boundaries(table, &[0, 2, 4, 6, 8, 10, 12, 14, 16]);

        // Header row: every unit position is its own cell.
        for boundary in &separators[0] {
            assert!(boundary.right);
            assert!(boundary.bottom);
        }

        // First two rows of the merged `chonk` block.
        for r in 1..3 {
            for i in [0, 1, 6, 7] {
                assert!(separators[r][i].right);
                assert!(separators[r][i].bottom);
            }
            for i in [2, 3, 4] {
                assert!(!separators[r][i].right);
                assert!(!separators[r][i].bottom);
            }
            assert!(separators[r][5].right);
            assert!(!separators[r][5].bottom);
        }

        // Last row of the block is closed by a full separator line.
        for boundary in &separators[3] {
            assert!(boundary.bottom);
        }

        // The bottom row always has a bottom boundary.
        for boundary in &separators[4] {
            assert!(boundary.bottom);
        }
    }
}