table_extractor/parser/
markdown.rs1use crate::error::Result;
2use crate::{Parser, Table};
3
/// Parser for Markdown pipe tables.
///
/// This is a stateless unit type, so it is cheap to copy and can be created
/// with `MarkdownParser` or `MarkdownParser::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct MarkdownParser;
5
6impl Parser for MarkdownParser {
7 fn parse(&self, input: &str) -> Result<Table> {
8 let lines: Vec<&str> = input.lines().collect();
9
10 if lines.is_empty() {
11 return Ok(Table::new(vec![], vec![]));
12 }
13
14 let mut headers = Vec::new();
15 let mut rows = Vec::new();
16 let mut found_separator = false;
17
18 for line in lines {
19 let trimmed = line.trim();
20
21 if trimmed.is_empty() {
23 continue;
24 }
25
26 if is_separator_line(trimmed) {
28 found_separator = true;
29 continue;
30 }
31
32 let cells = parse_markdown_row(trimmed);
34
35 if !found_separator && headers.is_empty() {
36 headers = cells;
38 } else if found_separator {
39 rows.push(cells);
41 }
42 }
43
44 Table::new_validated(headers, rows)
45 }
46}
47
/// Reports whether `line` looks like a Markdown table separator row.
///
/// A separator may contain only `|`, `-`, `:` and space characters, and must
/// include at least one `-` and at least one `|`.
fn is_separator_line(line: &str) -> bool {
    let mut saw_dash = false;
    let mut saw_pipe = false;

    for ch in line.chars() {
        match ch {
            '-' => saw_dash = true,
            '|' => saw_pipe = true,
            ':' | ' ' => {}
            // Any other character disqualifies the line outright.
            _ => return false,
        }
    }

    saw_dash && saw_pipe
}
54
/// Splits one Markdown table row into its trimmed cell strings.
///
/// At most one leading and one trailing border pipe are removed, so empty
/// cells at either edge are preserved (`||b|` yields `["", "b"]`). A pipe
/// escaped as `\|` is kept as a literal `|` inside its cell rather than being
/// treated as a column delimiter. Any other backslash sequence is copied
/// through unchanged.
///
/// The result always contains at least one cell; an empty line yields a single
/// empty string.
fn parse_markdown_row(line: &str) -> Vec<String> {
    let line = line.trim();
    // Drop only the single opening border pipe, if present.
    let body = line.strip_prefix('|').unwrap_or(line);

    let mut cells = Vec::with_capacity(body.matches('|').count() + 1);
    let mut current = String::new();
    // True when the last character consumed was an unescaped pipe, i.e. the
    // row ended with its closing border and no further cell follows.
    let mut closed_by_border = false;

    let mut chars = body.chars();
    while let Some(c) = chars.next() {
        closed_by_border = false;
        match c {
            '\\' => match chars.next() {
                // `\|` is an escaped literal pipe.
                Some('|') => current.push('|'),
                // Other escapes (including `\\`) pass through untouched; the
                // escaped character is consumed so it cannot act as a delimiter.
                Some(other) => {
                    current.push('\\');
                    current.push(other);
                }
                None => current.push('\\'),
            },
            '|' => {
                cells.push(current.trim().to_string());
                current.clear();
                closed_by_border = true;
            }
            _ => current.push(c),
        }
    }

    if !closed_by_border {
        cells.push(current.trim().to_string());
    }

    cells
}
71
#[cfg(test)]
mod tests {
    use super::*;

    /// Happy path: header, separator and two data rows.
    #[test]
    fn test_parse_markdown() {
        let input = r#"| API Metric Name | MongoDB Slice | Position |
|-----------------|---------------|----------|
| sessions | ACQUISITION | Index 0 |
| newUsers | ACQUISITION | Index 1 |"#;

        let parser = MarkdownParser;
        let table = parser.parse(input).unwrap();

        assert_eq!(
            table.headers(),
            &["API Metric Name", "MongoDB Slice", "Position"]
        );
        assert_eq!(table.rows().len(), 2);
        assert_eq!(table.rows()[0], vec!["sessions", "ACQUISITION", "Index 0"]);
        assert_eq!(table.rows()[1], vec!["newUsers", "ACQUISITION", "Index 1"]);
    }

    /// Blank lines are skipped and alignment colons in the separator row are
    /// accepted.
    #[test]
    fn test_parse_markdown_blank_lines_and_alignment_markers() {
        let input = "\n| Name | Qty |\n|:-----|----:|\n\n| apple | 3 |\n";

        let parser = MarkdownParser;
        let table = parser.parse(input).unwrap();

        assert_eq!(table.headers(), &["Name", "Qty"]);
        assert_eq!(table.rows().len(), 1);
        assert_eq!(table.rows()[0], vec!["apple", "3"]);
    }

    /// Empty input produces an empty table rather than an error.
    #[test]
    fn test_parse_markdown_empty_input() {
        let parser = MarkdownParser;
        let table = parser.parse("").unwrap();

        let no_headers: [&str; 0] = [];
        assert_eq!(table.headers(), &no_headers);
        assert_eq!(table.rows().len(), 0);
    }
}