table_extractor/parser/
markdown.rs1use crate::error::Result;
2use crate::{Parser, Table};
3
4pub struct MarkdownParser;
5
6impl Parser for MarkdownParser {
7 fn parse(&self, input: &str) -> Result<Table> {
8 let lines: Vec<&str> = input.lines().collect();
9
10 if lines.is_empty() {
11 return Ok(Table::new(vec![], vec![]));
12 }
13
14 let mut headers = Vec::new();
15 let mut rows = Vec::new();
16 let mut found_separator = false;
17
18 for line in lines {
19 let trimmed = line.trim();
20
21 if trimmed.is_empty() {
23 continue;
24 }
25
26 if is_separator_line(trimmed) {
28 found_separator = true;
29 continue;
30 }
31
32 let cells = parse_markdown_row(trimmed);
34
35 if !found_separator && headers.is_empty() {
36 headers = cells;
38 } else if found_separator {
39 rows.push(cells);
41 }
42 }
43
44 Table::new_validated(headers, rows)
45 }
46}
47
/// Reports whether `line` has the shape of a Markdown table delimiter row.
///
/// A line qualifies when it consists solely of `|`, `-`, `:` and space
/// characters and contains at least one `-` and at least one `|`.
fn is_separator_line(line: &str) -> bool {
    let mut has_dash = false;
    let mut has_pipe = false;

    for ch in line.chars() {
        match ch {
            '-' => has_dash = true,
            '|' => has_pipe = true,
            ':' | ' ' => {}
            // Any other character means this is a regular content row.
            _ => return false,
        }
    }

    has_dash && has_pipe
}
54
/// Splits one Markdown table row into its trimmed cell strings.
///
/// At most one leading and one trailing `|` are treated as row borders, so
/// empty cells at either edge (`|| b |`, `| a ||`) are kept instead of being
/// swallowed together with the border. A backslash-escaped pipe (`\|`) is
/// kept as a literal `|` inside the cell rather than acting as a delimiter;
/// any other backslash sequence is passed through unchanged.
///
/// Always returns at least one cell (a single empty string for an empty row).
fn parse_markdown_row(line: &str) -> Vec<String> {
    let line = line.trim();
    // Remove a single border pipe only; additional pipes delimit empty cells.
    let body = line.strip_prefix('|').unwrap_or(line);

    let mut cells = Vec::new();
    let mut current = String::new();
    // True while the most recently consumed character was an unescaped `|`.
    let mut ended_on_delimiter = false;
    let mut chars = body.chars();

    while let Some(ch) = chars.next() {
        ended_on_delimiter = false;
        match ch {
            '\\' => match chars.next() {
                // `\|` is an escaped pipe: part of the cell text.
                Some('|') => current.push('|'),
                // Consume other escapes as a pair so `\\|` ends the cell.
                Some(other) => {
                    current.push('\\');
                    current.push(other);
                }
                None => current.push('\\'),
            },
            '|' => {
                cells.push(current.trim().to_string());
                current.clear();
                ended_on_delimiter = true;
            }
            other => current.push(other),
        }
    }

    // A trailing unescaped pipe is the closing border, not the start of
    // another cell; otherwise the remaining text is the last cell.
    if !ended_on_delimiter {
        cells.push(current.trim().to_string());
    }

    cells
}
65
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a three-column table with two body rows and checks the header
    /// cells and each data row.
    #[test]
    fn test_parse_markdown() {
        let input = r#"| API Metric Name | MongoDB Slice | Position |
|-----------------|---------------|----------|
| sessions | ACQUISITION | Index 0 |
| newUsers | ACQUISITION | Index 1 |"#;

        let expected_headers = vec!["API Metric Name", "MongoDB Slice", "Position"];
        let expected_rows = [
            vec!["sessions", "ACQUISITION", "Index 0"],
            vec!["newUsers", "ACQUISITION", "Index 1"],
        ];

        let table = MarkdownParser.parse(input).unwrap();

        assert_eq!(table.headers, expected_headers);
        assert_eq!(table.rows.len(), expected_rows.len());
        for (index, expected) in expected_rows.iter().enumerate() {
            assert_eq!(table.rows[index], *expected);
        }
    }
}