table_extractor/parser/
postgres.rs

1use crate::error::Result;
2use crate::{Parser, Table};
3use regex::Regex;
4use std::sync::OnceLock;
5
6/// Regex pattern for PostgreSQL separator lines.
7/// Valid format: `----+-------+-----` (sequences of dashes separated by plus signs)
8static POSTGRES_SEP_LINE: OnceLock<Regex> = OnceLock::new();
9
10fn get_postgres_sep_line() -> &'static Regex {
11    POSTGRES_SEP_LINE.get_or_init(|| {
12        Regex::new(r"^\s*-+(\+-+)+\s*$").expect("Invalid PostgreSQL separator regex")
13    })
14}
15
16pub struct PostgresParser;
17
18impl Parser for PostgresParser {
19    fn parse(&self, input: &str) -> Result<Table> {
20        let lines: Vec<&str> = input.lines().collect();
21
22        if lines.is_empty() {
23            return Ok(Table::new(vec![], vec![]));
24        }
25
26        let mut headers = Vec::new();
27        let mut rows = Vec::new();
28        let mut found_separator = false;
29
30        for line in lines {
31            let trimmed = line.trim();
32
33            // Skip empty lines
34            if trimmed.is_empty() {
35                continue;
36            }
37
38            // Check if this is a separator line (contains dashes and pipes)
39            if is_separator_line(trimmed) {
40                found_separator = true;
41                continue;
42            }
43
44            // Parse the row
45            let cells = parse_postgres_row(trimmed);
46
47            if !found_separator && headers.is_empty() {
48                // First row is the header
49                headers = cells;
50            } else if found_separator {
51                // Data rows come after the separator
52                rows.push(cells);
53            }
54        }
55
56        Table::new_validated(headers, rows)
57    }
58}
59
60fn is_separator_line(line: &str) -> bool {
61    // Use strict regex to match valid PostgreSQL separator format: ----+----+----
62    // This prevents false positives like "+ - + -" or "++----"
63    get_postgres_sep_line().is_match(line)
64}
65
/// Splits one `|`-delimited row into its cells, trimming whitespace around
/// each cell.
///
/// Empty cells are kept (as empty strings) because they represent NULL values
/// in PostgreSQL output. The returned vector always has exactly one more
/// element than the number of `|` characters in `line`, so an empty `line`
/// yields a single empty cell.
fn parse_postgres_row(line: &str) -> Vec<String> {
    // One more cell than delimiters; sizing up front avoids regrowth.
    let column_count = line.matches('|').count() + 1;

    let mut cells = Vec::with_capacity(column_count);
    cells.extend(line.split('|').map(|cell| cell.trim().to_owned()));
    cells
}
80
#[cfg(test)]
mod tests {
    use super::*;

    /// Header, separator and two data rows of a five-column table.
    #[test]
    fn test_parse_postgres() {
        let input = r#" id | store_id | shopify_location_id | name | active
----+----------+---------------------+------+--------
  1 |        1 | gid://shopify/...   | 2299 | t
  2 |        1 | gid://shopify/...   | 4510 | t"#;

        let table = PostgresParser.parse(input).unwrap();
        let rows = table.rows();

        assert_eq!(
            table.headers(),
            &["id", "store_id", "shopify_location_id", "name", "active"]
        );
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0], vec!["1", "1", "gid://shopify/...", "2299", "t"]);
    }

    /// NULL values show up as empty cells and must keep their column slot.
    #[test]
    fn test_parse_postgres_with_empty_cells() {
        let input = r#" id | name  | email
----+-------+-------
  1 | Alice | a@b.c
  2 | Bob   |
  3 |       | c@d.e"#;

        let table = PostgresParser.parse(input).unwrap();
        let rows = table.rows();

        assert_eq!(table.headers(), &["id", "name", "email"]);
        assert_eq!(rows.len(), 3);

        // Every row keeps three cells, whether or not some are empty.
        assert_eq!(rows[0], vec!["1", "Alice", "a@b.c"]);
        assert_eq!(
            rows[1],
            vec!["2", "Bob", ""],
            "Empty email should be preserved"
        );
        assert_eq!(
            rows[2],
            vec!["3", "", "c@d.e"],
            "Empty name should be preserved"
        );
    }

    /// Well-formed separators: dash runs joined by single plus signs.
    #[test]
    fn test_separator_validation_valid() {
        let accepted = [
            "----+-------+-----",
            "  ----+----  ",           // leading/trailing spaces
            "-+-",                     // shortest accepted form
            "-----+-----+-----+-----", // several sections
            "--+--+--",                // short dash runs
        ];

        for candidate in accepted {
            assert!(
                is_separator_line(candidate),
                "expected {candidate:?} to be accepted as a separator"
            );
        }
    }

    /// Look-alikes that must not be mistaken for a separator.
    #[test]
    fn test_separator_validation_invalid() {
        let rejected = [
            "+ - + -",     // spaces between symbols
            "++++----",    // no dash/plus alternation
            "  +  -  +  ", // arbitrary spacing
            "----",        // dashes without any plus
            "++++",        // plus signs only
            "-",           // lone dash
            "+",           // lone plus
            "",            // empty
            "  ",          // whitespace only
            "+-+-",        // begins with a plus
        ];

        for candidate in rejected {
            assert!(
                !is_separator_line(candidate),
                "expected {candidate:?} to be rejected as a separator"
            );
        }
    }

    /// A malformed separator is not recognised, so no data rows are collected.
    #[test]
    fn test_reject_invalid_separator_no_data() {
        let input = r#" id | name
+ - + -
  1 | Alice"#;

        let table = PostgresParser.parse(input).unwrap();

        // The first line is still taken as the header.
        assert_eq!(table.headers(), &["id", "name"]);
        // No valid separator was seen, so the remaining lines are discarded
        // rather than stored as rows.
        assert_eq!(table.rows().len(), 0);
    }
}
176}