table_extractor/parser/
postgres.rs

1use crate::error::Result;
2use crate::{Parser, Table};
3use once_cell::sync::Lazy;
4use regex::Regex;
5
6/// Regex pattern for PostgreSQL separator lines.
7/// Valid format: `----+-------+-----` (sequences of dashes separated by plus signs)
8static POSTGRES_SEP_LINE: Lazy<Regex> =
9    Lazy::new(|| Regex::new(r"^\s*-+(\+-+)+\s*$").expect("Invalid PostgreSQL separator regex"));
10
11pub struct PostgresParser;
12
13impl Parser for PostgresParser {
14    fn parse(&self, input: &str) -> Result<Table> {
15        let lines: Vec<&str> = input.lines().collect();
16
17        if lines.is_empty() {
18            return Ok(Table::new(vec![], vec![]));
19        }
20
21        let mut headers = Vec::new();
22        let mut rows = Vec::new();
23        let mut found_separator = false;
24
25        for line in lines {
26            let trimmed = line.trim();
27
28            // Skip empty lines
29            if trimmed.is_empty() {
30                continue;
31            }
32
33            // Check if this is a separator line (contains dashes and pipes)
34            if is_separator_line(trimmed) {
35                found_separator = true;
36                continue;
37            }
38
39            // Parse the row
40            let cells = parse_postgres_row(trimmed);
41
42            if !found_separator && headers.is_empty() {
43                // First row is the header
44                headers = cells;
45            } else if found_separator {
46                // Data rows come after the separator
47                rows.push(cells);
48            }
49        }
50
51        Table::new_validated(headers, rows)
52    }
53}
54
55fn is_separator_line(line: &str) -> bool {
56    // Use strict regex to match valid PostgreSQL separator format: ----+----+----
57    // This prevents false positives like "+ - + -" or "++----"
58    POSTGRES_SEP_LINE.is_match(line)
59}
60
61fn parse_postgres_row(line: &str) -> Vec<String> {
62    // Split by | and trim each cell
63    // Note: We preserve empty cells as they represent NULL values in PostgreSQL
64    line.split('|')
65        .map(|cell| cell.trim().to_string())
66        .collect()
67}
68
69#[cfg(test)]
70mod tests {
71    use super::*;
72
73    #[test]
74    fn test_parse_postgres() {
75        let input = r#" id | store_id | shopify_location_id | name | active
76----+----------+---------------------+------+--------
77  1 |        1 | gid://shopify/...   | 2299 | t
78  2 |        1 | gid://shopify/...   | 4510 | t"#;
79
80        let parser = PostgresParser;
81        let table = parser.parse(input).unwrap();
82
83        assert_eq!(
84            table.headers,
85            vec!["id", "store_id", "shopify_location_id", "name", "active"]
86        );
87        assert_eq!(table.rows.len(), 2);
88        assert_eq!(
89            table.rows[0],
90            vec!["1", "1", "gid://shopify/...", "2299", "t"]
91        );
92    }
93
94    #[test]
95    fn test_parse_postgres_with_empty_cells() {
96        // PostgreSQL NULL values appear as empty cells
97        let input = r#" id | name  | email
98----+-------+-------
99  1 | Alice | a@b.c
100  2 | Bob   |
101  3 |       | c@d.e"#;
102
103        let parser = PostgresParser;
104        let table = parser.parse(input).unwrap();
105
106        assert_eq!(table.headers, vec!["id", "name", "email"]);
107        assert_eq!(table.rows.len(), 3);
108
109        // All rows should have 3 cells, even if some are empty
110        assert_eq!(table.rows[0], vec!["1", "Alice", "a@b.c"]);
111        assert_eq!(
112            table.rows[1],
113            vec!["2", "Bob", ""],
114            "Empty email should be preserved"
115        );
116        assert_eq!(
117            table.rows[2],
118            vec!["3", "", "c@d.e"],
119            "Empty name should be preserved"
120        );
121    }
122
123    #[test]
124    fn test_separator_validation_valid() {
125        // Valid PostgreSQL separator patterns
126        assert!(is_separator_line("----+-------+-----"));
127        assert!(is_separator_line("  ----+----  ")); // with leading/trailing spaces
128        assert!(is_separator_line("-+-")); // minimal valid
129        assert!(is_separator_line("-----+-----+-----+-----")); // multiple sections
130        assert!(is_separator_line("--+--+--")); // short dashes
131    }
132
133    #[test]
134    fn test_separator_validation_invalid() {
135        // Invalid patterns that should be rejected
136        assert!(!is_separator_line("+ - + -")); // spaces between
137        assert!(!is_separator_line("++++----")); // no proper structure
138        assert!(!is_separator_line("  +  -  +  ")); // random spacing
139        assert!(!is_separator_line("----")); // only dashes, no plus
140        assert!(!is_separator_line("++++")); // only plus signs
141        assert!(!is_separator_line("-")); // single dash
142        assert!(!is_separator_line("+")); // single plus
143        assert!(!is_separator_line("")); // empty
144        assert!(!is_separator_line("  ")); // only spaces
145        assert!(!is_separator_line("+-+-")); // starts with plus
146    }
147
148    #[test]
149    fn test_reject_invalid_separator_no_data() {
150        // Input with invalid separator should not find a separator
151        let input = r#" id | name
152+ - + -
153  1 | Alice"#;
154
155        let parser = PostgresParser;
156        let table = parser.parse(input).unwrap();
157
158        // Without a valid separator, it treats all lines as potential headers
159        // The invalid separator line gets parsed as a data row
160        assert_eq!(table.headers, vec!["id", "name"]);
161        // The rest are treated as rows (before finding separator)
162        assert_eq!(table.rows.len(), 0);
163    }
164}