table_to_csv/
parser.rs

1use anyhow::{Context, Result};
2use regex::Regex;
3use std::fs;
4use std::path::Path;
5
6use crate::types::Table;
7
8/// Parse SQL file and extract table schemas and data
9pub fn parse_sql_file<P: AsRef<Path>>(sql_file_path: P) -> Result<(Vec<Table>, String)> {
10    let content = fs::read_to_string(sql_file_path)
11        .context("Failed to read SQL file")?;
12    
13    let mut tables = Vec::new();
14    
15    // Generic regex to match any CREATE TABLE statement
16    let create_table_regex = Regex::new(r"(?i)(?s)CREATE TABLE\s+(\w+)\s*\((.*?)\)(?:\s|;)")?;
17    
18    for captures in create_table_regex.captures_iter(&content) {
19        let table_name = captures.get(1).unwrap().as_str();
20        let columns_text = captures.get(2).unwrap().as_str();
21        let columns = parse_table_columns(columns_text);
22        
23        if !columns.is_empty() {
24            let num_columns = columns.len();
25            println!("Found table: {} with {} columns", table_name, num_columns);
26            tables.push(Table {
27                name: table_name.to_string(),
28                columns,
29            });
30        }
31    }
32    
33    Ok((tables, content))
34}
35
36/// Parse column definitions from CREATE TABLE statement
37pub fn parse_table_columns(columns_text: &str) -> Vec<String> {
38    let mut columns = Vec::new();
39    
40    // Join all lines and split by comma to handle multi-line column definitions
41    let joined = columns_text.lines()
42        .map(|l| l.trim())
43        .collect::<Vec<_>>()
44        .join(" ");
45    
46    // Split by comma and extract column names
47    for part in joined.split(',') {
48        let part = part.trim();
49        if !part.is_empty() 
50            && !part.starts_with("FOREIGN KEY") 
51            && !part.starts_with("PRIMARY KEY") {
52            // Extract the column name (first word)
53            if let Some(first_word) = part.split_whitespace().next() {
54                let col_name = first_word.trim();
55                if !col_name.is_empty() && col_name.chars().all(|c| c.is_alphanumeric() || c == '_') {
56                    columns.push(col_name.to_string());
57                }
58            }
59        }
60    }
61    
62    columns
63}
64
65/// Extract INSERT VALUES from SQL for a specific table
66pub fn extract_insert_values(content: &str, table_name: &str) -> Result<Vec<Vec<String>>> {
67    let mut rows = Vec::new();
68    
69    // Pattern to match INSERT statements - try both with and without quotes
70    // This handles different SQL dialects (PostgreSQL uses ", MySQL uses `, SQLite supports both)
71    let patterns = vec![
72        format!(r#"(?s)INSERT INTO "{}" VALUES\((.*?)\);"#, regex::escape(table_name)),
73        format!(r"(?s)INSERT INTO {} VALUES\((.*?)\);", regex::escape(table_name)),
74        format!(r#"(?s)INSERT INTO `{}` VALUES\((.*?)\);"#, regex::escape(table_name)),
75    ];
76    
77    for pattern in patterns.iter() {
78        match Regex::new(pattern) {
79            Ok(insert_regex) => {
80                let matches = insert_regex.captures_iter(content);
81                for captures in matches {
82                    let values_str = captures.get(1).unwrap().as_str();
83                    let values_str = handle_replace_function(values_str);
84                    let values = parse_values(&values_str);
85                    rows.push(values);
86                }
87                if !rows.is_empty() {
88                    break;
89                }
90            }
91            Err(e) => {
92                eprintln!("Warning: Failed to compile regex pattern: {}", e);
93            }
94        }
95    }
96    
97    Ok(rows)
98}
99
100/// Handle replace() function in SQL values
101fn handle_replace_function(values_str: &str) -> String {
102    if !values_str.contains("replace(") {
103        return values_str.to_string();
104    }
105    
106    let replace_regex = Regex::new(r"(?s)replace\('(.*?)',.*?\)").unwrap();
107    
108    replace_regex.replace_all(values_str, |caps: &regex::Captures| {
109        let json_content = caps.get(1)
110            .map_or("", |m| m.as_str())
111            .replace("\\'", "'")
112            .replace("\\n", "\n");
113        format!("'{}'", json_content)
114    }).to_string()
115}
116
117/// Parse comma-separated values from INSERT statement
118fn parse_values(values_str: &str) -> Vec<String> {
119    let mut values = Vec::new();
120    let mut current_value = String::new();
121    let mut in_quotes = false;
122    let mut quote_char = ' ';
123    let chars: Vec<char> = values_str.chars().collect();
124    let mut i = 0;
125    
126    while i < chars.len() {
127        let char = chars[i];
128        
129        if !in_quotes {
130            if char == '\'' || char == '"' {
131                in_quotes = true;
132                quote_char = char;
133                current_value.push(char);
134            } else if char == ',' {
135                values.push(current_value.trim().to_string());
136                current_value.clear();
137            } else {
138                current_value.push(char);
139            }
140        } else {
141            if char == quote_char {
142                // Check if it's an escaped quote
143                if i + 1 < chars.len() && chars[i + 1] == quote_char {
144                    current_value.push(char);
145                    current_value.push(char);
146                    i += 1; // Skip the next quote
147                } else {
148                    in_quotes = false;
149                    quote_char = ' ';
150                    current_value.push(char);
151                }
152            } else {
153                current_value.push(char);
154            }
155        }
156        
157        i += 1;
158    }
159    
160    // Add the last value
161    if !current_value.is_empty() {
162        values.push(current_value.trim().to_string());
163    }
164    
165    // Clean up values
166    values.into_iter()
167        .map(|val| clean_value(val))
168        .collect()
169}
170
171/// Clean up a single value (remove quotes, unescape)
172fn clean_value(val: String) -> String {
173    let val = val.trim();
174    
175    // Remove surrounding quotes if present
176    let cleaned = if (val.starts_with('\'') && val.ends_with('\'')) || 
177                    (val.starts_with('"') && val.ends_with('"')) {
178        let inner = &val[1..val.len()-1];
179        // Unescape doubled quotes
180        inner.replace("''", "'").replace("\"\"", "\"")
181    } else {
182        val.to_string()
183    };
184    
185    cleaned
186}
187
188#[cfg(test)]
189mod tests {
190    use super::*;
191    
192    #[test]
193    fn test_parse_table_columns() {
194        let columns_text = r#"
195            id TEXT PRIMARY KEY,
196            callSessionId TEXT NOT NULL,
197            type TEXT NOT NULL,
198            FOREIGN KEY (callSessionId) REFERENCES CallSession(id)
199        "#;
200        
201        let columns = parse_table_columns(columns_text);
202        assert_eq!(columns, vec!["id", "callSessionId", "type"]);
203    }
204    
205    #[test]
206    fn test_clean_value() {
207        assert_eq!(clean_value("'test'".to_string()), "test");
208        assert_eq!(clean_value("\"test\"".to_string()), "test");
209        assert_eq!(clean_value("'test''s'".to_string()), "test's");
210        assert_eq!(clean_value("test".to_string()), "test");
211    }
212    
213    #[test]
214    fn test_parse_values() {
215        let values_str = "'value1', 'value2', 'value''3'";
216        let values = parse_values(values_str);
217        assert_eq!(values, vec!["value1", "value2", "value'3"]);
218    }
219}
220