qail_core/analyzer/
scanner.rs

//! Source code scanner for QAIL and SQL queries.
2
3use regex::Regex;
4use std::fs;
5use std::path::{Path, PathBuf};
6
7use super::rust_ast::RustAnalyzer;
8
/// Analysis mode for the codebase scanner.
///
/// Recorded per file in `FileAnalysis::mode`. The scanner assigns
/// [`AnalysisMode::RustAST`] to `.rs` files and [`AnalysisMode::Regex`] to
/// every other supported extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnalysisMode {
    /// Full Rust AST analysis using `syn` (100% accurate)
    RustAST,
    /// Line-by-line regular-expression matching (used for non-Rust sources)
    Regex,
}
16
/// Type of query found in source code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QueryType {
    /// Native QAIL query (get::, set::, del::, add::)
    Qail,
    /// Raw SQL statement (SELECT, INSERT INTO, UPDATE ... SET, DELETE FROM)
    RawSql,
}
24
25/// A reference to a query in source code.
26#[derive(Debug, Clone)]
27pub struct CodeReference {
28    /// File path where the reference was found
29    pub file: PathBuf,
30    /// Line number (1-indexed)
31    pub line: usize,
32    pub table: String,
33    pub columns: Vec<String>,
34    /// Type of query
35    pub query_type: QueryType,
36    /// Code snippet containing the reference
37    pub snippet: String,
38}
39
40/// Analysis result for a single file
41#[derive(Debug, Clone)]
42pub struct FileAnalysis {
43    /// File path
44    pub file: PathBuf,
45    /// Analysis mode used
46    pub mode: AnalysisMode,
47    /// Number of references found
48    pub ref_count: usize,
49    pub safe: bool,
50}
51
52/// Complete scan result with per-file breakdown
53#[derive(Debug, Default)]
54pub struct ScanResult {
55    /// All code references found
56    pub refs: Vec<CodeReference>,
57    /// Per-file analysis breakdown
58    pub files: Vec<FileAnalysis>,
59}
60
61/// Scanner for finding QAIL and SQL references in source code.
62pub struct CodebaseScanner {
63    /// Regex patterns for QAIL queries (legacy symbol syntax)
64    qail_action_pattern: Regex,
65    qail_column_pattern: Regex,
66    /// Regex patterns for QAIL queries (v2 keyword syntax)
67    qail_v2_get_pattern: Regex,
68    qail_v2_set_pattern: Regex,
69    qail_v2_del_pattern: Regex,
70    qail_v2_add_pattern: Regex,
71    /// Regex patterns for SQL queries
72    sql_select_pattern: Regex,
73    sql_insert_pattern: Regex,
74    sql_update_pattern: Regex,
75    sql_delete_pattern: Regex,
76}
77
78impl Default for CodebaseScanner {
79    fn default() -> Self {
80        Self::new()
81    }
82}
83
84impl CodebaseScanner {
85    /// Create a new scanner with default patterns.
86    pub fn new() -> Self {
87        Self {
88            // QAIL legacy patterns: get::table, set::table, del::table, add::table
89            qail_action_pattern: Regex::new(r"(get|set|del|add)::(\w+)").unwrap(),
90            // QAIL column: 'column_name
91            qail_column_pattern: Regex::new(r"'(\w+)").unwrap(),
92            // QAIL v2 keyword patterns
93            qail_v2_get_pattern: Regex::new(r"\bget\s+(\w+)\s+fields\s+(.+?)(?:\s+where|\s+order|\s+limit|$)").unwrap(),
94            qail_v2_set_pattern: Regex::new(r"\bset\s+(\w+)\s+values\s+(.+?)(?:\s+where|$)").unwrap(),
95            qail_v2_del_pattern: Regex::new(r"\bdel\s+(\w+)(?:\s+where|$)").unwrap(),
96            qail_v2_add_pattern: Regex::new(r"\badd\s+(\w+)\s+fields\s+(.+?)\s+values").unwrap(),
97            // SQL patterns
98            sql_select_pattern: Regex::new(r"(?i)SELECT\s+(.+?)\s+FROM\s+(\w+)").unwrap(),
99            sql_insert_pattern: Regex::new(r"(?i)INSERT\s+INTO\s+(\w+)").unwrap(),
100            sql_update_pattern: Regex::new(r"(?i)UPDATE\s+(\w+)\s+SET").unwrap(),
101            sql_delete_pattern: Regex::new(r"(?i)DELETE\s+FROM\s+(\w+)").unwrap(),
102        }
103    }
104
105    /// Scan a directory for all QAIL and SQL references.
106    pub fn scan(&self, path: &Path) -> Vec<CodeReference> {
107        self.scan_with_details(path).refs
108    }
109
110    /// Scan a directory with detailed per-file breakdown.
111    pub fn scan_with_details(&self, path: &Path) -> ScanResult {
112        let mut result = ScanResult::default();
113
114        if path.is_file() {
115            if let Some(ext) = path.extension()
116                && (ext == "rs" || ext == "ts" || ext == "js" || ext == "py")
117            {
118                let mode = if ext == "rs" { AnalysisMode::RustAST } else { AnalysisMode::Regex };
119                let file_refs = self.scan_file(path);
120                let ref_count = file_refs.len();
121                
122                result.files.push(FileAnalysis {
123                    file: path.to_path_buf(),
124                    mode,
125                    ref_count,
126                    safe: true, // Will be updated after impact analysis
127                });
128                result.refs.extend(file_refs);
129            }
130        } else if path.is_dir() {
131            self.scan_dir_with_details(path, &mut result);
132        }
133
134        result
135    }
136
137    /// Recursively scan a directory with per-file tracking.
138    fn scan_dir_with_details(&self, dir: &Path, result: &mut ScanResult) {
139        let entries = match fs::read_dir(dir) {
140            Ok(e) => e,
141            Err(_) => return,
142        };
143
144        for entry in entries.flatten() {
145            let path = entry.path();
146
147            // Skip common non-source directories
148            if path.is_dir() {
149                let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
150                if name == "target"
151                    || name == "node_modules"
152                    || name == ".git"
153                    || name == "vendor"
154                    || name == "__pycache__"
155                    || name == "dist"
156                {
157                    continue;
158                }
159                self.scan_dir_with_details(&path, result);
160            } else if let Some(ext) = path.extension()
161                && (ext == "rs" || ext == "ts" || ext == "js" || ext == "py")
162            {
163                let mode = if ext == "rs" { AnalysisMode::RustAST } else { AnalysisMode::Regex };
164                let file_refs = self.scan_file(&path);
165                let ref_count = file_refs.len();
166                
167                if ref_count > 0 {
168                    result.files.push(FileAnalysis {
169                        file: path.clone(),
170                        mode,
171                        ref_count,
172                        safe: true,
173                    });
174                }
175                result.refs.extend(file_refs);
176            }
177        }
178    }
179
180    /// Scan a single file for references.
181    /// Uses Rust AST analyzer for .rs files + regex for raw SQL, regex-only for others.
182    fn scan_file(&self, path: &Path) -> Vec<CodeReference> {
183        let mut refs = Vec::new();
184        
185        // For Rust files: run AST analyzer first, then also run regex for raw SQL
186        if path.extension().map(|e| e == "rs").unwrap_or(false) {
187            refs.extend(RustAnalyzer::scan_file(path));
188        }
189
190        // Regex scanner: for non-Rust files OR for raw SQL detection in Rust files
191
192        let content = match fs::read_to_string(path) {
193            Ok(c) => c,
194            Err(_) => return refs,
195        };
196
197        for (line_num, line) in content.lines().enumerate() {
198            let line_number = line_num + 1;
199
200            for cap in self.qail_action_pattern.captures_iter(line) {
201                let action = cap.get(1).map(|m| m.as_str()).unwrap_or("");
202                let table = cap.get(2).map(|m| m.as_str()).unwrap_or("");
203
204                let columns: Vec<String> = self
205                    .qail_column_pattern
206                    .captures_iter(line)
207                    .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
208                    .collect();
209
210                refs.push(CodeReference {
211                    file: path.to_path_buf(),
212                    line: line_number,
213                    table: table.to_string(),
214                    columns,
215                    query_type: QueryType::Qail,
216                    snippet: format!("{}::{}", action, table),
217                });
218            }
219
220            for cap in self.qail_v2_get_pattern.captures_iter(line) {
221                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
222                let columns_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
223                let columns = Self::parse_v2_columns(columns_str);
224
225                refs.push(CodeReference {
226                    file: path.to_path_buf(),
227                    line: line_number,
228                    table: table.to_string(),
229                    columns,
230                    query_type: QueryType::Qail,
231                    snippet: format!("get {} fields ...", table),
232                });
233            }
234
235            for cap in self.qail_v2_set_pattern.captures_iter(line) {
236                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
237                let columns_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
238                let columns = Self::parse_v2_set_columns(columns_str);
239
240                refs.push(CodeReference {
241                    file: path.to_path_buf(),
242                    line: line_number,
243                    table: table.to_string(),
244                    columns,
245                    query_type: QueryType::Qail,
246                    snippet: format!("set {} values ...", table),
247                });
248            }
249
250            for cap in self.qail_v2_del_pattern.captures_iter(line) {
251                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
252
253                refs.push(CodeReference {
254                    file: path.to_path_buf(),
255                    line: line_number,
256                    table: table.to_string(),
257                    columns: vec![],
258                    query_type: QueryType::Qail,
259                    snippet: format!("del {}", table),
260                });
261            }
262
263            for cap in self.qail_v2_add_pattern.captures_iter(line) {
264                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
265                let columns_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
266                let columns = Self::parse_v2_columns(columns_str);
267
268                refs.push(CodeReference {
269                    file: path.to_path_buf(),
270                    line: line_number,
271                    table: table.to_string(),
272                    columns,
273                    query_type: QueryType::Qail,
274                    snippet: format!("add {} fields ...", table),
275                });
276            }
277
278            for cap in self.sql_select_pattern.captures_iter(line) {
279                let columns_str = cap.get(1).map(|m| m.as_str()).unwrap_or("");
280                let table = cap.get(2).map(|m| m.as_str()).unwrap_or("");
281
282                let columns = if columns_str.trim() == "*" {
283                    vec!["*".to_string()]
284                } else {
285                    columns_str
286                        .split(',')
287                        .map(|c| c.trim().to_string())
288                        .filter(|c| !c.is_empty())
289                        .collect()
290                };
291
292                refs.push(CodeReference {
293                    file: path.to_path_buf(),
294                    line: line_number,
295                    table: table.to_string(),
296                    columns,
297                    query_type: QueryType::RawSql,
298                    snippet: line.trim().chars().take(60).collect(),
299                });
300            }
301
302            for cap in self.sql_insert_pattern.captures_iter(line) {
303                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
304                refs.push(CodeReference {
305                    file: path.to_path_buf(),
306                    line: line_number,
307                    table: table.to_string(),
308                    columns: vec![],
309                    query_type: QueryType::RawSql,
310                    snippet: line.trim().chars().take(60).collect(),
311                });
312            }
313
314            for cap in self.sql_update_pattern.captures_iter(line) {
315                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
316                refs.push(CodeReference {
317                    file: path.to_path_buf(),
318                    line: line_number,
319                    table: table.to_string(),
320                    columns: vec![],
321                    query_type: QueryType::RawSql,
322                    snippet: line.trim().chars().take(60).collect(),
323                });
324            }
325
326            for cap in self.sql_delete_pattern.captures_iter(line) {
327                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
328                refs.push(CodeReference {
329                    file: path.to_path_buf(),
330                    line: line_number,
331                    table: table.to_string(),
332                    columns: vec![],
333                    query_type: QueryType::RawSql,
334                    snippet: line.trim().chars().take(60).collect(),
335                });
336            }
337        }
338
339        refs
340    }
341
342    /// Parse v2 column list: "id, name, email" or "*"
343    fn parse_v2_columns(columns_str: &str) -> Vec<String> {
344        if columns_str.trim() == "*" {
345            return vec!["*".to_string()];
346        }
347        columns_str
348            .split(',')
349            .map(|c| c.trim().to_string())
350            .filter(|c| !c.is_empty() && !c.starts_with('$'))
351            .collect()
352    }
353
354    /// Parse v2 SET column assignments: "name = 'Alice', status = 'active'"
355    fn parse_v2_set_columns(columns_str: &str) -> Vec<String> {
356        columns_str
357            .split(',')
358            .filter_map(|assignment| {
359                let parts: Vec<&str> = assignment.split('=').collect();
360                if !parts.is_empty() {
361                    Some(parts[0].trim().to_string())
362                } else {
363                    None
364                }
365            })
366            .filter(|c| !c.is_empty())
367            .collect()
368    }
369}
370
#[cfg(test)]
mod tests {
    use super::*;

    /// The legacy action pattern captures the action keyword and the table name.
    #[test]
    fn test_qail_pattern() {
        let line = r#"let result = qail!("get::users:'name'email[id=$1]");"#;
        let scanner = CodebaseScanner::new();
        let pattern = &scanner.qail_action_pattern;

        assert!(pattern.is_match(line));

        let cap = pattern.captures(line).unwrap();
        assert_eq!(&cap[1], "get");
        assert_eq!(&cap[2], "users");
    }

    /// The SELECT pattern captures the table name that follows FROM.
    #[test]
    fn test_sql_select_pattern() {
        let line = r#"sqlx::query("SELECT name, email FROM users WHERE id = $1")"#;
        let scanner = CodebaseScanner::new();
        let pattern = &scanner.sql_select_pattern;

        assert!(pattern.is_match(line));

        let cap = pattern.captures(line).unwrap();
        assert_eq!(&cap[2], "users");
    }

    /// The legacy column pattern yields each `'column` in order of appearance.
    #[test]
    fn test_column_extraction() {
        let line = r#"get::users:'name'email'created_at"#;
        let scanner = CodebaseScanner::new();

        let mut columns: Vec<String> = Vec::new();
        for cap in scanner.qail_column_pattern.captures_iter(line) {
            if let Some(m) = cap.get(1) {
                columns.push(m.as_str().to_string());
            }
        }

        assert_eq!(columns, vec!["name", "email", "created_at"]);
    }
}