qail_core/analyzer/
scanner.rs

1//! Source code scanner for QAIL and SQL queries.
2
3use regex::Regex;
4use std::fs;
5use std::path::{Path, PathBuf};
6
7use super::rust_ast::RustAnalyzer;
8
/// Analysis mode for the codebase scanner.
///
/// Recorded per file in `FileAnalysis::mode` so reports can show how each
/// file was examined.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AnalysisMode {
    /// Full Rust AST analysis using `syn` (100% accurate)
    RustAST,
    /// Line-by-line regular-expression matching; the mode reported for
    /// non-Rust sources (`.ts`, `.js`, `.py`).
    Regex,
}
16
/// Type of query found in source code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum QueryType {
    /// Native QAIL query (get::, set::, del::, add::)
    Qail,
    /// Raw SQL statement embedded in the source (SELECT, INSERT, UPDATE or
    /// DELETE).
    RawSql,
}
24
25/// A reference to a query in source code.
26#[derive(Debug, Clone)]
27pub struct CodeReference {
28    /// File path where the reference was found
29    pub file: PathBuf,
30    /// Line number (1-indexed)
31    pub line: usize,
32    pub table: String,
33    pub columns: Vec<String>,
34    /// Type of query
35    pub query_type: QueryType,
36    /// Code snippet containing the reference
37    pub snippet: String,
38}
39
40/// Analysis result for a single file
41#[derive(Debug, Clone)]
42pub struct FileAnalysis {
43    /// File path
44    pub file: PathBuf,
45    /// Analysis mode used
46    pub mode: AnalysisMode,
47    pub ref_count: usize,
48    pub safe: bool,
49}
50
51/// Complete scan result with per-file breakdown
52#[derive(Debug, Default)]
53pub struct ScanResult {
54    /// All code references found
55    pub refs: Vec<CodeReference>,
56    /// Per-file analysis breakdown
57    pub files: Vec<FileAnalysis>,
58}
59
60/// Scanner for finding QAIL and SQL references in source code.
61pub struct CodebaseScanner {
62    /// Regex patterns for QAIL queries (legacy symbol syntax)
63    qail_action_pattern: Regex,
64    qail_column_pattern: Regex,
65    /// Regex patterns for QAIL queries (v2 keyword syntax)
66    qail_v2_get_pattern: Regex,
67    qail_v2_set_pattern: Regex,
68    qail_v2_del_pattern: Regex,
69    qail_v2_add_pattern: Regex,
70    /// Regex patterns for SQL queries
71    sql_select_pattern: Regex,
72    sql_insert_pattern: Regex,
73    sql_update_pattern: Regex,
74    sql_delete_pattern: Regex,
75}
76
77impl Default for CodebaseScanner {
78    fn default() -> Self {
79        Self::new()
80    }
81}
82
83impl CodebaseScanner {
84    /// Create a new scanner with default patterns.
85    pub fn new() -> Self {
86        Self {
87            // QAIL legacy patterns: get::table, set::table, del::table, add::table
88            qail_action_pattern: Regex::new(r"(get|set|del|add)::(\w+)").unwrap(),
89            // QAIL column: 'column_name
90            qail_column_pattern: Regex::new(r"'(\w+)").unwrap(),
91            // QAIL v2 keyword patterns
92            qail_v2_get_pattern: Regex::new(r"\bget\s+(\w+)\s+fields\s+(.+?)(?:\s+where|\s+order|\s+limit|$)").unwrap(),
93            qail_v2_set_pattern: Regex::new(r"\bset\s+(\w+)\s+values\s+(.+?)(?:\s+where|$)").unwrap(),
94            qail_v2_del_pattern: Regex::new(r"\bdel\s+(\w+)(?:\s+where|$)").unwrap(),
95            qail_v2_add_pattern: Regex::new(r"\badd\s+(\w+)\s+fields\s+(.+?)\s+values").unwrap(),
96            // SQL patterns
97            sql_select_pattern: Regex::new(r"(?i)SELECT\s+(.+?)\s+FROM\s+(\w+)").unwrap(),
98            sql_insert_pattern: Regex::new(r"(?i)INSERT\s+INTO\s+(\w+)").unwrap(),
99            sql_update_pattern: Regex::new(r"(?i)UPDATE\s+(\w+)\s+SET").unwrap(),
100            sql_delete_pattern: Regex::new(r"(?i)DELETE\s+FROM\s+(\w+)").unwrap(),
101        }
102    }
103
104    /// Scan a directory for all QAIL and SQL references.
105    pub fn scan(&self, path: &Path) -> Vec<CodeReference> {
106        self.scan_with_details(path).refs
107    }
108
109    /// Scan a directory with detailed per-file breakdown.
110    pub fn scan_with_details(&self, path: &Path) -> ScanResult {
111        let mut result = ScanResult::default();
112
113        if path.is_file() {
114            if let Some(ext) = path.extension()
115                && (ext == "rs" || ext == "ts" || ext == "js" || ext == "py")
116            {
117                let mode = if ext == "rs" { AnalysisMode::RustAST } else { AnalysisMode::Regex };
118                let file_refs = self.scan_file(path);
119                let ref_count = file_refs.len();
120                
121                result.files.push(FileAnalysis {
122                    file: path.to_path_buf(),
123                    mode,
124                    ref_count,
125                    safe: true, // Will be updated after impact analysis
126                });
127                result.refs.extend(file_refs);
128            }
129        } else if path.is_dir() {
130            self.scan_dir_with_details(path, &mut result);
131        }
132
133        result
134    }
135
136    /// Recursively scan a directory with per-file tracking.
137    fn scan_dir_with_details(&self, dir: &Path, result: &mut ScanResult) {
138        let entries = match fs::read_dir(dir) {
139            Ok(e) => e,
140            Err(_) => return,
141        };
142
143        for entry in entries.flatten() {
144            let path = entry.path();
145
146            // Skip common non-source directories
147            if path.is_dir() {
148                let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
149                if name == "target"
150                    || name == "node_modules"
151                    || name == ".git"
152                    || name == "vendor"
153                    || name == "__pycache__"
154                    || name == "dist"
155                {
156                    continue;
157                }
158                self.scan_dir_with_details(&path, result);
159            } else if let Some(ext) = path.extension()
160                && (ext == "rs" || ext == "ts" || ext == "js" || ext == "py")
161            {
162                let mode = if ext == "rs" { AnalysisMode::RustAST } else { AnalysisMode::Regex };
163                let file_refs = self.scan_file(&path);
164                let ref_count = file_refs.len();
165                
166                if ref_count > 0 {
167                    result.files.push(FileAnalysis {
168                        file: path.clone(),
169                        mode,
170                        ref_count,
171                        safe: true,
172                    });
173                }
174                result.refs.extend(file_refs);
175            }
176        }
177    }
178
179    /// Scan a single file for references.
180    /// Uses Rust AST analyzer for .rs files + regex for raw SQL, regex-only for others.
181    fn scan_file(&self, path: &Path) -> Vec<CodeReference> {
182        let mut refs = Vec::new();
183        
184        // For Rust files: run AST analyzer first, then also run regex for raw SQL
185        if path.extension().map(|e| e == "rs").unwrap_or(false) {
186            refs.extend(RustAnalyzer::scan_file(path));
187        }
188
189        // Regex scanner: for non-Rust files OR for raw SQL detection in Rust files
190
191        let content = match fs::read_to_string(path) {
192            Ok(c) => c,
193            Err(_) => return refs,
194        };
195
196        for (line_num, line) in content.lines().enumerate() {
197            let line_number = line_num + 1;
198
199            for cap in self.qail_action_pattern.captures_iter(line) {
200                let action = cap.get(1).map(|m| m.as_str()).unwrap_or("");
201                let table = cap.get(2).map(|m| m.as_str()).unwrap_or("");
202
203                let columns: Vec<String> = self
204                    .qail_column_pattern
205                    .captures_iter(line)
206                    .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
207                    .collect();
208
209                refs.push(CodeReference {
210                    file: path.to_path_buf(),
211                    line: line_number,
212                    table: table.to_string(),
213                    columns,
214                    query_type: QueryType::Qail,
215                    snippet: format!("{}::{}", action, table),
216                });
217            }
218
219            for cap in self.qail_v2_get_pattern.captures_iter(line) {
220                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
221                let columns_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
222                let columns = Self::parse_v2_columns(columns_str);
223
224                refs.push(CodeReference {
225                    file: path.to_path_buf(),
226                    line: line_number,
227                    table: table.to_string(),
228                    columns,
229                    query_type: QueryType::Qail,
230                    snippet: format!("get {} fields ...", table),
231                });
232            }
233
234            for cap in self.qail_v2_set_pattern.captures_iter(line) {
235                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
236                let columns_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
237                let columns = Self::parse_v2_set_columns(columns_str);
238
239                refs.push(CodeReference {
240                    file: path.to_path_buf(),
241                    line: line_number,
242                    table: table.to_string(),
243                    columns,
244                    query_type: QueryType::Qail,
245                    snippet: format!("set {} values ...", table),
246                });
247            }
248
249            for cap in self.qail_v2_del_pattern.captures_iter(line) {
250                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
251
252                refs.push(CodeReference {
253                    file: path.to_path_buf(),
254                    line: line_number,
255                    table: table.to_string(),
256                    columns: vec![],
257                    query_type: QueryType::Qail,
258                    snippet: format!("del {}", table),
259                });
260            }
261
262            for cap in self.qail_v2_add_pattern.captures_iter(line) {
263                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
264                let columns_str = cap.get(2).map(|m| m.as_str()).unwrap_or("");
265                let columns = Self::parse_v2_columns(columns_str);
266
267                refs.push(CodeReference {
268                    file: path.to_path_buf(),
269                    line: line_number,
270                    table: table.to_string(),
271                    columns,
272                    query_type: QueryType::Qail,
273                    snippet: format!("add {} fields ...", table),
274                });
275            }
276
277            for cap in self.sql_select_pattern.captures_iter(line) {
278                let columns_str = cap.get(1).map(|m| m.as_str()).unwrap_or("");
279                let table = cap.get(2).map(|m| m.as_str()).unwrap_or("");
280
281                let columns = if columns_str.trim() == "*" {
282                    vec!["*".to_string()]
283                } else {
284                    columns_str
285                        .split(',')
286                        .map(|c| c.trim().to_string())
287                        .filter(|c| !c.is_empty())
288                        .collect()
289                };
290
291                refs.push(CodeReference {
292                    file: path.to_path_buf(),
293                    line: line_number,
294                    table: table.to_string(),
295                    columns,
296                    query_type: QueryType::RawSql,
297                    snippet: line.trim().chars().take(60).collect(),
298                });
299            }
300
301            for cap in self.sql_insert_pattern.captures_iter(line) {
302                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
303                refs.push(CodeReference {
304                    file: path.to_path_buf(),
305                    line: line_number,
306                    table: table.to_string(),
307                    columns: vec![],
308                    query_type: QueryType::RawSql,
309                    snippet: line.trim().chars().take(60).collect(),
310                });
311            }
312
313            for cap in self.sql_update_pattern.captures_iter(line) {
314                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
315                refs.push(CodeReference {
316                    file: path.to_path_buf(),
317                    line: line_number,
318                    table: table.to_string(),
319                    columns: vec![],
320                    query_type: QueryType::RawSql,
321                    snippet: line.trim().chars().take(60).collect(),
322                });
323            }
324
325            for cap in self.sql_delete_pattern.captures_iter(line) {
326                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
327                refs.push(CodeReference {
328                    file: path.to_path_buf(),
329                    line: line_number,
330                    table: table.to_string(),
331                    columns: vec![],
332                    query_type: QueryType::RawSql,
333                    snippet: line.trim().chars().take(60).collect(),
334                });
335            }
336        }
337
338        refs
339    }
340
/// Parse a v2 column list such as `"id, name, email"` or `"*"`.
///
/// A list that is exactly `*` (ignoring surrounding whitespace) yields
/// `["*"]`. Otherwise the input is split on commas, each entry is trimmed,
/// and entries that are empty or start with `$` are dropped.
fn parse_v2_columns(columns_str: &str) -> Vec<String> {
    match columns_str.trim() {
        "*" => vec![String::from("*")],
        _ => columns_str
            .split(',')
            .map(str::trim)
            .filter(|name| !(name.is_empty() || name.starts_with('$')))
            .map(String::from)
            .collect(),
    }
}
352
/// Parse v2 SET column assignments: "name = 'Alice', status = 'active'"
///
/// Returns the trimmed left-hand side of each comma-separated assignment,
/// skipping empty entries. A segment without `=` is returned whole.
///
/// Commas inside single- or double-quoted values do not split assignments,
/// so `name = 'Smith, John', age = 42` yields `["name", "age"]` rather than a
/// bogus `John'` column. If a quote is never closed the quoting cannot be
/// trusted, and the input is split on every comma instead.
fn parse_v2_set_columns(columns_str: &str) -> Vec<String> {
    let mut segments: Vec<&str> = Vec::new();
    let mut open_quote: Option<char> = None;
    let mut start = 0;

    for (idx, ch) in columns_str.char_indices() {
        match (open_quote, ch) {
            (None, '\'' | '"') => open_quote = Some(ch),
            // A doubled quote (`''`) closes and immediately reopens, which
            // keeps the state correct for SQL-style escaping.
            (Some(quote), _) if ch == quote => open_quote = None,
            (None, ',') => {
                segments.push(&columns_str[start..idx]);
                // ',' is one byte, so `idx + 1` is a valid char boundary.
                start = idx + 1;
            }
            _ => {}
        }
    }

    if open_quote.is_some() {
        // Unterminated quote: fall back to a plain comma split.
        segments = columns_str.split(',').collect();
    } else {
        segments.push(&columns_str[start..]);
    }

    segments
        .into_iter()
        // `split` always yields at least one item, so the fallback is never used.
        .map(|assignment| assignment.split('=').next().unwrap_or("").trim())
        .filter(|column| !column.is_empty())
        .map(String::from)
        .collect()
}
368}
369
#[cfg(test)]
mod tests {
    use super::*;

    /// The legacy `action::table` syntax exposes the action and table as
    /// capture groups 1 and 2.
    #[test]
    fn test_qail_pattern() {
        let scanner = CodebaseScanner::new();
        let line = r#"let result = qail!("get::users:'name'email[id=$1]");"#;
        let pattern = &scanner.qail_action_pattern;

        assert!(pattern.is_match(line));

        let cap = pattern
            .captures(line)
            .expect("legacy QAIL action should be captured");
        assert_eq!(&cap[1], "get");
        assert_eq!(&cap[2], "users");
    }

    /// A raw SQL SELECT exposes the table name as capture group 2.
    #[test]
    fn test_sql_select_pattern() {
        let scanner = CodebaseScanner::new();
        let line = r#"sqlx::query("SELECT name, email FROM users WHERE id = $1")"#;
        let pattern = &scanner.sql_select_pattern;

        assert!(pattern.is_match(line));

        let cap = pattern
            .captures(line)
            .expect("SELECT statement should be captured");
        assert_eq!(&cap[2], "users");
    }

    /// Every `'column` marker on a legacy QAIL line is extracted in order.
    #[test]
    fn test_column_extraction() {
        let scanner = CodebaseScanner::new();
        let line = r#"get::users:'name'email'created_at"#;

        let mut columns: Vec<String> = Vec::new();
        for cap in scanner.qail_column_pattern.captures_iter(line) {
            if let Some(name) = cap.get(1) {
                columns.push(name.as_str().to_string());
            }
        }

        assert_eq!(columns, vec!["name", "email", "created_at"]);
    }
}