qail_core/analyzer/
scanner.rs

1//! Source code scanner for QAIL and SQL queries.
2
3use std::path::{Path, PathBuf};
4use std::fs;
5use regex::Regex;
6
/// Kind of query a [`CodeReference`] points at.
///
/// The enum is a plain, fieldless tag, so it also derives `Eq` and `Hash`;
/// this lets callers group or count references per query kind with
/// `HashMap` / `HashSet`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum QueryType {
    /// Native QAIL query (`get::`, `set::`, `del::`, `add::`).
    Qail,
    /// Raw SQL query (`SELECT`, `INSERT`, `UPDATE`, `DELETE`).
    RawSql,
}
15
16/// A reference to a query in source code.
17#[derive(Debug, Clone)]
18pub struct CodeReference {
19    /// File path where the reference was found
20    pub file: PathBuf,
21    /// Line number (1-indexed)
22    pub line: usize,
23    /// Table name referenced
24    pub table: String,
25    /// Column names referenced (if any)
26    pub columns: Vec<String>,
27    /// Type of query
28    pub query_type: QueryType,
29    /// Code snippet containing the reference
30    pub snippet: String,
31}
32
33/// Scanner for finding QAIL and SQL references in source code.
34pub struct CodebaseScanner {
35    /// Regex patterns for QAIL queries
36    qail_action_pattern: Regex,
37    qail_column_pattern: Regex,
38    /// Regex patterns for SQL queries
39    sql_select_pattern: Regex,
40    sql_insert_pattern: Regex,
41    sql_update_pattern: Regex,
42    sql_delete_pattern: Regex,
43}
44
45impl Default for CodebaseScanner {
46    fn default() -> Self {
47        Self::new()
48    }
49}
50
51impl CodebaseScanner {
52    /// Create a new scanner with default patterns.
53    pub fn new() -> Self {
54        Self {
55            // QAIL patterns: get::table, set::table, del::table, add::table
56            qail_action_pattern: Regex::new(r"(get|set|del|add)::(\w+)").unwrap(),
57            // QAIL column: 'column_name
58            qail_column_pattern: Regex::new(r"'(\w+)").unwrap(),
59            // SQL patterns
60            sql_select_pattern: Regex::new(r"(?i)SELECT\s+(.+?)\s+FROM\s+(\w+)").unwrap(),
61            sql_insert_pattern: Regex::new(r"(?i)INSERT\s+INTO\s+(\w+)").unwrap(),
62            sql_update_pattern: Regex::new(r"(?i)UPDATE\s+(\w+)\s+SET").unwrap(),
63            sql_delete_pattern: Regex::new(r"(?i)DELETE\s+FROM\s+(\w+)").unwrap(),
64        }
65    }
66    
67    /// Scan a directory for all QAIL and SQL references.
68    pub fn scan(&self, path: &Path) -> Vec<CodeReference> {
69        let mut refs = Vec::new();
70        
71        if path.is_file() {
72            if let Some(ext) = path.extension() {
73                if ext == "rs" || ext == "ts" || ext == "js" || ext == "py" {
74                    refs.extend(self.scan_file(path));
75                }
76            }
77        } else if path.is_dir() {
78            self.scan_dir_recursive(path, &mut refs);
79        }
80        
81        refs
82    }
83    
84    /// Recursively scan a directory.
85    fn scan_dir_recursive(&self, dir: &Path, refs: &mut Vec<CodeReference>) {
86        let entries = match fs::read_dir(dir) {
87            Ok(e) => e,
88            Err(_) => return,
89        };
90        
91        for entry in entries.flatten() {
92            let path = entry.path();
93            
94            // Skip common non-source directories
95            if path.is_dir() {
96                let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
97                if name == "target" || name == "node_modules" || name == ".git" || 
98                   name == "vendor" || name == "__pycache__" {
99                    continue;
100                }
101                self.scan_dir_recursive(&path, refs);
102            } else if let Some(ext) = path.extension() {
103                if ext == "rs" || ext == "ts" || ext == "js" || ext == "py" {
104                    refs.extend(self.scan_file(&path));
105                }
106            }
107        }
108    }
109    
110    /// Scan a single file for references.
111    fn scan_file(&self, path: &Path) -> Vec<CodeReference> {
112        let mut refs = Vec::new();
113        
114        let content = match fs::read_to_string(path) {
115            Ok(c) => c,
116            Err(_) => return refs,
117        };
118        
119        for (line_num, line) in content.lines().enumerate() {
120            let line_number = line_num + 1;
121            
122            // Check for QAIL queries
123            for cap in self.qail_action_pattern.captures_iter(line) {
124                let action = cap.get(1).map(|m| m.as_str()).unwrap_or("");
125                let table = cap.get(2).map(|m| m.as_str()).unwrap_or("");
126                
127                // Extract column references from the same line
128                let columns: Vec<String> = self.qail_column_pattern
129                    .captures_iter(line)
130                    .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
131                    .collect();
132                
133                refs.push(CodeReference {
134                    file: path.to_path_buf(),
135                    line: line_number,
136                    table: table.to_string(),
137                    columns,
138                    query_type: QueryType::Qail,
139                    snippet: format!("{}::{}", action, table),
140                });
141            }
142            
143            // Check for SQL SELECT
144            for cap in self.sql_select_pattern.captures_iter(line) {
145                let columns_str = cap.get(1).map(|m| m.as_str()).unwrap_or("");
146                let table = cap.get(2).map(|m| m.as_str()).unwrap_or("");
147                
148                let columns = if columns_str.trim() == "*" {
149                    vec!["*".to_string()]
150                } else {
151                    columns_str.split(',')
152                        .map(|c| c.trim().to_string())
153                        .filter(|c| !c.is_empty())
154                        .collect()
155                };
156                
157                refs.push(CodeReference {
158                    file: path.to_path_buf(),
159                    line: line_number,
160                    table: table.to_string(),
161                    columns,
162                    query_type: QueryType::RawSql,
163                    snippet: line.trim().chars().take(60).collect(),
164                });
165            }
166            
167            // Check for SQL INSERT
168            for cap in self.sql_insert_pattern.captures_iter(line) {
169                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
170                refs.push(CodeReference {
171                    file: path.to_path_buf(),
172                    line: line_number,
173                    table: table.to_string(),
174                    columns: vec![],
175                    query_type: QueryType::RawSql,
176                    snippet: line.trim().chars().take(60).collect(),
177                });
178            }
179            
180            // Check for SQL UPDATE
181            for cap in self.sql_update_pattern.captures_iter(line) {
182                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
183                refs.push(CodeReference {
184                    file: path.to_path_buf(),
185                    line: line_number,
186                    table: table.to_string(),
187                    columns: vec![],
188                    query_type: QueryType::RawSql,
189                    snippet: line.trim().chars().take(60).collect(),
190                });
191            }
192            
193            // Check for SQL DELETE
194            for cap in self.sql_delete_pattern.captures_iter(line) {
195                let table = cap.get(1).map(|m| m.as_str()).unwrap_or("");
196                refs.push(CodeReference {
197                    file: path.to_path_buf(),
198                    line: line_number,
199                    table: table.to_string(),
200                    columns: vec![],
201                    query_type: QueryType::RawSql,
202                    snippet: line.trim().chars().take(60).collect(),
203                });
204            }
205        }
206        
207        refs
208    }
209}
210
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects every column name the QAIL column pattern finds on `line`.
    fn qail_columns(scanner: &CodebaseScanner, line: &str) -> Vec<String> {
        scanner
            .qail_column_pattern
            .captures_iter(line)
            .filter_map(|c| c.get(1).map(|m| m.as_str().to_string()))
            .collect()
    }

    #[test]
    fn test_qail_pattern() {
        let scanner = CodebaseScanner::new();
        let line = r#"let result = qail!("get::users:'name'email[id=$1]");"#;

        assert!(scanner.qail_action_pattern.is_match(line));

        let cap = scanner.qail_action_pattern.captures(line).unwrap();
        assert_eq!(cap.get(1).unwrap().as_str(), "get");
        assert_eq!(cap.get(2).unwrap().as_str(), "users");
    }

    #[test]
    fn test_sql_select_pattern() {
        let scanner = CodebaseScanner::new();
        let line = r#"sqlx::query("SELECT name, email FROM users WHERE id = $1")"#;

        assert!(scanner.sql_select_pattern.is_match(line));

        let cap = scanner.sql_select_pattern.captures(line).unwrap();
        // Group 1 is the raw column list, group 2 the table name.
        assert_eq!(cap.get(1).unwrap().as_str(), "name, email");
        assert_eq!(cap.get(2).unwrap().as_str(), "users");
    }

    #[test]
    fn test_sql_insert_pattern() {
        let scanner = CodebaseScanner::new();
        let line = r#"sqlx::query("INSERT INTO orders (id, total) VALUES ($1, $2)")"#;

        let cap = scanner.sql_insert_pattern.captures(line).unwrap();
        assert_eq!(cap.get(1).unwrap().as_str(), "orders");
    }

    #[test]
    fn test_sql_update_pattern() {
        let scanner = CodebaseScanner::new();
        // Lower-case keywords must match too: the SQL patterns are case-insensitive.
        let line = r#"sqlx::query("update users set name = $1 where id = $2")"#;

        let cap = scanner.sql_update_pattern.captures(line).unwrap();
        assert_eq!(cap.get(1).unwrap().as_str(), "users");
    }

    #[test]
    fn test_sql_delete_pattern() {
        let scanner = CodebaseScanner::new();
        let line = r#"sqlx::query("DELETE FROM sessions WHERE expired = true")"#;

        let cap = scanner.sql_delete_pattern.captures(line).unwrap();
        assert_eq!(cap.get(1).unwrap().as_str(), "sessions");
    }

    #[test]
    fn test_column_extraction() {
        let scanner = CodebaseScanner::new();
        let line = r#"get::users:'name'email'created_at"#;

        assert_eq!(
            qail_columns(&scanner, line),
            vec!["name", "email", "created_at"]
        );
    }
}
250}