Skip to main content

codelens_engine/import_graph/
parsers.rs

1use regex::Regex;
2use std::collections::HashMap;
3use std::path::Path;
4use std::sync::LazyLock;
5
6// ── Python ────────────────────────────────────────────────────────────────────
7pub(super) static PY_IMPORT_RE: LazyLock<Regex> =
8    LazyLock::new(|| Regex::new(r"(?m)^\s*import\s+([A-Za-z0-9_.,\s]+)").unwrap());
9pub(super) static PY_FROM_RE: LazyLock<Regex> =
10    LazyLock::new(|| Regex::new(r"(?m)^\s*from\s+([A-Za-z0-9_\.]+)\s+import\s+").unwrap());
11
12// ── JavaScript / TypeScript ───────────────────────────────────────────────────
13pub(super) static JS_IMPORT_FROM_RE: LazyLock<Regex> =
14    LazyLock::new(|| Regex::new(r#"(?m)\bimport\s+[^;]*?\sfrom\s+["']([^"']+)["']"#).unwrap());
15pub(super) static JS_IMPORT_SIDE_EFFECT_RE: LazyLock<Regex> =
16    LazyLock::new(|| Regex::new(r#"(?m)\bimport\s+["']([^"']+)["']"#).unwrap());
17pub(super) static JS_REQUIRE_RE: LazyLock<Regex> =
18    LazyLock::new(|| Regex::new(r#"require\(\s*["']([^"']+)["']\s*\)"#).unwrap());
19pub(super) static JS_DYNAMIC_IMPORT_RE: LazyLock<Regex> =
20    LazyLock::new(|| Regex::new(r#"import\(\s*["']([^"']+)["']\s*\)"#).unwrap());
21pub(super) static JS_REEXPORT_RE: LazyLock<Regex> =
22    LazyLock::new(|| Regex::new(r#"(?m)\bexport\s+[^;]*?\sfrom\s+["']([^"']+)["']"#).unwrap());
23
24// ── Go ────────────────────────────────────────────────────────────────────────
25pub(super) static GO_SINGLE_RE: LazyLock<Regex> =
26    LazyLock::new(|| Regex::new(r#"(?m)^\s*import\s+"([^"]+)""#).unwrap());
27pub(super) static GO_BLOCK_RE: LazyLock<Regex> =
28    LazyLock::new(|| Regex::new(r#""([^"]+)""#).unwrap());
29pub(super) static GO_BLOCK_SECTION_RE: LazyLock<Regex> =
30    LazyLock::new(|| Regex::new(r#"(?s)\bimport\s*\(([^)]*)\)"#).unwrap());
31
32// ── Java ──────────────────────────────────────────────────────────────────────
33pub(super) static JAVA_IMPORT_RE: LazyLock<Regex> =
34    LazyLock::new(|| Regex::new(r"(?m)^\s*import\s+(?:static\s+)?([A-Za-z0-9_.]+)\s*;").unwrap());
35
36// ── Kotlin ────────────────────────────────────────────────────────────────────
37pub(super) static KT_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
38    Regex::new(r"(?m)^\s*import\s+([A-Za-z0-9_.]+)(?:\s+as\s+[A-Za-z0-9_]+)?").unwrap()
39});
40
41// ── Rust ──────────────────────────────────────────────────────────────────────
42pub(super) static RS_USE_RE: LazyLock<Regex> = LazyLock::new(|| {
43    Regex::new(r"(?m)^\s*(?:pub(?:\([^)]*\))?\s+)?use\s+([A-Za-z0-9_]+(?:::[A-Za-z0-9_]+)*)(?:::\{([^}]+)\})?")
44        .unwrap()
45});
46pub(super) static RS_MOD_RE: LazyLock<Regex> = LazyLock::new(|| {
47    Regex::new(r"(?m)^\s*(?:pub(?:\([^)]*\))?\s+)?mod\s+([A-Za-z0-9_]+)\s*;").unwrap()
48});
49
50// ── Ruby ──────────────────────────────────────────────────────────────────────
51pub(super) static RB_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
52    Regex::new(r#"(?m)^\s*(?:require|require_relative|load)\s+["']([^"']+)["']"#).unwrap()
53});
54
55// ── C / C++ ───────────────────────────────────────────────────────────────────
56pub(super) static C_INCLUDE_RE: LazyLock<Regex> =
57    LazyLock::new(|| Regex::new(r#"(?m)^\s*#\s*include\s+[<"]([^>"]+)[>"]"#).unwrap());
58
59// ── PHP ───────────────────────────────────────────────────────────────────────
60pub(super) static PHP_USE_RE: LazyLock<Regex> =
61    LazyLock::new(|| Regex::new(r"(?m)^\s*use\s+([A-Za-z0-9_\\]+)\s*;").unwrap());
62pub(super) static PHP_REQ_RE: LazyLock<Regex> = LazyLock::new(|| {
63    Regex::new(r#"(?m)^\s*(?:require|require_once|include|include_once)\s+["']([^"']+)["']\s*;"#)
64        .unwrap()
65});
66
67// ── C# ───────────────────────────────────────────────────────────────────────
68pub(super) static CS_USING_RE: LazyLock<Regex> =
69    LazyLock::new(|| Regex::new(r"(?m)^\s*using\s+(?:static\s+)?([A-Za-z0-9_.]+)\s*;").unwrap());
70
71// ── Dart ─────────────────────────────────────────────────────────────────────
72pub(super) static DART_IMPORT_RE: LazyLock<Regex> =
73    LazyLock::new(|| Regex::new(r#"(?m)^\s*import\s+["']([^"']+)["']"#).unwrap());
74pub(super) static DART_EXPORT_RE: LazyLock<Regex> =
75    LazyLock::new(|| Regex::new(r#"(?m)^\s*export\s+["']([^"']+)["']"#).unwrap());
76
77// ── collect_top_level_funcs patterns ─────────────────────────────────────────
78pub(super) static TLF_PY_RE: LazyLock<Regex> =
79    LazyLock::new(|| Regex::new(r"(?m)^def ([A-Za-z_][A-Za-z0-9_]*)").unwrap());
80pub(super) static TLF_JS_RE1: LazyLock<Regex> =
81    LazyLock::new(|| Regex::new(r"(?m)^function ([A-Za-z_][A-Za-z0-9_]*)").unwrap());
82pub(super) static TLF_JS_RE2: LazyLock<Regex> = LazyLock::new(|| {
83    Regex::new(r"(?m)^(?:export\s+)?(?:async\s+)?function ([A-Za-z_][A-Za-z0-9_]*)").unwrap()
84});
85pub(super) static TLF_GO_RE: LazyLock<Regex> =
86    LazyLock::new(|| Regex::new(r"(?m)^func ([A-Za-z_][A-Za-z0-9_]*)").unwrap());
87pub(super) static TLF_JVM_RE: LazyLock<Regex> = LazyLock::new(|| {
88    Regex::new(r"(?m)(?:public|private|protected|static|\s)+\s+\w+\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(")
89        .unwrap()
90});
91pub(super) static TLF_RS_RE: LazyLock<Regex> = LazyLock::new(|| {
92    Regex::new(r"(?m)^(?:pub(?:\([^)]*\))?\s+)?fn ([A-Za-z_][A-Za-z0-9_]*)").unwrap()
93});
94
95// ── extract_imports dispatcher ───────────────────────────────────────────────
96
97pub(super) fn extract_imports(path: &Path) -> Vec<String> {
98    let Ok(content) = std::fs::read_to_string(path) else {
99        return Vec::new();
100    };
101    extract_imports_from_source(path, &content)
102}
103
104/// Extract imports from already-loaded source content (avoids re-reading disk).
105pub fn extract_imports_from_source(path: &Path, content: &str) -> Vec<String> {
106    match path
107        .extension()
108        .and_then(|ext| ext.to_str())
109        .unwrap_or_default()
110        .to_ascii_lowercase()
111        .as_str()
112    {
113        "py" => extract_python_imports(content),
114        "js" | "jsx" | "ts" | "tsx" | "mjs" | "cjs" => extract_js_imports(content),
115        "go" => extract_go_imports(content),
116        "java" => extract_java_imports(content),
117        "kt" | "kts" => extract_kotlin_imports(content),
118        "rs" => extract_rust_imports(content),
119        "rb" => extract_ruby_imports(content),
120        "c" | "cc" | "cpp" | "cxx" | "h" | "hh" | "hpp" | "hxx" => extract_c_imports(content),
121        "php" => extract_php_imports(content),
122        "cs" => extract_csharp_imports(content),
123        "dart" => extract_dart_imports(content),
124        "scala" | "sc" => extract_scala_imports(content),
125        "swift" => extract_swift_imports(content),
126        "css" | "scss" | "less" => extract_css_imports(content),
127        _ => Vec::new(),
128    }
129}
130
131// ── Language-specific extractors ─────────────────────────────────────────────
132
133pub(super) fn extract_python_imports(content: &str) -> Vec<String> {
134    let mut imports = Vec::new();
135    for capture in PY_IMPORT_RE.captures_iter(content) {
136        let Some(modules) = capture.get(1) else {
137            continue;
138        };
139        for module in modules.as_str().split(',') {
140            let module = module.split_whitespace().next().unwrap_or_default();
141            if !module.is_empty() {
142                imports.push(module.to_owned());
143            }
144        }
145    }
146    for capture in PY_FROM_RE.captures_iter(content) {
147        let Some(module) = capture.get(1) else {
148            continue;
149        };
150        imports.push(module.as_str().trim().to_owned());
151    }
152    imports
153}
154
155pub(super) fn extract_js_imports(content: &str) -> Vec<String> {
156    let mut imports = Vec::new();
157    for regex in [
158        &*JS_IMPORT_FROM_RE,
159        &*JS_IMPORT_SIDE_EFFECT_RE,
160        &*JS_REQUIRE_RE,
161        &*JS_DYNAMIC_IMPORT_RE,
162        &*JS_REEXPORT_RE,
163    ] {
164        for capture in regex.captures_iter(content) {
165            let Some(module) = capture.get(1) else {
166                continue;
167            };
168            imports.push(module.as_str().trim().to_owned());
169        }
170    }
171    imports
172}
173
174pub(super) fn extract_go_imports(content: &str) -> Vec<String> {
175    let mut imports = Vec::new();
176    for cap in GO_SINGLE_RE.captures_iter(content) {
177        if let Some(m) = cap.get(1) {
178            imports.push(m.as_str().to_owned());
179        }
180    }
181    for section in GO_BLOCK_SECTION_RE.captures_iter(content) {
182        if let Some(body) = section.get(1) {
183            for cap in GO_BLOCK_RE.captures_iter(body.as_str()) {
184                if let Some(m) = cap.get(1) {
185                    imports.push(m.as_str().to_owned());
186                }
187            }
188        }
189    }
190    imports
191}
192
193pub(super) fn extract_java_imports(content: &str) -> Vec<String> {
194    JAVA_IMPORT_RE
195        .captures_iter(content)
196        .filter_map(|cap| cap.get(1))
197        .map(|m| m.as_str().to_owned())
198        .collect()
199}
200
201pub(super) fn extract_kotlin_imports(content: &str) -> Vec<String> {
202    KT_IMPORT_RE
203        .captures_iter(content)
204        .filter_map(|cap| cap.get(1))
205        .map(|m| m.as_str().to_owned())
206        .collect()
207}
208
209pub(super) fn extract_rust_imports(content: &str) -> Vec<String> {
210    let mut imports = Vec::new();
211
212    for cap in RS_MOD_RE.captures_iter(content) {
213        if let Some(m) = cap.get(1) {
214            imports.push(m.as_str().to_owned());
215        }
216    }
217
218    for cap in RS_USE_RE.captures_iter(content) {
219        let base = cap.get(1).map(|m| m.as_str()).unwrap_or("");
220        if let Some(brace) = cap.get(2) {
221            for item in brace.as_str().split(',') {
222                let item = item.trim();
223                if !item.is_empty() {
224                    imports.push(format!("{base}::{item}"));
225                }
226            }
227        } else if !base.is_empty() {
228            imports.push(base.to_owned());
229        }
230    }
231    imports
232}
233
234pub(super) fn extract_ruby_imports(content: &str) -> Vec<String> {
235    RB_IMPORT_RE
236        .captures_iter(content)
237        .filter_map(|cap| cap.get(1))
238        .map(|m| m.as_str().to_owned())
239        .collect()
240}
241
242pub(super) fn extract_c_imports(content: &str) -> Vec<String> {
243    C_INCLUDE_RE
244        .captures_iter(content)
245        .filter_map(|cap| cap.get(1))
246        .map(|m| m.as_str().to_owned())
247        .collect()
248}
249
250pub(super) fn extract_php_imports(content: &str) -> Vec<String> {
251    let mut imports = Vec::new();
252    for re in [&*PHP_USE_RE, &*PHP_REQ_RE] {
253        for cap in re.captures_iter(content) {
254            if let Some(m) = cap.get(1) {
255                imports.push(m.as_str().to_owned());
256            }
257        }
258    }
259    imports
260}
261
262pub(super) fn extract_csharp_imports(content: &str) -> Vec<String> {
263    CS_USING_RE
264        .captures_iter(content)
265        .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_owned()))
266        .collect()
267}
268
269pub(super) fn extract_dart_imports(content: &str) -> Vec<String> {
270    let mut imports = Vec::new();
271    for re in [&*DART_IMPORT_RE, &*DART_EXPORT_RE] {
272        for cap in re.captures_iter(content) {
273            if let Some(m) = cap.get(1) {
274                let path = m.as_str();
275                if !path.starts_with("dart:") {
276                    imports.push(path.to_owned());
277                }
278            }
279        }
280    }
281    imports
282}
283
284// ── Scala ────────────────────────────────────────────────────────────────────
285static SCALA_IMPORT_RE: LazyLock<Regex> =
286    LazyLock::new(|| Regex::new(r"(?m)^\s*import\s+([A-Za-z0-9_\.]+)").unwrap());
287
288fn extract_scala_imports(content: &str) -> Vec<String> {
289    SCALA_IMPORT_RE
290        .captures_iter(content)
291        .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_owned()))
292        .collect()
293}
294
295// ── Swift ────────────────────────────────────────────────────────────────────
296static SWIFT_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
297    Regex::new(
298        r"(?m)^\s*import\s+(?:class\s+|struct\s+|enum\s+|protocol\s+|func\s+)?([A-Za-z0-9_\.]+)",
299    )
300    .unwrap()
301});
302
303fn extract_swift_imports(content: &str) -> Vec<String> {
304    SWIFT_IMPORT_RE
305        .captures_iter(content)
306        .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_owned()))
307        .collect()
308}
309
310// ── CSS/SCSS ─────────────────────────────────────────────────────────────────
311static CSS_IMPORT_RE: LazyLock<Regex> =
312    LazyLock::new(|| Regex::new(r#"(?m)@import\s+(?:url\()?["']([^"']+)["']\)?"#).unwrap());
313
314fn extract_css_imports(content: &str) -> Vec<String> {
315    CSS_IMPORT_RE
316        .captures_iter(content)
317        .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_owned()))
318        .collect()
319}
320
321// ── extract_imports_for_file (public wrapper) ────────────────────────────────
322
323/// Extract raw import strings from a file. Public for use by the indexer.
324pub fn extract_imports_for_file(path: &Path) -> Vec<String> {
325    extract_imports(path)
326}
327
328// ── collect_top_level_funcs ──────────────────────────────────────────────────
329
330/// Lightweight regex-based top-level function name extractor.
331/// Fills `funcs` map with (name -> line_number). Does not overwrite existing entries.
332pub(super) fn collect_top_level_funcs(
333    path: &Path,
334    source: &str,
335    funcs: &mut HashMap<String, usize>,
336) {
337    let ext = path
338        .extension()
339        .and_then(|e| e.to_str())
340        .map(|e| e.to_ascii_lowercase())
341        .unwrap_or_default();
342
343    let regexes: &[&Regex] = match ext.as_str() {
344        "py" => &[&*TLF_PY_RE],
345        "js" | "mjs" | "cjs" | "ts" | "tsx" | "jsx" => &[&*TLF_JS_RE1, &*TLF_JS_RE2],
346        "go" => &[&*TLF_GO_RE],
347        "java" | "kt" | "cs" => &[&*TLF_JVM_RE],
348        "rs" => &[&*TLF_RS_RE],
349        "dart" => &[&*TLF_PY_RE, &*TLF_JVM_RE],
350        _ => return,
351    };
352
353    for re in regexes {
354        for cap in re.captures_iter(source) {
355            let Some(m) = cap.get(1) else { continue };
356            let name = m.as_str().to_owned();
357            if !name.is_empty() {
358                let offset = m.start();
359                let line = source[..offset].bytes().filter(|&b| b == b'\n').count() + 1;
360                funcs.entry(name).or_insert(line);
361            }
362        }
363    }
364}