codelens_engine/import_graph/
parsers.rs1use regex::Regex;
2use std::collections::HashMap;
3use std::path::Path;
4use std::sync::LazyLock;
5
6pub(super) static PY_IMPORT_RE: LazyLock<Regex> =
8 LazyLock::new(|| Regex::new(r"(?m)^\s*import\s+([A-Za-z0-9_.,\s]+)").unwrap());
9pub(super) static PY_FROM_RE: LazyLock<Regex> =
10 LazyLock::new(|| Regex::new(r"(?m)^\s*from\s+([A-Za-z0-9_\.]+)\s+import\s+").unwrap());
11
12pub(super) static JS_IMPORT_FROM_RE: LazyLock<Regex> =
14 LazyLock::new(|| Regex::new(r#"(?m)\bimport\s+[^;]*?\sfrom\s+["']([^"']+)["']"#).unwrap());
15pub(super) static JS_IMPORT_SIDE_EFFECT_RE: LazyLock<Regex> =
16 LazyLock::new(|| Regex::new(r#"(?m)\bimport\s+["']([^"']+)["']"#).unwrap());
17pub(super) static JS_REQUIRE_RE: LazyLock<Regex> =
18 LazyLock::new(|| Regex::new(r#"require\(\s*["']([^"']+)["']\s*\)"#).unwrap());
19pub(super) static JS_DYNAMIC_IMPORT_RE: LazyLock<Regex> =
20 LazyLock::new(|| Regex::new(r#"import\(\s*["']([^"']+)["']\s*\)"#).unwrap());
21pub(super) static JS_REEXPORT_RE: LazyLock<Regex> =
22 LazyLock::new(|| Regex::new(r#"(?m)\bexport\s+[^;]*?\sfrom\s+["']([^"']+)["']"#).unwrap());
23
24pub(super) static GO_SINGLE_RE: LazyLock<Regex> =
26 LazyLock::new(|| Regex::new(r#"(?m)^\s*import\s+"([^"]+)""#).unwrap());
27pub(super) static GO_BLOCK_RE: LazyLock<Regex> =
28 LazyLock::new(|| Regex::new(r#""([^"]+)""#).unwrap());
29pub(super) static GO_BLOCK_SECTION_RE: LazyLock<Regex> =
30 LazyLock::new(|| Regex::new(r#"(?s)\bimport\s*\(([^)]*)\)"#).unwrap());
31
32pub(super) static JAVA_IMPORT_RE: LazyLock<Regex> =
34 LazyLock::new(|| Regex::new(r"(?m)^\s*import\s+(?:static\s+)?([A-Za-z0-9_.]+)\s*;").unwrap());
35
36pub(super) static KT_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
38 Regex::new(r"(?m)^\s*import\s+([A-Za-z0-9_.]+)(?:\s+as\s+[A-Za-z0-9_]+)?").unwrap()
39});
40
41pub(super) static RS_USE_RE: LazyLock<Regex> = LazyLock::new(|| {
43 Regex::new(r"(?m)^\s*(?:pub(?:\([^)]*\))?\s+)?use\s+([A-Za-z0-9_]+(?:::[A-Za-z0-9_]+)*)(?:::\{([^}]+)\})?")
44 .unwrap()
45});
46pub(super) static RS_MOD_RE: LazyLock<Regex> = LazyLock::new(|| {
47 Regex::new(r"(?m)^\s*(?:pub(?:\([^)]*\))?\s+)?mod\s+([A-Za-z0-9_]+)\s*;").unwrap()
48});
49
50pub(super) static RB_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
52 Regex::new(r#"(?m)^\s*(?:require|require_relative|load)\s+["']([^"']+)["']"#).unwrap()
53});
54
55pub(super) static C_INCLUDE_RE: LazyLock<Regex> =
57 LazyLock::new(|| Regex::new(r#"(?m)^\s*#\s*include\s+[<"]([^>"]+)[>"]"#).unwrap());
58
59pub(super) static PHP_USE_RE: LazyLock<Regex> =
61 LazyLock::new(|| Regex::new(r"(?m)^\s*use\s+([A-Za-z0-9_\\]+)\s*;").unwrap());
62pub(super) static PHP_REQ_RE: LazyLock<Regex> = LazyLock::new(|| {
63 Regex::new(r#"(?m)^\s*(?:require|require_once|include|include_once)\s+["']([^"']+)["']\s*;"#)
64 .unwrap()
65});
66
67pub(super) static CS_USING_RE: LazyLock<Regex> =
69 LazyLock::new(|| Regex::new(r"(?m)^\s*using\s+(?:static\s+)?([A-Za-z0-9_.]+)\s*;").unwrap());
70
71pub(super) static DART_IMPORT_RE: LazyLock<Regex> =
73 LazyLock::new(|| Regex::new(r#"(?m)^\s*import\s+["']([^"']+)["']"#).unwrap());
74pub(super) static DART_EXPORT_RE: LazyLock<Regex> =
75 LazyLock::new(|| Regex::new(r#"(?m)^\s*export\s+["']([^"']+)["']"#).unwrap());
76
77pub(super) static TLF_PY_RE: LazyLock<Regex> =
79 LazyLock::new(|| Regex::new(r"(?m)^def ([A-Za-z_][A-Za-z0-9_]*)").unwrap());
80pub(super) static TLF_JS_RE1: LazyLock<Regex> =
81 LazyLock::new(|| Regex::new(r"(?m)^function ([A-Za-z_][A-Za-z0-9_]*)").unwrap());
82pub(super) static TLF_JS_RE2: LazyLock<Regex> = LazyLock::new(|| {
83 Regex::new(r"(?m)^(?:export\s+)?(?:async\s+)?function ([A-Za-z_][A-Za-z0-9_]*)").unwrap()
84});
85pub(super) static TLF_GO_RE: LazyLock<Regex> =
86 LazyLock::new(|| Regex::new(r"(?m)^func ([A-Za-z_][A-Za-z0-9_]*)").unwrap());
87pub(super) static TLF_JVM_RE: LazyLock<Regex> = LazyLock::new(|| {
88 Regex::new(r"(?m)(?:public|private|protected|static|\s)+\s+\w+\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(")
89 .unwrap()
90});
91pub(super) static TLF_RS_RE: LazyLock<Regex> = LazyLock::new(|| {
92 Regex::new(r"(?m)^(?:pub(?:\([^)]*\))?\s+)?fn ([A-Za-z_][A-Za-z0-9_]*)").unwrap()
93});
94
95pub(super) fn extract_imports(path: &Path) -> Vec<String> {
98 let Ok(content) = std::fs::read_to_string(path) else {
99 return Vec::new();
100 };
101 extract_imports_from_source(path, &content)
102}
103
104pub fn extract_imports_from_source(path: &Path, content: &str) -> Vec<String> {
106 match path
107 .extension()
108 .and_then(|ext| ext.to_str())
109 .unwrap_or_default()
110 .to_ascii_lowercase()
111 .as_str()
112 {
113 "py" => extract_python_imports(content),
114 "js" | "jsx" | "ts" | "tsx" | "mjs" | "cjs" => extract_js_imports(content),
115 "go" => extract_go_imports(content),
116 "java" => extract_java_imports(content),
117 "kt" | "kts" => extract_kotlin_imports(content),
118 "rs" => extract_rust_imports(content),
119 "rb" => extract_ruby_imports(content),
120 "c" | "cc" | "cpp" | "cxx" | "h" | "hh" | "hpp" | "hxx" => extract_c_imports(content),
121 "php" => extract_php_imports(content),
122 "cs" => extract_csharp_imports(content),
123 "dart" => extract_dart_imports(content),
124 "scala" | "sc" => extract_scala_imports(content),
125 "swift" => extract_swift_imports(content),
126 "css" | "scss" | "less" => extract_css_imports(content),
127 _ => Vec::new(),
128 }
129}
130
131pub(super) fn extract_python_imports(content: &str) -> Vec<String> {
134 let mut imports = Vec::new();
135 for capture in PY_IMPORT_RE.captures_iter(content) {
136 let Some(modules) = capture.get(1) else {
137 continue;
138 };
139 for module in modules.as_str().split(',') {
140 let module = module.split_whitespace().next().unwrap_or_default();
141 if !module.is_empty() {
142 imports.push(module.to_owned());
143 }
144 }
145 }
146 for capture in PY_FROM_RE.captures_iter(content) {
147 let Some(module) = capture.get(1) else {
148 continue;
149 };
150 imports.push(module.as_str().trim().to_owned());
151 }
152 imports
153}
154
155pub(super) fn extract_js_imports(content: &str) -> Vec<String> {
156 let mut imports = Vec::new();
157 for regex in [
158 &*JS_IMPORT_FROM_RE,
159 &*JS_IMPORT_SIDE_EFFECT_RE,
160 &*JS_REQUIRE_RE,
161 &*JS_DYNAMIC_IMPORT_RE,
162 &*JS_REEXPORT_RE,
163 ] {
164 for capture in regex.captures_iter(content) {
165 let Some(module) = capture.get(1) else {
166 continue;
167 };
168 imports.push(module.as_str().trim().to_owned());
169 }
170 }
171 imports
172}
173
174pub(super) fn extract_go_imports(content: &str) -> Vec<String> {
175 let mut imports = Vec::new();
176 for cap in GO_SINGLE_RE.captures_iter(content) {
177 if let Some(m) = cap.get(1) {
178 imports.push(m.as_str().to_owned());
179 }
180 }
181 for section in GO_BLOCK_SECTION_RE.captures_iter(content) {
182 if let Some(body) = section.get(1) {
183 for cap in GO_BLOCK_RE.captures_iter(body.as_str()) {
184 if let Some(m) = cap.get(1) {
185 imports.push(m.as_str().to_owned());
186 }
187 }
188 }
189 }
190 imports
191}
192
193pub(super) fn extract_java_imports(content: &str) -> Vec<String> {
194 JAVA_IMPORT_RE
195 .captures_iter(content)
196 .filter_map(|cap| cap.get(1))
197 .map(|m| m.as_str().to_owned())
198 .collect()
199}
200
201pub(super) fn extract_kotlin_imports(content: &str) -> Vec<String> {
202 KT_IMPORT_RE
203 .captures_iter(content)
204 .filter_map(|cap| cap.get(1))
205 .map(|m| m.as_str().to_owned())
206 .collect()
207}
208
209pub(super) fn extract_rust_imports(content: &str) -> Vec<String> {
210 let mut imports = Vec::new();
211
212 for cap in RS_MOD_RE.captures_iter(content) {
213 if let Some(m) = cap.get(1) {
214 imports.push(m.as_str().to_owned());
215 }
216 }
217
218 for cap in RS_USE_RE.captures_iter(content) {
219 let base = cap.get(1).map(|m| m.as_str()).unwrap_or("");
220 if let Some(brace) = cap.get(2) {
221 for item in brace.as_str().split(',') {
222 let item = item.trim();
223 if !item.is_empty() {
224 imports.push(format!("{base}::{item}"));
225 }
226 }
227 } else if !base.is_empty() {
228 imports.push(base.to_owned());
229 }
230 }
231 imports
232}
233
234pub(super) fn extract_ruby_imports(content: &str) -> Vec<String> {
235 RB_IMPORT_RE
236 .captures_iter(content)
237 .filter_map(|cap| cap.get(1))
238 .map(|m| m.as_str().to_owned())
239 .collect()
240}
241
242pub(super) fn extract_c_imports(content: &str) -> Vec<String> {
243 C_INCLUDE_RE
244 .captures_iter(content)
245 .filter_map(|cap| cap.get(1))
246 .map(|m| m.as_str().to_owned())
247 .collect()
248}
249
250pub(super) fn extract_php_imports(content: &str) -> Vec<String> {
251 let mut imports = Vec::new();
252 for re in [&*PHP_USE_RE, &*PHP_REQ_RE] {
253 for cap in re.captures_iter(content) {
254 if let Some(m) = cap.get(1) {
255 imports.push(m.as_str().to_owned());
256 }
257 }
258 }
259 imports
260}
261
262pub(super) fn extract_csharp_imports(content: &str) -> Vec<String> {
263 CS_USING_RE
264 .captures_iter(content)
265 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_owned()))
266 .collect()
267}
268
269pub(super) fn extract_dart_imports(content: &str) -> Vec<String> {
270 let mut imports = Vec::new();
271 for re in [&*DART_IMPORT_RE, &*DART_EXPORT_RE] {
272 for cap in re.captures_iter(content) {
273 if let Some(m) = cap.get(1) {
274 let path = m.as_str();
275 if !path.starts_with("dart:") {
276 imports.push(path.to_owned());
277 }
278 }
279 }
280 }
281 imports
282}
283
284static SCALA_IMPORT_RE: LazyLock<Regex> =
286 LazyLock::new(|| Regex::new(r"(?m)^\s*import\s+([A-Za-z0-9_\.]+)").unwrap());
287
288fn extract_scala_imports(content: &str) -> Vec<String> {
289 SCALA_IMPORT_RE
290 .captures_iter(content)
291 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_owned()))
292 .collect()
293}
294
295static SWIFT_IMPORT_RE: LazyLock<Regex> = LazyLock::new(|| {
297 Regex::new(
298 r"(?m)^\s*import\s+(?:class\s+|struct\s+|enum\s+|protocol\s+|func\s+)?([A-Za-z0-9_\.]+)",
299 )
300 .unwrap()
301});
302
303fn extract_swift_imports(content: &str) -> Vec<String> {
304 SWIFT_IMPORT_RE
305 .captures_iter(content)
306 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_owned()))
307 .collect()
308}
309
310static CSS_IMPORT_RE: LazyLock<Regex> =
312 LazyLock::new(|| Regex::new(r#"(?m)@import\s+(?:url\()?["']([^"']+)["']\)?"#).unwrap());
313
314fn extract_css_imports(content: &str) -> Vec<String> {
315 CSS_IMPORT_RE
316 .captures_iter(content)
317 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_owned()))
318 .collect()
319}
320
321pub fn extract_imports_for_file(path: &Path) -> Vec<String> {
325 extract_imports(path)
326}
327
328pub(super) fn collect_top_level_funcs(
333 path: &Path,
334 source: &str,
335 funcs: &mut HashMap<String, usize>,
336) {
337 let ext = path
338 .extension()
339 .and_then(|e| e.to_str())
340 .map(|e| e.to_ascii_lowercase())
341 .unwrap_or_default();
342
343 let regexes: &[&Regex] = match ext.as_str() {
344 "py" => &[&*TLF_PY_RE],
345 "js" | "mjs" | "cjs" | "ts" | "tsx" | "jsx" => &[&*TLF_JS_RE1, &*TLF_JS_RE2],
346 "go" => &[&*TLF_GO_RE],
347 "java" | "kt" | "cs" => &[&*TLF_JVM_RE],
348 "rs" => &[&*TLF_RS_RE],
349 "dart" => &[&*TLF_PY_RE, &*TLF_JVM_RE],
350 _ => return,
351 };
352
353 for re in regexes {
354 for cap in re.captures_iter(source) {
355 let Some(m) = cap.get(1) else { continue };
356 let name = m.as_str().to_owned();
357 if !name.is_empty() {
358 let offset = m.start();
359 let line = source[..offset].bytes().filter(|&b| b == b'\n').count() + 1;
360 funcs.entry(name).or_insert(line);
361 }
362 }
363 }
364}