1use regex::Regex;
2use std::collections::HashSet;
3
4#[cfg(feature = "tree-sitter")]
5use super::deep_queries::{self, ImportKind};
6
/// Expands to a `&'static regex::Regex` compiled from `$pattern` on first use.
///
/// Every expansion declares its own `OnceLock`, so each call site compiles its
/// pattern at most once and hands out the cached value afterwards. `$pattern`
/// must be a literal because it is also spliced into the panic message with
/// `concat!`; an invalid pattern panics the first time the expansion runs.
macro_rules! static_regex {
    ($pattern:expr) => {{
        static RE: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
        RE.get_or_init(|| {
            regex::Regex::new($pattern).expect(concat!("BUG: invalid static regex: ", $pattern))
        })
    }};
}
15
/// Cached regex for `import … from '<path>'` statements (braced or unbraced).
///
/// Capture group 1 is the quoted module specifier without its quotes.
fn import_re() -> &'static Regex {
    static_regex!(r#"import\s+(?:\{[^}]*\}\s+from\s+|.*from\s+)['"]([^'"]+)['"]"#)
}
/// Cached regex for `require('<path>')` / `require("<path>")` calls.
///
/// Capture group 1 is the quoted argument without its quotes.
fn require_re() -> &'static Regex {
    static_regex!(r#"require\(['"]([^'"]+)['"]\)"#)
}
/// Cached regex for a `use` declaration at the very start of the haystack.
///
/// Capture group 1 is the leading run of word characters and `:` that follows
/// `use`, so the match stops at the first `{`, `;`, `*` or whitespace.
fn rust_use_re() -> &'static Regex {
    static_regex!(r"^use\s+([\w:]+)")
}
/// Cached regex for Python import statements at the start of the haystack.
///
/// `from X import …` fills capture group 1 with `X`; a plain `import X` fills
/// capture group 2 with the first whitespace-delimited token after `import`.
fn py_import_re() -> &'static Regex {
    static_regex!(r"^(?:from\s+(\S+)\s+import|import\s+(\S+))")
}
/// Cached regex for a non-empty double-quoted string anywhere in the haystack.
///
/// Capture group 1 is the text between the quotes. The pattern itself knows
/// nothing about Go syntax; callers decide which lines to apply it to.
fn go_import_re() -> &'static Regex {
    static_regex!(r#""([^"]+)""#)
}
31
/// Dependency summary produced by [`extract_deps`] for a single source file.
#[derive(Debug, Clone)]
pub struct DepInfo {
    /// Imported paths or module names, in no particular order (the extractors
    /// deduplicate through a `HashSet` before filling this list).
    pub imports: Vec<String>,
    /// Names of exported symbols; left empty by extractors that only look at imports.
    pub exports: Vec<String>,
}
37
38pub fn extract_deps(content: &str, ext: &str) -> DepInfo {
39 let lang = crate::core::language_capabilities::language_for_ext(ext);
40 match lang {
41 Some(
42 crate::core::language_capabilities::LanguageId::TypeScript
43 | crate::core::language_capabilities::LanguageId::JavaScript
44 | crate::core::language_capabilities::LanguageId::Vue
45 | crate::core::language_capabilities::LanguageId::Svelte,
46 ) => extract_ts_deps(content),
47 Some(crate::core::language_capabilities::LanguageId::Rust) => extract_rust_deps(content),
48 Some(crate::core::language_capabilities::LanguageId::Python) => {
49 extract_python_deps(content)
50 }
51 Some(crate::core::language_capabilities::LanguageId::Go) => extract_go_deps(content),
52 Some(
53 crate::core::language_capabilities::LanguageId::C
54 | crate::core::language_capabilities::LanguageId::Cpp,
55 ) => extract_c_like_deps(content),
56 Some(crate::core::language_capabilities::LanguageId::Ruby) => extract_ruby_deps(content),
57 Some(crate::core::language_capabilities::LanguageId::Php) => extract_php_deps(content),
58 Some(crate::core::language_capabilities::LanguageId::Bash) => extract_bash_deps(content),
59 Some(crate::core::language_capabilities::LanguageId::Kotlin) => {
60 extract_kotlin_deps(content)
61 }
62 Some(crate::core::language_capabilities::LanguageId::Dart) => {
63 let mut imports = HashSet::new();
64 let re = static_regex!(r#"^\s*(?:import|export|part)\s+['"]([^'"]+)['"]"#);
65 for line in content.lines() {
66 let trimmed = line.trim();
67 if let Some(caps) = re.captures(trimmed) {
68 let p = caps[1].trim();
69 if p.starts_with('.') || p.starts_with('/') {
70 imports.insert(clean_path_like(p));
71 }
72 }
73 }
74 DepInfo {
75 imports: imports.into_iter().collect(),
76 exports: Vec::new(),
77 }
78 }
79 Some(crate::core::language_capabilities::LanguageId::Zig) => {
80 let mut imports = HashSet::new();
81 let re = static_regex!(r#"@import\(\s*"([^"]+)"\s*\)"#);
82 for line in content.lines() {
83 let trimmed = line.trim();
84 if let Some(caps) = re.captures(trimmed) {
85 let p = caps[1].trim();
86 if p.starts_with('.')
87 || p.contains('/')
88 || std::path::Path::new(p)
89 .extension()
90 .is_some_and(|e| e.eq_ignore_ascii_case("zig"))
91 {
92 imports.insert(clean_path_like(p));
93 }
94 }
95 }
96 DepInfo {
97 imports: imports.into_iter().collect(),
98 exports: Vec::new(),
99 }
100 }
101 _ => DepInfo {
102 imports: Vec::new(),
103 exports: Vec::new(),
104 },
105 }
106}
107
108fn extract_ts_deps(content: &str) -> DepInfo {
109 let mut imports = HashSet::new();
110 let mut exports = Vec::new();
111
112 for line in content.lines() {
113 let trimmed = line.trim();
114
115 if let Some(caps) = import_re().captures(trimmed) {
116 let path = &caps[1];
117 if path.starts_with('.') || path.starts_with('/') {
118 imports.insert(clean_import_path(path));
119 }
120 }
121 if let Some(caps) = require_re().captures(trimmed) {
122 let path = &caps[1];
123 if path.starts_with('.') || path.starts_with('/') {
124 imports.insert(clean_import_path(path));
125 }
126 }
127
128 if trimmed.starts_with("export ") {
129 if let Some(name) = extract_export_name(trimmed) {
130 exports.push(name);
131 }
132 }
133 }
134
135 DepInfo {
136 imports: imports.into_iter().collect(),
137 exports,
138 }
139}
140
141fn extract_rust_deps(content: &str) -> DepInfo {
142 let mut imports = HashSet::new();
143 let mut exports = Vec::new();
144
145 for line in content.lines() {
146 let trimmed = line.trim();
147
148 if let Some(caps) = rust_use_re().captures(trimmed) {
149 let path = &caps[1];
150 if !path.starts_with("std::") && !path.starts_with("core::") {
151 imports.insert(path.to_string());
152 }
153 }
154
155 if trimmed.starts_with("pub fn ") || trimmed.starts_with("pub async fn ") {
156 if let Some(name) = trimmed
157 .split('(')
158 .next()
159 .and_then(|s| s.split_whitespace().last())
160 {
161 exports.push(name.to_string());
162 }
163 } else if trimmed.starts_with("pub struct ")
164 || trimmed.starts_with("pub enum ")
165 || trimmed.starts_with("pub trait ")
166 {
167 if let Some(name) = trimmed.split_whitespace().nth(2) {
168 let clean = name.trim_end_matches(|c: char| !c.is_alphanumeric() && c != '_');
169 exports.push(clean.to_string());
170 }
171 }
172 }
173
174 DepInfo {
175 imports: imports.into_iter().collect(),
176 exports,
177 }
178}
179
180fn extract_python_deps(content: &str) -> DepInfo {
181 let mut imports = HashSet::new();
182 let mut exports = Vec::new();
183
184 for line in content.lines() {
185 let trimmed = line.trim();
186
187 if let Some(caps) = py_import_re().captures(trimmed) {
188 if let Some(m) = caps.get(1).or(caps.get(2)) {
189 let module = m.as_str();
190 if !module.starts_with("os")
191 && !module.starts_with("sys")
192 && !module.starts_with("json")
193 {
194 imports.insert(module.to_string());
195 }
196 }
197 }
198
199 if trimmed.starts_with("def ") && !trimmed.contains('_') {
200 if let Some(name) = trimmed
201 .strip_prefix("def ")
202 .and_then(|s| s.split('(').next())
203 {
204 exports.push(name.to_string());
205 }
206 } else if trimmed.starts_with("class ") {
207 if let Some(name) = trimmed
208 .strip_prefix("class ")
209 .and_then(|s| s.split(['(', ':']).next())
210 {
211 exports.push(name.to_string());
212 }
213 }
214 }
215
216 DepInfo {
217 imports: imports.into_iter().collect(),
218 exports,
219 }
220}
221
222fn extract_go_deps(content: &str) -> DepInfo {
223 let mut imports = HashSet::new();
224 let mut exports = Vec::new();
225
226 let mut in_import_block = false;
227 for line in content.lines() {
228 let trimmed = line.trim();
229
230 if trimmed.starts_with("import (") {
231 in_import_block = true;
232 continue;
233 }
234 if in_import_block {
235 if trimmed == ")" {
236 in_import_block = false;
237 continue;
238 }
239 if let Some(caps) = go_import_re().captures(trimmed) {
240 imports.insert(caps[1].to_string());
241 }
242 }
243
244 if trimmed.starts_with("func ") {
245 let name_part = trimmed.strip_prefix("func ").unwrap_or("");
246 if let Some(name) = name_part.split('(').next() {
247 let name = name.trim();
248 if !name.is_empty() && name.starts_with(char::is_uppercase) {
249 exports.push(name.to_string());
250 }
251 }
252 }
253 }
254
255 DepInfo {
256 imports: imports.into_iter().collect(),
257 exports,
258 }
259}
260
/// Kotlin extractor backed by `deep_queries::analyze` (tree-sitter builds only).
///
/// Star imports are rendered as `<source>.*`; every other import kind is
/// reported by its source unchanged. Exports are passed through as returned by
/// the analysis.
#[cfg(feature = "tree-sitter")]
fn extract_kotlin_deps(content: &str) -> DepInfo {
    let analysis = deep_queries::analyze(content, "kt");

    let mut rendered_imports = Vec::new();
    for item in analysis.imports {
        let rendered = if matches!(item.kind, ImportKind::Star) {
            format!("{}.*", item.source)
        } else {
            item.source
        };
        rendered_imports.push(rendered);
    }

    DepInfo {
        imports: rendered_imports,
        exports: analysis.exports,
    }
}
278
279#[cfg(not(feature = "tree-sitter"))]
280fn extract_kotlin_deps(_content: &str) -> DepInfo {
281 DepInfo {
282 imports: Vec::new(),
283 exports: Vec::new(),
284 }
285}
286
/// Normalises a JS/TS module specifier for comparison with other paths.
///
/// Leading `./` segments are removed and at most one trailing script extension
/// (`.js`, `.ts`, `.tsx`, `.jsx`) is stripped. Only the final extension is
/// removed, so `widget.ts.js` becomes `widget.ts` rather than collapsing to
/// `widget`.
fn clean_import_path(path: &str) -> String {
    const SCRIPT_EXTENSIONS: [&str; 4] = [".js", ".ts", ".tsx", ".jsx"];

    let relative = path.trim_start_matches("./");
    // The extensions are mutually exclusive as suffixes, so the first hit is the only hit.
    SCRIPT_EXTENSIONS
        .iter()
        .find_map(|ext| relative.strip_suffix(*ext))
        .unwrap_or(relative)
        .to_string()
}
295
/// Normalises a path-like import target from any supported language.
///
/// Surrounding whitespace and leading `./` segments are removed, then at most
/// one trailing source extension from the list below is stripped. Only the
/// final extension is removed, so `build.sh.py` becomes `build.sh` rather than
/// collapsing to `build`.
fn clean_path_like(path: &str) -> String {
    const SOURCE_EXTENSIONS: &[&str] = &[
        ".js", ".ts", ".tsx", ".jsx", ".py", ".go", ".rs", ".c", ".cpp", ".h", ".hpp", ".php",
        ".dart", ".zig", ".sh", ".bash",
    ];

    let relative = path.trim().trim_start_matches("./");
    // The extensions are mutually exclusive as suffixes, so the first hit is the only hit.
    SOURCE_EXTENSIONS
        .iter()
        .find_map(|ext| relative.strip_suffix(*ext))
        .unwrap_or(relative)
        .to_string()
}
317
318fn extract_c_like_deps(content: &str) -> DepInfo {
319 let mut imports = HashSet::new();
320 let re = static_regex!(r#"^\s*#\s*include\s*[<"]([^">]+)[">]"#);
321 for line in content.lines() {
322 let trimmed = line.trim();
323 if let Some(caps) = re.captures(trimmed) {
324 let inc = caps[1].trim();
325 if inc.starts_with('.') || inc.contains('/') {
326 imports.insert(clean_path_like(inc));
327 }
328 }
329 }
330 DepInfo {
331 imports: imports.into_iter().collect(),
332 exports: Vec::new(),
333 }
334}
335
336fn extract_ruby_deps(content: &str) -> DepInfo {
337 let mut imports = HashSet::new();
338 let re = static_regex!(r#"^\s*require(?:_relative)?\s+['"]([^'"]+)['"]"#);
339 for line in content.lines() {
340 let trimmed = line.trim();
341 if let Some(caps) = re.captures(trimmed) {
342 let req = caps[1].trim();
343 if req.starts_with('.') || req.contains('/') {
344 imports.insert(clean_path_like(req));
345 }
346 }
347 }
348 DepInfo {
349 imports: imports.into_iter().collect(),
350 exports: Vec::new(),
351 }
352}
353
354fn extract_php_deps(content: &str) -> DepInfo {
355 let mut imports = HashSet::new();
356 let re = static_regex!(
357 r#"\b(?:require|require_once|include|include_once)\s*\(?\s*['"]([^'"]+)['"]"#
358 );
359 for line in content.lines() {
360 let trimmed = line.trim();
361 if let Some(caps) = re.captures(trimmed) {
362 let p = caps[1].trim();
363 if p.starts_with('.') || p.starts_with('/') {
364 imports.insert(clean_path_like(p));
365 }
366 }
367 }
368 DepInfo {
369 imports: imports.into_iter().collect(),
370 exports: Vec::new(),
371 }
372}
373
374fn extract_bash_deps(content: &str) -> DepInfo {
375 let mut imports = HashSet::new();
376 let re = static_regex!(r#"^\s*(?:source|\.)\s+['"]?([^'"\s;]+)['"]?"#);
377 for line in content.lines() {
378 let trimmed = line.trim();
379 if let Some(caps) = re.captures(trimmed) {
380 let p = caps[1].trim();
381 if p.starts_with('.') || p.starts_with('/') {
382 imports.insert(clean_path_like(p));
383 }
384 }
385 }
386 DepInfo {
387 imports: imports.into_iter().collect(),
388 exports: Vec::new(),
389 }
390}
391
/// Extracts the declared identifier from a line that starts with `export `.
///
/// Recognised shapes, after the optional modifiers `default`, `declare`,
/// `abstract` and `async`:
/// - `function name` and generator `function* name`
/// - `class`, `const`, `let`, `var`, `type`, `interface`, `enum`, `namespace`
/// - `const enum Name` (reported as `Name`, not `enum`)
///
/// Identifiers may contain letters, digits, `_` and `$`. Returns `None` for
/// lines that do not start with `export `, for anonymous declarations, and for
/// export lists or destructuring (`export { a }`, `export const { a } = …`).
fn extract_export_name(line: &str) -> Option<String> {
    const MODIFIERS: [&str; 4] = ["default ", "declare ", "abstract ", "async "];
    // `const enum ` must precede `const ` so the enum's own name is reported.
    const DECLARATIONS: [&str; 9] = [
        "const enum ",
        "class ",
        "const ",
        "let ",
        "var ",
        "type ",
        "interface ",
        "enum ",
        "namespace ",
    ];

    let mut rest = line.strip_prefix("export ")?.trim_start();
    for modifier in MODIFIERS {
        if let Some(stripped) = rest.strip_prefix(modifier) {
            rest = stripped.trim_start();
        }
    }

    let after_keyword = if let Some(after_fn) = rest.strip_prefix("function") {
        // Require a keyword boundary; `function(` is an anonymous function.
        if !after_fn.starts_with(|c: char| c.is_whitespace() || c == '*') {
            return None;
        }
        let after_fn = after_fn.trim_start();
        // The generator star may hug either the keyword or the name.
        after_fn.strip_prefix('*').unwrap_or(after_fn)
    } else {
        DECLARATIONS
            .iter()
            .find_map(|keyword| rest.strip_prefix(*keyword))?
    };

    let candidate = after_keyword.trim_start();
    let end = candidate
        .find(|c: char| !(c.is_alphanumeric() || c == '_' || c == '$'))
        .unwrap_or(candidate.len());
    let name = &candidate[..end];
    (!name.is_empty()).then(|| name.to_string())
}
420
/// End-to-end checks that drive `extract_deps` with small per-language snippets.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn c_include_relative_is_extracted() {
        let src = r#"#include "foo/bar.h"
#include <stdio.h>
"#;
        let deps = extract_deps(src, "c");
        assert!(deps.imports.contains(&"foo/bar".to_string()));
        assert!(
            !deps.imports.iter().any(|i| i.contains("stdio")),
            "system includes should not be treated as internal deps"
        );
    }

    #[test]
    fn ruby_require_relative_is_extracted() {
        let src = r#"require_relative "./lib/utils"
require "json"
"#;
        let deps = extract_deps(src, "rb");
        assert!(deps.imports.contains(&"lib/utils".to_string()));
        assert!(
            !deps.imports.iter().any(|i| i == "json"),
            "external requires should not be treated as internal deps"
        );
    }

    #[test]
    fn php_require_is_extracted() {
        let src = r#"<?php
require_once "./vendor/autoload.php";
include "http://example.com/a.php";
"#;
        let deps = extract_deps(src, "php");
        assert!(deps.imports.contains(&"vendor/autoload".to_string()));
        assert!(
            deps.imports.iter().all(|i| !i.starts_with("http")),
            "remote includes should not be treated as internal deps"
        );
    }

    #[test]
    fn bash_source_is_extracted() {
        let src = r#"#!/usr/bin/env bash
source "./scripts/env.sh"
. ../common.sh
"#;
        let deps = extract_deps(src, "sh");
        assert!(deps.imports.contains(&"scripts/env".to_string()));
        assert!(deps.imports.contains(&"../common".to_string()));
    }

    #[test]
    fn dart_import_relative_is_extracted() {
        let src = r#"import "./src/util.dart";
import "package:foo/bar.dart";
"#;
        let deps = extract_deps(src, "dart");
        assert!(deps.imports.contains(&"src/util".to_string()));
        assert!(
            deps.imports.iter().all(|i| !i.starts_with("package:")),
            "package imports should not be treated as internal deps"
        );
    }

    #[test]
    fn zig_import_is_extracted() {
        let src = r#"const m = @import("lib/math.zig");
const std = @import("std");
"#;
        let deps = extract_deps(src, "zig");
        assert!(deps.imports.contains(&"lib/math".to_string()));
        assert!(!deps.imports.iter().any(|i| i == "std"), "std is external");
    }

    // Without the `tree-sitter` feature the Kotlin extractor is a stub that returns
    // empty lists, so this test can only pass when the feature is enabled.
    #[cfg(feature = "tree-sitter")]
    #[test]
    fn kotlin_deps_are_extracted_from_ast() {
        let content = r"
package com.example.app

import com.example.services.UserService
import com.example.shared.*

class Feature
fun build(): Feature = Feature()
";
        let deps = extract_deps(content, "kt");
        assert!(deps
            .imports
            .contains(&"com.example.services.UserService".to_string()));
        assert!(deps.imports.contains(&"com.example.shared.*".to_string()));
        assert!(deps.exports.contains(&"Feature".to_string()));
        assert!(deps.exports.contains(&"build".to_string()));
    }
}