1use once_cell::sync::Lazy;
8use rayon::prelude::*;
9use scribe_core::Result;
10use std::collections::HashMap;
11use std::sync::{Arc, Mutex};
12use tree_sitter::{Language, Node, Parser, Tree, TreeCursor};
13
/// One import found in a source file.
#[derive(Clone, Debug)]
pub struct SimpleImport {
    /// Text of the imported module/path exactly as it was sliced out of the source
    /// (surrounding quotes are stripped by the extractors that handle string literals).
    pub module: String,
    /// 1-based line on which the import was found (tree-sitter row + 1).
    pub line_number: usize,
}
22
/// Languages for which import extraction is implemented.
///
/// The type is `Copy` and hashable so it can be passed by value and used as a map key.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum ImportLanguage {
    /// Python sources.
    Python,
    /// JavaScript sources.
    JavaScript,
    /// TypeScript sources.
    TypeScript,
    /// Go sources.
    Go,
    /// Rust sources.
    Rust,
}
32
33impl ImportLanguage {
34 pub fn tree_sitter_language(&self) -> Language {
36 match self {
37 ImportLanguage::Python => tree_sitter_python::language(),
38 ImportLanguage::JavaScript => tree_sitter_javascript::language(),
39 ImportLanguage::TypeScript => tree_sitter_typescript::language_typescript(),
40 ImportLanguage::Go => tree_sitter_go::language(),
41 ImportLanguage::Rust => tree_sitter_rust::language(),
42 }
43 }
44
45 pub fn from_extension(ext: &str) -> Option<Self> {
47 match ext.to_lowercase().as_str() {
48 "py" | "pyi" | "pyw" => Some(ImportLanguage::Python),
49 "js" | "mjs" | "cjs" => Some(ImportLanguage::JavaScript),
50 "ts" | "mts" | "cts" => Some(ImportLanguage::TypeScript),
51 "go" => Some(ImportLanguage::Go),
52 "rs" => Some(ImportLanguage::Rust),
53 _ => None,
54 }
55 }
56}
57
58static PARSER_POOL: Lazy<Arc<Mutex<HashMap<ImportLanguage, Vec<Parser>>>>> =
60 Lazy::new(|| Arc::new(Mutex::new(HashMap::new())));
61
/// Node kinds that `node_can_contain_imports` always accepts, so the traversal
/// visits them and descends into their children.
///
/// The first five entries are the same kinds that `is_import_node` treats as import
/// nodes. `"source_file"` and `"module"` are presumably root node kinds of some of
/// the supported grammars — TODO confirm against the grammars in use.
const IMPORT_NODE_TYPES: &[&str] = &[
    "import_statement",
    "import_from_statement",
    "use_declaration",
    "import_declaration",
    "import_spec",
    "source_file",
    "module",
];
72
/// Import extractor built on tree-sitter.
///
/// The struct itself carries no fields; the parsers it uses are taken from and
/// returned to the shared `PARSER_POOL`.
pub struct SimpleAstParser {}

impl std::fmt::Debug for SimpleAstParser {
    /// Prints a fixed placeholder for the parsers, since the struct has no fields of its own.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = formatter.debug_struct("SimpleAstParser");
        repr.field("parsers", &"[reusable pool]");
        repr.finish()
    }
}
85
86impl SimpleAstParser {
87 pub fn new() -> Result<Self> {
89 Self::ensure_parser_pool_initialized()?;
91 Ok(Self {})
92 }
93
94 fn ensure_parser_pool_initialized() -> Result<()> {
96 let mut pool = PARSER_POOL.lock().unwrap();
97
98 for language in [
99 ImportLanguage::Python,
100 ImportLanguage::JavaScript,
101 ImportLanguage::TypeScript,
102 ImportLanguage::Go,
103 ImportLanguage::Rust,
104 ] {
105 if !pool.contains_key(&language) {
106 let mut parser = Parser::new();
107 parser
108 .set_language(language.tree_sitter_language())
109 .map_err(|e| {
110 scribe_core::ScribeError::parse(format!(
111 "Failed to set tree-sitter language: {}",
112 e
113 ))
114 })?;
115 pool.insert(language, vec![parser]);
116 }
117 }
118
119 Ok(())
120 }
121
122 fn get_parser(&self, language: ImportLanguage) -> Result<Parser> {
124 let mut pool = PARSER_POOL.lock().unwrap();
125
126 if let Some(parsers) = pool.get_mut(&language) {
127 if let Some(parser) = parsers.pop() {
128 return Ok(parser);
129 }
130 }
131
132 let mut parser = Parser::new();
134 parser
135 .set_language(language.tree_sitter_language())
136 .map_err(|e| {
137 scribe_core::ScribeError::parse(format!(
138 "Failed to set tree-sitter language: {}",
139 e
140 ))
141 })?;
142 Ok(parser)
143 }
144
145 fn return_parser(&self, language: ImportLanguage, parser: Parser) {
147 let mut pool = PARSER_POOL.lock().unwrap();
148 pool.entry(language).or_insert_with(Vec::new).push(parser);
149 }
150
151 pub fn extract_imports(
153 &self,
154 content: &str,
155 language: ImportLanguage,
156 ) -> Result<Vec<SimpleImport>> {
157 let mut parser = self.get_parser(language)?;
159
160 let tree = parser
161 .parse(content, None)
162 .ok_or_else(|| scribe_core::ScribeError::parse("Failed to parse content"))?;
163
164 let mut imports = Vec::new();
165
166 let mut cursor = tree.walk();
168 self.extract_imports_with_cursor(&mut cursor, content, language, &mut imports)?;
169
170 self.return_parser(language, parser);
172
173 Ok(imports)
174 }
175
176 fn extract_imports_with_cursor(
178 &self,
179 cursor: &mut TreeCursor,
180 content: &str,
181 language: ImportLanguage,
182 imports: &mut Vec<SimpleImport>,
183 ) -> Result<()> {
184 let node = cursor.node();
185
186 if !self.node_can_contain_imports(node.kind()) {
188 return Ok(());
189 }
190
191 if self.is_import_node(node.kind()) {
193 self.extract_import_from_node(node, content, language, imports)?;
194 }
195
196 if cursor.goto_first_child() {
198 loop {
199 self.extract_imports_with_cursor(cursor, content, language, imports)?;
200 if !cursor.goto_next_sibling() {
201 break;
202 }
203 }
204 cursor.goto_parent();
205 }
206
207 Ok(())
208 }
209
210 fn node_can_contain_imports(&self, kind: &str) -> bool {
212 IMPORT_NODE_TYPES.contains(&kind)
213 || kind.contains("import")
214 || kind.contains("use")
215 || kind == "program"
216 || kind == "translation_unit"
217 || kind == "block"
218 || kind == "statement_block"
219 }
220
221 fn is_import_node(&self, kind: &str) -> bool {
223 matches!(
224 kind,
225 "import_statement"
226 | "import_from_statement"
227 | "use_declaration"
228 | "import_declaration"
229 | "import_spec"
230 )
231 }
232
233 fn extract_import_from_node(
235 &self,
236 node: Node,
237 content: &str,
238 language: ImportLanguage,
239 imports: &mut Vec<SimpleImport>,
240 ) -> Result<()> {
241 match language {
242 ImportLanguage::Python => {
243 self.extract_python_import_node(node, content, imports)?;
244 }
245 ImportLanguage::JavaScript | ImportLanguage::TypeScript => {
246 self.extract_js_ts_import_node(node, content, imports)?;
247 }
248 ImportLanguage::Go => {
249 self.extract_go_import_node(node, content, imports)?;
250 }
251 ImportLanguage::Rust => {
252 self.extract_rust_import_node(node, content, imports)?;
253 }
254 }
255 Ok(())
256 }
257
258 fn extract_python_import_node(
260 &self,
261 node: Node,
262 content: &str,
263 imports: &mut Vec<SimpleImport>,
264 ) -> Result<()> {
265 if node.kind() == "import_statement" {
266 let mut cursor = node.walk();
268 if cursor.goto_first_child() {
269 loop {
270 let child = cursor.node();
271 if child.kind() == "dotted_name" || child.kind() == "identifier" {
272 let module = self.node_text(child, content);
273 let line_number = child.start_position().row + 1;
274
275 imports.push(SimpleImport {
276 module,
277 line_number,
278 });
279 }
280 if !cursor.goto_next_sibling() {
281 break;
282 }
283 }
284 }
285 } else if node.kind() == "import_from_statement" {
286 if let Some(module_node) = node.child_by_field_name("module_name") {
287 let module = self.node_text(module_node, content);
288 let line_number = node.start_position().row + 1;
289 imports.push(SimpleImport {
290 module,
291 line_number,
292 });
293 }
294 }
295 Ok(())
296 }
297
298 fn extract_js_ts_import_node(
300 &self,
301 node: Node,
302 content: &str,
303 imports: &mut Vec<SimpleImport>,
304 ) -> Result<()> {
305 if node.kind() == "import_statement" {
306 let mut cursor = node.walk();
308 if cursor.goto_first_child() {
309 loop {
310 let child = cursor.node();
311 if child.kind() == "string" {
312 let mut module = self.node_text(child, content);
313 module = module.trim_matches('"').trim_matches('\'').to_string();
315 let line_number = node.start_position().row + 1;
316 imports.push(SimpleImport {
317 module,
318 line_number,
319 });
320 break;
321 }
322 if !cursor.goto_next_sibling() {
323 break;
324 }
325 }
326 }
327 }
328 Ok(())
329 }
330
331 fn extract_go_import_node(
333 &self,
334 node: Node,
335 content: &str,
336 imports: &mut Vec<SimpleImport>,
337 ) -> Result<()> {
338 if node.kind() == "import_spec" {
339 let mut cursor = node.walk();
340 if cursor.goto_first_child() {
341 loop {
342 let child = cursor.node();
343 if child.kind() == "interpreted_string_literal" {
344 let module = self.node_text(child, content);
345 let module = module.trim_matches('"').to_string();
346 let line_number = child.start_position().row + 1;
347
348 imports.push(SimpleImport {
349 module,
350 line_number,
351 });
352 }
353 if !cursor.goto_next_sibling() {
354 break;
355 }
356 }
357 }
358 }
359 Ok(())
360 }
361
362 fn extract_rust_import_node(
364 &self,
365 node: Node,
366 content: &str,
367 imports: &mut Vec<SimpleImport>,
368 ) -> Result<()> {
369 if node.kind() == "use_declaration" {
370 if let Some(use_tree) = node.child_by_field_name("argument") {
371 let module = self.node_text(use_tree, content);
372 let line_number = node.start_position().row + 1;
373
374 imports.push(SimpleImport {
375 module,
376 line_number,
377 });
378 }
379 }
380 Ok(())
381 }
382
383 fn node_text(&self, node: Node, content: &str) -> String {
385 content[node.start_byte()..node.end_byte()].to_string()
386 }
387
388 pub fn extract_imports_parallel(
390 &self,
391 files: &[(String, String, ImportLanguage)], ) -> Result<Vec<(String, Vec<SimpleImport>)>> {
393 files
395 .par_iter()
396 .map(|(path, content, language)| {
397 let imports = self.extract_imports(content, *language)?;
398 Ok((path.clone(), imports))
399 })
400 .collect()
401 }
402
403 pub fn extract_imports_batch(
405 &self,
406 contents: &[&str],
407 language: ImportLanguage,
408 ) -> Result<Vec<Vec<SimpleImport>>> {
409 contents
410 .par_iter()
411 .map(|content| self.extract_imports(content, language))
412 .collect()
413 }
414}
415
416impl Default for SimpleAstParser {
417 fn default() -> Self {
418 Self::new().expect("Failed to create SimpleAstParser")
419 }
420}