codeprism_lang_js/
parser.rs

1//! JavaScript/TypeScript parser implementation
2
3use crate::ast_mapper::AstMapper;
4use crate::error::{Error, Result};
5use crate::types::{Edge, Language, Node};
6use std::path::{Path, PathBuf};
7use tree_sitter::{Parser, Tree};
8
9/// Parse context for JavaScript/TypeScript files
10#[derive(Debug, Clone)]
11pub struct ParseContext {
12    /// Repository ID
13    pub repo_id: String,
14    /// File path being parsed
15    pub file_path: PathBuf,
16    /// Previous tree for incremental parsing
17    pub old_tree: Option<Tree>,
18    /// File content
19    pub content: String,
20}
21
22/// Parse result containing nodes and edges
23#[derive(Debug)]
24pub struct ParseResult {
25    /// The parsed tree
26    pub tree: Tree,
27    /// Extracted nodes
28    pub nodes: Vec<Node>,
29    /// Extracted edges
30    pub edges: Vec<Edge>,
31}
32
33/// JavaScript/TypeScript parser
34pub struct JavaScriptParser {
35    /// Tree-sitter parser for JavaScript
36    js_parser: Parser,
37    /// Tree-sitter parser for TypeScript
38    ts_parser: Parser,
39}
40
41impl JavaScriptParser {
42    /// Create a new JavaScript/TypeScript parser
43    pub fn new() -> Self {
44        let mut js_parser = Parser::new();
45        js_parser
46            .set_language(&tree_sitter_javascript::LANGUAGE.into())
47            .expect("Failed to load JavaScript grammar");
48
49        let mut ts_parser = Parser::new();
50        ts_parser
51            .set_language(&tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into())
52            .expect("Failed to load TypeScript grammar");
53
54        Self {
55            js_parser,
56            ts_parser,
57        }
58    }
59
60    /// Get the language for a file based on its extension
61    pub fn detect_language(path: &Path) -> Language {
62        match path.extension().and_then(|s| s.to_str()) {
63            Some("ts") | Some("tsx") => Language::TypeScript,
64            _ => Language::JavaScript,
65        }
66    }
67
68    /// Parse a JavaScript or TypeScript file
69    pub fn parse(&mut self, context: &ParseContext) -> Result<ParseResult> {
70        let language = Self::detect_language(&context.file_path);
71
72        // Select the appropriate parser
73        let parser = match language {
74            Language::JavaScript => &mut self.js_parser,
75            Language::TypeScript => &mut self.ts_parser,
76        };
77
78        // Parse the file
79        let tree = parser
80            .parse(&context.content, context.old_tree.as_ref())
81            .ok_or_else(|| Error::parse(&context.file_path, "Failed to parse file"))?;
82
83        // Extract nodes and edges
84        let mapper = AstMapper::new(
85            &context.repo_id,
86            context.file_path.clone(),
87            language,
88            &context.content,
89        );
90
91        let (nodes, edges) = mapper.extract(&tree)?;
92
93        Ok(ParseResult { tree, nodes, edges })
94    }
95}
96
97impl Default for JavaScriptParser {
98    fn default() -> Self {
99        Self::new()
100    }
101}
102
103#[cfg(test)]
104mod tests {
105    use super::*;
106
107    #[test]
108    fn test_detect_language() {
109        assert_eq!(
110            JavaScriptParser::detect_language(Path::new("test.js")),
111            Language::JavaScript
112        );
113        assert_eq!(
114            JavaScriptParser::detect_language(Path::new("test.ts")),
115            Language::TypeScript
116        );
117        assert_eq!(
118            JavaScriptParser::detect_language(Path::new("test.tsx")),
119            Language::TypeScript
120        );
121        assert_eq!(
122            JavaScriptParser::detect_language(Path::new("test.mjs")),
123            Language::JavaScript
124        );
125    }
126
127    #[test]
128    fn test_parse_simple_javascript() {
129        let mut parser = JavaScriptParser::new();
130        let context = ParseContext {
131            repo_id: "test_repo".to_string(),
132            file_path: PathBuf::from("test.js"),
133            old_tree: None,
134            content: "function hello() { return 'world'; }".to_string(),
135        };
136
137        let result = parser.parse(&context).unwrap();
138        assert!(!result.nodes.is_empty(), "Should not be empty");
139
140        // Should have at least a module node and a function node
141        assert!(result
142            .nodes
143            .iter()
144            .any(|n| matches!(n.kind, crate::types::NodeKind::Module)));
145        assert!(result
146            .nodes
147            .iter()
148            .any(|n| matches!(n.kind, crate::types::NodeKind::Function)));
149    }
150
151    #[test]
152    fn test_parse_typescript() {
153        let mut parser = JavaScriptParser::new();
154        let context = ParseContext {
155            repo_id: "test_repo".to_string(),
156            file_path: PathBuf::from("test.ts"),
157            old_tree: None,
158            content: "function hello(name: string): string { return `Hello, ${name}!`; }"
159                .to_string(),
160        };
161
162        let result = parser.parse(&context).unwrap();
163        assert!(!result.nodes.is_empty(), "Should not be empty");
164
165        // Should detect TypeScript
166        let func_node = result
167            .nodes
168            .iter()
169            .find(|n| matches!(n.kind, crate::types::NodeKind::Function))
170            .expect("Should have a function node");
171
172        assert_eq!(func_node.lang, Language::TypeScript);
173    }
174
175    #[test]
176    fn test_incremental_parsing() {
177        let mut parser = JavaScriptParser::new();
178
179        // First parse
180        let context1 = ParseContext {
181            repo_id: "test_repo".to_string(),
182            file_path: PathBuf::from("test.js"),
183            old_tree: None,
184            content: "function foo() { return 1; }".to_string(),
185        };
186        let result1 = parser.parse(&context1).unwrap();
187
188        // Second parse with small edit - change return value
189        // This is what incremental parsing is designed for
190        let context2 = ParseContext {
191            repo_id: "test_repo".to_string(),
192            file_path: PathBuf::from("test.js"),
193            old_tree: Some(result1.tree),
194            content: "function foo() { return 2; }".to_string(),
195        };
196        let result2 = parser.parse(&context2).unwrap();
197
198        // Both should have the same structure
199        assert_eq!(result1.nodes.len(), result2.nodes.len());
200
201        // Function should still be found
202        let func1 = result1
203            .nodes
204            .iter()
205            .find(|n| matches!(n.kind, crate::types::NodeKind::Function))
206            .unwrap();
207        let func2 = result2
208            .nodes
209            .iter()
210            .find(|n| matches!(n.kind, crate::types::NodeKind::Function))
211            .unwrap();
212
213        assert_eq!(func1.name, "foo");
214        assert_eq!(func2.name, "foo");
215
216        // For larger changes, don't use incremental parsing
217        let context3 = ParseContext {
218            repo_id: "test_repo".to_string(),
219            file_path: PathBuf::from("test.js"),
220            old_tree: None, // Don't use old tree for major changes
221            content: "function foo() { return 2; }\nfunction bar() { return 3; }".to_string(),
222        };
223        let result3 = parser.parse(&context3).unwrap();
224
225        // Should find both functions
226        let func_count = result3
227            .nodes
228            .iter()
229            .filter(|n| matches!(n.kind, crate::types::NodeKind::Function))
230            .count();
231        assert_eq!(func_count, 2);
232    }
233
234    #[test]
235    fn test_debug_tree_sitter_nodes() {
236        let mut parser = JavaScriptParser::new();
237        let context = ParseContext {
238            repo_id: "test_repo".to_string(),
239            file_path: PathBuf::from("test.js"),
240            old_tree: None,
241            content: "function foo() {}\nfunction bar() {}".to_string(),
242        };
243
244        let tree = parser.js_parser.parse(&context.content, None).unwrap();
245        let mut cursor = tree.walk();
246
247        fn print_tree(cursor: &mut tree_sitter::TreeCursor, depth: usize) {
248            let node = cursor.node();
249            println!(
250                "{}{} [{:?}]",
251                "  ".repeat(depth),
252                node.kind(),
253                node.start_byte()..node.end_byte()
254            );
255
256            if cursor.goto_first_child() {
257                loop {
258                    print_tree(cursor, depth + 1);
259                    if !cursor.goto_next_sibling() {
260                        break;
261                    }
262                }
263                cursor.goto_parent();
264            }
265        }
266
267        print_tree(&mut cursor, 0);
268    }
269
270    #[test]
271    fn test_parse_multiple_functions() {
272        let mut parser = JavaScriptParser::new();
273        let context = ParseContext {
274            repo_id: "test_repo".to_string(),
275            file_path: PathBuf::from("test.js"),
276            old_tree: None,
277            content: "function foo() {}\nfunction bar() {}".to_string(),
278        };
279
280        let result = parser.parse(&context).unwrap();
281
282        println!("Parsed nodes:");
283        for node in &result.nodes {
284            println!("  {:?} - {}", node.kind, node.name);
285        }
286
287        // Should have a module and two functions
288        assert_eq!(result.nodes.len(), 3, "Should have 3 items");
289        assert!(result
290            .nodes
291            .iter()
292            .any(|n| matches!(n.kind, crate::types::NodeKind::Module)));
293
294        let func_nodes: Vec<_> = result
295            .nodes
296            .iter()
297            .filter(|n| matches!(n.kind, crate::types::NodeKind::Function))
298            .collect();
299
300        assert_eq!(func_nodes.len(), 2, "Should have 2 items");
301        assert!(func_nodes.iter().any(|n| n.name == "foo"));
302        assert!(func_nodes.iter().any(|n| n.name == "bar"));
303    }
304
305    #[test]
306    fn test_debug_ast_mapper() {
307        let mut parser = JavaScriptParser::new();
308        let context = ParseContext {
309            repo_id: "test_repo".to_string(),
310            file_path: PathBuf::from("test.js"),
311            old_tree: None,
312            content: "function foo() {}\nfunction bar() {}".to_string(),
313        };
314
315        let tree = parser.js_parser.parse(&context.content, None).unwrap();
316
317        // Create mapper and extract
318        let mapper = crate::ast_mapper::AstMapper::new(
319            &context.repo_id,
320            context.file_path.clone(),
321            Language::JavaScript,
322            &context.content,
323        );
324
325        let (nodes, edges) = mapper.extract(&tree).unwrap();
326
327        println!("Extracted nodes:");
328        for node in &nodes {
329            println!("  {:?} - {} at {:?}", node.kind, node.name, node.span);
330        }
331
332        println!("\nExtracted edges:");
333        for edge in &edges {
334            println!("  {:?}", edge.kind);
335        }
336    }
337}