arbor_core/
parser.rs

1//! Parser module - the heart of code analysis.
2//!
3//! This module wraps Tree-sitter and provides a clean API for parsing
4//! source files into CodeNodes. Language detection is automatic based
5//! on file extension.
6
7use crate::error::{ParseError, Result};
8use crate::languages::{get_parser, LanguageParser};
9use crate::node::CodeNode;
10use std::fs;
11use std::path::Path;
12
13/// Parses a source file and extracts all code nodes.
14///
15/// This is the main entry point for parsing. It handles:
16/// - Reading the file from disk
17/// - Detecting the language from the extension
18/// - Parsing with Tree-sitter
19/// - Extracting meaningful code entities
20///
21/// # Example
22///
23/// ```no_run
24/// use arbor_core::parse_file;
25/// use std::path::Path;
26///
27/// let nodes = parse_file(Path::new("src/lib.rs")).unwrap();
28/// println!("Found {} nodes", nodes.len());
29/// ```
30pub fn parse_file(path: &Path) -> Result<Vec<CodeNode>> {
31    // Read the source file
32    let source = fs::read_to_string(path).map_err(|e| ParseError::io(path, e))?;
33
34    if source.is_empty() {
35        return Err(ParseError::EmptyFile(path.to_path_buf()));
36    }
37
38    // Get the appropriate parser for this file type
39    let parser =
40        detect_language(path).ok_or_else(|| ParseError::UnsupportedLanguage(path.to_path_buf()))?;
41
42    // Use the file path as a string for node IDs
43    let file_path = path.to_string_lossy().to_string();
44
45    parse_source(&source, &file_path, parser.as_ref())
46}
47
48/// Parses source code directly (useful for testing or in-memory content).
49///
50/// You need to provide a language parser explicitly since there's no
51/// file extension to detect from.
52pub fn parse_source(
53    source: &str,
54    file_path: &str,
55    lang_parser: &dyn LanguageParser,
56) -> Result<Vec<CodeNode>> {
57    // Create and configure Tree-sitter parser
58    let mut parser = tree_sitter::Parser::new();
59    parser
60        .set_language(&lang_parser.language())
61        .map_err(|e| ParseError::ParserError(format!("Failed to set language: {}", e)))?;
62
63    // Parse the source
64    let tree = parser
65        .parse(source, None)
66        .ok_or_else(|| ParseError::ParserError("Tree-sitter returned no tree".into()))?;
67
68    // Extract nodes using the language-specific extractor
69    let nodes = lang_parser.extract_nodes(&tree, source, file_path);
70
71    Ok(nodes)
72}
73
74/// Detects the programming language from a file path.
75///
76/// Returns None if we don't support the file's extension.
77pub fn detect_language(path: &Path) -> Option<Box<dyn LanguageParser>> {
78    let extension = path.extension()?.to_str()?;
79    get_parser(extension)
80}
81
#[cfg(test)]
mod tests {
    use super::*;
    use crate::node::NodeKind;

    /// True when `nodes` holds an entry with exactly this name and kind.
    fn has_node(nodes: &[CodeNode], name: &str, kind: NodeKind) -> bool {
        nodes
            .iter()
            .any(|node| node.name == name && node.kind == kind)
    }

    /// Known extensions resolve to a parser; an unknown one does not.
    #[test]
    fn test_detect_language() {
        let is_supported = |file: &str| detect_language(Path::new(file)).is_some();

        assert!(is_supported("foo.rs"));
        assert!(is_supported("bar.ts"));
        assert!(is_supported("baz.py"));
        assert!(!is_supported("unknown.xyz"));
    }

    /// A Rust snippet yields at least its function and its struct.
    #[test]
    fn test_parse_rust_source() {
        let source = r#"
            fn hello_world() {
                println!("Hello!");
            }

            pub struct User {
                name: String,
            }
        "#;

        let rust_parser = get_parser("rs").unwrap();
        let extracted = parse_source(source, "test.rs", rust_parser.as_ref()).unwrap();

        // Other nodes may be present as well; only these two are required.
        assert!(has_node(&extracted, "hello_world", NodeKind::Function));
        assert!(has_node(&extracted, "User", NodeKind::Struct));
    }

    /// A TypeScript snippet yields its exported function and class.
    #[test]
    fn test_parse_typescript_source() {
        let source = r#"
            export function greet(name: string): string {
                return `Hello, ${name}!`;
            }

            export class UserService {
                validate() {}
            }
        "#;

        let ts_parser = get_parser("ts").unwrap();
        let extracted = parse_source(source, "test.ts", ts_parser.as_ref()).unwrap();

        assert!(has_node(&extracted, "greet", NodeKind::Function));
        assert!(has_node(&extracted, "UserService", NodeKind::Class));
    }
}