Skip to main content

impactsense_parser/
lib.rs

1//! Multi-language static analysis: parse codebases into a dependency graph for impact analysis.
2//!
3//! # Quick start
4//!
5//! ```no_run
6//! use std::path::Path;
7//! use impactsense_parser::pipeline::ScanOptions;
8//! use impactsense_parser::parse_project;
9//! use impactsense_parser::store::GraphStore;
10//!
11//! let graph = parse_project(Path::new("/path/to/repo"), &ScanOptions::default()).unwrap();
12//! let _callers = graph.callers("com.example.Service.method");
13//! ```
14
15use std::fmt;
16
17use tree_sitter::{Language, Parser, Tree};
18use tree_sitter_c_sharp::LANGUAGE as C_SHARP_LANGUAGE;
19use tree_sitter_erlang::LANGUAGE as ERLANG_LANGUAGE;
20use tree_sitter_go::LANGUAGE as GO_LANGUAGE;
21use tree_sitter_java::LANGUAGE as JAVA_LANGUAGE;
22use tree_sitter_javascript::LANGUAGE as JAVASCRIPT_LANGUAGE;
23use tree_sitter_python::LANGUAGE as PYTHON_LANGUAGE;
24use tree_sitter_rust::LANGUAGE as RUST_LANGUAGE;
25use tree_sitter_typescript::{LANGUAGE_TSX, LANGUAGE_TYPESCRIPT};
26
27pub mod go_resolve;
28pub mod go_stdlib;
29pub mod python_stdlib;
30pub mod python_common_external;
31pub mod scanner;
32pub mod scanner_incremental;
33pub mod compress;
34pub mod graph;
35pub mod pipeline;
36pub mod schema;
37pub mod edge;
38pub mod ir;
39pub mod extract;
40pub mod store;
41pub mod project;
42
43pub use graph::ExtractOptions;
44pub use graph::build_project_ir;
45pub use project::{parse_project, refresh_files, ProjectError};
46pub use store::{GraphStore, InMemoryGraph, ImpactReport, QueryLimits, SymbolRef};
47
/// Identifies one of the source languages this parser supports.
///
/// The enum is deliberately generic so the parser crate can be reused
/// across multiple codebases.
///
/// Variant order is significant: the derived `PartialOrd`/`Ord` compare
/// variants by their declaration order.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum LanguageId {
    /// The Java language.
    Java,
    /// The JavaScript language.
    JavaScript,
    /// The TypeScript language.
    TypeScript,
    /// The TSX (TypeScript + JSX) language.
    Tsx,
    /// The Python language.
    Python,
    /// The Rust language.
    Rust,
    /// The Go language.
    Go,
    /// The Erlang language.
    Erlang,
    /// The C# language.
    CSharp,
}
// An `impl` block adds methods or trait implementations to a type (a struct or an enum).
// `Display` is a standard-library trait that controls how a value is rendered by `{}`
// in `println!`, `format!`, and related macros.
// The block below is the `Display` implementation for the `LanguageId` enum.
67
68
69impl fmt::Display for LanguageId {
70    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
71        // No, `write_str(s)` doesn't mean "return s"—it writes the string `s` to the formatter `f`.
72        // This is not printing to the console; it's formatting for output, like with `println!` or string formatting.
73        // The result of `write_str(s)` (a fmt::Result) is what's "returned" from the function.
74        let s = match self {
75            LanguageId::Java => "java",
76            LanguageId::JavaScript => "javascript",
77            LanguageId::TypeScript => "typescript",
78            LanguageId::Tsx => "tsx",
79            LanguageId::Python => "python",
80            LanguageId::Rust => "rust",
81            LanguageId::Go => "go",
82            LanguageId::Erlang => "erlang",
83            LanguageId::CSharp => "c_sharp",
84        };
85        f.write_str(s)
86    }
87}
88
89/// Errors that can be returned by the parsing layer. 
90//Error is crate thisError is external Rust lib
91// #[derive] will tell go to generate code for this enum so it can be printed as a nice error message.
92// #[derive(Debug)] will tell go to generate code so it can be printed with {:?} for debugging.
93//error constants are defined here in enum
94#[derive(Debug, thiserror::Error)]
95pub enum ParserError {
96    #[error("unsupported language: {0}")]
97    UnsupportedLanguage(String),
98    #[error("failed to set Tree-Sitter language: {0}")]
99    SetLanguage(String),
100    #[error("failed to parse source for language {language}")]
101    ParseFailed { language: LanguageId },
102}
103
104/// Get the underlying Tree-Sitter `Language` for a given `LanguageId`.
105pub fn language_for(id: LanguageId) -> Result<Language, ParserError> {
106    let lang: Language = match id {
107        LanguageId::Java => JAVA_LANGUAGE.into(),
108        LanguageId::JavaScript => JAVASCRIPT_LANGUAGE.into(),
109        LanguageId::TypeScript => LANGUAGE_TYPESCRIPT.into(),
110        LanguageId::Tsx => LANGUAGE_TSX.into(),
111        LanguageId::Python => PYTHON_LANGUAGE.into(),
112        LanguageId::Rust => RUST_LANGUAGE.into(),
113        LanguageId::Go => GO_LANGUAGE.into(),
114        LanguageId::Erlang => ERLANG_LANGUAGE.into(),
115        LanguageId::CSharp => C_SHARP_LANGUAGE.into(),
116    };
117
118    Ok(lang)
119}
120
121/// A reusable multi-language Tree-Sitter parser.
122///
123/// This wrapper owns a single `tree_sitter::Parser` instance that can be
124/// reused across parse calls and languages.
125pub struct MultiLangParser {
126    parser: Parser,
127    current_language: Option<LanguageId>,
128}
129
130impl MultiLangParser {
131    /// Create a new parser without an initial language.
132    pub fn new() -> Result<Self, ParserError> {
133        Ok(Self {
134            parser: Parser::new(),
135            current_language: None,
136        })
137    }
138
139    /// Ensure the underlying parser is configured for the given language.
140    ///
141    /// This is cheap if the language is already set.
142    fn ensure_language(&mut self, lang_id: LanguageId) -> Result<(), ParserError> {
143        if self.current_language == Some(lang_id) {
144            return Ok(());
145        }
146
147        let lang = language_for(lang_id)?;
148        self.parser
149            .set_language(&lang)
150            .map_err(|e| ParserError::SetLanguage(e.to_string()))?;
151        self.current_language = Some(lang_id);
152        Ok(())
153    }
154
155    /// Parse a source string for the given language and return the syntax tree.
156    ///
157    /// This is the main entry point other codebases will call.
158    pub fn parse_source(
159        &mut self,
160        lang_id: LanguageId,
161        source: &str,
162    ) -> Result<Tree, ParserError> {
163        self.ensure_language(lang_id)?;
164        self.parser
165            .parse(source, None)
166            .ok_or(ParserError::ParseFailed { language: lang_id })
167    }
168}
169
170/// Convenience function for one-off parses.
171///
172/// For high-throughput use-cases, prefer reusing a `MultiLangParser`.
173pub fn parse_once(lang_id: LanguageId, source: &str) -> Result<Tree, ParserError> {
174    let mut parser = MultiLangParser::new()?;
175    parser.parse_source(lang_id, source)
176}
177// `cfg(test)` is a Rust attribute that conditionally compiles the annotated code only when running tests.
178// It is commonly used to define test modules or helper functions that should only exist in test builds,
179// so they do not end up in the final binary used for production.
180 
181// For example:
182// #[cfg(test)]
183// mod tests {
184//     // test code here
185// }
186
#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal Java class must parse into a tree whose root has children.
    #[test]
    fn parses_simple_java() {
        let tree = parse_once(LanguageId::Java, r#"class A { void m() {} }"#)
            .expect("java parse failed");
        assert!(tree.root_node().child_count() > 0);
    }
}
198}