Skip to main content

impactsense_parser/
lib.rs

1//! Multi-language static analysis: parse codebases into a dependency graph for impact analysis.
2//!
3//! # Quick start
4//!
5//! ```no_run
6//! use std::path::Path;
7//! use impactsense_parser::pipeline::ScanOptions;
8//! use impactsense_parser::parse_project;
9//! use impactsense_parser::store::GraphStore;
10//!
11//! let graph = parse_project(Path::new("/path/to/repo"), &ScanOptions::default()).unwrap();
12//! let _callers = graph.callers("com.example.Service.method");
13//! ```
14
15use std::fmt;
16
17use tree_sitter::{Language, Parser, Tree};
18use tree_sitter_c_sharp::LANGUAGE as C_SHARP_LANGUAGE;
19use tree_sitter_erlang::LANGUAGE as ERLANG_LANGUAGE;
20use tree_sitter_go::LANGUAGE as GO_LANGUAGE;
21use tree_sitter_java::LANGUAGE as JAVA_LANGUAGE;
22use tree_sitter_javascript::LANGUAGE as JAVASCRIPT_LANGUAGE;
23use tree_sitter_python::LANGUAGE as PYTHON_LANGUAGE;
24use tree_sitter_rust::LANGUAGE as RUST_LANGUAGE;
25use tree_sitter_typescript::{LANGUAGE_TSX, LANGUAGE_TYPESCRIPT};
26
27pub mod go_resolve;
28pub mod go_stdlib;
29pub mod python_stdlib;
30pub mod python_common_external;
31pub mod scanner;
32pub mod scanner_incremental;
33pub mod compress;
34pub mod graph;
35pub mod pipeline;
36pub mod schema;
37pub mod edge;
38pub mod ir;
39pub mod extract;
40pub mod store;
41pub mod project;
42
43pub use graph::ExtractOptions;
44pub use graph::{build_project_ir, enrich_project_ir_code_bytes};
45pub use extract::scan_and_build_ir_async;
46pub use project::{parse_project, parse_project_async, refresh_files, ProjectError};
47pub use compress::{
48    compressor_language_from_ir_string, decompress_code_bytes, language_id_from_ir_string,
49    CompressorClient, CompressorConfig, CompressError,
50};
51pub use store::{
52    ExplainOptions, ExplainSourceOrigin, ExplainSymbolResult, GraphStore, InMemoryGraph,
53    ImpactReport, QueryLimits, SymbolRef,
54};
55
/// Identifier for all languages this parser supports.
///
/// This enum is intentionally generic so the `parser` crate can be reused
/// across multiple codebases.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so reordering
/// the variants changes how values compare and sort.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum LanguageId {
    /// The Java language.
    Java,
    /// The JavaScript language.
    JavaScript,
    /// The TypeScript language.
    TypeScript,
    /// The TSX (TypeScript + JSX) language.
    Tsx,
    /// The Python language.
    Python,
    /// The Rust language.
    Rust,
    /// The Go language.
    Go,
    /// The Erlang language.
    Erlang,
    /// The C# language.
    CSharp,
}

/// Formats a [`LanguageId`] as its lowercase textual name (e.g. `"java"`,
/// `"c_sharp"`).
impl fmt::Display for LanguageId {
    /// Writes the language name into `f`.
    ///
    /// Width, fill, alignment and precision flags on the format spec are
    /// honoured (e.g. `format!("{:>8}", LanguageId::Go)` is right-aligned);
    /// with a plain `{}` the output is exactly the bare name.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive on purpose: adding a variant must force a decision here.
        let name = match self {
            LanguageId::Java => "java",
            LanguageId::JavaScript => "javascript",
            LanguageId::TypeScript => "typescript",
            LanguageId::Tsx => "tsx",
            LanguageId::Python => "python",
            LanguageId::Rust => "rust",
            LanguageId::Go => "go",
            LanguageId::Erlang => "erlang",
            LanguageId::CSharp => "c_sharp",
        };
        // `pad` (unlike `write_str`) applies the formatter's padding options.
        f.pad(name)
    }
}
96
97/// Errors that can be returned by the parsing layer. 
98//Error is crate thisError is external Rust lib
99// #[derive] will tell go to generate code for this enum so it can be printed as a nice error message.
100// #[derive(Debug)] will tell go to generate code so it can be printed with {:?} for debugging.
101//error constants are defined here in enum
102#[derive(Debug, thiserror::Error)]
103pub enum ParserError {
104    #[error("unsupported language: {0}")]
105    UnsupportedLanguage(String),
106    #[error("failed to set Tree-Sitter language: {0}")]
107    SetLanguage(String),
108    #[error("failed to parse source for language {language}")]
109    ParseFailed { language: LanguageId },
110}
111
112/// Get the underlying Tree-Sitter `Language` for a given `LanguageId`.
113pub fn language_for(id: LanguageId) -> Result<Language, ParserError> {
114    let lang: Language = match id {
115        LanguageId::Java => JAVA_LANGUAGE.into(),
116        LanguageId::JavaScript => JAVASCRIPT_LANGUAGE.into(),
117        LanguageId::TypeScript => LANGUAGE_TYPESCRIPT.into(),
118        LanguageId::Tsx => LANGUAGE_TSX.into(),
119        LanguageId::Python => PYTHON_LANGUAGE.into(),
120        LanguageId::Rust => RUST_LANGUAGE.into(),
121        LanguageId::Go => GO_LANGUAGE.into(),
122        LanguageId::Erlang => ERLANG_LANGUAGE.into(),
123        LanguageId::CSharp => C_SHARP_LANGUAGE.into(),
124    };
125
126    Ok(lang)
127}
128
129/// A reusable multi-language Tree-Sitter parser.
130///
131/// This wrapper owns a single `tree_sitter::Parser` instance that can be
132/// reused across parse calls and languages.
133pub struct MultiLangParser {
134    parser: Parser,
135    current_language: Option<LanguageId>,
136}
137
138impl MultiLangParser {
139    /// Create a new parser without an initial language.
140    pub fn new() -> Result<Self, ParserError> {
141        Ok(Self {
142            parser: Parser::new(),
143            current_language: None,
144        })
145    }
146
147    /// Ensure the underlying parser is configured for the given language.
148    ///
149    /// This is cheap if the language is already set.
150    fn ensure_language(&mut self, lang_id: LanguageId) -> Result<(), ParserError> {
151        if self.current_language == Some(lang_id) {
152            return Ok(());
153        }
154
155        let lang = language_for(lang_id)?;
156        self.parser
157            .set_language(&lang)
158            .map_err(|e| ParserError::SetLanguage(e.to_string()))?;
159        self.current_language = Some(lang_id);
160        Ok(())
161    }
162
163    /// Parse a source string for the given language and return the syntax tree.
164    ///
165    /// This is the main entry point other codebases will call.
166    pub fn parse_source(
167        &mut self,
168        lang_id: LanguageId,
169        source: &str,
170    ) -> Result<Tree, ParserError> {
171        self.ensure_language(lang_id)?;
172        self.parser
173            .parse(source, None)
174            .ok_or(ParserError::ParseFailed { language: lang_id })
175    }
176}
177
178/// Convenience function for one-off parses.
179///
180/// For high-throughput use-cases, prefer reusing a `MultiLangParser`.
181pub fn parse_once(lang_id: LanguageId, source: &str) -> Result<Tree, ParserError> {
182    let mut parser = MultiLangParser::new()?;
183    parser.parse_source(lang_id, source)
184}
185// `cfg(test)` is a Rust attribute that conditionally compiles the annotated code only when running tests.
186// It is commonly used to define test modules or helper functions that should only exist in test builds,
187// so they do not end up in the final binary used for production.
188 
189// For example:
190// #[cfg(test)]
191// mod tests {
192//     // test code here
193// }
194
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: a minimal Java class parses into a tree whose root node
    /// has at least one child.
    #[test]
    fn parses_simple_java() {
        let java_source = r#"class A { void m() {} }"#;
        // Keep the tree bound to a local: the root node borrows from it.
        let syntax_tree = parse_once(LanguageId::Java, java_source).expect("java parse failed");
        assert!(syntax_tree.root_node().child_count() > 0);
    }
}