impactsense-parser 0.1.0

Multi-language static analysis: parse codebases into an in-memory dependency graph for impact analysis
Documentation
//! Multi-language static analysis: parse codebases into a dependency graph for impact analysis.
//!
//! # Quick start
//!
//! ```no_run
//! use std::path::Path;
//! use impactsense_parser::pipeline::ScanOptions;
//! use impactsense_parser::parse_project;
//! use impactsense_parser::store::GraphStore;
//!
//! let graph = parse_project(Path::new("/path/to/repo"), &ScanOptions::default()).unwrap();
//! let _callers = graph.callers("com.example.Service.method");
//! ```

use std::fmt;

use tree_sitter::{Language, Parser, Tree};
use tree_sitter_c_sharp::LANGUAGE as C_SHARP_LANGUAGE;
use tree_sitter_erlang::LANGUAGE as ERLANG_LANGUAGE;
use tree_sitter_go::LANGUAGE as GO_LANGUAGE;
use tree_sitter_java::LANGUAGE as JAVA_LANGUAGE;
use tree_sitter_javascript::LANGUAGE as JAVASCRIPT_LANGUAGE;
use tree_sitter_python::LANGUAGE as PYTHON_LANGUAGE;
use tree_sitter_rust::LANGUAGE as RUST_LANGUAGE;
use tree_sitter_typescript::{LANGUAGE_TSX, LANGUAGE_TYPESCRIPT};

pub mod go_resolve;
pub mod go_stdlib;
pub mod python_stdlib;
pub mod python_common_external;
pub mod scanner;
pub mod scanner_incremental;
pub mod compress;
pub mod graph;
pub mod pipeline;
pub mod schema;
pub mod edge;
pub mod ir;
pub mod extract;
pub mod store;
pub mod project;

pub use graph::ExtractOptions;
pub use graph::build_project_ir;
pub use project::{parse_project, refresh_files, ProjectError};
pub use store::{GraphStore, InMemoryGraph, ImpactReport, QueryLimits, SymbolRef};

/// Identifier for all languages this parser supports.
///
/// This enum is intentionally generic so the `parser` crate can be reused
/// across multiple codebases.
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so reordering the
/// variants changes how values compare and sort.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum LanguageId {
    /// The Java language.
    Java,
    /// The JavaScript language.
    JavaScript,
    /// The TypeScript language.
    TypeScript,
    /// The TSX (TypeScript + JSX) language.
    Tsx,
    /// The Python language.
    Python,
    /// The Rust language.
    Rust,
    /// The Go language.
    Go,
    /// The Erlang language.
    Erlang,
    /// The C# language.
    CSharp,
}
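// An `impl` block attaches methods or trait implementations to a type (a struct or an enum).
// `Display` is a standard-library trait that controls how a value is rendered by `{}`
// in `println!`, `format!`, and similar macros.
// The block below is the `Display` implementation for the `LanguageId` enum.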


impl fmt::Display for LanguageId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // No, `write_str(s)` doesn't mean "return s"—it writes the string `s` to the formatter `f`.
        // This is not printing to the console; it's formatting for output, like with `println!` or string formatting.
        // The result of `write_str(s)` (a fmt::Result) is what's "returned" from the function.
        let s = match self {
            LanguageId::Java => "java",
            LanguageId::JavaScript => "javascript",
            LanguageId::TypeScript => "typescript",
            LanguageId::Tsx => "tsx",
            LanguageId::Python => "python",
            LanguageId::Rust => "rust",
            LanguageId::Go => "go",
            LanguageId::Erlang => "erlang",
            LanguageId::CSharp => "c_sharp",
        };
        f.write_str(s)
    }
}

/// Errors that can be returned by the parsing layer.
//
// `thiserror` is an external Rust crate.
// `#[derive(thiserror::Error)]` asks the Rust compiler to generate the error-trait and
// message-formatting code for this enum, using each variant's `#[error("...")]` string
// as its message.
// `#[derive(Debug)]` asks the Rust compiler to generate code so values can be printed
// with `{:?}` for debugging.
// Each variant below is one kind of error this layer can report.
#[derive(Debug, thiserror::Error)]
pub enum ParserError {
    /// The requested language is not supported.
    ///
    /// The `String` payload is interpolated into the message (presumably the
    /// language name as supplied by the caller).
    #[error("unsupported language: {0}")]
    UnsupportedLanguage(String),
    /// Configuring the Tree-Sitter parser with a grammar failed.
    ///
    /// `MultiLangParser` fills the payload with the stringified error returned
    /// by `Parser::set_language`.
    #[error("failed to set Tree-Sitter language: {0}")]
    SetLanguage(String),
    /// Tree-Sitter produced no syntax tree for the given source.
    ///
    /// `language` is the language that was being parsed.
    #[error("failed to parse source for language {language}")]
    ParseFailed { language: LanguageId },
}

/// Map a [`LanguageId`] to the Tree-Sitter grammar (`Language`) it stands for.
///
/// Every variant has a grammar, so this implementation always returns `Ok`;
/// the `Result` in the signature is kept for callers that propagate it with `?`.
pub fn language_for(id: LanguageId) -> Result<Language, ParserError> {
    // Each arm converts the grammar constant exported by the matching
    // `tree_sitter_*` crate into a `tree_sitter::Language`.
    Ok(match id {
        LanguageId::CSharp => C_SHARP_LANGUAGE.into(),
        LanguageId::Erlang => ERLANG_LANGUAGE.into(),
        LanguageId::Go => GO_LANGUAGE.into(),
        LanguageId::Java => JAVA_LANGUAGE.into(),
        LanguageId::JavaScript => JAVASCRIPT_LANGUAGE.into(),
        LanguageId::Python => PYTHON_LANGUAGE.into(),
        LanguageId::Rust => RUST_LANGUAGE.into(),
        LanguageId::Tsx => LANGUAGE_TSX.into(),
        LanguageId::TypeScript => LANGUAGE_TYPESCRIPT.into(),
    })
}

/// A reusable multi-language Tree-Sitter parser.
///
/// This wrapper owns a single `tree_sitter::Parser` instance that can be
/// reused across parse calls and languages.
pub struct MultiLangParser {
    /// The wrapped Tree-Sitter parser, shared by every parse call on this value.
    parser: Parser,
    /// Language most recently set on `parser` through this wrapper, or `None`
    /// before any language has been set. Lets consecutive parses in the same
    /// language skip reconfiguring the parser.
    current_language: Option<LanguageId>,
}

impl MultiLangParser {
    /// Build a parser with no language selected yet.
    ///
    /// The language is chosen on the first call to `parse_source`. This
    /// implementation always returns `Ok`.
    pub fn new() -> Result<Self, ParserError> {
        let parser = Parser::new();
        Ok(Self {
            parser,
            current_language: None,
        })
    }

    /// Switch the wrapped parser to `lang_id` unless it is already selected.
    ///
    /// When the requested language matches the remembered one, this returns
    /// immediately without touching the parser.
    ///
    /// # Errors
    ///
    /// Returns [`ParserError::SetLanguage`] if Tree-Sitter rejects the grammar;
    /// the remembered language is left unchanged in that case.
    fn ensure_language(&mut self, lang_id: LanguageId) -> Result<(), ParserError> {
        if self.current_language != Some(lang_id) {
            let grammar = language_for(lang_id)?;
            if let Err(err) = self.parser.set_language(&grammar) {
                return Err(ParserError::SetLanguage(err.to_string()));
            }
            // Only remember the language once the parser has accepted it.
            self.current_language = Some(lang_id);
        }
        Ok(())
    }

    /// Parse `source` as `lang_id` and hand back the resulting syntax tree.
    ///
    /// This is the main entry point other codebases will call.
    ///
    /// # Errors
    ///
    /// Propagates any error from selecting the language, and returns
    /// [`ParserError::ParseFailed`] when Tree-Sitter yields no tree.
    pub fn parse_source(
        &mut self,
        lang_id: LanguageId,
        source: &str,
    ) -> Result<Tree, ParserError> {
        self.ensure_language(lang_id)?;
        // `None` as the second argument means there is no previous tree to reuse.
        match self.parser.parse(source, None) {
            Some(tree) => Ok(tree),
            None => Err(ParserError::ParseFailed { language: lang_id }),
        }
    }
}

/// One-shot helper: build a throwaway [`MultiLangParser`] and parse `source` with it.
///
/// A fresh parser is created on every call, so code that parses many inputs
/// should keep one `MultiLangParser` and call `parse_source` on it instead.
pub fn parse_once(lang_id: LanguageId, source: &str) -> Result<Tree, ParserError> {
    MultiLangParser::new()?.parse_source(lang_id, source)
}
// `cfg(test)` is a Rust attribute that conditionally compiles the annotated code only when running tests.
// It is commonly used to define test modules or helper functions that should only exist in test builds,
// so they do not end up in the final binary used for production.
 
// For example:
// #[cfg(test)]
// mod tests {
//     // test code here
// }

#[cfg(test)]
mod tests {
    use super::*;

    /// A minimal Java class must yield a syntax tree whose root node has children.
    #[test]
    fn parses_simple_java() {
        let java_source = "class A { void m() {} }";
        let root_child_count = parse_once(LanguageId::Java, java_source)
            .expect("java parse failed")
            .root_node()
            .child_count();
        assert!(root_child_count > 0);
    }
}