Skip to main content

CodeParser

Trait CodeParser 

Source
pub trait CodeParser<D: Doc + Send + Sync>: Send + Sync {
    // Required methods
    fn parse_content<'life0, 'life1, 'life2, 'async_trait>(
        &'life0 self,
        content: &'life1 str,
        language: SupportLang,
        context: &'life2 AnalysisContext,
    ) -> Pin<Box<dyn Future<Output = ServiceResult<ParsedDocument<D>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait;
    fn parse_file<'life0, 'life1, 'life2, 'async_trait>(
        &'life0 self,
        file_path: &'life1 Path,
        context: &'life2 AnalysisContext,
    ) -> Pin<Box<dyn Future<Output = ServiceResult<ParsedDocument<D>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait;
    fn parse_multiple_files<'life0, 'life1, 'life2, 'life3, 'async_trait>(
        &'life0 self,
        file_paths: &'life1 [&'life2 Path],
        context: &'life3 AnalysisContext,
    ) -> Pin<Box<dyn Future<Output = ServiceResult<Vec<ParsedDocument<D>>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait,
             'life3: 'async_trait;
    fn capabilities(&self) -> ParserCapabilities;
    fn supported_languages(&self) -> &[SupportLang];

    // Provided methods
    fn detect_language(&self, file_path: &Path) -> ServiceResult<SupportLang> { ... }
    fn validate_content(
        &self,
        content: &str,
        _language: SupportLang,
    ) -> ServiceResult<()> { ... }
    fn preprocess_content(
        &self,
        content: &str,
        _language: SupportLang,
    ) -> String { ... }
    fn postprocess_document<'life0, 'life1, 'async_trait>(
        &'life0 self,
        document: ParsedDocument<D>,
        context: &'life1 AnalysisContext,
    ) -> Pin<Box<dyn Future<Output = ServiceResult<ParsedDocument<D>>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait { ... }
    fn collect_basic_metadata<'life0, 'life1, 'life2, 'async_trait>(
        &'life0 self,
        _document: &'life1 mut ParsedDocument<D>,
        _context: &'life2 AnalysisContext,
    ) -> Pin<Box<dyn Future<Output = ServiceResult<()>> + Send + 'async_trait>>
       where Self: 'async_trait,
             'life0: 'async_trait,
             'life1: 'async_trait,
             'life2: 'async_trait { ... }
}
Expand description

Core parser service trait that abstracts ast-grep parsing functionality.

This trait provides async interfaces for parsing source code into ParsedDocument instances that preserve all ast-grep capabilities while enabling codebase-level analysis. The trait supports both single-file and multi-file parsing operations.

§Design Philosophy

  • Preserve Power: All ast-grep functionality remains accessible through ParsedDocument
  • Enable Intelligence: Add metadata needed for codebase-level graph analysis
  • Abstract Execution: Support different execution environments
  • Commercial Ready: Clear extension points for commercial parsing features

§Examples

§Single File Parsing

let parser = MyParser;
let context = AnalysisContext::default();

// Parse a Rust file
let document = parser.parse_file(
    std::path::Path::new("src/main.rs"),
    &context
).await?;

// Access underlying ast-grep functionality
let root = document.ast_grep_root();
let matches = root.root().find_all("fn $NAME($$$PARAMS) { $$$BODY }");

§Multi-File Codebase Parsing

let parser = MyParser;
let mut context = AnalysisContext::default();
context.scope = ExecutionScope::Codebase;

// Parse entire codebase
let files: Vec<&std::path::Path> = vec![
    std::path::Path::new("src/main.rs"),
    std::path::Path::new("src/lib.rs"),
    std::path::Path::new("src/parser.rs"),
];

let documents = parser.parse_multiple_files(&files, &context).await?;

// Each document preserves ast-grep capabilities + adds codebase metadata
for doc in &documents {
    println!("File: {:?}", doc.file_path);
    println!("Symbols: {:?}", doc.metadata().defined_symbols.keys().collect::<Vec<_>>());
}

Required Methods§

Source

fn parse_content<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, content: &'life1 str, language: SupportLang, context: &'life2 AnalysisContext, ) -> Pin<Box<dyn Future<Output = ServiceResult<ParsedDocument<D>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Parse source content into a ParsedDocument.

This method wraps ast-grep parsing with additional metadata collection for codebase-level analysis while preserving all ast-grep functionality.

§Arguments
  • content - Source code to parse
  • language - Programming language of the content
  • context - Analysis context containing execution configuration
§Returns

ParsedDocument that wraps ast-grep Root with additional metadata

Source

fn parse_file<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, file_path: &'life1 Path, context: &'life2 AnalysisContext, ) -> Pin<Box<dyn Future<Output = ServiceResult<ParsedDocument<D>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Parse a single file into a ParsedDocument.

Automatically detects the language from the file extension and reads the file's content. Collects symbols, imports, and other metadata for codebase-level analysis.

§Arguments
  • file_path - Path to source file to parse
  • context - Analysis context containing execution configuration
§Returns

ParsedDocument with both ast-grep functionality and codebase metadata

Source

fn parse_multiple_files<'life0, 'life1, 'life2, 'life3, 'async_trait>( &'life0 self, file_paths: &'life1 [&'life2 Path], context: &'life3 AnalysisContext, ) -> Pin<Box<dyn Future<Output = ServiceResult<Vec<ParsedDocument<D>>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait, 'life3: 'async_trait,

Parse multiple files, using parallel execution where the execution environment supports it.

Uses execution strategy from context to optimize for different environments:

  • Rayon for CLI parallel processing
  • Chunked execution for cloud workers
  • Sequential for single-threaded environments
§Arguments
  • file_paths - Slice of file paths to parse
  • context - Analysis context with execution configuration
§Returns

Vector of ParsedDocuments in the same order as the input paths

Source

fn capabilities(&self) -> ParserCapabilities

Get parser capabilities and configuration.

Describes what features this parser implementation supports, including performance characteristics and execution strategies.

Source

fn supported_languages(&self) -> &[SupportLang]

Get list of supported programming languages.

Returns a slice of the SupportLang values that this parser can handle. Used for language detection and validation.

Provided Methods§

Source

fn detect_language(&self, file_path: &Path) -> ServiceResult<SupportLang>

Detect language from file path.

Default implementation uses file extension matching. Implementations can override for more sophisticated detection.

Source

fn validate_content( &self, content: &str, _language: SupportLang, ) -> ServiceResult<()>

Validate content before parsing.

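The default implementation performs basic validity checks on the content; the underscore-prefixed _language parameter suggests these checks do not depend on the language. Implementations can override this method for language-specific validation.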

Source

fn preprocess_content(&self, content: &str, _language: SupportLang) -> String

Pre-process content before parsing.

Default implementation returns content unchanged. Implementations can override for content normalization.

Source

fn postprocess_document<'life0, 'life1, 'async_trait>( &'life0 self, document: ParsedDocument<D>, context: &'life1 AnalysisContext, ) -> Pin<Box<dyn Future<Output = ServiceResult<ParsedDocument<D>>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait,

Post-process parsed document.

Default implementation returns document unchanged. Implementations can override to add custom metadata collection.

Source

fn collect_basic_metadata<'life0, 'life1, 'life2, 'async_trait>( &'life0 self, _document: &'life1 mut ParsedDocument<D>, _context: &'life2 AnalysisContext, ) -> Pin<Box<dyn Future<Output = ServiceResult<()>> + Send + 'async_trait>>
where Self: 'async_trait, 'life0: 'async_trait, 'life1: 'async_trait, 'life2: 'async_trait,

Collect basic metadata for codebase-level analysis.

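The default implementation is described as extracting symbols, imports, exports, and function calls. Note, however, that both of its parameters (_document and _context) are underscore-prefixed in the signature, which suggests the current default may not use them; verify against the source before relying on this behavior. The method is intended to bridge ast-grep's file-level analysis to codebase-level intelligence.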

Implementors§