impactsense-parser 0.1.0

Multi-language static analysis: parse codebases into an in-memory dependency graph for impact analysis
Documentation
use std::fs;
use std::path::PathBuf;

use clap::{Parser, ValueEnum};
use impactsense_parser::extract::scan_and_build_ir;
use impactsense_parser::graph::{GraphPersistenceOptions, Neo4jConfig};
use impactsense_parser::pipeline::{
    run_bootstrap_pipeline, run_incremental_vector_pipeline, ParsingMode, ScanOptions,
};
use impactsense_parser::ExtractOptions;

/// Simple CLI wrapper around the multi-language parser + scanner.
#[derive(Parser, Debug)]
#[command(name = "impactsense-parser")]
#[command(about = "Multi-language Tree-Sitter parser and repository scanner", long_about = None)]
struct Cli {
    /// Root directory to scan (codebase root).
    #[arg(value_name = "ROOT")]
    root: PathBuf,

    /// Follow symbolic links while traversing the directory tree.
    #[arg(long)]
    follow_symlinks: bool,

    /// Maximum file size to parse, in bytes. Omit to have no size limit.
    #[arg(long)]
    max_file_size: Option<u64>,

    /// If set, push parsed File nodes into Neo4j after scanning.
    #[arg(long)]
    push_to_neo4j: bool,

    /// If set, delete all existing nodes and relationships before pushing to Neo4j.
    // Only the bootstrap pipeline call receives this flag; the incremental-vector
    // call does not take it.
    #[arg(long)]
    clean: bool,

    /// Neo4j Bolt URI used when `--push-to-neo4j` is set.
    // NOTE(review): the default is a fixed private address; confirm it is meant to
    // ship as the default for everyone running this tool.
    #[arg(long, default_value = "bolt://10.166.1.220:7687")]
    neo4j_uri: String,

    /// Neo4j user name used when `--push-to-neo4j` is set.
    #[arg(long, default_value = "neo4j")]
    neo4j_user: String,

    /// Neo4j password used when `--push-to-neo4j` is set.
    // NOTE(review): a default credential is hard-coded here and clap prints default
    // values in `--help`; consider requiring the caller to supply it instead.
    #[arg(long, default_value = "test1234")]
    neo4j_password: String,

    /// Write ProjectIr JSON (in-memory graph export) to PATH.
    // Without `--push-to-neo4j`, the program exits right after writing this file.
    #[arg(long, value_name = "PATH")]
    output_json: Option<PathBuf>,

    /// Parsing pipeline to run.
    #[arg(long, value_enum, default_value_t = CliParsingMode::Bootstrap)]
    mode: CliParsingMode,

    /// Comma-separated paths handed to the incremental-vector pipeline as parse targets;
    /// that mode requires at least one.
    #[arg(long, value_delimiter = ',', value_name = "PATHS")]
    parse_targets: Vec<String>,

    /// Comma-separated paths handed to the incremental-vector pipeline as cleanup targets.
    // NOTE(review): what "cleanup" does to these paths is decided inside the pipeline
    // and is not visible from this file.
    #[arg(long, value_delimiter = ',', value_name = "PATHS")]
    cleanup_targets: Vec<String>,

    /// Enable the `verbose_imports` graph persistence option.
    #[arg(long)]
    verbose_imports: bool,

    /// Maximum number of parse warnings per file, forwarded to the graph persistence options.
    #[arg(long, default_value_t = 50)]
    max_parse_warnings_per_file: usize,

    /// Base URL for the compressor.
    #[arg(long, default_value = impactsense_parser::graph::DEFAULT_COMPRESSOR_URL)]
    compressor_url: String,

    /// Enable the compressor. It is also enabled whenever `--push-to-neo4j` is set.
    #[arg(long)]
    compress_codeblocks: bool,
}

/// Values accepted by the `--mode` option.
///
/// Each variant converts one-to-one into the library's `ParsingMode` through the
/// `From` impl in this file.
#[derive(Clone, Debug, ValueEnum)]
enum CliParsingMode {
    // Default mode; `main` runs the bootstrap pipeline for it.
    Bootstrap,
    // `main` runs the incremental-vector pipeline for it and rejects an empty
    // `--parse-targets` list.
    IncrementalVector,
}

/// Bridges the CLI-facing mode enum to the library's [`ParsingMode`].
impl From<CliParsingMode> for ParsingMode {
    /// Returns the library variant with the same name as `cli_mode`.
    fn from(cli_mode: CliParsingMode) -> Self {
        // Exhaustive on purpose: a new CLI variant must be mapped explicitly.
        match cli_mode {
            CliParsingMode::IncrementalVector => Self::IncrementalVector,
            CliParsingMode::Bootstrap => Self::Bootstrap,
        }
    }
}

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let cli = Cli::parse();
    let mode: ParsingMode = cli.mode.clone().into();
    let scan_options = ScanOptions {
        follow_symlinks: cli.follow_symlinks,
        max_file_size: cli.max_file_size,
        graph: GraphPersistenceOptions {
            verbose_imports: cli.verbose_imports,
            max_parse_warnings_per_file: cli.max_parse_warnings_per_file,
            compressor: impactsense_parser::graph::CompressorConfig {
                base_url: cli.compressor_url.clone(),
                enabled: cli.compress_codeblocks || cli.push_to_neo4j,
            },
        },
    };

    if let Some(out_path) = &cli.output_json {
        let extract_opts = ExtractOptions::from(&scan_options.graph);
        let ir = scan_and_build_ir(&cli.root, &extract_opts, &scan_options)?;
        let json = serde_json::to_string_pretty(&ir)?;
        fs::write(out_path, json)?;
        println!(
            "Wrote ProjectIr to {} ({} files, {} functions, {} edges)",
            out_path.display(),
            ir.files.len(),
            ir.functions.len(),
            ir.edges.len()
        );
        if !cli.push_to_neo4j {
            return Ok(());
        }
    }

    let neo4j_cfg = if cli.push_to_neo4j {
        Some(Neo4jConfig {
            uri: cli.neo4j_uri.clone(),
            user: cli.neo4j_user.clone(),
            password: cli.neo4j_password.clone(),
        })
    } else {
        None
    };

    let report = match mode {
        ParsingMode::Bootstrap => {
            run_bootstrap_pipeline(&cli.root, &scan_options, neo4j_cfg.as_ref(), cli.clean).await?
        }
        ParsingMode::IncrementalVector => {
            if cli.parse_targets.is_empty() {
                return Err(
                    "--mode incremental-vector requires --parse-targets with at least one path"
                        .into(),
                );
            }
            run_incremental_vector_pipeline(
                &cli.root,
                &scan_options,
                neo4j_cfg.as_ref(),
                &cli.parse_targets,
                &cli.cleanup_targets,
            )
            .await?
        }
    };

    println!(
        "Pipeline finished: mode={:?}, parsed_files={}, cleanup_targets={}",
        report.mode, report.parsed_files, report.cleanup_targets
    );

    Ok(())
}