use std::fs;
use std::path::PathBuf;
use clap::{Parser, ValueEnum};
use impactsense_parser::extract::scan_and_build_ir;
use impactsense_parser::graph::{GraphPersistenceOptions, Neo4jConfig};
use impactsense_parser::pipeline::{
run_bootstrap_pipeline, run_incremental_vector_pipeline, ParsingMode, ScanOptions,
};
use impactsense_parser::ExtractOptions;
// Command-line arguments for the `impactsense-parser` binary.
//
// Field notes below are plain `//` comments on purpose: clap's derive turns `///` doc
// comments on fields into `--help` text, and these notes describe how `main` consumes each
// value rather than user-facing help.
#[derive(Parser, Debug)]
#[command(name = "impactsense-parser")]
#[command(about = "Multi-language Tree-Sitter parser and repository scanner", long_about = None)]
struct Cli {
    // Positional ROOT path; passed as the first argument to the IR scan and to both pipelines.
    #[arg(value_name = "ROOT")]
    root: PathBuf,

    // Forwarded unchanged to `ScanOptions::follow_symlinks`.
    #[arg(long)]
    follow_symlinks: bool,

    // Forwarded unchanged to `ScanOptions::max_file_size`; `None` when the flag is absent.
    // NOTE(review): presumably a size limit in bytes - confirm against `ScanOptions`.
    #[arg(long)]
    max_file_size: Option<u64>,

    // When set, `main` builds a `Neo4jConfig` from the three `neo4j_*` values and hands it to
    // the pipeline; otherwise the pipeline receives `None`. Combined with `--output-json` it
    // also decides whether the pipeline runs at all after the JSON export.
    #[arg(long)]
    push_to_neo4j: bool,

    // Passed to `run_bootstrap_pipeline` only; not read in incremental-vector mode.
    #[arg(long)]
    clean: bool,

    // NOTE(review): the defaults below bake an internal address and a credential into the
    // binary. Values are kept as-is for compatibility; consider requiring explicit input.
    #[arg(long, default_value = "bolt://10.166.1.220:7687")]
    neo4j_uri: String,

    #[arg(long, default_value = "neo4j")]
    neo4j_user: String,

    #[arg(long, default_value = "test1234")]
    neo4j_password: String,

    // When present, `main` scans ROOT, serializes the resulting IR as pretty-printed JSON and
    // writes it to this path.
    #[arg(long, value_name = "PATH")]
    output_json: Option<PathBuf>,

    // Selects which pipeline `main` runs; defaults to `CliParsingMode::Bootstrap`.
    #[arg(long, value_enum, default_value_t = CliParsingMode::Bootstrap)]
    mode: CliParsingMode,

    // Comma-separated list; `main` rejects incremental-vector mode when this is empty.
    #[arg(long, value_delimiter = ',', value_name = "PATHS")]
    parse_targets: Vec<String>,

    // Comma-separated list; only passed to the incremental-vector pipeline.
    #[arg(long, value_delimiter = ',', value_name = "PATHS")]
    cleanup_targets: Vec<String>,

    // Forwarded unchanged to `GraphPersistenceOptions::verbose_imports`.
    #[arg(long)]
    verbose_imports: bool,

    // Forwarded unchanged to `GraphPersistenceOptions::max_parse_warnings_per_file`.
    #[arg(long, default_value_t = 50)]
    max_parse_warnings_per_file: usize,

    // Becomes `CompressorConfig::base_url`.
    #[arg(long, default_value = impactsense_parser::graph::DEFAULT_COMPRESSOR_URL)]
    compressor_url: String,

    // Compression is on unless `no_compress_codeblocks` is set (that is the only one of the
    // two fields `main` reads). The pair override each other so that the flag given last on
    // the command line wins; without this, `--compress-codeblocks` had no effect at all and
    // could not cancel an earlier `--no-compress-codeblocks`.
    #[arg(long, overrides_with = "no_compress_codeblocks")]
    compress_codeblocks: bool,

    // Sets `CompressorConfig::enabled` to `false` unless overridden by a later
    // `--compress-codeblocks`.
    #[arg(long, overrides_with = "compress_codeblocks")]
    no_compress_codeblocks: bool,
}
// Values accepted by `--mode`. This is the CLI-side mirror of the library's `ParsingMode`;
// the `From` impl in this file converts between the two.
//
// Plain `//` comments are used because clap's `ValueEnum` derive turns `///` doc comments on
// variants into help text for the possible values.
#[derive(Debug, Clone, ValueEnum)]
enum CliParsingMode {
    // Default for `--mode` (see `default_value_t` on `Cli::mode`).
    Bootstrap,
    // Referred to as `incremental-vector` in this binary's own error message; `main` requires
    // a non-empty `--parse-targets` list for this mode.
    IncrementalVector,
}
/// Converts the CLI-facing mode into the library's [`ParsingMode`], variant for variant.
impl From<CliParsingMode> for ParsingMode {
    fn from(cli_mode: CliParsingMode) -> Self {
        // Exhaustive on purpose: adding a CLI variant must force a decision here.
        match cli_mode {
            CliParsingMode::IncrementalVector => Self::IncrementalVector,
            CliParsingMode::Bootstrap => Self::Bootstrap,
        }
    }
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let cli = Cli::parse();
let mode: ParsingMode = cli.mode.clone().into();
let scan_options = ScanOptions {
follow_symlinks: cli.follow_symlinks,
max_file_size: cli.max_file_size,
graph: GraphPersistenceOptions {
verbose_imports: cli.verbose_imports,
max_parse_warnings_per_file: cli.max_parse_warnings_per_file,
compressor: impactsense_parser::graph::CompressorConfig {
base_url: cli.compressor_url.clone(),
enabled: !cli.no_compress_codeblocks,
},
},
};
if let Some(out_path) = &cli.output_json {
let extract_opts = ExtractOptions::from(&scan_options.graph);
let ir = scan_and_build_ir(&cli.root, &extract_opts, &scan_options)?;
let json = serde_json::to_string_pretty(&ir)?;
fs::write(out_path, json)?;
println!(
"Wrote ProjectIr to {} ({} files, {} functions, {} edges)",
out_path.display(),
ir.files.len(),
ir.functions.len(),
ir.edges.len()
);
if !cli.push_to_neo4j {
return Ok(());
}
}
let neo4j_cfg = if cli.push_to_neo4j {
Some(Neo4jConfig {
uri: cli.neo4j_uri.clone(),
user: cli.neo4j_user.clone(),
password: cli.neo4j_password.clone(),
})
} else {
None
};
let report = match mode {
ParsingMode::Bootstrap => {
run_bootstrap_pipeline(&cli.root, &scan_options, neo4j_cfg.as_ref(), cli.clean).await?
}
ParsingMode::IncrementalVector => {
if cli.parse_targets.is_empty() {
return Err(
"--mode incremental-vector requires --parse-targets with at least one path"
.into(),
);
}
run_incremental_vector_pipeline(
&cli.root,
&scan_options,
neo4j_cfg.as_ref(),
&cli.parse_targets,
&cli.cleanup_targets,
)
.await?
}
};
println!(
"Pipeline finished: mode={:?}, parsed_files={}, cleanup_targets={}",
report.mode, report.parsed_files, report.cleanup_targets
);
Ok(())
}