Skip to main content

impactsense_parser/
pipeline.rs

1use std::path::{Path, PathBuf};
2
3use thiserror::Error;
4
5use crate::graph::{
6    cleanup_incremental_targets_in_neo4j, persist_files_to_neo4j, GraphError, GraphPersistenceOptions,
7    Neo4jConfig,
8};
9use crate::scanner::{scan_and_parse, FileScanConfig, ScannerError};
10use crate::scanner_incremental::scan_and_parse_incremental_vector;
11
/// Identifies which pipeline entry point produced a [`PipelineReport`].
///
/// `Hash` is derived alongside the equality traits so a mode can be used as a
/// `HashMap`/`HashSet` key (for example when grouping reports by mode).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ParsingMode {
    /// Mode reported by `run_bootstrap_pipeline`.
    Bootstrap,
    /// Mode reported by `run_incremental_vector_pipeline`.
    IncrementalVector,
}
17
18#[derive(Debug, Clone)]
19pub struct ScanOptions {
20    pub follow_symlinks: bool,
21    pub max_file_size: Option<u64>,
22    pub graph: GraphPersistenceOptions,
23}
24
25impl Default for ScanOptions {
26    fn default() -> Self {
27        Self {
28            follow_symlinks: false,
29            max_file_size: Some(2 * 1024 * 1024),
30            graph: GraphPersistenceOptions::default(),
31        }
32    }
33}
34
35#[derive(Debug, Clone)]
36pub struct PipelineReport {
37    pub mode: ParsingMode,
38    pub parsed_files: usize,
39    pub cleanup_targets: usize,
40}
41
42#[derive(Debug, Error)]
43pub enum PipelineError {
44    #[error("scan/parse failed: {0}")]
45    Scanner(#[from] ScannerError),
46    #[error("neo4j persistence failed: {0}")]
47    Graph(#[from] GraphError),
48}
49
50fn build_scan_config(root: &Path, options: &ScanOptions) -> FileScanConfig {
51    let mut config = FileScanConfig::new(root);
52    config.follow_symlinks = options.follow_symlinks;
53    config.max_file_size = options.max_file_size;
54    config
55}
56
57pub async fn run_bootstrap_pipeline(
58    root: &Path,
59    options: &ScanOptions,
60    neo4j_cfg: Option<&Neo4jConfig>,
61    clean: bool,
62) -> Result<PipelineReport, PipelineError> {
63    let config = build_scan_config(root, options);
64    let parsed_files = scan_and_parse(&config)?;
65
66    if let Some(cfg) = neo4j_cfg {
67        persist_files_to_neo4j(
68            cfg,
69            root,
70            &parsed_files,
71            clean,
72            options.follow_symlinks,
73            &options.graph,
74        )
75        .await?;
76    }
77
78    Ok(PipelineReport {
79        mode: ParsingMode::Bootstrap,
80        parsed_files: parsed_files.len(),
81        cleanup_targets: 0,
82    })
83}
84
85pub async fn run_incremental_vector_pipeline(
86    root: &Path,
87    options: &ScanOptions,
88    neo4j_cfg: Option<&Neo4jConfig>,
89    parse_targets: &[String],
90    cleanup_targets: &[String],
91) -> Result<PipelineReport, PipelineError> {
92    let config = build_scan_config(root, options);
93    let parse_target_paths: Vec<PathBuf> = parse_targets.iter().map(PathBuf::from).collect();
94    let parsed_files = scan_and_parse_incremental_vector(&config, &parse_target_paths)?;
95
96    if let Some(cfg) = neo4j_cfg {
97        cleanup_incremental_targets_in_neo4j(cfg, root, cleanup_targets).await?;
98        persist_files_to_neo4j(
99            cfg,
100            root,
101            &parsed_files,
102            false,
103            options.follow_symlinks,
104            &options.graph,
105        )
106        .await?;
107    }
108
109    Ok(PipelineReport {
110        mode: ParsingMode::IncrementalVector,
111        parsed_files: parsed_files.len(),
112        cleanup_targets: cleanup_targets.len(),
113    })
114}