// impactsense-parser 0.1.0
//
// Multi-language static analysis: parse codebases into an in-memory dependency
// graph for impact analysis.
use std::collections::{BTreeSet, HashSet};
use std::path::PathBuf;
use std::sync::Arc;
use std::time::Duration;

use anyhow::{Context, Result};
use notify::{EventKind, RecursiveMode};
use notify_debouncer_full::{new_debouncer, DebounceEventResult, Debouncer, FileIdMap};
use tokio::runtime::Handle;
use tokio::sync::RwLock;
use tracing::{error, info, warn};

use impactsense_parser::pipeline::ScanOptions;
use impactsense_parser::project::refresh_files;
use impactsense_parser::store::InMemoryGraph;

/// Path component names that exclude a changed path from incremental refresh.
///
/// `is_ignored_path` compares every `/`-separated component of the
/// root-relative path against this list using exact, case-sensitive equality.
const IGNORED_DIRS: &[&str] = &[
    ".git",
    "target",
    "node_modules",
    "dist",
    "build",
    ".impactsense",
];

/// File extensions (lower-case, without the leading dot) that the watcher reacts to.
///
/// `is_supported_source` lower-cases the candidate path before comparing, so
/// matching is case-insensitive.
const SOURCE_EXTENSIONS: &[&str] = &[
    "java", "js", "ts", "tsx", "py", "rs", "go", "erl", "hrl", "cs",
];

pub fn spawn_file_watcher(
    root: PathBuf,
    graph: Arc<RwLock<InMemoryGraph>>,
    scan: ScanOptions,
    debounce: Duration,
) -> Result<()> {
    let rt = Handle::current();
    let watch_root = root.clone();

    let mut debouncer: Debouncer<notify::RecommendedWatcher, FileIdMap> =
        new_debouncer(debounce, None, move |res: DebounceEventResult| {
            let Ok(events) = res else {
                if let Err(errors) = res {
                    error!("file watcher error: {errors:?}");
                }
                return;
            };

            let (cleanup, parse) = resolve_targets(&watch_root, &events);
            if cleanup.is_empty() && parse.is_empty() {
                return;
            }

            let graph = graph.clone();
            let scan = scan.clone();
            let root = watch_root.clone();
            rt.spawn(async move {
                let mut g = graph.write().await;
                match refresh_files(&mut g, &root, &cleanup, &parse, &scan) {
                    Ok(report) => info!(
                        cleanup = report.cleanup_targets,
                        parsed = report.parse_targets,
                        "incremental graph refresh"
                    ),
                    Err(e) => warn!("incremental refresh failed: {e}"),
                }
            });
        })?;

    debouncer
        .watch(&root, RecursiveMode::Recursive)
        .context("failed to watch workspace root")?;

    std::mem::forget(debouncer);
    Ok(())
}

/// Reduces a batch of debounced watcher events to the two path lists used for
/// an incremental refresh.
///
/// Returns `(cleanup, parse)`. Both lists hold root-relative, `/`-separated
/// paths that passed `rel_supported_path`; each list is de-duplicated and in
/// ascending order because it is collected from a `BTreeSet`.
///
/// Event kind mapping:
/// * `Create` -> `parse` only
/// * `Remove` -> `cleanup` only
/// * `Any`    -> ignored
/// * every other kind (including `Modify`) -> both lists
fn resolve_targets(
    root: &std::path::Path,
    events: &[notify_debouncer_full::DebouncedEvent],
) -> (Vec<String>, Vec<String>) {
    let mut stale: BTreeSet<String> = BTreeSet::new();
    let mut fresh: BTreeSet<String> = BTreeSet::new();

    for event in events {
        // Decide once per event which of the two lists its paths belong to.
        let (needs_cleanup, needs_parse) = match &event.kind {
            EventKind::Any => continue,
            EventKind::Create(_) => (false, true),
            EventKind::Remove(_) => (true, false),
            _ => (true, true),
        };

        let touched = event
            .paths
            .iter()
            .filter_map(|changed| rel_supported_path(root, changed));
        for rel in touched {
            if needs_cleanup {
                stale.insert(rel.clone());
            }
            if needs_parse {
                fresh.insert(rel);
            }
        }
    }

    let cleanup: Vec<String> = stale.into_iter().collect();
    let parse: Vec<String> = fresh.into_iter().collect();
    (cleanup, parse)
}

/// Converts `path` to a root-relative string and keeps it only when it is a
/// supported source file outside every ignored directory.
///
/// Returns `None` when `path_to_rel` yields nothing, when the extension is not
/// accepted by `is_supported_source`, or when `is_ignored_path` matches.
fn rel_supported_path(root: &std::path::Path, path: &std::path::Path) -> Option<String> {
    path_to_rel(root, path)
        .filter(|candidate| is_supported_source(candidate) && !is_ignored_path(candidate))
}

/// Expresses `path` relative to `root` as a string with `/` separators.
///
/// Every backslash in the (lossily converted) relative path is replaced by a
/// forward slash. Returns `None` when `path` does not start with `root` or when
/// the relative part is empty (i.e. `path` is `root` itself).
fn path_to_rel(root: &std::path::Path, path: &std::path::Path) -> Option<String> {
    let below_root = path.strip_prefix(root).ok()?;
    let normalized = below_root.to_string_lossy().replace('\\', "/");
    (!normalized.is_empty()).then_some(normalized)
}

/// Reports whether `rel` ends in `.` followed by one of `SOURCE_EXTENSIONS`,
/// compared case-insensitively.
///
/// Only the text after the last `.` of the whole string is considered, so a
/// dot inside a directory name does not count unless the file name itself has
/// no dot after it.
fn is_supported_source(rel: &str) -> bool {
    let lowered = rel.to_lowercase();
    match lowered.rsplit_once('.') {
        Some((_, suffix)) => SOURCE_EXTENSIONS.contains(&suffix),
        None => false,
    }
}

fn is_ignored_path(rel: &str) -> bool {
    let parts: HashSet<&str> = rel.split('/').collect();
    IGNORED_DIRS.iter().any(|d| parts.contains(d))
}