impactsense-parser 0.1.1

Multi-language static analysis: parse codebases into an in-memory dependency graph for impact analysis
Documentation
//! Discover `go.mod` modules and resolve import paths to files on disk.
use std::collections::HashSet;
use std::fs;
use std::path::{Path, PathBuf};

use walkdir::WalkDir;

/// One Go module root: `module` path from go.mod and directory containing go.mod.
#[derive(Debug, Clone)]
pub struct GoModule {
    /// Module path taken from the `module` directive of the `go.mod` file.
    pub module_path: String,
    /// Directory that contains the `go.mod` file (its parent path as found during the walk).
    pub root_dir: PathBuf,
}

/// `replace old => ../local` from go.mod: imports under `old/...` resolve under `local_root`.
#[derive(Debug, Clone)]
pub struct GoReplace {
    /// Module path on the left-hand side of `=>`, without any version token.
    pub from: String,
    /// Right-hand path joined onto the directory of the declaring `go.mod`; not canonicalized.
    pub local_root: PathBuf,
}

/// Walk `root` for `go.mod` files and parse `module` lines.
///
/// Every regular file named `go.mod` below `root` is read; files without a usable `module`
/// directive are skipped. `follow_symlinks` is forwarded to the directory walker.
///
/// The result is sorted with the longest module path first, so a caller that scans the list
/// in order hits the most specific module prefix first. Ties are broken by module path and
/// then by root directory, which keeps the order independent of directory enumeration order.
///
/// # Errors
///
/// Returns the first directory-walk error (wrapped as `ErrorKind::Other`) or the first error
/// raised while reading a `go.mod` file.
pub fn discover_go_modules(root: &Path, follow_symlinks: bool) -> std::io::Result<Vec<GoModule>> {
    let mut out = Vec::new();
    for entry in WalkDir::new(root).follow_links(follow_symlinks) {
        let entry = entry.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
        if !entry.file_type().is_file() || entry.file_name() != "go.mod" {
            continue;
        }
        let path = entry.path();
        let src = fs::read_to_string(path)?;
        if let Some(module_path) = parse_go_mod_module(&src) {
            out.push(GoModule {
                module_path,
                root_dir: path.parent().unwrap_or(path).to_path_buf(),
            });
        }
    }
    out.sort_by(|a, b| {
        b.module_path
            .len()
            .cmp(&a.module_path.len())
            // Without a tie-break, equal-length paths kept the (unspecified) walk order.
            .then_with(|| a.module_path.cmp(&b.module_path))
            .then_with(|| a.root_dir.cmp(&b.root_dir))
    });
    Ok(out)
}

/// Collect `replace` directives from every `go.mod` under `root`.
///
/// Only replacements whose right-hand side is accepted as a local path by the parser are
/// returned; each local root is the replacement path joined onto the directory of the
/// declaring `go.mod`. `follow_symlinks` is forwarded to the directory walker.
///
/// The result is sorted with the longest `from` path first so the most specific replacement
/// is seen first by in-order scans. Ties are broken by `from` and then by `local_root`, which
/// keeps the order independent of directory enumeration order.
///
/// # Errors
///
/// Returns the first directory-walk error (wrapped as `ErrorKind::Other`) or the first error
/// raised while reading a `go.mod` file.
pub fn discover_go_replaces(root: &Path, follow_symlinks: bool) -> std::io::Result<Vec<GoReplace>> {
    let mut out = Vec::new();
    for entry in WalkDir::new(root).follow_links(follow_symlinks) {
        let entry = entry.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
        if !entry.file_type().is_file() || entry.file_name() != "go.mod" {
            continue;
        }
        let path = entry.path();
        let parent = path.parent().unwrap_or(path);
        let src = fs::read_to_string(path)?;
        out.extend(parse_go_mod_replaces(&src, parent));
    }
    out.sort_by(|a, b| {
        b.from
            .len()
            .cmp(&a.from.len())
            // Without a tie-break, equal-length entries kept the (unspecified) walk order.
            .then_with(|| a.from.cmp(&b.from))
            .then_with(|| a.local_root.cmp(&b.local_root))
    });
    Ok(out)
}

/// Extract local `replace` directives from the text of one `go.mod` file.
///
/// Handles both the single-line form (`replace old => ./new`) and the block form
/// (`replace (` ... `)`). Everything after `//` on a line is treated as a comment.
/// `go_mod_parent` is the directory of the `go.mod`; it is passed through to the line parser,
/// which anchors relative replacement paths on it.
fn parse_go_mod_replaces(src: &str, go_mod_parent: &Path) -> Vec<GoReplace> {
    let mut replaces = Vec::new();
    let mut inside_block = false;

    for raw_line in src.lines() {
        // Keep only the text in front of the first `//`.
        let code = match raw_line.split_once("//") {
            Some((before_comment, _)) => before_comment,
            None => raw_line,
        }
        .trim();
        if code.is_empty() {
            continue;
        }

        // A block opener is recognised before anything else, even inside an open block.
        if code.starts_with("replace (") {
            inside_block = true;
            continue;
        }

        if inside_block {
            if code == ")" {
                inside_block = false;
            } else {
                replaces.extend(parse_one_replace_line(code, go_mod_parent, false));
            }
            continue;
        }

        let Some(tail) = code.strip_prefix("replace") else {
            continue;
        };
        match tail.trim() {
            // `replace(` written without a space still opens a block.
            "(" => inside_block = true,
            directive => replaces.extend(parse_one_replace_line(directive, go_mod_parent, true)),
        }
    }

    replaces
}

/// Parse a single `old [version] => new [version]` replacement.
///
/// `line` is the directive text without the leading `replace` keyword. When
/// `had_replace_keyword` is true (single-line form), leading `(` characters on the left-hand
/// side are dropped. Trailing `,` on the line and trailing `)` on the right-hand side are
/// ignored. Returns `None` when there is no `=>`, when the right-hand side is not accepted as
/// a local path, or when the left-hand module path is empty.
fn parse_one_replace_line(line: &str, go_mod_parent: &Path, had_replace_keyword: bool) -> Option<GoReplace> {
    let directive = line.trim().trim_end_matches(',');
    let (old_side, new_side) = directive.split_once("=>")?;

    let old_side = match old_side.trim() {
        s if had_replace_keyword && s.starts_with('(') => s.trim_start_matches('(').trim(),
        s => s,
    };
    let from = strip_optional_module_version(old_side);

    let target = new_side.trim().trim_end_matches(')');
    let local_root = local_root_from_replace_rhs(go_mod_parent, target)?;

    if from.is_empty() {
        None
    } else {
        Some(GoReplace { from, local_root })
    }
}

/// Return the module path from the left-hand side of a `replace`, dropping any version.
///
/// The left-hand side is `path` or `path vX.Y.Z`; in both cases the module path is the first
/// whitespace-separated token, so no inspection of the version token is needed. Empty or
/// whitespace-only input yields an empty string, which callers can reject with `is_empty`.
fn strip_optional_module_version(lhs: &str) -> String {
    lhs.split_whitespace().next().unwrap_or_default().to_string()
}

/// Heuristic: does the right-hand token of a `replace` name a directory on disk?
///
/// Accepted as local:
/// - explicitly relative or absolute paths (leading `.`, `/` or `\`),
/// - Windows drive paths (`C:...`),
/// - bare directory names whose first path segment has no dot (`gen/kronos`, `local`).
///
/// Rejected: empty tokens and tokens whose first segment looks like a domain
/// (`github.com/fork/foo`, `example.com`). Those are module paths of remote replacements
/// and must not be joined onto the `go.mod` directory. This mirrors the "first segment
/// contains a dot" rule used by `is_likely_third_party_go_import`.
fn looks_like_local_replace_path(token: &str) -> bool {
    let t = token.trim();
    if t.is_empty() {
        return false;
    }
    if t.starts_with(|c: char| matches!(c, '.' | '/' | '\\')) {
        return true;
    }
    // Drive-letter form such as `C:\mods\x` or `C:/mods/x`.
    if t.len() >= 3 && t.as_bytes().get(1) == Some(&b':') {
        return true;
    }
    // Lenient fallback for paths written without `./`: only the first segment decides,
    // so `gen/v1.2/pkg` stays local while `github.com/x/y` does not.
    let first_segment = t.split(|c: char| c == '/' || c == '\\').next().unwrap_or(t);
    !first_segment.contains('.')
}

/// Turn the right-hand side of a `replace` into a directory under `go_mod_parent`.
///
/// Only the first whitespace-separated token of `rhs` is considered (a trailing version is
/// ignored). Returns `None` when `rhs` is blank or the token is not accepted by
/// `looks_like_local_replace_path`. Leading `./` prefixes are removed before joining.
fn local_root_from_replace_rhs(go_mod_parent: &Path, rhs: &str) -> Option<PathBuf> {
    rhs.split_whitespace()
        .next()
        .filter(|candidate| looks_like_local_replace_path(candidate))
        .map(|candidate| go_mod_parent.join(candidate.trim().trim_start_matches("./")))
}

/// Extract the module path from the text of a `go.mod` file.
///
/// Lines are scanned in order with `//` comments removed; the operand of the first `module`
/// directive that has a non-empty path is returned, with surrounding double quotes or
/// backticks stripped. The keyword must be followed by whitespace or an opening quote, so a
/// line that merely starts with the letters `module` (for example a requirement on
/// `modulex.io/y`) is not mistaken for the directive.
///
/// Returns `None` when no such directive exists.
fn parse_go_mod_module(src: &str) -> Option<String> {
    src.lines().find_map(|raw| {
        let line = raw.split("//").next().unwrap_or("").trim();
        let rest = line.strip_prefix("module")?;
        // Enforce a keyword boundary: `module foo`, `module "foo"`, but not `modulefoo`.
        if !rest.starts_with(|c: char| c.is_whitespace() || c == '"' || c == '`') {
            return None;
        }
        let path = rest
            .trim()
            .trim_matches(|c: char| c == '"' || c == '`')
            .trim();
        (!path.is_empty()).then(|| path.to_string())
    })
}

/// Heuristic for third-party / vanity imports: true when the first `/`-separated segment of
/// the trimmed import path contains a dot (i.e. looks like a domain name).
pub fn is_likely_third_party_go_import(import_path: &str) -> bool {
    let trimmed = import_path.trim();
    let head = match trimmed.split_once('/') {
        Some((first_segment, _)) => first_segment,
        None => trimmed,
    };
    head.contains('.')
}

/// Copy of `p` with every backslash turned into a forward slash.
fn norm_path_slash(p: &str) -> String {
    p.chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}

/// Slash-normalized text of `path`, canonicalized when the filesystem allows it.
///
/// If canonicalization fails (for example because the path does not exist) the path is used
/// exactly as given.
fn resolved_path_slash(path: &Path) -> String {
    let physical = match path.canonicalize() {
        Ok(real) => real,
        Err(_) => path.to_path_buf(),
    };
    let rendered = physical.display().to_string();
    norm_path_slash(&rendered)
}

/// Slash-normalized physical path for a known file string.
///
/// Absolute paths are used as they are. Relative paths are joined onto `repo_root` when one
/// is supplied; otherwise they are left relative. The result then goes through
/// `resolved_path_slash`.
fn resolved_known_file_path_slash(known_path: &str, repo_root: Option<&Path>) -> String {
    let candidate = Path::new(known_path);
    let anchored = match repo_root {
        Some(root) if !candidate.is_absolute() => root.join(known_path),
        _ => candidate.to_path_buf(),
    };
    resolved_path_slash(&anchored)
}

/// Slash-normalized text of the directory `dir`.
///
/// Delegates to `resolved_path_slash`, so directories get the same treatment as file paths
/// and the two can be compared as strings.
fn dir_path_slash(dir: &Path) -> String {
    resolved_path_slash(dir)
}

/// True if known path (possibly repo-relative) lies under `dir` on disk.
///
/// Both sides are resolved to slash-normalized strings and compared on a path-segment
/// boundary: the file must equal the directory or continue with `/` right after it, so
/// `pkg/ab/x.go` is not considered to be under `pkg/a`.
///
/// Trailing slashes on the directory are ignored, which lets the filesystem root (`/`) and
/// uncanonicalized paths such as `gen/kronos/` match their contents.
fn file_is_under_dir(file_path: &str, dir: &Path, repo_root: Option<&Path>) -> bool {
    let file = resolved_known_file_path_slash(file_path, repo_root);
    let dir_slash = dir_path_slash(dir);
    let base = dir_slash.trim_end_matches('/');
    file.strip_prefix(base)
        .map_or(false, |rest| rest.is_empty() || rest.starts_with('/'))
}

/// Resolve import to a scanned `.go` file path using `replace`, go.mod roots, then substring fallback.
///
/// Resolution order:
/// 1. `replaces`, in slice order: an import equal to `from`, or below `from/`, is mapped into
///    `local_root`.
/// 2. `modules`, in slice order: an import equal to `module_path`, or below `module_path/`,
///    is mapped into `root_dir`.
/// 3. Fallback: any known `.go` path that contains the import path as a substring. This is a
///    loose heuristic and may pick a file from an unrelated directory.
///
/// A candidate root that contains no known `.go` file does not end the search; the next
/// candidate (and finally the fallback) is tried. In every step the shortest matching path
/// wins, with ties broken lexicographically so the result does not depend on `HashSet`
/// iteration order.
///
/// `known_paths` entries may be absolute or relative to `repo_root`. Backslashes in
/// `import_path` are treated as `/`. A blank `import_path` resolves to `None`.
pub fn resolve_go_import_to_known_go_file(
    import_path: &str,
    known_paths: &HashSet<String>,
    modules: &[GoModule],
    replaces: &[GoReplace],
    repo_root: Option<&Path>,
) -> Option<String> {
    let norm = import_path.trim().replace('\\', "/");
    // An empty needle would match every path in the substring fallback below.
    if norm.is_empty() {
        return None;
    }

    // Replacements take precedence over module roots.
    let import_roots = replaces
        .iter()
        .map(|r| (r.from.as_str(), r.local_root.as_path()))
        .chain(
            modules
                .iter()
                .map(|m| (m.module_path.as_str(), m.root_dir.as_path())),
        );
    for (base, root_dir) in import_roots {
        if let Some(found) =
            pick_go_file_under_import_root(&norm, base, root_dir, known_paths, repo_root)
        {
            return Some(found);
        }
    }

    known_paths
        .iter()
        .filter(|p| {
            let pn = norm_path_slash(p);
            pn.ends_with(".go") && pn.contains(norm.as_str())
        })
        .min_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)))
        .cloned()
}

/// Map `norm` into `root_dir` when it equals `base` or lies below `base/`, then pick a known
/// `.go` file from the resulting package directory. Returns `None` when `base` is not a
/// prefix of the import or the directory holds no known file.
fn pick_go_file_under_import_root(
    norm: &str,
    base: &str,
    root_dir: &Path,
    known_paths: &HashSet<String>,
    repo_root: Option<&Path>,
) -> Option<String> {
    let pkg_dir = if norm == base {
        root_dir.to_path_buf()
    } else {
        // Require a `/` right after `base` so `kronosx/...` does not match base `kronos`.
        let suffix = norm.strip_prefix(base)?.strip_prefix('/')?;
        root_dir.join(suffix.replace('/', std::path::MAIN_SEPARATOR_STR))
    };
    pick_shortest_go_in_dir(known_paths, &pkg_dir, repo_root)
}

/// Pick the known `.go` file under `dir` with the shortest path string.
///
/// Only entries of `known_paths` ending in `.go` and accepted by `file_is_under_dir` are
/// considered; files in nested sub-directories qualify too. When several candidates have the
/// same length the lexicographically smallest one is returned, so repeated runs over the same
/// inputs give the same answer regardless of `HashSet` iteration order.
///
/// Returns `None` when nothing matches.
fn pick_shortest_go_in_dir(
    known_paths: &HashSet<String>,
    dir: &Path,
    repo_root: Option<&Path>,
) -> Option<String> {
    known_paths
        .iter()
        .filter(|p| p.ends_with(".go") && file_is_under_dir(p, dir, repo_root))
        .min_by(|a, b| a.len().cmp(&b.len()).then_with(|| a.cmp(b)))
        .cloned()
}

/// Unit tests for go.mod parsing and import resolution. The filesystem tests build a
/// throwaway tree with `tempfile` and register files in `known` relative to that tree.
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// The `module` directive is found even when comments and blank lines precede it.
    #[test]
    fn parses_module_line() {
        let g = r#"
// comment
module kronos

go 1.21
"#;
        assert_eq!(parse_go_mod_module(g).as_deref(), Some("kronos"));
    }

    /// An import below the module path resolves to a file inside the module root directory,
    /// even though the directory name (`kronos-preprod`) differs from the module name.
    #[test]
    fn resolves_import_via_module_root() {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path().join("kronos-preprod");
        fs::create_dir_all(root.join("connectors/mongoConnector")).unwrap();
        let go_file = root.join("connectors/mongoConnector/mongoConnector.go");
        fs::write(tmp.path().join("kronos-preprod/go.mod"), "module kronos\n").unwrap();
        fs::write(&go_file, "package mongoConnector\n").unwrap();

        let modules = discover_go_modules(tmp.path(), false).unwrap();
        assert_eq!(modules.len(), 1);
        assert_eq!(modules[0].module_path, "kronos");

        // Known paths are stored relative to the temp dir, which is passed as `repo_root`.
        let mut known = HashSet::new();
        let rel = go_file.strip_prefix(tmp.path()).unwrap_or(go_file.as_path());
        known.insert(rel.to_string_lossy().replace('\\', "/"));

        let resolved = resolve_go_import_to_known_go_file(
            "kronos/connectors/mongoConnector",
            &known,
            &modules,
            &[],
            Some(tmp.path()),
        )
        .expect("expected go.mod-aware resolution");
        assert!(resolved.ends_with("mongoConnector.go"));
        assert!(resolved.contains("connectors"));
    }

    /// A local `replace kronos => ./gen/kronos` redirects `kronos/proto` into `gen/kronos/proto`.
    #[test]
    fn resolves_import_via_go_mod_replace() {
        let tmp = tempfile::tempdir().unwrap();
        let svc = tmp.path().join("kronos-preprod");
        fs::create_dir_all(svc.join("handlers")).unwrap();
        let proto_pkg = svc.join("gen/kronos/proto");
        fs::create_dir_all(&proto_pkg).unwrap();
        let go_mod = r#"
module github.com/example/kronos-preprod

go 1.21

replace kronos => ./gen/kronos
"#;
        fs::write(svc.join("go.mod"), go_mod).unwrap();
        let stub = proto_pkg.join("models.pb.go");
        fs::write(&stub, "package proto\n").unwrap();
        fs::write(svc.join("handlers/h.go"), "package handlers\n").unwrap();

        let modules = discover_go_modules(tmp.path(), false).unwrap();
        let replaces = discover_go_replaces(tmp.path(), false).unwrap();
        assert_eq!(replaces.len(), 1);
        assert_eq!(replaces[0].from, "kronos");

        // Only the generated stub is registered as known; `handlers/h.go` is left out.
        let mut known = HashSet::new();
        let rel = stub.strip_prefix(tmp.path()).unwrap_or(stub.as_path());
        known.insert(rel.to_string_lossy().replace('\\', "/"));

        let resolved = resolve_go_import_to_known_go_file(
            "kronos/proto",
            &known,
            &modules,
            &replaces,
            Some(tmp.path()),
        )
            .expect("replace should map kronos/proto to gen/kronos/proto");
        assert!(resolved.ends_with("models.pb.go"));
    }
}