codebase-graph 1.1.5

Native codebaseGraph CLI and MCP server for local code knowledge graphs.
use crate::error::NativeError;
use crate::hash;
use crate::profiles::ProfileSet;
use crate::protocol::{ManifestDiff, NativeSyntaxMaterializationRequest, SourceSnapshot};
use std::collections::{BTreeMap, BTreeSet};
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

pub(crate) struct SourceScan {
    pub(crate) snapshots: BTreeMap<String, SourceSnapshot>,
    pub(crate) supported: BTreeMap<String, SourceSnapshot>,
    pub(crate) diagnostics: Vec<String>,
    pub(crate) diff: ManifestDiff,
}

pub(crate) fn scan_source_state(
    request: &NativeSyntaxMaterializationRequest,
) -> Result<SourceScan, NativeError> {
    let source_root = PathBuf::from(&request.source_root);
    let profiles = ProfileSet::new(&request.profiles);
    let excluded_parts = request
        .excluded_parts
        .iter()
        .map(|part| part.as_str())
        .collect::<BTreeSet<_>>();
    let candidate_paths = request
        .candidate_paths
        .iter()
        .map(|path| normalize_relative_pattern(path))
        .collect::<BTreeSet<_>>();
    let mut snapshots = BTreeMap::new();
    let mut supported = BTreeMap::new();
    let mut diagnostics = Vec::new();

    for entry in WalkDir::new(&source_root).sort_by_file_name() {
        let entry = entry.map_err(|error| NativeError::InvalidInput(error.to_string()))?;
        let path = entry.path();
        if path == source_root {
            continue;
        }
        if !entry.file_type().is_file() {
            continue;
        }
        let relative_path = relative_path(path, &source_root)?;
        if !candidate_paths.is_empty() && !candidate_paths.contains(&relative_path) {
            continue;
        }
        if is_excluded(path, &source_root, &excluded_parts)
            || ignored_by_patterns(&relative_path, request)
        {
            diagnostics.push(format!("Ignored file: {relative_path}"));
            continue;
        }
        let language = profiles.language_for_path(path);
        let content_hash = if language.is_some() {
            hash::sha256_file(path)?
        } else {
            String::new()
        };
        if language.is_none() {
            diagnostics.push(format!("Skipped unsupported file: {relative_path}"));
        }
        let snapshot = SourceSnapshot {
            path: relative_path.clone(),
            absolute_path: path.to_string_lossy().to_string(),
            content_hash,
            language,
        };
        if snapshot.language.is_some() {
            supported.insert(relative_path.clone(), snapshot.clone());
        }
        snapshots.insert(relative_path, snapshot);
    }

    let diff = compute_diff(request, &supported, &candidate_paths);
    Ok(SourceScan {
        snapshots,
        supported,
        diagnostics,
        diff,
    })
}

fn ignored_by_patterns(relative_path: &str, request: &NativeSyntaxMaterializationRequest) -> bool {
    if !request.include_patterns.is_empty()
        && !matches_any_pattern(relative_path, &request.include_patterns)
    {
        return true;
    }
    matches_any_pattern(relative_path, &request.ignore_patterns)
        || matches_any_pattern(relative_path, &request.exclude_patterns)
}

fn matches_any_pattern(path: &str, patterns: &[String]) -> bool {
    patterns
        .iter()
        .map(|pattern| pattern.trim())
        .filter(|pattern| !pattern.is_empty() && !pattern.starts_with('#'))
        .any(|pattern| glob_matches(path, pattern))
}

fn glob_matches(path: &str, pattern: &str) -> bool {
    let pattern = normalize_relative_pattern(pattern);
    if pattern.ends_with('/') {
        return path.starts_with(pattern.trim_end_matches('/'));
    }
    if !pattern.contains('/') && wildcard_match(path.rsplit('/').next().unwrap_or(path), &pattern) {
        return true;
    }
    wildcard_match(path, &pattern)
}

fn normalize_relative_pattern(pattern: &str) -> String {
    pattern
        .trim()
        .trim_start_matches("./")
        .replace('\\', "/")
        .to_string()
}

fn wildcard_match(text: &str, pattern: &str) -> bool {
    wildcard_match_bytes(text.as_bytes(), pattern.as_bytes())
}

fn wildcard_match_bytes(text: &[u8], pattern: &[u8]) -> bool {
    let (mut text_index, mut pattern_index) = (0_usize, 0_usize);
    let mut star_index = None;
    let mut match_index = 0_usize;
    while text_index < text.len() {
        if pattern_index < pattern.len()
            && (pattern[pattern_index] == b'?' || pattern[pattern_index] == text[text_index])
        {
            text_index += 1;
            pattern_index += 1;
        } else if pattern_index < pattern.len() && pattern[pattern_index] == b'*' {
            star_index = Some(pattern_index);
            match_index = text_index;
            pattern_index += 1;
        } else if let Some(star) = star_index {
            pattern_index = star + 1;
            match_index += 1;
            text_index = match_index;
        } else {
            return false;
        }
    }
    while pattern_index < pattern.len() && pattern[pattern_index] == b'*' {
        pattern_index += 1;
    }
    pattern_index == pattern.len()
}

fn is_excluded(path: &Path, source_root: &Path, excluded_parts: &BTreeSet<&str>) -> bool {
    path.strip_prefix(source_root)
        .ok()
        .map(|relative| {
            relative.components().any(|component| {
                excluded_parts.contains(component.as_os_str().to_string_lossy().as_ref())
            })
        })
        .unwrap_or(false)
}

fn relative_path(path: &Path, source_root: &Path) -> Result<String, NativeError> {
    Ok(path
        .strip_prefix(source_root)
        .map_err(|error| NativeError::InvalidInput(error.to_string()))?
        .to_string_lossy()
        .replace('\\', "/"))
}

fn compute_diff(
    request: &NativeSyntaxMaterializationRequest,
    supported: &BTreeMap<String, SourceSnapshot>,
    candidate_paths: &BTreeSet<String>,
) -> ManifestDiff {
    let Some(previous) = &request.previous_manifest else {
        return ManifestDiff {
            added: supported.keys().cloned().collect(),
            modified: Vec::new(),
            unchanged: Vec::new(),
            deleted: Vec::new(),
            force_rebuild: true,
        };
    };
    let incompatible = previous.schema_version != request.manifest_schema_version
        || previous.ontology != request.ontology
        || previous.parser_version != request.parser_version;
    if request.mode == "full" || incompatible {
        return ManifestDiff {
            added: supported.keys().cloned().collect(),
            modified: Vec::new(),
            unchanged: Vec::new(),
            deleted: previous
                .files
                .keys()
                .filter(|path| candidate_paths.is_empty() || candidate_paths.contains(*path))
                .cloned()
                .collect(),
            force_rebuild: request.mode == "full" || candidate_paths.is_empty(),
        };
    }

    let mut added = Vec::new();
    let mut modified = Vec::new();
    let mut unchanged = Vec::new();
    for (path, snapshot) in supported {
        match previous.files.get(path) {
            None => added.push(path.clone()),
            Some(entry)
                if entry.content_hash != snapshot.content_hash
                    || entry.language != snapshot.language.clone().unwrap_or_default() =>
            {
                modified.push(path.clone())
            }
            Some(_) => unchanged.push(path.clone()),
        }
    }
    let deleted: Vec<String> = previous
        .files
        .keys()
        .filter(|path| {
            !supported.contains_key(*path)
                && (candidate_paths.is_empty() || candidate_paths.contains(*path))
        })
        .cloned()
        .collect();
    if request.atomic_rebuild && (!added.is_empty() || !modified.is_empty() || !deleted.is_empty())
    {
        return ManifestDiff {
            added: supported.keys().cloned().collect(),
            modified: Vec::new(),
            unchanged: Vec::new(),
            deleted: previous.files.keys().cloned().collect(),
            force_rebuild: true,
        };
    }

    ManifestDiff {
        added,
        modified,
        unchanged,
        deleted,
        force_rebuild: false,
    }
}