nebu-ctx 0.6.0

Lean-ctx runtime adapted for the NebuCtx Cloud-backed product.
Documentation
use crate::models::{ProjectLanguageStat, ProjectMetadataEnvelope, ProjectMetadataSummary};
use anyhow::{Context, Result};
use ignore::WalkBuilder;
use std::collections::BTreeMap;
use std::path::Path;

/// Names of well-known files or directories that are probed directly under the
/// project root.
///
/// `build_project_metadata` joins each name onto the canonicalized root and
/// reports the ones that exist, in the order listed here.
const MARKER_FILES: &[&str] = &[
    ".git",
    "Cargo.toml",
    "package.json",
    "tsconfig.json",
    "pyproject.toml",
    "requirements.txt",
    "README.md",
    "compose.yaml",
    "docker-compose.yml",
    "NebulaRAG.slnx",
];

pub fn build_project_metadata(project_root: &Path) -> Result<ProjectMetadataEnvelope> {
    let project_root = project_root
        .canonicalize()
        .with_context(|| format!("failed to canonicalize project root {}", project_root.display()))?;

    let mut total_file_count = 0u64;
    let mut source_file_count = 0u64;
    let mut language_counts = BTreeMap::<String, u64>::new();

    let walker = WalkBuilder::new(&project_root)
        .hidden(true)
        .git_ignore(true)
        .git_global(true)
        .git_exclude(true)
        .build();

    for entry in walker.filter_map(|entry| entry.ok()) {
        if entry.file_type().is_none_or(|file_type| file_type.is_dir()) {
            continue;
        }

        total_file_count += 1;
        let path = entry.into_path();
        if let Some(language) = infer_language(&path) {
            source_file_count += 1;
            *language_counts.entry(language.to_string()).or_default() += 1;
        }
    }

    let markers = MARKER_FILES
        .iter()
        .filter(|marker| project_root.join(marker).exists())
        .map(|marker| marker.to_string())
        .collect::<Vec<_>>();

    let mut languages = language_counts
        .into_iter()
        .map(|(language, file_count)| ProjectLanguageStat { language, file_count })
        .collect::<Vec<_>>();
    languages.sort_by(|left, right| right.file_count.cmp(&left.file_count).then_with(|| left.language.cmp(&right.language)));
    languages.truncate(8);

    Ok(ProjectMetadataEnvelope {
        schema_version: 1,
        summary: ProjectMetadataSummary {
            total_file_count,
            source_file_count,
            markers,
            languages,
        },
    })
}

/// Maps a file path to a coarse language label based on its extension.
///
/// The extension is compared ASCII case-insensitively, so `main.rs` and
/// `MAIN.RS` both resolve to `"rust"`. Returns `None` when the path has no
/// extension, the extension is not valid UTF-8, or it is not in the table.
fn infer_language(path: &Path) -> Option<&'static str> {
    /// Known extensions (lowercase, without the dot) grouped by language label.
    const LANGUAGE_BY_EXTENSION: &[(&[&str], &str)] = &[
        (&["rs"], "rust"),
        (&["cs"], "csharp"),
        (&["py"], "python"),
        (&["js", "jsx"], "javascript"),
        (&["ts", "tsx"], "typescript"),
        (&["go"], "go"),
        (&["java"], "java"),
        (&["kt", "kts"], "kotlin"),
        (&["swift"], "swift"),
        (&["php"], "php"),
        (&["rb"], "ruby"),
        (&["cpp", "cc", "cxx", "hpp", "h", "c"], "cpp"),
        (&["html", "css", "scss"], "web"),
    ];

    let extension = path.extension()?.to_str()?;

    // Compare without allocating a lowercased copy; this runs once per walked file.
    LANGUAGE_BY_EXTENSION
        .iter()
        .find(|(extensions, _)| {
            extensions
                .iter()
                .any(|candidate| candidate.eq_ignore_ascii_case(extension))
        })
        .map(|&(_, language)| language)
}

#[cfg(test)]
mod tests {
    use super::build_project_metadata;
    use std::fs;
    use tempfile::tempdir;

    /// Lays out a tiny throwaway project on disk and checks the reported file
    /// counts, detected markers, and the leading language entry.
    #[test]
    fn build_project_metadata_collects_markers_and_languages() {
        let workspace = tempdir().unwrap();
        let root = workspace.path();
        let src_dir = root.join("src");
        fs::create_dir_all(&src_dir).unwrap();

        // One marker file, two Rust sources, and one file with an unknown extension.
        let fixtures = [
            (root.join("Cargo.toml"), "[package]\nname='demo'\n"),
            (src_dir.join("main.rs"), "fn main() {}\n"),
            (src_dir.join("lib.rs"), "pub fn demo() {}\n"),
            (root.join("notes.txt"), "ignore me\n"),
        ];
        for (path, contents) in &fixtures {
            fs::write(path, contents).unwrap();
        }

        let metadata = build_project_metadata(root).unwrap();
        assert_eq!(metadata.schema_version, 1);

        let summary = &metadata.summary;
        assert_eq!(summary.total_file_count, 4);
        assert_eq!(summary.source_file_count, 2);
        assert!(summary.markers.iter().any(|marker| marker == "Cargo.toml"));

        let top_language = &summary.languages[0];
        assert_eq!(top_language.language, "rust");
        assert_eq!(top_language.file_count, 2);
    }
}