nornir 0.1.0

Companion to cargo: dependency tracking, release gating, deploy, benchmarks, and documentation assembly. Project-agnostic.
Documentation
//! Workspace walker that classifies files into [`super::Corpus`] buckets.
//!
//! Excludes hard-coded build/VCS dirs (`target/`, `.git/`, `.nornir/`,
//! `node_modules/`) plus any path in `forbidden` (relative to workspace
//! root). Classification is by filename + extension.

use std::path::{Path, PathBuf};

use walkdir::{DirEntry, WalkDir};
use super::Corpus;

/// One classified file found by [`walk_workspace`].
pub struct Candidate {
    /// Path of the file as yielded by the directory walk.
    pub path: PathBuf,
    /// Bucket assigned to the file by filename/extension classification.
    pub corpus: Corpus,
    /// Top-level workspace dir the file belongs to (e.g. `holger`, `znippy`).
    /// Empty when the file sits at the workspace root.
    pub repo: String,
}

/// Entry names that are always pruned from the walk. `is_excluded` compares
/// each entry's final path component against this list with an exact,
/// case-sensitive match, so a hit at any depth removes that entry (and, for
/// a directory, everything beneath it).
const HARD_EXCLUDES: &[&str] = &["target", ".git", ".nornir", "node_modules"];

/// Recursively walks `root` and returns every regular file that `classify`
/// assigns to a corpus.
///
/// * Symbolic links are not followed.
/// * Entries whose name is listed in `HARD_EXCLUDES` are pruned together
///   with their whole subtree. Only those names are filtered here; this
///   function takes no additional exclusion list.
/// * Entries that cannot be read (I/O or permission errors) are skipped
///   silently — the walk is best-effort.
/// * Files that `classify` does not recognise are dropped.
///
/// Each returned [`Candidate`] carries the walked path, its corpus, and the
/// `repo` string reported by `first_segment_under` (empty when that helper
/// yields nothing).
pub fn walk_workspace(root: &Path) -> Vec<Candidate> {
    let walker = WalkDir::new(root)
        .follow_links(false)
        .into_iter()
        // `filter_entry` is consulted for the root entry as well (depth 0).
        // Without the depth guard, a workspace whose own directory happens to
        // be called e.g. `target` or `node_modules` would be pruned entirely
        // and the walk would return nothing.
        .filter_entry(|e| e.depth() == 0 || !is_excluded(e));

    walker
        // Best-effort: drop entries the walker failed to read.
        .flatten()
        .filter(|entry| entry.file_type().is_file())
        .filter_map(|entry| {
            let p = entry.path();
            let corpus = classify(p)?;
            let repo = first_segment_under(root, p).unwrap_or_default();
            Some(Candidate {
                path: p.to_path_buf(),
                corpus,
                repo,
            })
        })
        .collect()
}

/// Reports whether `entry` should be pruned from the walk.
///
/// An entry is excluded when its file name is valid UTF-8 and equals one of
/// the names in `HARD_EXCLUDES` exactly. Names that are not valid UTF-8 are
/// never excluded.
fn is_excluded(entry: &DirEntry) -> bool {
    matches!(
        entry.file_name().to_str(),
        Some(name) if HARD_EXCLUDES.contains(&name)
    )
}

/// Returns the name of the top-level directory under `root` that contains `p`.
///
/// Yields `None` when:
/// * `p` does not live under `root`,
/// * `p` is `root` itself, or
/// * `p` sits directly in `root` — its only relative component is then the
///   file name, which is not a containing directory.
///
/// Non-UTF-8 directory names are converted lossily.
fn first_segment_under(root: &Path, p: &Path) -> Option<String> {
    let rel = p.strip_prefix(root).ok()?;
    let mut parts = rel.components();
    let top = parts.next()?;
    // Require at least one more component after the first: a lone component
    // is the file's own name, so the file belongs to no top-level dir.
    parts.next()?;
    Some(top.as_os_str().to_string_lossy().into_owned())
}

/// Maps a path to its [`Corpus`] bucket, or `None` if the file is of no
/// interest.
///
/// Matching is case-insensitive. Well-known file names take precedence over
/// the extension:
/// * `bench_history.jsonl` → `BenchHistory`
/// * anything starting with `changelog`, or `history.md` → `Changelog`
/// * `cargo.toml`, `nornir.toml`, `workspace.md` → `Config`
///
/// Otherwise the extension decides between `Docs`, `Code` and `Config`.
/// Paths without a final file-name component yield `None`.
fn classify(p: &Path) -> Option<Corpus> {
    let file_name = p.file_name()?.to_string_lossy().to_lowercase();
    let extension = match p.extension() {
        Some(raw) => raw.to_string_lossy().to_lowercase(),
        None => String::new(),
    };

    // Special-cased file names win over the generic extension table.
    let by_name = match file_name.as_str() {
        "bench_history.jsonl" => Some(Corpus::BenchHistory),
        "history.md" => Some(Corpus::Changelog),
        other if other.starts_with("changelog") => Some(Corpus::Changelog),
        "cargo.toml" | "nornir.toml" | "workspace.md" => Some(Corpus::Config),
        _ => None,
    };

    by_name.or_else(|| match extension.as_str() {
        "md" | "markdown" | "txt" | "rst" | "adoc" => Some(Corpus::Docs),
        "rs" | "py" | "java" | "kt" | "scala" | "go" | "ts" | "tsx" | "js" | "sh" | "rb"
        | "c" | "cc" | "cpp" | "h" | "hpp" => Some(Corpus::Code),
        "toml" | "yaml" | "yml" | "json" => Some(Corpus::Config),
        _ => None,
    })
}