koala-drift 1.0.4

Wiki ↔ code drift detector.
Documentation
//! `arch.textual-claim` — `wiki/architecture.md` makes textual claims
//! like `core 不依赖 domain` or `crates/koala-core does not depend on
//! crates/koala-cli`. Parse those, route to the dep-direction rule.
//! If the claim contradicts the actual `Cargo.toml` graph, fail.

use crate::check::{Check, Finding, FindingKind, Severity};
use crate::scan::tagged_lines;
use koala_core::invariant::Context;
use std::collections::HashMap;
use std::fs;
use std::path::PathBuf;

/// Path of the architecture document, relative to the workspace root.
/// Used both to locate the file on disk and as the `file` of every finding.
const ARCH_FILE: &str = "wiki/architecture.md";

/// Check that compares "A does not depend on B" sentences found in
/// `wiki/architecture.md` with the dependencies declared by the crates
/// under `crates/`.
pub struct ArchClaims;

impl Check for ArchClaims {
    /// Identifier of this check; also stamped on every finding it emits.
    fn id(&self) -> &'static str {
        "arch.textual-claim"
    }

    /// Human-readable statement of the rule this check enforces.
    fn intent(&self) -> &'static str {
        "Sentences like `A 不依赖 B` / `A does not depend on B` in \
         architecture.md must agree with the workspace's Cargo \
         dependency graph."
    }

    /// Scans the architecture document and returns one finding per claim
    /// that the collected dependency edges contradict.
    ///
    /// An unreadable or missing document yields no findings. Lines flagged
    /// as being inside a fenced block are ignored.
    fn run(&self, ctx: &Context) -> Vec<Finding> {
        let text = match fs::read_to_string(ctx.root().join(ARCH_FILE)) {
            Ok(text) => text,
            // Nothing to verify when the document cannot be read.
            Err(_) => return Vec::new(),
        };
        let edges = collect_dep_edges(ctx.root());

        let mut findings = Vec::new();
        for line in tagged_lines(&text).into_iter().filter(|l| !l.in_fence) {
            for claim in parse_claims(line.text) {
                // `check_claim` only returns a message for contradicted claims.
                let Some(violation) = check_claim(&claim, &edges) else {
                    continue;
                };
                let summary = format!(
                    "claim: `{}` does not depend on `{}`",
                    claim.from, claim.to
                );
                findings.push(Finding {
                    check_id: self.id(),
                    file: PathBuf::from(ARCH_FILE),
                    line: line.line_no,
                    claim: summary,
                    kind: FindingKind::AcceptanceTestRefMissing,
                    severity: Severity::Hard,
                    fix_hint: Some(violation),
                });
            }
        }
        findings
    }
}

/// One negative dependency statement ("`from` does not depend on `to`")
/// extracted from a line of text.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Claim {
    /// Token found directly before the negation marker: the crate that is
    /// claimed NOT to have the dependency.
    from: String,
    /// Token found directly after the negation marker: the crate that is
    /// claimed not to be depended upon.
    to: String,
}

/// Phrases that, when found between two tokens, mark a
/// "left side does not depend on right side" claim.
///
/// No marker is a substring of another, so a single occurrence in the text
/// is matched by at most one entry. The English markers are written in
/// lowercase.
const NEGATION_MARKERS: &[&str] = &[
    // Chinese: "does not depend on"
    "不依赖",
    // Chinese: "is forbidden to depend on"
    "禁止依赖",
    // Chinese: "may not depend on"
    "不可依赖",
    "does not depend on",
    "must not depend on",
    "should not depend on",
    "must never depend on",
    "is forbidden to depend on",
];

/// Extracts every "`A` <negation marker> `B`" claim from one line of text.
///
/// For each entry of `NEGATION_MARKERS`, every occurrence in `line` is
/// located; the identifier-shaped token right before it becomes `from` and
/// the one right after it becomes `to`. Occurrences lacking a token on
/// either side are skipped. Claims are returned grouped by marker (in
/// table order), then by position within the line.
///
/// Markers are matched ASCII-case-insensitively, so emphatic spellings such
/// as `MUST NOT depend on` are recognised too. The extracted tokens keep
/// their original case. This relies on the markers themselves being stored
/// in lowercase.
fn parse_claims(line: &str) -> Vec<Claim> {
    // ASCII lowercasing rewrites single bytes only, so every byte offset
    // (and char boundary) in `haystack` is also valid in `line`.
    let haystack = line.to_ascii_lowercase();
    let mut out = Vec::new();
    for marker in NEGATION_MARKERS {
        // Start of the not-yet-searched tail; also bounds the `before` slice
        // so a token is never picked up from the far side of an earlier
        // occurrence of the same marker.
        let mut cursor = 0;
        while let Some(rel) = haystack[cursor..].find(*marker) {
            let start = cursor + rel;
            let end = start + marker.len();
            let before = &line[cursor..start];
            let after = &line[end..];
            if let (Some(from), Some(to)) =
                (extract_token_before(before), extract_token_after(after))
            {
                out.push(Claim { from, to });
            }
            cursor = end;
        }
    }
    out
}

/// Pull the last identifier-shaped token from a string.
fn extract_token_before(s: &str) -> Option<String> {
    let trimmed = s.trim_end_matches([' ', '\u{3000}', '`', '*', '_', '"']);
    let mut start = trimmed.len();
    for (i, ch) in trimmed.char_indices().rev() {
        if is_identifier_char(ch) {
            start = i;
        } else {
            break;
        }
    }
    let tok = &trimmed[start..];
    if tok.is_empty() {
        None
    } else {
        Some(tok.to_string())
    }
}

/// Pull the first identifier-shaped token from a string.
fn extract_token_after(s: &str) -> Option<String> {
    let trimmed = s.trim_start_matches([' ', '\u{3000}', '`', '*', '_', '"']);
    let mut end = 0;
    for (i, ch) in trimmed.char_indices() {
        if is_identifier_char(ch) {
            end = i + ch.len_utf8();
        } else {
            break;
        }
    }
    let tok = &trimmed[..end];
    if tok.is_empty() {
        None
    } else {
        Some(tok.to_string())
    }
}

/// Whether `ch` may appear inside a crate reference token: ASCII letters and
/// digits plus `_`, `-` and the path separator `/`.
fn is_identifier_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '/')
}

/// Tests a single claim against the dependency edges.
///
/// Both sides of the claim are normalized to bare crate names first.
/// Returns `Some(message)` when `edges` lists the `to` crate among the
/// dependencies of the `from` crate, i.e. the claim is contradicted.
/// Returns `None` when the claim holds, and also when the `from` crate is
/// not present in `edges` at all.
fn check_claim(claim: &Claim, edges: &HashMap<String, Vec<String>>) -> Option<String> {
    let from_key = normalize_crate_ref(&claim.from);
    let to_key = normalize_crate_ref(&claim.to);
    let contradicted = edges.get(&from_key)?.contains(&to_key);
    contradicted.then(|| {
        format!("Cargo.toml says `{from_key}` depends on `{to_key}` — claim contradicts the graph")
    })
}

/// Reduces a crate reference as written in prose (`crates/koala-core`,
/// `koala-core`, ...) to the bare crate name used as a dependency-graph key.
///
/// The last `/`- or space-separated segment is kept and stripped of any
/// surrounding characters that are not ASCII alphanumerics, `-` or `_`.
/// Trailing separators and punctuation are dropped *before* the segment is
/// chosen, so `crates/koala-core/` yields `koala-core` rather than an empty
/// key that could never match a crate.
fn normalize_crate_ref(s: &str) -> String {
    let is_noise = |c: char| !c.is_ascii_alphanumeric() && c != '-' && c != '_';
    s.trim_end_matches(is_noise)
        .rsplit(['/', ' '])
        .next()
        .unwrap_or(s)
        .trim_matches(is_noise)
        .to_string()
}

/// Builds the direct-dependency map of the workspace by reading
/// `<root>/crates/*/Cargo.toml`.
///
/// Keys are the crate *directory* names (not `[package].name`); values are
/// the dependency names declared in `[dependencies]` and
/// `[dev-dependencies]`, in file order. Directories without a readable
/// `Cargo.toml` are skipped, and a missing `crates/` directory yields an
/// empty map.
///
/// The manifest is scanned line by line rather than parsed as TOML. The
/// scan understands `name = ...` entries, dotted keys such as
/// `name.workspace = true`, quoted keys, `#` comments, and the
/// `[dependencies.name]` table form. Other sections (for example
/// `[build-dependencies]` or target-specific tables) are not scanned.
fn collect_dep_edges(root: &std::path::Path) -> HashMap<String, Vec<String>> {
    let mut out: HashMap<String, Vec<String>> = HashMap::new();
    let Ok(read) = fs::read_dir(root.join("crates")) else {
        return out;
    };
    for entry in read.flatten() {
        let crate_dir = entry.path();
        let Ok(manifest) = fs::read_to_string(crate_dir.join("Cargo.toml")) else {
            continue;
        };
        let Some(name) = crate_dir.file_name().and_then(|s| s.to_str()) else {
            continue;
        };
        out.insert(name.to_string(), manifest_dep_names(&manifest));
    }
    out
}

/// Lists the dependency names declared by one manifest's `[dependencies]`
/// and `[dev-dependencies]` sections, in order of appearance.
fn manifest_dep_names(manifest: &str) -> Vec<String> {
    const DEP_TABLES: [&str; 2] = ["dependencies", "dev-dependencies"];

    let mut deps = Vec::new();
    let mut in_dep_table = false;
    for raw in manifest.lines() {
        let line = raw.trim();
        if line.starts_with('#') {
            // A commented-out entry is not a dependency.
            continue;
        }
        if line.starts_with('[') {
            // Drop a trailing `# comment`, then the brackets themselves.
            let header = line.split('#').next().unwrap_or(line);
            let path =
                header.trim_matches(|c: char| c == '[' || c == ']' || c.is_whitespace());
            in_dep_table = DEP_TABLES.iter().any(|table| *table == path);
            // `[dependencies.NAME]` declares NAME in the header itself; the
            // keys that follow are its settings, not further dependencies.
            let table_dep = DEP_TABLES
                .iter()
                .find_map(|table| path.strip_prefix(*table)?.strip_prefix('.'));
            if let Some(dep) = table_dep.and_then(dep_name_from_key) {
                deps.push(dep);
            }
            continue;
        }
        if !in_dep_table {
            continue;
        }
        if let Some(dep) = line
            .split_once('=')
            .and_then(|(key, _)| dep_name_from_key(key))
        {
            deps.push(dep);
        }
    }
    deps
}

/// Turns the key side of a dependency entry into a bare dependency name:
/// keeps the first dotted segment (`foo.workspace` -> `foo`), removes
/// surrounding quotes, and rejects anything that is not made solely of
/// alphanumerics, `-` and `_` (e.g. fragments of a wrapped value).
fn dep_name_from_key(key: &str) -> Option<String> {
    let name = key
        .split('.')
        .next()
        .unwrap_or(key)
        .trim()
        .trim_matches(|c: char| c == '"' || c == '\'');
    let valid = !name.is_empty()
        && name
            .chars()
            .all(|c| c.is_alphanumeric() || c == '-' || c == '_');
    valid.then(|| name.to_string())
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Writes `body` to `<root>/wiki/architecture.md`, creating the directory.
    fn write_arch(root: &std::path::Path, body: &str) {
        let dir = root.join("wiki");
        fs::create_dir_all(&dir).unwrap();
        fs::write(dir.join("architecture.md"), body).unwrap();
    }

    /// Creates `<root>/crates/<name>/Cargo.toml` declaring each of `deps`
    /// as a path dependency.
    fn write_crate(root: &std::path::Path, name: &str, deps: &[&str]) {
        let dir = root.join("crates").join(name);
        fs::create_dir_all(&dir).unwrap();
        let mut body = format!("[package]\nname = \"{name}\"\n\n[dependencies]\n");
        for d in deps {
            body.push_str(&format!("{d} = {{ path = \"../{d}\" }}\n"));
        }
        fs::write(dir.join("Cargo.toml"), body).unwrap();
    }

    #[test]
    fn parse_zh_negation() {
        let claims = parse_claims("注意:`koala-core` 不依赖 `koala-cli`。");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].from, "koala-core");
        assert_eq!(claims[0].to, "koala-cli");
    }

    #[test]
    fn parse_en_negation() {
        let claims = parse_claims("Note: `koala-core` does not depend on `koala-cli`.");
        assert_eq!(claims.len(), 1);
        assert_eq!(claims[0].from, "koala-core");
        assert_eq!(claims[0].to, "koala-cli");
    }

    /// Every marker not covered by the two tests above, checked against the
    /// expected `from` / `to` stored in the table row itself.
    #[test]
    fn parse_extended_negation_markers() {
        for &(text, from, to) in &[
            ("`a` 禁止依赖 `b`", "a", "b"),
            ("`a` 不可依赖 `b`", "a", "b"),
            ("`a` must not depend on `b`", "a", "b"),
            ("`a` should not depend on `b`", "a", "b"),
            ("`a` must never depend on `b`", "a", "b"),
            ("`a` is forbidden to depend on `b`", "a", "b"),
        ] {
            let claims = parse_claims(text);
            assert_eq!(claims.len(), 1, "marker missed: {text}");
            assert_eq!(claims[0].from, from, "from extraction failed: {text}");
            assert_eq!(claims[0].to, to, "to extraction failed: {text}");
        }
    }

    #[test]
    fn textual_claim_routed_to_rule_passes_when_graph_agrees() {
        let tmp = TempDir::new().unwrap();
        write_crate(tmp.path(), "koala-core", &[]);
        write_crate(tmp.path(), "koala-cli", &["koala-core"]);
        write_arch(tmp.path(), "# arch\n\n`koala-core` 不依赖 `koala-cli`。\n");
        let ctx = Context::new(tmp.path().to_path_buf());
        let findings = ArchClaims.run(&ctx);
        assert!(findings.is_empty(), "{findings:?}");
    }

    #[test]
    fn textual_claim_routed_to_rule_fails_when_graph_contradicts() {
        let tmp = TempDir::new().unwrap();
        // koala-core actually depends on koala-cli — but architecture.md
        // claims it doesn't. Drift.
        write_crate(tmp.path(), "koala-core", &["koala-cli"]);
        write_crate(tmp.path(), "koala-cli", &[]);
        write_arch(tmp.path(), "# arch\n\n`koala-core` 不依赖 `koala-cli`。\n");
        let ctx = Context::new(tmp.path().to_path_buf());
        let findings = ArchClaims.run(&ctx);
        assert_eq!(findings.len(), 1, "{findings:?}");
        assert_eq!(findings[0].severity, Severity::Hard);
    }

    /// A contradicted claim inside a fenced block must not be reported.
    #[test]
    fn fenced_block_skipped() {
        let tmp = TempDir::new().unwrap();
        write_crate(tmp.path(), "koala-core", &["koala-cli"]);
        write_crate(tmp.path(), "koala-cli", &[]);
        write_arch(
            tmp.path(),
            "# arch\n\n```\n`koala-core` 不依赖 `koala-cli`\n```\n",
        );
        let ctx = Context::new(tmp.path().to_path_buf());
        let findings = ArchClaims.run(&ctx);
        assert!(findings.is_empty());
    }
}