koala-drift 1.0.4

Wiki ↔ code drift detector.
Documentation
//! `wiki.template-placeholder-unfilled` — detect `<...>` placeholders
//! that `koala-core init` left in scaffolded Tier 2/3 wiki files. If
//! they survive into a PR, the file was never filled in.
//!
//! Scope:
//!   * a curated list of files that `koala-core init` writes for the
//!     user to fill (`CLAUDE.md`, `wiki/architecture.md`, `wiki/vision.md`,
//!     `wiki/roadmap.md`, `wiki/runbook.md`, `wiki/testing.md`,
//!     `wiki/tech-debt.md`, `README.md`);
//!   * every `wiki/features/*.md` except `_template.md` / `_index.md`
//!     (catches "copied template, forgot to fill it in").
//!
//! Skipped:
//!   * Tier 1 auto-generated files (`_index.md`, `_tags/*`, `health.md`)
//!     — `tier1.no-hand-edit` already guards them;
//!   * lines inside fenced code blocks (`` ``` ``);
//!   * spans inside inline code (`` `...` ``) — `<id>` in
//!     `` `path/<id>/file.json` `` is a path parameter, not a template
//!     placeholder;
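//!   * HTML comments (`<!-- ... -->`);
//!   * a small allowlist of bare inline-HTML tags (`<br>`, `<p>`,
//!     `<li>`, `<table>`, …) — see `looks_like_html_tag`.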

use crate::check::{Check, Finding, FindingKind, Severity};
use crate::scan::{list_feature_files, rel, tagged_lines};
use koala_core::invariant::Context;
use regex::Regex;
use std::fs;
use std::path::PathBuf;
use std::sync::OnceLock;

/// Hand-curated list of Tier 2/3 files that `koala-core init` writes
/// containing `<...>` placeholders the user is expected to substitute.
/// Keep in sync with `templates/`.
///
/// Entries are paths relative to the project root: the check joins each
/// one onto `ctx.root()` and silently skips any that cannot be read.
/// Files are scanned in the order listed here, ahead of the feature
/// files, so the order also determines the order of reported findings.
const SCAFFOLDED_FILES: &[&str] = &[
    "CLAUDE.md",
    "README.md",
    "wiki/architecture.md",
    "wiki/vision.md",
    "wiki/roadmap.md",
    "wiki/runbook.md",
    "wiki/testing.md",
    "wiki/tech-debt.md",
];

/// Lazily compiled matcher for a single-line `<...>` token.
///
/// The text between the angle brackets must be 1–120 characters long
/// and may not contain `<`, `>` or a newline. Its first character may
/// additionally not be `!`, which keeps the `<!--` opener of an HTML
/// comment from ever matching.
///
/// The pattern is compiled on first use and the same instance is
/// handed out on every later call.
fn placeholder_re() -> &'static Regex {
    static PLACEHOLDER: OnceLock<Regex> = OnceLock::new();
    PLACEHOLDER.get_or_init(|| {
        // The pattern is a fixed literal, so compilation failing would
        // be a programming error rather than a runtime condition.
        Regex::new(r"<([^<>!\n][^<>\n]{0,119})>").unwrap()
    })
}

/// The `wiki.template-placeholder-unfilled` check. A stateless unit
/// struct: all behaviour lives in its [`Check`] implementation.
pub struct TemplatePlaceholder;

impl Check for TemplatePlaceholder {
    /// Identifier under which this check's findings are reported.
    fn id(&self) -> &'static str {
        "wiki.template-placeholder-unfilled"
    }

    /// One-paragraph statement of the rule this check enforces.
    fn intent(&self) -> &'static str {
        "Tier 2/3 wiki files scaffolded by `koala-core init` must have \
         their `<...>` placeholders replaced with real content — leaving \
         them in means the file was never filled."
    }

    /// Scans every scaffolded file plus the feature files returned by
    /// `list_feature_files` and emits one finding per surviving
    /// `<...>` placeholder.
    ///
    /// Files that cannot be read are skipped without a finding. Within
    /// a file, lines tagged as being inside a code fence are ignored,
    /// as are matches that are allowlisted HTML tags or that start
    /// inside an inline-code span.
    fn run(&self, ctx: &Context) -> Vec<Finding> {
        // Scaffolded files first, then the per-feature files.
        let targets: Vec<PathBuf> = SCAFFOLDED_FILES
            .iter()
            .map(|name| ctx.root().join(name))
            .chain(list_feature_files(ctx.root()))
            .collect();

        let mut findings = Vec::new();
        for path in &targets {
            // A missing or unreadable target is not an error for this
            // check; there is simply nothing to scan.
            let content = match fs::read_to_string(path) {
                Ok(text) => text,
                Err(_) => continue,
            };
            let display = rel(path, ctx.root());

            for line in tagged_lines(&content) {
                if line.in_fence {
                    continue;
                }

                let code_spans = inline_code_spans(line.text);
                let hits = placeholder_re()
                    .find_iter(line.text)
                    .filter(|m| !looks_like_html_tag(m.as_str()))
                    .filter(|m| !in_any_span(m.start(), &code_spans));

                for hit in hits {
                    let token = hit.as_str();
                    findings.push(Finding {
                        check_id: self.id(),
                        file: display.clone(),
                        line: line.line_no,
                        claim: token.to_string(),
                        kind: FindingKind::TemplatePlaceholderUnfilled,
                        severity: Severity::Hard,
                        fix_hint: Some(format!(
                            "replace `{token}` with real project content; \
                             `koala-core init` left this placeholder for you to fill"
                        )),
                    });
                }
            }
        }
        findings
    }
}

/// Byte ranges (start..end, exclusive) covering the inline-code spans
/// on a single line, delimiters included.
///
/// A span opens at a maximal run of N backticks and closes at the next
/// run of exactly N backticks, so both `` `x` `` and double-backtick
/// spans that contain a lone backtick are recognised. A run with no
/// equal-length partner later on the line is treated as literal text
/// and scanning resumes right after it. Backslash escapes are not
/// interpreted.
///
/// Fenced delimiters (lines starting with ` ``` `) are expected to be
/// filtered out by the caller before this function is used.
///
/// Offsets are byte offsets into `line`; the backtick is ASCII, so they
/// always fall on `char` boundaries.
fn inline_code_spans(line: &str) -> Vec<(usize, usize)> {
    let bytes = line.as_bytes();

    // Pass 1: record every maximal backtick run as (start, length).
    let mut runs: Vec<(usize, usize)> = Vec::new();
    let mut i = 0;
    while i < bytes.len() {
        if bytes[i] == b'`' {
            let start = i;
            while i < bytes.len() && bytes[i] == b'`' {
                i += 1;
            }
            runs.push((start, i - start));
        } else {
            i += 1;
        }
    }

    // Pass 2: pair each opener with the nearest later run of the same
    // length. Runs swallowed by a span are skipped; unmatched openers
    // are dropped.
    let mut spans = Vec::new();
    let mut idx = 0;
    while idx < runs.len() {
        let (open, len) = runs[idx];
        let partner = runs[idx + 1..].iter().position(|&(_, l)| l == len);
        match partner {
            Some(offset) => {
                let (close, _) = runs[idx + 1 + offset];
                spans.push((open, close + len));
                idx += offset + 2;
            }
            None => idx += 1,
        }
    }
    spans
}

/// Reports whether byte offset `pos` lies inside at least one of the
/// half-open `start..end` ranges in `spans`.
fn in_any_span(pos: usize, spans: &[(usize, usize)]) -> bool {
    for &(start, end) in spans {
        if (start..end).contains(&pos) {
            return true;
        }
    }
    false
}

/// Filter the few legitimate inline-HTML tags we don't want to flag.
/// Markdown templates rarely contain raw HTML, but this keeps the
/// check honest if `<br>` etc. ever sneak in.
///
/// `token` is normally a full `<...>` match; if it is not wrapped in
/// angle brackets it is compared as-is. Matching is ASCII
/// case-insensitive and ignores surrounding whitespace. Besides the
/// bare opening form (`<sub>`), the closing form (`</sub>`) and the
/// self-closing form (`<br/>`, `<br />`) of an allowlisted tag are
/// accepted, so that a paired tag does not get flagged on its closer.
///
/// Tags carrying attributes are deliberately NOT recognised: a token
/// such as `<table of contents>` is far more likely to be a
/// placeholder than markup.
fn looks_like_html_tag(token: &str) -> bool {
    let inner = token
        .strip_prefix('<')
        .and_then(|s| s.strip_suffix('>'))
        .unwrap_or(token)
        .trim();
    // Peel off either a leading `/` (closing tag) or a trailing `/`
    // (self-closing tag), never both.
    let name = inner
        .strip_prefix('/')
        .or_else(|| inner.strip_suffix('/'))
        .unwrap_or(inner)
        .trim();
    matches!(
        name.to_ascii_lowercase().as_str(),
        "br" | "hr" | "p" | "li" | "ul" | "ol" | "td" | "tr" | "th" | "table" | "sub" | "sup"
    )
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Creates `rel` under `dir` (including any missing parent
    /// directories) with `body` as its contents.
    fn write(dir: &std::path::Path, rel: &str, body: &str) {
        let target = dir.join(rel);
        let parent = target.parent().unwrap();
        fs::create_dir_all(parent).unwrap();
        fs::write(target, body).unwrap();
    }

    /// Materialises each `(relative path, body)` pair in a fresh
    /// temporary project root and returns what the check reports for it.
    fn scan(files: &[(&str, &str)]) -> Vec<Finding> {
        let tmp = TempDir::new().unwrap();
        for &(path, body) in files {
            write(tmp.path(), path, body);
        }
        let ctx = Context::new(tmp.path().to_path_buf());
        TemplatePlaceholder.run(&ctx)
    }

    #[test]
    fn flags_unfilled_architecture_md() {
        let findings = scan(&[(
            "wiki/architecture.md",
            "# Architecture\n\n## 模块清单\n\n| <module-1> | <职责> | <依赖> |\n",
        )]);
        assert_eq!(findings.len(), 3, "{findings:#?}");
        assert!(findings
            .iter()
            .all(|f| matches!(f.kind, FindingKind::TemplatePlaceholderUnfilled)));
        assert!(findings.iter().any(|f| f.claim == "<module-1>"));
        assert!(findings.iter().any(|f| f.claim == "<职责>"));
    }

    #[test]
    fn ignores_filled_in_file() {
        let findings = scan(&[(
            "wiki/architecture.md",
            "# Architecture\n\n## 模块清单\n\n| world | 状态容器 | core |\n",
        )]);
        assert!(findings.is_empty());
    }

    #[test]
    fn ignores_html_comments() {
        let findings = scan(&[(
            "wiki/architecture.md",
            "# Architecture\n\n<!-- AUTO-GENERATED -->\n\nfilled.\n",
        )]);
        assert!(findings.is_empty());
    }

    #[test]
    fn ignores_fenced_code_block() {
        let findings = scan(&[(
            "wiki/architecture.md",
            "# Architecture\n\n```\nfn foo<T>(x: T) {}\n```\n\nfilled.\n",
        )]);
        assert!(
            findings.is_empty(),
            "generic <T> inside code fence should not flag"
        );
    }

    #[test]
    fn ignores_html_inline_tag_allowlist() {
        let findings = scan(&[(
            "wiki/architecture.md",
            "# Architecture\n\nLine one.<br>\nLine two.\n",
        )]);
        assert!(findings.is_empty());
    }

    #[test]
    fn scans_user_feature_files_not_template() {
        let findings = scan(&[
            (
                "wiki/features/_template.md",
                "# Feature\n\n<elevator pitch>\n",
            ),
            (
                "wiki/features/my-feature.md",
                "# Feature\n\n<elevator pitch>\n",
            ),
        ]);
        // Only the user-authored feature file may be reported; the
        // template itself is out of scope.
        assert_eq!(findings.len(), 1, "{findings:#?}");
        assert!(findings[0]
            .file
            .to_string_lossy()
            .ends_with("my-feature.md"));
    }

    #[test]
    fn nested_placeholder_still_flags_inner_token() {
        // Templates contain tokens such as `<elevator pitch,<= 30 字>`.
        // The outer brackets cannot match because of the nested `<`,
        // but the inner `<= 30 字>` does, which is sufficient to flag
        // the line.
        let findings = scan(&[(
            "wiki/architecture.md",
            "# Architecture\n\n<elevator pitch,<= 30 字>\n",
        )]);
        assert_eq!(findings.len(), 1, "{findings:#?}");
        let only = &findings[0];
        assert_eq!(only.claim, "<= 30 字>");
        assert_eq!(only.line, 3);
    }

    #[test]
    fn missing_target_file_is_silent() {
        // An empty project root (no wiki/ directory at all) must yield
        // no findings and must not panic.
        let findings = scan(&[]);
        assert!(findings.is_empty());
    }

    #[test]
    fn skips_non_scaffolded_files() {
        // Markdown outside the curated list is never scanned, even if
        // it contains angle-bracket tokens.
        let findings = scan(&[("wiki/other.md", "# Other\n\n<would-flag-if-scanned>\n")]);
        assert!(findings.is_empty());
    }

    #[test]
    fn ignores_path_parameters_inside_inline_code() {
        // Seen in practice in an architecture table row:
        //   | `storage/agents/<id>/config.json` | per-agent config |
        // Here `<id>` is path notation rather than a placeholder.
        let findings = scan(&[(
            "wiki/architecture.md",
            "# Architecture\n\n| `storage/<id>/config.json` | filled in |\n",
        )]);
        assert!(findings.is_empty(), "<id> inside backticks must not flag");
    }

    #[test]
    fn flags_token_outside_backticks_on_same_line() {
        // Guards against over-skipping: inline code on a line must not
        // hide a placeholder elsewhere on that same line.
        let findings = scan(&[(
            "wiki/architecture.md",
            "# Architecture\n\nReal `<safe>` content but <unfilled> here.\n",
        )]);
        assert_eq!(findings.len(), 1, "{findings:#?}");
        assert_eq!(findings[0].claim, "<unfilled>");
    }

    #[test]
    fn fix_hint_quotes_the_token() {
        let findings = scan(&[("wiki/vision.md", "# Vision\n\n<one-liner project pitch>\n")]);
        assert_eq!(findings.len(), 1);
        let hint = findings[0].fix_hint.as_deref().unwrap_or("");
        assert!(hint.contains("<one-liner project pitch>"), "{hint}");
        assert!(hint.contains("koala-core init"), "{hint}");
    }
}