koala-drift 1.0.4

Wiki ↔ code drift detector.
Documentation
//! Wiki traversal helpers shared by the built-in checks.

use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

/// One line of a markdown document, tagged with its enclosing `## ` section
/// (if any) and whether it sits inside a fenced code block. Line numbers are
/// 1-indexed and match what an editor would show.
///
/// All string fields borrow from the document the line was read from, so a
/// `TaggedLine` cannot outlive that document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TaggedLine<'a> {
    /// 1-indexed position of the line within the document.
    pub line_no: usize,
    /// The line exactly as it appears in the document (no trimming applied).
    pub text: &'a str,
    /// Trimmed title of the most recent `## ` heading seen outside a fence,
    /// or `None` when no such heading applies (e.g. before the first one, or
    /// after a `# ` title line).
    pub section: Option<&'a str>,
    /// `true` for lines inside a fenced code block, including the opening
    /// and closing fence delimiter lines themselves.
    pub in_fence: bool,
}

/// Tag every line of `content` with its section and fence state in one pass.
///
/// * A line whose first non-whitespace characters are `` ``` `` is a fence
///   delimiter. It flips the fence state and is itself always reported with
///   `in_fence = true`, whether it opens or closes the block.
/// * Outside a fence, a line starting with `## ` begins a new section named
///   by the trimmed remainder of the line, and a line starting with `# `
///   (the document title) clears the current section. Deeper headings such
///   as `### sub` leave the section untouched.
/// * Inside a fence, heading-like lines are ignored.
///
/// Returns one [`TaggedLine`] per line, in document order, with 1-indexed
/// line numbers.
pub fn tagged_lines(content: &str) -> Vec<TaggedLine<'_>> {
    let mut inside_fence = false;
    let mut current_section: Option<&str> = None;
    content
        .lines()
        .enumerate()
        .map(|(idx, text)| {
            let is_delimiter = text.trim_start().starts_with("```");
            if is_delimiter {
                inside_fence = !inside_fence;
            } else if !inside_fence {
                match text.strip_prefix("## ") {
                    // Only level-2 headings open a section.
                    Some(title) => current_section = Some(title.trim()),
                    // A level-1 title means "no section" from here on.
                    None if text.starts_with("# ") => current_section = None,
                    None => {}
                }
            }
            TaggedLine {
                line_no: idx + 1,
                text,
                section: current_section,
                // Delimiters count as fenced even when they close the block.
                in_fence: is_delimiter || inside_fence,
            }
        })
        .collect()
}

/// Feature docs to consider: the `*.md` entries directly inside
/// `<root>/wiki/features`, skipping names that begin with an underscore
/// (templates / index files).
pub fn list_feature_files(root: &Path) -> Vec<PathBuf> {
    let features_dir = root.join("wiki/features");
    let is_not_underscored = |name: &str| !name.starts_with('_');
    list_md_in(&features_dir, is_not_underscored)
}

/// ADR files: the `*.md` entries directly inside `<root>/wiki/decisions`
/// whose names begin with four ASCII digits (the `NNNN-*.md` convention) and
/// do not begin with an underscore. Only the digit prefix is checked; the
/// dash after it is not.
pub fn list_adr_files(root: &Path) -> Vec<PathBuf> {
    let decisions_dir = root.join("wiki/decisions");
    list_md_in(&decisions_dir, |name| {
        starts_with_four_digits(name) && !name.starts_with('_')
    })
}

/// Every `*.md` file anywhere under `<root>/wiki`, used for inbound-ref
/// counting. Returns an empty list when that directory does not exist.
/// Entries the walker fails to read are skipped rather than reported.
pub fn list_all_wiki_md(root: &Path) -> Vec<PathBuf> {
    let wiki_dir = root.join("wiki");
    if !wiki_dir.is_dir() {
        return Vec::new();
    }
    let mut found = Vec::new();
    for entry in WalkDir::new(&wiki_dir).into_iter().flatten() {
        if !entry.file_type().is_file() {
            continue;
        }
        let path = entry.into_path();
        if path.extension().and_then(|ext| ext.to_str()) == Some("md") {
            found.push(path);
        }
    }
    found
}

/// Best-effort relative path for display: `path` with the `root` prefix
/// removed. When `path` does not live under `root`, it is returned unchanged.
pub fn rel(path: &Path, root: &Path) -> PathBuf {
    match path.strip_prefix(root) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => path.to_path_buf(),
    }
}

/// Suggest the closest existing entry for a missing path.
///
/// `missing` is resolved against `repo_root`, and the entries of the
/// directory that would contain it are compared to its final component by
/// edit distance. The entry with the smallest distance is returned, provided
/// that distance is ≤ `max_distance`. When several entries are equally
/// close, the one with the smallest path wins, so the suggestion does not
/// depend on the order in which the filesystem lists the directory.
///
/// The result is relative to `repo_root` where possible (see [`rel`]).
///
/// Returns `None` when the containing directory cannot be read, when the
/// missing path has no UTF-8 final component, or when no entry is close
/// enough. Entries whose names are not valid UTF-8 are ignored.
pub fn fuzzy_suggest(missing: &Path, repo_root: &Path, max_distance: usize) -> Option<PathBuf> {
    let abs = repo_root.join(missing);
    let parent = abs.parent()?;
    let needle = abs.file_name()?.to_str()?;
    let (_, closest) = fs::read_dir(parent)
        .ok()?
        .flatten()
        .filter_map(|entry| {
            let path = entry.path();
            let distance = edit_distance(needle, path.file_name()?.to_str()?);
            (distance <= max_distance).then_some((distance, path))
        })
        // Tuple ordering: lowest distance first, then lowest path as the
        // deterministic tie-breaker.
        .min()?;
    Some(rel(&closest, repo_root))
}

/// Levenshtein distance between `a` and `b`, counted in `char`s: the minimum
/// number of single-character insertions, deletions and substitutions that
/// turn one string into the other.
fn edit_distance(a: &str, b: &str) -> usize {
    let source: Vec<char> = a.chars().collect();
    let target: Vec<char> = b.chars().collect();
    if source.is_empty() {
        return target.len();
    }
    if target.is_empty() {
        return source.len();
    }
    // `row[j]` holds the distance between the processed prefix of `source`
    // and the first `j` chars of `target`; it is updated in place.
    let mut row: Vec<usize> = (0..=target.len()).collect();
    for (i, &from) in source.iter().enumerate() {
        // Value of the previous row at column `j`, saved before overwrite.
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, &to) in target.iter().enumerate() {
            let substitution = diagonal + usize::from(from != to);
            let deletion = row[j + 1] + 1;
            let insertion = row[j] + 1;
            diagonal = row[j + 1];
            row[j + 1] = deletion.min(insertion).min(substitution);
        }
    }
    row[target.len()]
}

/// List the markdown files directly inside `dir` (no recursion), sorted by
/// path.
///
/// An entry is kept when its extension is exactly `md`, its file name is
/// valid UTF-8 and satisfies `accept`, and it is a file rather than a
/// directory (symlinks are followed). Returns an empty list when `dir`
/// cannot be read, e.g. because it does not exist or is not a directory.
/// Entries that fail to read are skipped.
fn list_md_in(dir: &Path, accept: impl Fn(&str) -> bool) -> Vec<PathBuf> {
    let Ok(entries) = fs::read_dir(dir) else {
        return Vec::new();
    };
    let mut out: Vec<PathBuf> = entries
        .flatten()
        .map(|entry| entry.path())
        .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some("md"))
        .filter(|path| {
            path.file_name()
                .and_then(|name| name.to_str())
                .map_or(false, &accept)
        })
        // Checked last so the filesystem is only queried for accepted names.
        // A directory named `something.md` is not a document.
        .filter(|path| path.is_file())
        .collect();
    out.sort();
    out
}

/// Whether the first four bytes of `name` are all ASCII digits. Names
/// shorter than four bytes never match.
fn starts_with_four_digits(name: &str) -> bool {
    match name.as_bytes().get(..4) {
        Some(prefix) => prefix.iter().all(|byte| byte.is_ascii_digit()),
        None => false,
    }
}

// Unit tests for `tagged_lines`: section tracking and fence detection.
#[cfg(test)]
mod tests {
    use super::*;

    /// Section and fence tags are tracked independently: a fenced block
    /// keeps the section it started in, and the next `## ` heading after the
    /// fence closes starts a new section.
    #[test]
    fn tags_section_and_fence() {
        let doc = "# Title\n\n## A\nalpha\n```\nfenced\n```\n## B\nbeta\n";
        let lines = tagged_lines(doc);
        // Line 4 ("alpha", index 3) is in section A and outside any fence.
        let l = &lines[3];
        assert_eq!(l.text, "alpha");
        assert_eq!(l.section, Some("A"));
        assert!(!l.in_fence);
        // Line 6 ("fenced", index 5) is inside the fence, still in section A.
        let l = &lines[5];
        assert_eq!(l.text, "fenced");
        assert_eq!(l.section, Some("A"));
        assert!(l.in_fence);
        // Line 9 ("beta", index 8) follows the `## B` heading, so section B.
        let l = &lines[8];
        assert_eq!(l.text, "beta");
        assert_eq!(l.section, Some("B"));
    }

    /// Both the opening and the closing fence delimiter are reported with
    /// `in_fence = true`, as is everything between them.
    #[test]
    fn fence_lines_themselves_marked_in_fence() {
        let doc = "## A\n```\nx\n```\n";
        let lines = tagged_lines(doc);
        // Opening ``` (line 2): the toggle has just switched the fence on.
        assert!(lines[1].in_fence);
        // "x" (line 3): body of the fenced block.
        assert!(lines[2].in_fence);
        // Closing ``` (line 4): the toggle has switched the fence off, but
        // delimiter lines are still reported as fenced.
        assert!(lines[3].in_fence);
    }
}