tokmd 1.12.0

Tokei-backed repo inventory receipts (Markdown/TSV/JSONL/CSV) for PRs, CI, and LLM workflows.
Documentation
//! Inclusion-policy preparation for context file selection.

use std::collections::{BTreeMap, BTreeSet};

use tokmd_scan::normalize_slashes as normalize_path;
use tokmd_types::{
    ContextFileRow, FileClassification, FileKind, FileRow, InclusionPolicy, PolicyExcludedFile,
};

use super::{SelectOptions, assign_policy, classify_file, compute_file_cap};

struct FileContextMeta {
    classifications: Vec<FileClassification>,
    policy: InclusionPolicy,
    policy_reason: Option<String>,
    original_tokens: usize,
}

pub(super) struct PolicySelection {
    pub(super) pack_rows: Vec<FileRow>,
    pub(super) excluded_by_policy: Vec<PolicyExcludedFile>,
    file_meta_map: BTreeMap<String, FileContextMeta>,
}

pub(super) fn prepare_policy_selection(
    candidate_rows: &[FileRow],
    budget: usize,
    options: &SelectOptions,
) -> PolicySelection {
    let file_cap = compute_file_cap(budget, options);
    let mut file_meta_map: BTreeMap<String, FileContextMeta> = BTreeMap::new();
    let mut excluded_by_policy: Vec<PolicyExcludedFile> = Vec::new();

    for row in candidate_rows
        .iter()
        .filter(|row| row.kind == FileKind::Parent)
    {
        let path = normalize_path(&row.path);
        let classifications = classify_file(&path, row.tokens, row.lines, options.dense_threshold);
        let (policy, reason) = assign_policy(row.tokens, file_cap, &classifications);

        file_meta_map.insert(
            path.clone(),
            FileContextMeta {
                classifications: classifications.clone(),
                policy,
                policy_reason: reason.clone(),
                original_tokens: row.tokens,
            },
        );

        if matches!(policy, InclusionPolicy::Skip | InclusionPolicy::Summary) {
            excluded_by_policy.push(PolicyExcludedFile {
                path,
                original_tokens: row.tokens,
                policy,
                reason: reason.unwrap_or_default(),
                classifications,
            });
        }
    }

    let excluded_paths: BTreeSet<&str> = excluded_by_policy
        .iter()
        .map(|file| file.path.as_str())
        .collect();

    let pack_rows = candidate_rows
        .iter()
        .filter(|row| {
            if row.kind != FileKind::Parent {
                return true;
            }
            let path = normalize_path(&row.path);
            !excluded_paths.contains(path.as_str())
        })
        .map(|row| {
            let path = normalize_path(&row.path);
            if let Some(meta) = file_meta_map.get(&path)
                && meta.policy == InclusionPolicy::HeadTail
            {
                return FileRow {
                    tokens: row.tokens.min(file_cap),
                    ..row.clone()
                };
            }
            row.clone()
        })
        .collect();

    PolicySelection {
        pack_rows,
        excluded_by_policy,
        file_meta_map,
    }
}

impl PolicySelection {
    pub(super) fn annotate_selected(&self, selected: &mut [ContextFileRow]) {
        for file in selected {
            let path = normalize_path(&file.path);
            if let Some(meta) = self.file_meta_map.get(&path) {
                file.classifications = meta.classifications.clone();
                file.policy = meta.policy;
                file.policy_reason = meta.policy_reason.clone();
                if meta.policy == InclusionPolicy::HeadTail {
                    file.effective_tokens = Some(file.tokens);
                    file.tokens = meta.original_tokens;
                }
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    fn file_row(path: &str, tokens: usize, lines: usize) -> FileRow {
        FileRow {
            path: path.to_string(),
            module: "src".to_string(),
            lang: "Rust".to_string(),
            kind: FileKind::Parent,
            code: lines,
            comments: 0,
            blanks: 0,
            lines,
            bytes: tokens,
            tokens,
        }
    }

    fn selected_row(path: &str, tokens: usize) -> ContextFileRow {
        ContextFileRow {
            path: path.to_string(),
            module: "src".to_string(),
            lang: "Rust".to_string(),
            tokens,
            code: 1,
            lines: 1,
            bytes: tokens,
            value: tokens,
            rank_reason: "test".to_string(),
            policy: InclusionPolicy::Full,
            effective_tokens: None,
            policy_reason: None,
            classifications: Vec::new(),
        }
    }

    #[test]
    fn head_tail_rows_are_capped_for_packing_and_restored_on_annotation() {
        let options = SelectOptions {
            max_file_pct: 0.10,
            max_file_tokens: Some(100),
            ..Default::default()
        };
        let selection =
            prepare_policy_selection(&[file_row("src/big.rs", 150, 150)], 1_000, &options);

        assert_eq!(selection.excluded_by_policy.len(), 0);
        assert_eq!(selection.pack_rows[0].tokens, 100);

        let mut selected = vec![selected_row("src/big.rs", 100)];
        selection.annotate_selected(&mut selected);
        assert_eq!(selected[0].policy, InclusionPolicy::HeadTail);
        assert_eq!(selected[0].effective_tokens, Some(100));
        assert_eq!(selected[0].tokens, 150);
    }

    #[test]
    fn oversized_generated_rows_are_excluded_from_packing() {
        let options = SelectOptions {
            max_file_pct: 0.10,
            max_file_tokens: Some(100),
            ..Default::default()
        };
        let selection = prepare_policy_selection(
            &[file_row("src/generated.pb.rs", 150, 150)],
            1_000,
            &options,
        );

        assert!(selection.pack_rows.is_empty());
        assert_eq!(selection.excluded_by_policy.len(), 1);
        assert_eq!(
            selection.excluded_by_policy[0].policy,
            InclusionPolicy::Skip
        );
    }
}