//! tokmd_analysis_explain — human-readable explanations for metric/finding keys.
1use std::collections::BTreeSet;
2
/// One catalog row: a canonical metric/finding key, the alternate
/// spellings accepted for it, and a one-line human-readable summary.
struct Entry {
    // Canonical key name; every entry in `ENTRIES` uses lowercase snake_case.
    canonical: &'static str,
    // Alternate spellings that `lookup` resolves to `canonical` (may be empty).
    aliases: &'static [&'static str],
    // One-sentence description rendered after "canonical: " by `lookup`.
    summary: &'static str,
}
8
/// Static catalog of every metric/finding key this crate can explain.
/// Order here is presentation order only — `catalog()` re-sorts keys
/// through a `BTreeSet`, and `lookup()` scans linearly.
const ENTRIES: &[Entry] = &[
    Entry {
        canonical: "doc_density",
        aliases: &["documentation_density", "docs"],
        summary: "Ratio of comment lines to total code+comment lines.",
    },
    Entry {
        canonical: "whitespace_ratio",
        aliases: &["whitespace"],
        summary: "Ratio of blank lines to code+comment lines.",
    },
    Entry {
        canonical: "verbosity",
        aliases: &["bytes_per_line"],
        summary: "Average bytes per line; higher values often indicate denser lines.",
    },
    Entry {
        canonical: "test_density",
        aliases: &["tests"],
        summary: "Share of code lines in test files vs production files.",
    },
    Entry {
        canonical: "todo_density",
        aliases: &["todo", "fixme"],
        summary: "TODO/FIXME/HACK/XXX markers per KLOC.",
    },
    Entry {
        canonical: "polyglot_entropy",
        aliases: &["language_entropy", "polyglot"],
        summary: "Language distribution entropy; higher means code spread across more languages.",
    },
    Entry {
        canonical: "gini",
        aliases: &["distribution_gini"],
        summary: "Inequality of file sizes; higher means concentration in fewer files.",
    },
    Entry {
        canonical: "avg_cyclomatic",
        aliases: &["cyclomatic"],
        summary: "Average branching complexity across analyzed files.",
    },
    Entry {
        canonical: "max_cyclomatic",
        aliases: &[],
        summary: "Highest cyclomatic complexity found in a single file.",
    },
    Entry {
        canonical: "avg_cognitive",
        aliases: &["cognitive"],
        summary: "Average cognitive complexity (human understandability cost).",
    },
    Entry {
        canonical: "max_nesting_depth",
        aliases: &["nesting_depth"],
        summary: "Deepest observed nesting level in analyzed code.",
    },
    Entry {
        canonical: "maintainability_index",
        aliases: &["mi"],
        summary: "SEI-style maintainability score from complexity and size inputs.",
    },
    Entry {
        canonical: "technical_debt_ratio",
        aliases: &["debt_ratio", "technical_debt"],
        summary: "Complexity points per KLOC as a heuristic debt signal.",
    },
    Entry {
        canonical: "halstead",
        aliases: &["halstead_volume", "halstead_effort"],
        summary: "Halstead software-science metrics derived from operators/operands.",
    },
    Entry {
        canonical: "complexity_histogram",
        aliases: &["histogram"],
        summary: "Bucketed distribution of cyclomatic complexity values.",
    },
    Entry {
        canonical: "hotspots",
        aliases: &["git_hotspots"],
        summary: "Files with high change frequency and high size-based impact.",
    },
    Entry {
        canonical: "bus_factor",
        aliases: &["ownership"],
        summary: "Approximate author concentration by module from git history.",
    },
    Entry {
        canonical: "freshness",
        aliases: &["staleness"],
        summary: "Recency of file changes; stale files exceed threshold days.",
    },
    Entry {
        canonical: "code_age_distribution",
        aliases: &["code_age", "age_buckets"],
        summary: "Bucketed file age distribution plus recent-vs-prior refresh trend.",
    },
    Entry {
        canonical: "coupling",
        aliases: &["module_coupling"],
        summary: "Modules frequently changed together in commits.",
    },
    Entry {
        canonical: "predictive_churn",
        aliases: &["churn"],
        summary: "Trend model of module change velocity over recent commits.",
    },
    Entry {
        canonical: "duplicate_waste",
        aliases: &["dup", "duplication"],
        summary: "Redundant bytes from exact duplicate files.",
    },
    Entry {
        canonical: "duplication_density",
        aliases: &["dup_density"],
        summary: "Duplicate waste density overall and by module.",
    },
    Entry {
        canonical: "imports",
        aliases: &["import_graph"],
        summary: "Observed dependency edges across files/modules from import statements.",
    },
    Entry {
        canonical: "entropy_suspects",
        aliases: &["entropy"],
        summary: "Files with suspiciously high entropy indicating packed/binary-like content.",
    },
    Entry {
        canonical: "license_radar",
        aliases: &["license"],
        summary: "Heuristic SPDX/license detection from metadata and text.",
    },
    Entry {
        canonical: "archetype",
        aliases: &["project_archetype"],
        summary: "Repository type inference from structural signals (workspace, web app, etc.).",
    },
    Entry {
        canonical: "context_window_fit",
        aliases: &["window_fit", "context_fit"],
        summary: "Estimated token fit against a target model context window.",
    },
];
151
/// Canonicalize a user-supplied key: trim surrounding whitespace,
/// lowercase ASCII letters, and map spaces, hyphens, and dots to `_`
/// so "Doc-Density" and "doc_density" compare equal.
fn normalize(key: &str) -> String {
    key.trim()
        .chars()
        .map(|c| match c {
            ' ' | '-' | '.' => '_',
            other => other.to_ascii_lowercase(),
        })
        .collect()
}
157
158pub fn lookup(key: &str) -> Option<String> {
159    let wanted = normalize(key);
160    for entry in ENTRIES {
161        if normalize(entry.canonical) == wanted {
162            return Some(format!("{}: {}", entry.canonical, entry.summary));
163        }
164        if entry.aliases.iter().any(|a| normalize(a) == wanted) {
165            return Some(format!("{}: {}", entry.canonical, entry.summary));
166        }
167    }
168    None
169}
170
171pub fn catalog() -> String {
172    let mut keys: BTreeSet<&'static str> = BTreeSet::new();
173    for entry in ENTRIES {
174        keys.insert(entry.canonical);
175    }
176    let mut out = String::from("Available metric/finding keys:\n");
177    for key in keys {
178        out.push_str("- ");
179        out.push_str(key);
180        out.push('\n');
181    }
182    out
183}
184
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lookup_finds_canonical_key() {
        // A canonical key must resolve and echo itself as the prefix.
        let resolved = lookup("avg_cyclomatic").expect("canonical key should resolve");
        assert!(resolved.starts_with("avg_cyclomatic:"));
        assert!(resolved.contains("complexity"));
    }

    #[test]
    fn lookup_finds_alias_with_normalization() {
        // Mixed case and hyphens must normalize down to the alias form.
        let resolved = lookup("Distribution-Gini").expect("alias should resolve");
        assert!(resolved.starts_with("gini:"));
    }

    #[test]
    fn catalog_is_sorted_and_unique() {
        let listing = catalog();
        // Drop the header, keep only "- key" bullet lines.
        let listed: Vec<&str> = listing
            .lines()
            .skip(1)
            .filter_map(|line| line.strip_prefix("- "))
            .collect();

        assert!(
            !listed.is_empty(),
            "catalog should include at least one key line"
        );

        // Sorting + deduping a copy must be a no-op if the output is well-formed.
        let mut expected = listed.clone();
        expected.sort_unstable();
        expected.dedup();
        assert_eq!(listed, expected, "catalog keys should be sorted and unique");
    }
}