1use std::path::{Path, PathBuf};
2use std::time::{SystemTime, UNIX_EPOCH};
3
4use tokmd_analysis_types::FileStatRow;
5
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, the result is `0`
/// rather than a panic.
pub(crate) fn now_ms() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis(),
        // Clock is set before 1970: fall back to a zero duration.
        Err(_) => 0,
    }
}
12
13#[cfg(any(feature = "git", feature = "content"))]
/// Normalizes `path` into a forward-slash string relative to `root`.
///
/// Steps, in order:
/// 1. every backslash is replaced with `/`;
/// 2. if the result starts with `root` (component-wise), that prefix is removed;
/// 3. a single leading `./` is removed.
///
/// Paths that are not under `root` are returned with only steps 1 and 3 applied.
pub(crate) fn normalize_path(path: &str, root: &Path) -> String {
    let unified = path.replace('\\', "/");

    let relative = match Path::new(&unified).strip_prefix(root) {
        // Re-normalize separators: the lossy conversion may reintroduce
        // platform-specific ones.
        Ok(rest) => rest.to_string_lossy().replace('\\', "/"),
        Err(_) => unified,
    };

    match relative.strip_prefix("./") {
        Some(rest) => rest.to_string(),
        None => relative,
    }
}
24
/// Counts the non-empty `/`-separated segments of `path`.
///
/// Leading, trailing, and doubled slashes contribute nothing. The result is
/// never less than 1, even for an empty path or one made only of slashes.
pub(crate) fn path_depth(path: &str) -> usize {
    let mut segments = 0usize;
    for piece in path.split('/') {
        if !piece.is_empty() {
            segments += 1;
        }
    }
    std::cmp::max(segments, 1)
}
28
/// Heuristically decides whether `path` refers to a test file.
///
/// Matching is case-insensitive and expects `/` as the separator. A path is
/// considered a test path when any of the following holds:
///
/// * it contains `__tests__` anywhere;
/// * any directory segment (every segment except the last) is exactly
///   `test`, `tests`, `spec`, or `specs` — including the very first segment
///   of a relative path such as `tests/integration.rs`;
/// * the file name contains `_test`, `.test.`, or `.spec.`, or starts with
///   `test_`.
pub(crate) fn is_test_path(path: &str) -> bool {
    const TEST_DIRS: [&str; 4] = ["test", "tests", "spec", "specs"];

    let lower = path.to_lowercase();
    if lower.contains("__tests__") {
        return true;
    }

    // The last segment is the file name; everything before it is a directory.
    let mut segments = lower.split('/');
    let name = segments.next_back().unwrap_or("");

    // Compare whole segments rather than searching for "/tests/": a substring
    // search misses test directories at the start of a relative path.
    if segments.any(|dir| TEST_DIRS.contains(&dir)) {
        return true;
    }

    // `contains("_test")` also covers names ending in `_test.rs`.
    name.contains("_test")
        || name.contains(".test.")
        || name.contains(".spec.")
        || name.starts_with("test_")
}
44
/// Reports whether `lang` names a configuration, markup, or data format
/// rather than a programming language.
///
/// The comparison lowercases `lang` first, so `"JSON"` and `"json"` are
/// treated the same.
pub(crate) fn is_infra_lang(lang: &str) -> bool {
    const INFRA_LANGS: &[&str] = &[
        "json",
        "yaml",
        "toml",
        "markdown",
        "xml",
        "html",
        "css",
        "scss",
        "less",
        "makefile",
        "dockerfile",
        "hcl",
        "terraform",
        "nix",
        "cmake",
        "ini",
        "properties",
        "gitignore",
        "gitconfig",
        "editorconfig",
        "csv",
        "tsv",
        "svg",
    ];

    let needle = lang.to_lowercase();
    INFRA_LANGS.contains(&needle.as_str())
}
74
/// Picks the value at fraction `pct` of `sorted`, rounding the rank up.
///
/// The rank is `ceil(pct * (len - 1))`, clamped to the last index, so
/// `pct = 0.0` selects the first element and `pct = 1.0` the last. An empty
/// slice yields `0.0`.
///
/// The slice is indexed as given; it is not sorted here, so callers are
/// expected to pass ascending data.
pub(crate) fn percentile(sorted: &[usize], pct: f64) -> f64 {
    let last = match sorted.len().checked_sub(1) {
        Some(last) => last,
        None => return 0.0,
    };
    let span = sorted.len() as f64 - 1.0;
    // The float-to-usize cast saturates, so negative or NaN ranks become 0.
    let rank = ((pct * span).ceil() as usize).min(last);
    sorted[rank] as f64
}
82
/// Computes the Gini coefficient of `sorted` using the rank-weighted formula
/// `sum((2i - n - 1) * x_i) / (n * sum(x))`, with ranks `i` starting at 1.
///
/// Returns `0.0` for an empty slice or when all values sum to zero.
///
/// The slice is used in the order given; it is not sorted here, so callers
/// are expected to pass ascending data.
pub(crate) fn gini_coefficient(sorted: &[usize]) -> f64 {
    let total: f64 = sorted.iter().map(|&v| v as f64).sum();
    if sorted.is_empty() || total == 0.0 {
        return 0.0;
    }

    let count = sorted.len() as f64;
    let weighted = sorted.iter().enumerate().fold(0.0, |acc, (index, &v)| {
        let rank = index as f64 + 1.0;
        acc + (2.0 * rank - count - 1.0) * (v as f64)
    });

    weighted / (count * total)
}
99
100pub(crate) fn safe_ratio(numer: usize, denom: usize) -> f64 {
101 if denom == 0 {
102 0.0
103 } else {
104 round_f64(numer as f64 / denom as f64, 4)
105 }
106}
107
/// Rounds `value` to `decimals` places after the decimal point.
///
/// Works by scaling by `10^decimals`, rounding to the nearest integer
/// (halves round away from zero, per `f64::round`), and scaling back.
pub(crate) fn round_f64(value: f64, decimals: u32) -> f64 {
    let scale = f64::powi(10.0, decimals as i32);
    let scaled = value * scale;
    scaled.round() / scale
}
112
113pub(crate) fn empty_file_row() -> FileStatRow {
114 FileStatRow {
115 path: String::new(),
116 module: String::new(),
117 lang: String::new(),
118 code: 0,
119 comments: 0,
120 blanks: 0,
121 lines: 0,
122 bytes: 0,
123 tokens: 0,
124 doc_pct: None,
125 bytes_per_line: None,
126 depth: 0,
127 }
128}
129
/// Resolves `root` to its canonical form.
///
/// If canonicalization fails (for example because the path does not exist),
/// `root` is returned unchanged as an owned `PathBuf`.
pub fn normalize_root(root: &Path) -> PathBuf {
    match root.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => root.to_path_buf(),
    }
}
133
#[cfg(test)]
mod tests {
    // Property-based tests for the helper functions in this module.

    use super::*;
    use proptest::prelude::*;

    proptest! {
        // ------------------------------------------------------------------
        // percentile
        // ------------------------------------------------------------------

        // An empty slice yields 0.0 regardless of the requested fraction.
        #[test]
        fn percentile_empty_is_zero(pct in 0.0f64..=1.0) {
            let got = percentile(&[], pct);
            prop_assert_eq!(got, 0.0);
        }

        // The result always lies between the smallest and largest element.
        #[test]
        fn percentile_in_bounds(mut values in prop::collection::vec(0usize..10000, 1..100),
                                pct in 0.0f64..=1.0) {
            values.sort_unstable();
            let min = values[0] as f64;
            let max = values[values.len() - 1] as f64;
            let result = percentile(&values, pct);
            prop_assert!(result >= min, "Percentile {} below min {}", result, min);
            prop_assert!(result <= max, "Percentile {} above max {}", result, max);
        }

        // Fraction 0.0 selects the minimum.
        #[test]
        fn percentile_zero_is_min(mut values in prop::collection::vec(0usize..10000, 1..100)) {
            values.sort_unstable();
            let smallest = values[0] as f64;
            prop_assert_eq!(percentile(&values, 0.0), smallest);
        }

        // Fraction 1.0 selects the maximum.
        #[test]
        fn percentile_one_is_max(mut values in prop::collection::vec(0usize..10000, 1..100)) {
            values.sort_unstable();
            let largest = values[values.len() - 1] as f64;
            prop_assert_eq!(percentile(&values, 1.0), largest);
        }

        // A larger fraction never yields a smaller value.
        #[test]
        fn percentile_monotonic(mut values in prop::collection::vec(0usize..10000, 2..100),
                                pct1 in 0.0f64..=1.0,
                                pct2 in 0.0f64..=1.0) {
            values.sort_unstable();
            let (p1, p2) = (percentile(&values, pct1), percentile(&values, pct2));
            if pct1 <= pct2 {
                prop_assert!(p1 <= p2, "Percentile should be monotonic: p({})={} > p({})={}", pct1, p1, pct2, p2);
            } else {
                prop_assert!(p1 >= p2, "Percentile should be monotonic: p({})={} < p({})={}", pct1, p1, pct2, p2);
            }
        }

        // ------------------------------------------------------------------
        // gini_coefficient
        // ------------------------------------------------------------------

        // An empty slice has a coefficient of 0.0.
        #[test]
        fn gini_empty_is_zero(_dummy in 0..1u8) {
            let got = gini_coefficient(&[]);
            prop_assert_eq!(got, 0.0);
        }

        // All-zero input short-circuits to 0.0.
        #[test]
        fn gini_all_zeros_is_zero(len in 1usize..100) {
            let zeros = vec![0usize; len];
            prop_assert_eq!(gini_coefficient(&zeros), 0.0);
        }

        // For sorted, non-negative input the coefficient stays within [0, 1].
        #[test]
        fn gini_in_bounds(mut values in prop::collection::vec(0usize..1000, 1..100)) {
            values.sort_unstable();
            let gini = gini_coefficient(&values);
            prop_assert!(gini >= 0.0, "Gini must be non-negative: got {}", gini);
            prop_assert!(gini <= 1.0, "Gini must be at most 1: got {}", gini);
        }

        // Identical values mean perfect equality.
        #[test]
        fn gini_uniform_is_zero(value in 1usize..1000, len in 2usize..100) {
            let uniform = vec![value; len];
            let gini = gini_coefficient(&uniform);
            prop_assert!(gini.abs() < 0.0001, "Uniform distribution should have Gini ~0: got {}", gini);
        }

        // One non-zero value among zeros approaches the maximum (n - 1) / n.
        #[test]
        fn gini_one_nonzero_high(len in 2usize..100) {
            // `len - 1` zeros followed by a single 1000; already ascending.
            let mut values = vec![0usize; len];
            values[len - 1] = 1000;
            values.sort_unstable();
            let expected_max = (len - 1) as f64 / len as f64;
            let gini = gini_coefficient(&values);
            prop_assert!(gini >= expected_max - 0.01, "Extreme inequality should have high Gini: got {}, expected ~{}", gini, expected_max);
        }

        // ------------------------------------------------------------------
        // safe_ratio
        // ------------------------------------------------------------------

        // Division by zero is reported as 0.0.
        #[test]
        fn safe_ratio_zero_denom_is_zero(numer in 0usize..10000) {
            let got = safe_ratio(numer, 0);
            prop_assert_eq!(got, 0.0);
        }

        // A zero numerator yields 0.0.
        #[test]
        fn safe_ratio_zero_numer_is_zero(denom in 1usize..10000) {
            let got = safe_ratio(0, denom);
            prop_assert_eq!(got, 0.0);
        }

        // Equal numerator and denominator yield exactly 1.0.
        #[test]
        fn safe_ratio_same_is_one(value in 1usize..10000) {
            let got = safe_ratio(value, value);
            prop_assert_eq!(got, 1.0);
        }

        // The printed ratio never shows more than four fractional digits.
        #[test]
        fn safe_ratio_has_limited_decimals(numer in 0usize..10000, denom in 1usize..10000) {
            let s = safe_ratio(numer, denom).to_string();
            if let Some(dot_pos) = s.find('.') {
                let decimals = s[dot_pos + 1..].len();
                prop_assert!(decimals <= 4, "Should have at most 4 decimals: {} has {}", s, decimals);
            }
        }

        // ------------------------------------------------------------------
        // round_f64
        // ------------------------------------------------------------------

        // Rounding an already-rounded value changes nothing (within tolerance).
        #[test]
        fn round_idempotent(value in -1000.0f64..1000.0, decimals in 0u32..6) {
            let once = round_f64(value, decimals);
            let twice = round_f64(once, decimals);
            let drift = (once - twice).abs();
            prop_assert!(drift < 1e-10, "Rounding should be idempotent");
        }

        // Whole numbers survive rounding at any precision.
        #[test]
        fn round_preserves_integer(value in -1000i64..1000) {
            let whole = value as f64;
            for decimals in 0..6 {
                let rounded = round_f64(whole, decimals);
                prop_assert_eq!(rounded, whole, "Rounding integer should preserve it");
            }
        }

        // ------------------------------------------------------------------
        // path_depth
        // ------------------------------------------------------------------

        // Arbitrary strings never produce a depth of zero.
        #[test]
        fn path_depth_always_at_least_one(path in "\\PC*") {
            let depth = path_depth(&path);
            prop_assert!(depth >= 1, "Path depth should always be at least 1");
        }

        // Depth equals the number of joined segments.
        #[test]
        fn path_depth_counts_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..10)) {
            let path = segments.join("/");
            let expected = segments.len();
            prop_assert_eq!(path_depth(&path), expected, "Depth should equal segment count for {}", path);
        }

        // Doubled, trailing, and leading slashes do not change the depth.
        #[test]
        fn path_depth_ignores_empty_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..5)) {
            let path_normal = segments.join("/");
            let path_with_double = segments.join("//");
            let path_with_trailing = format!("{}/", path_normal);
            let path_with_leading = format!("/{}", path_normal);

            let baseline = path_depth(&path_normal);

            prop_assert_eq!(baseline, path_depth(&path_with_double), "Double slashes should not add depth");
            prop_assert_eq!(baseline, path_depth(&path_with_trailing), "Trailing slash should not add depth");
            prop_assert_eq!(baseline, path_depth(&path_with_leading), "Leading slash should not add depth");
        }

        // ------------------------------------------------------------------
        // is_test_path
        // ------------------------------------------------------------------

        // The casing of the test directory does not affect the verdict.
        #[test]
        fn is_test_path_case_insensitive_for_dirs(prefix in "[a-zA-Z0-9_/]+", suffix in "[a-zA-Z0-9_/]+\\.rs") {
            let lower = format!("{}/test/{}", prefix, suffix);
            let upper = format!("{}/TEST/{}", prefix, suffix);
            let mixed = format!("{}/TeSt/{}", prefix, suffix);

            let baseline = is_test_path(&lower);

            prop_assert_eq!(baseline, is_test_path(&upper), "Case sensitivity issue with TEST dir");
            prop_assert_eq!(baseline, is_test_path(&mixed), "Case sensitivity issue with TeSt dir");
        }

        // Every well-known test directory name is recognized.
        #[test]
        fn is_test_path_known_test_dirs_detected(dir in prop::sample::select(vec!["test", "tests", "__tests__", "spec", "specs"])) {
            let candidate = format!("src/{}/foo.rs", dir);
            prop_assert!(is_test_path(&candidate), "Should detect test dir: {}", dir);
        }

        // Every well-known test file naming pattern is recognized.
        #[test]
        fn is_test_path_file_patterns_detected(pattern in prop::sample::select(vec!["foo_test.rs", "test_foo.rs", "foo.test.js", "foo.spec.ts"])) {
            let candidate = format!("src/{}", pattern);
            prop_assert!(is_test_path(&candidate), "Should detect test file pattern: {}", pattern);
        }

        // ------------------------------------------------------------------
        // is_infra_lang
        // ------------------------------------------------------------------

        // Lower- and upper-case spellings are both accepted.
        #[test]
        fn is_infra_lang_case_insensitive(lang in prop::sample::select(vec!["json", "yaml", "toml", "markdown", "xml", "html", "css"])) {
            let upper = lang.to_uppercase();
            prop_assert!(is_infra_lang(lang), "Should detect infra lang: {}", lang);
            prop_assert!(is_infra_lang(&upper), "Should detect infra lang (upper): {}", upper);
        }

        // Every entry of the infra list is recognized.
        #[test]
        fn is_infra_lang_known_infra_detected(lang in prop::sample::select(vec![
            "json", "yaml", "toml", "markdown", "xml", "html", "css", "scss", "less",
            "makefile", "dockerfile", "hcl", "terraform", "nix", "cmake", "ini",
            "properties", "gitignore", "gitconfig", "editorconfig", "csv", "tsv", "svg"
        ])) {
            prop_assert!(is_infra_lang(lang), "Should detect known infra lang: {}", lang);
        }

        // Programming languages are not classified as infra.
        #[test]
        fn is_infra_lang_code_langs_not_infra(lang in prop::sample::select(vec![
            "rust", "python", "javascript", "typescript", "go", "java", "c", "cpp"
        ])) {
            prop_assert!(!is_infra_lang(lang), "Code lang should not be infra: {}", lang);
        }
    }
}