1use std::path::{Path, PathBuf};
2use std::time::{SystemTime, UNIX_EPOCH};
3
4use tokmd_analysis_types::FileStatRow;
5
6pub use tokmd_math::{gini_coefficient, percentile, round_f64, safe_ratio};
7
/// Optional ceilings a caller can attach to an analysis run.
///
/// Every field is an `Option`, and the derived `Default` leaves all of them
/// as `None`.
///
/// NOTE(review): `None` presumably means "no limit" — confirm against the
/// code that consumes these values.
#[derive(Clone, Debug, Default)]
pub struct AnalysisLimits {
    /// Cap on a file count (presumably the number of files analyzed — confirm with consumers).
    pub max_files: Option<usize>,
    /// Cap on a byte total (presumably summed across all files — confirm with consumers).
    pub max_bytes: Option<u64>,
    /// Cap on a per-file byte size (presumably larger files are skipped or truncated — confirm).
    pub max_file_bytes: Option<u64>,
    /// Cap on a commit count (presumably for history-based analysis — confirm with consumers).
    pub max_commits: Option<usize>,
    /// Cap on files per commit (presumably for history-based analysis — confirm with consumers).
    pub max_commit_files: Option<usize>,
}
16
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// If the system clock reports a moment before the epoch, the result is `0`
/// instead of an error or a panic.
pub fn now_ms() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis(),
        // Clock set before 1970: behave like a zero-length duration.
        Err(_) => 0,
    }
}
23
/// Converts `path` into a forward-slash path, relative to `root` when possible.
///
/// Steps, in order:
/// 1. every backslash in `path` becomes `/`;
/// 2. if the result starts with `root` (component-wise, via
///    [`Path::strip_prefix`]), that prefix is removed;
/// 3. any number of leading `./` markers is dropped.
///
/// Because step 2 runs before step 3, a path such as `./repo/a.rs` with root
/// `repo` keeps its `repo/` prefix: the leading `.` component prevents the
/// prefix match.
pub fn normalize_path(path: &str, root: &Path) -> String {
    let unified = path.replace('\\', "/");

    // `None` when `root` is not a prefix; the slash-normalized input is used as-is then.
    let below_root = Path::new(&unified)
        .strip_prefix(root)
        .ok()
        .map(|rest| rest.to_string_lossy().replace('\\', "/"));
    let relative = below_root.unwrap_or(unified);

    // Peel off every leading "./" on a borrowed slice, allocating once at the end.
    let mut trimmed = relative.as_str();
    while let Some(rest) = trimmed.strip_prefix("./") {
        trimmed = rest;
    }
    trimmed.to_string()
}
34
/// Counts the non-empty `/`-separated segments of `path`.
///
/// Empty segments (from leading, trailing or doubled slashes) are ignored,
/// and the result is never less than `1`, even for an empty string or a path
/// made only of slashes.
pub fn path_depth(path: &str) -> usize {
    let mut segments = 0usize;
    for piece in path.split('/') {
        if !piece.is_empty() {
            segments += 1;
        }
    }
    // Floor of one so callers never see a zero depth.
    if segments == 0 {
        1
    } else {
        segments
    }
}
38
/// Heuristically decides whether `path` refers to test code.
///
/// Matching is case-insensitive and assumes `/` as the separator. A path is
/// reported as a test path when any of the following holds:
///
/// - a directory component is exactly `test`, `tests`, `spec` or `specs`.
///   This includes the first component, so root-level directories in
///   relative paths such as `tests/integration.rs` are recognized;
/// - the path contains `__tests__` anywhere;
/// - the file name (the part after the last `/`) contains `_test`, `.test.`
///   or `.spec.`, or starts with `test_`.
///
/// The final component is always treated as the file name, so a bare `test`
/// or a path ending in `/tests` does not match by the directory rule.
pub fn is_test_path(path: &str) -> bool {
    let lower = path.to_lowercase();
    if lower.contains("__tests__") {
        return true;
    }

    // Split into "directories" and "file name" at the last slash; a path
    // without any slash has no directory part at all.
    let (dirs, name) = lower.rsplit_once('/').unwrap_or(("", lower.as_str()));

    // Compare whole components rather than searching for "/tests/" so that a
    // leading directory without a preceding slash is matched too.
    let in_test_dir = dirs
        .split('/')
        .any(|segment| matches!(segment, "test" | "tests" | "spec" | "specs"));
    if in_test_dir {
        return true;
    }

    // `contains("_test")` already covers names ending in `_test.rs`.
    name.contains("_test")
        || name.contains(".test.")
        || name.contains(".spec.")
        || name.starts_with("test_")
}
54
/// Reports whether `lang` names one of the languages this module classifies
/// as "infra" (the data, markup, styling and configuration formats listed in
/// the table below).
///
/// The comparison is done on the lowercased form of `lang`, so `"JSON"` and
/// `"json"` are treated alike. Names not in the table return `false`.
pub fn is_infra_lang(lang: &str) -> bool {
    const INFRA_LANGS: &[&str] = &[
        "json",
        "yaml",
        "toml",
        "markdown",
        "xml",
        "html",
        "css",
        "scss",
        "less",
        "makefile",
        "dockerfile",
        "hcl",
        "terraform",
        "nix",
        "cmake",
        "ini",
        "properties",
        "gitignore",
        "gitconfig",
        "editorconfig",
        "csv",
        "tsv",
        "svg",
    ];

    let lowered = lang.to_lowercase();
    INFRA_LANGS.iter().any(|known| *known == lowered.as_str())
}
84
85pub fn empty_file_row() -> FileStatRow {
86 FileStatRow {
87 path: String::new(),
88 module: String::new(),
89 lang: String::new(),
90 code: 0,
91 comments: 0,
92 blanks: 0,
93 lines: 0,
94 bytes: 0,
95 tokens: 0,
96 doc_pct: None,
97 bytes_per_line: None,
98 depth: 0,
99 }
100}
101
/// Resolves `root` to its canonical form when the filesystem allows it.
///
/// If canonicalization fails for any reason (for example, the path does not
/// exist), `root` is returned unchanged as an owned `PathBuf`.
pub fn normalize_root(root: &Path) -> PathBuf {
    match root.canonicalize() {
        Ok(resolved) => resolved,
        // Best effort: fall back to the caller's path as given.
        Err(_) => root.to_path_buf(),
    }
}
105
// Unit tests and proptest-based property tests for the path and language
// helpers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    // A Windows-style relative path gets forward slashes and loses its
    // leading "./" marker; the root "repo" is not a prefix here, so nothing
    // else is stripped.
    #[test]
    fn normalize_path_replaces_backslashes_and_leading_dot_slash() {
        let root = PathBuf::from("repo");
        assert_eq!(normalize_path(r".\src\lib.rs", &root), "src/lib.rs");
    }

    // Calling the function twice with the same arguments yields equal output.
    #[test]
    fn normalize_path_is_deterministic() {
        let root = PathBuf::from("repo");
        let input = r".\src\main.rs";
        assert_eq!(normalize_path(input, &root), normalize_path(input, &root));
    }

    proptest! {
        // For arbitrary generated strings the reported depth is never zero.
        #[test]
        fn path_depth_always_at_least_one(path in "\\PC*") {
            let depth = path_depth(&path);
            prop_assert!(depth >= 1, "Path depth should always be at least 1");
        }

        // Joining N non-empty segments with single slashes gives depth N.
        #[test]
        fn path_depth_counts_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..10)) {
            let path = segments.join("/");
            let depth = path_depth(&path);
            prop_assert_eq!(depth, segments.len(), "Depth should equal segment count for {}", path);
        }

        // Doubled, trailing and leading slashes introduce empty segments,
        // which must not change the depth.
        #[test]
        fn path_depth_ignores_empty_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..5)) {
            let path_normal = segments.join("/");
            let path_with_double = segments.join("//");
            let path_with_trailing = format!("{}/", path_normal);
            let path_with_leading = format!("/{}", path_normal);

            let d_normal = path_depth(&path_normal);
            let d_double = path_depth(&path_with_double);
            let d_trailing = path_depth(&path_with_trailing);
            let d_leading = path_depth(&path_with_leading);

            prop_assert_eq!(d_normal, d_double, "Double slashes should not add depth");
            prop_assert_eq!(d_normal, d_trailing, "Trailing slash should not add depth");
            prop_assert_eq!(d_normal, d_leading, "Leading slash should not add depth");
        }

        // The same path with "test", "TEST" or "TeSt" as a directory name
        // must be classified identically.
        #[test]
        fn is_test_path_case_insensitive_for_dirs(prefix in "[a-zA-Z0-9_/]+", suffix in "[a-zA-Z0-9_/]+\\.rs") {
            let lower = format!("{}/test/{}", prefix, suffix);
            let upper = format!("{}/TEST/{}", prefix, suffix);
            let mixed = format!("{}/TeSt/{}", prefix, suffix);

            prop_assert_eq!(is_test_path(&lower), is_test_path(&upper), "Case sensitivity issue with TEST dir");
            prop_assert_eq!(is_test_path(&lower), is_test_path(&mixed), "Case sensitivity issue with TeSt dir");
        }

        // Each known test directory name, placed under "src/", is detected.
        #[test]
        fn is_test_path_known_test_dirs_detected(dir in prop::sample::select(vec!["test", "tests", "__tests__", "spec", "specs"])) {
            let path = format!("src/{}/foo.rs", dir);
            prop_assert!(is_test_path(&path), "Should detect test dir: {}", dir);
        }

        // Each known test file-name pattern, placed under "src/", is detected.
        #[test]
        fn is_test_path_file_patterns_detected(pattern in prop::sample::select(vec!["foo_test.rs", "test_foo.rs", "foo.test.js", "foo.spec.ts"])) {
            let path = format!("src/{}", pattern);
            prop_assert!(is_test_path(&path), "Should detect test file pattern: {}", pattern);
        }

        // A sample of infra languages is recognized both as given
        // (lowercase) and after uppercasing.
        #[test]
        fn is_infra_lang_case_insensitive(lang in prop::sample::select(vec!["json", "yaml", "toml", "markdown", "xml", "html", "css"])) {
            prop_assert!(is_infra_lang(lang), "Should detect infra lang: {}", lang);
            prop_assert!(is_infra_lang(&lang.to_uppercase()), "Should detect infra lang (upper): {}", lang.to_uppercase());
        }

        // Every name in the infra-language list is recognized.
        #[test]
        fn is_infra_lang_known_infra_detected(lang in prop::sample::select(vec![
            "json", "yaml", "toml", "markdown", "xml", "html", "css", "scss", "less",
            "makefile", "dockerfile", "hcl", "terraform", "nix", "cmake", "ini",
            "properties", "gitignore", "gitconfig", "editorconfig", "csv", "tsv", "svg"
        ])) {
            prop_assert!(is_infra_lang(lang), "Should detect known infra lang: {}", lang);
        }

        // Common programming languages are not classified as infra.
        #[test]
        fn is_infra_lang_code_langs_not_infra(lang in prop::sample::select(vec![
            "rust", "python", "javascript", "typescript", "go", "java", "c", "cpp"
        ])) {
            prop_assert!(!is_infra_lang(lang), "Code lang should not be infra: {}", lang);
        }
    }
}