1use std::path::{Path, PathBuf};
2#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use tokmd_analysis_types::FileStatRow;
6
7pub use tokmd_math::{gini_coefficient, percentile, round_f64, safe_ratio};
8
/// Optional caps for an analysis run.
///
/// Every field is an `Option`, and the derived `Default` leaves all of them
/// as `None`. Enforcement happens in callers that are not part of this file,
/// so the per-field notes below are inferred from the field names only.
#[derive(Debug, Clone, Default)]
pub struct AnalysisLimits {
    /// Presumably the maximum number of files to process — TODO confirm against callers.
    pub max_files: Option<usize>,
    /// Presumably a cap on the total number of bytes processed — TODO confirm.
    pub max_bytes: Option<u64>,
    /// Presumably a cap on the size of a single file, in bytes — TODO confirm.
    pub max_file_bytes: Option<u64>,
    /// Presumably the maximum number of commits to inspect — TODO confirm.
    pub max_commits: Option<usize>,
    /// Presumably the maximum number of files considered per commit — TODO confirm.
    pub max_commit_files: Option<usize>,
}
17
/// Timestamp stub compiled only for `wasm32` targets whose OS is `unknown`.
///
/// Always returns `0`. The `std::time` import is compiled out for this target
/// (see the matching `cfg` on the `use` line), so no clock is consulted here.
// NOTE(review): presumably chosen because no system clock is available on this
// target — confirm that callers tolerate a constant timestamp.
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
pub fn now_ms() -> u128 {
    0
}
24
/// Returns the system clock reading as milliseconds since the Unix epoch.
///
/// If the clock reports a time earlier than the epoch, the elapsed duration
/// cannot be computed and `0` is returned instead of an error.
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
pub fn now_ms() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis(),
        // A clock set before 1970 falls back to a zero duration.
        Err(_) => 0,
    }
}
32
/// Turns `path` into a forward-slash path relative to `root`.
///
/// Steps, in order:
/// 1. every backslash is replaced with `/`;
/// 2. if the result starts with `root` (compared via [`Path::strip_prefix`]),
///    that prefix is removed; otherwise the path is left untouched;
/// 3. all leading `./` prefixes are removed.
///
/// The path is never resolved against the file system.
pub fn normalize_path(path: &str, root: &Path) -> String {
    let unified = path.replace('\\', "/");

    let relative = match Path::new(&unified).strip_prefix(root) {
        // Re-apply the separator replacement to the lossy string form of the
        // stripped path.
        Ok(rest) => rest.to_string_lossy().replace('\\', "/"),
        Err(_) => unified,
    };

    // `trim_start_matches` removes the prefix repeatedly, so "././a" -> "a".
    relative.trim_start_matches("./").to_string()
}
43
/// Counts the non-empty `/`-separated segments of `path`.
///
/// Leading, trailing and doubled slashes contribute nothing. The result is
/// never below `1`, so an empty path or a path made only of slashes yields `1`.
pub fn path_depth(path: &str) -> usize {
    let mut depth = 0usize;
    for segment in path.split('/') {
        if !segment.is_empty() {
            depth += 1;
        }
    }
    // Clamp so callers always see a depth of at least one.
    if depth == 0 {
        1
    } else {
        depth
    }
}
47
/// Heuristically decides whether `path` refers to test code.
///
/// Matching is case-insensitive and assumes `/` as the path separator.
/// Returns `true` when any of the following holds:
///
/// * the path contains `__tests__` anywhere;
/// * one of its directory segments (everything before the final `/`) is
///   exactly `test`, `tests`, `spec` or `specs` — including the first segment
///   of a relative path, so `tests/foo.rs` qualifies;
/// * its file name (the text after the final `/`) contains `_test`, `.test.`
///   or `.spec.`, or starts with `test_`.
pub fn is_test_path(path: &str) -> bool {
    let lower = path.to_lowercase();
    if lower.contains("__tests__") {
        return true;
    }

    // Split into the directory part and the final segment (the file name).
    let (dirs, name) = lower.rsplit_once('/').unwrap_or(("", lower.as_str()));

    // Compare whole segments instead of searching for "/test/": a substring
    // search requires a leading slash and therefore misses a test directory
    // that is the first segment of a relative path.
    let in_test_dir = dirs
        .split('/')
        .any(|segment| matches!(segment, "test" | "tests" | "spec" | "specs"));
    if in_test_dir {
        return true;
    }

    // `contains("_test")` already covers names ending in `_test.rs`.
    name.contains("_test")
        || name.contains(".test.")
        || name.contains(".spec.")
        || name.starts_with("test_")
}
63
/// Reports whether `lang` names one of the configuration, markup or data
/// formats listed below rather than a programming language.
///
/// The comparison is case-insensitive: `lang` is lower-cased and then matched
/// exactly against the table.
pub fn is_infra_lang(lang: &str) -> bool {
    // Lower-case names of every language treated as infrastructure.
    const INFRA_LANGS: &[&str] = &[
        "json",
        "yaml",
        "toml",
        "markdown",
        "xml",
        "html",
        "css",
        "scss",
        "less",
        "makefile",
        "dockerfile",
        "hcl",
        "terraform",
        "nix",
        "cmake",
        "ini",
        "properties",
        "gitignore",
        "gitconfig",
        "editorconfig",
        "csv",
        "tsv",
        "svg",
    ];

    let needle = lang.to_lowercase();
    INFRA_LANGS.contains(&needle.as_str())
}
93
/// Builds a `FileStatRow` in which every field holds its empty value.
///
/// The string fields (`path`, `module`, `lang`) are empty strings, every field
/// assigned a number is `0`, and the optional `doc_pct` and `bytes_per_line`
/// are `None`.
pub fn empty_file_row() -> FileStatRow {
    FileStatRow {
        path: String::new(),
        module: String::new(),
        lang: String::new(),
        code: 0,
        comments: 0,
        blanks: 0,
        lines: 0,
        bytes: 0,
        tokens: 0,
        doc_pct: None,
        bytes_per_line: None,
        depth: 0,
    }
}
110
/// Returns the canonical form of `root`.
///
/// When canonicalization fails (for example because the path does not exist),
/// an unmodified copy of `root` is returned instead of an error.
pub fn normalize_root(root: &Path) -> PathBuf {
    match root.canonicalize() {
        Ok(resolved) => resolved,
        // Best effort: keep the caller's path as given.
        Err(_) => root.to_path_buf(),
    }
}
114
// Unit and property tests for the path and language helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    // The root "repo" is not a prefix of the input, so this exercises only the
    // backslash replacement and the removal of the leading "./".
    #[test]
    fn normalize_path_replaces_backslashes_and_leading_dot_slash() {
        let root = PathBuf::from("repo");
        assert_eq!(normalize_path(r".\src\lib.rs", &root), "src/lib.rs");
    }

    // Calling the function twice with the same arguments gives the same result.
    #[test]
    fn normalize_path_is_deterministic() {
        let root = PathBuf::from("repo");
        let input = r".\src\main.rs";
        assert_eq!(normalize_path(input, &root), normalize_path(input, &root));
    }

    proptest! {
        // `\PC*` generates arbitrary strings of non-control characters,
        // including the empty string; the depth must never drop below 1.
        #[test]
        fn path_depth_always_at_least_one(path in "\\PC*") {
            let depth = path_depth(&path);
            prop_assert!(depth >= 1, "Path depth should always be at least 1");
        }

        // Joining N non-empty segments with single slashes yields depth N.
        #[test]
        fn path_depth_counts_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..10)) {
            let path = segments.join("/");
            let depth = path_depth(&path);
            prop_assert_eq!(depth, segments.len(), "Depth should equal segment count for {}", path);
        }

        // Doubled, trailing and leading slashes only create empty segments,
        // which must not change the depth.
        #[test]
        fn path_depth_ignores_empty_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..5)) {
            let path_normal = segments.join("/");
            let path_with_double = segments.join("//");
            let path_with_trailing = format!("{}/", path_normal);
            let path_with_leading = format!("/{}", path_normal);

            let d_normal = path_depth(&path_normal);
            let d_double = path_depth(&path_with_double);
            let d_trailing = path_depth(&path_with_trailing);
            let d_leading = path_depth(&path_with_leading);

            prop_assert_eq!(d_normal, d_double, "Double slashes should not add depth");
            prop_assert_eq!(d_normal, d_trailing, "Trailing slash should not add depth");
            prop_assert_eq!(d_normal, d_leading, "Leading slash should not add depth");
        }

        // The same path with the directory written as test / TEST / TeSt must
        // be classified identically.
        #[test]
        fn is_test_path_case_insensitive_for_dirs(prefix in "[a-zA-Z0-9_/]+", suffix in "[a-zA-Z0-9_/]+\\.rs") {
            let lower = format!("{}/test/{}", prefix, suffix);
            let upper = format!("{}/TEST/{}", prefix, suffix);
            let mixed = format!("{}/TeSt/{}", prefix, suffix);

            prop_assert_eq!(is_test_path(&lower), is_test_path(&upper), "Case sensitivity issue with TEST dir");
            prop_assert_eq!(is_test_path(&lower), is_test_path(&mixed), "Case sensitivity issue with TeSt dir");
        }

        // Each recognised test directory name is detected when nested under "src/".
        #[test]
        fn is_test_path_known_test_dirs_detected(dir in prop::sample::select(vec!["test", "tests", "__tests__", "spec", "specs"])) {
            let path = format!("src/{}/foo.rs", dir);
            prop_assert!(is_test_path(&path), "Should detect test dir: {}", dir);
        }

        // Each recognised file-name pattern is detected outside any test directory.
        #[test]
        fn is_test_path_file_patterns_detected(pattern in prop::sample::select(vec!["foo_test.rs", "test_foo.rs", "foo.test.js", "foo.spec.ts"])) {
            let path = format!("src/{}", pattern);
            prop_assert!(is_test_path(&path), "Should detect test file pattern: {}", pattern);
        }

        // Checks the lower-case and upper-case spelling of a sample of infra languages.
        #[test]
        fn is_infra_lang_case_insensitive(lang in prop::sample::select(vec!["json", "yaml", "toml", "markdown", "xml", "html", "css"])) {
            prop_assert!(is_infra_lang(lang), "Should detect infra lang: {}", lang);
            prop_assert!(is_infra_lang(&lang.to_uppercase()), "Should detect infra lang (upper): {}", lang.to_uppercase());
        }

        // Mirrors the full list of names accepted by `is_infra_lang`.
        #[test]
        fn is_infra_lang_known_infra_detected(lang in prop::sample::select(vec![
            "json", "yaml", "toml", "markdown", "xml", "html", "css", "scss", "less",
            "makefile", "dockerfile", "hcl", "terraform", "nix", "cmake", "ini",
            "properties", "gitignore", "gitconfig", "editorconfig", "csv", "tsv", "svg"
        ])) {
            prop_assert!(is_infra_lang(lang), "Should detect known infra lang: {}", lang);
        }

        // A sample of programming languages must not be classified as infra.
        #[test]
        fn is_infra_lang_code_langs_not_infra(lang in prop::sample::select(vec![
            "rust", "python", "javascript", "typescript", "go", "java", "c", "cpp"
        ])) {
            prop_assert!(!is_infra_lang(lang), "Code lang should not be infra: {}", lang);
        }
    }
}