Skip to main content

tokmd_analysis_util/
lib.rs

1use std::path::{Path, PathBuf};
2#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
3use std::time::{SystemTime, UNIX_EPOCH};
4
5use tokmd_analysis_types::FileStatRow;
6
7pub use tokmd_math::{gini_coefficient, percentile, round_f64, safe_ratio};
8
/// Optional caps applied while running an analysis.
///
/// Every field is an `Option`; the derived `Default` leaves all of them
/// `None`. NOTE(review): `None` presumably means "no limit" — confirm with
/// the consumers of this struct.
#[derive(Clone, Debug, Default)]
pub struct AnalysisLimits {
    /// Cap on a file count (presumably files scanned — confirm with callers).
    pub max_files: Option<usize>,
    /// Cap on a byte total (presumably across all files — confirm with callers).
    pub max_bytes: Option<u64>,
    /// Cap on bytes for a single file (presumably per-file read size — confirm).
    pub max_file_bytes: Option<u64>,
    /// Cap on a commit count (presumably history depth — confirm with callers).
    pub max_commits: Option<usize>,
    /// Cap on files per commit (presumably for history walks — confirm).
    pub max_commit_files: Option<usize>,
}
17
/// Current time in milliseconds for bare wasm targets.
///
/// `std::time` is not implemented on bare wasm, so this variant always
/// returns `0`. That keeps analysis receipts deterministic until the browser
/// path provides a host clock explicitly.
#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
pub fn now_ms() -> u128 {
    0
}

/// Milliseconds elapsed since the Unix epoch according to the system clock.
///
/// Returns `0` when the system clock reports a time before the epoch.
#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
pub fn now_ms() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis(),
        // Clock is set before the epoch: report a zero timestamp, not an error.
        Err(_) => 0,
    }
}
32
/// Normalizes `path` into a forward-slash form relative to `root`.
///
/// Steps, in order:
/// 1. every `\` is replaced with `/`;
/// 2. if the result starts with `root` (component-wise, per
///    [`Path::strip_prefix`]), that prefix is removed;
/// 3. any number of leading `./` prefixes are dropped.
///
/// A path that does not start with `root` is left un-stripped in step 2.
pub fn normalize_path(path: &str, root: &Path) -> String {
    let unified = path.replace('\\', "/");

    let relative = match Path::new(&unified).strip_prefix(root) {
        Ok(rest) => rest.to_string_lossy().replace('\\', "/"),
        Err(_) => unified,
    };

    // Peel leading "./" markers on a borrowed slice, then allocate once.
    let mut trimmed = relative.as_str();
    while let Some(rest) = trimmed.strip_prefix("./") {
        trimmed = rest;
    }
    trimmed.to_string()
}
43
/// Counts the non-empty `/`-separated segments of `path`.
///
/// Empty segments (from leading, trailing, or doubled slashes) are ignored,
/// and the result is never less than 1, even for an empty string.
pub fn path_depth(path: &str) -> usize {
    let segments = path.split('/').filter(|part| !part.is_empty()).count();
    if segments == 0 {
        1
    } else {
        segments
    }
}
47
/// Heuristically decides whether `path` points at test code.
///
/// Matching is case-insensitive and expects `/` as the separator. A path is
/// treated as a test path when any of the following holds:
///
/// * it contains `__tests__` anywhere;
/// * any directory segment (everything before the final `/`) is exactly
///   `test`, `tests`, `spec`, or `specs` — including the first segment, so
///   root-relative paths such as `tests/integration.rs` are recognized;
/// * the file name contains `_test`, `.test.`, or `.spec.`, or starts with
///   `test_`.
pub fn is_test_path(path: &str) -> bool {
    let lower = path.to_lowercase();
    if lower.contains("__tests__") {
        return true;
    }

    // Split into the directory part and the final segment (the file name).
    let (dirs, name) = match lower.rsplit_once('/') {
        Some((dirs, name)) => (dirs, name),
        None => ("", lower.as_str()),
    };

    // Segment-wise matching also catches a test directory at the very start
    // of a relative path, which a substring search for "/tests/" misses.
    if dirs
        .split('/')
        .any(|segment| matches!(segment, "test" | "tests" | "spec" | "specs"))
    {
        return true;
    }

    // `contains("_test")` already covers names ending in `_test.rs`.
    name.contains("_test")
        || name.contains(".test.")
        || name.contains(".spec.")
        || name.starts_with("test_")
}
63
/// Reports whether `lang` names one of the known infrastructure / config /
/// markup languages listed below, as opposed to a programming language.
///
/// The comparison is case-insensitive: `lang` is lowercased before lookup.
pub fn is_infra_lang(lang: &str) -> bool {
    // Lowercase names recognized as infrastructure languages.
    const INFRA_LANGS: [&str; 23] = [
        "json",
        "yaml",
        "toml",
        "markdown",
        "xml",
        "html",
        "css",
        "scss",
        "less",
        "makefile",
        "dockerfile",
        "hcl",
        "terraform",
        "nix",
        "cmake",
        "ini",
        "properties",
        "gitignore",
        "gitconfig",
        "editorconfig",
        "csv",
        "tsv",
        "svg",
    ];

    let lowered = lang.to_lowercase();
    INFRA_LANGS.iter().any(|known| *known == lowered)
}
93
94pub fn empty_file_row() -> FileStatRow {
95    FileStatRow {
96        path: String::new(),
97        module: String::new(),
98        lang: String::new(),
99        code: 0,
100        comments: 0,
101        blanks: 0,
102        lines: 0,
103        bytes: 0,
104        tokens: 0,
105        doc_pct: None,
106        bytes_per_line: None,
107        depth: 0,
108    }
109}
110
/// Resolves `root` to its canonical form via [`Path::canonicalize`].
///
/// If canonicalization fails for any reason (for example the path does not
/// exist), the input is returned unchanged as an owned `PathBuf`.
pub fn normalize_root(root: &Path) -> PathBuf {
    match root.canonicalize() {
        Ok(canonical) => canonical,
        Err(_) => root.to_path_buf(),
    }
}
114
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Backslash separators and a leading `.\` are both normalized away.
    #[test]
    fn normalize_path_replaces_backslashes_and_leading_dot_slash() {
        let normalized = normalize_path(r".\src\lib.rs", Path::new("repo"));
        assert_eq!(normalized, "src/lib.rs");
    }

    /// Two calls with the same arguments yield the same string.
    #[test]
    fn normalize_path_is_deterministic() {
        let root = Path::new("repo");
        let raw = r".\src\main.rs";
        let first = normalize_path(raw, root);
        let second = normalize_path(raw, root);
        assert_eq!(first, second);
    }

    proptest! {
        // Any printable string has a depth of at least one.
        #[test]
        fn path_depth_always_at_least_one(path in "\\PC*") {
            prop_assert!(path_depth(&path) >= 1, "Path depth should always be at least 1");
        }

        // Joining N non-empty segments with '/' gives depth N.
        #[test]
        fn path_depth_counts_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..10)) {
            let joined = segments.join("/");
            prop_assert_eq!(
                path_depth(&joined),
                segments.len(),
                "Depth should equal segment count for {}",
                joined
            );
        }

        // Doubled, trailing, and leading slashes do not change the depth.
        #[test]
        fn path_depth_ignores_empty_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..5)) {
            let plain = segments.join("/");
            let baseline = path_depth(&plain);
            let variants = [
                (segments.join("//"), "Double slashes should not add depth"),
                (format!("{}/", plain), "Trailing slash should not add depth"),
                (format!("/{}", plain), "Leading slash should not add depth"),
            ];

            for (variant, message) in variants.iter() {
                prop_assert_eq!(baseline, path_depth(variant), "{}", message);
            }
        }

        // The `test` directory marker is matched regardless of letter case.
        #[test]
        fn is_test_path_case_insensitive_for_dirs(prefix in "[a-zA-Z0-9_/]+", suffix in "[a-zA-Z0-9_/]+\\.rs") {
            let expected = is_test_path(&format!("{}/test/{}", prefix, suffix));
            let cases = [
                ("TEST", "Case sensitivity issue with TEST dir"),
                ("TeSt", "Case sensitivity issue with TeSt dir"),
            ];

            for (dir, message) in cases.iter() {
                let candidate = format!("{}/{}/{}", prefix, dir, suffix);
                prop_assert_eq!(expected, is_test_path(&candidate), "{}", message);
            }
        }

        // Each well-known test directory name is recognized under `src/`.
        #[test]
        fn is_test_path_known_test_dirs_detected(dir in prop::sample::select(vec!["test", "tests", "__tests__", "spec", "specs"])) {
            let candidate = format!("src/{}/foo.rs", dir);
            prop_assert!(is_test_path(&candidate), "Should detect test dir: {}", dir);
        }

        // Each well-known test file naming pattern is recognized.
        #[test]
        fn is_test_path_file_patterns_detected(pattern in prop::sample::select(vec!["foo_test.rs", "test_foo.rs", "foo.test.js", "foo.spec.ts"])) {
            let candidate = format!("src/{}", pattern);
            prop_assert!(is_test_path(&candidate), "Should detect test file pattern: {}", pattern);
        }

        // Infra languages are detected in both lower and upper case.
        #[test]
        fn is_infra_lang_case_insensitive(lang in prop::sample::select(vec!["json", "yaml", "toml", "markdown", "xml", "html", "css"])) {
            prop_assert!(is_infra_lang(lang), "Should detect infra lang: {}", lang);

            let shouted = lang.to_uppercase();
            prop_assert!(is_infra_lang(&shouted), "Should detect infra lang (upper): {}", shouted);
        }

        // Every name in the infra list is reported as infra.
        #[test]
        fn is_infra_lang_known_infra_detected(lang in prop::sample::select(vec![
            "json", "yaml", "toml", "markdown", "xml", "html", "css", "scss", "less",
            "makefile", "dockerfile", "hcl", "terraform", "nix", "cmake", "ini",
            "properties", "gitignore", "gitconfig", "editorconfig", "csv", "tsv", "svg"
        ])) {
            prop_assert!(is_infra_lang(lang), "Should detect known infra lang: {}", lang);
        }

        // Common programming languages are not reported as infra.
        #[test]
        fn is_infra_lang_code_langs_not_infra(lang in prop::sample::select(vec![
            "rust", "python", "javascript", "typescript", "go", "java", "c", "cpp"
        ])) {
            let flagged = is_infra_lang(lang);
            prop_assert!(!flagged, "Code lang should not be infra: {}", lang);
        }
    }
}