Skip to main content

tokmd_analysis_util/
lib.rs

1use std::path::{Path, PathBuf};
2use std::time::{SystemTime, UNIX_EPOCH};
3
4use tokmd_analysis_types::FileStatRow;
5
6pub use tokmd_math::{gini_coefficient, percentile, round_f64, safe_ratio};
7
/// Optional caps for an analysis run.
///
/// Every field is an `Option`, and the derived [`Default`] leaves all of them
/// `None`. This module only declares the limits; how (and whether) each one is
/// enforced is up to the code that consumes this struct.
///
/// `PartialEq`/`Eq` are derived so two sets of limits can be compared directly,
/// e.g. with `assert_eq!` in tests.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct AnalysisLimits {
    /// Cap on a file count.
    /// NOTE(review): presumably `None` means "no limit" — confirm with consumers.
    pub max_files: Option<usize>,
    /// Cap on a byte count.
    /// NOTE(review): looks like a total across all files — confirm with consumers.
    pub max_bytes: Option<u64>,
    /// Cap on a byte count.
    /// NOTE(review): looks like a per-file size limit — confirm with consumers.
    pub max_file_bytes: Option<u64>,
    /// Cap on a commit count.
    /// NOTE(review): presumably bounds history scanning — confirm with consumers.
    pub max_commits: Option<usize>,
    /// Cap on a per-commit file count.
    /// NOTE(review): inferred from the field name only — confirm with consumers.
    pub max_commit_files: Option<usize>,
}
16
/// Returns the current system time as whole milliseconds since the Unix epoch.
///
/// If the system clock reports a time earlier than the epoch, the result is `0`
/// rather than an error or a panic.
pub fn now_ms() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis(),
        // Clock is set before 1970: fall back to a zero duration.
        Err(_) => 0,
    }
}
23
/// Converts `path` into a forward-slash path, relative to `root` when possible.
///
/// Steps, in order:
/// 1. every `\` in `path` becomes `/`;
/// 2. if the result starts with `root` (component-wise, per
///    [`Path::strip_prefix`]), that prefix is removed;
/// 3. a single leading `./` is removed.
///
/// Paths that do not start with `root` are returned with only steps 1 and 3
/// applied.
pub fn normalize_path(path: &str, root: &Path) -> String {
    let unified = path.replace('\\', "/");

    let relative = match Path::new(&unified).strip_prefix(root) {
        // Re-normalise separators: the stripped remainder goes through `Path`.
        Ok(rest) => rest.to_string_lossy().replace('\\', "/"),
        Err(_) => unified,
    };

    match relative.strip_prefix("./") {
        Some(rest) => rest.to_string(),
        None => relative,
    }
}
34
/// Counts the non-empty `/`-separated segments of `path`.
///
/// Leading, trailing and doubled slashes contribute nothing, and the result is
/// never below `1` — even for an empty string or a path made only of slashes.
pub fn path_depth(path: &str) -> usize {
    let non_empty = path.split('/').filter(|part| !part.is_empty()).count();
    if non_empty == 0 {
        1
    } else {
        non_empty
    }
}
38
/// Heuristically decides whether `path` looks like test code.
///
/// Matching is case-insensitive and assumes `/` as the separator. A path is
/// treated as a test path when any of the following holds:
///
/// * `__tests__` appears anywhere in it;
/// * any directory component is exactly `test`, `tests`, `spec` or `specs` —
///   including the *first* component, so repo-relative paths such as
///   `tests/cli.rs` are recognised;
/// * the file name (the part after the last `/`) contains `_test`, `.test.`
///   or `.spec.`, or starts with `test_`.
pub fn is_test_path(path: &str) -> bool {
    const TEST_DIRS: [&str; 4] = ["test", "tests", "spec", "specs"];

    let lower = path.to_lowercase();
    if lower.contains("__tests__") {
        return true;
    }

    // Everything before the last `/` is directories; the remainder is the file
    // name. A path without any `/` has no directory part at all.
    let (dirs, name) = lower.rsplit_once('/').unwrap_or(("", lower.as_str()));

    // Compare whole components rather than searching for "/tests/": the
    // substring form misses a test directory that is the first component.
    if dirs.split('/').any(|segment| TEST_DIRS.contains(&segment)) {
        return true;
    }

    name.contains("_test")
        || name.contains(".test.")
        || name.contains(".spec.")
        || name.starts_with("test_")
}
54
/// Reports whether `lang` is one of the language names this module classifies
/// as "infra" (configuration, markup, build and data formats).
///
/// The comparison is done on the lowercased name, so `"JSON"` and `"json"` are
/// treated alike. Any name not in the table yields `false`.
pub fn is_infra_lang(lang: &str) -> bool {
    const INFRA_LANGS: &[&str] = &[
        "json",
        "yaml",
        "toml",
        "markdown",
        "xml",
        "html",
        "css",
        "scss",
        "less",
        "makefile",
        "dockerfile",
        "hcl",
        "terraform",
        "nix",
        "cmake",
        "ini",
        "properties",
        "gitignore",
        "gitconfig",
        "editorconfig",
        "csv",
        "tsv",
        "svg",
    ];

    let needle = lang.to_lowercase();
    INFRA_LANGS.contains(&needle.as_str())
}
84
85pub fn empty_file_row() -> FileStatRow {
86    FileStatRow {
87        path: String::new(),
88        module: String::new(),
89        lang: String::new(),
90        code: 0,
91        comments: 0,
92        blanks: 0,
93        lines: 0,
94        bytes: 0,
95        tokens: 0,
96        doc_pct: None,
97        bytes_per_line: None,
98        depth: 0,
99    }
100}
101
/// Resolves `root` to its canonical form when the filesystem allows it.
///
/// If [`Path::canonicalize`] fails (for example because the path does not
/// exist), the input is returned unchanged as an owned [`PathBuf`].
pub fn normalize_root(root: &Path) -> PathBuf {
    match root.canonicalize() {
        Ok(resolved) => resolved,
        // Best effort: keep whatever the caller gave us.
        Err(_) => root.to_path_buf(),
    }
}
105
/// Unit and property tests for the helpers in this module.
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    #[test]
    fn normalize_path_replaces_backslashes_and_leading_dot_slash() {
        let root = PathBuf::from("repo");
        assert_eq!(normalize_path(r".\src\lib.rs", &root), "src/lib.rs");
    }

    #[test]
    fn normalize_path_is_deterministic() {
        let root = PathBuf::from("repo");
        let input = r".\src\main.rs";
        assert_eq!(normalize_path(input, &root), normalize_path(input, &root));
    }

    // Root stripping is the main job of `normalize_path`; cover it with both
    // separator styles.
    #[test]
    fn normalize_path_strips_root_prefix() {
        let root = PathBuf::from("repo");
        assert_eq!(normalize_path("repo/src/lib.rs", &root), "src/lib.rs");
        assert_eq!(normalize_path(r"repo\src\lib.rs", &root), "src/lib.rs");
    }

    #[test]
    fn normalize_path_keeps_paths_outside_root() {
        let root = PathBuf::from("repo");
        assert_eq!(
            normalize_path("other/src/lib.rs", &root),
            "other/src/lib.rs"
        );
    }

    // A path that cannot be canonicalized must come back unchanged.
    #[test]
    fn normalize_root_falls_back_for_missing_path() {
        let missing = Path::new("tokmd-analysis-util-missing-dir/definitely/absent");
        assert_eq!(normalize_root(missing), missing.to_path_buf());
    }

    #[test]
    fn empty_file_row_is_blank() {
        let row = empty_file_row();
        assert!(row.path.is_empty());
        assert!(row.module.is_empty());
        assert!(row.lang.is_empty());
        assert_eq!(row.code, 0);
        assert_eq!(row.comments, 0);
        assert_eq!(row.blanks, 0);
        assert_eq!(row.lines, 0);
        assert_eq!(row.bytes, 0);
        assert_eq!(row.tokens, 0);
        assert_eq!(row.depth, 0);
        assert!(row.doc_pct.is_none());
        assert!(row.bytes_per_line.is_none());
    }

    #[test]
    fn now_ms_is_after_epoch() {
        assert!(now_ms() > 0);
    }

    proptest! {
        #[test]
        fn path_depth_always_at_least_one(path in "\\PC*") {
            let depth = path_depth(&path);
            prop_assert!(depth >= 1, "Path depth should always be at least 1");
        }

        #[test]
        fn path_depth_counts_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..10)) {
            let path = segments.join("/");
            let depth = path_depth(&path);
            prop_assert_eq!(depth, segments.len(), "Depth should equal segment count for {}", path);
        }

        #[test]
        fn path_depth_ignores_empty_segments(segments in prop::collection::vec("[a-zA-Z0-9_]+", 1..5)) {
            let path_normal = segments.join("/");
            let path_with_double = segments.join("//");
            let path_with_trailing = format!("{}/", path_normal);
            let path_with_leading = format!("/{}", path_normal);

            let d_normal = path_depth(&path_normal);
            let d_double = path_depth(&path_with_double);
            let d_trailing = path_depth(&path_with_trailing);
            let d_leading = path_depth(&path_with_leading);

            prop_assert_eq!(d_normal, d_double, "Double slashes should not add depth");
            prop_assert_eq!(d_normal, d_trailing, "Trailing slash should not add depth");
            prop_assert_eq!(d_normal, d_leading, "Leading slash should not add depth");
        }

        #[test]
        fn is_test_path_case_insensitive_for_dirs(prefix in "[a-zA-Z0-9_/]+", suffix in "[a-zA-Z0-9_/]+\\.rs") {
            let lower = format!("{}/test/{}", prefix, suffix);
            let upper = format!("{}/TEST/{}", prefix, suffix);
            let mixed = format!("{}/TeSt/{}", prefix, suffix);

            prop_assert_eq!(is_test_path(&lower), is_test_path(&upper), "Case sensitivity issue with TEST dir");
            prop_assert_eq!(is_test_path(&lower), is_test_path(&mixed), "Case sensitivity issue with TeSt dir");
        }

        #[test]
        fn is_test_path_known_test_dirs_detected(dir in prop::sample::select(vec!["test", "tests", "__tests__", "spec", "specs"])) {
            let path = format!("src/{}/foo.rs", dir);
            prop_assert!(is_test_path(&path), "Should detect test dir: {}", dir);
        }

        #[test]
        fn is_test_path_file_patterns_detected(pattern in prop::sample::select(vec!["foo_test.rs", "test_foo.rs", "foo.test.js", "foo.spec.ts"])) {
            let path = format!("src/{}", pattern);
            prop_assert!(is_test_path(&path), "Should detect test file pattern: {}", pattern);
        }

        #[test]
        fn is_infra_lang_case_insensitive(lang in prop::sample::select(vec!["json", "yaml", "toml", "markdown", "xml", "html", "css"])) {
            prop_assert!(is_infra_lang(lang), "Should detect infra lang: {}", lang);
            prop_assert!(is_infra_lang(&lang.to_uppercase()), "Should detect infra lang (upper): {}", lang.to_uppercase());
        }

        #[test]
        fn is_infra_lang_known_infra_detected(lang in prop::sample::select(vec![
            "json", "yaml", "toml", "markdown", "xml", "html", "css", "scss", "less",
            "makefile", "dockerfile", "hcl", "terraform", "nix", "cmake", "ini",
            "properties", "gitignore", "gitconfig", "editorconfig", "csv", "tsv", "svg"
        ])) {
            prop_assert!(is_infra_lang(lang), "Should detect known infra lang: {}", lang);
        }

        #[test]
        fn is_infra_lang_code_langs_not_infra(lang in prop::sample::select(vec![
            "rust", "python", "javascript", "typescript", "go", "java", "c", "cpp"
        ])) {
            prop_assert!(!is_infra_lang(lang), "Code lang should not be infra: {}", lang);
        }
    }
}