Skip to main content

covy_core/
pathmap.rs

1use std::collections::{BTreeMap, HashMap};
2use std::hash::{Hash, Hasher};
3use std::path::Path;
4use std::sync::Arc;
5
6use crate::diagnostics::DiagnosticsData;
7use crate::model::{CoverageData, RepoSnapshot};
8
/// Strategies for mapping coverage file paths to repository file paths.
///
/// The lookup tables (`suffix_index`, `hash_index`) are built once by the
/// constructor from the optional repository snapshot; `cache` and
/// `cached_known_index` start empty and are filled while resolving paths.
pub struct PathMapper {
    /// Prefixes to strip from coverage paths, stored normalized
    /// (see `normalize_prefixes`).
    strip_prefixes: Vec<String>,
    /// `(from, to)` prefix rewrite rules, stored normalized
    /// (see `normalize_rules`).
    rules: Vec<(String, String)>,
    /// Glob patterns; a coverage path matching any of them never resolves.
    ignore_globs: Vec<String>,
    /// ASCII-lowercased copies of `ignore_globs`; left empty when
    /// `case_sensitive` is true.
    ignore_globs_lower: Vec<String>,
    /// Whether path comparisons distinguish ASCII letter case.
    case_sensitive: bool,
    /// Reverse index: filename → list of full paths in the repo.
    suffix_index: HashMap<String, Vec<String>>,
    /// Content hash index for fallback matching.
    hash_index: HashMap<String, Vec<String>>,
    /// Memoized results of `resolve`, keyed by the normalized coverage path.
    /// NOTE(review): this is a plain `HashMap` with no eviction, not an LRU.
    cache: HashMap<String, Option<String>>,
    /// Cached known-path index keyed by hash of known_paths.
    cached_known_index: Option<(u64, Arc<HashMap<String, String>>)>,
}
25
26impl PathMapper {
27    pub fn new(
28        strip_prefixes: Vec<String>,
29        rules: BTreeMap<String, String>,
30        snapshot: Option<&RepoSnapshot>,
31    ) -> Self {
32        Self::with_options(strip_prefixes, rules, Vec::new(), !cfg!(windows), snapshot)
33    }
34
35    pub fn with_options(
36        strip_prefixes: Vec<String>,
37        rules: BTreeMap<String, String>,
38        ignore_globs: Vec<String>,
39        case_sensitive: bool,
40        snapshot: Option<&RepoSnapshot>,
41    ) -> Self {
42        let mut suffix_index = HashMap::new();
43        let mut hash_index = HashMap::new();
44        let normalized_strip_prefixes = normalize_prefixes(strip_prefixes);
45        let normalized_rules = normalize_rules(rules);
46        let normalized_ignore_globs = ignore_globs
47            .into_iter()
48            .map(|g| normalize_path(g.trim()))
49            .filter(|g| !g.is_empty())
50            .collect::<Vec<_>>();
51        let normalized_ignore_globs_lower = if case_sensitive {
52            Vec::new()
53        } else {
54            normalized_ignore_globs
55                .iter()
56                .map(|g| g.to_ascii_lowercase())
57                .collect::<Vec<_>>()
58        };
59
60        if let Some(snap) = snapshot {
61            for (path, hash) in &snap.file_hashes {
62                let normalized_path = normalize_path(path);
63                // Build suffix index by filename
64                if let Some(filename) = normalized_path.rsplit('/').next() {
65                    suffix_index
66                        .entry(normalize_case(filename, case_sensitive))
67                        .or_insert_with(Vec::new)
68                        .push(normalized_path.clone());
69                }
70                // Build hash index
71                hash_index
72                    .entry(hash.clone())
73                    .or_insert_with(Vec::new)
74                    .push(normalized_path.clone());
75            }
76        }
77
78        Self {
79            strip_prefixes: normalized_strip_prefixes,
80            rules: normalized_rules,
81            ignore_globs: normalized_ignore_globs,
82            ignore_globs_lower: normalized_ignore_globs_lower,
83            case_sensitive,
84            suffix_index,
85            hash_index,
86            cache: HashMap::new(),
87            cached_known_index: None,
88        }
89    }
90
91    /// Resolve a coverage file path to a repository file path.
92    /// Strategy chain: exact match → rule substitution → strip prefix → suffix match.
93    pub fn resolve(&mut self, coverage_path: &str, known_paths: &[&str]) -> Option<String> {
94        let cache_key = normalize_case(&normalize_path(coverage_path), self.case_sensitive);
95        if let Some(cached) = self.cache.get(&cache_key) {
96            return cached.clone();
97        }
98
99        let result = self.resolve_inner(coverage_path, known_paths);
100        self.cache.insert(cache_key, result.clone());
101        result
102    }
103
104    fn resolve_inner(&mut self, coverage_path: &str, known_paths: &[&str]) -> Option<String> {
105        let normalized = normalize_path(coverage_path);
106        if self.is_ignored(&normalized) {
107            return None;
108        }
109
110        let known_index = self.get_known_index(known_paths);
111
112        // 1. Exact match
113        if let Some(exact) = self.find_known(&normalized, known_index.as_ref()) {
114            return Some(exact.to_string());
115        }
116
117        // 2. Rule substitution
118        for (from, to) in &self.rules {
119            if let Some(rest) = strip_path_prefix_with_case(&normalized, from, self.case_sensitive)
120            {
121                let candidate = normalize_path(&format!("{to}{rest}"));
122                if let Some(found) = self.find_known(&candidate, known_index.as_ref()) {
123                    return Some(found.to_string());
124                }
125            }
126        }
127
128        // 3. Strip prefix
129        for prefix in &self.strip_prefixes {
130            if let Some(stripped) =
131                strip_path_prefix_with_case(&normalized, prefix, self.case_sensitive)
132            {
133                let candidate = stripped.trim_start_matches('/');
134                if let Some(found) = self.find_known(candidate, known_index.as_ref()) {
135                    return Some(found.to_string());
136                }
137            }
138        }
139
140        // 4. Suffix match (by filename)
141        let filename = normalized.rsplit('/').next().unwrap_or(normalized.as_str());
142        let filename_key = normalize_case(filename, self.case_sensitive);
143        let mut best: Option<(&str, usize)> = None;
144
145        if let Some(snapshot_candidates) = self.suffix_index.get(&filename_key) {
146            for candidate in snapshot_candidates {
147                if let Some(found) = self.find_known(candidate, known_index.as_ref()) {
148                    let score = common_suffix_len(
149                        &normalize_case(found, self.case_sensitive),
150                        &normalize_case(&normalized, self.case_sensitive),
151                    );
152                    best = pick_better_match(best, (found, score), self.case_sensitive);
153                }
154            }
155        }
156
157        if best.is_none() {
158            for known in known_paths {
159                let known_normalized = normalize_path(known);
160                let known_filename = known_normalized
161                    .rsplit('/')
162                    .next()
163                    .unwrap_or(known_normalized.as_str());
164                if normalize_case(known_filename, self.case_sensitive) != filename_key {
165                    continue;
166                }
167                let score = common_suffix_len(
168                    &normalize_case(&known_normalized, self.case_sensitive),
169                    &normalize_case(&normalized, self.case_sensitive),
170                );
171                best = pick_better_match(best, (known, score), self.case_sensitive);
172            }
173        }
174
175        best.map(|(path, _)| path.to_string())
176    }
177
178    /// Resolve using content hash as fallback.
179    pub fn resolve_by_hash(&self, content_hash: &str) -> Option<String> {
180        self.hash_index.get(content_hash).and_then(|paths| {
181            if paths.len() == 1 {
182                Some(paths[0].clone())
183            } else {
184                None
185            }
186        })
187    }
188
189    fn is_ignored(&self, path: &str) -> bool {
190        if self.case_sensitive {
191            for pattern in &self.ignore_globs {
192                if glob_matches(pattern, path) {
193                    return true;
194                }
195            }
196            return false;
197        }
198
199        for pattern in &self.ignore_globs {
200            if glob_matches(pattern, path) {
201                return true;
202            }
203        }
204
205        let lower_path = path.to_ascii_lowercase();
206        for pattern in &self.ignore_globs_lower {
207            if glob_matches(pattern, &lower_path) {
208                return true;
209            }
210        }
211        false
212    }
213
214    fn build_known_index(&self, known_paths: &[&str]) -> HashMap<String, String> {
215        let mut index = HashMap::with_capacity(known_paths.len());
216        for &path in known_paths {
217            let normalized = normalize_path(path);
218            let key = normalize_case(&normalized, self.case_sensitive);
219            index.entry(key).or_insert_with(|| path.to_string());
220        }
221        index
222    }
223
224    fn get_known_index(&mut self, known_paths: &[&str]) -> Arc<HashMap<String, String>> {
225        let mut hasher = std::collections::hash_map::DefaultHasher::new();
226        known_paths.len().hash(&mut hasher);
227        for path in known_paths {
228            path.hash(&mut hasher);
229        }
230        let known_paths_key = hasher.finish();
231
232        let needs_rebuild = self
233            .cached_known_index
234            .as_ref()
235            .map(|(cached_key, _)| *cached_key != known_paths_key)
236            .unwrap_or(true);
237        if needs_rebuild {
238            self.cached_known_index = Some((
239                known_paths_key,
240                Arc::new(self.build_known_index(known_paths)),
241            ));
242        }
243
244        Arc::clone(
245            &self
246            .cached_known_index
247            .as_ref()
248            .expect("known index cache must be initialized")
249            .1,
250        )
251    }
252
253    fn find_known<'a>(
254        &self,
255        candidate: &str,
256        known_index: &'a HashMap<String, String>,
257    ) -> Option<&'a str> {
258        let key = normalize_case(&normalize_path(candidate), self.case_sensitive);
259        known_index.get(&key).map(|s| s.as_str())
260    }
261}
262
263/// Automatically normalize paths in coverage data to be relative.
264///
265/// Strategy:
266/// 1. If `source_root` is provided, strip it from all paths.
267/// 2. Otherwise, detect common absolute prefix and strip it.
268/// 3. As fallback, try `git rev-parse --show-toplevel`.
269/// 4. Normalize backslashes and strip leading `./`.
270pub fn auto_normalize_paths(data: &mut CoverageData, source_root: Option<&Path>) {
271    let root = source_root
272        .map(|p| p.to_string_lossy().to_string())
273        .or_else(|| detect_common_prefix(data.files.keys().map(|k| k.as_str())))
274        .or_else(git_toplevel);
275
276    let old_files = std::mem::take(&mut data.files);
277    let mut new_files = BTreeMap::new();
278    for (path, fc) in old_files {
279        let mut p: String = path.replace('\\', "/");
280
281        if let Some(ref root) = root {
282            let root_normalized = root.replace('\\', "/");
283            let root_with_slash = if root_normalized.ends_with('/') {
284                root_normalized.clone()
285            } else {
286                format!("{root_normalized}/")
287            };
288            if p.starts_with(&root_with_slash) {
289                p = p[root_with_slash.len()..].to_string();
290            } else if p == root_normalized {
291                // Edge case: path equals root exactly
292                p = String::new();
293            }
294        }
295
296        // Strip leading ./
297        if let Some(stripped) = p.strip_prefix("./") {
298            p = stripped.to_string();
299        }
300
301        if !p.is_empty() {
302            new_files.insert(p, fc);
303        }
304    }
305    data.files = new_files;
306}
307
308/// Automatically normalize paths in diagnostics data to be relative.
309pub fn auto_normalize_issue_paths(data: &mut DiagnosticsData, source_root: Option<&Path>) {
310    let root = source_root
311        .map(|p| p.to_string_lossy().to_string())
312        .or_else(|| detect_common_prefix(data.issues_by_file.keys().map(|k| k.as_str())))
313        .or_else(git_toplevel);
314
315    let old_issues = std::mem::take(&mut data.issues_by_file);
316    let mut new_issues = BTreeMap::new();
317
318    for (path, mut issues) in old_issues {
319        let mut p: String = path.replace('\\', "/");
320
321        if let Some(ref root) = root {
322            let root_normalized = root.replace('\\', "/");
323            let root_with_slash = if root_normalized.ends_with('/') {
324                root_normalized.clone()
325            } else {
326                format!("{root_normalized}/")
327            };
328            if p.starts_with(&root_with_slash) {
329                p = p[root_with_slash.len()..].to_string();
330            } else if p == root_normalized {
331                p = String::new();
332            }
333        }
334
335        if let Some(stripped) = p.strip_prefix("./") {
336            p = stripped.to_string();
337        }
338
339        if !p.is_empty() {
340            for issue in &mut issues {
341                issue.path = p.clone();
342            }
343            new_issues.insert(p, issues);
344        }
345    }
346
347    data.issues_by_file = new_issues;
348}
349
/// Detect common absolute prefix across all file paths.
///
/// Returns the longest directory prefix (ending in `/`, with backslashes
/// converted to `/`) shared by every path. Returns `None` when `paths` is
/// empty, when any path is not absolute (neither starting with `/` nor having
/// `:` as its second byte), or when the only shared prefix is a bare `/`.
fn detect_common_prefix<'a, I>(paths: I) -> Option<String>
where
    I: Iterator<Item = &'a str>,
{
    let raw: Vec<&str> = paths.collect();

    // Only detect prefix if paths are absolute
    let is_absolute = |p: &str| p.starts_with('/') || p.as_bytes().get(1) == Some(&b':');
    if !raw.iter().all(|p| is_absolute(p)) {
        return None;
    }

    // Normalize every path once instead of once per separator.
    let normalized: Vec<String> = raw.iter().map(|p| p.replace('\\', "/")).collect();
    let (first, rest) = normalized.split_first()?;

    // Grow the prefix one directory at a time until some path disagrees.
    let mut prefix_end = 0;
    for (i, _) in first.match_indices('/') {
        let candidate = &first[..=i];
        if rest.iter().all(|p| p.starts_with(candidate)) {
            prefix_end = i + 1;
        } else {
            break;
        }
    }

    if prefix_end > 1 {
        Some(first[..prefix_end].to_string())
    } else {
        None
    }
}
392
/// Try to get git repo root via `git rev-parse --show-toplevel`.
///
/// Yields `None` when the command cannot be run, exits unsuccessfully, prints
/// non-UTF-8 output, or prints only whitespace.
fn git_toplevel() -> Option<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--show-toplevel"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8(output.stdout).ok()?;
    let toplevel = stdout.trim();
    if toplevel.is_empty() {
        None
    } else {
        Some(toplevel.to_string())
    }
}
407
/// Converts backslashes to forward slashes and removes a single leading `./`.
fn normalize_path(path: &str) -> String {
    let unified = path.replace('\\', "/");
    match unified.strip_prefix("./") {
        Some(rest) => rest.to_owned(),
        None => unified,
    }
}
416
/// Returns an owned copy of `path`, ASCII-lowercased unless `case_sensitive`.
fn normalize_case(path: &str, case_sensitive: bool) -> String {
    let mut folded = path.to_owned();
    if !case_sensitive {
        folded.make_ascii_lowercase();
    }
    folded
}
424
425fn normalize_prefixes(prefixes: Vec<String>) -> Vec<String> {
426    let mut out = prefixes
427        .into_iter()
428        .map(|p| normalize_path(p.trim()))
429        .filter(|p| !p.is_empty())
430        .collect::<Vec<_>>();
431    out.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.cmp(b)));
432    out.dedup();
433    out
434}
435
436fn normalize_rules(rules: BTreeMap<String, String>) -> Vec<(String, String)> {
437    let mut out = rules
438        .into_iter()
439        .map(|(from, to)| (normalize_path(from.trim()), normalize_path(to.trim())))
440        .filter(|(from, _)| !from.is_empty())
441        .collect::<Vec<_>>();
442    out.sort_by(|(a_from, _), (b_from, _)| {
443        b_from
444            .len()
445            .cmp(&a_from.len())
446            .then_with(|| a_from.cmp(b_from))
447    });
448    out
449}
450
/// Strips `prefix` from the front of `path`, optionally ignoring ASCII case.
///
/// Returns the remainder of `path` (borrowed from the input, original casing
/// preserved) or `None` when `path` does not start with `prefix`. The match is
/// purely textual; it does not require the prefix to end on a `/` boundary.
fn strip_path_prefix_with_case<'a>(
    path: &'a str,
    prefix: &str,
    case_sensitive: bool,
) -> Option<&'a str> {
    if case_sensitive {
        return path.strip_prefix(prefix);
    }

    // `get` yields `None` when `path` is shorter than `prefix` or the cut
    // would split a multi-byte character, neither of which can match.
    // Comparing in place avoids allocating lowercase copies on every call.
    let head = path.get(..prefix.len())?;
    if head.eq_ignore_ascii_case(prefix) {
        Some(&path[prefix.len()..])
    } else {
        None
    }
}
467
468fn glob_matches(pattern: &str, path: &str) -> bool {
469    glob::Pattern::new(pattern)
470        .map(|p| p.matches(path))
471        .unwrap_or(false)
472}
473
474fn pick_better_match<'a>(
475    current: Option<(&'a str, usize)>,
476    candidate: (&'a str, usize),
477    case_sensitive: bool,
478) -> Option<(&'a str, usize)> {
479    match current {
480        None => Some(candidate),
481        Some((best_path, best_score)) => {
482            if candidate.1 > best_score {
483                return Some(candidate);
484            }
485            if candidate.1 < best_score {
486                return Some((best_path, best_score));
487            }
488
489            let candidate_key = normalize_case(candidate.0, case_sensitive);
490            let best_key = normalize_case(best_path, case_sensitive);
491            if candidate_key < best_key {
492                Some(candidate)
493            } else {
494                Some((best_path, best_score))
495            }
496        }
497    }
498}
499
/// Counts how many trailing bytes `a` and `b` have in common.
fn common_suffix_len(a: &str, b: &str) -> usize {
    let (left, right) = (a.as_bytes(), b.as_bytes());
    let limit = left.len().min(right.len());
    // First offset from the end at which the bytes differ, or the whole
    // shorter string when they never do.
    (0..limit)
        .find(|&offset| left[left.len() - 1 - offset] != right[right.len() - 1 - offset])
        .unwrap_or(limit)
}
507
// Unit tests for `PathMapper` resolution and the `auto_normalize_*` helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::diagnostics::{Issue, Severity};

    // A coverage path that is already a known path resolves to itself.
    #[test]
    fn test_exact_match() {
        let mut mapper = PathMapper::new(vec![], BTreeMap::new(), None);
        let known = vec!["src/main.rs", "src/lib.rs"];
        assert_eq!(
            mapper.resolve("src/main.rs", &known),
            Some("src/main.rs".to_string())
        );
    }

    // A configured strip prefix (with trailing slash) is removed before lookup.
    #[test]
    fn test_strip_prefix() {
        let mut mapper = PathMapper::new(vec!["/app/".to_string()], BTreeMap::new(), None);
        let known = vec!["src/main.rs"];
        assert_eq!(
            mapper.resolve("/app/src/main.rs", &known),
            Some("src/main.rs".to_string())
        );
    }

    // A rule rewrites the `from` prefix to the `to` prefix before lookup.
    #[test]
    fn test_rule_substitution() {
        let mut rules = BTreeMap::new();
        rules.insert("/build/classes/".to_string(), "src/main/java/".to_string());
        let mut mapper = PathMapper::new(vec![], rules, None);
        let known = vec!["src/main/java/com/App.java"];
        assert_eq!(
            mapper.resolve("/build/classes/com/App.java", &known),
            Some("src/main/java/com/App.java".to_string())
        );
    }

    // A path matching an ignore glob resolves to `None`, even though `known`
    // contains a file with the same filename.
    #[test]
    fn test_ignore_glob_never_resolves() {
        let mut mapper = PathMapper::with_options(
            vec![],
            BTreeMap::new(),
            vec!["**/bazel-out/**".to_string()],
            true,
            None,
        );
        let known = vec!["src/main.rs"];
        assert_eq!(
            mapper.resolve("bazel-out/k8-fastbuild/bin/main.rs", &known),
            None
        );
    }

    // With `case_sensitive = false` the lookup ignores ASCII case and returns
    // the known path in its original spelling.
    #[test]
    fn test_case_insensitive_exact_match() {
        let mut mapper = PathMapper::with_options(vec![], BTreeMap::new(), vec![], false, None);
        let known = vec!["Src/Main.rs"];
        assert_eq!(
            mapper.resolve("src/main.rs", &known),
            Some("Src/Main.rs".to_string())
        );
    }

    // A strip prefix without a trailing slash must not leave a leading `/`
    // on the candidate.
    #[test]
    fn test_strip_prefix_removes_leading_separator() {
        let mut mapper = PathMapper::new(vec!["/workspace".to_string()], BTreeMap::new(), None);
        let known = vec!["src/main.rs"];
        assert_eq!(
            mapper.resolve("/workspace/src/main.rs", &known),
            Some("src/main.rs".to_string())
        );
    }

    // Both known paths share the same trailing `/foo/main.rs`; the tie is
    // expected to go to the path that sorts first.
    #[test]
    fn test_suffix_match_is_deterministic_on_ties() {
        let mut mapper = PathMapper::new(vec![], BTreeMap::new(), None);
        let known = vec!["a/foo/main.rs", "b/foo/main.rs"];
        assert_eq!(
            mapper.resolve("/tmp/work/foo/main.rs", &known),
            Some("a/foo/main.rs".to_string())
        );
    }

    // Without a source root, the common absolute prefix of all files is stripped.
    #[test]
    fn test_auto_normalize_absolute_paths() {
        let mut data = CoverageData::new();
        data.files.insert(
            "/home/user/project/src/main.rs".to_string(),
            crate::model::FileCoverage::new(),
        );
        data.files.insert(
            "/home/user/project/tests/test.rs".to_string(),
            crate::model::FileCoverage::new(),
        );

        auto_normalize_paths(&mut data, None);
        assert!(data.files.contains_key("src/main.rs"));
        assert!(data.files.contains_key("tests/test.rs"));
    }

    // An explicit source root is stripped from matching paths.
    #[test]
    fn test_auto_normalize_with_source_root() {
        let mut data = CoverageData::new();
        data.files.insert(
            "/app/src/main.rs".to_string(),
            crate::model::FileCoverage::new(),
        );

        auto_normalize_paths(&mut data, Some(Path::new("/app")));
        assert!(data.files.contains_key("src/main.rs"));
    }

    // A leading `./` is removed from relative paths.
    #[test]
    fn test_auto_normalize_strips_dot_slash() {
        let mut data = CoverageData::new();
        data.files.insert(
            "./src/main.rs".to_string(),
            crate::model::FileCoverage::new(),
        );

        auto_normalize_paths(&mut data, None);
        assert!(data.files.contains_key("src/main.rs"));
    }

    // Backslash-separated paths and roots are converted to `/` before stripping.
    #[test]
    fn test_auto_normalize_backslashes() {
        let mut data = CoverageData::new();
        data.files.insert(
            "C:\\Users\\dev\\project\\src\\main.rs".to_string(),
            crate::model::FileCoverage::new(),
        );

        auto_normalize_paths(&mut data, Some(Path::new("C:\\Users\\dev\\project")));
        assert!(data.files.contains_key("src/main.rs"));
    }

    // Both the map key and each issue's own `path` field are rewritten.
    #[test]
    fn test_auto_normalize_issue_paths() {
        let mut data = DiagnosticsData::new();
        data.issues_by_file.insert(
            "/repo/src/main.rs".to_string(),
            vec![Issue {
                path: "/repo/src/main.rs".to_string(),
                line: 10,
                column: None,
                end_line: None,
                severity: Severity::Warning,
                rule_id: "x".to_string(),
                message: "m".to_string(),
                source: "tool".to_string(),
                fingerprint: "fp".to_string(),
            }],
        );

        auto_normalize_issue_paths(&mut data, Some(Path::new("/repo")));
        assert!(data.issues_by_file.contains_key("src/main.rs"));
        assert_eq!(data.issues_by_file["src/main.rs"][0].path, "src/main.rs");
    }

    // Resolving the same path twice with the same known paths gives the same
    // answer; this checks the result only, not that the cache was actually hit.
    #[test]
    fn test_caching() {
        let mut mapper = PathMapper::new(vec![], BTreeMap::new(), None);
        let known = vec!["src/main.rs"];
        mapper.resolve("src/main.rs", &known);
        // Second call uses cache
        assert_eq!(
            mapper.resolve("src/main.rs", &known),
            Some("src/main.rs".to_string())
        );
    }
}