1use std::collections::{BTreeMap, HashMap};
2use std::hash::{Hash, Hasher};
3use std::path::Path;
4use std::sync::Arc;
5
6use crate::diagnostics::DiagnosticsData;
7use crate::model::{CoverageData, RepoSnapshot};
8
/// Maps file paths found in coverage reports onto paths known to the repository.
///
/// All configuration is normalized once at construction time (see
/// `PathMapper::with_options`); the remaining fields are lookup indexes and
/// caches used while resolving.
pub struct PathMapper {
    /// Prefixes to strip from report paths, normalized, non-empty and ordered
    /// longest first so the most specific prefix is tried before shorter ones.
    strip_prefixes: Vec<String>,
    /// `(from, to)` prefix rewrite rules, ordered by longest `from` first.
    rules: Vec<(String, String)>,
    /// Normalized glob patterns; a path matching any of them is never resolved.
    ignore_globs: Vec<String>,
    /// ASCII-lowercased copies of `ignore_globs`; left empty when the mapper
    /// is case-sensitive.
    ignore_globs_lower: Vec<String>,
    /// Whether path comparisons distinguish ASCII upper and lower case.
    case_sensitive: bool,
    /// File name (case-normalized) -> snapshot paths that end in that file name.
    suffix_index: HashMap<String, Vec<String>>,
    /// Content hash -> snapshot paths recorded with that hash.
    hash_index: HashMap<String, Vec<String>>,
    /// Normalized report path -> previously computed resolution, including
    /// negative (`None`) results.
    cache: HashMap<String, Option<String>>,
    /// Fingerprint of the most recently seen `known_paths` list together with
    /// the lookup index built from it.
    cached_known_index: Option<(u64, Arc<HashMap<String, String>>)>,
}
25
26impl PathMapper {
27 pub fn new(
28 strip_prefixes: Vec<String>,
29 rules: BTreeMap<String, String>,
30 snapshot: Option<&RepoSnapshot>,
31 ) -> Self {
32 Self::with_options(strip_prefixes, rules, Vec::new(), !cfg!(windows), snapshot)
33 }
34
35 pub fn with_options(
36 strip_prefixes: Vec<String>,
37 rules: BTreeMap<String, String>,
38 ignore_globs: Vec<String>,
39 case_sensitive: bool,
40 snapshot: Option<&RepoSnapshot>,
41 ) -> Self {
42 let mut suffix_index = HashMap::new();
43 let mut hash_index = HashMap::new();
44 let normalized_strip_prefixes = normalize_prefixes(strip_prefixes);
45 let normalized_rules = normalize_rules(rules);
46 let normalized_ignore_globs = ignore_globs
47 .into_iter()
48 .map(|g| normalize_path(g.trim()))
49 .filter(|g| !g.is_empty())
50 .collect::<Vec<_>>();
51 let normalized_ignore_globs_lower = if case_sensitive {
52 Vec::new()
53 } else {
54 normalized_ignore_globs
55 .iter()
56 .map(|g| g.to_ascii_lowercase())
57 .collect::<Vec<_>>()
58 };
59
60 if let Some(snap) = snapshot {
61 for (path, hash) in &snap.file_hashes {
62 let normalized_path = normalize_path(path);
63 if let Some(filename) = normalized_path.rsplit('/').next() {
65 suffix_index
66 .entry(normalize_case(filename, case_sensitive))
67 .or_insert_with(Vec::new)
68 .push(normalized_path.clone());
69 }
70 hash_index
72 .entry(hash.clone())
73 .or_insert_with(Vec::new)
74 .push(normalized_path.clone());
75 }
76 }
77
78 Self {
79 strip_prefixes: normalized_strip_prefixes,
80 rules: normalized_rules,
81 ignore_globs: normalized_ignore_globs,
82 ignore_globs_lower: normalized_ignore_globs_lower,
83 case_sensitive,
84 suffix_index,
85 hash_index,
86 cache: HashMap::new(),
87 cached_known_index: None,
88 }
89 }
90
91 pub fn resolve(&mut self, coverage_path: &str, known_paths: &[&str]) -> Option<String> {
94 let cache_key = normalize_case(&normalize_path(coverage_path), self.case_sensitive);
95 if let Some(cached) = self.cache.get(&cache_key) {
96 return cached.clone();
97 }
98
99 let result = self.resolve_inner(coverage_path, known_paths);
100 self.cache.insert(cache_key, result.clone());
101 result
102 }
103
104 fn resolve_inner(&mut self, coverage_path: &str, known_paths: &[&str]) -> Option<String> {
105 let normalized = normalize_path(coverage_path);
106 if self.is_ignored(&normalized) {
107 return None;
108 }
109
110 let known_index = self.get_known_index(known_paths);
111
112 if let Some(exact) = self.find_known(&normalized, known_index.as_ref()) {
114 return Some(exact.to_string());
115 }
116
117 for (from, to) in &self.rules {
119 if let Some(rest) = strip_path_prefix_with_case(&normalized, from, self.case_sensitive)
120 {
121 let candidate = normalize_path(&format!("{to}{rest}"));
122 if let Some(found) = self.find_known(&candidate, known_index.as_ref()) {
123 return Some(found.to_string());
124 }
125 }
126 }
127
128 for prefix in &self.strip_prefixes {
130 if let Some(stripped) =
131 strip_path_prefix_with_case(&normalized, prefix, self.case_sensitive)
132 {
133 let candidate = stripped.trim_start_matches('/');
134 if let Some(found) = self.find_known(candidate, known_index.as_ref()) {
135 return Some(found.to_string());
136 }
137 }
138 }
139
140 let filename = normalized.rsplit('/').next().unwrap_or(normalized.as_str());
142 let filename_key = normalize_case(filename, self.case_sensitive);
143 let mut best: Option<(&str, usize)> = None;
144
145 if let Some(snapshot_candidates) = self.suffix_index.get(&filename_key) {
146 for candidate in snapshot_candidates {
147 if let Some(found) = self.find_known(candidate, known_index.as_ref()) {
148 let score = common_suffix_len(
149 &normalize_case(found, self.case_sensitive),
150 &normalize_case(&normalized, self.case_sensitive),
151 );
152 best = pick_better_match(best, (found, score), self.case_sensitive);
153 }
154 }
155 }
156
157 if best.is_none() {
158 for known in known_paths {
159 let known_normalized = normalize_path(known);
160 let known_filename = known_normalized
161 .rsplit('/')
162 .next()
163 .unwrap_or(known_normalized.as_str());
164 if normalize_case(known_filename, self.case_sensitive) != filename_key {
165 continue;
166 }
167 let score = common_suffix_len(
168 &normalize_case(&known_normalized, self.case_sensitive),
169 &normalize_case(&normalized, self.case_sensitive),
170 );
171 best = pick_better_match(best, (known, score), self.case_sensitive);
172 }
173 }
174
175 best.map(|(path, _)| path.to_string())
176 }
177
178 pub fn resolve_by_hash(&self, content_hash: &str) -> Option<String> {
180 self.hash_index.get(content_hash).and_then(|paths| {
181 if paths.len() == 1 {
182 Some(paths[0].clone())
183 } else {
184 None
185 }
186 })
187 }
188
189 fn is_ignored(&self, path: &str) -> bool {
190 if self.case_sensitive {
191 for pattern in &self.ignore_globs {
192 if glob_matches(pattern, path) {
193 return true;
194 }
195 }
196 return false;
197 }
198
199 for pattern in &self.ignore_globs {
200 if glob_matches(pattern, path) {
201 return true;
202 }
203 }
204
205 let lower_path = path.to_ascii_lowercase();
206 for pattern in &self.ignore_globs_lower {
207 if glob_matches(pattern, &lower_path) {
208 return true;
209 }
210 }
211 false
212 }
213
214 fn build_known_index(&self, known_paths: &[&str]) -> HashMap<String, String> {
215 let mut index = HashMap::with_capacity(known_paths.len());
216 for &path in known_paths {
217 let normalized = normalize_path(path);
218 let key = normalize_case(&normalized, self.case_sensitive);
219 index.entry(key).or_insert_with(|| path.to_string());
220 }
221 index
222 }
223
224 fn get_known_index(&mut self, known_paths: &[&str]) -> Arc<HashMap<String, String>> {
225 let mut hasher = std::collections::hash_map::DefaultHasher::new();
226 known_paths.len().hash(&mut hasher);
227 for path in known_paths {
228 path.hash(&mut hasher);
229 }
230 let known_paths_key = hasher.finish();
231
232 let needs_rebuild = self
233 .cached_known_index
234 .as_ref()
235 .map(|(cached_key, _)| *cached_key != known_paths_key)
236 .unwrap_or(true);
237 if needs_rebuild {
238 self.cached_known_index = Some((
239 known_paths_key,
240 Arc::new(self.build_known_index(known_paths)),
241 ));
242 }
243
244 Arc::clone(
245 &self
246 .cached_known_index
247 .as_ref()
248 .expect("known index cache must be initialized")
249 .1,
250 )
251 }
252
253 fn find_known<'a>(
254 &self,
255 candidate: &str,
256 known_index: &'a HashMap<String, String>,
257 ) -> Option<&'a str> {
258 let key = normalize_case(&normalize_path(candidate), self.case_sensitive);
259 known_index.get(&key).map(|s| s.as_str())
260 }
261}
262
263pub fn auto_normalize_paths(data: &mut CoverageData, source_root: Option<&Path>) {
271 let root = source_root
272 .map(|p| p.to_string_lossy().to_string())
273 .or_else(|| detect_common_prefix(data.files.keys().map(|k| k.as_str())))
274 .or_else(git_toplevel);
275
276 let old_files = std::mem::take(&mut data.files);
277 let mut new_files = BTreeMap::new();
278 for (path, fc) in old_files {
279 let mut p: String = path.replace('\\', "/");
280
281 if let Some(ref root) = root {
282 let root_normalized = root.replace('\\', "/");
283 let root_with_slash = if root_normalized.ends_with('/') {
284 root_normalized.clone()
285 } else {
286 format!("{root_normalized}/")
287 };
288 if p.starts_with(&root_with_slash) {
289 p = p[root_with_slash.len()..].to_string();
290 } else if p == root_normalized {
291 p = String::new();
293 }
294 }
295
296 if let Some(stripped) = p.strip_prefix("./") {
298 p = stripped.to_string();
299 }
300
301 if !p.is_empty() {
302 new_files.insert(p, fc);
303 }
304 }
305 data.files = new_files;
306}
307
308pub fn auto_normalize_issue_paths(data: &mut DiagnosticsData, source_root: Option<&Path>) {
310 let root = source_root
311 .map(|p| p.to_string_lossy().to_string())
312 .or_else(|| detect_common_prefix(data.issues_by_file.keys().map(|k| k.as_str())))
313 .or_else(git_toplevel);
314
315 let old_issues = std::mem::take(&mut data.issues_by_file);
316 let mut new_issues = BTreeMap::new();
317
318 for (path, mut issues) in old_issues {
319 let mut p: String = path.replace('\\', "/");
320
321 if let Some(ref root) = root {
322 let root_normalized = root.replace('\\', "/");
323 let root_with_slash = if root_normalized.ends_with('/') {
324 root_normalized.clone()
325 } else {
326 format!("{root_normalized}/")
327 };
328 if p.starts_with(&root_with_slash) {
329 p = p[root_with_slash.len()..].to_string();
330 } else if p == root_normalized {
331 p = String::new();
332 }
333 }
334
335 if let Some(stripped) = p.strip_prefix("./") {
336 p = stripped.to_string();
337 }
338
339 if !p.is_empty() {
340 for issue in &mut issues {
341 issue.path = p.clone();
342 }
343 new_issues.insert(p, issues);
344 }
345 }
346
347 data.issues_by_file = new_issues;
348}
349
/// Finds the longest directory prefix shared by every path.
///
/// Returns `None` when there are no paths, when any path is not absolute (it
/// neither starts with `/` nor has `:` as its second byte, as in `C:`), or
/// when the only shared prefix is the bare root `/`. The returned prefix uses
/// forward slashes and always ends with `/`.
fn detect_common_prefix<'a, I>(paths: I) -> Option<String>
where
    I: Iterator<Item = &'a str>,
{
    // Normalize separators once per path instead of once per candidate prefix.
    let mut normalized: Vec<String> = Vec::new();
    for path in paths {
        let is_absolute = path.starts_with('/') || path.as_bytes().get(1) == Some(&b':');
        if !is_absolute {
            return None;
        }
        normalized.push(path.replace('\\', "/"));
    }

    let (first, others) = normalized.split_first()?;

    // Walk the separators of the first path; each one ends a candidate prefix.
    // Once a candidate is not shared by all paths, no longer one can be.
    let mut prefix_end = 0;
    for (slash_at, _) in first.match_indices('/') {
        let candidate = &first[..=slash_at];
        if !others.iter().all(|other| other.starts_with(candidate)) {
            break;
        }
        prefix_end = slash_at + 1;
    }

    // A prefix of length 1 is just "/", which is not a useful root.
    if prefix_end > 1 {
        Some(first[..prefix_end].to_string())
    } else {
        None
    }
}
392
/// Asks `git rev-parse --show-toplevel` for the top-level directory.
///
/// Returns `None` when the command cannot be run, exits unsuccessfully,
/// prints non-UTF-8 output, or prints nothing but whitespace.
fn git_toplevel() -> Option<String> {
    let output = std::process::Command::new("git")
        .args(["rev-parse", "--show-toplevel"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8(output.stdout).ok()?;
    let toplevel = stdout.trim();
    if toplevel.is_empty() {
        None
    } else {
        Some(toplevel.to_string())
    }
}
407
/// Converts backslashes to forward slashes and removes one leading `./`.
fn normalize_path(path: &str) -> String {
    let forward = path.replace('\\', "/");
    match forward.strip_prefix("./") {
        Some(rest) => rest.to_string(),
        None => forward,
    }
}
416
/// Returns an owned copy of `path`, ASCII-lowercased unless `case_sensitive`
/// is set. Non-ASCII characters are never altered.
fn normalize_case(path: &str, case_sensitive: bool) -> String {
    let mut owned = path.to_owned();
    if !case_sensitive {
        owned.make_ascii_lowercase();
    }
    owned
}
424
425fn normalize_prefixes(prefixes: Vec<String>) -> Vec<String> {
426 let mut out = prefixes
427 .into_iter()
428 .map(|p| normalize_path(p.trim()))
429 .filter(|p| !p.is_empty())
430 .collect::<Vec<_>>();
431 out.sort_by(|a, b| b.len().cmp(&a.len()).then_with(|| a.cmp(b)));
432 out.dedup();
433 out
434}
435
436fn normalize_rules(rules: BTreeMap<String, String>) -> Vec<(String, String)> {
437 let mut out = rules
438 .into_iter()
439 .map(|(from, to)| (normalize_path(from.trim()), normalize_path(to.trim())))
440 .filter(|(from, _)| !from.is_empty())
441 .collect::<Vec<_>>();
442 out.sort_by(|(a_from, _), (b_from, _)| {
443 b_from
444 .len()
445 .cmp(&a_from.len())
446 .then_with(|| a_from.cmp(b_from))
447 });
448 out
449}
450
/// Strips `prefix` from the front of `path`, optionally ignoring ASCII case.
///
/// Returns the remainder of `path` (borrowed, original casing preserved) when
/// the prefix matches, and `None` otherwise. Only ASCII letters are compared
/// case-insensitively; non-ASCII characters must match exactly. The match is
/// purely textual and does not require the prefix to end at a `/`.
fn strip_path_prefix_with_case<'a>(
    path: &'a str,
    prefix: &str,
    case_sensitive: bool,
) -> Option<&'a str> {
    if case_sensitive {
        return path.strip_prefix(prefix);
    }

    // Compare bytes in place instead of allocating lowercased copies of both
    // strings on every call.
    let head = path.as_bytes().get(..prefix.len())?;
    if !head.eq_ignore_ascii_case(prefix.as_bytes()) {
        return None;
    }

    // `head` differs from `prefix` only in ASCII case, so it is valid UTF-8
    // and `prefix.len()` falls on a character boundary of `path`.
    path.get(prefix.len()..)
}
467
468fn glob_matches(pattern: &str, path: &str) -> bool {
469 glob::Pattern::new(pattern)
470 .map(|p| p.matches(path))
471 .unwrap_or(false)
472}
473
474fn pick_better_match<'a>(
475 current: Option<(&'a str, usize)>,
476 candidate: (&'a str, usize),
477 case_sensitive: bool,
478) -> Option<(&'a str, usize)> {
479 match current {
480 None => Some(candidate),
481 Some((best_path, best_score)) => {
482 if candidate.1 > best_score {
483 return Some(candidate);
484 }
485 if candidate.1 < best_score {
486 return Some((best_path, best_score));
487 }
488
489 let candidate_key = normalize_case(candidate.0, case_sensitive);
490 let best_key = normalize_case(best_path, case_sensitive);
491 if candidate_key < best_key {
492 Some(candidate)
493 } else {
494 Some((best_path, best_score))
495 }
496 }
497 }
498}
499
/// Counts how many trailing bytes `a` and `b` have in common.
fn common_suffix_len(a: &str, b: &str) -> usize {
    let (left, right) = (a.as_bytes(), b.as_bytes());
    let mut shared = 0;
    while shared < left.len()
        && shared < right.len()
        && left[left.len() - 1 - shared] == right[right.len() - 1 - shared]
    {
        shared += 1;
    }
    shared
}
507
508#[cfg(test)]
509mod tests {
510 use super::*;
511 use crate::diagnostics::{Issue, Severity};
512
513 #[test]
514 fn test_exact_match() {
515 let mut mapper = PathMapper::new(vec![], BTreeMap::new(), None);
516 let known = vec!["src/main.rs", "src/lib.rs"];
517 assert_eq!(
518 mapper.resolve("src/main.rs", &known),
519 Some("src/main.rs".to_string())
520 );
521 }
522
523 #[test]
524 fn test_strip_prefix() {
525 let mut mapper = PathMapper::new(vec!["/app/".to_string()], BTreeMap::new(), None);
526 let known = vec!["src/main.rs"];
527 assert_eq!(
528 mapper.resolve("/app/src/main.rs", &known),
529 Some("src/main.rs".to_string())
530 );
531 }
532
533 #[test]
534 fn test_rule_substitution() {
535 let mut rules = BTreeMap::new();
536 rules.insert("/build/classes/".to_string(), "src/main/java/".to_string());
537 let mut mapper = PathMapper::new(vec![], rules, None);
538 let known = vec!["src/main/java/com/App.java"];
539 assert_eq!(
540 mapper.resolve("/build/classes/com/App.java", &known),
541 Some("src/main/java/com/App.java".to_string())
542 );
543 }
544
545 #[test]
546 fn test_ignore_glob_never_resolves() {
547 let mut mapper = PathMapper::with_options(
548 vec![],
549 BTreeMap::new(),
550 vec!["**/bazel-out/**".to_string()],
551 true,
552 None,
553 );
554 let known = vec!["src/main.rs"];
555 assert_eq!(
556 mapper.resolve("bazel-out/k8-fastbuild/bin/main.rs", &known),
557 None
558 );
559 }
560
561 #[test]
562 fn test_case_insensitive_exact_match() {
563 let mut mapper = PathMapper::with_options(vec![], BTreeMap::new(), vec![], false, None);
564 let known = vec!["Src/Main.rs"];
565 assert_eq!(
566 mapper.resolve("src/main.rs", &known),
567 Some("Src/Main.rs".to_string())
568 );
569 }
570
571 #[test]
572 fn test_strip_prefix_removes_leading_separator() {
573 let mut mapper = PathMapper::new(vec!["/workspace".to_string()], BTreeMap::new(), None);
574 let known = vec!["src/main.rs"];
575 assert_eq!(
576 mapper.resolve("/workspace/src/main.rs", &known),
577 Some("src/main.rs".to_string())
578 );
579 }
580
581 #[test]
582 fn test_suffix_match_is_deterministic_on_ties() {
583 let mut mapper = PathMapper::new(vec![], BTreeMap::new(), None);
584 let known = vec!["a/foo/main.rs", "b/foo/main.rs"];
585 assert_eq!(
586 mapper.resolve("/tmp/work/foo/main.rs", &known),
587 Some("a/foo/main.rs".to_string())
588 );
589 }
590
591 #[test]
592 fn test_auto_normalize_absolute_paths() {
593 let mut data = CoverageData::new();
594 data.files.insert(
595 "/home/user/project/src/main.rs".to_string(),
596 crate::model::FileCoverage::new(),
597 );
598 data.files.insert(
599 "/home/user/project/tests/test.rs".to_string(),
600 crate::model::FileCoverage::new(),
601 );
602
603 auto_normalize_paths(&mut data, None);
604 assert!(data.files.contains_key("src/main.rs"));
605 assert!(data.files.contains_key("tests/test.rs"));
606 }
607
608 #[test]
609 fn test_auto_normalize_with_source_root() {
610 let mut data = CoverageData::new();
611 data.files.insert(
612 "/app/src/main.rs".to_string(),
613 crate::model::FileCoverage::new(),
614 );
615
616 auto_normalize_paths(&mut data, Some(Path::new("/app")));
617 assert!(data.files.contains_key("src/main.rs"));
618 }
619
620 #[test]
621 fn test_auto_normalize_strips_dot_slash() {
622 let mut data = CoverageData::new();
623 data.files.insert(
624 "./src/main.rs".to_string(),
625 crate::model::FileCoverage::new(),
626 );
627
628 auto_normalize_paths(&mut data, None);
629 assert!(data.files.contains_key("src/main.rs"));
630 }
631
632 #[test]
633 fn test_auto_normalize_backslashes() {
634 let mut data = CoverageData::new();
635 data.files.insert(
636 "C:\\Users\\dev\\project\\src\\main.rs".to_string(),
637 crate::model::FileCoverage::new(),
638 );
639
640 auto_normalize_paths(&mut data, Some(Path::new("C:\\Users\\dev\\project")));
641 assert!(data.files.contains_key("src/main.rs"));
642 }
643
644 #[test]
645 fn test_auto_normalize_issue_paths() {
646 let mut data = DiagnosticsData::new();
647 data.issues_by_file.insert(
648 "/repo/src/main.rs".to_string(),
649 vec![Issue {
650 path: "/repo/src/main.rs".to_string(),
651 line: 10,
652 column: None,
653 end_line: None,
654 severity: Severity::Warning,
655 rule_id: "x".to_string(),
656 message: "m".to_string(),
657 source: "tool".to_string(),
658 fingerprint: "fp".to_string(),
659 }],
660 );
661
662 auto_normalize_issue_paths(&mut data, Some(Path::new("/repo")));
663 assert!(data.issues_by_file.contains_key("src/main.rs"));
664 assert_eq!(data.issues_by_file["src/main.rs"][0].path, "src/main.rs");
665 }
666
667 #[test]
668 fn test_caching() {
669 let mut mapper = PathMapper::new(vec![], BTreeMap::new(), None);
670 let known = vec!["src/main.rs"];
671 mapper.resolve("src/main.rs", &known);
672 assert_eq!(
674 mapper.resolve("src/main.rs", &known),
675 Some("src/main.rs".to_string())
676 );
677 }
678}