use std::path::{Path, PathBuf};
use crate::util;
/// Machine-specific directories that [`normalize`] swaps for stable placeholders.
#[derive(Debug, Clone)]
pub struct NormalizationContext {
    /// Workspace root directory; `normalize` renders it as `$WORKSPACE`.
    pub workspace_root: PathBuf,
    /// Rust sysroot directory; `normalize` renders it as `$RUST`.
    pub sysroot: PathBuf,
    /// Cargo registry directory; `normalize` renders it as `$CARGO/registry`.
    /// `None` means no registry substitution is performed.
    pub cargo_registry: Option<PathBuf>,
}

impl NormalizationContext {
    /// Builds a context from explicit workspace and sysroot paths and derives the
    /// registry location from the process environment.
    ///
    /// The registry is `$CARGO_HOME/registry` when `CARGO_HOME` is set, otherwise
    /// `$HOME/.cargo/registry` when `HOME` is set, otherwise `None`.
    ///
    /// A variable that is set but empty is treated as unset. Without that rule an
    /// empty `CARGO_HOME` would produce the relative path `registry`, and using
    /// that bare word as a substitution needle would rewrite unrelated text.
    pub fn new(workspace_root: PathBuf, sysroot: PathBuf) -> Self {
        // Reads an environment variable, discarding values that are present but empty.
        let non_empty_var = |name: &str| std::env::var_os(name).filter(|value| !value.is_empty());

        let cargo_registry = non_empty_var("CARGO_HOME")
            .map(PathBuf::from)
            .or_else(|| non_empty_var("HOME").map(|home| PathBuf::from(home).join(".cargo")))
            .map(|cargo_home| cargo_home.join("registry"));

        Self {
            workspace_root,
            sysroot,
            cargo_registry,
        }
    }
}
/// Rewrites diagnostic text into a machine-independent form.
///
/// Per line, in this order:
/// 1. on lines carrying a `--> ` / `::: ` marker, the text after the marker is
///    passed through `util::to_forward_slash`;
/// 2. the quoted path of a "written to '...'" long-type note becomes `$LONGTYPE_FILE`;
/// 3. `fixture_dir`, the workspace root, the sysroot and (when known) the cargo
///    registry are replaced by `$DIR`, `$WORKSPACE`, `$RUST` and
///    `$CARGO/registry`, trying longer paths first;
/// 4. `#<digits>` sequences become `$TYPEID`;
/// 5. trailing spaces and tabs are removed.
///
/// Line endings are unified to `\n` beforehand, runs of blank lines collapse to
/// a single blank line, and the result carries no trailing newline.
pub fn normalize(input: &str, ctx: &NormalizationContext, fixture_dir: &Path) -> String {
    let mut table: Vec<(String, &'static str)> = Vec::new();
    push_path(&mut table, fixture_dir, "$DIR");
    push_path(&mut table, &ctx.workspace_root, "$WORKSPACE");
    push_path(&mut table, &ctx.sysroot, "$RUST");
    if let Some(registry) = ctx.cargo_registry.as_deref() {
        push_path(&mut table, registry, "$CARGO/registry");
    }
    // Longest needle first, so a directory nested inside another one is matched
    // before its parent. The sort is stable, so equal lengths keep push order.
    table.sort_by_key(|entry| std::cmp::Reverse(entry.0.len()));

    let text = unify_line_endings(input);
    let mut rendered = String::with_capacity(input.len());
    let mut last_was_blank = false;

    for raw in text.lines() {
        let slashed = if has_path_marker(raw) {
            rewrite_path_separators_in_path_lines(raw)
        } else {
            raw.to_string()
        };

        let mut current = rewrite_long_type_note_path(&slashed);
        for (needle, placeholder) in &table {
            current = replace_advancing(&current, needle, placeholder);
        }
        let current = rewrite_type_ids(&current);

        let kept = current.trim_end_matches([' ', '\t']);
        let blank = kept.is_empty();
        if blank && last_was_blank {
            // Second or later blank line in a run: drop it.
            continue;
        }
        rendered.push_str(kept);
        rendered.push('\n');
        last_was_blank = blank;
    }

    // Drop every trailing newline, including one left by a trailing blank line.
    let keep = rendered.trim_end_matches('\n').len();
    rendered.truncate(keep);
    rendered
}
/// Appends `(path text, placeholder)` to `out`, where the path text is `p`
/// rendered lossily as a string and passed through `util::to_forward_slash`.
///
/// Nothing is appended when that text is empty.
fn push_path(out: &mut Vec<(String, &'static str)>, p: &Path, placeholder: &'static str) {
    let needle = util::to_forward_slash(&p.to_string_lossy());
    if !needle.is_empty() {
        out.push((needle, placeholder));
    }
}
/// Replaces every non-overlapping occurrence of `needle` in `s` with `repl`,
/// scanning left to right and never re-examining inserted text.
///
/// An empty `needle` leaves `s` unchanged.
fn replace_advancing(s: &str, needle: &str, repl: &str) -> String {
    if needle.is_empty() {
        // `str::replace` would insert `repl` at every character boundary here.
        s.to_string()
    } else {
        s.replace(needle, repl)
    }
}
/// Replaces each `#` that is immediately followed by one or more ASCII digits,
/// together with those digits, by the literal `$TYPEID`.
///
/// A `#` not followed by an ASCII digit is copied through unchanged.
fn rewrite_type_ids(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(ch) = chars.next() {
        let starts_id = ch == '#' && matches!(chars.peek(), Some(next) if next.is_ascii_digit());
        if starts_id {
            // Swallow the whole digit run that follows the '#'.
            while chars.next_if(|digit| digit.is_ascii_digit()).is_some() {}
            out.push_str("$TYPEID");
        } else {
            out.push(ch);
        }
    }
    out
}
/// Converts every `\r\n` pair and every lone `\r` in `s` to a single `\n`.
fn unify_line_endings(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut chars = s.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch == '\r' {
            // A '\n' directly after the '\r' belongs to the same line break.
            chars.next_if_eq(&'\n');
            out.push('\n');
        } else {
            out.push(ch);
        }
    }
    out
}
/// Reports whether `line` contains a `--> ` or `::: ` marker (arrow or colons
/// followed by a space).
fn has_path_marker(line: &str) -> bool {
    const MARKERS: [&str; 2] = ["--> ", "::: "];
    MARKERS.iter().any(|marker| line.contains(*marker))
}
/// Replaces the quoted path in a "type name has been written to '...'" note
/// with `$LONGTYPE_FILE`, keeping the surrounding quotes and text.
///
/// The two known phrasings are tried in a fixed order and the first one present
/// on the line decides the outcome. If the text after that phrasing has no
/// closing `'`, or no phrasing is present, the line is returned unchanged.
fn rewrite_long_type_note_path(line: &str) -> String {
    const MARKERS: [&str; 2] = [
        "the full name for the type has been written to '",
        "the full type name has been written to '",
    ];

    // Byte offset just past the opening quote of the first phrasing found.
    let path_start = MARKERS
        .iter()
        .find_map(|marker| line.find(*marker).map(|at| at + marker.len()));

    let Some(path_start) = path_start else {
        return line.to_string();
    };

    match line[path_start..].find('\'') {
        Some(path_len) => {
            let head = &line[..path_start];
            // The tail starts at the closing quote, so the quote is kept.
            let tail = &line[path_start + path_len..];
            [head, "$LONGTYPE_FILE", tail].concat()
        }
        None => line.to_string(),
    }
}
/// Passes everything after the first `--> ` marker (or, when that marker is
/// absent, the first `::: ` marker) through `util::to_forward_slash`, leaving
/// the text up to and including the marker untouched.
///
/// Lines with neither marker are returned unchanged.
fn rewrite_path_separators_in_path_lines(line: &str) -> String {
    const MARKERS: [&str; 2] = ["--> ", "::: "];

    // Byte offset just past the marker that is found first in list order.
    let cut = MARKERS
        .iter()
        .find_map(|marker| line.find(*marker).map(|at| at + marker.len()));

    match cut {
        Some(cut) => {
            let (head, tail) = line.split_at(cut);
            format!("{}{}", head, util::to_forward_slash(tail))
        }
        None => line.to_string(),
    }
}
// Unit tests for `normalize`: placeholder substitution, type-id rewriting,
// whitespace and line-ending cleanup, and long-type note handling.
#[cfg(test)]
mod tests {
use super::*;
// Builds a context with the given workspace and sysroot and a fixed registry
// path, so the tests do not depend on the process environment.
fn ctx(workspace: &str, sysroot: &str) -> NormalizationContext {
NormalizationContext {
workspace_root: PathBuf::from(workspace),
sysroot: PathBuf::from(sysroot),
cargo_registry: Some(PathBuf::from("/home/u/.cargo/registry")),
}
}
// A path under the fixture directory becomes `$DIR/...` even though the
// workspace root is also a prefix of it.
#[test]
fn rewrites_dir_prefix_then_workspace_prefix() {
let input = " --> /p/tests/lihaaf/compile_fail/foo.rs:3:1\n";
let c = ctx("/p", "/home/u/.rustup/x");
let dir = PathBuf::from("/p/tests/lihaaf/compile_fail");
let out = normalize(input, &c, &dir);
assert_eq!(out, " --> $DIR/foo.rs:3:1");
}
// The fixture directory wins where it matches; elsewhere the shorter
// workspace root applies.
#[test]
fn longest_prefix_wins() {
let input = " --> /p/tests/lihaaf/compile_fail/foo.rs:3:1\n ::: /p/src/lib.rs:1:1\n";
let c = ctx("/p", "/home/u/.rustup/x");
let dir = PathBuf::from("/p/tests/lihaaf/compile_fail");
let out = normalize(input, &c, &dir);
let expected = " --> $DIR/foo.rs:3:1\n ::: $WORKSPACE/src/lib.rs:1:1";
assert_eq!(out, expected);
}
// Paths under the sysroot are rendered with `$RUST`.
#[test]
fn rewrites_sysroot_prefix() {
let input = " ::: /home/u/.rustup/x/lib/core/src/option.rs:1:1\n";
let c = ctx("/p", "/home/u/.rustup/x");
let dir = PathBuf::from("/p/tests/lihaaf/compile_fail");
let out = normalize(input, &c, &dir);
assert_eq!(out, " ::: $RUST/lib/core/src/option.rs:1:1");
}
// `#` followed by digits is replaced by `$TYPEID`, digits included.
#[test]
fn type_id_rewrite_replaces_hash_digits() {
let input = "expected `Foo#0`, found `Bar#42`\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(out, "expected `Foo$TYPEID`, found `Bar$TYPEID`");
}
// A `#` whose next character is not a digit is left alone.
#[test]
fn type_id_does_not_touch_hash_without_digits() {
let input = "see issue #[123] (a TODO comment)\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(out, "see issue #[123] (a TODO comment)");
}
// Several consecutive blank lines collapse into one.
#[test]
fn collapses_blank_line_runs() {
let input = "alpha\n\n\n\nomega\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(out, "alpha\n\nomega");
}
// Trailing spaces and tabs are removed from every line.
#[test]
fn strips_trailing_whitespace() {
let input = "alpha \nbeta\t\t\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(out, "alpha\nbeta");
}
// CRLF and lone CR are both treated as a single line break.
#[test]
fn unifies_crlf_and_lone_cr_to_lf() {
let input = "a\r\nb\rc\nd\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(out, "a\nb\nc\nd");
}
// Ordinary diagnostic text passes through apart from the dropped final newline.
#[test]
fn does_not_touch_diagnostic_text() {
let input = "error: unknown on_delete value `bogus`; expected one of: cascade\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(
out,
"error: unknown on_delete value `bogus`; expected one of: cascade"
);
}
// The "aborting due to 1 previous error" summary line is kept.
#[test]
fn preserves_rustc_aborting_summary() {
let input = "error: bad\nerror: aborting due to 1 previous error\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(out, "error: bad\nerror: aborting due to 1 previous error");
}
// The plural form of the summary line, with its count, is kept as well.
#[test]
fn preserves_rustc_aborting_plural() {
let input = "error: a\nerror: b\nerror: aborting due to 42 previous errors\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(
out,
"error: a\nerror: b\nerror: aborting due to 42 previous errors"
);
}
// Other text starting with "aborting due to" is kept verbatim.
#[test]
fn preserves_unrelated_aborting_text() {
let input = "error: aborting due to user request\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(out, "error: aborting due to user request");
}
// The `rustc --explain` hint and the single blank line before it are kept.
#[test]
fn preserves_rustc_explain_pointer() {
let input =
"error: bad\n\nFor more information about this error, try `rustc --explain E0463`.\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(
out,
"error: bad\n\nFor more information about this error, try `rustc --explain E0463`."
);
}
// Two calls with identical arguments yield identical output.
#[test]
fn determinism_same_inputs_produce_same_bytes() {
let input = " --> /p/tests/lihaaf/compile_fail/foo.rs:3:1\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/tests/lihaaf/compile_fail");
let a = normalize(input, &c, &dir);
let b = normalize(input, &c, &dir);
assert_eq!(a, b);
}
// Long-type notes that differ only in session directory and hash must
// normalize to the same text, with the path replaced by `$LONGTYPE_FILE`.
#[test]
fn long_type_note_two_sessions_normalize_to_same_bytes() {
let session_a = " = note: the full name for the type has been written to '/tmp/phase85-orchestration/lihaaf-djogi-validation/target/lihaaf-session-NqO1Du/tests_lihaaf_compile_fail_sealed_into_distinct_columns.rs/sealed_into_distinct_columns.long-type-13784649802967031202.txt'\n";
let session_b = " = note: the full name for the type has been written to '/tmp/phase85-targets/djogi-lihaaf/lihaaf-session-b8ldWS/tests_lihaaf_compile_fail_sealed_into_distinct_columns.rs/sealed_into_distinct_columns.long-type-3815226114102655174.txt'\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/tests/lihaaf/compile_fail");
let out_a = normalize(session_a, &c, &dir);
let out_b = normalize(session_b, &c, &dir);
assert_eq!(
out_a, out_b,
"two sessions' long-type notes must normalize identically:\n a = {out_a:?}\n b = {out_b:?}",
);
assert!(
out_a.contains("$LONGTYPE_FILE"),
"expected $LONGTYPE_FILE placeholder, got: {out_a:?}",
);
assert!(
!out_a.contains("lihaaf-session-"),
"session-dir suffix must be normalized away, got: {out_a:?}",
);
assert!(
!out_a.contains("13784649802967031202"),
"type-hash digits from session a must be normalized away: {out_a:?}",
);
assert!(
!out_b.contains("3815226114102655174"),
"type-hash digits from session b must be normalized away: {out_b:?}",
);
assert!(
out_a.contains("the full name for the type has been written to"),
"primary note text must be preserved, got: {out_a:?}",
);
}
// The second supported phrasing of the note is handled the same way.
#[test]
fn long_type_note_normalizes_alternative_phrasing() {
let line = " = note: the full type name has been written to '/var/folders/abc/T/lihaaf-session-xyz/foo.long-type-9999.txt'\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(line, &c, &dir);
assert!(
out.contains("$LONGTYPE_FILE"),
"expected $LONGTYPE_FILE placeholder, got: {out:?}",
);
assert!(
!out.contains("lihaaf-session-xyz"),
"session-dir suffix must be normalized away: {out:?}",
);
assert!(
!out.contains("9999"),
"type-hash digits must be normalized away: {out:?}",
);
assert!(
out.contains("the full type name has been written to"),
"alt-phrasing note text must be preserved: {out:?}",
);
}
// Within a full diagnostic, only the long-type path is rewritten; the error
// header, the `--verbose` hint and the summary line all survive.
#[test]
fn long_type_note_preserves_surrounding_diagnostic() {
let input = "\
error[E0277]: the trait bound is not satisfied
--> /p/tests/foo.rs:1:1
|
1 | bad code here
| ^^^
= note: the full name for the type has been written to '/tmp/x/lihaaf-session-AbCdEf/foo.long-type-12345.txt'
= note: consider using `--verbose` to print the full type name to the console
error: aborting due to 1 previous error
";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/tests");
let out = normalize(input, &c, &dir);
assert!(
out.contains("error[E0277]: the trait bound is not satisfied"),
"primary error code+message must be preserved: {out:?}",
);
assert!(
out.contains("consider using `--verbose`"),
"secondary `--verbose` hint must be preserved: {out:?}",
);
assert!(
out.contains("error: aborting due to 1 previous error"),
"rustc summary line must be preserved: {out:?}",
);
assert!(
out.contains("$LONGTYPE_FILE"),
"long-type path must be normalized to placeholder: {out:?}",
);
assert!(
!out.contains("lihaaf-session-AbCdEf"),
"volatile session dir must be normalized away: {out:?}",
);
assert!(
!out.contains("long-type-12345"),
"type-hash digits must be normalized away: {out:?}",
);
}
// A note that mentions the full type name but has no "written to '...'" part
// is not rewritten.
#[test]
fn long_type_note_left_intact_when_no_match() {
let input =
" = note: consider using `--verbose` to print the full type name to the console\n";
let c = ctx("/p", "/r");
let dir = PathBuf::from("/p/x");
let out = normalize(input, &c, &dir);
assert_eq!(
out,
" = note: consider using `--verbose` to print the full type name to the console",
);
}
}