/// Placeholder text substituted for path-like content when tokens are normalized:
/// `sentinel_token` swaps it in for absolute paths, and `semantic_tokens` uses it
/// for the final token of a line.
pub const PATH_SENTINEL: &str = "<path>";
/// Locates the compile invocation inside captured driver output.
///
/// Scans `stderr` line by line and returns the first line, with surrounding
/// whitespace trimmed, that contains the quoted token `"-cc1"`. Returns `None`
/// when no line contains it.
pub fn extract_cc1_line(stderr: &str) -> Option<&str> {
    // The marker includes the double quotes, so an unquoted `-cc1` never matches.
    const CC1_MARKER: &str = "\"-cc1\"";

    for raw in stderr.lines() {
        let candidate = raw.trim();
        if candidate.contains(CC1_MARKER) {
            return Some(candidate);
        }
    }
    None
}
/// Splits a line of double-quoted arguments into their unquoted contents.
///
/// Everything outside a pair of double quotes is ignored. Inside quotes, `\"`
/// yields a literal quote and `\\` yields a single backslash; a backslash
/// followed by anything else (or by end of input) is kept verbatim. A token
/// whose closing quote is missing is still emitted with whatever was read.
fn tokenize(line: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut stream = line.chars().peekable();

    // `any` consumes characters up to and including the next opening quote and
    // reports whether one was found, which skips all unquoted text.
    while stream.any(|ch| ch == '"') {
        let mut arg = String::new();
        loop {
            match stream.next() {
                // Closing quote or end of input both finish the token.
                None | Some('"') => break,
                Some('\\') => {
                    // Only `\"` and `\\` are escapes; otherwise keep the backslash
                    // and leave the following character for the next iteration.
                    let escaped = stream.next_if(|&next| next == '"' || next == '\\');
                    arg.push(escaped.unwrap_or('\\'));
                }
                Some(other) => arg.push(other),
            }
        }
        tokens.push(arg);
    }
    tokens
}
/// Reports whether `s` begins like an absolute path.
///
/// A leading `/` always counts. When `windows_aware` is set, a leading double
/// backslash or a drive root (`X:\` or `X:/`, with `X` an ASCII letter) counts
/// as well; otherwise those forms are not recognized.
fn is_abs_path_start(s: &str, windows_aware: bool) -> bool {
    let posix_root = s.starts_with('/');
    if posix_root || !windows_aware {
        return posix_root;
    }

    let unc_prefix = s.starts_with("\\\\");
    // Drive root: letter, colon, then either separator. `C:` and `C:rel` do not match.
    let drive_root = matches!(
        s.as_bytes(),
        [letter, b':', b'\\' | b'/', ..] if letter.is_ascii_alphabetic()
    );
    unc_prefix || drive_root
}
fn sentinel_token(tok: &str, windows_aware: bool) -> String {
if is_abs_path_start(tok, windows_aware) {
return PATH_SENTINEL.to_string();
}
if let Some(eq) = tok.find('=') {
let (head, val) = tok.split_at(eq + 1);
if is_abs_path_start(val, windows_aware) {
return format!("{head}{PATH_SENTINEL}");
}
}
if let Some(rest) = tok.strip_prefix("-I")
&& is_abs_path_start(rest, windows_aware)
{
return format!("-I{PATH_SENTINEL}");
}
tok.to_string()
}
/// Tokenizes `cc1_line` and returns the tokens with path content replaced.
///
/// The final token is always replaced by [`PATH_SENTINEL`] regardless of its
/// content; every earlier token goes through `sentinel_token`. A line with no
/// quoted tokens produces an empty vector.
pub fn semantic_tokens(cc1_line: &str, windows_aware: bool) -> Vec<String> {
    let raw = tokenize(cc1_line);
    match raw.split_last() {
        None => Vec::new(),
        // The last token's own text is discarded; only its position matters.
        Some((_final_token, leading)) => leading
            .iter()
            .map(|arg| sentinel_token(arg, windows_aware))
            .chain(std::iter::once(PATH_SENTINEL.to_string()))
            .collect(),
    }
}
/// Convenience wrapper: finds the `"-cc1"` line in `stderr` and normalizes it.
///
/// Returns `None` when `extract_cc1_line` finds no matching line; otherwise
/// returns the result of `semantic_tokens` for that line.
pub fn resolved_semantic_tokens(stderr: &str, windows_aware: bool) -> Option<Vec<String>> {
    let cc1_line = extract_cc1_line(stderr)?;
    Some(semantic_tokens(cc1_line, windows_aware))
}
// Unit tests covering cc1-line extraction, quote-aware tokenizing, and the
// replacement of absolute paths with `PATH_SENTINEL`.
#[cfg(test)]
mod tests {
use super::*;
// Fixture texts embedded at compile time. Their contents are not visible in this
// file; going by the file names they are presumably captured clang driver outputs
// (plain -O2, -O2 with -march=native, and a Windows clang-cl run) — confirm
// against testdata/.
const O2: &str = include_str!("testdata/clang_o2.txt");
const O2_NATIVE: &str = include_str!("testdata/clang_o2_march_native.txt");
const CL_WIN: &str = include_str!("testdata/clang_cl_windows.txt");
// The O2 fixture must yield a line that carries both the "-cc1" and "-O2" tokens.
#[test]
fn extract_cc1_line_finds_the_compile_line_past_the_banner() {
let line = extract_cc1_line(O2).expect("fixture has a -cc1 line");
assert!(line.contains("\"-cc1\""));
assert!(line.contains("\"-O2\""));
}
// Text without a quoted "-cc1" token, including empty text, yields None.
#[test]
fn extract_cc1_line_is_none_without_a_compile_line() {
assert!(extract_cc1_line("clang version 1\nThread model: posix").is_none());
assert!(extract_cc1_line("").is_none());
}
// Quotes are stripped and the whitespace between arguments is dropped.
#[test]
fn tokenize_unquotes_every_argument() {
let toks = tokenize(r#" "/usr/bin/clang" "-cc1" "-O2" "#);
assert_eq!(toks, vec!["/usr/bin/clang", "-cc1", "-O2"]);
}
// `\"` becomes a literal quote and `\\` becomes a single backslash.
#[test]
fn tokenize_handles_escaped_quote_and_backslash() {
let toks = tokenize(r#""a\"b" "c\\d""#);
assert_eq!(toks, vec!["a\"b", "c\\d"]);
}
// Paths starting with `/` are replaced whether or not `windows_aware` is set, in
// all three supported positions: whole token, after `=`, and after `-I`.
#[test]
fn sentinel_token_blanks_posix_paths_where_one_can_begin() {
for wa in [false, true] {
assert_eq!(
sentinel_token("/Applications/Xcode/clang", wa),
PATH_SENTINEL
);
assert_eq!(
sentinel_token("-fdebug-compilation-dir=/Users/x/proj", wa),
format!("-fdebug-compilation-dir={PATH_SENTINEL}")
);
assert_eq!(
sentinel_token("-I/usr/local/include", wa),
format!("-I{PATH_SENTINEL}")
);
}
}
// Each case pairs a token holding a Windows-style path with its expected
// replacement. The replacement must appear only with `windows_aware = true`;
// with `false` the token must come back unchanged.
#[test]
fn sentinel_token_blanks_windows_paths_only_when_windows_aware() {
let cases = [
(
"C:\\Program Files\\LLVM\\bin\\clang-cl.exe",
PATH_SENTINEL.to_string(),
),
("c:/users/x/proj/a.c", PATH_SENTINEL.to_string()),
("\\\\server\\share\\inc", PATH_SENTINEL.to_string()),
("\\\\?\\C:\\Windows Kits\\10", PATH_SENTINEL.to_string()),
(
"-fdebug-compilation-dir=C:\\t\\proj",
format!("-fdebug-compilation-dir={PATH_SENTINEL}"),
),
("-IC:\\sdk\\include", format!("-I{PATH_SENTINEL}")),
];
for (tok, blanked) in cases {
assert_eq!(
sentinel_token(tok, true),
blanked,
"windows_aware must blank {tok}"
);
assert_eq!(
sentinel_token(tok, false),
tok,
"clang-cl (not windows_aware) must keep {tok} raw"
);
}
}
// Tokens that are not absolute paths must pass through unchanged in both modes,
// including near-misses such as a bare drive ("C:"), a drive-relative name
// ("C:rel"), and `key=value` tokens whose value is not a path.
#[test]
fn sentinel_token_leaves_codegen_tokens_untouched() {
for wa in [false, true] {
for tok in [
"-cc1",
"-O2",
"-target-cpu",
"apple-m1",
"x86-64",
"+neon",
"-ffp-contract=on",
"-fms-compatibility-version=19.44.35221",
"-mrelocation-model",
"pic",
"C:", "C:rel", "-DNAME=1",
] {
assert_eq!(
sentinel_token(tok, wa),
tok,
"must not alter {tok} (wa={wa})"
);
}
}
}
// After normalizing the O2 fixture, no token may start with `/`, the last token
// must be the sentinel, and selected non-path tokens must still be present.
#[test]
fn semantic_tokens_strips_every_host_local_path() {
let toks = resolved_semantic_tokens(O2, true).expect("fixture resolves");
for tok in &toks {
assert!(
!tok.starts_with('/'),
"absolute path leaked into the key: {tok}"
);
}
assert_eq!(toks.last().map(String::as_str), Some(PATH_SENTINEL));
assert!(toks.iter().any(|t| t == "-O2"));
assert!(toks.iter().any(|t| t == "-target-cpu"));
assert!(toks.iter().any(|t| t == "apple-m1"));
}
// Test-only detector for leftover Windows paths. True when `tok` contains two
// consecutive backslashes, or when the token itself, any `=`-separated piece of
// it, or the token with leading `-I` prefixes removed begins with a drive root
// (ASCII letter, `:`, then `\` or `/`).
fn has_windows_path(tok: &str) -> bool {
let drive = |s: &str| {
let b = s.as_bytes();
b.len() >= 3
&& b[0].is_ascii_alphabetic()
&& b[1] == b':'
&& (b[2] == b'\\' || b[2] == b'/')
};
tok.contains("\\\\")
|| tok.split('=').any(drive)
|| drive(tok)
|| drive(tok.trim_start_matches("-I"))
}
// With `windows_aware = true`, no token from the Windows fixture may still look
// like a Windows path, while the listed non-path tokens must be kept.
#[test]
fn semantic_tokens_strips_windows_paths_for_gnu_clang() {
let toks = resolved_semantic_tokens(CL_WIN, true).expect("cl fixture resolves");
for tok in &toks {
assert!(
!has_windows_path(tok),
"Windows path leaked into the key: {tok}"
);
}
assert!(toks.iter().any(|t| t == "-gcodeview"));
assert!(toks.iter().any(|t| t == "-debug-info-kind=constructor"));
assert!(toks.iter().any(|t| t == "-fdebug-compilation-dir=<path>"));
}
// With `windows_aware = false`, at least one raw Windows path must remain, and the
// token list must differ from the `windows_aware = true` result.
#[test]
fn semantic_tokens_keeps_windows_paths_for_clang_cl() {
let toks = resolved_semantic_tokens(CL_WIN, false).expect("cl fixture resolves");
assert!(
toks.iter().any(|t| has_windows_path(t)),
"clang-cl tokens must keep raw Windows paths, none found"
);
let gnu = resolved_semantic_tokens(CL_WIN, true).unwrap();
assert_ne!(toks, gnu, "windows_aware must change the cl token list");
}
// Two calls with the same input must agree.
// NOTE(review): this passes the entire fixture text to `semantic_tokens`, not only
// the extracted cc1 line, so every quoted string in the fixture is tokenized.
#[test]
fn semantic_tokens_is_deterministic() {
assert_eq!(semantic_tokens(O2, true), semantic_tokens(O2, true));
}
// The two O2 fixtures must normalize to different token lists.
#[test]
fn march_native_resolves_to_a_different_key_than_plain() {
let plain = resolved_semantic_tokens(O2, true).unwrap();
let native = resolved_semantic_tokens(O2_NATIVE, true).unwrap();
assert_ne!(
plain, native,
"-march=native must change the resolved invocation"
);
}
}