/// Placeholder text substituted for absolute (`/`-prefixed) paths when command-line
/// tokens are normalized, so the resulting tokens do not carry the original path.
pub const PATH_SENTINEL: &str = "<path>";
/// Returns the first line of `stderr` that contains the quoted argument `"-cc1"`.
///
/// Each line is stripped of leading and trailing whitespace before it is inspected,
/// and the stripped slice is what gets returned. `None` means no line carried the
/// quoted `-cc1` argument (an unquoted `-cc1` does not count).
pub fn extract_cc1_line(stderr: &str) -> Option<&str> {
    for raw in stderr.lines() {
        let candidate = raw.trim();
        if candidate.contains("\"-cc1\"") {
            return Some(candidate);
        }
    }
    None
}
/// Splits `line` into the contents of its double-quoted arguments.
///
/// Anything outside a pair of double quotes is ignored. Inside quotes, `\"` and `\\`
/// decode to a literal quote and a literal backslash; a backslash followed by any
/// other character (or by the end of input) is kept verbatim. A quote that is never
/// closed still yields a token holding whatever was read up to the end of the line.
fn tokenize(line: &str) -> Vec<String> {
    let mut tokens = Vec::new();
    let mut rest = line.chars().peekable();

    // `any` consumes characters up to and including the next opening quote, and
    // reports `false` once the input runs out without finding one.
    while rest.by_ref().any(|ch| ch == '"') {
        let mut current = String::new();
        loop {
            match rest.next() {
                // Closing quote, or end of input inside an unterminated argument.
                None | Some('"') => break,
                Some('\\') => {
                    // Only `\"` and `\\` are escapes; otherwise the backslash is literal
                    // and the following character is handled on the next iteration.
                    let escaped = rest.next_if(|&following| following == '"' || following == '\\');
                    current.push(escaped.unwrap_or('\\'));
                }
                Some(plain) => current.push(plain),
            }
        }
        tokens.push(current);
    }

    tokens
}
fn sentinel_token(tok: &str) -> String {
if tok.starts_with('/') {
return PATH_SENTINEL.to_string();
}
if let Some(eq) = tok.find('=') {
let (head, val) = tok.split_at(eq + 1);
if val.starts_with('/') {
return format!("{head}{PATH_SENTINEL}");
}
}
if let Some(rest) = tok.strip_prefix("-I")
&& rest.starts_with('/')
{
return format!("-I{PATH_SENTINEL}");
}
tok.to_string()
}
/// Tokenizes `cc1_line` and normalizes every token so that no absolute path remains.
///
/// The final token is always replaced by [`PATH_SENTINEL`] regardless of its content
/// (presumably it is the input file path; confirm against the fixtures). Every
/// earlier token goes through [`sentinel_token`]. A line without any quoted
/// argument produces an empty vector.
pub fn semantic_tokens(cc1_line: &str) -> Vec<String> {
    let raw = tokenize(cc1_line);

    // Peel off the final token; with no tokens at all there is nothing to emit.
    let Some((_final_token, leading)) = raw.split_last() else {
        return Vec::new();
    };

    let mut normalized = Vec::with_capacity(raw.len());
    normalized.extend(leading.iter().map(|arg| sentinel_token(arg)));
    normalized.push(PATH_SENTINEL.to_string());
    normalized
}
pub fn resolved_semantic_tokens(stderr: &str) -> Option<Vec<String>> {
extract_cc1_line(stderr).map(semantic_tokens)
}
#[cfg(test)]
mod tests {
    use super::*;

    // Captured compiler stderr fixtures, embedded at compile time.
    const O2: &str = include_str!("testdata/clang_o2.txt");
    const O2_NATIVE: &str = include_str!("testdata/clang_o2_march_native.txt");

    /// The `-cc1` line is found in the fixture and carries the expected arguments.
    #[test]
    fn extract_cc1_line_finds_the_compile_line_past_the_banner() {
        let line = extract_cc1_line(O2).expect("fixture has a -cc1 line");
        assert!(line.contains("\"-cc1\""));
        assert!(line.contains("\"-O2\""));
    }

    /// Input without a quoted `-cc1` argument yields `None`.
    #[test]
    fn extract_cc1_line_is_none_without_a_compile_line() {
        assert!(extract_cc1_line("clang version 1\nThread model: posix").is_none());
        assert!(extract_cc1_line("").is_none());
    }

    /// Surrounding quotes are removed and whitespace between arguments is ignored.
    #[test]
    fn tokenize_unquotes_every_argument() {
        let toks = tokenize(r#" "/usr/bin/clang" "-cc1" "-O2" "#);
        assert_eq!(toks, vec!["/usr/bin/clang", "-cc1", "-O2"]);
    }

    /// `\"` and `\\` inside a quoted argument decode to `"` and `\`.
    #[test]
    fn tokenize_handles_escaped_quote_and_backslash() {
        let toks = tokenize(r#""a\"b" "c\\d""#);
        assert_eq!(toks, vec!["a\"b", "c\\d"]);
    }

    /// Each recognized path position (bare, after `=`, after `-I`) is replaced.
    #[test]
    fn sentinel_token_blanks_paths_only_where_one_can_begin() {
        assert_eq!(sentinel_token("/Applications/Xcode/clang"), PATH_SENTINEL);
        assert_eq!(
            sentinel_token("-fdebug-compilation-dir=/Users/x/proj"),
            format!("-fdebug-compilation-dir={PATH_SENTINEL}")
        );
        assert_eq!(
            sentinel_token("-I/usr/local/include"),
            format!("-I{PATH_SENTINEL}")
        );
    }

    /// Tokens that carry no absolute path pass through unchanged.
    #[test]
    fn sentinel_token_leaves_codegen_tokens_untouched() {
        for tok in [
            "-cc1",
            "-O2",
            "-target-cpu",
            "apple-m1",
            "+neon",
            "-ffp-contract=on",
            "-mrelocation-model",
            "pic",
        ] {
            assert_eq!(sentinel_token(tok), tok, "must not alter {tok}");
        }
    }

    /// No resolved token starts with `/`, while the non-path flags survive.
    #[test]
    fn semantic_tokens_strips_every_host_local_path() {
        let toks = resolved_semantic_tokens(O2).expect("fixture resolves");
        for tok in &toks {
            assert!(
                !tok.starts_with('/'),
                "absolute path leaked into the key: {tok}"
            );
        }
        assert_eq!(toks.last().map(String::as_str), Some(PATH_SENTINEL));
        assert!(toks.iter().any(|t| t == "-O2"));
        assert!(toks.iter().any(|t| t == "-target-cpu"));
        assert!(toks.iter().any(|t| t == "apple-m1"));
    }

    /// A line with no quoted arguments produces no tokens (and no stray sentinel).
    #[test]
    fn semantic_tokens_of_an_empty_line_is_empty() {
        assert!(semantic_tokens("").is_empty());
        assert!(semantic_tokens("no quoted arguments").is_empty());
    }

    /// Normalizing the same `-cc1` line twice gives the same tokens.
    #[test]
    fn semantic_tokens_is_deterministic() {
        // `semantic_tokens` takes a single `-cc1` line, so feed it the extracted
        // line rather than the whole stderr capture.
        let line = extract_cc1_line(O2).expect("fixture has a -cc1 line");
        assert_eq!(semantic_tokens(line), semantic_tokens(line));
    }

    /// The two fixtures must not normalize to the same token list.
    #[test]
    fn march_native_resolves_to_a_different_key_than_plain() {
        let plain = resolved_semantic_tokens(O2).unwrap();
        let native = resolved_semantic_tokens(O2_NATIVE).unwrap();
        assert_ne!(
            plain, native,
            "-march=native must change the resolved invocation"
        );
    }
}