use std::borrow::Cow;
/// Word-to-character table consulted by `try_substitution` while demangling.
///
/// Each entry pairs a spelled-out word with the character it stands for. The same
/// pairs are hard-coded in the `match` inside `mangle`, so the two places have to be
/// edited together.
///
/// No word in the table is a prefix of another, so the first-match scan in
/// `try_substitution` gives the same answer whatever order the entries are in.
const SUBSTITUTIONS: &[(&str, char)] = &[
("backslash", '\\'),
("percent", '%'),
("dollar", '$'),
("tilde", '~'),
("colon", ':'),
("qmark", '?'),
("emark", '!'),
("slash", '/'),
("minus", '-'),
("plus", '+'),
("star", '*'),
("roof", '^'),
("amp", '&'),
("bar", '|'),
("dot", '.'),
("lt", '<'),
("gt", '>'),
("eq", '='),
("at", '@'),
];
pub fn demangle(mangled: &str) -> Cow<'_, str> {
let (body, had_substitutions) = if let Some(stripped) = mangled.strip_suffix('_') {
(stripped, true)
} else {
(mangled, false)
};
if !had_substitutions {
return Cow::Borrowed(mangled);
}
let mut out = String::with_capacity(body.len());
let bytes = body.as_bytes();
let mut i = 0;
while i < bytes.len() {
if bytes.get(i).copied() == Some(b'X') {
let hi_idx = i.saturating_add(1);
let lo_idx = i.saturating_add(2);
if let (Some(&hi), Some(&lo)) = (bytes.get(hi_idx), bytes.get(lo_idx))
&& is_upper_hex(hi)
&& is_upper_hex(lo)
&& let Some(val) = decode_hex2(hi, lo)
&& !is_mangle_passthrough_or_substitution(val)
{
out.push(val as char);
i = i.saturating_add(3);
continue;
}
if i == 0
&& let Some(&next) = bytes.get(hi_idx)
&& next.is_ascii_digit()
{
out.push(next as char);
i = i.saturating_add(2);
continue;
}
}
if let Some(tail) = bytes.get(i..)
&& let Some((word, ch)) = try_substitution(tail)
{
out.push(ch);
i = i.saturating_add(word.len());
continue;
}
if let Some(&b) = bytes.get(i) {
out.push(b as char);
}
i = i.saturating_add(1);
}
Cow::Owned(out)
}
pub fn mangle(name: &str) -> String {
let bytes = name.as_bytes();
let mut result = String::with_capacity(name.len());
let mut start = 0;
let mut requires_underscore = false;
if let Some(&first) = bytes.first()
&& first.is_ascii_digit()
{
result.push('X');
result.push(first as char);
start = 1;
}
for i in start..bytes.len() {
let Some(&c) = bytes.get(i) else {
break;
};
match c {
b'a'..=b'z' | b'0'..=b'9' | b'A'..=b'Z' => {
result.push(c as char);
}
b'_' => {
let next = bytes.get(i.saturating_add(1)).copied();
if i > 0 && next.is_some_and(|b| b.is_ascii_digit()) {
} else {
result.push('_');
}
}
_ => {
let word = match c {
b'$' => "dollar",
b'%' => "percent",
b'&' => "amp",
b'^' => "roof",
b'!' => "emark",
b'?' => "qmark",
b'*' => "star",
b'+' => "plus",
b'-' => "minus",
b'/' => "slash",
b'\\' => "backslash",
b'=' => "eq",
b'<' => "lt",
b'>' => "gt",
b'~' => "tilde",
b':' => "colon",
b'.' => "dot",
b'@' => "at",
b'|' => "bar",
_ => {
result.push('X');
result.push(to_hex_upper((c >> 4) & 0xF));
result.push(to_hex_upper(c & 0xF));
requires_underscore = true;
continue;
}
};
result.push_str(word);
requires_underscore = true;
}
}
}
if requires_underscore {
result.push('_');
}
result
}
/// Returns the uppercase hexadecimal digit for `nibble` (`0..=15`).
///
/// Values above 15 are not valid nibbles and yield `'0'`.
fn to_hex_upper(nibble: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    DIGITS
        .get(usize::from(nibble))
        .map_or('0', |&digit| char::from(digit))
}
/// Looks for a substitution word at the very start of `bytes`.
///
/// Returns the first `SUBSTITUTIONS` entry whose word is a prefix of `bytes`, as a
/// `(word, character)` pair, or `None` when no entry matches.
fn try_substitution(bytes: &[u8]) -> Option<(&'static str, char)> {
    SUBSTITUTIONS
        .iter()
        .copied()
        .find(|(word, _)| bytes.starts_with(word.as_bytes()))
}
/// Tells whether `b` is an ASCII hex digit in uppercase form: `0`-`9` or `A`-`F`.
fn is_upper_hex(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'F')
}
/// Tells whether `b` is a byte that never needs a hex escape: an ASCII letter or
/// digit, `_`, or one of the operator characters
/// `$ % & ^ ! ? * + - / \ = < > ~ : . @ |`.
fn is_mangle_passthrough_or_substitution(b: u8) -> bool {
    const PUNCTUATION: &[u8] = b"_$%&^!?*+-/\\=<>~:.@|";
    b.is_ascii_alphanumeric() || PUNCTUATION.contains(&b)
}
/// Combines two hex digits into one byte, `hi` supplying the upper four bits and
/// `lo` the lower four. Returns `None` if either argument is not a hex digit.
fn decode_hex2(hi: u8, lo: u8) -> Option<u8> {
    hex_val(hi)
        .zip(hex_val(lo))
        .map(|(upper, lower)| (upper << 4) | lower)
}
/// Converts one ASCII hex digit to its numeric value (`0..=15`).
///
/// Both `a`-`f` and `A`-`F` are accepted; any other byte gives `None`.
fn hex_val(b: u8) -> Option<u8> {
    // Folding to uppercase first lets one branch serve both letter cases.
    let folded = b.to_ascii_uppercase();
    if folded.is_ascii_digit() {
        Some(folded.wrapping_sub(b'0'))
    } else if matches!(folded, b'A'..=b'F') {
        Some(folded.wrapping_sub(b'A').wrapping_add(10))
    } else {
        None
    }
}
// Unit and property tests for `mangle` / `demangle`.
#[cfg(test)]
mod tests {
use super::*;
use proptest::prelude::*;
// Names without a trailing underscore come back unchanged.
#[test]
fn plain_identifier_passthrough() {
assert_eq!(demangle("fooBar"), "fooBar");
assert_eq!(demangle("x"), "x");
}
// A substitution word is only decoded when the name ends in `_`; without that
// marker the word is an ordinary identifier.
#[test]
fn no_trailing_underscore_means_literal() {
assert_eq!(demangle("amp"), "amp");
assert_eq!(demangle("dot"), "dot");
}
// One assertion per entry of the substitution table.
#[test]
fn every_substitution() {
assert_eq!(demangle("dollar_"), "$");
assert_eq!(demangle("percent_"), "%");
assert_eq!(demangle("amp_"), "&");
assert_eq!(demangle("roof_"), "^");
assert_eq!(demangle("emark_"), "!");
assert_eq!(demangle("qmark_"), "?");
assert_eq!(demangle("star_"), "*");
assert_eq!(demangle("plus_"), "+");
assert_eq!(demangle("minus_"), "-");
assert_eq!(demangle("slash_"), "/");
assert_eq!(demangle("backslash_"), "\\");
assert_eq!(demangle("eq_"), "=");
assert_eq!(demangle("lt_"), "<");
assert_eq!(demangle("gt_"), ">");
assert_eq!(demangle("tilde_"), "~");
assert_eq!(demangle("colon_"), ":");
assert_eq!(demangle("dot_"), ".");
assert_eq!(demangle("at_"), "@");
assert_eq!(demangle("bar_"), "|");
}
// Substitution words mixed with plain text and with each other.
#[test]
fn mixed_substitutions() {
assert_eq!(demangle("colonOrEquals_"), ":OrEquals");
assert_eq!(demangle("ampeq_"), "&=");
assert_eq!(demangle("ltlt_"), "<<");
}
// `X` followed by two uppercase hex digits decodes to the corresponding byte.
#[test]
fn hex_escape() {
assert_eq!(demangle("X20_"), " ");
assert_eq!(demangle("X7B_"), "{");
}
// A leading `X<digit>` decodes to the digit, but only when the name carries the
// trailing `_` marker; "X1foo" has none and is therefore left as it is.
#[test]
fn leading_digit_escape() {
assert_eq!(demangle("X1plus_"), "1+");
assert_eq!(demangle("X1foo"), "X1foo");
}
// A selection of longer names, with and without escapes.
#[test]
fn real_world_identifiers() {
assert_eq!(demangle("genNimMainInner"), "genNimMainInner");
assert_eq!(demangle("GC_getStatistics"), "GC_getStatistics");
assert_eq!(demangle("amp_"), "&");
assert_eq!(demangle("ampeq_"), "&=");
assert_eq!(demangle("colonOrEquals_"), ":OrEquals");
assert_eq!(demangle("colonanonymous_"), ":anonymous");
}
// Purely alphanumeric names are emitted unchanged, without a trailing `_`.
#[test]
fn mangle_plain() {
assert_eq!(mangle("fooBar"), "fooBar");
}
// Operator characters become words and the result gains the `_` marker.
#[test]
fn mangle_special_chars() {
assert_eq!(mangle("$"), "dollar_");
assert_eq!(mangle("&="), "ampeq_");
assert_eq!(mangle(":OrEquals"), "colonOrEquals_");
}
// A leading digit is prefixed with `X`; no `_` marker is added for it.
#[test]
fn mangle_leading_digit() {
assert_eq!(mangle("1foo"), "X1foo");
}
// A byte with no substitution word falls back to the `XHH` form.
#[test]
fn mangle_hex_fallback() {
assert_eq!(mangle(" "), "X20_");
}
// mangle followed by demangle returns the input for a handful of fixed names.
#[test]
fn round_trip_basic() {
for name in &["foo", "&", "&=", ":OrEquals", "a+b", "GC_ref"] {
let mangled = mangle(name);
let back = demangle(&mangled);
assert_eq!(
&*back, *name,
"round-trip failed for {name:?} (mangled: {mangled:?})"
);
}
}
// Generates alphanumeric identifiers of 1 to 20 characters whose first character
// is a letter other than `x`/`X`.
// NOTE(review): the x/X exclusion is presumably there to stay clear of the
// `X`-prefixed escape forms — confirm.
fn alpha_ident_strategy() -> impl Strategy<Value = String> {
"[a-wyzA-WYZ][a-zA-Z0-9]{0,19}"
}
// Generates strings of 1 to 5 characters drawn from the operator characters that
// have a substitution word.
fn operator_strategy() -> impl Strategy<Value = String> {
prop::collection::vec(
prop::sample::select(vec![
'$', '%', '&', '^', '!', '?', '*', '+', '-', '/', '\\', '=', '<', '>', '~', ':',
'.', '@', '|',
]),
1..6,
)
.prop_map(|chars| chars.into_iter().collect::<String>())
}
proptest! {
// Alphanumeric identifiers survive mangle + demangle unchanged.
#[test]
fn proptest_alpha_round_trip(name in alpha_ident_strategy()) {
let mangled = mangle(&name);
let back = demangle(&mangled);
prop_assert_eq!(&*back, &*name,
"round-trip failed: {:?} → {:?} → {:?}", name, mangled, back);
}
// Operator-only names survive mangle + demangle unchanged.
#[test]
fn proptest_operator_round_trip(name in operator_strategy()) {
let mangled = mangle(&name);
let back = demangle(&mangled);
prop_assert_eq!(&*back, &*name,
"round-trip failed: {:?} → {:?} → {:?}", name, mangled, back);
}
// Every byte of the mangled form is an ASCII letter, digit or underscore.
// Only alphanumeric inputs are generated here, so the escape paths of `mangle`
// are not exercised by this property.
#[test]
fn proptest_mangle_produces_valid_c_identifier(name in alpha_ident_strategy()) {
let mangled = mangle(&name);
for (i, b) in mangled.bytes().enumerate() {
prop_assert!(
b.is_ascii_alphanumeric() || b == b'_',
"mangled form {:?} has non-C-ident byte {:#04x} at position {}",
mangled, b, i
);
}
}
}
}