binfarce/demangle/legacy.rs
1use std::char;
2use std::fmt;
3
/// A legacy-mangled (`_ZN...E`) symbol name that has passed validation and can
/// be rendered in readable form through its `Display` implementation.
pub struct Demangle<'a> {
    /// The input symbol with its `_ZN`-style prefix removed: the run of
    /// length-prefixed identifiers, the terminating `E`, and anything after it.
    inner: &'a str,
    /// How many length-prefixed identifiers `inner` holds, i.e. the number of
    /// `::`-separated elements in the original name.
    elements: usize,
}
10
11/// De-mangles a Rust symbol into a more readable version
12///
13/// All Rust symbols by default are mangled as they contain characters that
14/// cannot be represented in all object files. The mangling mechanism is similar
15/// to C++'s, but Rust has a few specifics to handle items like lifetimes in
16/// symbols.
17///
18/// This function will take a **mangled** symbol and return a value. When printed,
19/// the de-mangled version will be written. If the symbol does not look like
20/// a mangled symbol, the original value will be written instead.
21///
22/// # Examples
23///
24/// ```ignore
25/// use rustc_demangle::demangle;
26///
27/// assert_eq!(demangle("_ZN4testE").to_string(), "test");
28/// assert_eq!(demangle("_ZN3foo3barE").to_string(), "foo::bar");
29/// assert_eq!(demangle("foo").to_string(), "foo");
30/// ```
31
32// All Rust symbols are in theory lists of "::"-separated identifiers. Some
33// assemblers, however, can't handle these characters in symbol names. To get
34// around this, we use C++-style mangling. The mangling method is:
35//
36// 1. Prefix the symbol with "_ZN"
37// 2. For each element of the path, emit the length plus the element
38// 3. End the path with "E"
39//
40// For example, "_ZN4testE" => "test" and "_ZN3foo3barE" => "foo::bar".
41//
42// We're the ones printing our backtraces, so we can't rely on anything else to
43// demangle our symbols. It's *much* nicer to look at demangled symbols, so
44// this function is implemented to give us nice pretty output.
45//
46// Note that this demangler isn't quite as fancy as it could be. We have lots
47// of other information in our symbols like hashes, version, type information,
48// etc. Additionally, this doesn't handle glue symbols at all.
49pub fn demangle(s: &str) -> Result<(Demangle, &str), ()> {
50 // First validate the symbol. If it doesn't look like anything we're
51 // expecting, we just print it literally. Note that we must handle non-Rust
52 // symbols because we could have any function in the backtrace.
53 let inner = if s.starts_with("_ZN") {
54 &s[3..]
55 } else if s.starts_with("ZN") {
56 // On Windows, dbghelp strips leading underscores, so we accept "ZN...E"
57 // form too.
58 &s[2..]
59 } else if s.starts_with("__ZN") {
60 // On OSX, symbols are prefixed with an extra _
61 &s[4..]
62 } else {
63 return Err(());
64 };
65
66 // only work with ascii text
67 if inner.bytes().any(|c| c & 0x80 != 0) {
68 return Err(());
69 }
70
71 let mut elements = 0;
72 let mut chars = inner.chars();
73 let mut c = chars.next().ok_or(())?;
74 while c != 'E' {
75 // Decode an identifier element's length.
76 if !c.is_digit(10) {
77 return Err(());
78 }
79 let mut len = 0usize;
80 while let Some(d) = c.to_digit(10) {
81 len = len.checked_mul(10)
82 .and_then(|len| len.checked_add(d as usize))
83 .ok_or(())?;
84 c = chars.next().ok_or(())?;
85 }
86
87 // `c` already contains the first character of this identifier, skip it and
88 // all the other characters of this identifier, to reach the next element.
89 for _ in 0..len {
90 c = chars.next().ok_or(())?;
91 }
92
93 elements += 1;
94 }
95
96 Ok((Demangle {
97 inner,
98 elements,
99 }, chars.as_str()))
100}
101
// Rust hashes are an `h` followed by hex digits. A lone `h` also qualifies,
// and both lower- and upper-case hex digits are accepted.
fn is_rust_hash(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        Some('h') => chars.all(|c| c.is_ascii_hexdigit()),
        _ => false,
    }
}
106
107impl<'a> fmt::Display for Demangle<'a> {
108 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
109 // Alright, let's do this.
110 let mut inner = self.inner;
111 for element in 0..self.elements {
112 let mut rest = inner;
113 while rest.chars().next().unwrap().is_digit(10) {
114 rest = &rest[1..];
115 }
116 let i: usize = inner[..(inner.len() - rest.len())].parse().unwrap();
117 inner = &rest[i..];
118 rest = &rest[..i];
119 // Skip printing the hash if alternate formatting
120 // was requested.
121 if f.alternate() && element+1 == self.elements && is_rust_hash(&rest) {
122 break;
123 }
124 if element != 0 {
125 f.write_str("::")?;
126 }
127 if rest.starts_with("_$") {
128 rest = &rest[1..];
129 }
130 loop {
131 if rest.starts_with('.') {
132 if let Some('.') = rest[1..].chars().next() {
133 f.write_str("::")?;
134 rest = &rest[2..];
135 } else {
136 f.write_str(".")?;
137 rest = &rest[1..];
138 }
139 } else if rest.starts_with('$') {
140 let (escape, after_escape) = if let Some(end) = rest[1..].find('$') {
141 (&rest[1..end + 1], &rest[end + 2..])
142 } else {
143 break;
144 };
145
146 // see src/librustc_codegen_utils/symbol_names/legacy.rs for these mappings
147 let unescaped = match escape {
148 "SP" => "@",
149 "BP" => "*",
150 "RF" => "&",
151 "LT" => "<",
152 "GT" => ">",
153 "LP" => "(",
154 "RP" => ")",
155 "C" => ",",
156
157 _ => {
158 if escape.starts_with('u') {
159 let digits = &escape[1..];
160 let all_lower_hex = digits.chars().all(|c| match c {
161 '0'..='9' | 'a'..='f' => true,
162 _ => false,
163 });
164 let c = u32::from_str_radix(digits, 16).ok()
165 .and_then(char::from_u32);
166 if let (true, Some(c)) = (all_lower_hex, c) {
167 // FIXME(eddyb) do we need to filter out control codepoints?
168 if !c.is_control() {
169 c.fmt(f)?;
170 rest = after_escape;
171 continue;
172 }
173 }
174 }
175 break;
176 }
177 };
178 f.write_str(unescaped)?;
179 rest = after_escape;
180 } else if let Some(i) = rest.find(|c| c == '$' || c == '.') {
181 f.write_str(&rest[..i])?;
182 rest = &rest[i..];
183 } else {
184 break;
185 }
186 }
187 f.write_str(rest)?;
188 }
189
190 Ok(())
191 }
192}