cpp_demangle/lib.rs
//! This crate can parse a C++ “mangled” linker symbol name into a Rust value
//! describing what the name refers to: a variable, a function, a virtual table,
//! etc. The description type implements functions such as `demangle()`,
//! producing human-readable text describing the mangled name. Debuggers and
//! profilers can use this crate to provide more meaningful output.
//!
//! C++ requires the compiler to choose names for linker symbols consistently
//! across compilation units, so that two compilation units that have seen the
//! same declarations can pair up definitions in one unit with references in
//! another. Almost all platforms other than Microsoft Windows follow the
//! [Itanium C++ ABI][itanium]'s rules for this.
//!
//! [itanium]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
//!
//! For example, suppose a C++ compilation unit has the definition:
//!
//! ```c++
//! namespace space {
//! int foo(int x, int y) { return x+y; }
//! }
//! ```
//!
//! The Itanium C++ ABI specifies that the linker symbol for that function must
//! be named `_ZN5space3fooEii`. This crate can parse that name into a Rust
//! value representing its structure. That Rust value can be `demangle()`d to the
//! string `space::foo(int, int)`, which is more meaningful to the C++
//! developer.
28
29#![deny(missing_docs)]
30#![deny(missing_debug_implementations)]
31#![deny(unsafe_code)]
32// Clippy stuff.
33#![allow(unknown_lints)]
34#![allow(clippy::inline_always)]
35#![allow(clippy::redundant_field_names)]
36#![cfg_attr(not(feature = "std"), no_std)]
37
38#[cfg(feature = "alloc")]
39#[macro_use]
40extern crate alloc;
41
42#[cfg(not(feature = "alloc"))]
43compile_error!("`alloc` or `std` feature is required for this crate");
44
45#[macro_use]
46mod logging;
47
48pub mod ast;
49pub mod error;
50mod index_str;
51mod subs;
52
53use alloc::string::String;
54use alloc::vec::Vec;
55use ast::{Demangle, Parse, ParseContext};
56use core::fmt;
57use core::num::NonZeroU32;
58use error::{Error, Result};
59use index_str::IndexStr;
60
/// Options to control the parsing process.
///
/// Values are built by starting from `ParseOptions::new()` (or
/// `Default::default()`) and chaining the setter methods, each of which
/// consumes and returns the options by value.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct ParseOptions {
    /// Explicit recursion limit; stays `None` until `recursion_limit` is called.
    recursion_limit: Option<NonZeroU32>,
}

impl ParseOptions {
    /// Construct a new `ParseOptions` with the default values.
    ///
    /// Provided for parity with `DemangleOptions::new`; equivalent to
    /// `ParseOptions::default()`.
    pub fn new() -> Self {
        Default::default()
    }

    /// Set the limit on recursion depth during the parsing phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during parsing.
    /// The default value is 96, which will not overflow the stack even in
    /// a debug build.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        self.recursion_limit = Some(NonZeroU32::new(limit).expect("Recursion limit must be > 0"));
        self
    }
}
79
/// Options to control the demangling process.
///
/// Every setter consumes the options and hands back an updated copy, so
/// calls can be chained: `DemangleOptions::new().no_params().no_return_type()`.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct DemangleOptions {
    /// When set, function arguments are not displayed.
    no_params: bool,
    /// When set, the function return type is not displayed.
    no_return_type: bool,
    /// When set, type annotations in template value parameters are hidden.
    hide_expression_literal_types: bool,
    /// Explicit recursion limit; stays `None` until `recursion_limit` is called.
    recursion_limit: Option<NonZeroU32>,
}

impl DemangleOptions {
    /// Construct a new `DemangleOptions` with the default values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Do not display function arguments.
    pub fn no_params(self) -> Self {
        Self {
            no_params: true,
            ..self
        }
    }

    /// Do not display the function return type.
    pub fn no_return_type(self) -> Self {
        Self {
            no_return_type: true,
            ..self
        }
    }

    /// Hide type annotations in template value parameters.
    /// These are not needed to distinguish template instances
    /// so this can make it easier to match user-provided
    /// template instance names.
    pub fn hide_expression_literal_types(self) -> Self {
        Self {
            hide_expression_literal_types: true,
            ..self
        }
    }

    /// Set the limit on recursion depth during the demangling phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during demangling.
    /// The default value is 128.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    pub fn recursion_limit(self, limit: u32) -> Self {
        let limit = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        Self {
            recursion_limit: Some(limit),
            ..self
        }
    }
}
126
127/// A `Symbol` which owns the underlying storage for the mangled name.
128pub type OwnedSymbol = Symbol<Vec<u8>>;
129
130/// A `Symbol` which borrows the underlying storage for the mangled name.
131pub type BorrowedSymbol<'a> = Symbol<&'a [u8]>;
132
133/// A mangled symbol that has been parsed into an AST.
134///
135/// This is generic over some storage type `T` which can be either owned or
136/// borrowed. See the `OwnedSymbol` and `BorrowedSymbol` type aliases.
137#[derive(Clone, Debug, PartialEq)]
138pub struct Symbol<T> {
139 raw: T,
140 substitutions: subs::SubstitutionTable,
141 parsed: ast::MangledName,
142}
143
144impl<T> Symbol<T>
145where
146 T: AsRef<[u8]>,
147{
148 /// Given some raw storage, parse the mangled symbol from it with the default
149 /// options.
150 ///
151 /// ```
152 /// use cpp_demangle::Symbol;
153 ///
154 /// // First, something easy :)
155 ///
156 /// let mangled = b"_ZN5space3fooEibc";
157 ///
158 /// let sym = Symbol::new(&mangled[..])
159 /// .expect("Could not parse mangled symbol!");
160 ///
161 /// let demangled = sym.demangle().unwrap();
162 /// assert_eq!(demangled, "space::foo(int, bool, char)");
163 ///
164 /// // Now let's try something a little more complicated!
165 ///
166 /// let mangled =
167 /// b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
168 ///
169 /// let sym = Symbol::new(&mangled[..])
170 /// .expect("Could not parse mangled symbol!");
171 ///
172 /// let demangled = sym.demangle().unwrap();
173 /// assert_eq!(
174 /// demangled,
175 /// "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
176 /// );
177 /// ```
178 #[inline]
179 pub fn new(raw: T) -> Result<Symbol<T>> {
180 Self::new_with_options(raw, &Default::default())
181 }
182
183 /// Given some raw storage, parse the mangled symbol from it.
184 ///
185 /// ```
186 /// use cpp_demangle::{ParseOptions, Symbol};
187 ///
188 /// // First, something easy :)
189 ///
190 /// let mangled = b"_ZN5space3fooEibc";
191 ///
192 /// let parse_options = ParseOptions::default()
193 /// .recursion_limit(1024);
194 ///
195 /// let sym = Symbol::new_with_options(&mangled[..], &parse_options)
196 /// .expect("Could not parse mangled symbol!");
197 ///
198 /// let demangled = sym.demangle().unwrap();
199 /// assert_eq!(demangled, "space::foo(int, bool, char)");
200 ///
201 /// // Now let's try something a little more complicated!
202 ///
203 /// let mangled =
204 /// b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
205 ///
206 /// let sym = Symbol::new(&mangled[..])
207 /// .expect("Could not parse mangled symbol!");
208 ///
209 /// let demangled = sym.demangle().unwrap();
210 /// assert_eq!(
211 /// demangled,
212 /// "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
213 /// );
214 /// ```
215 pub fn new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>> {
216 let mut substitutions = subs::SubstitutionTable::new();
217
218 let parsed = {
219 let ctx = ParseContext::new(*options);
220 let input = IndexStr::new(raw.as_ref());
221
222 let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, input)?;
223 debug_assert!(ctx.recursion_level() == 0);
224
225 if tail.is_empty() {
226 parsed
227 } else {
228 return Err(Error::UnexpectedText);
229 }
230 };
231
232 let symbol = Symbol {
233 raw: raw,
234 substitutions: substitutions,
235 parsed: parsed,
236 };
237
238 log!(
239 "Successfully parsed '{}' as
240
241AST = {:#?}
242
243substitutions = {:#?}",
244 String::from_utf8_lossy(symbol.raw.as_ref()),
245 symbol.parsed,
246 symbol.substitutions
247 );
248
249 Ok(symbol)
250 }
251
252 /// Demangle the symbol and return it as a String, with the default options.
253 ///
254 /// ```
255 /// use cpp_demangle::{DemangleOptions, Symbol};
256 ///
257 /// let mangled = b"_ZN5space3fooEibc";
258 ///
259 /// let sym = Symbol::new(&mangled[..])
260 /// .expect("Could not parse mangled symbol!");
261 ///
262 /// let demangled = sym.demangle().unwrap();
263 /// assert_eq!(demangled, "space::foo(int, bool, char)");
264 /// ```
265 #[inline]
266 pub fn demangle(&self) -> ::core::result::Result<String, fmt::Error> {
267 self.demangle_with_options(&Default::default())
268 }
269
270 /// Demangle the symbol and return it as a String.
271 ///
272 /// ```
273 /// use cpp_demangle::{DemangleOptions, Symbol};
274 ///
275 /// let mangled = b"_ZN5space3fooEibc";
276 ///
277 /// let sym = Symbol::new(&mangled[..])
278 /// .expect("Could not parse mangled symbol!");
279 ///
280 /// let options = DemangleOptions::default();
281 /// let demangled = sym.demangle_with_options(&options).unwrap();
282 /// assert_eq!(demangled, "space::foo(int, bool, char)");
283 /// ```
284 #[allow(clippy::trivially_copy_pass_by_ref)]
285 pub fn demangle_with_options(
286 &self,
287 options: &DemangleOptions,
288 ) -> ::core::result::Result<String, fmt::Error> {
289 let mut out = String::new();
290 {
291 let mut ctx = ast::DemangleContext::new(
292 &self.substitutions,
293 self.raw.as_ref(),
294 *options,
295 &mut out,
296 );
297 self.parsed.demangle(&mut ctx, None)?;
298 }
299
300 Ok(out)
301 }
302
303 /// Demangle the symbol to a DemangleWrite, which lets the consumer be informed about
304 /// syntactic structure.
305 #[allow(clippy::trivially_copy_pass_by_ref)]
306 pub fn structured_demangle<W: DemangleWrite>(
307 &self,
308 out: &mut W,
309 options: &DemangleOptions,
310 ) -> fmt::Result {
311 let mut ctx =
312 ast::DemangleContext::new(&self.substitutions, self.raw.as_ref(), *options, out);
313 self.parsed.demangle(&mut ctx, None)
314 }
315}
316
/// The kind of AST node a demangler is entering, as reported to
/// `DemangleWrite::push_demangle_node`.
///
/// This is only partial: not every AST node has a corresponding variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DemangleNodeType {
    /// Entering a `<prefix>` production.
    Prefix,
    /// Entering a `<template-prefix>` production.
    TemplatePrefix,
    /// Entering a `<template-args>` production.
    TemplateArgs,
    /// Entering an `<unqualified-name>` production.
    UnqualifiedName,
    /// Entering a `<template-param>` production.
    TemplateParam,
    /// Entering a `<decltype>` production.
    Decltype,
    /// Entering a `<data-member-prefix>` production.
    DataMemberPrefix,
    /// Entering a `<nested-name>` production.
    NestedName,
    /// Entering a `<special-name>` production that is a vtable.
    VirtualTable,
    /// Additional values may be added in the future. Use a
    /// `_` pattern for compatibility.
    __NonExhaustive,
}
343
344/// Sink for demangled text that reports syntactic structure.
345pub trait DemangleWrite {
346 /// Called when we are entering the scope of some AST node.
347 fn push_demangle_node(&mut self, _: DemangleNodeType) {}
348 /// Same as `fmt::Write::write_str`.
349 fn write_string(&mut self, s: &str) -> fmt::Result;
350 /// Called when we are exiting the scope of some AST node for
351 /// which `push_demangle_node` was called.
352 fn pop_demangle_node(&mut self) {}
353}
354
355impl<W: fmt::Write> DemangleWrite for W {
356 fn write_string(&mut self, s: &str) -> fmt::Result {
357 fmt::Write::write_str(self, s)
358 }
359}
360
361impl<'a, T> Symbol<&'a T>
362where
363 T: AsRef<[u8]> + ?Sized,
364{
365 /// Parse a mangled symbol from input and return it and the trailing tail of
366 /// bytes that come after the symbol, with the default options.
367 ///
368 /// While `Symbol::new` will return an error if there is unexpected trailing
369 /// bytes, `with_tail` simply returns the trailing bytes along with the
370 /// parsed symbol.
371 ///
372 /// ```
373 /// use cpp_demangle::BorrowedSymbol;
374 ///
375 /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
376 ///
377 /// let (sym, tail) = BorrowedSymbol::with_tail(&mangled[..])
378 /// .expect("Could not parse mangled symbol!");
379 ///
380 /// assert_eq!(tail, b" and some trailing junk");
381 ///
382 /// let demangled = sym.demangle().unwrap();
383 /// assert_eq!(demangled, "space::foo(int, bool, char)");
384 /// ```
385 #[inline]
386 pub fn with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
387 Self::with_tail_and_options(input, &Default::default())
388 }
389
390 /// Parse a mangled symbol from input and return it and the trailing tail of
391 /// bytes that come after the symbol.
392 ///
393 /// While `Symbol::new_with_options` will return an error if there is
394 /// unexpected trailing bytes, `with_tail_and_options` simply returns the
395 /// trailing bytes along with the parsed symbol.
396 ///
397 /// ```
398 /// use cpp_demangle::{BorrowedSymbol, ParseOptions};
399 ///
400 /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
401 ///
402 /// let parse_options = ParseOptions::default()
403 /// .recursion_limit(1024);
404 ///
405 /// let (sym, tail) = BorrowedSymbol::with_tail_and_options(&mangled[..], &parse_options)
406 /// .expect("Could not parse mangled symbol!");
407 ///
408 /// assert_eq!(tail, b" and some trailing junk");
409 ///
410 /// let demangled = sym.demangle().unwrap();
411 /// assert_eq!(demangled, "space::foo(int, bool, char)");
412 /// ```
413 pub fn with_tail_and_options(
414 input: &'a T,
415 options: &ParseOptions,
416 ) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
417 let mut substitutions = subs::SubstitutionTable::new();
418
419 let ctx = ParseContext::new(*options);
420 let idx_str = IndexStr::new(input.as_ref());
421 let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, idx_str)?;
422 debug_assert!(ctx.recursion_level() == 0);
423
424 let symbol = Symbol {
425 raw: input.as_ref(),
426 substitutions: substitutions,
427 parsed: parsed,
428 };
429
430 log!(
431 "Successfully parsed '{}' as
432
433AST = {:#?}
434
435substitutions = {:#?}",
436 String::from_utf8_lossy(symbol.raw),
437 symbol.parsed,
438 symbol.substitutions
439 );
440
441 Ok((symbol, tail.into()))
442 }
443}