cpp_demangle/
lib.rs

//! This crate can parse a C++ “mangled” linker symbol name into a Rust value
//! describing what the name refers to: a variable, a function, a virtual table,
//! etc. The description type implements functions such as `demangle()`,
//! producing human-readable text describing the mangled name. Debuggers and
//! profilers can use this crate to provide more meaningful output.
//!
//! C++ requires the compiler to choose names for linker symbols consistently
//! across compilation units, so that two compilation units that have seen the
//! same declarations can pair up definitions in one unit with references in
//! another. Almost all platforms other than Microsoft Windows follow the
//! [Itanium C++ ABI][itanium]'s rules for this.
//!
//! [itanium]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
//!
//! For example, suppose a C++ compilation unit has the definition:
//!
//! ```c++
//! namespace space {
//!   int foo(int x, int y) { return x+y; }
//! }
//! ```
//!
//! The Itanium C++ ABI specifies that the linker symbol for that function must
//! be named `_ZN5space3fooEii`. This crate can parse that name into a Rust
//! value representing its structure. That Rust value can be `demangle()`d to the
//! string `space::foo(int, int)`, which is more meaningful to the C++
//! developer.
28
29#![deny(missing_docs)]
30#![deny(missing_debug_implementations)]
31#![deny(unsafe_code)]
32// Clippy stuff.
33#![allow(unknown_lints)]
34#![allow(clippy::inline_always)]
35#![allow(clippy::redundant_field_names)]
36#![cfg_attr(not(feature = "std"), no_std)]
37
38#[cfg(feature = "alloc")]
39#[macro_use]
40extern crate alloc;
41
42#[cfg(not(feature = "alloc"))]
43compile_error!("`alloc` or `std` feature is required for this crate");
44
45#[macro_use]
46mod logging;
47
48pub mod ast;
49pub mod error;
50mod index_str;
51mod subs;
52
53use alloc::string::String;
54use alloc::vec::Vec;
55use ast::{Demangle, Parse, ParseContext};
56use core::fmt;
57use core::num::NonZeroU32;
58use error::{Error, Result};
59use index_str::IndexStr;
60
/// Options to control the parsing process.
///
/// Start from `ParseOptions::default()` and chain the builder methods. Each
/// builder consumes the options by value and returns the updated copy, so the
/// returned value must be kept.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct ParseOptions {
    /// Requested parser recursion limit; `None` until `recursion_limit` is
    /// called.
    recursion_limit: Option<NonZeroU32>,
}

impl ParseOptions {
    /// Set the limit on recursion depth during the parsing phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during parsing.
    /// The default value is 96, which will not overflow the stack even in
    /// a debug build.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    #[must_use = "this returns the updated options instead of modifying them in place"]
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        // A zero limit could never parse anything, so it is treated as a
        // caller bug rather than a recoverable error.
        let limit = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        self.recursion_limit = Some(limit);
        self
    }
}
79
/// Options to control the demangling process.
///
/// Start from `DemangleOptions::new()` (or `default()`) and chain the builder
/// methods. Each builder consumes the options by value and returns the
/// updated copy, so the returned value must be kept.
#[derive(Clone, Copy, Debug, Default)]
#[repr(C)]
pub struct DemangleOptions {
    /// Set by `no_params`: do not display function arguments.
    no_params: bool,
    /// Set by `no_return_type`: do not display the function return type.
    no_return_type: bool,
    /// Set by `hide_expression_literal_types`: hide type annotations in
    /// template value parameters.
    hide_expression_literal_types: bool,
    /// Requested demangler recursion limit; `None` until `recursion_limit`
    /// is called.
    recursion_limit: Option<NonZeroU32>,
}

impl DemangleOptions {
    /// Construct a new `DemangleOptions` with the default values.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Do not display function arguments.
    #[must_use = "this returns the updated options instead of modifying them in place"]
    pub fn no_params(mut self) -> Self {
        self.no_params = true;
        self
    }

    /// Do not display the function return type.
    #[must_use = "this returns the updated options instead of modifying them in place"]
    pub fn no_return_type(mut self) -> Self {
        self.no_return_type = true;
        self
    }

    /// Hide type annotations in template value parameters.
    /// These are not needed to distinguish template instances
    /// so this can make it easier to match user-provided
    /// template instance names.
    #[must_use = "this returns the updated options instead of modifying them in place"]
    pub fn hide_expression_literal_types(mut self) -> Self {
        self.hide_expression_literal_types = true;
        self
    }

    /// Set the limit on recursion depth during the demangling phase. A low
    /// limit will cause valid symbols to be rejected, but a high limit may
    /// allow pathological symbols to overflow the stack during demangling.
    /// The default value is 128.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    #[must_use = "this returns the updated options instead of modifying them in place"]
    pub fn recursion_limit(mut self, limit: u32) -> Self {
        // A zero limit could never demangle anything, so it is treated as a
        // caller bug rather than a recoverable error.
        let limit = NonZeroU32::new(limit).expect("Recursion limit must be > 0");
        self.recursion_limit = Some(limit);
        self
    }
}
126
127/// A `Symbol` which owns the underlying storage for the mangled name.
128pub type OwnedSymbol = Symbol<Vec<u8>>;
129
130/// A `Symbol` which borrows the underlying storage for the mangled name.
131pub type BorrowedSymbol<'a> = Symbol<&'a [u8]>;
132
133/// A mangled symbol that has been parsed into an AST.
134///
135/// This is generic over some storage type `T` which can be either owned or
136/// borrowed. See the `OwnedSymbol` and `BorrowedSymbol` type aliases.
137#[derive(Clone, Debug, PartialEq)]
138pub struct Symbol<T> {
139    raw: T,
140    substitutions: subs::SubstitutionTable,
141    parsed: ast::MangledName,
142}
143
144impl<T> Symbol<T>
145where
146    T: AsRef<[u8]>,
147{
148    /// Given some raw storage, parse the mangled symbol from it with the default
149    /// options.
150    ///
151    /// ```
152    /// use cpp_demangle::Symbol;
153    ///
154    /// // First, something easy :)
155    ///
156    /// let mangled = b"_ZN5space3fooEibc";
157    ///
158    /// let sym = Symbol::new(&mangled[..])
159    ///     .expect("Could not parse mangled symbol!");
160    ///
161    /// let demangled = sym.demangle().unwrap();
162    /// assert_eq!(demangled, "space::foo(int, bool, char)");
163    ///
164    /// // Now let's try something a little more complicated!
165    ///
166    /// let mangled =
167    ///     b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
168    ///
169    /// let sym = Symbol::new(&mangled[..])
170    ///     .expect("Could not parse mangled symbol!");
171    ///
172    /// let demangled = sym.demangle().unwrap();
173    /// assert_eq!(
174    ///     demangled,
175    ///     "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
176    /// );
177    /// ```
178    #[inline]
179    pub fn new(raw: T) -> Result<Symbol<T>> {
180        Self::new_with_options(raw, &Default::default())
181    }
182
183    /// Given some raw storage, parse the mangled symbol from it.
184    ///
185    /// ```
186    /// use cpp_demangle::{ParseOptions, Symbol};
187    ///
188    /// // First, something easy :)
189    ///
190    /// let mangled = b"_ZN5space3fooEibc";
191    ///
192    /// let parse_options = ParseOptions::default()
193    ///     .recursion_limit(1024);
194    ///
195    /// let sym = Symbol::new_with_options(&mangled[..], &parse_options)
196    ///     .expect("Could not parse mangled symbol!");
197    ///
198    /// let demangled = sym.demangle().unwrap();
199    /// assert_eq!(demangled, "space::foo(int, bool, char)");
200    ///
201    /// // Now let's try something a little more complicated!
202    ///
203    /// let mangled =
204    ///     b"__Z28JS_GetPropertyDescriptorByIdP9JSContextN2JS6HandleIP8JSObjectEENS2_I4jsidEENS1_13MutableHandleINS1_18PropertyDescriptorEEE";
205    ///
206    /// let sym = Symbol::new(&mangled[..])
207    ///     .expect("Could not parse mangled symbol!");
208    ///
209    /// let demangled = sym.demangle().unwrap();
210    /// assert_eq!(
211    ///     demangled,
212    ///     "JS_GetPropertyDescriptorById(JSContext*, JS::Handle<JSObject*>, JS::Handle<jsid>, JS::MutableHandle<JS::PropertyDescriptor>)"
213    /// );
214    /// ```
215    pub fn new_with_options(raw: T, options: &ParseOptions) -> Result<Symbol<T>> {
216        let mut substitutions = subs::SubstitutionTable::new();
217
218        let parsed = {
219            let ctx = ParseContext::new(*options);
220            let input = IndexStr::new(raw.as_ref());
221
222            let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, input)?;
223            debug_assert!(ctx.recursion_level() == 0);
224
225            if tail.is_empty() {
226                parsed
227            } else {
228                return Err(Error::UnexpectedText);
229            }
230        };
231
232        let symbol = Symbol {
233            raw: raw,
234            substitutions: substitutions,
235            parsed: parsed,
236        };
237
238        log!(
239            "Successfully parsed '{}' as
240
241AST = {:#?}
242
243substitutions = {:#?}",
244            String::from_utf8_lossy(symbol.raw.as_ref()),
245            symbol.parsed,
246            symbol.substitutions
247        );
248
249        Ok(symbol)
250    }
251
252    /// Demangle the symbol and return it as a String, with the default options.
253    ///
254    /// ```
255    /// use cpp_demangle::{DemangleOptions, Symbol};
256    ///
257    /// let mangled = b"_ZN5space3fooEibc";
258    ///
259    /// let sym = Symbol::new(&mangled[..])
260    ///     .expect("Could not parse mangled symbol!");
261    ///
262    /// let demangled = sym.demangle().unwrap();
263    /// assert_eq!(demangled, "space::foo(int, bool, char)");
264    /// ```
265    #[inline]
266    pub fn demangle(&self) -> ::core::result::Result<String, fmt::Error> {
267        self.demangle_with_options(&Default::default())
268    }
269
270    /// Demangle the symbol and return it as a String.
271    ///
272    /// ```
273    /// use cpp_demangle::{DemangleOptions, Symbol};
274    ///
275    /// let mangled = b"_ZN5space3fooEibc";
276    ///
277    /// let sym = Symbol::new(&mangled[..])
278    ///     .expect("Could not parse mangled symbol!");
279    ///
280    /// let options = DemangleOptions::default();
281    /// let demangled = sym.demangle_with_options(&options).unwrap();
282    /// assert_eq!(demangled, "space::foo(int, bool, char)");
283    /// ```
284    #[allow(clippy::trivially_copy_pass_by_ref)]
285    pub fn demangle_with_options(
286        &self,
287        options: &DemangleOptions,
288    ) -> ::core::result::Result<String, fmt::Error> {
289        let mut out = String::new();
290        {
291            let mut ctx = ast::DemangleContext::new(
292                &self.substitutions,
293                self.raw.as_ref(),
294                *options,
295                &mut out,
296            );
297            self.parsed.demangle(&mut ctx, None)?;
298        }
299
300        Ok(out)
301    }
302
303    /// Demangle the symbol to a DemangleWrite, which lets the consumer be informed about
304    /// syntactic structure.
305    #[allow(clippy::trivially_copy_pass_by_ref)]
306    pub fn structured_demangle<W: DemangleWrite>(
307        &self,
308        out: &mut W,
309        options: &DemangleOptions,
310    ) -> fmt::Result {
311        let mut ctx =
312            ast::DemangleContext::new(&self.substitutions, self.raw.as_ref(), *options, out);
313        self.parsed.demangle(&mut ctx, None)
314    }
315}
316
/// The type of a demangled AST node.
///
/// This is only partial, not all nodes are represented.
///
/// Additional values may be added in the future, so always include a `_`
/// arm when matching on this type.
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum DemangleNodeType {
    /// Entering a <prefix> production
    Prefix,
    /// Entering a <template-prefix> production
    TemplatePrefix,
    /// Entering a <template-args> production
    TemplateArgs,
    /// Entering a <unqualified-name> production
    UnqualifiedName,
    /// Entering a <template-param> production
    TemplateParam,
    /// Entering a <decltype> production
    Decltype,
    /// Entering a <data-member-prefix> production
    DataMemberPrefix,
    /// Entering a <nested-name> production
    NestedName,
    /// Entering a <special-name> production that is a vtable.
    VirtualTable,
    /// Additional values may be added in the future. Use a
    /// _ pattern for compatibility.
    // Placeholder that only exists to discourage exhaustive matching; it is
    // kept out of the rendered documentation so it does not look like a real
    // node type.
    #[doc(hidden)]
    __NonExhaustive,
}
343
344/// Sink for demangled text that reports syntactic structure.
345pub trait DemangleWrite {
346    /// Called when we are entering the scope of some AST node.
347    fn push_demangle_node(&mut self, _: DemangleNodeType) {}
348    /// Same as `fmt::Write::write_str`.
349    fn write_string(&mut self, s: &str) -> fmt::Result;
350    /// Called when we are exiting the scope of some AST node for
351    /// which `push_demangle_node` was called.
352    fn pop_demangle_node(&mut self) {}
353}
354
355impl<W: fmt::Write> DemangleWrite for W {
356    fn write_string(&mut self, s: &str) -> fmt::Result {
357        fmt::Write::write_str(self, s)
358    }
359}
360
361impl<'a, T> Symbol<&'a T>
362where
363    T: AsRef<[u8]> + ?Sized,
364{
365    /// Parse a mangled symbol from input and return it and the trailing tail of
366    /// bytes that come after the symbol, with the default options.
367    ///
368    /// While `Symbol::new` will return an error if there is unexpected trailing
369    /// bytes, `with_tail` simply returns the trailing bytes along with the
370    /// parsed symbol.
371    ///
372    /// ```
373    /// use cpp_demangle::BorrowedSymbol;
374    ///
375    /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
376    ///
377    /// let (sym, tail) = BorrowedSymbol::with_tail(&mangled[..])
378    ///     .expect("Could not parse mangled symbol!");
379    ///
380    /// assert_eq!(tail, b" and some trailing junk");
381    ///
382    /// let demangled = sym.demangle().unwrap();
383    /// assert_eq!(demangled, "space::foo(int, bool, char)");
384    /// ```
385    #[inline]
386    pub fn with_tail(input: &'a T) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
387        Self::with_tail_and_options(input, &Default::default())
388    }
389
390    /// Parse a mangled symbol from input and return it and the trailing tail of
391    /// bytes that come after the symbol.
392    ///
393    /// While `Symbol::new_with_options` will return an error if there is
394    /// unexpected trailing bytes, `with_tail_and_options` simply returns the
395    /// trailing bytes along with the parsed symbol.
396    ///
397    /// ```
398    /// use cpp_demangle::{BorrowedSymbol, ParseOptions};
399    ///
400    /// let mangled = b"_ZN5space3fooEibc and some trailing junk";
401    ///
402    /// let parse_options = ParseOptions::default()
403    ///     .recursion_limit(1024);
404    ///
405    /// let (sym, tail) = BorrowedSymbol::with_tail_and_options(&mangled[..], &parse_options)
406    ///     .expect("Could not parse mangled symbol!");
407    ///
408    /// assert_eq!(tail, b" and some trailing junk");
409    ///
410    /// let demangled = sym.demangle().unwrap();
411    /// assert_eq!(demangled, "space::foo(int, bool, char)");
412    /// ```
413    pub fn with_tail_and_options(
414        input: &'a T,
415        options: &ParseOptions,
416    ) -> Result<(BorrowedSymbol<'a>, &'a [u8])> {
417        let mut substitutions = subs::SubstitutionTable::new();
418
419        let ctx = ParseContext::new(*options);
420        let idx_str = IndexStr::new(input.as_ref());
421        let (parsed, tail) = ast::MangledName::parse(&ctx, &mut substitutions, idx_str)?;
422        debug_assert!(ctx.recursion_level() == 0);
423
424        let symbol = Symbol {
425            raw: input.as_ref(),
426            substitutions: substitutions,
427            parsed: parsed,
428        };
429
430        log!(
431            "Successfully parsed '{}' as
432
433AST = {:#?}
434
435substitutions = {:#?}",
436            String::from_utf8_lossy(symbol.raw),
437            symbol.parsed,
438            symbol.substitutions
439        );
440
441        Ok((symbol, tail.into()))
442    }
443}