scala_native_demangle/
lib.rs

1//! Demangle Scala Native identifiers
2//!
3//! Turn mangled Scala Native identifiers into a more readable form.
4//!
5//! 1. Name mangling rules: https://scala-native.org/en/latest/contrib/mangling.html
6//! 2. Scala implementation: https://github.com/indoorvivants/sn-demangler
7
/// Error produced by the demangler: a human-readable message describing what
/// could not be parsed.
pub type DemangleError = String;
/// Result alias shared by every parser in this module.
pub type ParsingResult<T> = Result<T, DemangleError>;

/// Options controlling how a mangled identifier is rendered.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DemanglingConfig {
    /// When `true`, well-known names are shortened: primitive types lose their
    /// `scala.` prefix, `java.lang.Object`/`String`/`Throwable` lose their
    /// package, and the `scala.collection.immutable.` prefix is dropped.
    pub collapse_scala_names: bool,
    /// When `true`, every parsing step is printed to stdout.
    pub debug: bool,
}

/// Configuration used by `demangle_with_defaults`; also the single source of
/// truth for `DemanglingConfig::default()`.
static DEFAULT_CONFIG: DemanglingConfig = DemanglingConfig {
    collapse_scala_names: true,
    debug: false,
};

impl Default for DemanglingConfig {
    /// Returns a copy of `DEFAULT_CONFIG` (names collapsed, debug output off).
    fn default() -> Self {
        DEFAULT_CONFIG.clone()
    }
}
29
30impl DemanglingConfig {
31    fn log(&self, str: &str) -> () {
32        if self.debug {
33            println!("{str}")
34        }
35    }
36    fn log_name(&self, name: &str, str: &str) -> () {
37        if self.debug {
38            println!("{name}: {str}")
39        }
40    }
41}
42
43pub fn demangle(input: &str, config: &DemanglingConfig) -> ParsingResult<String> {
44    if !input.starts_with("_S") {
45        return Err("identifier doesn't start with _S".to_string());
46    } else {
47        config.log_name("demangle", &input[2..]);
48        return defn_name(&input[2..], config);
49    }
50}
51
52pub fn demangle_with_defaults(input: &str) -> ParsingResult<String> {
53    return demangle(input, &DEFAULT_CONFIG);
54}
55
56// private sub parsers
57// <defn-name> ::=
58//     T <name>                       // top-level name
59//     M <name> <sig-name>            // member name
60fn defn_name(input: &str, config: &DemanglingConfig) -> ParsingResult<String> {
61    config.log_name("defn_name", input);
62    if input.starts_with("T") {
63        return toplevel_name(&input[1..], config);
64    } else if input.starts_with("M") {
65        return member_name(&input[1..], config);
66    } else {
67        if input.len() > 0 {
68            return Err(format!(
69                "defn_name: unknown name modifier '{}'",
70                input[0..0].to_string()
71            ));
72        } else {
73            return Err("defn_name: unexpectedly empty rest of identifier".to_string());
74        }
75    }
76}
77
78fn toplevel_name(input: &str, config: &DemanglingConfig) -> ParsingResult<String> {
79    config.log_name("toplevel_name", input);
80    return name(input, config).map(|t| t.1);
81}
82fn member_name(input: &str, config: &DemanglingConfig) -> ParsingResult<String> {
83    config.log_name("member_name", input);
84    let (consumed, owner) = name(input, config)?;
85    let signature = sig_name(&input[consumed..], config);
86
87    return signature.and_then(|s| return Ok(format!("{}.{}", owner, s)));
88}
89
90// <sig-name> ::=
91//     F <name> <scope>                    // field name
92//     R <type-name>+ E                    // constructor name
93//     D <name> <type-name>+ E <scope>     // method name
94//     P <name> <type-name>+ E             // proxy name
95//     C <name>                            // c extern name
96//     G <name>                            // generated name
97//     K <sig-name> <type-name>+ E         // duplicate name
98fn sig_name(input: &str, config: &DemanglingConfig) -> ParsingResult<String> {
99    config.log_name("sig_name", input);
100    if input.starts_with("C") || input.starts_with("G") {
101        return Ok(name(&input[1..], config)?.1);
102    } else if input.starts_with("I") {
103        return Ok("<clinit>".to_string());
104    } else if input.starts_with("F") {
105        let (consumed, field_name) = name(&input[1..], config)?;
106        // return field_name.and_then(|nm| {
107        let rest = &input[(1 + consumed)..];
108        return scope(rest, config).map(|sc| format!("{}{}", render_scope(sc), field_name));
109        // });
110    } else if input.starts_with("R") {
111        let type_names = read_type_names(&input[1..], config)?;
112        return Ok(type_names.1.join(", "));
113    } else if input.starts_with("K") {
114        // TODO: basically the same as D case below
115        let after_tag = &input[1..];
116        let (consumed, nm) = name(after_tag, config)?;
117
118        let after_name = &after_tag[consumed..];
119        let (_, type_names) = read_type_names(after_name, config)?;
120
121        let signature = match type_names.len() {
122            1 => format!("{}: {}", nm, type_names.join(",")),
123            n => format!(
124                "{}({}): {}",
125                nm,
126                type_names[0..n - 2].join(","),
127                type_names.get(n - 1).unwrap_or(&"???".to_string())
128            ),
129        };
130
131        return Ok(signature);
132    } else if input.starts_with("P") {
133        // TODO: basically the same as D case below
134        let after_tag = &input[1..];
135        let (consumed, nm) = name(after_tag, config)?;
136
137        let after_name = &after_tag[consumed..];
138        let (_, type_names) = read_type_names(after_name, config)?;
139
140        let signature = match type_names.len() {
141            1 => format!("{}: {}", nm, type_names.join(",")),
142            n => format!(
143                "{}({}): {}",
144                nm,
145                type_names[0..n - 2].join(","),
146                type_names.get(n - 1).unwrap_or(&"???".to_string())
147            ),
148        };
149
150        return Ok(signature);
151    } else if input.starts_with("D") {
152        let after_tag = &input[1..];
153        let (consumed, nm) = name(after_tag, config)?;
154
155        let after_name = &after_tag[consumed..];
156        let (consumed, type_names) = read_type_names(after_name, config)?;
157
158        let after_types = &after_name[consumed + 1..];
159        config.log_name(
160            "sig_name:D",
161            format!("type_names: {type_names:?}, after: {after_types}").as_str(),
162        );
163        let sc = scope(&after_types, config)?;
164
165        let signature = match type_names.len() {
166            1 => format!("{}{}: {}", render_scope(sc), nm, type_names.join(",")),
167            n => format!(
168                "{}{}({}): {}",
169                render_scope(sc),
170                nm,
171                type_names[0..n - 1].join(","),
172                type_names.get(n - 1).unwrap_or(&"???".to_string())
173            ),
174        };
175
176        return Ok(signature);
177    } else {
178        return Err(format!(
179            "sig_name: expected to start with F/R/D/P/C/G/K/I, {}",
180            &input
181        )
182        .to_string());
183    }
184}
185
186fn read_type_names(input: &str, config: &DemanglingConfig) -> ParsingResult<(usize, Vec<String>)> {
187    let mut pos = 0;
188    let mut result = Vec::new();
189    while !input[pos..].starts_with("E") {
190        let (consumed, nm) = type_name(&input[pos..], config)?;
191        result.push(nm);
192        pos += consumed;
193    }
194
195    return Ok((pos, result));
196}
197
198fn scala_root_name(name: &str, config: &DemanglingConfig) -> String {
199    if !config.collapse_scala_names {
200        return format!("scala.{name}");
201    } else {
202        return name.to_string();
203    };
204}
205
206fn common_type_name(name: String, config: &DemanglingConfig) -> String {
207    if !config.collapse_scala_names {
208        return name;
209    } else {
210        let immut = "scala.collection.immutable.";
211
212        if name == "java.lang.Object" {
213            return "Object".to_string();
214        } else if name == "java.lang.String" {
215            return "String".to_string();
216        } else if name == "java.lang.Throwable" {
217            return "Throwable".to_string();
218        } else if name.starts_with(immut) {
219            return name.strip_prefix(immut).unwrap_or(&name).to_string();
220            // return "Throwable".to_string();
221        } else {
222            return name;
223        }
224    }
225}
226
227// <type-name> ::=
228//     v                              // c vararg
229//     R _                            // c pointer type-name
230//     R <type-name>+ E               // c function type-name
231//     S <type-name>+ E               // c anonymous struct type-name
232//     A <type-name> <number> _       // c array type-name
233//     <integer-type-name>            // signed integer type-name
234//     <integer-type-name> ::=
235//         b                              // scala.Byte
236//         s                              // scala.Short
237//         i                              // scala.Int
238//         j                              // scala.Long
239//     z                              // scala.Boolean
240//     c                              // scala.Char
241//     f                              // scala.Float
242//     d                              // scala.Double
243//     u                              // scala.Unit
244//     l                              // scala.Null
245//     n                              // scala.Nothing
246//     L <nullable-type-name>         // nullable type-name
247//     A <type-name> _                // nonnull array type-name
248//     X <name>                       // nonnull exact class type-name
249//     <name>                         // nonnull class type-name
250fn type_name(input: &str, config: &DemanglingConfig) -> ParsingResult<(usize, String)> {
251    let mut chars = input.chars();
252    config.log(format!("type_name: {input}").as_str());
253
254    let scala_root_namer = |name: &str| scala_root_name(name, config);
255    let common_type_namer = |name: String| common_type_name(name, config);
256
257    let result = match chars.next() {
258        Some('v') => Ok((1, "<c vararg>".to_string())),
259        Some('z') => Ok((1, scala_root_namer("Boolean"))),
260        Some('c') => Ok((1, scala_root_namer("Char"))),
261        Some('f') => Ok((1, scala_root_namer("Float"))),
262        Some('d') => Ok((1, scala_root_namer("Double"))),
263        Some('u') => Ok((1, scala_root_namer("Unit"))),
264        Some('l') => Ok((1, scala_root_namer("Null"))),
265        Some('n') => Ok((1, scala_root_namer("Nothing"))),
266        Some('b') => Ok((1, scala_root_namer("Byte"))),
267        Some('s') => Ok((1, scala_root_namer("Short"))),
268        Some('i') => Ok((1, scala_root_namer("Int"))),
269        Some('j') => Ok((1, scala_root_namer("Long"))),
270
271        Some('R') => match chars.next() {
272            Some('_') => Ok((2, "<c pointer>".to_string())),
273            Some(c) => Err(format!("type_name: after R expected _, got `{c}` instead").to_string()),
274            None => Err("type_name: unexpected end of input".to_string()),
275        },
276        Some('L') => {
277            let (consumed, type_name) = nullable_type_name(&input[1..], config)?;
278            Ok((consumed + 1, common_type_namer(type_name)))
279        }
280        Some('A') => {
281            let (consumed, tn) = type_name(&input[1..], config)?;
282            let after_type_name = &input[1 + consumed..];
283            let num = number(after_type_name);
284            Ok((
285                consumed + num + 1, /* "_" at the end */
286                format!("CArray[{}]", tn),
287            ))
288        }
289        Some('X') => {
290            let (consumed, class_type_name) = name(&input[1..], config)?;
291            Ok((consumed + 1, class_type_name))
292        }
293        Some(other) => Err(format!("type_name: unexpected start character `{other}`").to_string()),
294        None => Err("type_name: unexpected end of input".to_string()),
295    };
296
297    return result;
298}
299
/// Counts the ASCII decimal digits at the very start of `input`.
///
/// Only the length of the digit run is returned; the value is not parsed.
fn number(input: &str) -> usize {
    input
        .bytes()
        .take_while(|byte| byte.is_ascii_digit())
        .count()
}
303
304fn nullable_type_name(input: &str, config: &DemanglingConfig) -> ParsingResult<(usize, String)> {
305    let mut chars = input.chars();
306
307    match chars.next() {
308        Some('A') => {
309            let (consumed, ar) = type_name(&input[1..], config)?;
310            return Ok((consumed + 2, format!("Array[{}]", ar)));
311        }
312        Some('X') => {
313            let (consumed, n) = name(input, config)?;
314
315            return Ok((consumed + 1, n));
316        }
317        Some(d) if d.is_digit(10) => {
318            return name(input, config);
319        }
320        Some(a) => return Err(format!("nullable_type_name: unexpected start `{a}`")),
321        None => return Err("nullable_type_name: unexpected end of input".to_string()),
322    };
323}
324
/// Visibility of a member as encoded by the `<scope>` part of a signature.
enum Scope {
    /// Public member (tag `O`).
    Public,
    /// Public static member (tag `o`).
    PublicStatic,
    /// Private member (tag `P`); carries the rendered definition name it is
    /// private to.
    Private(String),
    /// Private static member (tag `p`); carries the rendered definition name
    /// it is private to.
    PrivateStatic(String),
}

/// Renders a scope as the prefix placed in front of a member name.
///
/// Public scopes render as the empty string; private scopes render as
/// `<private[owner]>`. Static and non-static variants render identically.
fn render_scope(scope: Scope) -> String {
    match scope {
        Scope::Public | Scope::PublicStatic => String::new(),
        Scope::Private(owner) | Scope::PrivateStatic(owner) => {
            format!("<private[{}]>", owner)
        }
    }
}
340
// <scope> ::=
//     P <defn-name>                  // private to defn-name
//     O                              // public
//     p <defn-name>                  // private static, private to defn-name
//     o                              // public static
344fn scope(input: &str, config: &DemanglingConfig) -> Result<Scope, String> {
345    config.log_name("scope", input);
346    if input.starts_with("O") {
347        return Ok(Scope::Public);
348    } else if input.starts_with("o") {
349        return Ok(Scope::PublicStatic);
350    } else if input.starts_with("P") {
351        return defn_name(&input[1..], config).map(|i| return Scope::Private(i));
352    } else if input.starts_with("p") {
353        return defn_name(&input[1..], config).map(|i| return Scope::PrivateStatic(i));
354    } else {
355        return Err(format!("scope: cannot read `{}`", input).to_string());
356    }
357}
358
359fn name(input: &str, config: &DemanglingConfig) -> ParsingResult<(usize, String)> {
360    //println!("name: {}", input);
361    config.log_name("name", input);
362    let mut number_end: usize = 0;
363    for c in input.chars() {
364        if c.is_digit(10) {
365            number_end += 1
366        } else {
367            break;
368        }
369    }
370    if number_end >= 1 {
371        let (length, rest) = input.split_at(number_end);
372
373        match usize::from_str_radix(length, 10) {
374            Ok(res) => {
375                if rest.starts_with("-") {
376                    return Ok((length.len() + 1 + res, rest[1..(1 + res)].to_string()));
377                } else {
378                    return Ok((length.len() + res, rest[0..res].to_string()));
379                }
380            }
381            Err(_) => {
382                return Err("name: invalid length".to_string());
383            }
384        }
385    } else {
386        return Err(format!("name: invalid input `{}`", input.to_string()));
387    }
388}
389
#[cfg(test)]
mod tests {
    use crate::{demangle, DemanglingConfig};

    /// Demangles `s` with the default configuration, panicking on failure.
    fn run(s: &str) -> String {
        demangle(s, &DemanglingConfig::default()).unwrap()
    }

    /// Demangles `s` with name collapsing switched off, panicking on failure.
    fn run_raw(s: &str) -> String {
        let config = DemanglingConfig {
            collapse_scala_names: false,
            ..Default::default()
        };
        demangle(s, &config).unwrap()
    }

    #[test]
    fn it_works() {
        // Mangled inputs that are checked in both collapsed and raw form.
        const PRODUCT_ARITY: &str =
            "_SM42sttp.model.headers.CacheDirective$MinFreshD12productArityiEO";
        const IN_BOUNDS: &str = "_SM42scala.scalanative.runtime.SymbolFormatter$D10inBounds$1L32scala.scalanative.unsigned.ULongizEPT42scala.scalanative.runtime.SymbolFormatter$";
        const ANON_FUN: &str = "_SM41scalaboot.template.scalatemplate$package$D10$anonfun$3L26scalaboot.template.ContextL15scala.Function1L23java.lang.StringBuilderL31scalaboot.template.UnsafeCursorL23scalaboot.template.MoveuEPT41scalaboot.template.scalatemplate$package$";

        // (mangled input, expected output) with names collapsed.
        let collapsed_cases = [
            ("_ST10__dispatch", "__dispatch"),
            (
                PRODUCT_ARITY,
                "sttp.model.headers.CacheDirective$MinFresh.productArity: Int",
            ),
            (
                IN_BOUNDS,
                "scala.scalanative.runtime.SymbolFormatter$.<private[scala.scalanative.runtime.SymbolFormatter$]>inBounds$1(scala.scalanative.unsigned.ULong,Int): Boolean",
            ),
            (
                ANON_FUN,
                "scalaboot.template.scalatemplate$package$.<private[scalaboot.template.scalatemplate$package$]>$anonfun$3(scalaboot.template.Context,scala.Function1,java.lang.StringBuilder,scalaboot.template.UnsafeCursor,scalaboot.template.Move): Unit",
            ),
            (
                "_SM33scala.scalanative.unsafe.package$D11fromCStringL28scala.scalanative.unsafe.PtrL24java.nio.charset.CharsetL16java.lang.StringEO",
                "scala.scalanative.unsafe.package$.fromCString(scala.scalanative.unsafe.Ptr,java.nio.charset.Charset): String",
            ),
            (
                "_SM17java.lang.IntegerD7compareiiiEo",
                "java.lang.Integer.compare(Int,Int): Int",
            ),
            (
                "_SM38scala.scalanative.junit.JUnitFrameworkIE",
                "scala.scalanative.junit.JUnitFramework.<clinit>",
            ),
            (
                "_SM10fansi.TrieD17$init$$$anonfun$5cLAL10fansi.Trie_L12scala.Tuple2uEpT10fansi.Trie",
                "fansi.Trie.<private[fansi.Trie]>$init$$$anonfun$5(Char,Array[fansi.Trie],scala.Tuple2): Unit",
            ),
        ];

        // (mangled input, expected output) with names left fully qualified.
        let raw_cases = [
            (
                PRODUCT_ARITY,
                "sttp.model.headers.CacheDirective$MinFresh.productArity: scala.Int",
            ),
            (
                IN_BOUNDS,
                "scala.scalanative.runtime.SymbolFormatter$.<private[scala.scalanative.runtime.SymbolFormatter$]>inBounds$1(scala.scalanative.unsigned.ULong,scala.Int): scala.Boolean",
            ),
            (
                ANON_FUN,
                "scalaboot.template.scalatemplate$package$.<private[scalaboot.template.scalatemplate$package$]>$anonfun$3(scalaboot.template.Context,scala.Function1,java.lang.StringBuilder,scalaboot.template.UnsafeCursor,scalaboot.template.Move): scala.Unit",
            ),
        ];

        for &(mangled, expected) in collapsed_cases.iter() {
            assert_eq!(run(mangled), expected);
        }
        for &(mangled, expected) in raw_cases.iter() {
            assert_eq!(run_raw(mangled), expected);
        }
    }
}