use std::borrow::Cow;
use crate::demangle::identifier;
/// The pieces of a symbol after it has been split and demangled.
///
/// All borrowed fields share the lifetime `'a`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Demangled<'a> {
    /// Identifier in demangled form; a [`Cow`], so it may be borrowed or owned.
    pub identifier: Cow<'a, str>,
    /// Module portion of the symbol.
    pub module: &'a str,
    /// Numeric item id, when one is present.
    pub item_id: Option<u64>,
    /// Identifier portion before demangling.
    pub raw_identifier: &'a str,
}
/// Parses `symbol` into a [`Demangled`] value.
///
/// Returns `None` when `split_symbol` cannot separate the symbol into an
/// identifier and a module suffix. On success the identifier part is passed
/// through `identifier::demangle` and is also kept unmodified in
/// `raw_identifier`.
pub fn parse(symbol: &str) -> Option<Demangled<'_>> {
    split_symbol(symbol).map(|(raw_identifier, module, item_id)| Demangled {
        identifier: identifier::demangle(raw_identifier),
        module,
        item_id,
        raw_identifier,
    })
}
fn split_symbol(symbol: &str) -> Option<(&str, &str, Option<u64>)> {
let bytes = symbol.as_bytes();
if bytes.len() < 4 {
return None;
}
let start = bytes.len().saturating_sub(2);
for i in (1..=start).rev() {
let b0 = bytes.get(i).copied();
let b1 = bytes.get(i.saturating_add(1)).copied();
if b0 == Some(b'_') && b1 == Some(b'_') {
let raw_ident = symbol.get(..i).unwrap_or("");
let suffix = symbol.get(i.saturating_add(2)..).unwrap_or("");
if !raw_ident.is_empty()
&& !suffix.is_empty()
&& let Some((module, item_id)) = parse_module_suffix(suffix)
{
return Some((raw_ident, module, item_id));
}
}
}
None
}
/// Interprets the text after the `__` separator as `module` or `module_u<digits>`.
///
/// The last `_u` in `suffix` is treated as the item-id marker when at least
/// one ASCII digit follows it directly; anything after the digit run is
/// ignored. If that leading digit run does not fit in a `u64`, the whole
/// suffix is rejected with `None`. When no usable marker is found (or the
/// text before it is not a valid module name), the entire suffix is tried as
/// a bare module name with no item id.
fn parse_module_suffix(suffix: &str) -> Option<(&str, Option<u64>)> {
    if let Some((module, after_marker)) = suffix.rsplit_once("_u") {
        let digit_count = after_marker
            .bytes()
            .take_while(u8::is_ascii_digit)
            .count();

        if digit_count > 0 {
            // Parsing happens before the module check, so an overflowing id
            // rejects the suffix even if the module name would be invalid.
            let item_id = after_marker.get(..digit_count)?.parse::<u64>().ok()?;
            if !module.is_empty() && is_valid_module_name(module) {
                return Some((module, Some(item_id)));
            }
        }
    }

    is_valid_module_name(suffix).then_some((suffix, None))
}
/// Reports whether `name` is non-empty and made up solely of ASCII letters,
/// ASCII digits and underscores.
fn is_valid_module_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    name.bytes()
        .all(|byte| matches!(byte, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z'))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `symbol` (panicking if it is rejected) and compares the
    /// demangled identifier, module and item id against the expected values.
    #[track_caller]
    fn assert_parsed(
        symbol: &str,
        expected_identifier: &str,
        expected_module: &str,
        expected_item_id: Option<u64>,
    ) {
        let d = parse(symbol).unwrap();
        assert_eq!(&*d.identifier, expected_identifier);
        assert_eq!(d.module, expected_module);
        assert_eq!(d.item_id, expected_item_id);
    }

    /// Asserts that every symbol in `symbols` is rejected by `parse`.
    #[track_caller]
    fn assert_rejected(symbols: &[&str]) {
        for symbol in symbols {
            assert!(parse(symbol).is_none());
        }
    }

    #[test]
    fn basic_symbol() {
        assert_parsed(
            "genNimMainInner__cgen_u41496",
            "genNimMainInner",
            "cgen",
            Some(41496),
        );
    }

    #[test]
    fn operator_symbol() {
        assert_parsed("amp___docgen_u11299", "&", "docgen", Some(11299));
    }

    #[test]
    fn compound_operator() {
        assert_parsed("ampeq___sighashes_u12", "&=", "sighashes", Some(12));
    }

    #[test]
    fn literal_camelcase_identifier() {
        assert_parsed(
            "colonOrEquals__parser_u350",
            "colonOrEquals",
            "parser",
            Some(350),
        );
    }

    #[test]
    fn z_encoded_module_path() {
        assert_parsed(
            "FF__OOZdistZchecksumsZsrcZchecksumsZmd5_u42",
            "FF",
            "OOZdistZchecksumsZsrcZchecksumsZmd5",
            Some(42),
        );
    }

    #[test]
    fn underscore_in_identifier() {
        assert_parsed(
            "GC_getStatistics__system_u7819",
            "GC_getStatistics",
            "system",
            Some(7819),
        );
    }

    #[test]
    fn z_module_with_z_prefix() {
        assert_parsed(
            "WEXITSTATUS__posixZposix_u1063",
            "WEXITSTATUS",
            "posixZposix",
            Some(1063),
        );
    }

    #[test]
    fn pure_z_module() {
        assert_parsed(
            "DefaultLocale__pureZtimes_u2303",
            "DefaultLocale",
            "pureZtimes",
            Some(2303),
        );
    }

    #[test]
    fn rejects_non_nim() {
        assert_rejected(&["main", "_start", "printf"]);
    }

    #[test]
    fn rejects_bare_separator() {
        assert_rejected(&["__", "foo__"]);
    }

    #[test]
    fn triple_underscore_operator() {
        assert_parsed(
            "colonanonymous___cgen_u4206",
            ":anonymous",
            "cgen",
            Some(4206),
        );
    }
}