Skip to main content

nimrod/
metadata.rs

//! Nim binary metadata detection.
//!
//! Infers high-level properties from the container's symbol table:
//!
//! - **GC mode** — `refc` (legacy) vs `arc`/`orc` (modern), based on
//!   which RTTI global naming convention is present.
//! - **Compiler-family hint** — refc / ARC / ORC build family, derived from
//!   the GC mode plus the presence of the ORC cycle-collector symbol.
//! - **`--nimMainPrefix`** — the user-configurable prefix on entry shims.
//!
//! See RESEARCH.md §3.5 (RTTI counts by mode) and §6 (entry shims).
10
11use crate::{container::Container, detect::DetectionMatches};
12use core::fmt;
13
/// Memory-management strategy a Nim binary was built with.
///
/// # Stability
///
/// The text produced by the [`Display`](fmt::Display) implementation is part
/// of nimrod's stable API; changing it is a SemVer-major change.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GcMode {
    /// Traditional reference-counting GC selected by `--mm:refc` (the Nim 1.x
    /// default). Recognised by the presence of legacy `NTI_` RTTI globals.
    Refc,
    /// Deterministic ARC, with or without the cycle collector (`--mm:arc` or
    /// `--mm:orc`, the Nim 2.x default). Recognised by `NTIv2_` globals.
    ArcOrc,
    /// The mode could not be established because no RTTI symbols were found,
    /// as happens with a fully stripped binary.
    Unknown,
}
32
33impl GcMode {
34    /// Returns the stable string identifier for this mode.
35    pub fn as_str(&self) -> &'static str {
36        match self {
37            Self::Refc => "Refc",
38            Self::ArcOrc => "ArcOrc",
39            Self::Unknown => "Unknown",
40        }
41    }
42}
43
44impl fmt::Display for GcMode {
45    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46        f.write_str(self.as_str())
47    }
48}
49
/// Heuristic guess at which Nim compiler family produced a binary.
///
/// A Nim binary carries no deterministic version stamp (`RESEARCH.md` §12), so
/// the hint is inferred from the RTTI generation together with the
/// cycle-collector signal. Treat it as a family indicator, not as a precise
/// point release.
///
/// # Stability
///
/// The text produced by the [`Display`](fmt::Display) implementation is part
/// of nimrod's stable API; changing it is a SemVer-major change.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NimVersionHint {
    /// `--mm:refc` build: the Nim 1.x default, still selectable in 2.x.
    /// Indicated by V1 `NTI_` RTTI globals with no V2 globals alongside them.
    Nim1xRefc,
    /// `--mm:arc` build (Nim 2.x): V2 RTTI is present but no cycle collector.
    Nim2xArc,
    /// `--mm:orc` build (the Nim 2.x default): V2 RTTI is present *and* so
    /// are cycle-collector symbols (`collectCycles`).
    Nim2xOrc,
    /// Nothing could be inferred because no RTTI globals exist, e.g. in a
    /// fully stripped binary.
    Unknown,
}
73
74impl NimVersionHint {
75    /// Returns the stable string identifier for this hint.
76    pub fn as_str(&self) -> &'static str {
77        match self {
78            Self::Nim1xRefc => "Nim1xRefc",
79            Self::Nim2xArc => "Nim2xArc",
80            Self::Nim2xOrc => "Nim2xOrc",
81            Self::Unknown => "Unknown",
82        }
83    }
84}
85
86impl fmt::Display for NimVersionHint {
87    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
88        f.write_str(self.as_str())
89    }
90}
91
92/// Infers a best-effort Nim compiler-family hint.
93///
94/// - V2 RTTI + a cycle-collector symbol (`collectCycles…`) → [`NimVersionHint::Nim2xOrc`]
95/// - V2 RTTI without one → [`NimVersionHint::Nim2xArc`]
96/// - V1 RTTI only → [`NimVersionHint::Nim1xRefc`]
97/// - neither → [`NimVersionHint::Unknown`]
98///
99/// The ARC vs ORC split keys off the ORC cycle collector
100/// (`collectCycles` / `collectCyclesBacon`, from `lib/system/orc.nim`). That
101/// symbol is stripped by `-d:danger` / `--passL:-s`, so a stripped ORC build
102/// may be reported as [`NimVersionHint::Nim2xArc`]. The refc-vs-modern split is
103/// robust regardless of stripping (it keys off the RTTI naming convention,
104/// `RESEARCH.md` §3.4).
105pub fn detect_compiler_version(
106    container: &Container<'_>,
107    matches: DetectionMatches,
108) -> NimVersionHint {
109    match gc_mode(matches) {
110        GcMode::ArcOrc => {
111            if has_cycle_collector(container) {
112                NimVersionHint::Nim2xOrc
113            } else {
114                NimVersionHint::Nim2xArc
115            }
116        }
117        GcMode::Refc => NimVersionHint::Nim1xRefc,
118        GcMode::Unknown => NimVersionHint::Unknown,
119    }
120}
121
122/// Returns `true` if the symbol table contains an ORC cycle-collector routine.
123fn has_cycle_collector(container: &Container<'_>) -> bool {
124    container.symbols().iter().any(|s| {
125        let name = s.name.as_ref();
126        name.contains("collectCycles") || name.contains("collectCyclesBacon")
127    })
128}
129
130/// Infers the GC mode from the detection report's RTTI flags.
131///
132/// - `NTIv2_` symbols → [`GcMode::ArcOrc`]
133/// - `NTI_` symbols (without `NTIv2_`) → [`GcMode::Refc`]
134/// - Neither → [`GcMode::Unknown`]
135pub fn gc_mode(matches: DetectionMatches) -> GcMode {
136    let has_v2 = matches.contains(DetectionMatches::NTIV2_SYMBOL);
137    let has_legacy = matches.contains(DetectionMatches::NTI_LEGACY_SYMBOL);
138
139    match (has_v2, has_legacy) {
140        (true, _) => GcMode::ArcOrc,
141        (false, true) => GcMode::Refc,
142        _ => GcMode::Unknown,
143    }
144}
145
146/// Attempts to detect the `--nimMainPrefix` from the symbol table.
147///
148/// Scans for a symbol matching `<prefix>NimMain` (exact name, no module
149/// suffix). Returns the prefix (empty string for the default, non-empty
150/// for custom prefixes), or `None` if `NimMain` was not found.
151pub fn nim_main_prefix<'a>(container: &'a Container<'a>) -> Option<&'a str> {
152    for sym in container.symbols() {
153        let name = sym.name.as_ref();
154        let stripped = name.strip_prefix('_').unwrap_or(name);
155        if stripped == "NimMain" {
156            return Some("");
157        }
158        // Custom prefix: `<prefix>NimMain` — must not contain `__`
159        // (which would indicate a normal mangled symbol).
160        if let Some(prefix) = stripped.strip_suffix("NimMain")
161            && !prefix.is_empty()
162            && !stripped.contains("__")
163        {
164            // The prefix should be a valid identifier fragment.
165            if prefix
166                .bytes()
167                .all(|b| b.is_ascii_alphanumeric() || b == b'_')
168            {
169                return Some(prefix);
170            }
171        }
172    }
173    None
174}
175
#[cfg(test)]
mod tests {
    use super::*;
    use crate::container::{self, Arch, Format, Symbol, SymbolKind};
    use std::borrow::Cow;

    /// The ARC/ORC split depends on a cycle-collector symbol being present;
    /// the refc and unknown outcomes depend on the RTTI flags alone.
    #[test]
    fn compiler_version_splits_arc_orc_via_cycle_collector() {
        let image = [0u8; 4];

        // A container whose only symbol is the ORC cycle collector.
        let collector = Symbol {
            name: Cow::Borrowed("collectCyclesBacon__system_u3313"),
            vm_addr: 0x1000,
            size: 0,
            kind: SymbolKind::Function,
        };
        let orc = container::assemble(
            &image,
            Format::Elf,
            Arch::Amd64,
            0,
            vec![],
            vec![collector],
        );
        assert_eq!(
            detect_compiler_version(&orc, DetectionMatches::NTIV2_SYMBOL),
            NimVersionHint::Nim2xOrc
        );

        // With an empty symbol table the outcome is decided by the flags:
        // V2 → ARC, V1 only → refc, nothing → unknown.
        let arc = container::assemble(&image, Format::Elf, Arch::Amd64, 0, vec![], vec![]);
        let expectations = [
            (DetectionMatches::NTIV2_SYMBOL, NimVersionHint::Nim2xArc),
            (DetectionMatches::NTI_LEGACY_SYMBOL, NimVersionHint::Nim1xRefc),
            (DetectionMatches::EMPTY, NimVersionHint::Unknown),
        ];
        for (flags, expected) in expectations {
            assert_eq!(detect_compiler_version(&arc, flags), expected);
        }
    }

    /// Each flag combination maps to its GC mode; when both RTTI generations
    /// are flagged, V2 wins (ARC/ORC is the authoritative modern mode).
    #[test]
    fn gc_mode_from_flags() {
        let both = DetectionMatches::NTIV2_SYMBOL | DetectionMatches::NTI_LEGACY_SYMBOL;
        let expectations = [
            (DetectionMatches::NTIV2_SYMBOL, GcMode::ArcOrc),
            (DetectionMatches::NTI_LEGACY_SYMBOL, GcMode::Refc),
            (DetectionMatches::EMPTY, GcMode::Unknown),
            (both, GcMode::ArcOrc),
        ];
        for (flags, expected) in expectations {
            assert_eq!(gc_mode(flags), expected);
        }
    }
}