nimrod/metadata.rs
1//! Nim binary metadata detection.
2//!
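//! Infers high-level properties from the detection flags and the container's
//! symbol table:
//!
//! - **GC mode** — `refc` (legacy) vs `arc`/`orc` (modern), based on
//!   which RTTI global naming convention is present.
//! - **Compiler-family hint** — a best-effort refc / ARC / ORC split that
//!   additionally looks for ORC cycle-collector symbols.
//! - **`--nimMainPrefix`** — the user-configurable prefix on entry shims.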
8//!
9//! See RESEARCH.md §3.5 (RTTI counts by mode) and §6 (entry shims).
10
11use crate::{container::Container, detect::DetectionMatches};
12use core::fmt;
13
/// Nim garbage-collector / memory-management mode.
///
/// # Stability
///
/// The string returned by [`Display`](fmt::Display) is part of nimrod's
/// stable API. Changes are SemVer-major.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GcMode {
    /// `--mm:refc` — traditional reference-counting GC (Nim 1.x default).
    /// Indicated by the presence of legacy `NTI_` RTTI globals.
    Refc,
    /// `--mm:arc` or `--mm:orc` — deterministic ARC with optional cycle
    /// collector (Nim 2.x default). Indicated by `NTIv2_` globals.
    ArcOrc,
    /// Could not determine the GC mode (no RTTI symbols found, e.g.
    /// in a fully stripped binary).
    Unknown,
}

impl GcMode {
    /// Returns the stable string identifier for this mode.
    ///
    /// This is the exact text that [`Display`](fmt::Display) emits when no
    /// formatting flags are given.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Refc => "Refc",
            Self::ArcOrc => "ArcOrc",
            Self::Unknown => "Unknown",
        }
    }
}

impl fmt::Display for GcMode {
    /// Writes the stable identifier returned by [`GcMode::as_str`].
    ///
    /// Uses [`fmt::Formatter::pad`] rather than `write_str` so that width,
    /// fill, alignment and precision flags (e.g. `{:<8}`) are honoured.
    /// With a plain `{}` the output is exactly the identifier.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(self.as_str())
    }
}
49
/// Best-effort Nim compiler-family hint.
///
/// Derived from RTTI generation plus the cycle-collector signal — there is no
/// deterministic version stamp in a Nim binary (`RESEARCH.md` §12), so this is
/// a heuristic, not a precise point-release.
///
/// # Stability
///
/// The string returned by [`Display`](fmt::Display) is part of nimrod's stable
/// API. Changes are SemVer-major.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NimVersionHint {
    /// Legacy `--mm:refc` build (Nim 1.x default, still available in 2.x).
    /// Signalled by V1 `NTI_` RTTI globals with no V2 globals.
    Nim1xRefc,
    /// Modern `--mm:arc` build (Nim 2.x). V2 RTTI present, no cycle collector.
    Nim2xArc,
    /// Modern `--mm:orc` build (Nim 2.x default). V2 RTTI present *and*
    /// cycle-collector symbols (`collectCycles`) present.
    Nim2xOrc,
    /// Could not determine (no RTTI globals — e.g. a fully stripped binary).
    Unknown,
}

impl NimVersionHint {
    /// Returns the stable string identifier for this hint.
    ///
    /// This is the exact text that [`Display`](fmt::Display) emits when no
    /// formatting flags are given.
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Nim1xRefc => "Nim1xRefc",
            Self::Nim2xArc => "Nim2xArc",
            Self::Nim2xOrc => "Nim2xOrc",
            Self::Unknown => "Unknown",
        }
    }
}

impl fmt::Display for NimVersionHint {
    /// Writes the stable identifier returned by [`NimVersionHint::as_str`].
    ///
    /// Uses [`fmt::Formatter::pad`] rather than `write_str` so that width,
    /// fill, alignment and precision flags (e.g. `{:<10}`) are honoured.
    /// With a plain `{}` the output is exactly the identifier.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.pad(self.as_str())
    }
}
91
92/// Infers a best-effort Nim compiler-family hint.
93///
94/// - V2 RTTI + a cycle-collector symbol (`collectCycles…`) → [`NimVersionHint::Nim2xOrc`]
95/// - V2 RTTI without one → [`NimVersionHint::Nim2xArc`]
96/// - V1 RTTI only → [`NimVersionHint::Nim1xRefc`]
97/// - neither → [`NimVersionHint::Unknown`]
98///
99/// The ARC vs ORC split keys off the ORC cycle collector
100/// (`collectCycles` / `collectCyclesBacon`, from `lib/system/orc.nim`). That
101/// symbol is stripped by `-d:danger` / `--passL:-s`, so a stripped ORC build
102/// may be reported as [`NimVersionHint::Nim2xArc`]. The refc-vs-modern split is
103/// robust regardless of stripping (it keys off the RTTI naming convention,
104/// `RESEARCH.md` §3.4).
105pub fn detect_compiler_version(
106 container: &Container<'_>,
107 matches: DetectionMatches,
108) -> NimVersionHint {
109 match gc_mode(matches) {
110 GcMode::ArcOrc => {
111 if has_cycle_collector(container) {
112 NimVersionHint::Nim2xOrc
113 } else {
114 NimVersionHint::Nim2xArc
115 }
116 }
117 GcMode::Refc => NimVersionHint::Nim1xRefc,
118 GcMode::Unknown => NimVersionHint::Unknown,
119 }
120}
121
122/// Returns `true` if the symbol table contains an ORC cycle-collector routine.
123fn has_cycle_collector(container: &Container<'_>) -> bool {
124 container.symbols().iter().any(|s| {
125 let name = s.name.as_ref();
126 name.contains("collectCycles") || name.contains("collectCyclesBacon")
127 })
128}
129
130/// Infers the GC mode from the detection report's RTTI flags.
131///
132/// - `NTIv2_` symbols → [`GcMode::ArcOrc`]
133/// - `NTI_` symbols (without `NTIv2_`) → [`GcMode::Refc`]
134/// - Neither → [`GcMode::Unknown`]
135pub fn gc_mode(matches: DetectionMatches) -> GcMode {
136 let has_v2 = matches.contains(DetectionMatches::NTIV2_SYMBOL);
137 let has_legacy = matches.contains(DetectionMatches::NTI_LEGACY_SYMBOL);
138
139 match (has_v2, has_legacy) {
140 (true, _) => GcMode::ArcOrc,
141 (false, true) => GcMode::Refc,
142 _ => GcMode::Unknown,
143 }
144}
145
146/// Attempts to detect the `--nimMainPrefix` from the symbol table.
147///
148/// Scans for a symbol matching `<prefix>NimMain` (exact name, no module
149/// suffix). Returns the prefix (empty string for the default, non-empty
150/// for custom prefixes), or `None` if `NimMain` was not found.
151pub fn nim_main_prefix<'a>(container: &'a Container<'a>) -> Option<&'a str> {
152 for sym in container.symbols() {
153 let name = sym.name.as_ref();
154 let stripped = name.strip_prefix('_').unwrap_or(name);
155 if stripped == "NimMain" {
156 return Some("");
157 }
158 // Custom prefix: `<prefix>NimMain` — must not contain `__`
159 // (which would indicate a normal mangled symbol).
160 if let Some(prefix) = stripped.strip_suffix("NimMain")
161 && !prefix.is_empty()
162 && !stripped.contains("__")
163 {
164 // The prefix should be a valid identifier fragment.
165 if prefix
166 .bytes()
167 .all(|b| b.is_ascii_alphanumeric() || b == b'_')
168 {
169 return Some(prefix);
170 }
171 }
172 }
173 None
174}
175
#[cfg(test)]
mod tests {
    use super::*;

    /// With V2 RTTI flagged, the presence of a cycle-collector symbol decides
    /// ORC vs ARC; V1-only and empty flags map to refc and unknown.
    #[test]
    fn compiler_version_splits_arc_orc_via_cycle_collector() {
        use crate::container::{self, Arch, Format, Symbol, SymbolKind};
        use std::borrow::Cow;

        let bytes = [0u8; 4];

        // Container whose only symbol is the ORC cycle collector.
        let collector = Symbol {
            name: Cow::Borrowed("collectCyclesBacon__system_u3313"),
            vm_addr: 0x1000,
            size: 0,
            kind: SymbolKind::Function,
        };
        let with_collector = container::assemble(
            &bytes,
            Format::Elf,
            Arch::Amd64,
            0,
            vec![],
            vec![collector],
        );
        // Container with an empty symbol table.
        let without_collector =
            container::assemble(&bytes, Format::Elf, Arch::Amd64, 0, vec![], vec![]);

        let cases = [
            // V2 flags + cycle collector → ORC.
            (
                &with_collector,
                DetectionMatches::NTIV2_SYMBOL,
                NimVersionHint::Nim2xOrc,
            ),
            // Same V2 flags but no cycle collector → ARC.
            (
                &without_collector,
                DetectionMatches::NTIV2_SYMBOL,
                NimVersionHint::Nim2xArc,
            ),
            // V1 only → refc.
            (
                &without_collector,
                DetectionMatches::NTI_LEGACY_SYMBOL,
                NimVersionHint::Nim1xRefc,
            ),
            // Nothing → unknown.
            (
                &without_collector,
                DetectionMatches::EMPTY,
                NimVersionHint::Unknown,
            ),
        ];
        for (idx, (container, flags, expected)) in cases.into_iter().enumerate() {
            assert_eq!(
                detect_compiler_version(container, flags),
                expected,
                "case #{idx}"
            );
        }
    }

    /// Each RTTI flag combination maps to the documented GC mode.
    #[test]
    fn gc_mode_from_flags() {
        // Both set — V2 wins (ARC/ORC is the authoritative modern mode).
        let both = DetectionMatches::NTIV2_SYMBOL | DetectionMatches::NTI_LEGACY_SYMBOL;

        let cases = [
            (DetectionMatches::NTIV2_SYMBOL, GcMode::ArcOrc),
            (DetectionMatches::NTI_LEGACY_SYMBOL, GcMode::Refc),
            (DetectionMatches::EMPTY, GcMode::Unknown),
            (both, GcMode::ArcOrc),
        ];
        for (idx, (flags, expected)) in cases.into_iter().enumerate() {
            assert_eq!(gc_mode(flags), expected, "case #{idx}");
        }
    }
}
226}