Skip to main content

padlock_source/frontends/
c_cpp.rs

1// padlock-source/src/frontends/c_cpp.rs
2//
3// Extracts struct layouts from C / C++ source using tree-sitter.
4// Sizes and alignments are computed from field type names + arch config;
5// there is no compiler involved so the results are approximate for complex types.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use std::cell::Cell;
10use std::collections::HashMap;
11use tree_sitter::{Node, Parser};
12
13use crate::CppStdlib;
14
15thread_local! {
16    static STDLIB: Cell<CppStdlib> = const { Cell::new(CppStdlib::LibStdCpp) };
17}
18
19/// Set the active C++ stdlib variant for this thread.  Called from `lib.rs::set_cpp_stdlib`.
20pub(crate) fn set_stdlib(s: CppStdlib) {
21    STDLIB.with(|c| c.set(s));
22}
23
24fn active_stdlib() -> CppStdlib {
25    STDLIB.with(|c| c.get())
26}
27
28// ── type resolution ───────────────────────────────────────────────────────────
29
30/// Map a C/C++ type name to (size, align) using the target arch.
31fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
32    let ty = ty.trim();
33    // Strip qualifiers
34    for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
35        if let Some(rest) = ty.strip_prefix(qual) {
36            return c_type_size_align(rest, arch);
37        }
38    }
39    // x86 SSE / AVX / AVX-512 SIMD types
40    match ty {
41        "__m64" => return (8, 8),
42        "__m128" | "__m128d" | "__m128i" => return (16, 16),
43        "__m256" | "__m256d" | "__m256i" => return (32, 32),
44        "__m512" | "__m512d" | "__m512i" => return (64, 64),
45        // ARM NEON — 64-bit (double-word) vectors
46        "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
47        | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
48        // ARM NEON — 128-bit (quad-word) vectors
49        "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
50        | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
51        _ => {}
52    }
53    // C++ standard library types — sizes vary by stdlib variant.
54    let stdlib = active_stdlib();
55    match ty {
56        // ── Synchronisation ───────────────────────────────────────────────────
57        // pthread_mutex_t on Linux/glibc: 40 bytes.
58        // On macOS (libc++): opaque_pthread_mutex_t = 56 bytes.
59        // MSVC: CRITICAL_SECTION = 40 bytes on 64-bit.
60        "std::mutex"
61        | "std::recursive_mutex"
62        | "std::timed_mutex"
63        | "std::recursive_timed_mutex"
64        | "pthread_mutex_t" => {
65            return match stdlib {
66                CppStdlib::LibCpp => (56, 8), // macOS/Apple pthread mutex
67                _ => (40, 8),
68            };
69        }
70        "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
71        "std::condition_variable" | "pthread_cond_t" => {
72            return match stdlib {
73                CppStdlib::LibCpp => (40, 8), // macOS pthread_cond_t
74                _ => (48, 8),
75            };
76        }
77
78        // ── String / view ─────────────────────────────────────────────────────
79        // libstdc++ std::string: 32B (ptr + length + SSO buffer (15 chars + NUL)).
80        // libc++ (Clang/macOS/Android): 24B (short-string optimisation is smaller).
81        // MSVC STL: 32B on 64-bit (16-byte SSO inline buffer).
82        "std::string" | "std::wstring" | "std::u8string" | "std::u16string" | "std::u32string"
83        | "std::pmr::string" => {
84            return match stdlib {
85                CppStdlib::LibCpp => (24, 8),
86                _ => (32, 8), // libstdc++ and MSVC both 32B
87            };
88        }
89        // std::string_view / std::span<T>: pointer + length (2 words).
90        "std::string_view"
91        | "std::wstring_view"
92        | "std::u8string_view"
93        | "std::u16string_view"
94        | "std::u32string_view" => return (arch.pointer_size * 2, arch.pointer_size),
95
96        // ── Sequence containers ───────────────────────────────────────────────
97        // std::vector<T>: pointer + size + capacity = 3 words (24B on 64-bit).
98        // Size is independent of T.
99        ty if ty.starts_with("std::vector<") || ty == "std::vector" => {
100            return (arch.pointer_size * 3, arch.pointer_size);
101        }
102        // std::deque<T>: 80B on both libstdc++ and libc++ (64-bit Linux).
103        ty if ty.starts_with("std::deque<") || ty == "std::deque" => return (80, 8),
104        // std::list<T>: sentinel node pointer + size = 2 words + node pointers.
105        // libstdc++: 24B (size_t + two pointers). libc++: 24B.
106        ty if ty.starts_with("std::list<") || ty == "std::list" => {
107            return (arch.pointer_size * 3, arch.pointer_size);
108        }
109        // std::forward_list<T>: single pointer (head node).
110        ty if ty.starts_with("std::forward_list<") || ty == "std::forward_list" => {
111            return (arch.pointer_size, arch.pointer_size);
112        }
113        // std::array<T, N>: inline storage; size = N * sizeof(T).
114        // We cannot compute this without resolving T and N, so fall through.
115
116        // ── Associative / unordered containers ────────────────────────────────
117        // All map/set types: header node + size = ~48B (libstdc++) / ~40B (libc++).
118        // Use 48B as conservative approximation.
119        ty if ty.starts_with("std::map<")
120            || ty.starts_with("std::multimap<")
121            || ty.starts_with("std::set<")
122            || ty.starts_with("std::multiset<") =>
123        {
124            return (48, 8);
125        }
126        // std::unordered_map / unordered_set: bucket array pointer + size + load factor + etc.
127        // libstdc++: ~56B. libc++: ~72B. Use 56B.
128        ty if ty.starts_with("std::unordered_map<")
129            || ty.starts_with("std::unordered_multimap<")
130            || ty.starts_with("std::unordered_set<")
131            || ty.starts_with("std::unordered_multiset<") =>
132        {
133            return (56, 8);
134        }
135
136        // ── Smart pointers ────────────────────────────────────────────────────
137        // std::unique_ptr<T>: single pointer (deleter may be zero-sized via EBO).
138        ty if ty.starts_with("std::unique_ptr<") || ty == "std::unique_ptr" => {
139            return (arch.pointer_size, arch.pointer_size);
140        }
141        // std::shared_ptr<T> / std::weak_ptr<T>: object pointer + control block pointer.
142        ty if ty.starts_with("std::shared_ptr<")
143            || ty == "std::shared_ptr"
144            || ty.starts_with("std::weak_ptr<")
145            || ty == "std::weak_ptr" =>
146        {
147            return (arch.pointer_size * 2, arch.pointer_size);
148        }
149
150        // ── Type-erasure / utilities ──────────────────────────────────────────
151        // std::function<Sig>: 32B on libstdc++ and libc++ (64-bit Linux).
152        // Holds a functor pointer, a vtable pointer, and a small-functor buffer.
153        ty if ty.starts_with("std::function<") || ty == "std::function" => return (32, 8),
154        // std::any: 32B on libstdc++ (small-object buffer + vtable pointer).
155        "std::any" => return (32, 8),
156        // std::error_code / std::error_condition: pointer + int = 16B.
157        "std::error_code" | "std::error_condition" => return (16, 8),
158        // std::exception_ptr: single pointer.
159        "std::exception_ptr" => return (arch.pointer_size, arch.pointer_size),
160        // std::type_index: single pointer (wraps std::type_info*).
161        "std::type_index" => return (arch.pointer_size, arch.pointer_size),
162        // std::span<T>: pointer + length (2 words). Template arg irrelevant.
163        ty if ty.starts_with("std::span<") || ty == "std::span" => {
164            return (arch.pointer_size * 2, arch.pointer_size);
165        }
166        // std::optional<T>: sizeof(T) + 1B bool, padded to align(T).
167        // Recurse to resolve T then apply the formula.
168        ty if ty.starts_with("std::optional<") && ty.ends_with('>') => {
169            let inner = &ty["std::optional<".len()..ty.len() - 1];
170            let (t_size, t_align) = c_type_size_align(inner.trim(), arch);
171            let total = (t_size + 1).next_multiple_of(t_align.max(1));
172            return (total, t_align.max(1));
173        }
174
175        // ── Atomic ────────────────────────────────────────────────────────────
176        // std::atomic<T>: same size and alignment as T.
177        ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
178            let inner = &ty[12..ty.len() - 1];
179            return c_type_size_align(inner.trim(), arch);
180        }
181        // std::atomic_flag: guaranteed 1B minimum, but often 4B in practice.
182        "std::atomic_flag" => return (4, 4),
183
184        _ => {} // fall through to primitive types below
185    }
186    // Primitive / stdint / pointer types
187    match ty {
188        "char" | "_Bool" | "bool" => (1, 1),
189        "short" | "short int" => (2, 2),
190        "int" => (4, 4),
191        "long" | "long int" => (arch.pointer_size, arch.pointer_size),
192        "long long" | "long long int" => (8, 8),
193        "float" => (4, 4),
194        "double" => (8, 8),
195        "long double" => (16, 16),
196
197        // C99 stdint exact-width types
198        "int8_t" | "uint8_t" => (1, 1),
199        "int16_t" | "uint16_t" => (2, 2),
200        "int32_t" | "uint32_t" => (4, 4),
201        "int64_t" | "uint64_t" => (8, 8),
202        "intmax_t" | "uintmax_t" => (8, 8),
203        "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
204            (arch.pointer_size, arch.pointer_size)
205        }
206
207        // C99 fast types — uint_fast{8,16}_t are always 1/2B;
208        // uint_fast{32,64}_t are pointer-sized on 64-bit (8B), 4B on 32-bit.
209        "int_fast8_t" | "uint_fast8_t" => (1, 1),
210        "int_fast16_t" | "uint_fast16_t" => (2, 2),
211        "int_fast32_t" | "uint_fast32_t" | "int_fast64_t" | "uint_fast64_t" => {
212            (arch.pointer_size, arch.pointer_size)
213        }
214
215        // C99 least types — minimum guaranteed widths
216        "int_least8_t" | "uint_least8_t" => (1, 1),
217        "int_least16_t" | "uint_least16_t" => (2, 2),
218        "int_least32_t" | "uint_least32_t" => (4, 4),
219        "int_least64_t" | "uint_least64_t" => (8, 8),
220
221        // GCC/Clang 128-bit integer extension
222        "__int128" | "__uint128" | "__int128_t" | "__uint128_t" => (16, 16),
223
224        // Linux kernel short-form integer types (linux/types.h)
225        "u8" | "s8" => (1, 1),
226        "u16" | "s16" => (2, 2),
227        "u32" | "s32" => (4, 4),
228        "u64" | "s64" => (8, 8),
229
230        // Linux kernel double-underscore types (__u8, __s8, __be16, __le32, …)
231        "__u8" | "__s8" | "__u8__" | "__s8__" => (1, 1),
232        "__u16" | "__s16" | "__be16" | "__le16" => (2, 2),
233        "__u32" | "__s32" | "__be32" | "__le32" => (4, 4),
234        "__u64" | "__s64" | "__be64" | "__le64" => (8, 8),
235
236        // MSVC fixed-width intrinsics
237        "__int8" => (1, 1),
238        "__int16" => (2, 2),
239        "__int32" => (4, 4),
240        "__int64" => (8, 8),
241
242        // Windows SDK / WinAPI types
243        "BYTE" | "BOOLEAN" | "CHAR" | "INT8" | "UINT8" => (1, 1),
244        "WORD" | "WCHAR" | "SHORT" | "USHORT" | "INT16" | "UINT16" => (2, 2),
245        "DWORD" | "LONG" | "ULONG" | "INT" | "UINT" | "BOOL" | "FLOAT" | "INT32" | "UINT32" => {
246            (4, 4)
247        }
248        "QWORD" | "LONGLONG" | "ULONGLONG" | "INT64" | "UINT64" | "LARGE_INTEGER" => (8, 8),
249        "DWORD64" | "ULONG64" | "LONG64" => (8, 8),
250        "HANDLE" | "LPVOID" | "PVOID" | "LPCVOID" | "LPSTR" | "LPCSTR" | "LPWSTR" | "LPCWSTR"
251        | "SIZE_T" | "SSIZE_T" | "ULONG_PTR" | "LONG_PTR" | "DWORD_PTR" | "INT_PTR"
252        | "UINT_PTR" => (arch.pointer_size, arch.pointer_size),
253
254        // C/C++ character types
255        // wchar_t: 4B on Linux/macOS (GCC/Clang POSIX), 2B on Windows/MSVC.
256        // All current padlock arch configs are POSIX, so 4B is correct here.
257        "wchar_t" => (4, 4),
258        "char8_t" => (1, 1),
259        "char16_t" => (2, 2),
260        "char32_t" => (4, 4),
261
262        // Half-precision and bfloat16 (ARM, GCC, Clang, ML workloads)
263        "_Float16" | "__fp16" | "__bf16" => (2, 2),
264        // 128-bit float (GCC/Clang extension)
265        "_Float128" | "__float128" => (16, 16),
266
267        // Pointer types
268        ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
269        // Unknown — use pointer size as a reasonable default
270        _ => (arch.pointer_size, arch.pointer_size),
271    }
272}
273
274// ── struct / union simulation ─────────────────────────────────────────────────
275
/// Remove a trailing bit-field width (`:N`) from a type spelling.
///
/// The text after the *last* `:` must be non-empty and consist solely of
/// ASCII digits (surrounding whitespace ignored) for it to count as a width:
/// `"int:3"` → `"int"`, whereas `"std::atomic"` is returned untouched.
fn strip_bitfield_suffix(ty: &str) -> &str {
    let is_width = |text: &str| !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit());
    match ty.rsplit_once(':') {
        Some((base, width)) if is_width(width.trim()) => base.trim_end(),
        _ => ty,
    }
}
287
288/// Return `true` when `ty` carries a bit-field width annotation (e.g. `"int:3"`).
289fn is_bitfield_type(ty: &str) -> bool {
290    strip_bitfield_suffix(ty) != ty
291}
292
293/// Collapse consecutive bit-field `RawField` entries into storage-unit-sized
294/// synthetic fields, following the standard GCC/Clang ABI rules:
295///
296/// - Consecutive bitfields that share the same storage-unit size are packed
297///   together until the unit fills up.
298/// - A change in storage-unit size (e.g. `uint8_t` followed by `uint32_t`)
299///   or a full unit forces a new storage unit.
300/// - A zero-width bitfield (`T : 0`) flushes the current group and forces
301///   alignment to the next storage-unit boundary (represented by dropping the
302///   zero-width field).
303/// - Non-bitfield fields are passed through unchanged.
304///
305/// Each resulting synthetic field has:
306/// - **name**: `[a:3|b:5]` — shows what is packed inside the storage unit.
307/// - **type**: the base type of the first field in the group (e.g. `"unsigned int"`).
308/// - **size / align**: determined by `c_type_size_align` on that base type.
309#[allow(unused_assignments)] // final flush!() resets state vars that aren't read again
310fn resolve_bitfield_groups(
311    raw_fields: Vec<RawField>,
312    aliases: &HashMap<String, String>,
313    arch: &'static ArchConfig,
314) -> Vec<RawField> {
315    let mut result: Vec<RawField> = Vec::new();
316
317    // Accumulator for the current storage unit.
318    let mut in_unit = false; // true while we have an open storage unit
319    let mut unit_base_ty = String::new(); // base type of the current unit (for size/align)
320    let mut unit_size: usize = 0; // byte size of the storage unit
321    let mut bits_used: u32 = 0; // bits consumed so far in this unit
322    let mut parts: Vec<String> = Vec::new(); // "name:N" pairs for named fields only
323    let mut first_line: u32 = 0;
324
325    // Flush the current unit. Named members produce a labelled synthetic field;
326    // anonymous-only units produce a `[__pad]` placeholder so that
327    // `simulate_layout` accounts for the storage-unit bytes in the total size.
328    macro_rules! flush {
329        () => {
330            if in_unit {
331                let name = if parts.is_empty() {
332                    "[__pad]".to_string()
333                } else {
334                    format!("[{}]", parts.join("|"))
335                };
336                result.push((name, unit_base_ty.clone(), None, None, first_line));
337                parts.clear();
338                unit_base_ty.clear();
339                unit_size = 0;
340                bits_used = 0;
341                in_unit = false;
342            }
343        };
344    }
345
346    for (fname, ty, guard, alignas, line) in raw_fields {
347        if !is_bitfield_type(&ty) {
348            flush!();
349            result.push((fname, ty, guard, alignas, line));
350            continue;
351        }
352
353        // Parse the bit-width from the "type:N" annotation.
354        let base_raw = strip_bitfield_suffix(&ty).to_string();
355        let bit_width: u32 = ty[ty.rfind(':').unwrap() + 1..].trim().parse().unwrap_or(0);
356
357        // Zero-width bitfield: flush and force alignment to next storage-unit boundary.
358        if bit_width == 0 {
359            flush!();
360            continue;
361        }
362
363        // Resolve the storage-unit size for this field.
364        let resolved = aliases
365            .get(&base_raw)
366            .map(String::as_str)
367            .unwrap_or(&base_raw);
368        let (new_unit_size, _) = c_type_size_align(resolved, arch);
369
370        // Start a new unit when: no current unit, unit-size changes, or current unit is full.
371        let needs_new_unit =
372            !in_unit || new_unit_size != unit_size || bits_used + bit_width > unit_size as u32 * 8;
373
374        if needs_new_unit {
375            flush!();
376            unit_base_ty = base_raw;
377            unit_size = new_unit_size;
378            bits_used = 0;
379            first_line = line;
380            in_unit = true;
381        }
382
383        bits_used += bit_width;
384        // Anonymous padding bitfields (`int : 3;` with no name) still consume
385        // bits in the current unit but are omitted from the display label.
386        if !fname.is_empty() {
387            parts.push(format!("{fname}:{bit_width}"));
388        }
389    }
390
391    // Flush any remaining group.
392    flush!();
393
394    result
395}
396
397/// Simulate C/C++ struct layout given ordered fields.
398///
399/// `pack_n` controls field alignment capping:
400/// - `0` — no packing (default C/C++ ABI alignment rules)
401/// - `1` — `__attribute__((packed))` / `#pragma pack(1)`: force alignment to 1
402/// - `N` — `#pragma pack(N)`: cap each field's alignment at N bytes
403///
404/// This unified model handles both GCC/Clang `__attribute__((packed))` and
405/// MSVC-style `#pragma pack(N)` directives.
406fn simulate_layout(
407    fields: &mut Vec<Field>,
408    struct_name: String,
409    arch: &'static ArchConfig,
410    source_line: Option<u32>,
411    pack_n: usize,
412) -> StructLayout {
413    let mut offset = 0usize;
414    let mut struct_align = 1usize;
415
416    for f in fields.iter_mut() {
417        let eff_align = if pack_n > 0 {
418            f.align.min(pack_n)
419        } else {
420            f.align
421        };
422        if eff_align > 0 {
423            offset = offset.next_multiple_of(eff_align);
424        }
425        f.offset = offset;
426        offset += f.size;
427        struct_align = struct_align.max(eff_align);
428    }
429    // Trailing padding (not present when fully packed)
430    if pack_n != 1 && struct_align > 0 {
431        offset = offset.next_multiple_of(struct_align);
432    }
433
434    StructLayout {
435        name: struct_name,
436        total_size: offset,
437        align: struct_align,
438        fields: std::mem::take(fields),
439        source_file: None,
440        source_line,
441        arch,
442        is_packed: pack_n == 1,
443        is_union: false,
444        is_repr_rust: false,
445        suppressed_findings: Vec::new(),
446        uncertain_fields: Vec::new(),
447    }
448}
449
450/// Simulate a C/C++ union layout: all fields start at offset 0;
451/// total size is the largest field, rounded to max alignment.
452fn simulate_union_layout(
453    fields: &mut Vec<Field>,
454    name: String,
455    arch: &'static ArchConfig,
456    source_line: Option<u32>,
457) -> StructLayout {
458    for f in fields.iter_mut() {
459        f.offset = 0;
460    }
461    let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
462    let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
463    let total_size = if max_align > 0 {
464        max_size.next_multiple_of(max_align)
465    } else {
466        max_size
467    };
468
469    StructLayout {
470        name,
471        total_size,
472        align: max_align,
473        fields: std::mem::take(fields),
474        source_file: None,
475        source_line,
476        arch,
477        is_packed: false,
478        is_union: true,
479        is_repr_rust: false,
480        suppressed_findings: Vec::new(),
481        uncertain_fields: Vec::new(),
482    }
483}
484
485// ── C++ class parsing (vtable + inheritance) ──────────────────────────────────
486
487/// Parse a `class_specifier` node, modelling:
488/// - A hidden vtable pointer (`__vptr`) when any method is `virtual`.
489/// - Base-class storage as a synthetic `__base_<Name>` field (size resolved
490///   later by the nested-struct resolution pass in `lib.rs`).
491fn parse_class_specifier(
492    source: &str,
493    node: Node<'_>,
494    arch: &'static ArchConfig,
495    aliases: &HashMap<String, String>,
496    pragma_pack: usize,
497) -> Option<StructLayout> {
498    let mut class_name = "<anonymous>".to_string();
499    let mut base_names: Vec<String> = Vec::new();
500    let mut body_node: Option<Node> = None;
501    let mut is_packed = false;
502    let mut struct_alignas: Option<usize> = None;
503
504    for i in 0..node.child_count() {
505        let child = node.child(i)?;
506        match child.kind() {
507            "type_identifier" => class_name = source[child.byte_range()].to_string(),
508            "base_class_clause" => {
509                // tree-sitter-cpp structure: ':' [access_specifier] type_identifier
510                // type_identifier nodes are direct children of base_class_clause.
511                for j in 0..child.child_count() {
512                    if let Some(base) = child.child(j)
513                        && base.kind() == "type_identifier"
514                    {
515                        base_names.push(source[base.byte_range()].to_string());
516                    }
517                }
518            }
519            "field_declaration_list" => body_node = Some(child),
520            "attribute_specifier" if source[child.byte_range()].contains("packed") => {
521                is_packed = true;
522            }
523            // C++11 class-level alignas: `class alignas(64) Name { ... };`
524            "alignas_qualifier" | "alignas_specifier" if struct_alignas.is_none() => {
525                struct_alignas = parse_alignas_value(source, child);
526            }
527            _ => {}
528        }
529    }
530
531    let body = body_node?;
532
533    // Detect virtual methods: look for `virtual` keyword anywhere in body
534    let has_virtual = contains_virtual_keyword(source, body);
535
536    // Collect declared fields: (field_name, type_text, guard, alignas_override, source_line)
537    let mut raw_fields: Vec<RawField> = Vec::new();
538    for i in 0..body.child_count() {
539        let Some(child) = body.child(i) else {
540            continue;
541        };
542        if child.kind() == "field_declaration" {
543            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, false) {
544                raw_fields.extend(anon_fields);
545            } else if let Some((ty, fname, guard, al, ln)) = parse_field_declaration(source, child)
546            {
547                raw_fields.push((fname, ty, guard, al, ln));
548            }
549        }
550    }
551
552    // Build fields: vtable pointer, then base-class slots, then declared fields
553    let mut fields: Vec<Field> = Vec::new();
554
555    // Virtual dispatch pointer (hidden, at offset 0 for the first virtual class)
556    if has_virtual {
557        let ps = arch.pointer_size;
558        fields.push(Field {
559            name: "__vptr".to_string(),
560            ty: TypeInfo::Pointer {
561                size: ps,
562                align: ps,
563            },
564            offset: 0,
565            size: ps,
566            align: ps,
567            source_file: None,
568            source_line: None,
569            access: AccessPattern::Unknown,
570        });
571    }
572
573    // Base class storage (opaque until nested-struct resolver fills in sizes)
574    for base in &base_names {
575        let ps = arch.pointer_size;
576        fields.push(Field {
577            name: format!("__base_{base}"),
578            ty: TypeInfo::Opaque {
579                name: base.clone(),
580                size: ps,
581                align: ps,
582            },
583            offset: 0,
584            size: ps,
585            align: ps,
586            source_file: None,
587            source_line: None,
588            access: AccessPattern::Unknown,
589        });
590    }
591
592    // Pack consecutive bit-field members into their storage units.
593    let raw_fields = if raw_fields
594        .iter()
595        .any(|(_, ty, _, _, _)| is_bitfield_type(ty))
596    {
597        resolve_bitfield_groups(raw_fields, aliases, arch)
598    } else {
599        raw_fields
600    };
601
602    // Declared member fields
603    for (fname, ty_name, guard, alignas, field_line) in raw_fields {
604        let resolved = aliases
605            .get(&ty_name)
606            .map(String::as_str)
607            .unwrap_or(&ty_name);
608        let (size, natural_align) = c_type_size_align(resolved, arch);
609        let align = alignas.unwrap_or(natural_align);
610        let access = if let Some(g) = guard {
611            AccessPattern::Concurrent {
612                guard: Some(g),
613                is_atomic: false,
614                is_annotated: true,
615            }
616        } else {
617            AccessPattern::Unknown
618        };
619        fields.push(Field {
620            name: fname,
621            ty: TypeInfo::Primitive {
622                name: ty_name,
623                size,
624                align,
625            },
626            offset: 0,
627            size,
628            align,
629            source_file: None,
630            source_line: Some(field_line),
631            access,
632        });
633    }
634
635    if fields.is_empty() {
636        return None;
637    }
638
639    let line = node.start_position().row as u32 + 1;
640    let pack_n = if is_packed {
641        1
642    } else if pragma_pack > 0 {
643        pragma_pack
644    } else {
645        0
646    };
647    let mut layout = simulate_layout(&mut fields, class_name, arch, Some(line), pack_n);
648
649    if let Some(al) = struct_alignas
650        && al > layout.align
651    {
652        layout.align = al;
653        if pack_n == 0 {
654            layout.total_size = layout.total_size.next_multiple_of(al);
655        }
656    }
657
658    layout.suppressed_findings =
659        super::suppress::suppressed_from_preceding_source(source, node.start_byte());
660
661    Some(layout)
662}
663
664/// Return true if a `field_declaration_list` node contains any `virtual` keyword
665/// (indicating that the class needs a vtable pointer).
666fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
667    let mut stack = vec![node];
668    while let Some(n) = stack.pop() {
669        if n.kind() == "virtual" {
670            return true;
671        }
672        // Also check raw text for cases where tree-sitter may not produce a
673        // dedicated `virtual` node (e.g. inside complex declarations).
674        if n.child_count() == 0 {
675            let text = &source[n.byte_range()];
676            if text == "virtual" {
677                return true;
678            }
679        }
680        for i in (0..n.child_count()).rev() {
681            if let Some(child) = n.child(i) {
682                stack.push(child);
683            }
684        }
685    }
686    false
687}
688
689// ── tree-sitter walker ────────────────────────────────────────────────────────
690
691/// Pre-scan a tree for plain scalar typedef declarations and return a map of
692/// `AliasName → BaseTypeName` for within-file alias resolution.
693///
694/// Only simple scalar aliases are collected, e.g.:
695///   `typedef uint32_t MyId;`   → `{"MyId": "uint32_t"}`
696///   `typedef unsigned int Idx;` → `{"Idx": "unsigned int"}`
697///
698/// Struct/union, function-pointer, and pointer typedefs are skipped — the
699/// alias name in those cases lives in a nested declarator node and will not
700/// appear as a direct `type_identifier` child, so they naturally produce fewer
701/// than two type parts and are filtered out.
702fn collect_typedef_aliases(source: &str, root: Node<'_>) -> HashMap<String, String> {
703    let mut aliases = HashMap::new();
704    let mut stack = vec![root];
705    while let Some(node) = stack.pop() {
706        for i in (0..node.child_count()).rev() {
707            if let Some(child) = node.child(i) {
708                stack.push(child);
709            }
710        }
711        if node.kind() != "type_definition" {
712            continue;
713        }
714        // Skip struct/union/class typedefs — those produce StructLayout entries.
715        let has_record = (0..node.child_count()).any(|i| {
716            node.child(i)
717                .map(|c| {
718                    matches!(
719                        c.kind(),
720                        "struct_specifier" | "union_specifier" | "class_specifier"
721                    )
722                })
723                .unwrap_or(false)
724        });
725        if has_record {
726            continue;
727        }
728        // Collect direct-child type parts in declaration order:
729        //   `typedef uint32_t MyId;`       → ["uint32_t", "MyId"]
730        //   `typedef unsigned int MyUInt;` → ["unsigned int", "MyUInt"]
731        // For pointer/function typedefs the alias name is nested inside a
732        // declarator node, so only one part is collected and we skip (len < 2).
733        let mut type_parts: Vec<String> = Vec::new();
734        for i in 0..node.child_count() {
735            let Some(child) = node.child(i) else {
736                continue;
737            };
738            match child.kind() {
739                "typedef" | ";" => {}
740                "type_identifier" | "primitive_type" | "sized_type_specifier" => {
741                    type_parts.push(source[child.byte_range()].trim().to_string());
742                }
743                _ => {}
744            }
745        }
746        if type_parts.len() < 2 {
747            continue;
748        }
749        // Last element is the alias name; everything before is the base type.
750        // Safety: len >= 2 guaranteed by the check above.
751        let alias_name = type_parts.pop().expect("len >= 2");
752        let base_type = type_parts.join(" ");
753        aliases.entry(alias_name).or_insert(base_type);
754    }
755    aliases
756}
757
758fn extract_structs_from_tree(
759    source: &str,
760    root: Node<'_>,
761    arch: &'static ArchConfig,
762    layouts: &mut Vec<StructLayout>,
763) {
764    // Phase 0: collect within-file typedef scalar aliases for field type resolution.
765    let aliases = collect_typedef_aliases(source, root);
766
767    // Phase 1: extract struct/union/class layouts.
768    // We do a single linear pass ordered by byte offset so that `#pragma pack`
769    // directives are processed in document order, keeping `current_pack` accurate
770    // at each struct declaration site.
771    //
772    // Pack state: `current_pack = 0` means no active pragma (use default ABI
773    // alignment); `current_pack = N > 0` means cap field alignment at N bytes.
774    let mut pack_stack: Vec<usize> = Vec::new();
775    let mut current_pack: usize = 0;
776
777    let cursor = root.walk();
778    let mut stack = vec![root];
779
780    while let Some(node) = stack.pop() {
781        // Push children in reverse so we process left-to-right in document order.
782        for i in (0..node.child_count()).rev() {
783            if let Some(child) = node.child(i) {
784                stack.push(child);
785            }
786        }
787
788        // Track `#pragma pack(...)` directives (tree-sitter: `preproc_call` nodes).
789        if node.kind() == "preproc_call" {
790            let text = &source[node.byte_range()];
791            if text.contains("#pragma") && text.contains("pack(") {
792                current_pack = parse_pragma_pack(text, &mut pack_stack, current_pack);
793            }
794        }
795
796        // Skip C++ template structs/classes/unions — without monomorphisation we
797        // cannot know T's size, so any sizing would be wrong.  tree-sitter-cpp
798        // wraps these as `template_declaration > struct_specifier` etc.
799        let in_template = node
800            .parent()
801            .map(|p| p.kind() == "template_declaration")
802            .unwrap_or(false);
803        if in_template {
804            let tpl_name = (0..node.child_count())
805                .filter_map(|i| node.child(i))
806                .find(|c| c.kind() == "type_identifier")
807                .map(|c| source[c.byte_range()].to_string())
808                .unwrap_or_else(|| "(unknown)".to_string());
809            eprintln!(
810                "padlock: note: skipping '{tpl_name}' — template \
811                 (layout depends on type arguments; use binary analysis for accurate results)"
812            );
813            crate::record_skipped(
814                &tpl_name,
815                "C++ template — layout depends on type arguments; \
816                 use binary analysis for accurate results",
817            );
818            continue;
819        }
820
821        match node.kind() {
822            "struct_specifier" => {
823                if let Some(layout) = parse_struct_or_union_specifier(
824                    source,
825                    node,
826                    arch,
827                    false,
828                    &aliases,
829                    current_pack,
830                ) {
831                    layouts.push(layout);
832                }
833            }
834            "union_specifier" => {
835                if let Some(layout) = parse_struct_or_union_specifier(
836                    source,
837                    node,
838                    arch,
839                    true,
840                    &aliases,
841                    current_pack,
842                ) {
843                    layouts.push(layout);
844                }
845            }
846            "class_specifier" => {
847                if let Some(layout) =
848                    parse_class_specifier(source, node, arch, &aliases, current_pack)
849                {
850                    layouts.push(layout);
851                }
852            }
853            _ => {}
854        }
855    }
856
857    // Also handle `typedef struct/union { ... } Name;`.
858    // Run a second pass; at this point we do not re-track pragma pack since
859    // typedef structs with non-default packing will have already been captured
860    // in the first pass (the struct specifier inside the typedef is the same
861    // node). The second pass only renames anonymous structs, so pack accuracy
862    // is inherited from the first-pass result.
863    let cursor2 = root.walk();
864    let mut stack2 = vec![root];
865    while let Some(node) = stack2.pop() {
866        for i in (0..node.child_count()).rev() {
867            if let Some(child) = node.child(i) {
868                stack2.push(child);
869            }
870        }
871        if node.kind() == "type_definition"
872            && let Some(layout) =
873                parse_typedef_struct_or_union(source, node, arch, &aliases, current_pack)
874        {
875            let existing = layouts
876                .iter()
877                .position(|l| l.name == layout.name || l.name == "<anonymous>");
878            match existing {
879                Some(i) if layouts[i].name == "<anonymous>" => {
880                    layouts[i] = layout;
881                }
882                None => layouts.push(layout),
883                _ => {}
884            }
885        }
886    }
887    let _ = cursor;
888    let _ = cursor2; // silence unused warnings
889}
890
/// Parse a `#pragma pack(...)` directive and update the pack stack/current level.
///
/// Recognised forms:
/// - `#pragma pack(N)`            — set pack level to N
/// - `#pragma pack()`             — reset to default (0)
/// - `#pragma pack(push, N)`      — push current level, set to N
/// - `#pragma pack(push, id, N)`  — MSVC labelled push; the label is ignored
/// - `#pragma pack(push)`         — push current level (no change)
/// - `#pragma pack(pop)`          — restore previous level
/// - `#pragma pack(pop, N)`       — MSVC: pop one record, then set level to N
///
/// Labels are not tracked, so `pop, id` behaves like a plain `pop`.
/// Anything unrecognised (e.g. `show`) leaves the level unchanged.
///
/// `stack` holds the levels saved by earlier `push` directives; `current` is
/// the level in effect before this directive.
///
/// Returns the new `current_pack` value.
fn parse_pragma_pack(text: &str, stack: &mut Vec<usize>, current: usize) -> usize {
    // Extract the argument list between the outer parentheses of `pack(...)`.
    let Some(start) = text.find("pack(") else {
        return current;
    };
    let rest = &text[start + "pack(".len()..];
    let Some(end) = rest.find(')') else {
        return current;
    };
    let args = rest[..end].trim();

    if args.is_empty() {
        // #pragma pack() — reset to default
        return 0;
    }

    // First token selects the operation; of the remaining tokens only a
    // numeric one matters (identifiers are MSVC labels, which we do not track).
    let mut tokens = args.split(',').map(str::trim);
    let keyword = tokens.next().unwrap_or("");
    let explicit_level = tokens.filter_map(|tok| tok.parse::<usize>().ok()).last();

    match keyword {
        "pop" => {
            // Popping an empty stack falls back to the default level.
            let restored = stack.pop().unwrap_or(0);
            explicit_level.unwrap_or(restored)
        }
        "push" => {
            stack.push(current);
            // push without N: keep current level
            explicit_level.unwrap_or(current)
        }
        level => level.parse::<usize>().unwrap_or(current),
    }
}
932
933/// Parse a `struct_specifier` or `union_specifier` node into a `StructLayout`.
934///
935/// `pragma_pack` is the active `#pragma pack(N)` level at the point of the
936/// declaration (`0` = no active pragma, equivalent to default ABI alignment).
937fn parse_struct_or_union_specifier(
938    source: &str,
939    node: Node<'_>,
940    arch: &'static ArchConfig,
941    is_union: bool,
942    aliases: &HashMap<String, String>,
943    pragma_pack: usize,
944) -> Option<StructLayout> {
945    let mut name = "<anonymous>".to_string();
946    let mut body_node: Option<Node> = None;
947    let mut is_packed = false;
948    // Struct-level alignas: `struct alignas(64) CacheAligned { ... };`
949    let mut struct_alignas: Option<usize> = None;
950
951    for i in 0..node.child_count() {
952        let child = node.child(i)?;
953        match child.kind() {
954            "type_identifier" => name = source[child.byte_range()].to_string(),
955            "field_declaration_list" => body_node = Some(child),
956            "attribute_specifier" => {
957                let text = &source[child.byte_range()];
958                if text.contains("packed") {
959                    is_packed = true;
960                }
961            }
962            // C++11 struct-level alignas: `struct alignas(64) Name { ... };`
963            // tree-sitter-cpp: `alignas_qualifier` as direct child of struct_specifier
964            "alignas_qualifier" | "alignas_specifier" if struct_alignas.is_none() => {
965                struct_alignas = parse_alignas_value(source, child);
966            }
967            _ => {}
968        }
969    }
970
971    let body = body_node?;
972    let mut raw_fields: Vec<RawField> = Vec::new();
973
974    for i in 0..body.child_count() {
975        let child = body.child(i)?;
976        if child.kind() == "field_declaration" {
977            // Check for anonymous nested struct/union: a field_declaration whose
978            // only non-field-identifier child is a struct_specifier/union_specifier
979            // with no type_identifier (i.e. `struct { int x; int y; };`).
980            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, is_union) {
981                raw_fields.extend(anon_fields);
982            } else if let Some((ty, fname, guard, al, ln)) = parse_field_declaration(source, child)
983            {
984                raw_fields.push((fname, ty, guard, al, ln));
985            }
986        }
987    }
988
989    if raw_fields.is_empty() {
990        return None;
991    }
992
993    // Pack consecutive bit-field members into their storage units.
994    let raw_fields = if raw_fields
995        .iter()
996        .any(|(_, ty, _, _, _)| is_bitfield_type(ty))
997    {
998        resolve_bitfield_groups(raw_fields, aliases, arch)
999    } else {
1000        raw_fields
1001    };
1002
1003    let mut fields: Vec<Field> = raw_fields
1004        .into_iter()
1005        .map(|(fname, ty_name, guard, alignas, field_line)| {
1006            let resolved = aliases
1007                .get(&ty_name)
1008                .map(String::as_str)
1009                .unwrap_or(&ty_name);
1010            let (size, natural_align) = c_type_size_align(resolved, arch);
1011            // alignas(N) on a field overrides its alignment requirement.
1012            let align = alignas.unwrap_or(natural_align);
1013            let access = if let Some(g) = guard {
1014                AccessPattern::Concurrent {
1015                    guard: Some(g),
1016                    is_atomic: false,
1017                    is_annotated: true,
1018                }
1019            } else {
1020                AccessPattern::Unknown
1021            };
1022            Field {
1023                name: fname,
1024                ty: TypeInfo::Primitive {
1025                    name: ty_name,
1026                    size,
1027                    align,
1028                },
1029                offset: 0,
1030                size,
1031                align,
1032                source_file: None,
1033                source_line: Some(field_line),
1034                access,
1035            }
1036        })
1037        .collect();
1038
1039    let line = node.start_position().row as u32 + 1;
1040    // `__attribute__((packed))` forces pack_n=1; `#pragma pack(N)` caps at N.
1041    // When both apply, the more restrictive (smaller) wins.
1042    let pack_n = if is_packed {
1043        1
1044    } else if pragma_pack > 0 {
1045        pragma_pack
1046    } else {
1047        0
1048    };
1049    let mut layout = if is_union {
1050        simulate_union_layout(&mut fields, name, arch, Some(line))
1051    } else {
1052        simulate_layout(&mut fields, name, arch, Some(line), pack_n)
1053    };
1054
1055    // Apply struct-level alignas: the struct's alignment requirement is at
1056    // least N; trailing padding may grow to satisfy the new alignment.
1057    if let Some(al) = struct_alignas
1058        && al > layout.align
1059    {
1060        layout.align = al;
1061        if pack_n == 0 {
1062            layout.total_size = layout.total_size.next_multiple_of(al);
1063        }
1064    }
1065
1066    layout.suppressed_findings =
1067        super::suppress::suppressed_from_preceding_source(source, node.start_byte());
1068
1069    Some(layout)
1070}
1071
1072/// Parse a `typedef struct/union { ... } Name;` type_definition node.
1073fn parse_typedef_struct_or_union(
1074    source: &str,
1075    node: Node<'_>,
1076    arch: &'static ArchConfig,
1077    aliases: &HashMap<String, String>,
1078    pragma_pack: usize,
1079) -> Option<StructLayout> {
1080    let mut specifier_node: Option<Node> = None;
1081    let mut is_union = false;
1082    let mut typedef_name: Option<String> = None;
1083
1084    for i in 0..node.child_count() {
1085        let child = node.child(i)?;
1086        match child.kind() {
1087            "struct_specifier" => {
1088                specifier_node = Some(child);
1089                is_union = false;
1090            }
1091            "union_specifier" => {
1092                specifier_node = Some(child);
1093                is_union = true;
1094            }
1095            "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
1096            _ => {}
1097        }
1098    }
1099
1100    let spec = specifier_node?;
1101    let typedef_name = typedef_name?;
1102
1103    let mut layout =
1104        parse_struct_or_union_specifier(source, spec, arch, is_union, aliases, pragma_pack)?;
1105    if layout.name == "<anonymous>" {
1106        layout.name = typedef_name;
1107    }
1108    Some(layout)
1109}
1110
/// Extract a lock guard name from a C/C++ `__attribute__((guarded_by(X)))` or
/// `__attribute__((pt_guarded_by(X)))` specifier.
///
/// Also recognises the common macro forms `GUARDED_BY(X)` and `PT_GUARDED_BY(X)`
/// which expand to the same attribute (Clang thread-safety analysis).
/// The match is done on raw source text, so it works regardless of how
/// tree-sitter structures the inner tokens.
///
/// Only the all-lowercase and all-uppercase spellings are recognised; the
/// lowercase spelling is tried first.  Every occurrence of a keyword is
/// inspected, so an identifier that merely contains the keyword (for example
/// a field called `guarded_by_count`) does not hide a real annotation later in
/// the text.  The argument runs to the parenthesis matching the opening one,
/// so guard expressions such as `obj.lock()` are returned whole.  Surrounding
/// whitespace and double quotes are stripped from the result.
///
/// Returns `None` when no keyword is followed by a non-empty parenthesised
/// argument.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    // `pt_guarded_by` / `PT_GUARDED_BY` contain the shorter keyword as a
    // suffix, so scanning for the two base spellings covers all four forms.
    for kw in ["guarded_by", "GUARDED_BY"] {
        for (pos, _) in field_source.match_indices(kw) {
            // Expect `(` optionally preceded by whitespace
            let after = field_source[pos + kw.len()..].trim_start();
            let Some(inner) = after.strip_prefix('(') else {
                continue;
            };

            // Find the ')' that closes the '(' we just stripped, skipping over
            // any balanced parentheses inside the guard expression.
            let mut depth = 0usize;
            let close = inner.char_indices().find_map(|(idx, ch)| match ch {
                '(' => {
                    depth += 1;
                    None
                }
                ')' if depth == 0 => Some(idx),
                ')' => {
                    depth -= 1;
                    None
                }
                _ => None,
            });
            let Some(end) = close else {
                continue;
            };

            let guard = inner[..end].trim().trim_matches('"');
            if !guard.is_empty() {
                return Some(guard.to_string());
            }
        }
    }
    None
}
1138
1139/// Parse a numeric value from an `alignas_qualifier` node: `alignas(N)`.
1140/// tree-sitter-cpp uses the node kind `alignas_qualifier` for C++11 `alignas`.
1141/// Returns `None` when the specifier contains a type expression rather than
1142/// an integer literal (e.g. `alignas(double)` — handled elsewhere by the
1143/// compiler; we skip those conservatively).
1144fn parse_alignas_value(source: &str, node: Node<'_>) -> Option<usize> {
1145    for i in 0..node.child_count() {
1146        if let Some(child) = node.child(i) {
1147            match child.kind() {
1148                "number_literal" | "integer_literal" | "integer" => {
1149                    let text = source[child.byte_range()].trim();
1150                    if let Ok(n) = text.parse::<usize>() {
1151                        return Some(n);
1152                    }
1153                    // Hex literal: 0x40
1154                    if let Some(hex) = text.strip_prefix("0x").or_else(|| text.strip_prefix("0X")) {
1155                        return usize::from_str_radix(hex, 16).ok();
1156                    }
1157                }
1158                // Recurse for nested nodes (parenthesised expression, etc.)
1159                "parenthesized_expression" | "argument_list" | "alignas_qualifier" => {
1160                    if let r @ Some(_) = parse_alignas_value(source, child) {
1161                        return r;
1162                    }
1163                }
1164                _ => {}
1165            }
1166        }
1167    }
1168    None
1169}
1170
1171/// Returns `(ty, field_name, guard, alignas_override)`.
1172/// `alignas_override` is `Some(N)` when the field carries `alignas(N)`.
1173/// Detect and parse an anonymous nested struct/union field declaration, e.g.:
1174///
1175/// ```c
1176/// struct Packet {
1177///     union {                    // ← anonymous nested union
1178///         uint32_t raw;
1179///         struct { uint8_t a; uint8_t b; uint8_t c; uint8_t d; };
1180///     };
1181///     uint64_t timestamp;
1182/// };
1183/// ```
1184///
1185/// A `field_declaration` is anonymous if it contains a `struct_specifier` or
1186/// `union_specifier` child that has a `field_declaration_list` (i.e. a body)
1187/// but no `type_identifier` (i.e. no name). The fields of the nested
1188/// struct/union are flattened into the parent.
1189///
1190/// Returns `None` if the declaration is not an anonymous nested struct/union
1191/// (the caller should fall through to `parse_field_declaration`).
1192/// (field_name, type_text, guard, alignas_override, source_line_1based)
1193type RawField = (String, String, Option<String>, Option<usize>, u32);
1194
1195#[allow(clippy::only_used_in_recursion)]
1196fn parse_anonymous_nested(
1197    source: &str,
1198    node: Node<'_>,
1199    arch: &'static ArchConfig,
1200    parent_is_union: bool,
1201) -> Option<Vec<RawField>> {
1202    // Find a struct_specifier or union_specifier child.
1203    for i in 0..node.child_count() {
1204        let child = node.child(i)?;
1205        if child.kind() != "struct_specifier" && child.kind() != "union_specifier" {
1206            continue;
1207        }
1208        let nested_is_union = child.kind() == "union_specifier";
1209
1210        // Must have a body (field_declaration_list) but no type_identifier.
1211        let mut has_name = false;
1212        let mut body_node: Option<Node> = None;
1213        for j in 0..child.child_count() {
1214            let sub = child.child(j)?;
1215            match sub.kind() {
1216                "type_identifier" => has_name = true,
1217                "field_declaration_list" => body_node = Some(sub),
1218                _ => {}
1219            }
1220        }
1221
1222        if has_name || body_node.is_none() {
1223            // Named struct/union used as a field type — handled by parse_field_declaration.
1224            continue;
1225        }
1226
1227        let body = body_node?;
1228        let mut nested_raw: Vec<RawField> = Vec::new();
1229
1230        for j in 0..body.child_count() {
1231            let inner = body.child(j)?;
1232            if inner.kind() == "field_declaration" {
1233                // Recurse to handle doubly-nested anonymous structs.
1234                if let Some(deeper) = parse_anonymous_nested(source, inner, arch, nested_is_union) {
1235                    nested_raw.extend(deeper);
1236                } else if let Some((ty, fname, guard, al, ln)) =
1237                    parse_field_declaration(source, inner)
1238                {
1239                    nested_raw.push((fname, ty, guard, al, ln));
1240                }
1241            }
1242        }
1243
1244        // If nested is a union, the fields all share offset 0 (relative to the
1245        // union's placement in the parent). We can't easily track this through
1246        // raw field lists, so we emit them as a synthetic __anon_union_N field
1247        // when the parent cares about offsets, or just flatten for unions.
1248        //
1249        // For simplicity: flatten all fields — the layout simulator will compute
1250        // correct offsets if the parent is a struct, and union semantics are
1251        // preserved when the parent is a union.
1252        let _ = (nested_is_union, parent_is_union);
1253
1254        if !nested_raw.is_empty() {
1255            return Some(nested_raw);
1256        }
1257    }
1258    None
1259}
1260
1261fn parse_field_declaration(source: &str, node: Node<'_>) -> Option<RawField> {
1262    let mut ty_parts: Vec<String> = Vec::new();
1263    let mut field_name: Option<String> = None;
1264    // Bit-field width, e.g. `int flags : 3;` → Some("3")
1265    let mut bit_width: Option<String> = None;
1266    // Collect attribute text for guard extraction
1267    let mut attr_text = String::new();
1268    // Field-level alignas override
1269    let mut alignas_override: Option<usize> = None;
1270
1271    for i in 0..node.child_count() {
1272        let child = node.child(i)?;
1273        match child.kind() {
1274            "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
1275                ty_parts.push(source[child.byte_range()].trim().to_string());
1276            }
1277            // C++ qualified types: std::mutex, ns::Type, etc.
1278            // C++ template types:  std::atomic<uint64_t>, std::vector<int>, etc.
1279            "qualified_identifier" | "template_type" => {
1280                ty_parts.push(source[child.byte_range()].trim().to_string());
1281            }
1282            // Nested struct/union used as a field type: `struct Vec2 tl;`
1283            // Extract just the type_identifier name (e.g. "Vec2") so the
1284            // nested-struct resolution pass can match it by name.
1285            "struct_specifier" | "union_specifier" => {
1286                for j in 0..child.child_count() {
1287                    if let Some(sub) = child.child(j)
1288                        && sub.kind() == "type_identifier"
1289                    {
1290                        ty_parts.push(source[sub.byte_range()].trim().to_string());
1291                        break;
1292                    }
1293                }
1294            }
1295            "field_identifier" => {
1296                field_name = Some(source[child.byte_range()].trim().to_string());
1297            }
1298            "pointer_declarator" => {
1299                field_name = extract_identifier(source, child);
1300                ty_parts.push("*".to_string());
1301            }
1302            // Bit-field clause: `: N`  (tree-sitter-c/cpp node)
1303            "bitfield_clause" => {
1304                let text = source[child.byte_range()].trim();
1305                // Strip leading ':' and whitespace to get just the width digits
1306                bit_width = Some(text.trim_start_matches(':').trim().to_string());
1307            }
1308            // GNU attribute specifier: __attribute__((...))
1309            "attribute_specifier" | "attribute" => {
1310                attr_text.push_str(source[child.byte_range()].trim());
1311                attr_text.push(' ');
1312            }
1313            // C++11 alignas: tree-sitter-cpp wraps it as type_qualifier → alignas_qualifier
1314            // Also handle the direct form in case grammar versions differ.
1315            "alignas_qualifier" | "alignas_specifier" if alignas_override.is_none() => {
1316                alignas_override = parse_alignas_value(source, child);
1317            }
1318            // type_qualifier wraps alignas_qualifier for field declarations:
1319            // `alignas(8) char c;` → type_qualifier { alignas_qualifier { ... } }
1320            "type_qualifier" if alignas_override.is_none() => {
1321                for j in 0..child.child_count() {
1322                    if let Some(sub) = child.child(j)
1323                        && (sub.kind() == "alignas_qualifier" || sub.kind() == "alignas_specifier")
1324                    {
1325                        alignas_override = parse_alignas_value(source, sub);
1326                        break;
1327                    }
1328                }
1329            }
1330            _ => {}
1331        }
1332    }
1333
1334    let base_ty = ty_parts.join(" ");
1335    if base_ty.is_empty() {
1336        return None;
1337    }
1338    // Annotate bit-field types as "type:N" so callers can detect and report them;
1339    // `strip_bitfield_suffix` recovers the base type for size/align lookup.
1340    let ty = if let Some(w) = bit_width {
1341        format!("{base_ty}:{w}")
1342    } else {
1343        base_ty
1344    };
1345    // Anonymous bitfields (`int : 3;` with no declarator) are padding bits that
1346    // still consume storage-unit bits. Return them with an empty name so
1347    // `resolve_bitfield_groups` can account for them without displaying a name.
1348    let fname = match field_name {
1349        Some(n) => n,
1350        None if is_bitfield_type(&ty) => String::new(),
1351        None => return None,
1352    };
1353
1354    // Also check the full field source text (attribute_specifier may not always
1355    // be a direct child depending on tree-sitter grammar version).
1356    let field_src = source[node.byte_range()].to_string();
1357    let guard = extract_guard_from_c_field_text(&attr_text)
1358        .or_else(|| extract_guard_from_c_field_text(&field_src));
1359
1360    let line = node.start_position().row as u32 + 1;
1361    Some((ty, fname, guard, alignas_override, line))
1362}
1363
1364fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
1365    if node.kind() == "field_identifier" || node.kind() == "identifier" {
1366        return Some(source[node.byte_range()].to_string());
1367    }
1368    for i in 0..node.child_count() {
1369        if let Some(child) = node.child(i)
1370            && let Some(name) = extract_identifier(source, child)
1371        {
1372            return Some(name);
1373        }
1374    }
1375    None
1376}
1377
1378// ── public API ────────────────────────────────────────────────────────────────
1379
1380pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1381    let mut parser = Parser::new();
1382    parser.set_language(&tree_sitter_c::LANGUAGE.into())?;
1383    let tree = parser
1384        .parse(source, None)
1385        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1386    let mut layouts = Vec::new();
1387    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1388    Ok(layouts)
1389}
1390
1391pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1392    let mut parser = Parser::new();
1393    parser.set_language(&tree_sitter_cpp::LANGUAGE.into())?;
1394    let tree = parser
1395        .parse(source, None)
1396        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1397    let mut layouts = Vec::new();
1398    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1399    Ok(layouts)
1400}
1401
1402// ── tests ─────────────────────────────────────────────────────────────────────
1403
1404#[cfg(test)]
1405mod tests {
1406    use super::*;
1407    use padlock_core::arch::X86_64_SYSV;
1408
1409    #[test]
1410    fn parse_simple_c_struct() {
1411        let src = r#"
1412struct Point {
1413    int x;
1414    int y;
1415};
1416"#;
1417        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1418        assert_eq!(layouts.len(), 1);
1419        assert_eq!(layouts[0].name, "Point");
1420        assert_eq!(layouts[0].fields.len(), 2);
1421        assert_eq!(layouts[0].fields[0].name, "x");
1422        assert_eq!(layouts[0].fields[1].name, "y");
1423    }
1424
1425    #[test]
1426    fn parse_typedef_struct() {
1427        let src = r#"
1428typedef struct {
1429    char  is_active;
1430    double timeout;
1431    int   port;
1432} Connection;
1433"#;
1434        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1435        assert_eq!(layouts.len(), 1);
1436        assert_eq!(layouts[0].name, "Connection");
1437        assert_eq!(layouts[0].fields.len(), 3);
1438    }
1439
1440    #[test]
1441    fn c_layout_computes_offsets() {
1442        let src = "struct T { char a; double b; };";
1443        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1444        assert_eq!(layouts.len(), 1);
1445        let layout = &layouts[0];
1446        // char at offset 0, double at offset 8 (7 bytes padding)
1447        assert_eq!(layout.fields[0].offset, 0);
1448        assert_eq!(layout.fields[1].offset, 8);
1449        assert_eq!(layout.total_size, 16);
1450    }
1451
1452    #[test]
1453    fn c_layout_detects_padding() {
1454        let src = "struct T { char a; int b; };";
1455        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1456        let gaps = padlock_core::ir::find_padding(&layouts[0]);
1457        assert!(!gaps.is_empty());
1458        assert_eq!(gaps[0].bytes, 3); // 3 bytes padding between char and int
1459    }
1460
1461    #[test]
1462    fn parse_cpp_struct() {
1463        let src = "struct Vec3 { float x; float y; float z; };";
1464        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1465        assert_eq!(layouts.len(), 1);
1466        assert_eq!(layouts[0].fields.len(), 3);
1467    }
1468
1469    // ── SIMD types ────────────────────────────────────────────────────────────
1470
1471    #[test]
1472    fn simd_sse_field_size_and_align() {
1473        let src = "struct Vecs { __m128 a; __m256 b; };";
1474        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1475        assert_eq!(layouts.len(), 1);
1476        let f = &layouts[0].fields;
1477        assert_eq!(f[0].size, 16); // __m128
1478        assert_eq!(f[0].align, 16);
1479        assert_eq!(f[1].size, 32); // __m256
1480        assert_eq!(f[1].align, 32);
1481    }
1482
1483    #[test]
1484    fn simd_avx512_size() {
1485        let src = "struct Wide { __m512 v; };";
1486        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1487        assert_eq!(layouts[0].fields[0].size, 64);
1488        assert_eq!(layouts[0].fields[0].align, 64);
1489    }
1490
1491    #[test]
1492    fn simd_padding_detected_when_small_field_before_avx() {
1493        // char(1) + [31 pad] + __m256(32) = 64 bytes, 31 wasted
1494        let src = "struct Mixed { char flag; __m256 data; };";
1495        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1496        let gaps = padlock_core::ir::find_padding(&layouts[0]);
1497        assert!(!gaps.is_empty());
1498        assert_eq!(gaps[0].bytes, 31);
1499    }
1500
1501    // ── union parsing ─────────────────────────────────────────────────────────
1502
1503    #[test]
1504    fn union_fields_all_at_offset_zero() {
1505        let src = "union Data { int i; float f; double d; };";
1506        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1507        assert_eq!(layouts.len(), 1);
1508        let u = &layouts[0];
1509        assert!(u.is_union);
1510        for field in &u.fields {
1511            assert_eq!(
1512                field.offset, 0,
1513                "union field '{}' should be at offset 0",
1514                field.name
1515            );
1516        }
1517    }
1518
1519    #[test]
1520    fn union_total_size_is_max_field() {
1521        // double is the largest (8 bytes); total should be 8
1522        let src = "union Data { int i; float f; double d; };";
1523        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1524        assert_eq!(layouts[0].total_size, 8);
1525    }
1526
1527    #[test]
1528    fn union_no_padding_finding() {
1529        let src = "union Data { int i; double d; };";
1530        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1531        let report = padlock_core::findings::Report::from_layouts(&layouts);
1532        let sr = &report.structs[0];
1533        assert!(
1534            !sr.findings
1535                .iter()
1536                .any(|f| matches!(f, padlock_core::findings::Finding::PaddingWaste { .. }))
1537        );
1538        assert!(
1539            !sr.findings
1540                .iter()
1541                .any(|f| matches!(f, padlock_core::findings::Finding::ReorderSuggestion { .. }))
1542        );
1543    }
1544
1545    #[test]
1546    fn typedef_union_parsed() {
1547        let src = "typedef union { int a; double b; } Value;";
1548        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1549        assert_eq!(layouts.len(), 1);
1550        assert_eq!(layouts[0].name, "Value");
1551        assert!(layouts[0].is_union);
1552    }
1553
1554    // ── attribute guard extraction ─────────────────────────────────────────────
1555
1556    #[test]
1557    fn extract_guard_from_c_guarded_by_macro() {
1558        let text = "int value GUARDED_BY(mu);";
1559        let guard = extract_guard_from_c_field_text(text);
1560        assert_eq!(guard.as_deref(), Some("mu"));
1561    }
1562
1563    #[test]
1564    fn extract_guard_from_c_attribute_specifier() {
1565        let text = "__attribute__((guarded_by(counter_lock))) uint64_t counter;";
1566        let guard = extract_guard_from_c_field_text(text);
1567        assert_eq!(guard.as_deref(), Some("counter_lock"));
1568    }
1569
1570    #[test]
1571    fn extract_guard_pt_guarded_by() {
1572        let text = "int *ptr PT_GUARDED_BY(ptr_lock);";
1573        let guard = extract_guard_from_c_field_text(text);
1574        assert_eq!(guard.as_deref(), Some("ptr_lock"));
1575    }
1576
1577    #[test]
1578    fn no_guard_returns_none() {
1579        let guard = extract_guard_from_c_field_text("int x;");
1580        assert!(guard.is_none());
1581    }
1582
1583    #[test]
1584    fn c_struct_guarded_by_sets_concurrent_access() {
1585        // Using GUARDED_BY macro style in comments/text — tree-sitter won't parse
1586        // macro expansions, so test the text-extraction path via parse_field_declaration
1587        // indirectly by checking extract_guard_from_c_field_text.
1588        let text = "uint64_t readers GUARDED_BY(lock_a);";
1589        assert_eq!(
1590            extract_guard_from_c_field_text(text).as_deref(),
1591            Some("lock_a")
1592        );
1593    }
1594
1595    #[test]
1596    fn c_struct_different_guards_detected_as_false_sharing() {
1597        use padlock_core::arch::X86_64_SYSV;
1598        use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
1599
1600        // Manually build a layout with two fields on the same cache line,
1601        // different guards — mirrors what the C frontend would produce for
1602        // __attribute__((guarded_by(...))) annotated fields.
1603        let mut layout = StructLayout {
1604            name: "S".into(),
1605            total_size: 128,
1606            align: 8,
1607            fields: vec![
1608                Field {
1609                    name: "readers".into(),
1610                    ty: TypeInfo::Primitive {
1611                        name: "uint64_t".into(),
1612                        size: 8,
1613                        align: 8,
1614                    },
1615                    offset: 0,
1616                    size: 8,
1617                    align: 8,
1618                    source_file: None,
1619                    source_line: None,
1620                    access: AccessPattern::Concurrent {
1621                        guard: Some("lock_a".into()),
1622                        is_atomic: false,
1623                        is_annotated: true,
1624                    },
1625                },
1626                Field {
1627                    name: "writers".into(),
1628                    ty: TypeInfo::Primitive {
1629                        name: "uint64_t".into(),
1630                        size: 8,
1631                        align: 8,
1632                    },
1633                    offset: 8,
1634                    size: 8,
1635                    align: 8,
1636                    source_file: None,
1637                    source_line: None,
1638                    access: AccessPattern::Concurrent {
1639                        guard: Some("lock_b".into()),
1640                        is_atomic: false,
1641                        is_annotated: true,
1642                    },
1643                },
1644            ],
1645            source_file: None,
1646            source_line: None,
1647            arch: &X86_64_SYSV,
1648            is_packed: false,
1649            is_union: false,
1650            is_repr_rust: false,
1651            suppressed_findings: Vec::new(),
1652            uncertain_fields: Vec::new(),
1653        };
1654        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
1655            &layout
1656        ));
1657        // Same guard → no false sharing
1658        layout.fields[1].access = AccessPattern::Concurrent {
1659            guard: Some("lock_a".into()),
1660            is_atomic: false,
1661            is_annotated: true,
1662        };
1663        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
1664            &layout
1665        ));
1666    }
1667
1668    // ── C++ class: vtable pointer ─────────────────────────────────────────────
1669
1670    #[test]
1671    fn cpp_class_with_virtual_method_has_vptr() {
1672        let src = r#"
1673class Widget {
1674    virtual void draw();
1675    int x;
1676    int y;
1677};
1678"#;
1679        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1680        assert_eq!(layouts.len(), 1);
1681        let l = &layouts[0];
1682        // First field must be __vptr
1683        assert_eq!(l.fields[0].name, "__vptr");
1684        assert_eq!(l.fields[0].size, 8); // pointer on x86_64
1685        // __vptr is at offset 0
1686        assert_eq!(l.fields[0].offset, 0);
1687        // int x should come after the pointer (at offset 8)
1688        let x = l.fields.iter().find(|f| f.name == "x").unwrap();
1689        assert_eq!(x.offset, 8);
1690    }
1691
1692    #[test]
1693    fn cpp_class_without_virtual_has_no_vptr() {
1694        let src = "class Plain { int a; int b; };";
1695        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1696        assert_eq!(layouts.len(), 1);
1697        assert!(!layouts[0].fields.iter().any(|f| f.name == "__vptr"));
1698    }
1699
1700    #[test]
1701    fn cpp_struct_keyword_with_virtual_has_vptr() {
1702        // `struct` in C++ can also have virtual methods
1703        let src = "struct IFoo { virtual ~IFoo(); virtual void bar(); };";
1704        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1705        // struct_specifier doesn't go through parse_class_specifier, so no __vptr
1706        // (vtable injection is only for `class` nodes)
1707        let _ = layouts; // just verify it parses without panic
1708    }
1709
1710    // ── C++ class: single inheritance ─────────────────────────────────────────
1711
1712    #[test]
1713    fn cpp_derived_class_has_base_slot() {
1714        let src = r#"
1715class Base {
1716    int x;
1717};
1718class Derived : public Base {
1719    int y;
1720};
1721"#;
1722        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1723        // Both Base and Derived should be parsed
1724        let derived = layouts.iter().find(|l| l.name == "Derived").unwrap();
1725        // Derived must have a __base_Base synthetic field
1726        assert!(
1727            derived.fields.iter().any(|f| f.name == "__base_Base"),
1728            "Derived should have a __base_Base field"
1729        );
1730        // The y field should come after __base_Base
1731        let base_field = derived
1732            .fields
1733            .iter()
1734            .find(|f| f.name == "__base_Base")
1735            .unwrap();
1736        let y_field = derived.fields.iter().find(|f| f.name == "y").unwrap();
1737        assert!(y_field.offset >= base_field.offset + base_field.size);
1738    }
1739
1740    #[test]
1741    fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
1742        let src = r#"
1743class A { int a; };
1744class B { int b; };
1745class C : public A, public B { int c; };
1746"#;
1747        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1748        let c = layouts.iter().find(|l| l.name == "C").unwrap();
1749        assert!(c.fields.iter().any(|f| f.name == "__base_A"));
1750        assert!(c.fields.iter().any(|f| f.name == "__base_B"));
1751    }
1752
1753    #[test]
1754    fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
1755        // class with virtual method: size = sizeof(__vptr) + member fields + padding
1756        let src = "class V { virtual void f(); int x; };";
1757        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1758        let l = &layouts[0];
1759        // __vptr(8) + int(4) + 4 pad = 16 bytes on x86_64
1760        assert_eq!(l.total_size, 16);
1761    }
1762
1763    // ── bitfield handling ─────────────────────────────────────────────────────
1764
1765    #[test]
1766    fn is_bitfield_type_detects_colon_n() {
1767        assert!(is_bitfield_type("int:3"));
1768        assert!(is_bitfield_type("unsigned int:16"));
1769        assert!(is_bitfield_type("uint32_t:1"));
1770        // Not bit-fields — contains ':' but not followed by pure digits
1771        assert!(!is_bitfield_type("std::atomic<int>"));
1772        assert!(!is_bitfield_type("ns::Type"));
1773        assert!(!is_bitfield_type("int"));
1774    }
1775
1776    #[test]
1777    fn struct_with_bitfields_is_parsed() {
1778        // Bitfields are now packed into storage-unit-sized synthetic fields
1779        // instead of skipping the whole struct.
1780        let src = r#"
1781struct Flags {
1782    unsigned int active : 1;
1783    unsigned int ready  : 1;
1784    unsigned int error  : 6;
1785    int value;
1786};
1787"#;
1788        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1789        let l = layouts
1790            .iter()
1791            .find(|l| l.name == "Flags")
1792            .expect("Flags must be parsed");
1793        // active:1 + ready:1 + error:6 = 8 bits, fits in one int storage unit (4 bytes)
1794        // Then int value (4 bytes) → total 8 bytes
1795        assert_eq!(l.total_size, 8, "Flags should be 8 bytes");
1796        // The bitfield group and value field
1797        assert_eq!(
1798            l.fields.len(),
1799            2,
1800            "should have 2 fields (bitfield group + value)"
1801        );
1802        assert!(
1803            l.fields[0].name.starts_with('['),
1804            "first field should be the bitfield group"
1805        );
1806        assert_eq!(l.fields[1].name, "value");
1807    }
1808
1809    #[test]
1810    fn bitfield_group_name_shows_packed_members() {
1811        let src = "struct S { unsigned int a : 3; unsigned int b : 5; int c; };";
1812        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1813        let l = layouts.iter().find(|l| l.name == "S").unwrap();
1814        assert!(
1815            l.fields[0].name.contains("a:3"),
1816            "group name should contain a:3, got {}",
1817            l.fields[0].name
1818        );
1819        assert!(
1820            l.fields[0].name.contains("b:5"),
1821            "group name should contain b:5, got {}",
1822            l.fields[0].name
1823        );
1824    }
1825
1826    #[test]
1827    fn bitfield_different_unit_size_starts_new_group() {
1828        // uint8_t and uint32_t have different storage-unit sizes → separate groups.
1829        let src = "struct S { uint8_t a : 3; uint32_t b : 5; };";
1830        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1831        let l = layouts.iter().find(|l| l.name == "S").unwrap();
1832        // Two separate groups
1833        assert_eq!(l.fields.len(), 2, "different unit sizes → 2 groups");
1834        // b (uint32_t) aligns to 4 → offset 4; total = 8
1835        assert_eq!(l.total_size, 8);
1836    }
1837
1838    #[test]
1839    fn bitfield_overflow_starts_new_storage_unit() {
1840        // a:5 + b:5 = 10 bits > 8 (uint8_t): b must go into the next byte.
1841        let src = "struct S { uint8_t a : 5; uint8_t b : 5; };";
1842        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1843        let l = layouts.iter().find(|l| l.name == "S").unwrap();
1844        assert_eq!(l.fields.len(), 2, "overflow → 2 storage units");
1845        assert_eq!(l.total_size, 2, "two uint8_t units → 2 bytes");
1846    }
1847
1848    #[test]
1849    fn zero_width_bitfield_flushes_group() {
1850        // `int : 0` forces alignment to the next int boundary.
1851        let src = "struct S { int a : 3; int : 0; int b : 5; };";
1852        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1853        let l = layouts.iter().find(|l| l.name == "S").unwrap();
1854        // a:3 in one int unit, b:5 in the next → two groups, each 4 bytes
1855        assert_eq!(l.fields.len(), 2, "zero-width bitfield flushes → 2 groups");
1856        assert_eq!(l.total_size, 8, "two int units → 8 bytes");
1857    }
1858
1859    #[test]
1860    fn anonymous_bitfield_consumes_bits_in_storage_unit() {
1861        // `int : 5` is anonymous padding; a:3 + anon:5 = 8 bits, so b:3 still fits
1862        // in the same int unit (8 + 3 = 11 <= 32).  Without the anonymous bits
1863        // accounted for, a naïve grouper would also pack c into the same unit
1864        // even if that overflowed.
1865        let src = "struct S { int a : 3; int : 5; int b : 3; };";
1866        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1867        let l = layouts.iter().find(|l| l.name == "S").unwrap();
1868        // All three fit in one int storage unit → exactly one synthetic field, 4 bytes.
1869        assert_eq!(
1870            l.fields.len(),
1871            1,
1872            "a:3 + anon:5 + b:3 = 11 bits, fits in one int unit"
1873        );
1874        assert_eq!(l.total_size, 4);
1875        // The display name must show a and b but not the anonymous padding.
1876        assert!(l.fields[0].name.contains("a:3"), "name should include a:3");
1877        assert!(l.fields[0].name.contains("b:3"), "name should include b:3");
1878    }
1879
1880    #[test]
1881    fn anonymous_only_bitfield_unit_emits_pad_placeholder() {
1882        // `int : 32` fills an entire 4-byte int unit with anonymous padding.
1883        // The next field `a : 4` overflows and starts a new unit.
1884        // The all-anonymous first unit emits a `[__pad]` placeholder so that
1885        // `simulate_layout` counts its bytes in the total size.
1886        let src = "struct S { int : 32; int a : 4; };";
1887        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1888        let l = layouts.iter().find(|l| l.name == "S").unwrap();
1889        // Two synthetic fields: [__pad] (4 bytes) + [a:4] (4 bytes) = 8 bytes total.
1890        assert_eq!(l.fields.len(), 2, "[__pad] + [a:4] → 2 synthetic fields");
1891        assert!(
1892            l.fields[0].name.contains("__pad"),
1893            "first field is the pad placeholder"
1894        );
1895        assert_eq!(l.total_size, 8, "two int units → 8 bytes");
1896    }
1897
1898    #[test]
1899    fn struct_without_bitfields_is_still_parsed() {
1900        // Ensure the bitfield guard doesn't affect normal structs.
1901        let src = "struct Normal { int a; char b; double c; };";
1902        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1903        assert_eq!(layouts.len(), 1);
1904        assert_eq!(layouts[0].name, "Normal");
1905    }
1906
1907    #[test]
1908    fn c_struct_fields_have_source_lines() {
1909        let src = "struct Point {\n    int x;\n    int y;\n};";
1910        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1911        assert_eq!(layouts.len(), 1);
1912        let fields = &layouts[0].fields;
1913        // x is on line 2, y is on line 3
1914        assert_eq!(fields[0].source_line, Some(2), "x should be line 2");
1915        assert_eq!(fields[1].source_line, Some(3), "y should be line 3");
1916    }
1917
1918    #[test]
1919    fn cpp_class_with_bitfields_is_parsed() {
1920        // Bitfields in C++ classes are now grouped into storage units.
1921        let src = "class Packed { int x : 4; int y : 4; };";
1922        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1923        let l = layouts
1924            .iter()
1925            .find(|l| l.name == "Packed")
1926            .expect("Packed must be parsed");
1927        // x:4 + y:4 = 8 bits, fits in one int (4 bytes)
1928        assert_eq!(
1929            l.total_size, 4,
1930            "x:4 + y:4 fit in one int storage unit → 4 bytes"
1931        );
1932    }
1933
1934    #[test]
1935    fn all_bitfield_struct_is_parsed() {
1936        // Struct with ONLY bit-field members is now supported.
1937        let src = "struct BitPacked { int x:4; int y:4; };";
1938        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1939        let l = layouts
1940            .iter()
1941            .find(|l| l.name == "BitPacked")
1942            .expect("BitPacked must now be parsed");
1943        // x:4 + y:4 = 8 bits in one int storage unit → 4 bytes
1944        assert_eq!(l.total_size, 4);
1945    }
1946
1947    // ── __attribute__((packed)) detection ─────────────────────────────────────
1948
1949    #[test]
1950    fn packed_struct_has_no_alignment_padding() {
1951        // Without packed: char(1) + 3-byte pad + int(4) + char(1) + 3-byte pad = 12 bytes
1952        // With packed:    char(1) + int(4) + char(1) = 6 bytes, align=1
1953        let src = r#"
1954struct __attribute__((packed)) Tight {
1955    char a;
1956    int  b;
1957    char c;
1958};
1959"#;
1960        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1961        let l = layouts.iter().find(|l| l.name == "Tight").expect("Tight");
1962        assert!(l.is_packed, "should be marked is_packed");
1963        assert_eq!(l.total_size, 6, "packed: no padding inserted");
1964        assert_eq!(l.fields[0].offset, 0);
1965        assert_eq!(l.fields[1].offset, 1); // immediately after char
1966        assert_eq!(l.fields[2].offset, 5);
1967    }
1968
1969    #[test]
1970    fn non_packed_struct_has_normal_alignment_padding() {
1971        // Confirm baseline: same struct without __attribute__((packed)) gets padded
1972        let src = r#"
1973struct Normal {
1974    char a;
1975    int  b;
1976    char c;
1977};
1978"#;
1979        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1980        let l = layouts.iter().find(|l| l.name == "Normal").expect("Normal");
1981        assert!(!l.is_packed);
1982        assert_eq!(l.total_size, 12);
1983        assert_eq!(l.fields[1].offset, 4); // aligned to 4
1984    }
1985
1986    #[test]
1987    fn cpp_class_packed_attribute_detected() {
1988        let src = r#"
1989class __attribute__((packed)) Dense {
1990    char a;
1991    int  b;
1992};
1993"#;
1994        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1995        let l = layouts.iter().find(|l| l.name == "Dense").expect("Dense");
1996        assert!(
1997            l.is_packed,
1998            "C++ class with __attribute__((packed)) must be marked packed"
1999        );
2000        assert_eq!(l.total_size, 5); // char(1) + int(4), no padding
2001    }
2002
2003    // ── alignas detection ─────────────────────────────────────────────────────
2004
2005    #[test]
2006    fn field_alignas_overrides_natural_alignment() {
2007        // char is normally align=1 but alignas(8) forces it to align-8.
2008        // Layout: c(1B at offset 0, align=8) + x(4B at offset 4, align=4)
2009        // c must start on an 8-byte boundary (trivially satisfied at offset 0).
2010        // After c (1 byte), x aligns to 4: offset = 1.next_multiple_of(4) = 4.
2011        // Struct align = max(8, 4) = 8. Total = 8 bytes (4+4 → 8 → ok for align 8).
2012        let src = r#"
2013struct S {
2014    alignas(8) char c;
2015    int x;
2016};
2017"#;
2018        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2019        let l = layouts.iter().find(|l| l.name == "S").expect("S");
2020        // c should be forced to align 8
2021        let c_field = l.fields.iter().find(|f| f.name == "c").unwrap();
2022        assert_eq!(c_field.align, 8);
2023        // x comes after c (1 byte) with natural alignment 4 → offset 4
2024        let x_field = l.fields.iter().find(|f| f.name == "x").unwrap();
2025        assert_eq!(x_field.offset, 4);
2026        // Struct alignment is max(alignas(8), int align 4) = 8
2027        assert_eq!(l.align, 8);
2028        // Total = 8 bytes (x at 4, size 4; 4+4=8; 8 is multiple of align 8)
2029        assert_eq!(l.total_size, 8);
2030    }
2031
2032    #[test]
2033    fn struct_level_alignas_increases_struct_alignment() {
2034        // alignas(64) on the struct means its alignment requirement is 64.
2035        // Total size must be a multiple of 64.
2036        let src = r#"
2037struct alignas(64) CacheLine {
2038    int x;
2039    int y;
2040};
2041"#;
2042        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2043        let l = layouts
2044            .iter()
2045            .find(|l| l.name == "CacheLine")
2046            .expect("CacheLine");
2047        assert_eq!(l.align, 64);
2048        assert_eq!(l.total_size % 64, 0);
2049    }
2050
2051    #[test]
2052    fn alignas_on_field_smaller_than_natural_is_ignored() {
2053        // alignas(1) on an int field: does NOT reduce alignment below 4.
2054        // In C++, alignas cannot reduce alignment below the natural alignment.
2055        // Our implementation stores the alignas value; natural alignment wins
2056        // because we take max(alignas, natural) in the caller.
2057        // Note: we currently store alignas directly; this test documents behaviour.
2058        let src = "struct S { int x; int y; };";
2059        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2060        let l = &layouts[0];
2061        assert_eq!(l.fields[0].align, 4); // natural alignment, not reduced
2062    }
2063
2064    #[test]
2065    fn cpp_class_alignas_detected() {
2066        let src = r#"
2067class alignas(32) Aligned {
2068    double x;
2069    double y;
2070};
2071"#;
2072        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2073        let l = layouts
2074            .iter()
2075            .find(|l| l.name == "Aligned")
2076            .expect("Aligned");
2077        assert_eq!(l.align, 32);
2078        assert_eq!(l.total_size % 32, 0);
2079    }
2080
2081    // ── bad weather: alignas edge cases ───────────────────────────────────────
2082
2083    #[test]
2084    fn struct_without_alignas_unchanged() {
2085        // Ensure the alignas detection path doesn't affect structs without it
2086        let src = "struct Plain { int a; char b; };";
2087        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2088        let l = &layouts[0];
2089        assert_eq!(l.align, 4); // max field alignment = int = 4
2090        assert_eq!(l.total_size, 8); // int(4) + char(1) + 3 pad
2091    }
2092
2093    // ── anonymous nested structs/unions ───────────────────────────────────────
2094
2095    #[test]
2096    fn anonymous_nested_union_fields_flattened() {
2097        let src = r#"
2098struct Packet {
2099    union {
2100        uint32_t raw;
2101        uint8_t bytes[4];
2102    };
2103    uint64_t timestamp;
2104};
2105"#;
2106        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2107        let l = layouts.iter().find(|l| l.name == "Packet").expect("Packet");
2108        // raw, bytes (or similar) and timestamp must all be present
2109        assert!(
2110            l.fields.iter().any(|f| f.name == "raw"),
2111            "raw field must be flattened into Packet"
2112        );
2113        assert!(
2114            l.fields.iter().any(|f| f.name == "timestamp"),
2115            "timestamp must be present"
2116        );
2117    }
2118
2119    #[test]
2120    fn anonymous_nested_struct_fields_flattened() {
2121        let src = r#"
2122struct Outer {
2123    struct {
2124        int x;
2125        int y;
2126    };
2127    double z;
2128};
2129"#;
2130        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2131        let l = layouts.iter().find(|l| l.name == "Outer").expect("Outer");
2132        assert!(
2133            l.fields.iter().any(|f| f.name == "x"),
2134            "x must be flattened"
2135        );
2136        assert!(
2137            l.fields.iter().any(|f| f.name == "y"),
2138            "y must be flattened"
2139        );
2140        assert!(l.fields.iter().any(|f| f.name == "z"), "z present");
2141        // Total: x(4) + y(4) + z(8) = 16 bytes, no padding
2142        assert_eq!(l.total_size, 16);
2143    }
2144
2145    #[test]
2146    fn named_nested_struct_not_flattened() {
2147        // A named struct used as a field type must NOT be flattened
2148        let src = r#"
2149struct Vec2 { float x; float y; };
2150struct Rect { struct Vec2 tl; struct Vec2 br; };
2151"#;
2152        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2153        let rect = layouts.iter().find(|l| l.name == "Rect").expect("Rect");
2154        // Should have tl and br as opaque fields, not x/y flattened
2155        assert_eq!(rect.fields.len(), 2);
2156        assert!(rect.fields.iter().any(|f| f.name == "tl"));
2157        assert!(rect.fields.iter().any(|f| f.name == "br"));
2158    }
2159
2160    // ── type-table tests ──────────────────────────────────────────────────────
2161
2162    #[test]
2163    fn linux_kernel_types_correct_size() {
2164        // u8/u16/u32/u64 and s8/s16/s32/s64 (linux/types.h)
2165        assert_eq!(c_type_size_align("u8", &X86_64_SYSV), (1, 1));
2166        assert_eq!(c_type_size_align("u16", &X86_64_SYSV), (2, 2));
2167        assert_eq!(c_type_size_align("u32", &X86_64_SYSV), (4, 4));
2168        assert_eq!(c_type_size_align("u64", &X86_64_SYSV), (8, 8));
2169        assert_eq!(c_type_size_align("s8", &X86_64_SYSV), (1, 1));
2170        assert_eq!(c_type_size_align("s16", &X86_64_SYSV), (2, 2));
2171        assert_eq!(c_type_size_align("s32", &X86_64_SYSV), (4, 4));
2172        assert_eq!(c_type_size_align("s64", &X86_64_SYSV), (8, 8));
2173    }
2174
2175    #[test]
2176    fn linux_kernel_dunder_types_correct_size() {
2177        assert_eq!(c_type_size_align("__u8", &X86_64_SYSV), (1, 1));
2178        assert_eq!(c_type_size_align("__u16", &X86_64_SYSV), (2, 2));
2179        assert_eq!(c_type_size_align("__u32", &X86_64_SYSV), (4, 4));
2180        assert_eq!(c_type_size_align("__u64", &X86_64_SYSV), (8, 8));
2181        assert_eq!(c_type_size_align("__s8", &X86_64_SYSV), (1, 1));
2182        assert_eq!(c_type_size_align("__s64", &X86_64_SYSV), (8, 8));
2183        // Endian-annotated types are same width as their base
2184        assert_eq!(c_type_size_align("__be16", &X86_64_SYSV), (2, 2));
2185        assert_eq!(c_type_size_align("__le32", &X86_64_SYSV), (4, 4));
2186        assert_eq!(c_type_size_align("__be64", &X86_64_SYSV), (8, 8));
2187    }
2188
2189    #[test]
2190    fn c99_fast_types_correct_size() {
2191        // fast8/16 are their natural width
2192        assert_eq!(c_type_size_align("uint_fast8_t", &X86_64_SYSV), (1, 1));
2193        assert_eq!(c_type_size_align("uint_fast16_t", &X86_64_SYSV), (2, 2));
2194        // fast32/64 are pointer-sized on 64-bit
2195        assert_eq!(c_type_size_align("uint_fast32_t", &X86_64_SYSV), (8, 8));
2196        assert_eq!(c_type_size_align("uint_fast64_t", &X86_64_SYSV), (8, 8));
2197        // least types are their minimum guaranteed width
2198        assert_eq!(c_type_size_align("uint_least8_t", &X86_64_SYSV), (1, 1));
2199        assert_eq!(c_type_size_align("uint_least32_t", &X86_64_SYSV), (4, 4));
2200        assert_eq!(c_type_size_align("uint_least64_t", &X86_64_SYSV), (8, 8));
2201        assert_eq!(c_type_size_align("intmax_t", &X86_64_SYSV), (8, 8));
2202        assert_eq!(c_type_size_align("uintmax_t", &X86_64_SYSV), (8, 8));
2203    }
2204
2205    #[test]
2206    fn gcc_int128_correct_size() {
2207        assert_eq!(c_type_size_align("__int128", &X86_64_SYSV), (16, 16));
2208        assert_eq!(c_type_size_align("__uint128", &X86_64_SYSV), (16, 16));
2209        assert_eq!(c_type_size_align("__int128_t", &X86_64_SYSV), (16, 16));
2210        // unsigned __int128 — "unsigned " prefix is stripped, then __int128 matched
2211        assert_eq!(
2212            c_type_size_align("unsigned __int128", &X86_64_SYSV),
2213            (16, 16)
2214        );
2215    }
2216
2217    #[test]
2218    fn windows_types_correct_size() {
2219        assert_eq!(c_type_size_align("BYTE", &X86_64_SYSV), (1, 1));
2220        assert_eq!(c_type_size_align("WORD", &X86_64_SYSV), (2, 2));
2221        assert_eq!(c_type_size_align("DWORD", &X86_64_SYSV), (4, 4));
2222        assert_eq!(c_type_size_align("QWORD", &X86_64_SYSV), (8, 8));
2223        assert_eq!(c_type_size_align("BOOL", &X86_64_SYSV), (4, 4));
2224        assert_eq!(c_type_size_align("UINT8", &X86_64_SYSV), (1, 1));
2225        assert_eq!(c_type_size_align("INT32", &X86_64_SYSV), (4, 4));
2226        assert_eq!(c_type_size_align("UINT64", &X86_64_SYSV), (8, 8));
2227        assert_eq!(c_type_size_align("HANDLE", &X86_64_SYSV), (8, 8));
2228        assert_eq!(c_type_size_align("LPVOID", &X86_64_SYSV), (8, 8));
2229    }
2230
2231    #[test]
2232    fn char_types_correct_size() {
2233        assert_eq!(c_type_size_align("wchar_t", &X86_64_SYSV), (4, 4));
2234        assert_eq!(c_type_size_align("char8_t", &X86_64_SYSV), (1, 1));
2235        assert_eq!(c_type_size_align("char16_t", &X86_64_SYSV), (2, 2));
2236        assert_eq!(c_type_size_align("char32_t", &X86_64_SYSV), (4, 4));
2237    }
2238
2239    #[test]
2240    fn half_precision_types_correct_size() {
2241        assert_eq!(c_type_size_align("_Float16", &X86_64_SYSV), (2, 2));
2242        assert_eq!(c_type_size_align("__fp16", &X86_64_SYSV), (2, 2));
2243        assert_eq!(c_type_size_align("__bf16", &X86_64_SYSV), (2, 2));
2244        assert_eq!(c_type_size_align("_Float128", &X86_64_SYSV), (16, 16));
2245    }
2246
2247    #[test]
2248    fn unsigned_prefix_stripped_correctly() {
2249        // "unsigned short" → "short" → (2, 2)
2250        assert_eq!(c_type_size_align("unsigned short", &X86_64_SYSV), (2, 2));
2251        assert_eq!(c_type_size_align("unsigned int", &X86_64_SYSV), (4, 4));
2252        assert_eq!(
2253            c_type_size_align("unsigned long long", &X86_64_SYSV),
2254            (8, 8)
2255        );
2256        assert_eq!(
2257            c_type_size_align("long int", &X86_64_SYSV),
2258            (X86_64_SYSV.pointer_size, X86_64_SYSV.pointer_size)
2259        );
2260    }
2261
2262    #[test]
2263    fn linux_kernel_struct_with_new_types() {
2264        // Representative kernel-style struct using __u32, __be16, u8
2265        let src = r#"
2266struct NetHeader {
2267    __be32 src_ip;
2268    __be32 dst_ip;
2269    __be16 src_port;
2270    __be16 dst_port;
2271    u8     protocol;
2272    u8     ttl;
2273};
2274"#;
2275        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2276        assert_eq!(layouts.len(), 1);
2277        let l = &layouts[0];
2278        // 4+4+2+2+1+1 = 14B; max align is 4 (__be32) → padded to 16B
2279        assert_eq!(l.total_size, 16);
2280        assert_eq!(l.fields[0].size, 4); // __be32 src_ip
2281        assert_eq!(l.fields[2].size, 2); // __be16 src_port
2282        assert_eq!(l.fields[4].size, 1); // u8 protocol
2283    }
2284
2285    // ── C++ stdlib type tests ─────────────────────────────────────────────────
2286
2287    #[test]
2288    fn cpp_string_is_32_bytes() {
2289        assert_eq!(c_type_size_align("std::string", &X86_64_SYSV), (32, 8));
2290        assert_eq!(c_type_size_align("std::wstring", &X86_64_SYSV), (32, 8));
2291    }
2292
2293    #[test]
2294    fn cpp_string_view_is_two_words() {
2295        assert_eq!(c_type_size_align("std::string_view", &X86_64_SYSV), (16, 8));
2296    }
2297
2298    #[test]
2299    fn cpp_vector_is_24_bytes() {
2300        assert_eq!(c_type_size_align("std::vector<int>", &X86_64_SYSV), (24, 8));
2301        assert_eq!(
2302            c_type_size_align("std::vector<uint64_t>", &X86_64_SYSV),
2303            (24, 8)
2304        );
2305        // Size is independent of T
2306        assert_eq!(
2307            c_type_size_align("std::vector<std::string>", &X86_64_SYSV),
2308            (24, 8)
2309        );
2310    }
2311
2312    #[test]
2313    fn cpp_smart_pointers_correct_size() {
2314        // unique_ptr: single pointer
2315        assert_eq!(
2316            c_type_size_align("std::unique_ptr<int>", &X86_64_SYSV),
2317            (8, 8)
2318        );
2319        // shared_ptr / weak_ptr: two pointers
2320        assert_eq!(
2321            c_type_size_align("std::shared_ptr<int>", &X86_64_SYSV),
2322            (16, 8)
2323        );
2324        assert_eq!(
2325            c_type_size_align("std::weak_ptr<int>", &X86_64_SYSV),
2326            (16, 8)
2327        );
2328    }
2329
2330    #[test]
2331    fn cpp_optional_recursive_size() {
2332        // std::optional<bool>: 1B (bool) + 1B (has_value flag) → 2B
2333        assert_eq!(
2334            c_type_size_align("std::optional<bool>", &X86_64_SYSV),
2335            (2, 1)
2336        );
2337        // std::optional<int>: 4B + 1B → padded to 4B → 8B total? Let's check:
2338        // t_size=4, t_align=4; (4+1).next_multiple_of(4) = 8
2339        assert_eq!(
2340            c_type_size_align("std::optional<int>", &X86_64_SYSV),
2341            (8, 4)
2342        );
2343        // std::optional<double>: 8B + 1B → padded to 8B → 16B
2344        assert_eq!(
2345            c_type_size_align("std::optional<double>", &X86_64_SYSV),
2346            (16, 8)
2347        );
2348    }
2349
2350    #[test]
2351    fn cpp_function_is_32_bytes() {
2352        assert_eq!(
2353            c_type_size_align("std::function<void()>", &X86_64_SYSV),
2354            (32, 8)
2355        );
2356        assert_eq!(
2357            c_type_size_align("std::function<int(int)>", &X86_64_SYSV),
2358            (32, 8)
2359        );
2360    }
2361
2362    #[test]
2363    fn cpp_stdlib_struct_with_string_field() {
2364        // A struct with std::string fields — used to get pointer-size (8B), now 32B
2365        let src = r#"
2366struct Config {
2367    std::string name;
2368    int         version;
2369    bool        enabled;
2370};
2371"#;
2372        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2373        let l = &layouts[0];
2374        assert_eq!(l.fields[0].size, 32); // std::string, not 8
2375        // int at offset 32, bool at 36; total padded to 8-byte align = 40
2376        assert_eq!(l.fields[1].offset, 32);
2377        assert_eq!(l.fields[1].size, 4);
2378    }
2379
2380    // ── typedef alias resolution ──────────────────────────────────────────────
2381
2382    #[test]
2383    fn typedef_scalar_alias_resolves_correct_size() {
2384        // `typedef uint32_t UserId;` — UserId must be treated as 4B, not pointer-size.
2385        let src = r#"
2386typedef uint32_t UserId;
2387
2388struct User {
2389    UserId id;
2390    char   name[16];
2391};
2392"#;
2393        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2394        let l = layouts.iter().find(|l| l.name == "User").expect("User");
2395        let id = l.fields.iter().find(|f| f.name == "id").expect("id field");
2396        assert_eq!(id.size, 4, "UserId alias of uint32_t must be 4 bytes");
2397        assert_eq!(id.align, 4);
2398    }
2399
2400    #[test]
2401    fn typedef_alias_layout_correct_total_size() {
2402        // Without alias resolution: UserId → unknown → pointer_size (8B)
2403        // char(1) + 7 pad + unknown(8) = 16B.
2404        // With alias resolution: char(1) + 3 pad + uint32_t(4) = 8B.
2405        let src = r#"
2406typedef uint32_t Token;
2407
2408struct Auth {
2409    char  prefix;
2410    Token token;
2411};
2412"#;
2413        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2414        let l = layouts.iter().find(|l| l.name == "Auth").expect("Auth");
2415        // prefix(1) + 3-byte pad + token(4) = 8B
2416        assert_eq!(l.total_size, 8, "alias-resolved layout should be 8 bytes");
2417    }
2418
2419    #[test]
2420    fn typedef_pointer_not_confused_with_scalar_alias() {
2421        // `typedef int *IntPtr;` — pointer typedef; alias name lives in a nested
2422        // declarator, so collect_typedef_aliases must NOT collect it.
2423        // IntPtr falls through to the unknown-type catch-all → pointer_size (8B).
2424        let src = r#"
2425typedef int *IntPtr;
2426
2427struct S {
2428    IntPtr p;
2429    int    x;
2430};
2431"#;
2432        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2433        let l = layouts.iter().find(|l| l.name == "S").expect("S");
2434        let p = l.fields.iter().find(|f| f.name == "p").expect("p field");
2435        // Pointer typedef — either resolved as pointer (8B) or falls to pointer_size.
2436        assert_eq!(p.size, 8, "pointer typedef should be 8 bytes on x86_64");
2437    }
2438
2439    #[test]
2440    fn typedef_struct_not_collected_as_scalar_alias() {
2441        // `typedef struct { ... } MyStruct;` must not appear in scalar alias map.
2442        // The struct is still emitted as a StructLayout.
2443        let src = r#"
2444typedef struct {
2445    int x;
2446    int y;
2447} Point;
2448
2449struct Line {
2450    Point a;
2451    Point b;
2452};
2453"#;
2454        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2455        // Point must be emitted as a layout
2456        assert!(
2457            layouts.iter().any(|l| l.name == "Point"),
2458            "typedef struct should emit a StructLayout"
2459        );
2460    }
2461
2462    #[test]
2463    fn cpp_class_typedef_alias_resolved() {
2464        // Typedef alias resolution must also work for C++ class fields.
2465        let src = r#"
2466typedef uint64_t Timestamp;
2467
2468class Event {
2469    Timestamp when;
2470    int       kind;
2471};
2472"#;
2473        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2474        let l = layouts.iter().find(|l| l.name == "Event").expect("Event");
2475        let when = l.fields.iter().find(|f| f.name == "when").expect("when");
2476        assert_eq!(when.size, 8, "Timestamp alias of uint64_t must be 8 bytes");
2477        assert_eq!(when.align, 8);
2478    }
2479
2480    // ── C++ template skipping ─────────────────────────────────────────────────
2481
2482    #[test]
2483    fn cpp_template_struct_is_skipped() {
2484        // Generic C++ templates cannot be sized without monomorphisation.
2485        let src = "template<typename T> struct Wrapper { T value; int count; };";
2486        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2487        assert!(
2488            layouts.iter().all(|l| l.name != "Wrapper"),
2489            "template struct must be skipped, not emitted with wrong sizes"
2490        );
2491    }
2492
2493    #[test]
2494    fn cpp_template_class_is_skipped() {
2495        let src = "template<typename T, typename U> class Pair { T first; U second; };";
2496        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2497        assert!(
2498            layouts.iter().all(|l| l.name != "Pair"),
2499            "template class must be skipped"
2500        );
2501    }
2502
2503    #[test]
2504    fn cpp_non_template_struct_alongside_template_is_parsed() {
2505        // The template is skipped but concrete structs in the same TU are kept.
2506        let src = r#"
2507template<typename T> struct Generic { T val; };
2508struct Concrete { int x; double y; };
2509"#;
2510        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2511        assert!(
2512            layouts.iter().all(|l| l.name != "Generic"),
2513            "Generic template must be skipped"
2514        );
2515        let concrete = layouts
2516            .iter()
2517            .find(|l| l.name == "Concrete")
2518            .expect("Concrete must be parsed");
2519        assert_eq!(concrete.fields.len(), 2);
2520    }
2521}