Skip to main content

padlock_source/frontends/
c_cpp.rs

1// padlock-source/src/frontends/c_cpp.rs
2//
3// Extracts struct layouts from C / C++ source using tree-sitter.
4// Sizes and alignments are computed from field type names + arch config;
5// there is no compiler involved so the results are approximate for complex types.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use std::cell::Cell;
10use std::collections::HashMap;
11use tree_sitter::{Node, Parser};
12
13use crate::CppStdlib;
14
// Per-thread choice of C++ standard library used when sizing `std::` types.
// Starts as `CppStdlib::LibStdCpp`; written by `set_stdlib` and read by
// `active_stdlib`.
thread_local! {
    static STDLIB: Cell<CppStdlib> = const { Cell::new(CppStdlib::LibStdCpp) };
}
18
19/// Set the active C++ stdlib variant for this thread.  Called from `lib.rs::set_cpp_stdlib`.
20pub(crate) fn set_stdlib(s: CppStdlib) {
21    STDLIB.with(|c| c.set(s));
22}
23
24fn active_stdlib() -> CppStdlib {
25    STDLIB.with(|c| c.get())
26}
27
28// ── type resolution ───────────────────────────────────────────────────────────
29
30/// Map a C/C++ type name to (size, align) using the target arch.
31fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
32    let ty = ty.trim();
33    // Strip qualifiers
34    for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
35        if let Some(rest) = ty.strip_prefix(qual) {
36            return c_type_size_align(rest, arch);
37        }
38    }
39    // x86 SSE / AVX / AVX-512 SIMD types
40    match ty {
41        "__m64" => return (8, 8),
42        "__m128" | "__m128d" | "__m128i" => return (16, 16),
43        "__m256" | "__m256d" | "__m256i" => return (32, 32),
44        "__m512" | "__m512d" | "__m512i" => return (64, 64),
45        // ARM NEON — 64-bit (double-word) vectors
46        "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
47        | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
48        // ARM NEON — 128-bit (quad-word) vectors
49        "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
50        | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
51        _ => {}
52    }
53    // C++ standard library types — sizes vary by stdlib variant.
54    let stdlib = active_stdlib();
55    match ty {
56        // ── Synchronisation ───────────────────────────────────────────────────
57        // pthread_mutex_t on Linux/glibc: 40 bytes.
58        // On macOS (libc++): opaque_pthread_mutex_t = 56 bytes.
59        // MSVC: CRITICAL_SECTION = 40 bytes on 64-bit.
60        "std::mutex"
61        | "std::recursive_mutex"
62        | "std::timed_mutex"
63        | "std::recursive_timed_mutex"
64        | "pthread_mutex_t" => {
65            return match stdlib {
66                CppStdlib::LibCpp => (56, 8), // macOS/Apple pthread mutex
67                _ => (40, 8),
68            };
69        }
70        "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
71        "std::condition_variable" | "pthread_cond_t" => {
72            return match stdlib {
73                CppStdlib::LibCpp => (40, 8), // macOS pthread_cond_t
74                _ => (48, 8),
75            };
76        }
77
78        // ── String / view ─────────────────────────────────────────────────────
79        // libstdc++ std::string: 32B (ptr + length + SSO buffer (15 chars + NUL)).
80        // libc++ (Clang/macOS/Android): 24B (short-string optimisation is smaller).
81        // MSVC STL: 32B on 64-bit (16-byte SSO inline buffer).
82        "std::string" | "std::wstring" | "std::u8string" | "std::u16string" | "std::u32string"
83        | "std::pmr::string" => {
84            return match stdlib {
85                CppStdlib::LibCpp => (24, 8),
86                _ => (32, 8), // libstdc++ and MSVC both 32B
87            };
88        }
89        // std::string_view / std::span<T>: pointer + length (2 words).
90        "std::string_view"
91        | "std::wstring_view"
92        | "std::u8string_view"
93        | "std::u16string_view"
94        | "std::u32string_view" => return (arch.pointer_size * 2, arch.pointer_size),
95
96        // ── Sequence containers ───────────────────────────────────────────────
97        // std::vector<T>: pointer + size + capacity = 3 words (24B on 64-bit).
98        // Size is independent of T.
99        ty if ty.starts_with("std::vector<") || ty == "std::vector" => {
100            return (arch.pointer_size * 3, arch.pointer_size);
101        }
102        // std::deque<T>: 80B on both libstdc++ and libc++ (64-bit Linux).
103        ty if ty.starts_with("std::deque<") || ty == "std::deque" => return (80, 8),
104        // std::list<T>: sentinel node pointer + size = 2 words + node pointers.
105        // libstdc++: 24B (size_t + two pointers). libc++: 24B.
106        ty if ty.starts_with("std::list<") || ty == "std::list" => {
107            return (arch.pointer_size * 3, arch.pointer_size);
108        }
109        // std::forward_list<T>: single pointer (head node).
110        ty if ty.starts_with("std::forward_list<") || ty == "std::forward_list" => {
111            return (arch.pointer_size, arch.pointer_size);
112        }
113        // std::array<T, N>: inline storage; size = N * sizeof(T).
114        // We cannot compute this without resolving T and N, so fall through.
115
116        // ── Associative / unordered containers ────────────────────────────────
117        // All map/set types: header node + size = ~48B (libstdc++) / ~40B (libc++).
118        // Use 48B as conservative approximation.
119        ty if ty.starts_with("std::map<")
120            || ty.starts_with("std::multimap<")
121            || ty.starts_with("std::set<")
122            || ty.starts_with("std::multiset<") =>
123        {
124            return (48, 8);
125        }
126        // std::unordered_map / unordered_set: bucket array pointer + size + load factor + etc.
127        // libstdc++: ~56B. libc++: ~72B. Use 56B.
128        ty if ty.starts_with("std::unordered_map<")
129            || ty.starts_with("std::unordered_multimap<")
130            || ty.starts_with("std::unordered_set<")
131            || ty.starts_with("std::unordered_multiset<") =>
132        {
133            return (56, 8);
134        }
135
136        // ── Smart pointers ────────────────────────────────────────────────────
137        // std::unique_ptr<T>: single pointer (deleter may be zero-sized via EBO).
138        ty if ty.starts_with("std::unique_ptr<") || ty == "std::unique_ptr" => {
139            return (arch.pointer_size, arch.pointer_size);
140        }
141        // std::shared_ptr<T> / std::weak_ptr<T>: object pointer + control block pointer.
142        ty if ty.starts_with("std::shared_ptr<")
143            || ty == "std::shared_ptr"
144            || ty.starts_with("std::weak_ptr<")
145            || ty == "std::weak_ptr" =>
146        {
147            return (arch.pointer_size * 2, arch.pointer_size);
148        }
149
150        // ── Type-erasure / utilities ──────────────────────────────────────────
151        // std::function<Sig>: 32B on libstdc++ and libc++ (64-bit Linux).
152        // Holds a functor pointer, a vtable pointer, and a small-functor buffer.
153        ty if ty.starts_with("std::function<") || ty == "std::function" => return (32, 8),
154        // std::any: 32B on libstdc++ (small-object buffer + vtable pointer).
155        "std::any" => return (32, 8),
156        // std::error_code / std::error_condition: pointer + int = 16B.
157        "std::error_code" | "std::error_condition" => return (16, 8),
158        // std::exception_ptr: single pointer.
159        "std::exception_ptr" => return (arch.pointer_size, arch.pointer_size),
160        // std::type_index: single pointer (wraps std::type_info*).
161        "std::type_index" => return (arch.pointer_size, arch.pointer_size),
162        // std::span<T>: pointer + length (2 words). Template arg irrelevant.
163        ty if ty.starts_with("std::span<") || ty == "std::span" => {
164            return (arch.pointer_size * 2, arch.pointer_size);
165        }
166        // std::optional<T>: sizeof(T) + 1B bool, padded to align(T).
167        // Recurse to resolve T then apply the formula.
168        ty if ty.starts_with("std::optional<") && ty.ends_with('>') => {
169            let inner = &ty["std::optional<".len()..ty.len() - 1];
170            let (t_size, t_align) = c_type_size_align(inner.trim(), arch);
171            let total = (t_size + 1).next_multiple_of(t_align.max(1));
172            return (total, t_align.max(1));
173        }
174
175        // ── Atomic ────────────────────────────────────────────────────────────
176        // std::atomic<T>: same size and alignment as T.
177        ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
178            let inner = &ty[12..ty.len() - 1];
179            return c_type_size_align(inner.trim(), arch);
180        }
181        // std::atomic_flag: guaranteed 1B minimum, but often 4B in practice.
182        "std::atomic_flag" => return (4, 4),
183
184        _ => {} // fall through to primitive types below
185    }
186    // Primitive / stdint / pointer types
187    match ty {
188        "char" | "_Bool" | "bool" => (1, 1),
189        "short" | "short int" => (2, 2),
190        "int" => (4, 4),
191        "long" | "long int" => (arch.pointer_size, arch.pointer_size),
192        "long long" | "long long int" => (8, 8),
193        "float" => (4, 4),
194        "double" => (8, 8),
195        "long double" => (16, 16),
196
197        // C99 stdint exact-width types
198        "int8_t" | "uint8_t" => (1, 1),
199        "int16_t" | "uint16_t" => (2, 2),
200        "int32_t" | "uint32_t" => (4, 4),
201        "int64_t" | "uint64_t" => (8, 8),
202        "intmax_t" | "uintmax_t" => (8, 8),
203        "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
204            (arch.pointer_size, arch.pointer_size)
205        }
206
207        // C99 fast types — uint_fast{8,16}_t are always 1/2B;
208        // uint_fast{32,64}_t are pointer-sized on 64-bit (8B), 4B on 32-bit.
209        "int_fast8_t" | "uint_fast8_t" => (1, 1),
210        "int_fast16_t" | "uint_fast16_t" => (2, 2),
211        "int_fast32_t" | "uint_fast32_t" | "int_fast64_t" | "uint_fast64_t" => {
212            (arch.pointer_size, arch.pointer_size)
213        }
214
215        // C99 least types — minimum guaranteed widths
216        "int_least8_t" | "uint_least8_t" => (1, 1),
217        "int_least16_t" | "uint_least16_t" => (2, 2),
218        "int_least32_t" | "uint_least32_t" => (4, 4),
219        "int_least64_t" | "uint_least64_t" => (8, 8),
220
221        // GCC/Clang 128-bit integer extension
222        "__int128" | "__uint128" | "__int128_t" | "__uint128_t" => (16, 16),
223
224        // Linux kernel short-form integer types (linux/types.h)
225        "u8" | "s8" => (1, 1),
226        "u16" | "s16" => (2, 2),
227        "u32" | "s32" => (4, 4),
228        "u64" | "s64" => (8, 8),
229
230        // Linux kernel double-underscore types (__u8, __s8, __be16, __le32, …)
231        "__u8" | "__s8" | "__u8__" | "__s8__" => (1, 1),
232        "__u16" | "__s16" | "__be16" | "__le16" => (2, 2),
233        "__u32" | "__s32" | "__be32" | "__le32" => (4, 4),
234        "__u64" | "__s64" | "__be64" | "__le64" => (8, 8),
235
236        // MSVC fixed-width intrinsics
237        "__int8" => (1, 1),
238        "__int16" => (2, 2),
239        "__int32" => (4, 4),
240        "__int64" => (8, 8),
241
242        // Windows SDK / WinAPI types
243        "BYTE" | "BOOLEAN" | "CHAR" | "INT8" | "UINT8" => (1, 1),
244        "WORD" | "WCHAR" | "SHORT" | "USHORT" | "INT16" | "UINT16" => (2, 2),
245        "DWORD" | "LONG" | "ULONG" | "INT" | "UINT" | "BOOL" | "FLOAT" | "INT32" | "UINT32" => {
246            (4, 4)
247        }
248        "QWORD" | "LONGLONG" | "ULONGLONG" | "INT64" | "UINT64" | "LARGE_INTEGER" => (8, 8),
249        "DWORD64" | "ULONG64" | "LONG64" => (8, 8),
250        "HANDLE" | "LPVOID" | "PVOID" | "LPCVOID" | "LPSTR" | "LPCSTR" | "LPWSTR" | "LPCWSTR"
251        | "SIZE_T" | "SSIZE_T" | "ULONG_PTR" | "LONG_PTR" | "DWORD_PTR" | "INT_PTR"
252        | "UINT_PTR" => (arch.pointer_size, arch.pointer_size),
253
254        // C/C++ character types
255        // wchar_t: 4B on Linux/macOS (GCC/Clang POSIX), 2B on Windows/MSVC.
256        // All current padlock arch configs are POSIX, so 4B is correct here.
257        "wchar_t" => (4, 4),
258        "char8_t" => (1, 1),
259        "char16_t" => (2, 2),
260        "char32_t" => (4, 4),
261
262        // Half-precision and bfloat16 (ARM, GCC, Clang, ML workloads)
263        "_Float16" | "__fp16" | "__bf16" => (2, 2),
264        // 128-bit float (GCC/Clang extension)
265        "_Float128" | "__float128" => (16, 16),
266
267        // Pointer types
268        ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
269        // Unknown — use pointer size as a reasonable default
270        _ => (arch.pointer_size, arch.pointer_size),
271    }
272}
273
274// ── struct / union simulation ─────────────────────────────────────────────────
275
/// Drop a trailing bit-field width (`:N`) from a type name so the base type
/// can be looked up.
///
/// Only the text after the *last* `:` is examined, and it must consist solely
/// of ASCII digits (surrounding whitespace is ignored): `"int:3"` → `"int"`,
/// while `"std::atomic"` is returned unchanged.
fn strip_bitfield_suffix(ty: &str) -> &str {
    match ty.rsplit_once(':') {
        Some((base, width)) => {
            let width = width.trim();
            let is_width = !width.is_empty() && width.bytes().all(|b| b.is_ascii_digit());
            if is_width { base.trim_end() } else { ty }
        }
        None => ty,
    }
}
287
288/// Return `true` when `ty` carries a bit-field width annotation (e.g. `"int:3"`).
289/// Bit-field packing is compiler-controlled and cannot be accurately modelled
290/// without a compiler, so structs containing bit-field members are skipped.
291fn is_bitfield_type(ty: &str) -> bool {
292    strip_bitfield_suffix(ty) != ty
293}
294
295/// Simulate C/C++ struct layout given ordered fields.
296///
297/// `pack_n` controls field alignment capping:
298/// - `0` — no packing (default C/C++ ABI alignment rules)
299/// - `1` — `__attribute__((packed))` / `#pragma pack(1)`: force alignment to 1
300/// - `N` — `#pragma pack(N)`: cap each field's alignment at N bytes
301///
302/// This unified model handles both GCC/Clang `__attribute__((packed))` and
303/// MSVC-style `#pragma pack(N)` directives.
304fn simulate_layout(
305    fields: &mut Vec<Field>,
306    struct_name: String,
307    arch: &'static ArchConfig,
308    source_line: Option<u32>,
309    pack_n: usize,
310) -> StructLayout {
311    let mut offset = 0usize;
312    let mut struct_align = 1usize;
313
314    for f in fields.iter_mut() {
315        let eff_align = if pack_n > 0 {
316            f.align.min(pack_n)
317        } else {
318            f.align
319        };
320        if eff_align > 0 {
321            offset = offset.next_multiple_of(eff_align);
322        }
323        f.offset = offset;
324        offset += f.size;
325        struct_align = struct_align.max(eff_align);
326    }
327    // Trailing padding (not present when fully packed)
328    if pack_n != 1 && struct_align > 0 {
329        offset = offset.next_multiple_of(struct_align);
330    }
331
332    StructLayout {
333        name: struct_name,
334        total_size: offset,
335        align: struct_align,
336        fields: std::mem::take(fields),
337        source_file: None,
338        source_line,
339        arch,
340        is_packed: pack_n == 1,
341        is_union: false,
342        is_repr_rust: false,
343        suppressed_findings: Vec::new(),
344        uncertain_fields: Vec::new(),
345    }
346}
347
348/// Simulate a C/C++ union layout: all fields start at offset 0;
349/// total size is the largest field, rounded to max alignment.
350fn simulate_union_layout(
351    fields: &mut Vec<Field>,
352    name: String,
353    arch: &'static ArchConfig,
354    source_line: Option<u32>,
355) -> StructLayout {
356    for f in fields.iter_mut() {
357        f.offset = 0;
358    }
359    let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
360    let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
361    let total_size = if max_align > 0 {
362        max_size.next_multiple_of(max_align)
363    } else {
364        max_size
365    };
366
367    StructLayout {
368        name,
369        total_size,
370        align: max_align,
371        fields: std::mem::take(fields),
372        source_file: None,
373        source_line,
374        arch,
375        is_packed: false,
376        is_union: true,
377        is_repr_rust: false,
378        suppressed_findings: Vec::new(),
379        uncertain_fields: Vec::new(),
380    }
381}
382
383// ── C++ class parsing (vtable + inheritance) ──────────────────────────────────
384
385/// Parse a `class_specifier` node, modelling:
386/// - A hidden vtable pointer (`__vptr`) when any method is `virtual`.
387/// - Base-class storage as a synthetic `__base_<Name>` field (size resolved
388///   later by the nested-struct resolution pass in `lib.rs`).
389fn parse_class_specifier(
390    source: &str,
391    node: Node<'_>,
392    arch: &'static ArchConfig,
393    aliases: &HashMap<String, String>,
394    pragma_pack: usize,
395) -> Option<StructLayout> {
396    let mut class_name = "<anonymous>".to_string();
397    let mut base_names: Vec<String> = Vec::new();
398    let mut body_node: Option<Node> = None;
399    let mut is_packed = false;
400    let mut struct_alignas: Option<usize> = None;
401
402    for i in 0..node.child_count() {
403        let child = node.child(i)?;
404        match child.kind() {
405            "type_identifier" => class_name = source[child.byte_range()].to_string(),
406            "base_class_clause" => {
407                // tree-sitter-cpp structure: ':' [access_specifier] type_identifier
408                // type_identifier nodes are direct children of base_class_clause.
409                for j in 0..child.child_count() {
410                    if let Some(base) = child.child(j)
411                        && base.kind() == "type_identifier"
412                    {
413                        base_names.push(source[base.byte_range()].to_string());
414                    }
415                }
416            }
417            "field_declaration_list" => body_node = Some(child),
418            "attribute_specifier" if source[child.byte_range()].contains("packed") => {
419                is_packed = true;
420            }
421            // C++11 class-level alignas: `class alignas(64) Name { ... };`
422            "alignas_qualifier" | "alignas_specifier" if struct_alignas.is_none() => {
423                struct_alignas = parse_alignas_value(source, child);
424            }
425            _ => {}
426        }
427    }
428
429    let body = body_node?;
430
431    // Detect virtual methods: look for `virtual` keyword anywhere in body
432    let has_virtual = contains_virtual_keyword(source, body);
433
434    // Collect declared fields: (field_name, type_text, guard, alignas_override, source_line)
435    let mut raw_fields: Vec<RawField> = Vec::new();
436    for i in 0..body.child_count() {
437        let Some(child) = body.child(i) else {
438            continue;
439        };
440        if child.kind() == "field_declaration" {
441            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, false) {
442                raw_fields.extend(anon_fields);
443            } else if let Some((ty, fname, guard, al, ln)) = parse_field_declaration(source, child)
444            {
445                raw_fields.push((fname, ty, guard, al, ln));
446            }
447        }
448    }
449
450    // Build fields: vtable pointer, then base-class slots, then declared fields
451    let mut fields: Vec<Field> = Vec::new();
452
453    // Virtual dispatch pointer (hidden, at offset 0 for the first virtual class)
454    if has_virtual {
455        let ps = arch.pointer_size;
456        fields.push(Field {
457            name: "__vptr".to_string(),
458            ty: TypeInfo::Pointer {
459                size: ps,
460                align: ps,
461            },
462            offset: 0,
463            size: ps,
464            align: ps,
465            source_file: None,
466            source_line: None,
467            access: AccessPattern::Unknown,
468        });
469    }
470
471    // Base class storage (opaque until nested-struct resolver fills in sizes)
472    for base in &base_names {
473        let ps = arch.pointer_size;
474        fields.push(Field {
475            name: format!("__base_{base}"),
476            ty: TypeInfo::Opaque {
477                name: base.clone(),
478                size: ps,
479                align: ps,
480            },
481            offset: 0,
482            size: ps,
483            align: ps,
484            source_file: None,
485            source_line: None,
486            access: AccessPattern::Unknown,
487        });
488    }
489
490    // Skip classes with bit-field members (same reason as structs).
491    if raw_fields
492        .iter()
493        .any(|(_, ty, _, _, _)| is_bitfield_type(ty))
494    {
495        eprintln!(
496            "padlock: note: skipping '{class_name}' — contains bit-fields \
497             (bit-field layout is compiler-controlled; use binary analysis for accurate results)"
498        );
499        return None;
500    }
501
502    // Declared member fields
503    for (fname, ty_name, guard, alignas, field_line) in raw_fields {
504        let resolved = aliases
505            .get(&ty_name)
506            .map(String::as_str)
507            .unwrap_or(&ty_name);
508        let (size, natural_align) = c_type_size_align(resolved, arch);
509        let align = alignas.unwrap_or(natural_align);
510        let access = if let Some(g) = guard {
511            AccessPattern::Concurrent {
512                guard: Some(g),
513                is_atomic: false,
514                is_annotated: true,
515            }
516        } else {
517            AccessPattern::Unknown
518        };
519        fields.push(Field {
520            name: fname,
521            ty: TypeInfo::Primitive {
522                name: ty_name,
523                size,
524                align,
525            },
526            offset: 0,
527            size,
528            align,
529            source_file: None,
530            source_line: Some(field_line),
531            access,
532        });
533    }
534
535    if fields.is_empty() {
536        return None;
537    }
538
539    let line = node.start_position().row as u32 + 1;
540    let pack_n = if is_packed {
541        1
542    } else if pragma_pack > 0 {
543        pragma_pack
544    } else {
545        0
546    };
547    let mut layout = simulate_layout(&mut fields, class_name, arch, Some(line), pack_n);
548
549    if let Some(al) = struct_alignas
550        && al > layout.align
551    {
552        layout.align = al;
553        if pack_n == 0 {
554            layout.total_size = layout.total_size.next_multiple_of(al);
555        }
556    }
557
558    layout.suppressed_findings =
559        super::suppress::suppressed_from_preceding_source(source, node.start_byte());
560
561    Some(layout)
562}
563
564/// Return true if a `field_declaration_list` node contains any `virtual` keyword
565/// (indicating that the class needs a vtable pointer).
566fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
567    let mut stack = vec![node];
568    while let Some(n) = stack.pop() {
569        if n.kind() == "virtual" {
570            return true;
571        }
572        // Also check raw text for cases where tree-sitter may not produce a
573        // dedicated `virtual` node (e.g. inside complex declarations).
574        if n.child_count() == 0 {
575            let text = &source[n.byte_range()];
576            if text == "virtual" {
577                return true;
578            }
579        }
580        for i in (0..n.child_count()).rev() {
581            if let Some(child) = n.child(i) {
582                stack.push(child);
583            }
584        }
585    }
586    false
587}
588
589// ── tree-sitter walker ────────────────────────────────────────────────────────
590
591/// Pre-scan a tree for plain scalar typedef declarations and return a map of
592/// `AliasName → BaseTypeName` for within-file alias resolution.
593///
594/// Only simple scalar aliases are collected, e.g.:
595///   `typedef uint32_t MyId;`   → `{"MyId": "uint32_t"}`
596///   `typedef unsigned int Idx;` → `{"Idx": "unsigned int"}`
597///
598/// Struct/union, function-pointer, and pointer typedefs are skipped — the
599/// alias name in those cases lives in a nested declarator node and will not
600/// appear as a direct `type_identifier` child, so they naturally produce fewer
601/// than two type parts and are filtered out.
602fn collect_typedef_aliases(source: &str, root: Node<'_>) -> HashMap<String, String> {
603    let mut aliases = HashMap::new();
604    let mut stack = vec![root];
605    while let Some(node) = stack.pop() {
606        for i in (0..node.child_count()).rev() {
607            if let Some(child) = node.child(i) {
608                stack.push(child);
609            }
610        }
611        if node.kind() != "type_definition" {
612            continue;
613        }
614        // Skip struct/union/class typedefs — those produce StructLayout entries.
615        let has_record = (0..node.child_count()).any(|i| {
616            node.child(i)
617                .map(|c| {
618                    matches!(
619                        c.kind(),
620                        "struct_specifier" | "union_specifier" | "class_specifier"
621                    )
622                })
623                .unwrap_or(false)
624        });
625        if has_record {
626            continue;
627        }
628        // Collect direct-child type parts in declaration order:
629        //   `typedef uint32_t MyId;`       → ["uint32_t", "MyId"]
630        //   `typedef unsigned int MyUInt;` → ["unsigned int", "MyUInt"]
631        // For pointer/function typedefs the alias name is nested inside a
632        // declarator node, so only one part is collected and we skip (len < 2).
633        let mut type_parts: Vec<String> = Vec::new();
634        for i in 0..node.child_count() {
635            let Some(child) = node.child(i) else {
636                continue;
637            };
638            match child.kind() {
639                "typedef" | ";" => {}
640                "type_identifier" | "primitive_type" | "sized_type_specifier" => {
641                    type_parts.push(source[child.byte_range()].trim().to_string());
642                }
643                _ => {}
644            }
645        }
646        if type_parts.len() < 2 {
647            continue;
648        }
649        // Last element is the alias name; everything before is the base type.
650        let alias_name = type_parts.pop().unwrap();
651        let base_type = type_parts.join(" ");
652        aliases.entry(alias_name).or_insert(base_type);
653    }
654    aliases
655}
656
657fn extract_structs_from_tree(
658    source: &str,
659    root: Node<'_>,
660    arch: &'static ArchConfig,
661    layouts: &mut Vec<StructLayout>,
662) {
663    // Phase 0: collect within-file typedef scalar aliases for field type resolution.
664    let aliases = collect_typedef_aliases(source, root);
665
666    // Phase 1: extract struct/union/class layouts.
667    // We do a single linear pass ordered by byte offset so that `#pragma pack`
668    // directives are processed in document order, keeping `current_pack` accurate
669    // at each struct declaration site.
670    //
671    // Pack state: `current_pack = 0` means no active pragma (use default ABI
672    // alignment); `current_pack = N > 0` means cap field alignment at N bytes.
673    let mut pack_stack: Vec<usize> = Vec::new();
674    let mut current_pack: usize = 0;
675
676    let cursor = root.walk();
677    let mut stack = vec![root];
678
679    while let Some(node) = stack.pop() {
680        // Push children in reverse so we process left-to-right in document order.
681        for i in (0..node.child_count()).rev() {
682            if let Some(child) = node.child(i) {
683                stack.push(child);
684            }
685        }
686
687        // Track `#pragma pack(...)` directives (tree-sitter: `preproc_call` nodes).
688        if node.kind() == "preproc_call" {
689            let text = &source[node.byte_range()];
690            if text.contains("#pragma") && text.contains("pack(") {
691                current_pack = parse_pragma_pack(text, &mut pack_stack, current_pack);
692            }
693        }
694
695        // Skip C++ template structs/classes/unions — without monomorphisation we
696        // cannot know T's size, so any sizing would be wrong.  tree-sitter-cpp
697        // wraps these as `template_declaration > struct_specifier` etc.
698        let in_template = node
699            .parent()
700            .map(|p| p.kind() == "template_declaration")
701            .unwrap_or(false);
702        if in_template {
703            let tpl_name = (0..node.child_count())
704                .filter_map(|i| node.child(i))
705                .find(|c| c.kind() == "type_identifier")
706                .map(|c| source[c.byte_range()].to_string())
707                .unwrap_or_else(|| "(unknown)".to_string());
708            eprintln!(
709                "padlock: note: skipping '{tpl_name}' — template \
710                 (layout depends on type arguments; use binary analysis for accurate results)"
711            );
712            continue;
713        }
714
715        match node.kind() {
716            "struct_specifier" => {
717                if let Some(layout) = parse_struct_or_union_specifier(
718                    source,
719                    node,
720                    arch,
721                    false,
722                    &aliases,
723                    current_pack,
724                ) {
725                    layouts.push(layout);
726                }
727            }
728            "union_specifier" => {
729                if let Some(layout) = parse_struct_or_union_specifier(
730                    source,
731                    node,
732                    arch,
733                    true,
734                    &aliases,
735                    current_pack,
736                ) {
737                    layouts.push(layout);
738                }
739            }
740            "class_specifier" => {
741                if let Some(layout) =
742                    parse_class_specifier(source, node, arch, &aliases, current_pack)
743                {
744                    layouts.push(layout);
745                }
746            }
747            _ => {}
748        }
749    }
750
751    // Also handle `typedef struct/union { ... } Name;`.
752    // Run a second pass; at this point we do not re-track pragma pack since
753    // typedef structs with non-default packing will have already been captured
754    // in the first pass (the struct specifier inside the typedef is the same
755    // node). The second pass only renames anonymous structs, so pack accuracy
756    // is inherited from the first-pass result.
757    let cursor2 = root.walk();
758    let mut stack2 = vec![root];
759    while let Some(node) = stack2.pop() {
760        for i in (0..node.child_count()).rev() {
761            if let Some(child) = node.child(i) {
762                stack2.push(child);
763            }
764        }
765        if node.kind() == "type_definition"
766            && let Some(layout) =
767                parse_typedef_struct_or_union(source, node, arch, &aliases, current_pack)
768        {
769            let existing = layouts
770                .iter()
771                .position(|l| l.name == layout.name || l.name == "<anonymous>");
772            match existing {
773                Some(i) if layouts[i].name == "<anonymous>" => {
774                    layouts[i] = layout;
775                }
776                None => layouts.push(layout),
777                _ => {}
778            }
779        }
780    }
781    let _ = cursor;
782    let _ = cursor2; // silence unused warnings
783}
784
/// Parse a `#pragma pack(...)` directive and update the pack stack/current level.
///
/// Whitespace between `pack` and the opening parenthesis is tolerated
/// (`#pragma pack (1)`).
///
/// Recognised forms:
/// - `#pragma pack(N)`            — set pack level to N
/// - `#pragma pack()`             — reset to default (0)
/// - `#pragma pack(push, N)`      — push current level, set to N
/// - `#pragma pack(push, id, N)`  — labelled push; the label is ignored, level set to N
/// - `#pragma pack(push)`         — push current level (no change)
/// - `#pragma pack(pop)`          — restore previous level (0 if nothing was pushed)
/// - `#pragma pack(pop, N)`       — pop one entry, then set pack level to N
///
/// Any other argument (for example `show`) leaves the level unchanged.
///
/// Returns the new `current_pack` value.
fn parse_pragma_pack(text: &str, stack: &mut Vec<usize>, current: usize) -> usize {
    // Find the first `pack` token that is followed (ignoring whitespace) by a
    // parenthesised argument list with a closing `)`.
    let Some(args) = text.match_indices("pack").find_map(|(pos, kw)| {
        let inner = text[pos + kw.len()..].trim_start().strip_prefix('(')?;
        let close = inner.find(')')?;
        Some(inner[..close].trim())
    }) else {
        return current;
    };

    if args.is_empty() {
        // #pragma pack() — reset to default
        return 0;
    }

    // First argument selects the operation; for push/pop only the LAST of the
    // remaining arguments may carry a numeric level (a label can precede it).
    let mut parts = args.split(',').map(str::trim);
    let head = parts.next().unwrap_or("");
    let trailing_level = parts.last().and_then(|arg| arg.parse::<usize>().ok());

    match head {
        "pop" => {
            let restored = stack.pop().unwrap_or(0);
            trailing_level.unwrap_or(restored)
        }
        "push" => {
            stack.push(current);
            // push without a numeric level keeps the current one
            trailing_level.unwrap_or(current)
        }
        n_str => n_str.parse::<usize>().unwrap_or(current),
    }
}
826
827/// Parse a `struct_specifier` or `union_specifier` node into a `StructLayout`.
828///
829/// `pragma_pack` is the active `#pragma pack(N)` level at the point of the
830/// declaration (`0` = no active pragma, equivalent to default ABI alignment).
831fn parse_struct_or_union_specifier(
832    source: &str,
833    node: Node<'_>,
834    arch: &'static ArchConfig,
835    is_union: bool,
836    aliases: &HashMap<String, String>,
837    pragma_pack: usize,
838) -> Option<StructLayout> {
839    let mut name = "<anonymous>".to_string();
840    let mut body_node: Option<Node> = None;
841    let mut is_packed = false;
842    // Struct-level alignas: `struct alignas(64) CacheAligned { ... };`
843    let mut struct_alignas: Option<usize> = None;
844
845    for i in 0..node.child_count() {
846        let child = node.child(i)?;
847        match child.kind() {
848            "type_identifier" => name = source[child.byte_range()].to_string(),
849            "field_declaration_list" => body_node = Some(child),
850            "attribute_specifier" => {
851                let text = &source[child.byte_range()];
852                if text.contains("packed") {
853                    is_packed = true;
854                }
855            }
856            // C++11 struct-level alignas: `struct alignas(64) Name { ... };`
857            // tree-sitter-cpp: `alignas_qualifier` as direct child of struct_specifier
858            "alignas_qualifier" | "alignas_specifier" if struct_alignas.is_none() => {
859                struct_alignas = parse_alignas_value(source, child);
860            }
861            _ => {}
862        }
863    }
864
865    let body = body_node?;
866    let mut raw_fields: Vec<RawField> = Vec::new();
867
868    for i in 0..body.child_count() {
869        let child = body.child(i)?;
870        if child.kind() == "field_declaration" {
871            // Check for anonymous nested struct/union: a field_declaration whose
872            // only non-field-identifier child is a struct_specifier/union_specifier
873            // with no type_identifier (i.e. `struct { int x; int y; };`).
874            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, is_union) {
875                raw_fields.extend(anon_fields);
876            } else if let Some((ty, fname, guard, al, ln)) = parse_field_declaration(source, child)
877            {
878                raw_fields.push((fname, ty, guard, al, ln));
879            }
880        }
881    }
882
883    if raw_fields.is_empty() {
884        return None;
885    }
886
887    // Bit-field packing is compiler-controlled and cannot be accurately modelled
888    // without a compiler. Skip the entire struct to avoid producing wrong layout
889    // data. Use `padlock analyze` on the compiled binary for accurate results.
890    if raw_fields
891        .iter()
892        .any(|(_, ty, _, _, _)| is_bitfield_type(ty))
893    {
894        eprintln!(
895            "padlock: note: skipping '{name}' — contains bit-fields \
896             (bit-field layout is compiler-controlled; use binary analysis for accurate results)"
897        );
898        return None;
899    }
900
901    let mut fields: Vec<Field> = raw_fields
902        .into_iter()
903        .map(|(fname, ty_name, guard, alignas, field_line)| {
904            let resolved = aliases
905                .get(&ty_name)
906                .map(String::as_str)
907                .unwrap_or(&ty_name);
908            let (size, natural_align) = c_type_size_align(resolved, arch);
909            // alignas(N) on a field overrides its alignment requirement.
910            let align = alignas.unwrap_or(natural_align);
911            let access = if let Some(g) = guard {
912                AccessPattern::Concurrent {
913                    guard: Some(g),
914                    is_atomic: false,
915                    is_annotated: true,
916                }
917            } else {
918                AccessPattern::Unknown
919            };
920            Field {
921                name: fname,
922                ty: TypeInfo::Primitive {
923                    name: ty_name,
924                    size,
925                    align,
926                },
927                offset: 0,
928                size,
929                align,
930                source_file: None,
931                source_line: Some(field_line),
932                access,
933            }
934        })
935        .collect();
936
937    let line = node.start_position().row as u32 + 1;
938    // `__attribute__((packed))` forces pack_n=1; `#pragma pack(N)` caps at N.
939    // When both apply, the more restrictive (smaller) wins.
940    let pack_n = if is_packed {
941        1
942    } else if pragma_pack > 0 {
943        pragma_pack
944    } else {
945        0
946    };
947    let mut layout = if is_union {
948        simulate_union_layout(&mut fields, name, arch, Some(line))
949    } else {
950        simulate_layout(&mut fields, name, arch, Some(line), pack_n)
951    };
952
953    // Apply struct-level alignas: the struct's alignment requirement is at
954    // least N; trailing padding may grow to satisfy the new alignment.
955    if let Some(al) = struct_alignas
956        && al > layout.align
957    {
958        layout.align = al;
959        if pack_n == 0 {
960            layout.total_size = layout.total_size.next_multiple_of(al);
961        }
962    }
963
964    layout.suppressed_findings =
965        super::suppress::suppressed_from_preceding_source(source, node.start_byte());
966
967    Some(layout)
968}
969
970/// Parse a `typedef struct/union { ... } Name;` type_definition node.
971fn parse_typedef_struct_or_union(
972    source: &str,
973    node: Node<'_>,
974    arch: &'static ArchConfig,
975    aliases: &HashMap<String, String>,
976    pragma_pack: usize,
977) -> Option<StructLayout> {
978    let mut specifier_node: Option<Node> = None;
979    let mut is_union = false;
980    let mut typedef_name: Option<String> = None;
981
982    for i in 0..node.child_count() {
983        let child = node.child(i)?;
984        match child.kind() {
985            "struct_specifier" => {
986                specifier_node = Some(child);
987                is_union = false;
988            }
989            "union_specifier" => {
990                specifier_node = Some(child);
991                is_union = true;
992            }
993            "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
994            _ => {}
995        }
996    }
997
998    let spec = specifier_node?;
999    let typedef_name = typedef_name?;
1000
1001    let mut layout =
1002        parse_struct_or_union_specifier(source, spec, arch, is_union, aliases, pragma_pack)?;
1003    if layout.name == "<anonymous>" {
1004        layout.name = typedef_name;
1005    }
1006    Some(layout)
1007}
1008
/// Extract a lock guard expression from the raw text of a C/C++ field
/// declaration annotated with `__attribute__((guarded_by(X)))` or
/// `__attribute__((pt_guarded_by(X)))`.
///
/// Also recognises the common macro forms `GUARDED_BY(X)` and `PT_GUARDED_BY(X)`
/// (Clang thread-safety analysis). Matching is purely textual, so it works
/// regardless of how tree-sitter structures the inner tokens.
///
/// Details:
/// - Only the all-lower-case and all-upper-case spellings are recognised;
///   when both appear, a lower-case match takes precedence.
/// - Every occurrence of a keyword is tried, so an identifier that merely
///   contains `guarded_by` does not hide a real annotation later in the text.
/// - The argument extends to the parenthesis that balances the opening one,
///   so `GUARDED_BY(obj.lock())` yields `obj.lock()`.
/// - Surrounding whitespace and double quotes are stripped from the result.
///
/// Returns `None` when no keyword is followed by a non-empty, properly closed
/// argument list.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    // `pt_guarded_by` / `PT_GUARDED_BY` end with the plain keyword, so the two
    // base spellings cover all four forms.
    for kw in ["guarded_by", "GUARDED_BY"] {
        for (pos, _) in field_source.match_indices(kw) {
            // Expect `(` optionally preceded by whitespace.
            let after = field_source[pos + kw.len()..].trim_start();
            let Some(inner) = after.strip_prefix('(') else {
                continue;
            };

            // Walk to the `)` that closes the opening parenthesis.
            let mut depth = 1usize;
            let close = inner.char_indices().find_map(|(idx, ch)| {
                match ch {
                    '(' => depth += 1,
                    ')' => depth -= 1,
                    _ => {}
                }
                (depth == 0).then_some(idx)
            });
            let Some(end) = close else {
                continue;
            };

            let guard = inner[..end].trim().trim_matches('"');
            if !guard.is_empty() {
                return Some(guard.to_string());
            }
        }
    }
    None
}
1036
1037/// Parse a numeric value from an `alignas_qualifier` node: `alignas(N)`.
1038/// tree-sitter-cpp uses the node kind `alignas_qualifier` for C++11 `alignas`.
1039/// Returns `None` when the specifier contains a type expression rather than
1040/// an integer literal (e.g. `alignas(double)` — handled elsewhere by the
1041/// compiler; we skip those conservatively).
1042fn parse_alignas_value(source: &str, node: Node<'_>) -> Option<usize> {
1043    for i in 0..node.child_count() {
1044        if let Some(child) = node.child(i) {
1045            match child.kind() {
1046                "number_literal" | "integer_literal" | "integer" => {
1047                    let text = source[child.byte_range()].trim();
1048                    if let Ok(n) = text.parse::<usize>() {
1049                        return Some(n);
1050                    }
1051                    // Hex literal: 0x40
1052                    if let Some(hex) = text.strip_prefix("0x").or_else(|| text.strip_prefix("0X")) {
1053                        return usize::from_str_radix(hex, 16).ok();
1054                    }
1055                }
1056                // Recurse for nested nodes (parenthesised expression, etc.)
1057                "parenthesized_expression" | "argument_list" | "alignas_qualifier" => {
1058                    if let r @ Some(_) = parse_alignas_value(source, child) {
1059                        return r;
1060                    }
1061                }
1062                _ => {}
1063            }
1064        }
1065    }
1066    None
1067}
1068
1069/// Returns `(ty, field_name, guard, alignas_override)`.
1070/// `alignas_override` is `Some(N)` when the field carries `alignas(N)`.
1071/// Detect and parse an anonymous nested struct/union field declaration, e.g.:
1072///
1073/// ```c
1074/// struct Packet {
1075///     union {                    // ← anonymous nested union
1076///         uint32_t raw;
1077///         struct { uint8_t a; uint8_t b; uint8_t c; uint8_t d; };
1078///     };
1079///     uint64_t timestamp;
1080/// };
1081/// ```
1082///
1083/// A `field_declaration` is anonymous if it contains a `struct_specifier` or
1084/// `union_specifier` child that has a `field_declaration_list` (i.e. a body)
1085/// but no `type_identifier` (i.e. no name). The fields of the nested
1086/// struct/union are flattened into the parent.
1087///
1088/// Returns `None` if the declaration is not an anonymous nested struct/union
1089/// (the caller should fall through to `parse_field_declaration`).
1090/// (field_name, type_text, guard, alignas_override, source_line_1based)
1091type RawField = (String, String, Option<String>, Option<usize>, u32);
1092
1093#[allow(clippy::only_used_in_recursion)]
1094fn parse_anonymous_nested(
1095    source: &str,
1096    node: Node<'_>,
1097    arch: &'static ArchConfig,
1098    parent_is_union: bool,
1099) -> Option<Vec<RawField>> {
1100    // Find a struct_specifier or union_specifier child.
1101    for i in 0..node.child_count() {
1102        let child = node.child(i)?;
1103        if child.kind() != "struct_specifier" && child.kind() != "union_specifier" {
1104            continue;
1105        }
1106        let nested_is_union = child.kind() == "union_specifier";
1107
1108        // Must have a body (field_declaration_list) but no type_identifier.
1109        let mut has_name = false;
1110        let mut body_node: Option<Node> = None;
1111        for j in 0..child.child_count() {
1112            let sub = child.child(j)?;
1113            match sub.kind() {
1114                "type_identifier" => has_name = true,
1115                "field_declaration_list" => body_node = Some(sub),
1116                _ => {}
1117            }
1118        }
1119
1120        if has_name || body_node.is_none() {
1121            // Named struct/union used as a field type — handled by parse_field_declaration.
1122            continue;
1123        }
1124
1125        let body = body_node?;
1126        let mut nested_raw: Vec<RawField> = Vec::new();
1127
1128        for j in 0..body.child_count() {
1129            let inner = body.child(j)?;
1130            if inner.kind() == "field_declaration" {
1131                // Recurse to handle doubly-nested anonymous structs.
1132                if let Some(deeper) = parse_anonymous_nested(source, inner, arch, nested_is_union) {
1133                    nested_raw.extend(deeper);
1134                } else if let Some((ty, fname, guard, al, ln)) =
1135                    parse_field_declaration(source, inner)
1136                {
1137                    nested_raw.push((fname, ty, guard, al, ln));
1138                }
1139            }
1140        }
1141
1142        // If nested is a union, the fields all share offset 0 (relative to the
1143        // union's placement in the parent). We can't easily track this through
1144        // raw field lists, so we emit them as a synthetic __anon_union_N field
1145        // when the parent cares about offsets, or just flatten for unions.
1146        //
1147        // For simplicity: flatten all fields — the layout simulator will compute
1148        // correct offsets if the parent is a struct, and union semantics are
1149        // preserved when the parent is a union.
1150        let _ = (nested_is_union, parent_is_union);
1151
1152        if !nested_raw.is_empty() {
1153            return Some(nested_raw);
1154        }
1155    }
1156    None
1157}
1158
1159fn parse_field_declaration(source: &str, node: Node<'_>) -> Option<RawField> {
1160    let mut ty_parts: Vec<String> = Vec::new();
1161    let mut field_name: Option<String> = None;
1162    // Bit-field width, e.g. `int flags : 3;` → Some("3")
1163    let mut bit_width: Option<String> = None;
1164    // Collect attribute text for guard extraction
1165    let mut attr_text = String::new();
1166    // Field-level alignas override
1167    let mut alignas_override: Option<usize> = None;
1168
1169    for i in 0..node.child_count() {
1170        let child = node.child(i)?;
1171        match child.kind() {
1172            "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
1173                ty_parts.push(source[child.byte_range()].trim().to_string());
1174            }
1175            // C++ qualified types: std::mutex, ns::Type, etc.
1176            // C++ template types:  std::atomic<uint64_t>, std::vector<int>, etc.
1177            "qualified_identifier" | "template_type" => {
1178                ty_parts.push(source[child.byte_range()].trim().to_string());
1179            }
1180            // Nested struct/union used as a field type: `struct Vec2 tl;`
1181            // Extract just the type_identifier name (e.g. "Vec2") so the
1182            // nested-struct resolution pass can match it by name.
1183            "struct_specifier" | "union_specifier" => {
1184                for j in 0..child.child_count() {
1185                    if let Some(sub) = child.child(j)
1186                        && sub.kind() == "type_identifier"
1187                    {
1188                        ty_parts.push(source[sub.byte_range()].trim().to_string());
1189                        break;
1190                    }
1191                }
1192            }
1193            "field_identifier" => {
1194                field_name = Some(source[child.byte_range()].trim().to_string());
1195            }
1196            "pointer_declarator" => {
1197                field_name = extract_identifier(source, child);
1198                ty_parts.push("*".to_string());
1199            }
1200            // Bit-field clause: `: N`  (tree-sitter-c/cpp node)
1201            "bitfield_clause" => {
1202                let text = source[child.byte_range()].trim();
1203                // Strip leading ':' and whitespace to get just the width digits
1204                bit_width = Some(text.trim_start_matches(':').trim().to_string());
1205            }
1206            // GNU attribute specifier: __attribute__((...))
1207            "attribute_specifier" | "attribute" => {
1208                attr_text.push_str(source[child.byte_range()].trim());
1209                attr_text.push(' ');
1210            }
1211            // C++11 alignas: tree-sitter-cpp wraps it as type_qualifier → alignas_qualifier
1212            // Also handle the direct form in case grammar versions differ.
1213            "alignas_qualifier" | "alignas_specifier" if alignas_override.is_none() => {
1214                alignas_override = parse_alignas_value(source, child);
1215            }
1216            // type_qualifier wraps alignas_qualifier for field declarations:
1217            // `alignas(8) char c;` → type_qualifier { alignas_qualifier { ... } }
1218            "type_qualifier" if alignas_override.is_none() => {
1219                for j in 0..child.child_count() {
1220                    if let Some(sub) = child.child(j)
1221                        && (sub.kind() == "alignas_qualifier" || sub.kind() == "alignas_specifier")
1222                    {
1223                        alignas_override = parse_alignas_value(source, sub);
1224                        break;
1225                    }
1226                }
1227            }
1228            _ => {}
1229        }
1230    }
1231
1232    let base_ty = ty_parts.join(" ");
1233    let fname = field_name?;
1234    if base_ty.is_empty() {
1235        return None;
1236    }
1237    // Annotate bit-field types as "type:N" so callers can detect and report them;
1238    // `strip_bitfield_suffix` recovers the base type for size/align lookup.
1239    let ty = if let Some(w) = bit_width {
1240        format!("{base_ty}:{w}")
1241    } else {
1242        base_ty
1243    };
1244
1245    // Also check the full field source text (attribute_specifier may not always
1246    // be a direct child depending on tree-sitter grammar version).
1247    let field_src = source[node.byte_range()].to_string();
1248    let guard = extract_guard_from_c_field_text(&attr_text)
1249        .or_else(|| extract_guard_from_c_field_text(&field_src));
1250
1251    let line = node.start_position().row as u32 + 1;
1252    Some((ty, fname, guard, alignas_override, line))
1253}
1254
1255fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
1256    if node.kind() == "field_identifier" || node.kind() == "identifier" {
1257        return Some(source[node.byte_range()].to_string());
1258    }
1259    for i in 0..node.child_count() {
1260        if let Some(child) = node.child(i)
1261            && let Some(name) = extract_identifier(source, child)
1262        {
1263            return Some(name);
1264        }
1265    }
1266    None
1267}
1268
1269// ── public API ────────────────────────────────────────────────────────────────
1270
1271pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1272    let mut parser = Parser::new();
1273    parser.set_language(&tree_sitter_c::LANGUAGE.into())?;
1274    let tree = parser
1275        .parse(source, None)
1276        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1277    let mut layouts = Vec::new();
1278    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1279    Ok(layouts)
1280}
1281
1282pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1283    let mut parser = Parser::new();
1284    parser.set_language(&tree_sitter_cpp::LANGUAGE.into())?;
1285    let tree = parser
1286        .parse(source, None)
1287        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1288    let mut layouts = Vec::new();
1289    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1290    Ok(layouts)
1291}
1292
1293// ── tests ─────────────────────────────────────────────────────────────────────
1294
1295#[cfg(test)]
1296mod tests {
1297    use super::*;
1298    use padlock_core::arch::X86_64_SYSV;
1299
1300    #[test]
1301    fn parse_simple_c_struct() {
1302        let src = r#"
1303struct Point {
1304    int x;
1305    int y;
1306};
1307"#;
1308        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1309        assert_eq!(layouts.len(), 1);
1310        assert_eq!(layouts[0].name, "Point");
1311        assert_eq!(layouts[0].fields.len(), 2);
1312        assert_eq!(layouts[0].fields[0].name, "x");
1313        assert_eq!(layouts[0].fields[1].name, "y");
1314    }
1315
1316    #[test]
1317    fn parse_typedef_struct() {
1318        let src = r#"
1319typedef struct {
1320    char  is_active;
1321    double timeout;
1322    int   port;
1323} Connection;
1324"#;
1325        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1326        assert_eq!(layouts.len(), 1);
1327        assert_eq!(layouts[0].name, "Connection");
1328        assert_eq!(layouts[0].fields.len(), 3);
1329    }
1330
1331    #[test]
1332    fn c_layout_computes_offsets() {
1333        let src = "struct T { char a; double b; };";
1334        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1335        assert_eq!(layouts.len(), 1);
1336        let layout = &layouts[0];
1337        // char at offset 0, double at offset 8 (7 bytes padding)
1338        assert_eq!(layout.fields[0].offset, 0);
1339        assert_eq!(layout.fields[1].offset, 8);
1340        assert_eq!(layout.total_size, 16);
1341    }
1342
1343    #[test]
1344    fn c_layout_detects_padding() {
1345        let src = "struct T { char a; int b; };";
1346        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1347        let gaps = padlock_core::ir::find_padding(&layouts[0]);
1348        assert!(!gaps.is_empty());
1349        assert_eq!(gaps[0].bytes, 3); // 3 bytes padding between char and int
1350    }
1351
1352    #[test]
1353    fn parse_cpp_struct() {
1354        let src = "struct Vec3 { float x; float y; float z; };";
1355        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1356        assert_eq!(layouts.len(), 1);
1357        assert_eq!(layouts[0].fields.len(), 3);
1358    }
1359
1360    // ── SIMD types ────────────────────────────────────────────────────────────
1361
1362    #[test]
1363    fn simd_sse_field_size_and_align() {
1364        let src = "struct Vecs { __m128 a; __m256 b; };";
1365        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1366        assert_eq!(layouts.len(), 1);
1367        let f = &layouts[0].fields;
1368        assert_eq!(f[0].size, 16); // __m128
1369        assert_eq!(f[0].align, 16);
1370        assert_eq!(f[1].size, 32); // __m256
1371        assert_eq!(f[1].align, 32);
1372    }
1373
1374    #[test]
1375    fn simd_avx512_size() {
1376        let src = "struct Wide { __m512 v; };";
1377        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1378        assert_eq!(layouts[0].fields[0].size, 64);
1379        assert_eq!(layouts[0].fields[0].align, 64);
1380    }
1381
1382    #[test]
1383    fn simd_padding_detected_when_small_field_before_avx() {
1384        // char(1) + [31 pad] + __m256(32) = 64 bytes, 31 wasted
1385        let src = "struct Mixed { char flag; __m256 data; };";
1386        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1387        let gaps = padlock_core::ir::find_padding(&layouts[0]);
1388        assert!(!gaps.is_empty());
1389        assert_eq!(gaps[0].bytes, 31);
1390    }
1391
1392    // ── union parsing ─────────────────────────────────────────────────────────
1393
1394    #[test]
1395    fn union_fields_all_at_offset_zero() {
1396        let src = "union Data { int i; float f; double d; };";
1397        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1398        assert_eq!(layouts.len(), 1);
1399        let u = &layouts[0];
1400        assert!(u.is_union);
1401        for field in &u.fields {
1402            assert_eq!(
1403                field.offset, 0,
1404                "union field '{}' should be at offset 0",
1405                field.name
1406            );
1407        }
1408    }
1409
1410    #[test]
1411    fn union_total_size_is_max_field() {
1412        // double is the largest (8 bytes); total should be 8
1413        let src = "union Data { int i; float f; double d; };";
1414        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1415        assert_eq!(layouts[0].total_size, 8);
1416    }
1417
1418    #[test]
1419    fn union_no_padding_finding() {
1420        let src = "union Data { int i; double d; };";
1421        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1422        let report = padlock_core::findings::Report::from_layouts(&layouts);
1423        let sr = &report.structs[0];
1424        assert!(
1425            !sr.findings
1426                .iter()
1427                .any(|f| matches!(f, padlock_core::findings::Finding::PaddingWaste { .. }))
1428        );
1429        assert!(
1430            !sr.findings
1431                .iter()
1432                .any(|f| matches!(f, padlock_core::findings::Finding::ReorderSuggestion { .. }))
1433        );
1434    }
1435
1436    #[test]
1437    fn typedef_union_parsed() {
1438        let src = "typedef union { int a; double b; } Value;";
1439        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1440        assert_eq!(layouts.len(), 1);
1441        assert_eq!(layouts[0].name, "Value");
1442        assert!(layouts[0].is_union);
1443    }
1444
1445    // ── attribute guard extraction ─────────────────────────────────────────────
1446
1447    #[test]
1448    fn extract_guard_from_c_guarded_by_macro() {
1449        let text = "int value GUARDED_BY(mu);";
1450        let guard = extract_guard_from_c_field_text(text);
1451        assert_eq!(guard.as_deref(), Some("mu"));
1452    }
1453
1454    #[test]
1455    fn extract_guard_from_c_attribute_specifier() {
1456        let text = "__attribute__((guarded_by(counter_lock))) uint64_t counter;";
1457        let guard = extract_guard_from_c_field_text(text);
1458        assert_eq!(guard.as_deref(), Some("counter_lock"));
1459    }
1460
1461    #[test]
1462    fn extract_guard_pt_guarded_by() {
1463        let text = "int *ptr PT_GUARDED_BY(ptr_lock);";
1464        let guard = extract_guard_from_c_field_text(text);
1465        assert_eq!(guard.as_deref(), Some("ptr_lock"));
1466    }
1467
1468    #[test]
1469    fn no_guard_returns_none() {
1470        let guard = extract_guard_from_c_field_text("int x;");
1471        assert!(guard.is_none());
1472    }
1473
1474    #[test]
1475    fn c_struct_guarded_by_sets_concurrent_access() {
1476        // Using GUARDED_BY macro style in comments/text — tree-sitter won't parse
1477        // macro expansions, so test the text-extraction path via parse_field_declaration
1478        // indirectly by checking extract_guard_from_c_field_text.
1479        let text = "uint64_t readers GUARDED_BY(lock_a);";
1480        assert_eq!(
1481            extract_guard_from_c_field_text(text).as_deref(),
1482            Some("lock_a")
1483        );
1484    }
1485
1486    #[test]
1487    fn c_struct_different_guards_detected_as_false_sharing() {
1488        use padlock_core::arch::X86_64_SYSV;
1489        use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
1490
1491        // Manually build a layout with two fields on the same cache line,
1492        // different guards — mirrors what the C frontend would produce for
1493        // __attribute__((guarded_by(...))) annotated fields.
1494        let mut layout = StructLayout {
1495            name: "S".into(),
1496            total_size: 128,
1497            align: 8,
1498            fields: vec![
1499                Field {
1500                    name: "readers".into(),
1501                    ty: TypeInfo::Primitive {
1502                        name: "uint64_t".into(),
1503                        size: 8,
1504                        align: 8,
1505                    },
1506                    offset: 0,
1507                    size: 8,
1508                    align: 8,
1509                    source_file: None,
1510                    source_line: None,
1511                    access: AccessPattern::Concurrent {
1512                        guard: Some("lock_a".into()),
1513                        is_atomic: false,
1514                        is_annotated: true,
1515                    },
1516                },
1517                Field {
1518                    name: "writers".into(),
1519                    ty: TypeInfo::Primitive {
1520                        name: "uint64_t".into(),
1521                        size: 8,
1522                        align: 8,
1523                    },
1524                    offset: 8,
1525                    size: 8,
1526                    align: 8,
1527                    source_file: None,
1528                    source_line: None,
1529                    access: AccessPattern::Concurrent {
1530                        guard: Some("lock_b".into()),
1531                        is_atomic: false,
1532                        is_annotated: true,
1533                    },
1534                },
1535            ],
1536            source_file: None,
1537            source_line: None,
1538            arch: &X86_64_SYSV,
1539            is_packed: false,
1540            is_union: false,
1541            is_repr_rust: false,
1542            suppressed_findings: Vec::new(),
1543            uncertain_fields: Vec::new(),
1544        };
1545        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
1546            &layout
1547        ));
1548        // Same guard → no false sharing
1549        layout.fields[1].access = AccessPattern::Concurrent {
1550            guard: Some("lock_a".into()),
1551            is_atomic: false,
1552            is_annotated: true,
1553        };
1554        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
1555            &layout
1556        ));
1557    }
1558
1559    // ── C++ class: vtable pointer ─────────────────────────────────────────────
1560
1561    #[test]
1562    fn cpp_class_with_virtual_method_has_vptr() {
1563        let src = r#"
1564class Widget {
1565    virtual void draw();
1566    int x;
1567    int y;
1568};
1569"#;
1570        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1571        assert_eq!(layouts.len(), 1);
1572        let l = &layouts[0];
1573        // First field must be __vptr
1574        assert_eq!(l.fields[0].name, "__vptr");
1575        assert_eq!(l.fields[0].size, 8); // pointer on x86_64
1576        // __vptr is at offset 0
1577        assert_eq!(l.fields[0].offset, 0);
1578        // int x should come after the pointer (at offset 8)
1579        let x = l.fields.iter().find(|f| f.name == "x").unwrap();
1580        assert_eq!(x.offset, 8);
1581    }
1582
1583    #[test]
1584    fn cpp_class_without_virtual_has_no_vptr() {
1585        let src = "class Plain { int a; int b; };";
1586        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1587        assert_eq!(layouts.len(), 1);
1588        assert!(!layouts[0].fields.iter().any(|f| f.name == "__vptr"));
1589    }
1590
1591    #[test]
1592    fn cpp_struct_keyword_with_virtual_has_vptr() {
1593        // `struct` in C++ can also have virtual methods
1594        let src = "struct IFoo { virtual ~IFoo(); virtual void bar(); };";
1595        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1596        // struct_specifier doesn't go through parse_class_specifier, so no __vptr
1597        // (vtable injection is only for `class` nodes)
1598        let _ = layouts; // just verify it parses without panic
1599    }
1600
1601    // ── C++ class: single inheritance ─────────────────────────────────────────
1602
1603    #[test]
1604    fn cpp_derived_class_has_base_slot() {
1605        let src = r#"
1606class Base {
1607    int x;
1608};
1609class Derived : public Base {
1610    int y;
1611};
1612"#;
1613        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1614        // Both Base and Derived should be parsed
1615        let derived = layouts.iter().find(|l| l.name == "Derived").unwrap();
1616        // Derived must have a __base_Base synthetic field
1617        assert!(
1618            derived.fields.iter().any(|f| f.name == "__base_Base"),
1619            "Derived should have a __base_Base field"
1620        );
1621        // The y field should come after __base_Base
1622        let base_field = derived
1623            .fields
1624            .iter()
1625            .find(|f| f.name == "__base_Base")
1626            .unwrap();
1627        let y_field = derived.fields.iter().find(|f| f.name == "y").unwrap();
1628        assert!(y_field.offset >= base_field.offset + base_field.size);
1629    }
1630
1631    #[test]
1632    fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
1633        let src = r#"
1634class A { int a; };
1635class B { int b; };
1636class C : public A, public B { int c; };
1637"#;
1638        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1639        let c = layouts.iter().find(|l| l.name == "C").unwrap();
1640        assert!(c.fields.iter().any(|f| f.name == "__base_A"));
1641        assert!(c.fields.iter().any(|f| f.name == "__base_B"));
1642    }
1643
1644    #[test]
1645    fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
1646        // class with virtual method: size = sizeof(__vptr) + member fields + padding
1647        let src = "class V { virtual void f(); int x; };";
1648        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1649        let l = &layouts[0];
1650        // __vptr(8) + int(4) + 4 pad = 16 bytes on x86_64
1651        assert_eq!(l.total_size, 16);
1652    }
1653
1654    // ── bitfield handling ─────────────────────────────────────────────────────
1655
1656    #[test]
1657    fn is_bitfield_type_detects_colon_n() {
1658        assert!(is_bitfield_type("int:3"));
1659        assert!(is_bitfield_type("unsigned int:16"));
1660        assert!(is_bitfield_type("uint32_t:1"));
1661        // Not bit-fields — contains ':' but not followed by pure digits
1662        assert!(!is_bitfield_type("std::atomic<int>"));
1663        assert!(!is_bitfield_type("ns::Type"));
1664        assert!(!is_bitfield_type("int"));
1665    }
1666
1667    #[test]
1668    fn struct_with_bitfields_is_skipped() {
1669        // Bit-field layout is compiler-controlled and cannot be accurately modelled
1670        // without a compiler. The struct must be skipped entirely.
1671        let src = r#"
1672struct Flags {
1673    unsigned int active : 1;
1674    unsigned int ready  : 1;
1675    unsigned int error  : 6;
1676    int value;
1677};
1678"#;
1679        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1680        // Flags must not appear — its layout cannot be accurately computed.
1681        assert!(
1682            layouts.iter().all(|l| l.name != "Flags"),
1683            "struct with bitfields should be skipped; got {:?}",
1684            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
1685        );
1686    }
1687
1688    #[test]
1689    fn struct_without_bitfields_is_still_parsed() {
1690        // Ensure the bitfield guard doesn't affect normal structs.
1691        let src = "struct Normal { int a; char b; double c; };";
1692        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1693        assert_eq!(layouts.len(), 1);
1694        assert_eq!(layouts[0].name, "Normal");
1695    }
1696
1697    #[test]
1698    fn c_struct_fields_have_source_lines() {
1699        let src = "struct Point {\n    int x;\n    int y;\n};";
1700        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1701        assert_eq!(layouts.len(), 1);
1702        let fields = &layouts[0].fields;
1703        // x is on line 2, y is on line 3
1704        assert_eq!(fields[0].source_line, Some(2), "x should be line 2");
1705        assert_eq!(fields[1].source_line, Some(3), "y should be line 3");
1706    }
1707
1708    #[test]
1709    fn cpp_class_with_bitfields_is_skipped() {
1710        let src = "class Packed { int x : 4; int y : 4; };";
1711        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1712        assert!(
1713            layouts.iter().all(|l| l.name != "Packed"),
1714            "C++ class with bitfields should be skipped"
1715        );
1716    }
1717
1718    #[test]
1719    fn all_bitfield_struct_is_skipped() {
1720        // Struct with ONLY bit-field members (no normal fields).
1721        // raw_fields is non-empty but all entries carry the `:N` annotation,
1722        // so the bit-field guard must still fire and skip the struct.
1723        let src = "struct BitPacked { int x:4; int y:4; };";
1724        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1725        assert!(
1726            layouts.iter().all(|l| l.name != "BitPacked"),
1727            "all-bitfield struct should be skipped; got {:?}",
1728            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
1729        );
1730    }
1731
1732    // ── __attribute__((packed)) detection ─────────────────────────────────────
1733
1734    #[test]
1735    fn packed_struct_has_no_alignment_padding() {
1736        // Without packed: char(1) + 3-byte pad + int(4) + char(1) + 3-byte pad = 12 bytes
1737        // With packed:    char(1) + int(4) + char(1) = 6 bytes, align=1
1738        let src = r#"
1739struct __attribute__((packed)) Tight {
1740    char a;
1741    int  b;
1742    char c;
1743};
1744"#;
1745        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1746        let l = layouts.iter().find(|l| l.name == "Tight").expect("Tight");
1747        assert!(l.is_packed, "should be marked is_packed");
1748        assert_eq!(l.total_size, 6, "packed: no padding inserted");
1749        assert_eq!(l.fields[0].offset, 0);
1750        assert_eq!(l.fields[1].offset, 1); // immediately after char
1751        assert_eq!(l.fields[2].offset, 5);
1752    }
1753
1754    #[test]
1755    fn non_packed_struct_has_normal_alignment_padding() {
1756        // Confirm baseline: same struct without __attribute__((packed)) gets padded
1757        let src = r#"
1758struct Normal {
1759    char a;
1760    int  b;
1761    char c;
1762};
1763"#;
1764        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1765        let l = layouts.iter().find(|l| l.name == "Normal").expect("Normal");
1766        assert!(!l.is_packed);
1767        assert_eq!(l.total_size, 12);
1768        assert_eq!(l.fields[1].offset, 4); // aligned to 4
1769    }
1770
1771    #[test]
1772    fn cpp_class_packed_attribute_detected() {
1773        let src = r#"
1774class __attribute__((packed)) Dense {
1775    char a;
1776    int  b;
1777};
1778"#;
1779        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1780        let l = layouts.iter().find(|l| l.name == "Dense").expect("Dense");
1781        assert!(
1782            l.is_packed,
1783            "C++ class with __attribute__((packed)) must be marked packed"
1784        );
1785        assert_eq!(l.total_size, 5); // char(1) + int(4), no padding
1786    }
1787
1788    // ── alignas detection ─────────────────────────────────────────────────────
1789
1790    #[test]
1791    fn field_alignas_overrides_natural_alignment() {
1792        // char is normally align=1 but alignas(8) forces it to align-8.
1793        // Layout: c(1B at offset 0, align=8) + x(4B at offset 4, align=4)
1794        // c must start on an 8-byte boundary (trivially satisfied at offset 0).
1795        // After c (1 byte), x aligns to 4: offset = 1.next_multiple_of(4) = 4.
1796        // Struct align = max(8, 4) = 8. Total = 8 bytes (4+4 → 8 → ok for align 8).
1797        let src = r#"
1798struct S {
1799    alignas(8) char c;
1800    int x;
1801};
1802"#;
1803        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1804        let l = layouts.iter().find(|l| l.name == "S").expect("S");
1805        // c should be forced to align 8
1806        let c_field = l.fields.iter().find(|f| f.name == "c").unwrap();
1807        assert_eq!(c_field.align, 8);
1808        // x comes after c (1 byte) with natural alignment 4 → offset 4
1809        let x_field = l.fields.iter().find(|f| f.name == "x").unwrap();
1810        assert_eq!(x_field.offset, 4);
1811        // Struct alignment is max(alignas(8), int align 4) = 8
1812        assert_eq!(l.align, 8);
1813        // Total = 8 bytes (x at 4, size 4; 4+4=8; 8 is multiple of align 8)
1814        assert_eq!(l.total_size, 8);
1815    }
1816
1817    #[test]
1818    fn struct_level_alignas_increases_struct_alignment() {
1819        // alignas(64) on the struct means its alignment requirement is 64.
1820        // Total size must be a multiple of 64.
1821        let src = r#"
1822struct alignas(64) CacheLine {
1823    int x;
1824    int y;
1825};
1826"#;
1827        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1828        let l = layouts
1829            .iter()
1830            .find(|l| l.name == "CacheLine")
1831            .expect("CacheLine");
1832        assert_eq!(l.align, 64);
1833        assert_eq!(l.total_size % 64, 0);
1834    }
1835
1836    #[test]
1837    fn alignas_on_field_smaller_than_natural_is_ignored() {
1838        // alignas(1) on an int field: does NOT reduce alignment below 4.
1839        // In C++, alignas cannot reduce alignment below the natural alignment.
1840        // Our implementation stores the alignas value; natural alignment wins
1841        // because we take max(alignas, natural) in the caller.
1842        // Note: we currently store alignas directly; this test documents behaviour.
1843        let src = "struct S { int x; int y; };";
1844        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1845        let l = &layouts[0];
1846        assert_eq!(l.fields[0].align, 4); // natural alignment, not reduced
1847    }
1848
1849    #[test]
1850    fn cpp_class_alignas_detected() {
1851        let src = r#"
1852class alignas(32) Aligned {
1853    double x;
1854    double y;
1855};
1856"#;
1857        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1858        let l = layouts
1859            .iter()
1860            .find(|l| l.name == "Aligned")
1861            .expect("Aligned");
1862        assert_eq!(l.align, 32);
1863        assert_eq!(l.total_size % 32, 0);
1864    }
1865
1866    // ── bad weather: alignas edge cases ───────────────────────────────────────
1867
1868    #[test]
1869    fn struct_without_alignas_unchanged() {
1870        // Ensure the alignas detection path doesn't affect structs without it
1871        let src = "struct Plain { int a; char b; };";
1872        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1873        let l = &layouts[0];
1874        assert_eq!(l.align, 4); // max field alignment = int = 4
1875        assert_eq!(l.total_size, 8); // int(4) + char(1) + 3 pad
1876    }
1877
1878    // ── anonymous nested structs/unions ───────────────────────────────────────
1879
1880    #[test]
1881    fn anonymous_nested_union_fields_flattened() {
1882        let src = r#"
1883struct Packet {
1884    union {
1885        uint32_t raw;
1886        uint8_t bytes[4];
1887    };
1888    uint64_t timestamp;
1889};
1890"#;
1891        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1892        let l = layouts.iter().find(|l| l.name == "Packet").expect("Packet");
1893        // raw, bytes (or similar) and timestamp must all be present
1894        assert!(
1895            l.fields.iter().any(|f| f.name == "raw"),
1896            "raw field must be flattened into Packet"
1897        );
1898        assert!(
1899            l.fields.iter().any(|f| f.name == "timestamp"),
1900            "timestamp must be present"
1901        );
1902    }
1903
1904    #[test]
1905    fn anonymous_nested_struct_fields_flattened() {
1906        let src = r#"
1907struct Outer {
1908    struct {
1909        int x;
1910        int y;
1911    };
1912    double z;
1913};
1914"#;
1915        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1916        let l = layouts.iter().find(|l| l.name == "Outer").expect("Outer");
1917        assert!(
1918            l.fields.iter().any(|f| f.name == "x"),
1919            "x must be flattened"
1920        );
1921        assert!(
1922            l.fields.iter().any(|f| f.name == "y"),
1923            "y must be flattened"
1924        );
1925        assert!(l.fields.iter().any(|f| f.name == "z"), "z present");
1926        // Total: x(4) + y(4) + z(8) = 16 bytes, no padding
1927        assert_eq!(l.total_size, 16);
1928    }
1929
1930    #[test]
1931    fn named_nested_struct_not_flattened() {
1932        // A named struct used as a field type must NOT be flattened
1933        let src = r#"
1934struct Vec2 { float x; float y; };
1935struct Rect { struct Vec2 tl; struct Vec2 br; };
1936"#;
1937        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1938        let rect = layouts.iter().find(|l| l.name == "Rect").expect("Rect");
1939        // Should have tl and br as opaque fields, not x/y flattened
1940        assert_eq!(rect.fields.len(), 2);
1941        assert!(rect.fields.iter().any(|f| f.name == "tl"));
1942        assert!(rect.fields.iter().any(|f| f.name == "br"));
1943    }
1944
1945    // ── type-table tests ──────────────────────────────────────────────────────
1946
1947    #[test]
1948    fn linux_kernel_types_correct_size() {
1949        // u8/u16/u32/u64 and s8/s16/s32/s64 (linux/types.h)
1950        assert_eq!(c_type_size_align("u8", &X86_64_SYSV), (1, 1));
1951        assert_eq!(c_type_size_align("u16", &X86_64_SYSV), (2, 2));
1952        assert_eq!(c_type_size_align("u32", &X86_64_SYSV), (4, 4));
1953        assert_eq!(c_type_size_align("u64", &X86_64_SYSV), (8, 8));
1954        assert_eq!(c_type_size_align("s8", &X86_64_SYSV), (1, 1));
1955        assert_eq!(c_type_size_align("s16", &X86_64_SYSV), (2, 2));
1956        assert_eq!(c_type_size_align("s32", &X86_64_SYSV), (4, 4));
1957        assert_eq!(c_type_size_align("s64", &X86_64_SYSV), (8, 8));
1958    }
1959
1960    #[test]
1961    fn linux_kernel_dunder_types_correct_size() {
1962        assert_eq!(c_type_size_align("__u8", &X86_64_SYSV), (1, 1));
1963        assert_eq!(c_type_size_align("__u16", &X86_64_SYSV), (2, 2));
1964        assert_eq!(c_type_size_align("__u32", &X86_64_SYSV), (4, 4));
1965        assert_eq!(c_type_size_align("__u64", &X86_64_SYSV), (8, 8));
1966        assert_eq!(c_type_size_align("__s8", &X86_64_SYSV), (1, 1));
1967        assert_eq!(c_type_size_align("__s64", &X86_64_SYSV), (8, 8));
1968        // Endian-annotated types are same width as their base
1969        assert_eq!(c_type_size_align("__be16", &X86_64_SYSV), (2, 2));
1970        assert_eq!(c_type_size_align("__le32", &X86_64_SYSV), (4, 4));
1971        assert_eq!(c_type_size_align("__be64", &X86_64_SYSV), (8, 8));
1972    }
1973
1974    #[test]
1975    fn c99_fast_types_correct_size() {
1976        // fast8/16 are their natural width
1977        assert_eq!(c_type_size_align("uint_fast8_t", &X86_64_SYSV), (1, 1));
1978        assert_eq!(c_type_size_align("uint_fast16_t", &X86_64_SYSV), (2, 2));
1979        // fast32/64 are pointer-sized on 64-bit
1980        assert_eq!(c_type_size_align("uint_fast32_t", &X86_64_SYSV), (8, 8));
1981        assert_eq!(c_type_size_align("uint_fast64_t", &X86_64_SYSV), (8, 8));
1982        // least types are their minimum guaranteed width
1983        assert_eq!(c_type_size_align("uint_least8_t", &X86_64_SYSV), (1, 1));
1984        assert_eq!(c_type_size_align("uint_least32_t", &X86_64_SYSV), (4, 4));
1985        assert_eq!(c_type_size_align("uint_least64_t", &X86_64_SYSV), (8, 8));
1986        assert_eq!(c_type_size_align("intmax_t", &X86_64_SYSV), (8, 8));
1987        assert_eq!(c_type_size_align("uintmax_t", &X86_64_SYSV), (8, 8));
1988    }
1989
1990    #[test]
1991    fn gcc_int128_correct_size() {
1992        assert_eq!(c_type_size_align("__int128", &X86_64_SYSV), (16, 16));
1993        assert_eq!(c_type_size_align("__uint128", &X86_64_SYSV), (16, 16));
1994        assert_eq!(c_type_size_align("__int128_t", &X86_64_SYSV), (16, 16));
1995        // unsigned __int128 — "unsigned " prefix is stripped, then __int128 matched
1996        assert_eq!(
1997            c_type_size_align("unsigned __int128", &X86_64_SYSV),
1998            (16, 16)
1999        );
2000    }
2001
2002    #[test]
2003    fn windows_types_correct_size() {
2004        assert_eq!(c_type_size_align("BYTE", &X86_64_SYSV), (1, 1));
2005        assert_eq!(c_type_size_align("WORD", &X86_64_SYSV), (2, 2));
2006        assert_eq!(c_type_size_align("DWORD", &X86_64_SYSV), (4, 4));
2007        assert_eq!(c_type_size_align("QWORD", &X86_64_SYSV), (8, 8));
2008        assert_eq!(c_type_size_align("BOOL", &X86_64_SYSV), (4, 4));
2009        assert_eq!(c_type_size_align("UINT8", &X86_64_SYSV), (1, 1));
2010        assert_eq!(c_type_size_align("INT32", &X86_64_SYSV), (4, 4));
2011        assert_eq!(c_type_size_align("UINT64", &X86_64_SYSV), (8, 8));
2012        assert_eq!(c_type_size_align("HANDLE", &X86_64_SYSV), (8, 8));
2013        assert_eq!(c_type_size_align("LPVOID", &X86_64_SYSV), (8, 8));
2014    }
2015
2016    #[test]
2017    fn char_types_correct_size() {
2018        assert_eq!(c_type_size_align("wchar_t", &X86_64_SYSV), (4, 4));
2019        assert_eq!(c_type_size_align("char8_t", &X86_64_SYSV), (1, 1));
2020        assert_eq!(c_type_size_align("char16_t", &X86_64_SYSV), (2, 2));
2021        assert_eq!(c_type_size_align("char32_t", &X86_64_SYSV), (4, 4));
2022    }
2023
2024    #[test]
2025    fn half_precision_types_correct_size() {
2026        assert_eq!(c_type_size_align("_Float16", &X86_64_SYSV), (2, 2));
2027        assert_eq!(c_type_size_align("__fp16", &X86_64_SYSV), (2, 2));
2028        assert_eq!(c_type_size_align("__bf16", &X86_64_SYSV), (2, 2));
2029        assert_eq!(c_type_size_align("_Float128", &X86_64_SYSV), (16, 16));
2030    }
2031
2032    #[test]
2033    fn unsigned_prefix_stripped_correctly() {
2034        // "unsigned short" → "short" → (2, 2)
2035        assert_eq!(c_type_size_align("unsigned short", &X86_64_SYSV), (2, 2));
2036        assert_eq!(c_type_size_align("unsigned int", &X86_64_SYSV), (4, 4));
2037        assert_eq!(
2038            c_type_size_align("unsigned long long", &X86_64_SYSV),
2039            (8, 8)
2040        );
2041        assert_eq!(
2042            c_type_size_align("long int", &X86_64_SYSV),
2043            (X86_64_SYSV.pointer_size, X86_64_SYSV.pointer_size)
2044        );
2045    }
2046
2047    #[test]
2048    fn linux_kernel_struct_with_new_types() {
2049        // Representative kernel-style struct using __u32, __be16, u8
2050        let src = r#"
2051struct NetHeader {
2052    __be32 src_ip;
2053    __be32 dst_ip;
2054    __be16 src_port;
2055    __be16 dst_port;
2056    u8     protocol;
2057    u8     ttl;
2058};
2059"#;
2060        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2061        assert_eq!(layouts.len(), 1);
2062        let l = &layouts[0];
2063        // 4+4+2+2+1+1 = 14B; max align is 4 (__be32) → padded to 16B
2064        assert_eq!(l.total_size, 16);
2065        assert_eq!(l.fields[0].size, 4); // __be32 src_ip
2066        assert_eq!(l.fields[2].size, 2); // __be16 src_port
2067        assert_eq!(l.fields[4].size, 1); // u8 protocol
2068    }
2069
2070    // ── C++ stdlib type tests ─────────────────────────────────────────────────
2071
2072    #[test]
2073    fn cpp_string_is_32_bytes() {
2074        assert_eq!(c_type_size_align("std::string", &X86_64_SYSV), (32, 8));
2075        assert_eq!(c_type_size_align("std::wstring", &X86_64_SYSV), (32, 8));
2076    }
2077
2078    #[test]
2079    fn cpp_string_view_is_two_words() {
2080        assert_eq!(c_type_size_align("std::string_view", &X86_64_SYSV), (16, 8));
2081    }
2082
2083    #[test]
2084    fn cpp_vector_is_24_bytes() {
2085        assert_eq!(c_type_size_align("std::vector<int>", &X86_64_SYSV), (24, 8));
2086        assert_eq!(
2087            c_type_size_align("std::vector<uint64_t>", &X86_64_SYSV),
2088            (24, 8)
2089        );
2090        // Size is independent of T
2091        assert_eq!(
2092            c_type_size_align("std::vector<std::string>", &X86_64_SYSV),
2093            (24, 8)
2094        );
2095    }
2096
2097    #[test]
2098    fn cpp_smart_pointers_correct_size() {
2099        // unique_ptr: single pointer
2100        assert_eq!(
2101            c_type_size_align("std::unique_ptr<int>", &X86_64_SYSV),
2102            (8, 8)
2103        );
2104        // shared_ptr / weak_ptr: two pointers
2105        assert_eq!(
2106            c_type_size_align("std::shared_ptr<int>", &X86_64_SYSV),
2107            (16, 8)
2108        );
2109        assert_eq!(
2110            c_type_size_align("std::weak_ptr<int>", &X86_64_SYSV),
2111            (16, 8)
2112        );
2113    }
2114
2115    #[test]
2116    fn cpp_optional_recursive_size() {
2117        // std::optional<bool>: 1B (bool) + 1B (has_value flag) → 2B
2118        assert_eq!(
2119            c_type_size_align("std::optional<bool>", &X86_64_SYSV),
2120            (2, 1)
2121        );
2122        // std::optional<int>: 4B + 1B → padded to 4B → 8B total? Let's check:
2123        // t_size=4, t_align=4; (4+1).next_multiple_of(4) = 8
2124        assert_eq!(
2125            c_type_size_align("std::optional<int>", &X86_64_SYSV),
2126            (8, 4)
2127        );
2128        // std::optional<double>: 8B + 1B → padded to 8B → 16B
2129        assert_eq!(
2130            c_type_size_align("std::optional<double>", &X86_64_SYSV),
2131            (16, 8)
2132        );
2133    }
2134
2135    #[test]
2136    fn cpp_function_is_32_bytes() {
2137        assert_eq!(
2138            c_type_size_align("std::function<void()>", &X86_64_SYSV),
2139            (32, 8)
2140        );
2141        assert_eq!(
2142            c_type_size_align("std::function<int(int)>", &X86_64_SYSV),
2143            (32, 8)
2144        );
2145    }
2146
2147    #[test]
2148    fn cpp_stdlib_struct_with_string_field() {
2149        // A struct with std::string fields — used to get pointer-size (8B), now 32B
2150        let src = r#"
2151struct Config {
2152    std::string name;
2153    int         version;
2154    bool        enabled;
2155};
2156"#;
2157        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2158        let l = &layouts[0];
2159        assert_eq!(l.fields[0].size, 32); // std::string, not 8
2160        // int at offset 32, bool at 36; total padded to 8-byte align = 40
2161        assert_eq!(l.fields[1].offset, 32);
2162        assert_eq!(l.fields[1].size, 4);
2163    }
2164
2165    // ── typedef alias resolution ──────────────────────────────────────────────
2166
2167    #[test]
2168    fn typedef_scalar_alias_resolves_correct_size() {
2169        // `typedef uint32_t UserId;` — UserId must be treated as 4B, not pointer-size.
2170        let src = r#"
2171typedef uint32_t UserId;
2172
2173struct User {
2174    UserId id;
2175    char   name[16];
2176};
2177"#;
2178        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2179        let l = layouts.iter().find(|l| l.name == "User").expect("User");
2180        let id = l.fields.iter().find(|f| f.name == "id").expect("id field");
2181        assert_eq!(id.size, 4, "UserId alias of uint32_t must be 4 bytes");
2182        assert_eq!(id.align, 4);
2183    }
2184
2185    #[test]
2186    fn typedef_alias_layout_correct_total_size() {
2187        // Without alias resolution: UserId → unknown → pointer_size (8B)
2188        // char(1) + 7 pad + unknown(8) = 16B.
2189        // With alias resolution: char(1) + 3 pad + uint32_t(4) = 8B.
2190        let src = r#"
2191typedef uint32_t Token;
2192
2193struct Auth {
2194    char  prefix;
2195    Token token;
2196};
2197"#;
2198        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2199        let l = layouts.iter().find(|l| l.name == "Auth").expect("Auth");
2200        // prefix(1) + 3-byte pad + token(4) = 8B
2201        assert_eq!(l.total_size, 8, "alias-resolved layout should be 8 bytes");
2202    }
2203
2204    #[test]
2205    fn typedef_pointer_not_confused_with_scalar_alias() {
2206        // `typedef int *IntPtr;` — pointer typedef; alias name lives in a nested
2207        // declarator, so collect_typedef_aliases must NOT collect it.
2208        // IntPtr falls through to the unknown-type catch-all → pointer_size (8B).
2209        let src = r#"
2210typedef int *IntPtr;
2211
2212struct S {
2213    IntPtr p;
2214    int    x;
2215};
2216"#;
2217        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2218        let l = layouts.iter().find(|l| l.name == "S").expect("S");
2219        let p = l.fields.iter().find(|f| f.name == "p").expect("p field");
2220        // Pointer typedef — either resolved as pointer (8B) or falls to pointer_size.
2221        assert_eq!(p.size, 8, "pointer typedef should be 8 bytes on x86_64");
2222    }
2223
2224    #[test]
2225    fn typedef_struct_not_collected_as_scalar_alias() {
2226        // `typedef struct { ... } MyStruct;` must not appear in scalar alias map.
2227        // The struct is still emitted as a StructLayout.
2228        let src = r#"
2229typedef struct {
2230    int x;
2231    int y;
2232} Point;
2233
2234struct Line {
2235    Point a;
2236    Point b;
2237};
2238"#;
2239        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
2240        // Point must be emitted as a layout
2241        assert!(
2242            layouts.iter().any(|l| l.name == "Point"),
2243            "typedef struct should emit a StructLayout"
2244        );
2245    }
2246
2247    #[test]
2248    fn cpp_class_typedef_alias_resolved() {
2249        // Typedef alias resolution must also work for C++ class fields.
2250        let src = r#"
2251typedef uint64_t Timestamp;
2252
2253class Event {
2254    Timestamp when;
2255    int       kind;
2256};
2257"#;
2258        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2259        let l = layouts.iter().find(|l| l.name == "Event").expect("Event");
2260        let when = l.fields.iter().find(|f| f.name == "when").expect("when");
2261        assert_eq!(when.size, 8, "Timestamp alias of uint64_t must be 8 bytes");
2262        assert_eq!(when.align, 8);
2263    }
2264
2265    // ── C++ template skipping ─────────────────────────────────────────────────
2266
2267    #[test]
2268    fn cpp_template_struct_is_skipped() {
2269        // Generic C++ templates cannot be sized without monomorphisation.
2270        let src = "template<typename T> struct Wrapper { T value; int count; };";
2271        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2272        assert!(
2273            layouts.iter().all(|l| l.name != "Wrapper"),
2274            "template struct must be skipped, not emitted with wrong sizes"
2275        );
2276    }
2277
2278    #[test]
2279    fn cpp_template_class_is_skipped() {
2280        let src = "template<typename T, typename U> class Pair { T first; U second; };";
2281        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2282        assert!(
2283            layouts.iter().all(|l| l.name != "Pair"),
2284            "template class must be skipped"
2285        );
2286    }
2287
2288    #[test]
2289    fn cpp_non_template_struct_alongside_template_is_parsed() {
2290        // The template is skipped but concrete structs in the same TU are kept.
2291        let src = r#"
2292template<typename T> struct Generic { T val; };
2293struct Concrete { int x; double y; };
2294"#;
2295        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
2296        assert!(
2297            layouts.iter().all(|l| l.name != "Generic"),
2298            "Generic template must be skipped"
2299        );
2300        let concrete = layouts
2301            .iter()
2302            .find(|l| l.name == "Concrete")
2303            .expect("Concrete must be parsed");
2304        assert_eq!(concrete.fields.len(), 2);
2305    }
2306}