Skip to main content

padlock_source/frontends/
c_cpp.rs

1// padlock-source/src/frontends/c_cpp.rs
2//
3// Extracts struct layouts from C / C++ source using tree-sitter.
4// Sizes and alignments are computed from field type names + arch config;
5// there is no compiler involved so the results are approximate for complex types.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use tree_sitter::{Node, Parser};
10
11// ── type resolution ───────────────────────────────────────────────────────────
12
13/// Map a C/C++ type name to (size, align) using the target arch.
14fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
15    let ty = ty.trim();
16    // Strip qualifiers
17    for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
18        if let Some(rest) = ty.strip_prefix(qual) {
19            return c_type_size_align(rest, arch);
20        }
21    }
22    // x86 SSE / AVX / AVX-512 SIMD types
23    match ty {
24        "__m64" => return (8, 8),
25        "__m128" | "__m128d" | "__m128i" => return (16, 16),
26        "__m256" | "__m256d" | "__m256i" => return (32, 32),
27        "__m512" | "__m512d" | "__m512i" => return (64, 64),
28        // ARM NEON — 64-bit (double-word) vectors
29        "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
30        | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
31        // ARM NEON — 128-bit (quad-word) vectors
32        "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
33        | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
34        _ => {}
35    }
36    // C++ standard library types (Linux/glibc + libstdc++ defaults).
37    // Sizes are platform-approximate; accuracy is "good enough" for cache-line
38    // bucketing and false-sharing detection.
39    match ty {
40        // ── Synchronisation ───────────────────────────────────────────────────
41        // pthread_mutex_t on Linux/glibc is 40 bytes.
42        "std::mutex"
43        | "std::recursive_mutex"
44        | "std::timed_mutex"
45        | "std::recursive_timed_mutex"
46        | "pthread_mutex_t" => return (40, 8),
47        "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
48        "std::condition_variable" | "pthread_cond_t" => return (48, 8),
49
50        // ── String / view ─────────────────────────────────────────────────────
51        // libstdc++ std::string: 32B (ptr + length + SSO buffer / capacity).
52        // libc++ (Clang): 24B. We use 32B (libstdc++ / GCC, dominant on Linux).
53        "std::string" | "std::wstring" | "std::u8string" | "std::u16string" | "std::u32string"
54        | "std::pmr::string" => return (32, 8),
55        // std::string_view / std::span<T>: pointer + length (2 words).
56        "std::string_view"
57        | "std::wstring_view"
58        | "std::u8string_view"
59        | "std::u16string_view"
60        | "std::u32string_view" => return (arch.pointer_size * 2, arch.pointer_size),
61
62        // ── Sequence containers ───────────────────────────────────────────────
63        // std::vector<T>: pointer + size + capacity = 3 words (24B on 64-bit).
64        // Size is independent of T.
65        ty if ty.starts_with("std::vector<") || ty == "std::vector" => {
66            return (arch.pointer_size * 3, arch.pointer_size);
67        }
68        // std::deque<T>: 80B on both libstdc++ and libc++ (64-bit Linux).
69        ty if ty.starts_with("std::deque<") || ty == "std::deque" => return (80, 8),
70        // std::list<T>: sentinel node pointer + size = 2 words + node pointers.
71        // libstdc++: 24B (size_t + two pointers). libc++: 24B.
72        ty if ty.starts_with("std::list<") || ty == "std::list" => {
73            return (arch.pointer_size * 3, arch.pointer_size);
74        }
75        // std::forward_list<T>: single pointer (head node).
76        ty if ty.starts_with("std::forward_list<") || ty == "std::forward_list" => {
77            return (arch.pointer_size, arch.pointer_size);
78        }
79        // std::array<T, N>: inline storage; size = N * sizeof(T).
80        // We cannot compute this without resolving T and N, so fall through.
81
82        // ── Associative / unordered containers ────────────────────────────────
83        // All map/set types: header node + size = ~48B (libstdc++) / ~40B (libc++).
84        // Use 48B as conservative approximation.
85        ty if ty.starts_with("std::map<")
86            || ty.starts_with("std::multimap<")
87            || ty.starts_with("std::set<")
88            || ty.starts_with("std::multiset<") =>
89        {
90            return (48, 8);
91        }
92        // std::unordered_map / unordered_set: bucket array pointer + size + load factor + etc.
93        // libstdc++: ~56B. libc++: ~72B. Use 56B.
94        ty if ty.starts_with("std::unordered_map<")
95            || ty.starts_with("std::unordered_multimap<")
96            || ty.starts_with("std::unordered_set<")
97            || ty.starts_with("std::unordered_multiset<") =>
98        {
99            return (56, 8);
100        }
101
102        // ── Smart pointers ────────────────────────────────────────────────────
103        // std::unique_ptr<T>: single pointer (deleter may be zero-sized via EBO).
104        ty if ty.starts_with("std::unique_ptr<") || ty == "std::unique_ptr" => {
105            return (arch.pointer_size, arch.pointer_size);
106        }
107        // std::shared_ptr<T> / std::weak_ptr<T>: object pointer + control block pointer.
108        ty if ty.starts_with("std::shared_ptr<")
109            || ty == "std::shared_ptr"
110            || ty.starts_with("std::weak_ptr<")
111            || ty == "std::weak_ptr" =>
112        {
113            return (arch.pointer_size * 2, arch.pointer_size);
114        }
115
116        // ── Type-erasure / utilities ──────────────────────────────────────────
117        // std::function<Sig>: 32B on libstdc++ and libc++ (64-bit Linux).
118        // Holds a functor pointer, a vtable pointer, and a small-functor buffer.
119        ty if ty.starts_with("std::function<") || ty == "std::function" => return (32, 8),
120        // std::any: 32B on libstdc++ (small-object buffer + vtable pointer).
121        "std::any" => return (32, 8),
122        // std::error_code / std::error_condition: pointer + int = 16B.
123        "std::error_code" | "std::error_condition" => return (16, 8),
124        // std::exception_ptr: single pointer.
125        "std::exception_ptr" => return (arch.pointer_size, arch.pointer_size),
126        // std::type_index: single pointer (wraps std::type_info*).
127        "std::type_index" => return (arch.pointer_size, arch.pointer_size),
128        // std::span<T>: pointer + length (2 words). Template arg irrelevant.
129        ty if ty.starts_with("std::span<") || ty == "std::span" => {
130            return (arch.pointer_size * 2, arch.pointer_size);
131        }
132        // std::optional<T>: sizeof(T) + 1B bool, padded to align(T).
133        // Recurse to resolve T then apply the formula.
134        ty if ty.starts_with("std::optional<") && ty.ends_with('>') => {
135            let inner = &ty["std::optional<".len()..ty.len() - 1];
136            let (t_size, t_align) = c_type_size_align(inner.trim(), arch);
137            let total = (t_size + 1).next_multiple_of(t_align.max(1));
138            return (total, t_align.max(1));
139        }
140
141        // ── Atomic ────────────────────────────────────────────────────────────
142        // std::atomic<T>: same size and alignment as T.
143        ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
144            let inner = &ty[12..ty.len() - 1];
145            return c_type_size_align(inner.trim(), arch);
146        }
147        // std::atomic_flag: guaranteed 1B minimum, but often 4B in practice.
148        "std::atomic_flag" => return (4, 4),
149
150        _ => {} // fall through to primitive types below
151    }
152    // Primitive / stdint / pointer types
153    match ty {
154        "char" | "_Bool" | "bool" => (1, 1),
155        "short" | "short int" => (2, 2),
156        "int" => (4, 4),
157        "long" | "long int" => (arch.pointer_size, arch.pointer_size),
158        "long long" | "long long int" => (8, 8),
159        "float" => (4, 4),
160        "double" => (8, 8),
161        "long double" => (16, 16),
162
163        // C99 stdint exact-width types
164        "int8_t" | "uint8_t" => (1, 1),
165        "int16_t" | "uint16_t" => (2, 2),
166        "int32_t" | "uint32_t" => (4, 4),
167        "int64_t" | "uint64_t" => (8, 8),
168        "intmax_t" | "uintmax_t" => (8, 8),
169        "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
170            (arch.pointer_size, arch.pointer_size)
171        }
172
173        // C99 fast types — uint_fast{8,16}_t are always 1/2B;
174        // uint_fast{32,64}_t are pointer-sized on 64-bit (8B), 4B on 32-bit.
175        "int_fast8_t" | "uint_fast8_t" => (1, 1),
176        "int_fast16_t" | "uint_fast16_t" => (2, 2),
177        "int_fast32_t" | "uint_fast32_t" | "int_fast64_t" | "uint_fast64_t" => {
178            (arch.pointer_size, arch.pointer_size)
179        }
180
181        // C99 least types — minimum guaranteed widths
182        "int_least8_t" | "uint_least8_t" => (1, 1),
183        "int_least16_t" | "uint_least16_t" => (2, 2),
184        "int_least32_t" | "uint_least32_t" => (4, 4),
185        "int_least64_t" | "uint_least64_t" => (8, 8),
186
187        // GCC/Clang 128-bit integer extension
188        "__int128" | "__uint128" | "__int128_t" | "__uint128_t" => (16, 16),
189
190        // Linux kernel short-form integer types (linux/types.h)
191        "u8" | "s8" => (1, 1),
192        "u16" | "s16" => (2, 2),
193        "u32" | "s32" => (4, 4),
194        "u64" | "s64" => (8, 8),
195
196        // Linux kernel double-underscore types (__u8, __s8, __be16, __le32, …)
197        "__u8" | "__s8" | "__u8__" | "__s8__" => (1, 1),
198        "__u16" | "__s16" | "__be16" | "__le16" => (2, 2),
199        "__u32" | "__s32" | "__be32" | "__le32" => (4, 4),
200        "__u64" | "__s64" | "__be64" | "__le64" => (8, 8),
201
202        // MSVC fixed-width intrinsics
203        "__int8" => (1, 1),
204        "__int16" => (2, 2),
205        "__int32" => (4, 4),
206        "__int64" => (8, 8),
207
208        // Windows SDK / WinAPI types
209        "BYTE" | "BOOLEAN" | "CHAR" | "INT8" | "UINT8" => (1, 1),
210        "WORD" | "WCHAR" | "SHORT" | "USHORT" | "INT16" | "UINT16" => (2, 2),
211        "DWORD" | "LONG" | "ULONG" | "INT" | "UINT" | "BOOL" | "FLOAT" | "INT32" | "UINT32" => {
212            (4, 4)
213        }
214        "QWORD" | "LONGLONG" | "ULONGLONG" | "INT64" | "UINT64" | "LARGE_INTEGER" => (8, 8),
215        "DWORD64" | "ULONG64" | "LONG64" => (8, 8),
216        "HANDLE" | "LPVOID" | "PVOID" | "LPCVOID" | "LPSTR" | "LPCSTR" | "LPWSTR" | "LPCWSTR"
217        | "SIZE_T" | "SSIZE_T" | "ULONG_PTR" | "LONG_PTR" | "DWORD_PTR" | "INT_PTR"
218        | "UINT_PTR" => (arch.pointer_size, arch.pointer_size),
219
220        // C/C++ character types
221        // wchar_t: 4B on Linux/macOS (GCC/Clang POSIX), 2B on Windows/MSVC.
222        // All current padlock arch configs are POSIX, so 4B is correct here.
223        "wchar_t" => (4, 4),
224        "char8_t" => (1, 1),
225        "char16_t" => (2, 2),
226        "char32_t" => (4, 4),
227
228        // Half-precision and bfloat16 (ARM, GCC, Clang, ML workloads)
229        "_Float16" | "__fp16" | "__bf16" => (2, 2),
230        // 128-bit float (GCC/Clang extension)
231        "_Float128" | "__float128" => (16, 16),
232
233        // Pointer types
234        ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
235        // Unknown — use pointer size as a reasonable default
236        _ => (arch.pointer_size, arch.pointer_size),
237    }
238}
239
240// ── struct / union simulation ─────────────────────────────────────────────────
241
/// Remove a trailing bit-field width (`:N`) from a type name so the base type
/// can be looked up.
///
/// Only the text after the *last* `:` is inspected. It counts as a width when,
/// after trimming whitespace, it is non-empty and made solely of ASCII digits:
/// `"int:3"` → `"int"`, while `"std::atomic"` is returned untouched.
fn strip_bitfield_suffix(ty: &str) -> &str {
    match ty.rsplit_once(':') {
        Some((base, width)) => {
            let width = width.trim();
            let is_width = !width.is_empty() && width.chars().all(|c| c.is_ascii_digit());
            if is_width { base.trim_end() } else { ty }
        }
        None => ty,
    }
}
253
254/// Return `true` when `ty` carries a bit-field width annotation (e.g. `"int:3"`).
255/// Bit-field packing is compiler-controlled and cannot be accurately modelled
256/// without a compiler, so structs containing bit-field members are skipped.
257fn is_bitfield_type(ty: &str) -> bool {
258    strip_bitfield_suffix(ty) != ty
259}
260
261/// Simulate C/C++ struct layout given ordered fields.
262///
263/// When `packed` is `true` the layout mirrors `__attribute__((packed))`:
264/// no inter-field alignment padding is inserted and the struct alignment
265/// is forced to 1. This matches GCC/Clang behaviour for packed structs.
266fn simulate_layout(
267    fields: &mut Vec<Field>,
268    struct_name: String,
269    arch: &'static ArchConfig,
270    source_line: Option<u32>,
271    packed: bool,
272) -> StructLayout {
273    let mut offset = 0usize;
274    let mut struct_align = 1usize;
275
276    for f in fields.iter_mut() {
277        if !packed && f.align > 0 {
278            offset = offset.next_multiple_of(f.align);
279        }
280        f.offset = offset;
281        offset += f.size;
282        if !packed {
283            struct_align = struct_align.max(f.align);
284        }
285    }
286    // Trailing padding (not present in packed structs)
287    if !packed && struct_align > 0 {
288        offset = offset.next_multiple_of(struct_align);
289    }
290
291    StructLayout {
292        name: struct_name,
293        total_size: offset,
294        align: struct_align,
295        fields: std::mem::take(fields),
296        source_file: None,
297        source_line,
298        arch,
299        is_packed: packed,
300        is_union: false,
301        is_repr_rust: false,
302        suppressed_findings: Vec::new(),
303    }
304}
305
306/// Simulate a C/C++ union layout: all fields start at offset 0;
307/// total size is the largest field, rounded to max alignment.
308fn simulate_union_layout(
309    fields: &mut Vec<Field>,
310    name: String,
311    arch: &'static ArchConfig,
312    source_line: Option<u32>,
313) -> StructLayout {
314    for f in fields.iter_mut() {
315        f.offset = 0;
316    }
317    let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
318    let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
319    let total_size = if max_align > 0 {
320        max_size.next_multiple_of(max_align)
321    } else {
322        max_size
323    };
324
325    StructLayout {
326        name,
327        total_size,
328        align: max_align,
329        fields: std::mem::take(fields),
330        source_file: None,
331        source_line,
332        arch,
333        is_packed: false,
334        is_union: true,
335        is_repr_rust: false,
336        suppressed_findings: Vec::new(),
337    }
338}
339
340// ── C++ class parsing (vtable + inheritance) ──────────────────────────────────
341
342/// Parse a `class_specifier` node, modelling:
343/// - A hidden vtable pointer (`__vptr`) when any method is `virtual`.
344/// - Base-class storage as a synthetic `__base_<Name>` field (size resolved
345///   later by the nested-struct resolution pass in `lib.rs`).
346fn parse_class_specifier(
347    source: &str,
348    node: Node<'_>,
349    arch: &'static ArchConfig,
350) -> Option<StructLayout> {
351    let mut class_name = "<anonymous>".to_string();
352    let mut base_names: Vec<String> = Vec::new();
353    let mut body_node: Option<Node> = None;
354    let mut is_packed = false;
355    let mut struct_alignas: Option<usize> = None;
356
357    for i in 0..node.child_count() {
358        let child = node.child(i)?;
359        match child.kind() {
360            "type_identifier" => class_name = source[child.byte_range()].to_string(),
361            "base_class_clause" => {
362                // tree-sitter-cpp structure: ':' [access_specifier] type_identifier
363                // type_identifier nodes are direct children of base_class_clause.
364                for j in 0..child.child_count() {
365                    if let Some(base) = child.child(j)
366                        && base.kind() == "type_identifier"
367                    {
368                        base_names.push(source[base.byte_range()].to_string());
369                    }
370                }
371            }
372            "field_declaration_list" => body_node = Some(child),
373            "attribute_specifier" => {
374                if source[child.byte_range()].contains("packed") {
375                    is_packed = true;
376                }
377            }
378            // C++11 class-level alignas: `class alignas(64) Name { ... };`
379            "alignas_qualifier" | "alignas_specifier" => {
380                if struct_alignas.is_none() {
381                    struct_alignas = parse_alignas_value(source, child);
382                }
383            }
384            _ => {}
385        }
386    }
387
388    let body = body_node?;
389
390    // Detect virtual methods: look for `virtual` keyword anywhere in body
391    let has_virtual = contains_virtual_keyword(source, body);
392
393    // Collect declared fields: (field_name, type_text, guard, alignas_override, source_line)
394    let mut raw_fields: Vec<RawField> = Vec::new();
395    for i in 0..body.child_count() {
396        let Some(child) = body.child(i) else {
397            continue;
398        };
399        if child.kind() == "field_declaration" {
400            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, false) {
401                raw_fields.extend(anon_fields);
402            } else if let Some((ty, fname, guard, al, ln)) = parse_field_declaration(source, child)
403            {
404                raw_fields.push((fname, ty, guard, al, ln));
405            }
406        }
407    }
408
409    // Build fields: vtable pointer, then base-class slots, then declared fields
410    let mut fields: Vec<Field> = Vec::new();
411
412    // Virtual dispatch pointer (hidden, at offset 0 for the first virtual class)
413    if has_virtual {
414        let ps = arch.pointer_size;
415        fields.push(Field {
416            name: "__vptr".to_string(),
417            ty: TypeInfo::Pointer {
418                size: ps,
419                align: ps,
420            },
421            offset: 0,
422            size: ps,
423            align: ps,
424            source_file: None,
425            source_line: None,
426            access: AccessPattern::Unknown,
427        });
428    }
429
430    // Base class storage (opaque until nested-struct resolver fills in sizes)
431    for base in &base_names {
432        let ps = arch.pointer_size;
433        fields.push(Field {
434            name: format!("__base_{base}"),
435            ty: TypeInfo::Opaque {
436                name: base.clone(),
437                size: ps,
438                align: ps,
439            },
440            offset: 0,
441            size: ps,
442            align: ps,
443            source_file: None,
444            source_line: None,
445            access: AccessPattern::Unknown,
446        });
447    }
448
449    // Skip classes with bit-field members (same reason as structs).
450    if raw_fields
451        .iter()
452        .any(|(_, ty, _, _, _)| is_bitfield_type(ty))
453    {
454        eprintln!(
455            "padlock: note: skipping '{class_name}' — contains bit-fields \
456             (bit-field layout is compiler-controlled; use binary analysis for accurate results)"
457        );
458        return None;
459    }
460
461    // Declared member fields
462    for (fname, ty_name, guard, alignas, field_line) in raw_fields {
463        let (size, natural_align) = c_type_size_align(&ty_name, arch);
464        let align = alignas.unwrap_or(natural_align);
465        let access = if let Some(g) = guard {
466            AccessPattern::Concurrent {
467                guard: Some(g),
468                is_atomic: false,
469                is_annotated: true,
470            }
471        } else {
472            AccessPattern::Unknown
473        };
474        fields.push(Field {
475            name: fname,
476            ty: TypeInfo::Primitive {
477                name: ty_name,
478                size,
479                align,
480            },
481            offset: 0,
482            size,
483            align,
484            source_file: None,
485            source_line: Some(field_line),
486            access,
487        });
488    }
489
490    if fields.is_empty() {
491        return None;
492    }
493
494    let line = node.start_position().row as u32 + 1;
495    let mut layout = simulate_layout(&mut fields, class_name, arch, Some(line), is_packed);
496
497    if let Some(al) = struct_alignas
498        && al > layout.align
499    {
500        layout.align = al;
501        if !is_packed {
502            layout.total_size = layout.total_size.next_multiple_of(al);
503        }
504    }
505
506    layout.suppressed_findings =
507        super::suppress::suppressed_from_preceding_source(source, node.start_byte());
508
509    Some(layout)
510}
511
512/// Return true if a `field_declaration_list` node contains any `virtual` keyword
513/// (indicating that the class needs a vtable pointer).
514fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
515    let mut stack = vec![node];
516    while let Some(n) = stack.pop() {
517        if n.kind() == "virtual" {
518            return true;
519        }
520        // Also check raw text for cases where tree-sitter may not produce a
521        // dedicated `virtual` node (e.g. inside complex declarations).
522        if n.child_count() == 0 {
523            let text = &source[n.byte_range()];
524            if text == "virtual" {
525                return true;
526            }
527        }
528        for i in (0..n.child_count()).rev() {
529            if let Some(child) = n.child(i) {
530                stack.push(child);
531            }
532        }
533    }
534    false
535}
536
537// ── tree-sitter walker ────────────────────────────────────────────────────────
538
539fn extract_structs_from_tree(
540    source: &str,
541    root: Node<'_>,
542    arch: &'static ArchConfig,
543    layouts: &mut Vec<StructLayout>,
544) {
545    let cursor = root.walk();
546    let mut stack = vec![root];
547
548    while let Some(node) = stack.pop() {
549        // Push children in reverse so we process left-to-right
550        for i in (0..node.child_count()).rev() {
551            if let Some(child) = node.child(i) {
552                stack.push(child);
553            }
554        }
555
556        match node.kind() {
557            "struct_specifier" => {
558                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, false) {
559                    layouts.push(layout);
560                }
561            }
562            "union_specifier" => {
563                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, true) {
564                    layouts.push(layout);
565                }
566            }
567            "class_specifier" => {
568                if let Some(layout) = parse_class_specifier(source, node, arch) {
569                    layouts.push(layout);
570                }
571            }
572            _ => {}
573        }
574    }
575
576    // Also handle `typedef struct/union { ... } Name;`
577    let cursor2 = root.walk();
578    let mut stack2 = vec![root];
579    while let Some(node) = stack2.pop() {
580        for i in (0..node.child_count()).rev() {
581            if let Some(child) = node.child(i) {
582                stack2.push(child);
583            }
584        }
585        if node.kind() == "type_definition"
586            && let Some(layout) = parse_typedef_struct_or_union(source, node, arch)
587        {
588            let existing = layouts
589                .iter()
590                .position(|l| l.name == layout.name || l.name == "<anonymous>");
591            match existing {
592                Some(i) if layouts[i].name == "<anonymous>" => {
593                    layouts[i] = layout;
594                }
595                None => layouts.push(layout),
596                _ => {}
597            }
598        }
599    }
600    let _ = cursor;
601    let _ = cursor2; // silence unused warnings
602}
603
604/// Parse a `struct_specifier` or `union_specifier` node into a `StructLayout`.
605fn parse_struct_or_union_specifier(
606    source: &str,
607    node: Node<'_>,
608    arch: &'static ArchConfig,
609    is_union: bool,
610) -> Option<StructLayout> {
611    let mut name = "<anonymous>".to_string();
612    let mut body_node: Option<Node> = None;
613    let mut is_packed = false;
614    // Struct-level alignas: `struct alignas(64) CacheAligned { ... };`
615    let mut struct_alignas: Option<usize> = None;
616
617    for i in 0..node.child_count() {
618        let child = node.child(i)?;
619        match child.kind() {
620            "type_identifier" => name = source[child.byte_range()].to_string(),
621            "field_declaration_list" => body_node = Some(child),
622            "attribute_specifier" => {
623                let text = &source[child.byte_range()];
624                if text.contains("packed") {
625                    is_packed = true;
626                }
627            }
628            // C++11 struct-level alignas: `struct alignas(64) Name { ... };`
629            // tree-sitter-cpp: `alignas_qualifier` as direct child of struct_specifier
630            "alignas_qualifier" | "alignas_specifier" => {
631                if struct_alignas.is_none() {
632                    struct_alignas = parse_alignas_value(source, child);
633                }
634            }
635            _ => {}
636        }
637    }
638
639    let body = body_node?;
640    let mut raw_fields: Vec<RawField> = Vec::new();
641
642    for i in 0..body.child_count() {
643        let child = body.child(i)?;
644        if child.kind() == "field_declaration" {
645            // Check for anonymous nested struct/union: a field_declaration whose
646            // only non-field-identifier child is a struct_specifier/union_specifier
647            // with no type_identifier (i.e. `struct { int x; int y; };`).
648            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, is_union) {
649                raw_fields.extend(anon_fields);
650            } else if let Some((ty, fname, guard, al, ln)) = parse_field_declaration(source, child)
651            {
652                raw_fields.push((fname, ty, guard, al, ln));
653            }
654        }
655    }
656
657    if raw_fields.is_empty() {
658        return None;
659    }
660
661    // Bit-field packing is compiler-controlled and cannot be accurately modelled
662    // without a compiler. Skip the entire struct to avoid producing wrong layout
663    // data. Use `padlock analyze` on the compiled binary for accurate results.
664    if raw_fields
665        .iter()
666        .any(|(_, ty, _, _, _)| is_bitfield_type(ty))
667    {
668        eprintln!(
669            "padlock: note: skipping '{name}' — contains bit-fields \
670             (bit-field layout is compiler-controlled; use binary analysis for accurate results)"
671        );
672        return None;
673    }
674
675    let mut fields: Vec<Field> = raw_fields
676        .into_iter()
677        .map(|(fname, ty_name, guard, alignas, field_line)| {
678            let (size, natural_align) = c_type_size_align(&ty_name, arch);
679            // alignas(N) on a field overrides its alignment requirement.
680            let align = alignas.unwrap_or(natural_align);
681            let access = if let Some(g) = guard {
682                AccessPattern::Concurrent {
683                    guard: Some(g),
684                    is_atomic: false,
685                    is_annotated: true,
686                }
687            } else {
688                AccessPattern::Unknown
689            };
690            Field {
691                name: fname,
692                ty: TypeInfo::Primitive {
693                    name: ty_name,
694                    size,
695                    align,
696                },
697                offset: 0,
698                size,
699                align,
700                source_file: None,
701                source_line: Some(field_line),
702                access,
703            }
704        })
705        .collect();
706
707    let line = node.start_position().row as u32 + 1;
708    let mut layout = if is_union {
709        simulate_union_layout(&mut fields, name, arch, Some(line))
710    } else {
711        simulate_layout(&mut fields, name, arch, Some(line), is_packed)
712    };
713
714    // Apply struct-level alignas: the struct's alignment requirement is at
715    // least N; trailing padding may grow to satisfy the new alignment.
716    if let Some(al) = struct_alignas
717        && al > layout.align
718    {
719        layout.align = al;
720        if !is_packed {
721            layout.total_size = layout.total_size.next_multiple_of(al);
722        }
723    }
724
725    layout.suppressed_findings =
726        super::suppress::suppressed_from_preceding_source(source, node.start_byte());
727
728    Some(layout)
729}
730
731/// Parse a `typedef struct/union { ... } Name;` type_definition node.
732fn parse_typedef_struct_or_union(
733    source: &str,
734    node: Node<'_>,
735    arch: &'static ArchConfig,
736) -> Option<StructLayout> {
737    let mut specifier_node: Option<Node> = None;
738    let mut is_union = false;
739    let mut typedef_name: Option<String> = None;
740
741    for i in 0..node.child_count() {
742        let child = node.child(i)?;
743        match child.kind() {
744            "struct_specifier" => {
745                specifier_node = Some(child);
746                is_union = false;
747            }
748            "union_specifier" => {
749                specifier_node = Some(child);
750                is_union = true;
751            }
752            "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
753            _ => {}
754        }
755    }
756
757    let spec = specifier_node?;
758    let typedef_name = typedef_name?;
759
760    let mut layout = parse_struct_or_union_specifier(source, spec, arch, is_union)?;
761    if layout.name == "<anonymous>" {
762        layout.name = typedef_name;
763    }
764    Some(layout)
765}
766
// Legacy alias that forwards unchanged to `parse_typedef_struct_or_union`.
// The typedef pass in `extract_structs_from_tree` calls the `_or_union`
// function directly, so within this view nothing uses this alias — hence the
// `dead_code` allowance. NOTE(review): confirm no other caller exists before
// removing it.
#[allow(dead_code)]
fn parse_typedef_struct(
    source: &str,
    node: Node<'_>,
    arch: &'static ArchConfig,
) -> Option<StructLayout> {
    parse_typedef_struct_or_union(source, node, arch)
}
776
/// Extract a lock guard name from the source text of a C/C++ field that
/// carries `__attribute__((guarded_by(X)))` or
/// `__attribute__((pt_guarded_by(X)))`.
///
/// Also recognises the common macro forms `GUARDED_BY(X)` and `PT_GUARDED_BY(X)`
/// (Clang thread-safety analysis). Matching is done on raw text, so it does
/// not depend on how tree-sitter structures the attribute tokens.
///
/// Keywords are matched case-sensitively in exactly the lower-case and
/// upper-case spellings listed below. Every occurrence of a keyword is tried,
/// so an identifier that merely contains `guarded_by` does not hide a real
/// attribute later in the text. The argument runs to the *matching* closing
/// parenthesis, so `GUARDED_BY(get_lock())` yields `get_lock()`.
///
/// Returns `None` when no keyword is followed by a non-empty parenthesised
/// argument. Surrounding whitespace and double quotes are stripped from the
/// returned name.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    for kw in ["guarded_by", "pt_guarded_by", "GUARDED_BY", "PT_GUARDED_BY"] {
        for (pos, _) in field_source.match_indices(kw) {
            // Expect `(` optionally preceded by whitespace.
            let after = field_source[pos + kw.len()..].trim_start();
            let Some(inner) = after.strip_prefix('(') else {
                continue;
            };
            let Some(end) = matching_close_paren(inner) else {
                continue;
            };
            let guard = inner[..end].trim().trim_matches('"');
            if !guard.is_empty() {
                return Some(guard.to_string());
            }
        }
    }
    None
}

/// Byte index in `s` of the `)` that closes a parenthesis opened just before
/// `s`, accounting for nested pairs; `None` if it is never closed.
fn matching_close_paren(s: &str) -> Option<usize> {
    let mut depth = 1usize;
    for (i, b) in s.bytes().enumerate() {
        match b {
            b'(' => depth += 1,
            b')' => {
                depth -= 1;
                if depth == 0 {
                    return Some(i);
                }
            }
            _ => {}
        }
    }
    None
}
804
805/// Parse a numeric value from an `alignas_qualifier` node: `alignas(N)`.
806/// tree-sitter-cpp uses the node kind `alignas_qualifier` for C++11 `alignas`.
807/// Returns `None` when the specifier contains a type expression rather than
808/// an integer literal (e.g. `alignas(double)` — handled elsewhere by the
809/// compiler; we skip those conservatively).
810fn parse_alignas_value(source: &str, node: Node<'_>) -> Option<usize> {
811    for i in 0..node.child_count() {
812        if let Some(child) = node.child(i) {
813            match child.kind() {
814                "number_literal" | "integer_literal" | "integer" => {
815                    let text = source[child.byte_range()].trim();
816                    if let Ok(n) = text.parse::<usize>() {
817                        return Some(n);
818                    }
819                    // Hex literal: 0x40
820                    if let Some(hex) = text.strip_prefix("0x").or_else(|| text.strip_prefix("0X")) {
821                        return usize::from_str_radix(hex, 16).ok();
822                    }
823                }
824                // Recurse for nested nodes (parenthesised expression, etc.)
825                "parenthesized_expression" | "argument_list" | "alignas_qualifier" => {
826                    if let r @ Some(_) = parse_alignas_value(source, child) {
827                        return r;
828                    }
829                }
830                _ => {}
831            }
832        }
833    }
834    None
835}
836
837/// Returns `(ty, field_name, guard, alignas_override)`.
838/// `alignas_override` is `Some(N)` when the field carries `alignas(N)`.
839/// Detect and parse an anonymous nested struct/union field declaration, e.g.:
840///
841/// ```c
842/// struct Packet {
843///     union {                    // ← anonymous nested union
844///         uint32_t raw;
845///         struct { uint8_t a; uint8_t b; uint8_t c; uint8_t d; };
846///     };
847///     uint64_t timestamp;
848/// };
849/// ```
850///
851/// A `field_declaration` is anonymous if it contains a `struct_specifier` or
852/// `union_specifier` child that has a `field_declaration_list` (i.e. a body)
853/// but no `type_identifier` (i.e. no name). The fields of the nested
854/// struct/union are flattened into the parent.
855///
856/// Returns `None` if the declaration is not an anonymous nested struct/union
857/// (the caller should fall through to `parse_field_declaration`).
858/// (field_name, type_text, guard, alignas_override, source_line_1based)
859type RawField = (String, String, Option<String>, Option<usize>, u32);
860
861#[allow(clippy::only_used_in_recursion)]
862fn parse_anonymous_nested(
863    source: &str,
864    node: Node<'_>,
865    arch: &'static ArchConfig,
866    parent_is_union: bool,
867) -> Option<Vec<RawField>> {
868    // Find a struct_specifier or union_specifier child.
869    for i in 0..node.child_count() {
870        let child = node.child(i)?;
871        if child.kind() != "struct_specifier" && child.kind() != "union_specifier" {
872            continue;
873        }
874        let nested_is_union = child.kind() == "union_specifier";
875
876        // Must have a body (field_declaration_list) but no type_identifier.
877        let mut has_name = false;
878        let mut body_node: Option<Node> = None;
879        for j in 0..child.child_count() {
880            let sub = child.child(j)?;
881            match sub.kind() {
882                "type_identifier" => has_name = true,
883                "field_declaration_list" => body_node = Some(sub),
884                _ => {}
885            }
886        }
887
888        if has_name || body_node.is_none() {
889            // Named struct/union used as a field type — handled by parse_field_declaration.
890            continue;
891        }
892
893        let body = body_node?;
894        let mut nested_raw: Vec<RawField> = Vec::new();
895
896        for j in 0..body.child_count() {
897            let inner = body.child(j)?;
898            if inner.kind() == "field_declaration" {
899                // Recurse to handle doubly-nested anonymous structs.
900                if let Some(deeper) = parse_anonymous_nested(source, inner, arch, nested_is_union) {
901                    nested_raw.extend(deeper);
902                } else if let Some((ty, fname, guard, al, ln)) =
903                    parse_field_declaration(source, inner)
904                {
905                    nested_raw.push((fname, ty, guard, al, ln));
906                }
907            }
908        }
909
910        // If nested is a union, the fields all share offset 0 (relative to the
911        // union's placement in the parent). We can't easily track this through
912        // raw field lists, so we emit them as a synthetic __anon_union_N field
913        // when the parent cares about offsets, or just flatten for unions.
914        //
915        // For simplicity: flatten all fields — the layout simulator will compute
916        // correct offsets if the parent is a struct, and union semantics are
917        // preserved when the parent is a union.
918        let _ = (nested_is_union, parent_is_union);
919
920        if !nested_raw.is_empty() {
921            return Some(nested_raw);
922        }
923    }
924    None
925}
926
927fn parse_field_declaration(source: &str, node: Node<'_>) -> Option<RawField> {
928    let mut ty_parts: Vec<String> = Vec::new();
929    let mut field_name: Option<String> = None;
930    // Bit-field width, e.g. `int flags : 3;` → Some("3")
931    let mut bit_width: Option<String> = None;
932    // Collect attribute text for guard extraction
933    let mut attr_text = String::new();
934    // Field-level alignas override
935    let mut alignas_override: Option<usize> = None;
936
937    for i in 0..node.child_count() {
938        let child = node.child(i)?;
939        match child.kind() {
940            "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
941                ty_parts.push(source[child.byte_range()].trim().to_string());
942            }
943            // C++ qualified types: std::mutex, ns::Type, etc.
944            // C++ template types:  std::atomic<uint64_t>, std::vector<int>, etc.
945            "qualified_identifier" | "template_type" => {
946                ty_parts.push(source[child.byte_range()].trim().to_string());
947            }
948            // Nested struct/union used as a field type: `struct Vec2 tl;`
949            // Extract just the type_identifier name (e.g. "Vec2") so the
950            // nested-struct resolution pass can match it by name.
951            "struct_specifier" | "union_specifier" => {
952                for j in 0..child.child_count() {
953                    if let Some(sub) = child.child(j)
954                        && sub.kind() == "type_identifier"
955                    {
956                        ty_parts.push(source[sub.byte_range()].trim().to_string());
957                        break;
958                    }
959                }
960            }
961            "field_identifier" => {
962                field_name = Some(source[child.byte_range()].trim().to_string());
963            }
964            "pointer_declarator" => {
965                field_name = extract_identifier(source, child);
966                ty_parts.push("*".to_string());
967            }
968            // Bit-field clause: `: N`  (tree-sitter-c/cpp node)
969            "bitfield_clause" => {
970                let text = source[child.byte_range()].trim();
971                // Strip leading ':' and whitespace to get just the width digits
972                bit_width = Some(text.trim_start_matches(':').trim().to_string());
973            }
974            // GNU attribute specifier: __attribute__((...))
975            "attribute_specifier" | "attribute" => {
976                attr_text.push_str(source[child.byte_range()].trim());
977                attr_text.push(' ');
978            }
979            // C++11 alignas: tree-sitter-cpp wraps it as type_qualifier → alignas_qualifier
980            // Also handle the direct form in case grammar versions differ.
981            "alignas_qualifier" | "alignas_specifier" => {
982                if alignas_override.is_none() {
983                    alignas_override = parse_alignas_value(source, child);
984                }
985            }
986            // type_qualifier wraps alignas_qualifier for field declarations:
987            // `alignas(8) char c;` → type_qualifier { alignas_qualifier { ... } }
988            "type_qualifier" => {
989                if alignas_override.is_none() {
990                    for j in 0..child.child_count() {
991                        if let Some(sub) = child.child(j)
992                            && (sub.kind() == "alignas_qualifier"
993                                || sub.kind() == "alignas_specifier")
994                        {
995                            alignas_override = parse_alignas_value(source, sub);
996                            break;
997                        }
998                    }
999                }
1000            }
1001            _ => {}
1002        }
1003    }
1004
1005    let base_ty = ty_parts.join(" ");
1006    let fname = field_name?;
1007    if base_ty.is_empty() {
1008        return None;
1009    }
1010    // Annotate bit-field types as "type:N" so callers can detect and report them;
1011    // `strip_bitfield_suffix` recovers the base type for size/align lookup.
1012    let ty = if let Some(w) = bit_width {
1013        format!("{base_ty}:{w}")
1014    } else {
1015        base_ty
1016    };
1017
1018    // Also check the full field source text (attribute_specifier may not always
1019    // be a direct child depending on tree-sitter grammar version).
1020    let field_src = source[node.byte_range()].to_string();
1021    let guard = extract_guard_from_c_field_text(&attr_text)
1022        .or_else(|| extract_guard_from_c_field_text(&field_src));
1023
1024    let line = node.start_position().row as u32 + 1;
1025    Some((ty, fname, guard, alignas_override, line))
1026}
1027
1028fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
1029    if node.kind() == "field_identifier" || node.kind() == "identifier" {
1030        return Some(source[node.byte_range()].to_string());
1031    }
1032    for i in 0..node.child_count() {
1033        if let Some(child) = node.child(i)
1034            && let Some(name) = extract_identifier(source, child)
1035        {
1036            return Some(name);
1037        }
1038    }
1039    None
1040}
1041
1042// ── public API ────────────────────────────────────────────────────────────────
1043
1044pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1045    let mut parser = Parser::new();
1046    parser.set_language(&tree_sitter_c::LANGUAGE.into())?;
1047    let tree = parser
1048        .parse(source, None)
1049        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1050    let mut layouts = Vec::new();
1051    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1052    Ok(layouts)
1053}
1054
1055pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1056    let mut parser = Parser::new();
1057    parser.set_language(&tree_sitter_cpp::LANGUAGE.into())?;
1058    let tree = parser
1059        .parse(source, None)
1060        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1061    let mut layouts = Vec::new();
1062    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1063    Ok(layouts)
1064}
1065
1066// ── tests ─────────────────────────────────────────────────────────────────────
1067
1068#[cfg(test)]
1069mod tests {
1070    use super::*;
1071    use padlock_core::arch::X86_64_SYSV;
1072
1073    #[test]
1074    fn parse_simple_c_struct() {
1075        let src = r#"
1076struct Point {
1077    int x;
1078    int y;
1079};
1080"#;
1081        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1082        assert_eq!(layouts.len(), 1);
1083        assert_eq!(layouts[0].name, "Point");
1084        assert_eq!(layouts[0].fields.len(), 2);
1085        assert_eq!(layouts[0].fields[0].name, "x");
1086        assert_eq!(layouts[0].fields[1].name, "y");
1087    }
1088
1089    #[test]
1090    fn parse_typedef_struct() {
1091        let src = r#"
1092typedef struct {
1093    char  is_active;
1094    double timeout;
1095    int   port;
1096} Connection;
1097"#;
1098        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1099        assert_eq!(layouts.len(), 1);
1100        assert_eq!(layouts[0].name, "Connection");
1101        assert_eq!(layouts[0].fields.len(), 3);
1102    }
1103
1104    #[test]
1105    fn c_layout_computes_offsets() {
1106        let src = "struct T { char a; double b; };";
1107        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1108        assert_eq!(layouts.len(), 1);
1109        let layout = &layouts[0];
1110        // char at offset 0, double at offset 8 (7 bytes padding)
1111        assert_eq!(layout.fields[0].offset, 0);
1112        assert_eq!(layout.fields[1].offset, 8);
1113        assert_eq!(layout.total_size, 16);
1114    }
1115
1116    #[test]
1117    fn c_layout_detects_padding() {
1118        let src = "struct T { char a; int b; };";
1119        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1120        let gaps = padlock_core::ir::find_padding(&layouts[0]);
1121        assert!(!gaps.is_empty());
1122        assert_eq!(gaps[0].bytes, 3); // 3 bytes padding between char and int
1123    }
1124
1125    #[test]
1126    fn parse_cpp_struct() {
1127        let src = "struct Vec3 { float x; float y; float z; };";
1128        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1129        assert_eq!(layouts.len(), 1);
1130        assert_eq!(layouts[0].fields.len(), 3);
1131    }
1132
1133    // ── SIMD types ────────────────────────────────────────────────────────────
1134
1135    #[test]
1136    fn simd_sse_field_size_and_align() {
1137        let src = "struct Vecs { __m128 a; __m256 b; };";
1138        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1139        assert_eq!(layouts.len(), 1);
1140        let f = &layouts[0].fields;
1141        assert_eq!(f[0].size, 16); // __m128
1142        assert_eq!(f[0].align, 16);
1143        assert_eq!(f[1].size, 32); // __m256
1144        assert_eq!(f[1].align, 32);
1145    }
1146
1147    #[test]
1148    fn simd_avx512_size() {
1149        let src = "struct Wide { __m512 v; };";
1150        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1151        assert_eq!(layouts[0].fields[0].size, 64);
1152        assert_eq!(layouts[0].fields[0].align, 64);
1153    }
1154
1155    #[test]
1156    fn simd_padding_detected_when_small_field_before_avx() {
1157        // char(1) + [31 pad] + __m256(32) = 64 bytes, 31 wasted
1158        let src = "struct Mixed { char flag; __m256 data; };";
1159        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1160        let gaps = padlock_core::ir::find_padding(&layouts[0]);
1161        assert!(!gaps.is_empty());
1162        assert_eq!(gaps[0].bytes, 31);
1163    }
1164
1165    // ── union parsing ─────────────────────────────────────────────────────────
1166
1167    #[test]
1168    fn union_fields_all_at_offset_zero() {
1169        let src = "union Data { int i; float f; double d; };";
1170        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1171        assert_eq!(layouts.len(), 1);
1172        let u = &layouts[0];
1173        assert!(u.is_union);
1174        for field in &u.fields {
1175            assert_eq!(
1176                field.offset, 0,
1177                "union field '{}' should be at offset 0",
1178                field.name
1179            );
1180        }
1181    }
1182
1183    #[test]
1184    fn union_total_size_is_max_field() {
1185        // double is the largest (8 bytes); total should be 8
1186        let src = "union Data { int i; float f; double d; };";
1187        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1188        assert_eq!(layouts[0].total_size, 8);
1189    }
1190
1191    #[test]
1192    fn union_no_padding_finding() {
1193        let src = "union Data { int i; double d; };";
1194        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1195        let report = padlock_core::findings::Report::from_layouts(&layouts);
1196        let sr = &report.structs[0];
1197        assert!(
1198            !sr.findings
1199                .iter()
1200                .any(|f| matches!(f, padlock_core::findings::Finding::PaddingWaste { .. }))
1201        );
1202        assert!(
1203            !sr.findings
1204                .iter()
1205                .any(|f| matches!(f, padlock_core::findings::Finding::ReorderSuggestion { .. }))
1206        );
1207    }
1208
1209    #[test]
1210    fn typedef_union_parsed() {
1211        let src = "typedef union { int a; double b; } Value;";
1212        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1213        assert_eq!(layouts.len(), 1);
1214        assert_eq!(layouts[0].name, "Value");
1215        assert!(layouts[0].is_union);
1216    }
1217
1218    // ── attribute guard extraction ─────────────────────────────────────────────
1219
1220    #[test]
1221    fn extract_guard_from_c_guarded_by_macro() {
1222        let text = "int value GUARDED_BY(mu);";
1223        let guard = extract_guard_from_c_field_text(text);
1224        assert_eq!(guard.as_deref(), Some("mu"));
1225    }
1226
1227    #[test]
1228    fn extract_guard_from_c_attribute_specifier() {
1229        let text = "__attribute__((guarded_by(counter_lock))) uint64_t counter;";
1230        let guard = extract_guard_from_c_field_text(text);
1231        assert_eq!(guard.as_deref(), Some("counter_lock"));
1232    }
1233
1234    #[test]
1235    fn extract_guard_pt_guarded_by() {
1236        let text = "int *ptr PT_GUARDED_BY(ptr_lock);";
1237        let guard = extract_guard_from_c_field_text(text);
1238        assert_eq!(guard.as_deref(), Some("ptr_lock"));
1239    }
1240
1241    #[test]
1242    fn no_guard_returns_none() {
1243        let guard = extract_guard_from_c_field_text("int x;");
1244        assert!(guard.is_none());
1245    }
1246
1247    #[test]
1248    fn c_struct_guarded_by_sets_concurrent_access() {
1249        // Using GUARDED_BY macro style in comments/text — tree-sitter won't parse
1250        // macro expansions, so test the text-extraction path via parse_field_declaration
1251        // indirectly by checking extract_guard_from_c_field_text.
1252        let text = "uint64_t readers GUARDED_BY(lock_a);";
1253        assert_eq!(
1254            extract_guard_from_c_field_text(text).as_deref(),
1255            Some("lock_a")
1256        );
1257    }
1258
1259    #[test]
1260    fn c_struct_different_guards_detected_as_false_sharing() {
1261        use padlock_core::arch::X86_64_SYSV;
1262        use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
1263
1264        // Manually build a layout with two fields on the same cache line,
1265        // different guards — mirrors what the C frontend would produce for
1266        // __attribute__((guarded_by(...))) annotated fields.
1267        let mut layout = StructLayout {
1268            name: "S".into(),
1269            total_size: 128,
1270            align: 8,
1271            fields: vec![
1272                Field {
1273                    name: "readers".into(),
1274                    ty: TypeInfo::Primitive {
1275                        name: "uint64_t".into(),
1276                        size: 8,
1277                        align: 8,
1278                    },
1279                    offset: 0,
1280                    size: 8,
1281                    align: 8,
1282                    source_file: None,
1283                    source_line: None,
1284                    access: AccessPattern::Concurrent {
1285                        guard: Some("lock_a".into()),
1286                        is_atomic: false,
1287                        is_annotated: true,
1288                    },
1289                },
1290                Field {
1291                    name: "writers".into(),
1292                    ty: TypeInfo::Primitive {
1293                        name: "uint64_t".into(),
1294                        size: 8,
1295                        align: 8,
1296                    },
1297                    offset: 8,
1298                    size: 8,
1299                    align: 8,
1300                    source_file: None,
1301                    source_line: None,
1302                    access: AccessPattern::Concurrent {
1303                        guard: Some("lock_b".into()),
1304                        is_atomic: false,
1305                        is_annotated: true,
1306                    },
1307                },
1308            ],
1309            source_file: None,
1310            source_line: None,
1311            arch: &X86_64_SYSV,
1312            is_packed: false,
1313            is_union: false,
1314            is_repr_rust: false,
1315            suppressed_findings: Vec::new(),
1316        };
1317        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
1318            &layout
1319        ));
1320        // Same guard → no false sharing
1321        layout.fields[1].access = AccessPattern::Concurrent {
1322            guard: Some("lock_a".into()),
1323            is_atomic: false,
1324            is_annotated: true,
1325        };
1326        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
1327            &layout
1328        ));
1329    }
1330
1331    // ── C++ class: vtable pointer ─────────────────────────────────────────────
1332
1333    #[test]
1334    fn cpp_class_with_virtual_method_has_vptr() {
1335        let src = r#"
1336class Widget {
1337    virtual void draw();
1338    int x;
1339    int y;
1340};
1341"#;
1342        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1343        assert_eq!(layouts.len(), 1);
1344        let l = &layouts[0];
1345        // First field must be __vptr
1346        assert_eq!(l.fields[0].name, "__vptr");
1347        assert_eq!(l.fields[0].size, 8); // pointer on x86_64
1348        // __vptr is at offset 0
1349        assert_eq!(l.fields[0].offset, 0);
1350        // int x should come after the pointer (at offset 8)
1351        let x = l.fields.iter().find(|f| f.name == "x").unwrap();
1352        assert_eq!(x.offset, 8);
1353    }
1354
1355    #[test]
1356    fn cpp_class_without_virtual_has_no_vptr() {
1357        let src = "class Plain { int a; int b; };";
1358        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1359        assert_eq!(layouts.len(), 1);
1360        assert!(!layouts[0].fields.iter().any(|f| f.name == "__vptr"));
1361    }
1362
1363    #[test]
1364    fn cpp_struct_keyword_with_virtual_has_vptr() {
1365        // `struct` in C++ can also have virtual methods
1366        let src = "struct IFoo { virtual ~IFoo(); virtual void bar(); };";
1367        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1368        // struct_specifier doesn't go through parse_class_specifier, so no __vptr
1369        // (vtable injection is only for `class` nodes)
1370        let _ = layouts; // just verify it parses without panic
1371    }
1372
1373    // ── C++ class: single inheritance ─────────────────────────────────────────
1374
1375    #[test]
1376    fn cpp_derived_class_has_base_slot() {
1377        let src = r#"
1378class Base {
1379    int x;
1380};
1381class Derived : public Base {
1382    int y;
1383};
1384"#;
1385        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1386        // Both Base and Derived should be parsed
1387        let derived = layouts.iter().find(|l| l.name == "Derived").unwrap();
1388        // Derived must have a __base_Base synthetic field
1389        assert!(
1390            derived.fields.iter().any(|f| f.name == "__base_Base"),
1391            "Derived should have a __base_Base field"
1392        );
1393        // The y field should come after __base_Base
1394        let base_field = derived
1395            .fields
1396            .iter()
1397            .find(|f| f.name == "__base_Base")
1398            .unwrap();
1399        let y_field = derived.fields.iter().find(|f| f.name == "y").unwrap();
1400        assert!(y_field.offset >= base_field.offset + base_field.size);
1401    }
1402
1403    #[test]
1404    fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
1405        let src = r#"
1406class A { int a; };
1407class B { int b; };
1408class C : public A, public B { int c; };
1409"#;
1410        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1411        let c = layouts.iter().find(|l| l.name == "C").unwrap();
1412        assert!(c.fields.iter().any(|f| f.name == "__base_A"));
1413        assert!(c.fields.iter().any(|f| f.name == "__base_B"));
1414    }
1415
1416    #[test]
1417    fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
1418        // class with virtual method: size = sizeof(__vptr) + member fields + padding
1419        let src = "class V { virtual void f(); int x; };";
1420        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1421        let l = &layouts[0];
1422        // __vptr(8) + int(4) + 4 pad = 16 bytes on x86_64
1423        assert_eq!(l.total_size, 16);
1424    }
1425
1426    // ── bitfield handling ─────────────────────────────────────────────────────
1427
1428    #[test]
1429    fn is_bitfield_type_detects_colon_n() {
1430        assert!(is_bitfield_type("int:3"));
1431        assert!(is_bitfield_type("unsigned int:16"));
1432        assert!(is_bitfield_type("uint32_t:1"));
1433        // Not bit-fields — contains ':' but not followed by pure digits
1434        assert!(!is_bitfield_type("std::atomic<int>"));
1435        assert!(!is_bitfield_type("ns::Type"));
1436        assert!(!is_bitfield_type("int"));
1437    }
1438
1439    #[test]
1440    fn struct_with_bitfields_is_skipped() {
1441        // Bit-field layout is compiler-controlled and cannot be accurately modelled
1442        // without a compiler. The struct must be skipped entirely.
1443        let src = r#"
1444struct Flags {
1445    unsigned int active : 1;
1446    unsigned int ready  : 1;
1447    unsigned int error  : 6;
1448    int value;
1449};
1450"#;
1451        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1452        // Flags must not appear — its layout cannot be accurately computed.
1453        assert!(
1454            layouts.iter().all(|l| l.name != "Flags"),
1455            "struct with bitfields should be skipped; got {:?}",
1456            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
1457        );
1458    }
1459
1460    #[test]
1461    fn struct_without_bitfields_is_still_parsed() {
1462        // Ensure the bitfield guard doesn't affect normal structs.
1463        let src = "struct Normal { int a; char b; double c; };";
1464        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1465        assert_eq!(layouts.len(), 1);
1466        assert_eq!(layouts[0].name, "Normal");
1467    }
1468
1469    #[test]
1470    fn c_struct_fields_have_source_lines() {
1471        let src = "struct Point {\n    int x;\n    int y;\n};";
1472        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1473        assert_eq!(layouts.len(), 1);
1474        let fields = &layouts[0].fields;
1475        // x is on line 2, y is on line 3
1476        assert_eq!(fields[0].source_line, Some(2), "x should be line 2");
1477        assert_eq!(fields[1].source_line, Some(3), "y should be line 3");
1478    }
1479
1480    #[test]
1481    fn cpp_class_with_bitfields_is_skipped() {
1482        let src = "class Packed { int x : 4; int y : 4; };";
1483        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1484        assert!(
1485            layouts.iter().all(|l| l.name != "Packed"),
1486            "C++ class with bitfields should be skipped"
1487        );
1488    }
1489
1490    #[test]
1491    fn all_bitfield_struct_is_skipped() {
1492        // Struct with ONLY bit-field members (no normal fields).
1493        // raw_fields is non-empty but all entries carry the `:N` annotation,
1494        // so the bit-field guard must still fire and skip the struct.
1495        let src = "struct BitPacked { int x:4; int y:4; };";
1496        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1497        assert!(
1498            layouts.iter().all(|l| l.name != "BitPacked"),
1499            "all-bitfield struct should be skipped; got {:?}",
1500            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
1501        );
1502    }
1503
1504    // ── __attribute__((packed)) detection ─────────────────────────────────────
1505
1506    #[test]
1507    fn packed_struct_has_no_alignment_padding() {
1508        // Without packed: char(1) + 3-byte pad + int(4) + char(1) + 3-byte pad = 12 bytes
1509        // With packed:    char(1) + int(4) + char(1) = 6 bytes, align=1
1510        let src = r#"
1511struct __attribute__((packed)) Tight {
1512    char a;
1513    int  b;
1514    char c;
1515};
1516"#;
1517        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1518        let l = layouts.iter().find(|l| l.name == "Tight").expect("Tight");
1519        assert!(l.is_packed, "should be marked is_packed");
1520        assert_eq!(l.total_size, 6, "packed: no padding inserted");
1521        assert_eq!(l.fields[0].offset, 0);
1522        assert_eq!(l.fields[1].offset, 1); // immediately after char
1523        assert_eq!(l.fields[2].offset, 5);
1524    }
1525
1526    #[test]
1527    fn non_packed_struct_has_normal_alignment_padding() {
1528        // Confirm baseline: same struct without __attribute__((packed)) gets padded
1529        let src = r#"
1530struct Normal {
1531    char a;
1532    int  b;
1533    char c;
1534};
1535"#;
1536        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1537        let l = layouts.iter().find(|l| l.name == "Normal").expect("Normal");
1538        assert!(!l.is_packed);
1539        assert_eq!(l.total_size, 12);
1540        assert_eq!(l.fields[1].offset, 4); // aligned to 4
1541    }
1542
1543    #[test]
1544    fn cpp_class_packed_attribute_detected() {
1545        let src = r#"
1546class __attribute__((packed)) Dense {
1547    char a;
1548    int  b;
1549};
1550"#;
1551        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1552        let l = layouts.iter().find(|l| l.name == "Dense").expect("Dense");
1553        assert!(
1554            l.is_packed,
1555            "C++ class with __attribute__((packed)) must be marked packed"
1556        );
1557        assert_eq!(l.total_size, 5); // char(1) + int(4), no padding
1558    }
1559
1560    // ── alignas detection ─────────────────────────────────────────────────────
1561
1562    #[test]
1563    fn field_alignas_overrides_natural_alignment() {
1564        // char is normally align=1 but alignas(8) forces it to align-8.
1565        // Layout: c(1B at offset 0, align=8) + x(4B at offset 4, align=4)
1566        // c must start on an 8-byte boundary (trivially satisfied at offset 0).
1567        // After c (1 byte), x aligns to 4: offset = 1.next_multiple_of(4) = 4.
1568        // Struct align = max(8, 4) = 8. Total = 8 bytes (4+4 → 8 → ok for align 8).
1569        let src = r#"
1570struct S {
1571    alignas(8) char c;
1572    int x;
1573};
1574"#;
1575        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1576        let l = layouts.iter().find(|l| l.name == "S").expect("S");
1577        // c should be forced to align 8
1578        let c_field = l.fields.iter().find(|f| f.name == "c").unwrap();
1579        assert_eq!(c_field.align, 8);
1580        // x comes after c (1 byte) with natural alignment 4 → offset 4
1581        let x_field = l.fields.iter().find(|f| f.name == "x").unwrap();
1582        assert_eq!(x_field.offset, 4);
1583        // Struct alignment is max(alignas(8), int align 4) = 8
1584        assert_eq!(l.align, 8);
1585        // Total = 8 bytes (x at 4, size 4; 4+4=8; 8 is multiple of align 8)
1586        assert_eq!(l.total_size, 8);
1587    }
1588
1589    #[test]
1590    fn struct_level_alignas_increases_struct_alignment() {
1591        // alignas(64) on the struct means its alignment requirement is 64.
1592        // Total size must be a multiple of 64.
1593        let src = r#"
1594struct alignas(64) CacheLine {
1595    int x;
1596    int y;
1597};
1598"#;
1599        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1600        let l = layouts
1601            .iter()
1602            .find(|l| l.name == "CacheLine")
1603            .expect("CacheLine");
1604        assert_eq!(l.align, 64);
1605        assert_eq!(l.total_size % 64, 0);
1606    }
1607
1608    #[test]
1609    fn alignas_on_field_smaller_than_natural_is_ignored() {
1610        // alignas(1) on an int field: does NOT reduce alignment below 4.
1611        // In C++, alignas cannot reduce alignment below the natural alignment.
1612        // Our implementation stores the alignas value; natural alignment wins
1613        // because we take max(alignas, natural) in the caller.
1614        // Note: we currently store alignas directly; this test documents behaviour.
1615        let src = "struct S { int x; int y; };";
1616        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1617        let l = &layouts[0];
1618        assert_eq!(l.fields[0].align, 4); // natural alignment, not reduced
1619    }
1620
1621    #[test]
1622    fn cpp_class_alignas_detected() {
1623        let src = r#"
1624class alignas(32) Aligned {
1625    double x;
1626    double y;
1627};
1628"#;
1629        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1630        let l = layouts
1631            .iter()
1632            .find(|l| l.name == "Aligned")
1633            .expect("Aligned");
1634        assert_eq!(l.align, 32);
1635        assert_eq!(l.total_size % 32, 0);
1636    }
1637
1638    // ── bad weather: alignas edge cases ───────────────────────────────────────
1639
1640    #[test]
1641    fn struct_without_alignas_unchanged() {
1642        // Ensure the alignas detection path doesn't affect structs without it
1643        let src = "struct Plain { int a; char b; };";
1644        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1645        let l = &layouts[0];
1646        assert_eq!(l.align, 4); // max field alignment = int = 4
1647        assert_eq!(l.total_size, 8); // int(4) + char(1) + 3 pad
1648    }
1649
1650    // ── anonymous nested structs/unions ───────────────────────────────────────
1651
1652    #[test]
1653    fn anonymous_nested_union_fields_flattened() {
1654        let src = r#"
1655struct Packet {
1656    union {
1657        uint32_t raw;
1658        uint8_t bytes[4];
1659    };
1660    uint64_t timestamp;
1661};
1662"#;
1663        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1664        let l = layouts.iter().find(|l| l.name == "Packet").expect("Packet");
1665        // raw, bytes (or similar) and timestamp must all be present
1666        assert!(
1667            l.fields.iter().any(|f| f.name == "raw"),
1668            "raw field must be flattened into Packet"
1669        );
1670        assert!(
1671            l.fields.iter().any(|f| f.name == "timestamp"),
1672            "timestamp must be present"
1673        );
1674    }
1675
1676    #[test]
1677    fn anonymous_nested_struct_fields_flattened() {
1678        let src = r#"
1679struct Outer {
1680    struct {
1681        int x;
1682        int y;
1683    };
1684    double z;
1685};
1686"#;
1687        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1688        let l = layouts.iter().find(|l| l.name == "Outer").expect("Outer");
1689        assert!(
1690            l.fields.iter().any(|f| f.name == "x"),
1691            "x must be flattened"
1692        );
1693        assert!(
1694            l.fields.iter().any(|f| f.name == "y"),
1695            "y must be flattened"
1696        );
1697        assert!(l.fields.iter().any(|f| f.name == "z"), "z present");
1698        // Total: x(4) + y(4) + z(8) = 16 bytes, no padding
1699        assert_eq!(l.total_size, 16);
1700    }
1701
1702    #[test]
1703    fn named_nested_struct_not_flattened() {
1704        // A named struct used as a field type must NOT be flattened
1705        let src = r#"
1706struct Vec2 { float x; float y; };
1707struct Rect { struct Vec2 tl; struct Vec2 br; };
1708"#;
1709        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1710        let rect = layouts.iter().find(|l| l.name == "Rect").expect("Rect");
1711        // Should have tl and br as opaque fields, not x/y flattened
1712        assert_eq!(rect.fields.len(), 2);
1713        assert!(rect.fields.iter().any(|f| f.name == "tl"));
1714        assert!(rect.fields.iter().any(|f| f.name == "br"));
1715    }
1716
1717    // ── type-table tests ──────────────────────────────────────────────────────
1718
1719    #[test]
1720    fn linux_kernel_types_correct_size() {
1721        // u8/u16/u32/u64 and s8/s16/s32/s64 (linux/types.h)
1722        assert_eq!(c_type_size_align("u8", &X86_64_SYSV), (1, 1));
1723        assert_eq!(c_type_size_align("u16", &X86_64_SYSV), (2, 2));
1724        assert_eq!(c_type_size_align("u32", &X86_64_SYSV), (4, 4));
1725        assert_eq!(c_type_size_align("u64", &X86_64_SYSV), (8, 8));
1726        assert_eq!(c_type_size_align("s8", &X86_64_SYSV), (1, 1));
1727        assert_eq!(c_type_size_align("s16", &X86_64_SYSV), (2, 2));
1728        assert_eq!(c_type_size_align("s32", &X86_64_SYSV), (4, 4));
1729        assert_eq!(c_type_size_align("s64", &X86_64_SYSV), (8, 8));
1730    }
1731
1732    #[test]
1733    fn linux_kernel_dunder_types_correct_size() {
1734        assert_eq!(c_type_size_align("__u8", &X86_64_SYSV), (1, 1));
1735        assert_eq!(c_type_size_align("__u16", &X86_64_SYSV), (2, 2));
1736        assert_eq!(c_type_size_align("__u32", &X86_64_SYSV), (4, 4));
1737        assert_eq!(c_type_size_align("__u64", &X86_64_SYSV), (8, 8));
1738        assert_eq!(c_type_size_align("__s8", &X86_64_SYSV), (1, 1));
1739        assert_eq!(c_type_size_align("__s64", &X86_64_SYSV), (8, 8));
1740        // Endian-annotated types are same width as their base
1741        assert_eq!(c_type_size_align("__be16", &X86_64_SYSV), (2, 2));
1742        assert_eq!(c_type_size_align("__le32", &X86_64_SYSV), (4, 4));
1743        assert_eq!(c_type_size_align("__be64", &X86_64_SYSV), (8, 8));
1744    }
1745
1746    #[test]
1747    fn c99_fast_types_correct_size() {
1748        // fast8/16 are their natural width
1749        assert_eq!(c_type_size_align("uint_fast8_t", &X86_64_SYSV), (1, 1));
1750        assert_eq!(c_type_size_align("uint_fast16_t", &X86_64_SYSV), (2, 2));
1751        // fast32/64 are pointer-sized on 64-bit
1752        assert_eq!(c_type_size_align("uint_fast32_t", &X86_64_SYSV), (8, 8));
1753        assert_eq!(c_type_size_align("uint_fast64_t", &X86_64_SYSV), (8, 8));
1754        // least types are their minimum guaranteed width
1755        assert_eq!(c_type_size_align("uint_least8_t", &X86_64_SYSV), (1, 1));
1756        assert_eq!(c_type_size_align("uint_least32_t", &X86_64_SYSV), (4, 4));
1757        assert_eq!(c_type_size_align("uint_least64_t", &X86_64_SYSV), (8, 8));
1758        assert_eq!(c_type_size_align("intmax_t", &X86_64_SYSV), (8, 8));
1759        assert_eq!(c_type_size_align("uintmax_t", &X86_64_SYSV), (8, 8));
1760    }
1761
1762    #[test]
1763    fn gcc_int128_correct_size() {
1764        assert_eq!(c_type_size_align("__int128", &X86_64_SYSV), (16, 16));
1765        assert_eq!(c_type_size_align("__uint128", &X86_64_SYSV), (16, 16));
1766        assert_eq!(c_type_size_align("__int128_t", &X86_64_SYSV), (16, 16));
1767        // unsigned __int128 — "unsigned " prefix is stripped, then __int128 matched
1768        assert_eq!(
1769            c_type_size_align("unsigned __int128", &X86_64_SYSV),
1770            (16, 16)
1771        );
1772    }
1773
1774    #[test]
1775    fn windows_types_correct_size() {
1776        assert_eq!(c_type_size_align("BYTE", &X86_64_SYSV), (1, 1));
1777        assert_eq!(c_type_size_align("WORD", &X86_64_SYSV), (2, 2));
1778        assert_eq!(c_type_size_align("DWORD", &X86_64_SYSV), (4, 4));
1779        assert_eq!(c_type_size_align("QWORD", &X86_64_SYSV), (8, 8));
1780        assert_eq!(c_type_size_align("BOOL", &X86_64_SYSV), (4, 4));
1781        assert_eq!(c_type_size_align("UINT8", &X86_64_SYSV), (1, 1));
1782        assert_eq!(c_type_size_align("INT32", &X86_64_SYSV), (4, 4));
1783        assert_eq!(c_type_size_align("UINT64", &X86_64_SYSV), (8, 8));
1784        assert_eq!(c_type_size_align("HANDLE", &X86_64_SYSV), (8, 8));
1785        assert_eq!(c_type_size_align("LPVOID", &X86_64_SYSV), (8, 8));
1786    }
1787
1788    #[test]
1789    fn char_types_correct_size() {
1790        assert_eq!(c_type_size_align("wchar_t", &X86_64_SYSV), (4, 4));
1791        assert_eq!(c_type_size_align("char8_t", &X86_64_SYSV), (1, 1));
1792        assert_eq!(c_type_size_align("char16_t", &X86_64_SYSV), (2, 2));
1793        assert_eq!(c_type_size_align("char32_t", &X86_64_SYSV), (4, 4));
1794    }
1795
1796    #[test]
1797    fn half_precision_types_correct_size() {
1798        assert_eq!(c_type_size_align("_Float16", &X86_64_SYSV), (2, 2));
1799        assert_eq!(c_type_size_align("__fp16", &X86_64_SYSV), (2, 2));
1800        assert_eq!(c_type_size_align("__bf16", &X86_64_SYSV), (2, 2));
1801        assert_eq!(c_type_size_align("_Float128", &X86_64_SYSV), (16, 16));
1802    }
1803
1804    #[test]
1805    fn unsigned_prefix_stripped_correctly() {
1806        // "unsigned short" → "short" → (2, 2)
1807        assert_eq!(c_type_size_align("unsigned short", &X86_64_SYSV), (2, 2));
1808        assert_eq!(c_type_size_align("unsigned int", &X86_64_SYSV), (4, 4));
1809        assert_eq!(
1810            c_type_size_align("unsigned long long", &X86_64_SYSV),
1811            (8, 8)
1812        );
1813        assert_eq!(
1814            c_type_size_align("long int", &X86_64_SYSV),
1815            (X86_64_SYSV.pointer_size, X86_64_SYSV.pointer_size)
1816        );
1817    }
1818
1819    #[test]
1820    fn linux_kernel_struct_with_new_types() {
1821        // Representative kernel-style struct using __u32, __be16, u8
1822        let src = r#"
1823struct NetHeader {
1824    __be32 src_ip;
1825    __be32 dst_ip;
1826    __be16 src_port;
1827    __be16 dst_port;
1828    u8     protocol;
1829    u8     ttl;
1830};
1831"#;
1832        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1833        assert_eq!(layouts.len(), 1);
1834        let l = &layouts[0];
1835        // 4+4+2+2+1+1 = 14B; max align is 4 (__be32) → padded to 16B
1836        assert_eq!(l.total_size, 16);
1837        assert_eq!(l.fields[0].size, 4); // __be32 src_ip
1838        assert_eq!(l.fields[2].size, 2); // __be16 src_port
1839        assert_eq!(l.fields[4].size, 1); // u8 protocol
1840    }
1841
1842    // ── C++ stdlib type tests ─────────────────────────────────────────────────
1843
1844    #[test]
1845    fn cpp_string_is_32_bytes() {
1846        assert_eq!(c_type_size_align("std::string", &X86_64_SYSV), (32, 8));
1847        assert_eq!(c_type_size_align("std::wstring", &X86_64_SYSV), (32, 8));
1848    }
1849
1850    #[test]
1851    fn cpp_string_view_is_two_words() {
1852        assert_eq!(c_type_size_align("std::string_view", &X86_64_SYSV), (16, 8));
1853    }
1854
1855    #[test]
1856    fn cpp_vector_is_24_bytes() {
1857        assert_eq!(c_type_size_align("std::vector<int>", &X86_64_SYSV), (24, 8));
1858        assert_eq!(
1859            c_type_size_align("std::vector<uint64_t>", &X86_64_SYSV),
1860            (24, 8)
1861        );
1862        // Size is independent of T
1863        assert_eq!(
1864            c_type_size_align("std::vector<std::string>", &X86_64_SYSV),
1865            (24, 8)
1866        );
1867    }
1868
1869    #[test]
1870    fn cpp_smart_pointers_correct_size() {
1871        // unique_ptr: single pointer
1872        assert_eq!(
1873            c_type_size_align("std::unique_ptr<int>", &X86_64_SYSV),
1874            (8, 8)
1875        );
1876        // shared_ptr / weak_ptr: two pointers
1877        assert_eq!(
1878            c_type_size_align("std::shared_ptr<int>", &X86_64_SYSV),
1879            (16, 8)
1880        );
1881        assert_eq!(
1882            c_type_size_align("std::weak_ptr<int>", &X86_64_SYSV),
1883            (16, 8)
1884        );
1885    }
1886
1887    #[test]
1888    fn cpp_optional_recursive_size() {
1889        // std::optional<bool>: 1B (bool) + 1B (has_value flag) → 2B
1890        assert_eq!(
1891            c_type_size_align("std::optional<bool>", &X86_64_SYSV),
1892            (2, 1)
1893        );
1894        // std::optional<int>: 4B + 1B → padded to 4B → 8B total? Let's check:
1895        // t_size=4, t_align=4; (4+1).next_multiple_of(4) = 8
1896        assert_eq!(
1897            c_type_size_align("std::optional<int>", &X86_64_SYSV),
1898            (8, 4)
1899        );
1900        // std::optional<double>: 8B + 1B → padded to 8B → 16B
1901        assert_eq!(
1902            c_type_size_align("std::optional<double>", &X86_64_SYSV),
1903            (16, 8)
1904        );
1905    }
1906
1907    #[test]
1908    fn cpp_function_is_32_bytes() {
1909        assert_eq!(
1910            c_type_size_align("std::function<void()>", &X86_64_SYSV),
1911            (32, 8)
1912        );
1913        assert_eq!(
1914            c_type_size_align("std::function<int(int)>", &X86_64_SYSV),
1915            (32, 8)
1916        );
1917    }
1918
1919    #[test]
1920    fn cpp_stdlib_struct_with_string_field() {
1921        // A struct with std::string fields — used to get pointer-size (8B), now 32B
1922        let src = r#"
1923struct Config {
1924    std::string name;
1925    int         version;
1926    bool        enabled;
1927};
1928"#;
1929        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1930        let l = &layouts[0];
1931        assert_eq!(l.fields[0].size, 32); // std::string, not 8
1932        // int at offset 32, bool at 36; total padded to 8-byte align = 40
1933        assert_eq!(l.fields[1].offset, 32);
1934        assert_eq!(l.fields[1].size, 4);
1935    }
1936}