Skip to main content

padlock_source/frontends/
c_cpp.rs

1// padlock-source/src/frontends/c_cpp.rs
2//
3// Extracts struct layouts from C / C++ source using tree-sitter.
4// Sizes and alignments are computed from field type names + arch config;
5// there is no compiler involved so the results are approximate for complex types.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use tree_sitter::{Node, Parser};
10
11// ── type resolution ───────────────────────────────────────────────────────────
12
13/// Map a C/C++ type name to (size, align) using the target arch.
14fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
15    let ty = ty.trim();
16    // Strip qualifiers
17    for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
18        if let Some(rest) = ty.strip_prefix(qual) {
19            return c_type_size_align(rest, arch);
20        }
21    }
22    // x86 SSE / AVX / AVX-512 SIMD types
23    match ty {
24        "__m64" => return (8, 8),
25        "__m128" | "__m128d" | "__m128i" => return (16, 16),
26        "__m256" | "__m256d" | "__m256i" => return (32, 32),
27        "__m512" | "__m512d" | "__m512i" => return (64, 64),
28        // ARM NEON — 64-bit (double-word) vectors
29        "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
30        | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
31        // ARM NEON — 128-bit (quad-word) vectors
32        "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
33        | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
34        _ => {}
35    }
36    // C++ standard library types (Linux/glibc + libstdc++ defaults).
37    // Sizes are platform-approximate; accuracy is "good enough" for cache-line
38    // bucketing and false-sharing detection.
39    match ty {
40        // ── Synchronisation ───────────────────────────────────────────────────
41        // pthread_mutex_t on Linux/glibc is 40 bytes.
42        "std::mutex"
43        | "std::recursive_mutex"
44        | "std::timed_mutex"
45        | "std::recursive_timed_mutex"
46        | "pthread_mutex_t" => return (40, 8),
47        "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
48        "std::condition_variable" | "pthread_cond_t" => return (48, 8),
49
50        // ── String / view ─────────────────────────────────────────────────────
51        // libstdc++ std::string: 32B (ptr + length + SSO buffer / capacity).
52        // libc++ (Clang): 24B. We use 32B (libstdc++ / GCC, dominant on Linux).
53        "std::string" | "std::wstring" | "std::u8string" | "std::u16string" | "std::u32string"
54        | "std::pmr::string" => return (32, 8),
55        // std::string_view / std::span<T>: pointer + length (2 words).
56        "std::string_view"
57        | "std::wstring_view"
58        | "std::u8string_view"
59        | "std::u16string_view"
60        | "std::u32string_view" => return (arch.pointer_size * 2, arch.pointer_size),
61
62        // ── Sequence containers ───────────────────────────────────────────────
63        // std::vector<T>: pointer + size + capacity = 3 words (24B on 64-bit).
64        // Size is independent of T.
65        ty if ty.starts_with("std::vector<") || ty == "std::vector" => {
66            return (arch.pointer_size * 3, arch.pointer_size);
67        }
68        // std::deque<T>: 80B on both libstdc++ and libc++ (64-bit Linux).
69        ty if ty.starts_with("std::deque<") || ty == "std::deque" => return (80, 8),
70        // std::list<T>: sentinel node pointer + size = 2 words + node pointers.
71        // libstdc++: 24B (size_t + two pointers). libc++: 24B.
72        ty if ty.starts_with("std::list<") || ty == "std::list" => {
73            return (arch.pointer_size * 3, arch.pointer_size);
74        }
75        // std::forward_list<T>: single pointer (head node).
76        ty if ty.starts_with("std::forward_list<") || ty == "std::forward_list" => {
77            return (arch.pointer_size, arch.pointer_size);
78        }
79        // std::array<T, N>: inline storage; size = N * sizeof(T).
80        // We cannot compute this without resolving T and N, so fall through.
81
82        // ── Associative / unordered containers ────────────────────────────────
83        // All map/set types: header node + size = ~48B (libstdc++) / ~40B (libc++).
84        // Use 48B as conservative approximation.
85        ty if ty.starts_with("std::map<")
86            || ty.starts_with("std::multimap<")
87            || ty.starts_with("std::set<")
88            || ty.starts_with("std::multiset<") =>
89        {
90            return (48, 8);
91        }
92        // std::unordered_map / unordered_set: bucket array pointer + size + load factor + etc.
93        // libstdc++: ~56B. libc++: ~72B. Use 56B.
94        ty if ty.starts_with("std::unordered_map<")
95            || ty.starts_with("std::unordered_multimap<")
96            || ty.starts_with("std::unordered_set<")
97            || ty.starts_with("std::unordered_multiset<") =>
98        {
99            return (56, 8);
100        }
101
102        // ── Smart pointers ────────────────────────────────────────────────────
103        // std::unique_ptr<T>: single pointer (deleter may be zero-sized via EBO).
104        ty if ty.starts_with("std::unique_ptr<") || ty == "std::unique_ptr" => {
105            return (arch.pointer_size, arch.pointer_size);
106        }
107        // std::shared_ptr<T> / std::weak_ptr<T>: object pointer + control block pointer.
108        ty if ty.starts_with("std::shared_ptr<")
109            || ty == "std::shared_ptr"
110            || ty.starts_with("std::weak_ptr<")
111            || ty == "std::weak_ptr" =>
112        {
113            return (arch.pointer_size * 2, arch.pointer_size);
114        }
115
116        // ── Type-erasure / utilities ──────────────────────────────────────────
117        // std::function<Sig>: 32B on libstdc++ and libc++ (64-bit Linux).
118        // Holds a functor pointer, a vtable pointer, and a small-functor buffer.
119        ty if ty.starts_with("std::function<") || ty == "std::function" => return (32, 8),
120        // std::any: 32B on libstdc++ (small-object buffer + vtable pointer).
121        "std::any" => return (32, 8),
122        // std::error_code / std::error_condition: pointer + int = 16B.
123        "std::error_code" | "std::error_condition" => return (16, 8),
124        // std::exception_ptr: single pointer.
125        "std::exception_ptr" => return (arch.pointer_size, arch.pointer_size),
126        // std::type_index: single pointer (wraps std::type_info*).
127        "std::type_index" => return (arch.pointer_size, arch.pointer_size),
128        // std::span<T>: pointer + length (2 words). Template arg irrelevant.
129        ty if ty.starts_with("std::span<") || ty == "std::span" => {
130            return (arch.pointer_size * 2, arch.pointer_size);
131        }
132        // std::optional<T>: sizeof(T) + 1B bool, padded to align(T).
133        // Recurse to resolve T then apply the formula.
134        ty if ty.starts_with("std::optional<") && ty.ends_with('>') => {
135            let inner = &ty["std::optional<".len()..ty.len() - 1];
136            let (t_size, t_align) = c_type_size_align(inner.trim(), arch);
137            let total = (t_size + 1).next_multiple_of(t_align.max(1));
138            return (total, t_align.max(1));
139        }
140
141        // ── Atomic ────────────────────────────────────────────────────────────
142        // std::atomic<T>: same size and alignment as T.
143        ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
144            let inner = &ty[12..ty.len() - 1];
145            return c_type_size_align(inner.trim(), arch);
146        }
147        // std::atomic_flag: guaranteed 1B minimum, but often 4B in practice.
148        "std::atomic_flag" => return (4, 4),
149
150        _ => {} // fall through to primitive types below
151    }
152    // Primitive / stdint / pointer types
153    match ty {
154        "char" | "_Bool" | "bool" => (1, 1),
155        "short" | "short int" => (2, 2),
156        "int" => (4, 4),
157        "long" | "long int" => (arch.pointer_size, arch.pointer_size),
158        "long long" | "long long int" => (8, 8),
159        "float" => (4, 4),
160        "double" => (8, 8),
161        "long double" => (16, 16),
162
163        // C99 stdint exact-width types
164        "int8_t" | "uint8_t" => (1, 1),
165        "int16_t" | "uint16_t" => (2, 2),
166        "int32_t" | "uint32_t" => (4, 4),
167        "int64_t" | "uint64_t" => (8, 8),
168        "intmax_t" | "uintmax_t" => (8, 8),
169        "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
170            (arch.pointer_size, arch.pointer_size)
171        }
172
173        // C99 fast types — uint_fast{8,16}_t are always 1/2B;
174        // uint_fast{32,64}_t are pointer-sized on 64-bit (8B), 4B on 32-bit.
175        "int_fast8_t" | "uint_fast8_t" => (1, 1),
176        "int_fast16_t" | "uint_fast16_t" => (2, 2),
177        "int_fast32_t" | "uint_fast32_t" | "int_fast64_t" | "uint_fast64_t" => {
178            (arch.pointer_size, arch.pointer_size)
179        }
180
181        // C99 least types — minimum guaranteed widths
182        "int_least8_t" | "uint_least8_t" => (1, 1),
183        "int_least16_t" | "uint_least16_t" => (2, 2),
184        "int_least32_t" | "uint_least32_t" => (4, 4),
185        "int_least64_t" | "uint_least64_t" => (8, 8),
186
187        // GCC/Clang 128-bit integer extension
188        "__int128" | "__uint128" | "__int128_t" | "__uint128_t" => (16, 16),
189
190        // Linux kernel short-form integer types (linux/types.h)
191        "u8" | "s8" => (1, 1),
192        "u16" | "s16" => (2, 2),
193        "u32" | "s32" => (4, 4),
194        "u64" | "s64" => (8, 8),
195
196        // Linux kernel double-underscore types (__u8, __s8, __be16, __le32, …)
197        "__u8" | "__s8" | "__u8__" | "__s8__" => (1, 1),
198        "__u16" | "__s16" | "__be16" | "__le16" => (2, 2),
199        "__u32" | "__s32" | "__be32" | "__le32" => (4, 4),
200        "__u64" | "__s64" | "__be64" | "__le64" => (8, 8),
201
202        // MSVC fixed-width intrinsics
203        "__int8" => (1, 1),
204        "__int16" => (2, 2),
205        "__int32" => (4, 4),
206        "__int64" => (8, 8),
207
208        // Windows SDK / WinAPI types
209        "BYTE" | "BOOLEAN" | "CHAR" | "INT8" | "UINT8" => (1, 1),
210        "WORD" | "WCHAR" | "SHORT" | "USHORT" | "INT16" | "UINT16" => (2, 2),
211        "DWORD" | "LONG" | "ULONG" | "INT" | "UINT" | "BOOL" | "FLOAT" | "INT32" | "UINT32" => {
212            (4, 4)
213        }
214        "QWORD" | "LONGLONG" | "ULONGLONG" | "INT64" | "UINT64" | "LARGE_INTEGER" => (8, 8),
215        "DWORD64" | "ULONG64" | "LONG64" => (8, 8),
216        "HANDLE" | "LPVOID" | "PVOID" | "LPCVOID" | "LPSTR" | "LPCSTR" | "LPWSTR" | "LPCWSTR"
217        | "SIZE_T" | "SSIZE_T" | "ULONG_PTR" | "LONG_PTR" | "DWORD_PTR" | "INT_PTR"
218        | "UINT_PTR" => (arch.pointer_size, arch.pointer_size),
219
220        // C/C++ character types
221        // wchar_t: 4B on Linux/macOS (GCC/Clang POSIX), 2B on Windows/MSVC.
222        // All current padlock arch configs are POSIX, so 4B is correct here.
223        "wchar_t" => (4, 4),
224        "char8_t" => (1, 1),
225        "char16_t" => (2, 2),
226        "char32_t" => (4, 4),
227
228        // Half-precision and bfloat16 (ARM, GCC, Clang, ML workloads)
229        "_Float16" | "__fp16" | "__bf16" => (2, 2),
230        // 128-bit float (GCC/Clang extension)
231        "_Float128" | "__float128" => (16, 16),
232
233        // Pointer types
234        ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
235        // Unknown — use pointer size as a reasonable default
236        _ => (arch.pointer_size, arch.pointer_size),
237    }
238}
239
240// ── struct / union simulation ─────────────────────────────────────────────────
241
/// Remove a trailing bit-field width (`:N`) from a type name so the base type
/// can be looked up.
///
/// `"int:3"` becomes `"int"`. A name such as `"std::atomic"` is returned
/// unchanged because the text after its last `:` is not purely decimal digits.
fn strip_bitfield_suffix(ty: &str) -> &str {
    match ty.rsplit_once(':') {
        Some((base, width)) => {
            let width = width.trim();
            let looks_like_width = !width.is_empty() && width.chars().all(|c| c.is_ascii_digit());
            if looks_like_width {
                base.trim_end()
            } else {
                ty
            }
        }
        None => ty,
    }
}
253
254/// Return `true` when `ty` carries a bit-field width annotation (e.g. `"int:3"`).
255/// Bit-field packing is compiler-controlled and cannot be accurately modelled
256/// without a compiler, so structs containing bit-field members are skipped.
257fn is_bitfield_type(ty: &str) -> bool {
258    strip_bitfield_suffix(ty) != ty
259}
260
261/// Simulate C/C++ struct layout given ordered fields.
262///
263/// When `packed` is `true` the layout mirrors `__attribute__((packed))`:
264/// no inter-field alignment padding is inserted and the struct alignment
265/// is forced to 1. This matches GCC/Clang behaviour for packed structs.
266fn simulate_layout(
267    fields: &mut Vec<Field>,
268    struct_name: String,
269    arch: &'static ArchConfig,
270    source_line: Option<u32>,
271    packed: bool,
272) -> StructLayout {
273    let mut offset = 0usize;
274    let mut struct_align = 1usize;
275
276    for f in fields.iter_mut() {
277        if !packed && f.align > 0 {
278            offset = offset.next_multiple_of(f.align);
279        }
280        f.offset = offset;
281        offset += f.size;
282        if !packed {
283            struct_align = struct_align.max(f.align);
284        }
285    }
286    // Trailing padding (not present in packed structs)
287    if !packed && struct_align > 0 {
288        offset = offset.next_multiple_of(struct_align);
289    }
290
291    StructLayout {
292        name: struct_name,
293        total_size: offset,
294        align: struct_align,
295        fields: std::mem::take(fields),
296        source_file: None,
297        source_line,
298        arch,
299        is_packed: packed,
300        is_union: false,
301        is_repr_rust: false,
302    }
303}
304
305/// Simulate a C/C++ union layout: all fields start at offset 0;
306/// total size is the largest field, rounded to max alignment.
307fn simulate_union_layout(
308    fields: &mut Vec<Field>,
309    name: String,
310    arch: &'static ArchConfig,
311    source_line: Option<u32>,
312) -> StructLayout {
313    for f in fields.iter_mut() {
314        f.offset = 0;
315    }
316    let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
317    let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
318    let total_size = if max_align > 0 {
319        max_size.next_multiple_of(max_align)
320    } else {
321        max_size
322    };
323
324    StructLayout {
325        name,
326        total_size,
327        align: max_align,
328        fields: std::mem::take(fields),
329        source_file: None,
330        source_line,
331        arch,
332        is_packed: false,
333        is_union: true,
334        is_repr_rust: false,
335    }
336}
337
338// ── C++ class parsing (vtable + inheritance) ──────────────────────────────────
339
340/// Parse a `class_specifier` node, modelling:
341/// - A hidden vtable pointer (`__vptr`) when any method is `virtual`.
342/// - Base-class storage as a synthetic `__base_<Name>` field (size resolved
343///   later by the nested-struct resolution pass in `lib.rs`).
344fn parse_class_specifier(
345    source: &str,
346    node: Node<'_>,
347    arch: &'static ArchConfig,
348) -> Option<StructLayout> {
349    let mut class_name = "<anonymous>".to_string();
350    let mut base_names: Vec<String> = Vec::new();
351    let mut body_node: Option<Node> = None;
352    let mut is_packed = false;
353    let mut struct_alignas: Option<usize> = None;
354
355    for i in 0..node.child_count() {
356        let child = node.child(i)?;
357        match child.kind() {
358            "type_identifier" => class_name = source[child.byte_range()].to_string(),
359            "base_class_clause" => {
360                // tree-sitter-cpp structure: ':' [access_specifier] type_identifier
361                // type_identifier nodes are direct children of base_class_clause.
362                for j in 0..child.child_count() {
363                    if let Some(base) = child.child(j)
364                        && base.kind() == "type_identifier"
365                    {
366                        base_names.push(source[base.byte_range()].to_string());
367                    }
368                }
369            }
370            "field_declaration_list" => body_node = Some(child),
371            "attribute_specifier" => {
372                if source[child.byte_range()].contains("packed") {
373                    is_packed = true;
374                }
375            }
376            // C++11 class-level alignas: `class alignas(64) Name { ... };`
377            "alignas_qualifier" | "alignas_specifier" => {
378                if struct_alignas.is_none() {
379                    struct_alignas = parse_alignas_value(source, child);
380                }
381            }
382            _ => {}
383        }
384    }
385
386    let body = body_node?;
387
388    // Detect virtual methods: look for `virtual` keyword anywhere in body
389    let has_virtual = contains_virtual_keyword(source, body);
390
391    // Collect declared fields: (field_name, type_text, guard, alignas_override)
392    let mut raw_fields: Vec<(String, String, Option<String>, Option<usize>)> = Vec::new();
393    for i in 0..body.child_count() {
394        let Some(child) = body.child(i) else {
395            continue;
396        };
397        if child.kind() == "field_declaration" {
398            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, false) {
399                raw_fields.extend(anon_fields);
400            } else if let Some((ty, fname, guard, al)) = parse_field_declaration(source, child) {
401                raw_fields.push((fname, ty, guard, al));
402            }
403        }
404    }
405
406    // Build fields: vtable pointer, then base-class slots, then declared fields
407    let mut fields: Vec<Field> = Vec::new();
408
409    // Virtual dispatch pointer (hidden, at offset 0 for the first virtual class)
410    if has_virtual {
411        let ps = arch.pointer_size;
412        fields.push(Field {
413            name: "__vptr".to_string(),
414            ty: TypeInfo::Pointer {
415                size: ps,
416                align: ps,
417            },
418            offset: 0,
419            size: ps,
420            align: ps,
421            source_file: None,
422            source_line: None,
423            access: AccessPattern::Unknown,
424        });
425    }
426
427    // Base class storage (opaque until nested-struct resolver fills in sizes)
428    for base in &base_names {
429        let ps = arch.pointer_size;
430        fields.push(Field {
431            name: format!("__base_{base}"),
432            ty: TypeInfo::Opaque {
433                name: base.clone(),
434                size: ps,
435                align: ps,
436            },
437            offset: 0,
438            size: ps,
439            align: ps,
440            source_file: None,
441            source_line: None,
442            access: AccessPattern::Unknown,
443        });
444    }
445
446    // Skip classes with bit-field members (same reason as structs).
447    if raw_fields.iter().any(|(_, ty, _, _)| is_bitfield_type(ty)) {
448        return None;
449    }
450
451    // Declared member fields
452    for (fname, ty_name, guard, alignas) in raw_fields {
453        let (size, natural_align) = c_type_size_align(&ty_name, arch);
454        let align = alignas.unwrap_or(natural_align);
455        let access = if let Some(g) = guard {
456            AccessPattern::Concurrent {
457                guard: Some(g),
458                is_atomic: false,
459            }
460        } else {
461            AccessPattern::Unknown
462        };
463        fields.push(Field {
464            name: fname,
465            ty: TypeInfo::Primitive {
466                name: ty_name,
467                size,
468                align,
469            },
470            offset: 0,
471            size,
472            align,
473            source_file: None,
474            source_line: None,
475            access,
476        });
477    }
478
479    if fields.is_empty() {
480        return None;
481    }
482
483    let line = node.start_position().row as u32 + 1;
484    let mut layout = simulate_layout(&mut fields, class_name, arch, Some(line), is_packed);
485
486    if let Some(al) = struct_alignas
487        && al > layout.align
488    {
489        layout.align = al;
490        if !is_packed {
491            layout.total_size = layout.total_size.next_multiple_of(al);
492        }
493    }
494
495    Some(layout)
496}
497
498/// Return true if a `field_declaration_list` node contains any `virtual` keyword
499/// (indicating that the class needs a vtable pointer).
500fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
501    let mut stack = vec![node];
502    while let Some(n) = stack.pop() {
503        if n.kind() == "virtual" {
504            return true;
505        }
506        // Also check raw text for cases where tree-sitter may not produce a
507        // dedicated `virtual` node (e.g. inside complex declarations).
508        if n.child_count() == 0 {
509            let text = &source[n.byte_range()];
510            if text == "virtual" {
511                return true;
512            }
513        }
514        for i in (0..n.child_count()).rev() {
515            if let Some(child) = n.child(i) {
516                stack.push(child);
517            }
518        }
519    }
520    false
521}
522
523// ── tree-sitter walker ────────────────────────────────────────────────────────
524
525fn extract_structs_from_tree(
526    source: &str,
527    root: Node<'_>,
528    arch: &'static ArchConfig,
529    layouts: &mut Vec<StructLayout>,
530) {
531    let cursor = root.walk();
532    let mut stack = vec![root];
533
534    while let Some(node) = stack.pop() {
535        // Push children in reverse so we process left-to-right
536        for i in (0..node.child_count()).rev() {
537            if let Some(child) = node.child(i) {
538                stack.push(child);
539            }
540        }
541
542        match node.kind() {
543            "struct_specifier" => {
544                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, false) {
545                    layouts.push(layout);
546                }
547            }
548            "union_specifier" => {
549                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, true) {
550                    layouts.push(layout);
551                }
552            }
553            "class_specifier" => {
554                if let Some(layout) = parse_class_specifier(source, node, arch) {
555                    layouts.push(layout);
556                }
557            }
558            _ => {}
559        }
560    }
561
562    // Also handle `typedef struct/union { ... } Name;`
563    let cursor2 = root.walk();
564    let mut stack2 = vec![root];
565    while let Some(node) = stack2.pop() {
566        for i in (0..node.child_count()).rev() {
567            if let Some(child) = node.child(i) {
568                stack2.push(child);
569            }
570        }
571        if node.kind() == "type_definition"
572            && let Some(layout) = parse_typedef_struct_or_union(source, node, arch)
573        {
574            let existing = layouts
575                .iter()
576                .position(|l| l.name == layout.name || l.name == "<anonymous>");
577            match existing {
578                Some(i) if layouts[i].name == "<anonymous>" => {
579                    layouts[i] = layout;
580                }
581                None => layouts.push(layout),
582                _ => {}
583            }
584        }
585    }
586    let _ = cursor;
587    let _ = cursor2; // silence unused warnings
588}
589
590/// Parse a `struct_specifier` or `union_specifier` node into a `StructLayout`.
591fn parse_struct_or_union_specifier(
592    source: &str,
593    node: Node<'_>,
594    arch: &'static ArchConfig,
595    is_union: bool,
596) -> Option<StructLayout> {
597    let mut name = "<anonymous>".to_string();
598    let mut body_node: Option<Node> = None;
599    let mut is_packed = false;
600    // Struct-level alignas: `struct alignas(64) CacheAligned { ... };`
601    let mut struct_alignas: Option<usize> = None;
602
603    for i in 0..node.child_count() {
604        let child = node.child(i)?;
605        match child.kind() {
606            "type_identifier" => name = source[child.byte_range()].to_string(),
607            "field_declaration_list" => body_node = Some(child),
608            "attribute_specifier" => {
609                let text = &source[child.byte_range()];
610                if text.contains("packed") {
611                    is_packed = true;
612                }
613            }
614            // C++11 struct-level alignas: `struct alignas(64) Name { ... };`
615            // tree-sitter-cpp: `alignas_qualifier` as direct child of struct_specifier
616            "alignas_qualifier" | "alignas_specifier" => {
617                if struct_alignas.is_none() {
618                    struct_alignas = parse_alignas_value(source, child);
619                }
620            }
621            _ => {}
622        }
623    }
624
625    let body = body_node?;
626    // (field_name, type_text, guard, alignas_override)
627    let mut raw_fields: Vec<(String, String, Option<String>, Option<usize>)> = Vec::new();
628
629    for i in 0..body.child_count() {
630        let child = body.child(i)?;
631        if child.kind() == "field_declaration" {
632            // Check for anonymous nested struct/union: a field_declaration whose
633            // only non-field-identifier child is a struct_specifier/union_specifier
634            // with no type_identifier (i.e. `struct { int x; int y; };`).
635            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, is_union) {
636                raw_fields.extend(anon_fields);
637            } else if let Some((ty, fname, guard, al)) = parse_field_declaration(source, child) {
638                raw_fields.push((fname, ty, guard, al));
639            }
640        }
641    }
642
643    if raw_fields.is_empty() {
644        return None;
645    }
646
647    // Bit-field packing is compiler-controlled and cannot be accurately modelled
648    // without a compiler. Skip the entire struct to avoid producing wrong layout
649    // data. Use `padlock analyze` on the compiled binary for accurate results.
650    if raw_fields.iter().any(|(_, ty, _, _)| is_bitfield_type(ty)) {
651        return None;
652    }
653
654    let mut fields: Vec<Field> = raw_fields
655        .into_iter()
656        .map(|(fname, ty_name, guard, alignas)| {
657            let (size, natural_align) = c_type_size_align(&ty_name, arch);
658            // alignas(N) on a field overrides its alignment requirement.
659            let align = alignas.unwrap_or(natural_align);
660            let access = if let Some(g) = guard {
661                AccessPattern::Concurrent {
662                    guard: Some(g),
663                    is_atomic: false,
664                }
665            } else {
666                AccessPattern::Unknown
667            };
668            Field {
669                name: fname,
670                ty: TypeInfo::Primitive {
671                    name: ty_name,
672                    size,
673                    align,
674                },
675                offset: 0,
676                size,
677                align,
678                source_file: None,
679                source_line: None,
680                access,
681            }
682        })
683        .collect();
684
685    let line = node.start_position().row as u32 + 1;
686    let mut layout = if is_union {
687        simulate_union_layout(&mut fields, name, arch, Some(line))
688    } else {
689        simulate_layout(&mut fields, name, arch, Some(line), is_packed)
690    };
691
692    // Apply struct-level alignas: the struct's alignment requirement is at
693    // least N; trailing padding may grow to satisfy the new alignment.
694    if let Some(al) = struct_alignas
695        && al > layout.align
696    {
697        layout.align = al;
698        if !is_packed {
699            layout.total_size = layout.total_size.next_multiple_of(al);
700        }
701    }
702
703    Some(layout)
704}
705
706/// Parse a `typedef struct/union { ... } Name;` type_definition node.
707fn parse_typedef_struct_or_union(
708    source: &str,
709    node: Node<'_>,
710    arch: &'static ArchConfig,
711) -> Option<StructLayout> {
712    let mut specifier_node: Option<Node> = None;
713    let mut is_union = false;
714    let mut typedef_name: Option<String> = None;
715
716    for i in 0..node.child_count() {
717        let child = node.child(i)?;
718        match child.kind() {
719            "struct_specifier" => {
720                specifier_node = Some(child);
721                is_union = false;
722            }
723            "union_specifier" => {
724                specifier_node = Some(child);
725                is_union = true;
726            }
727            "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
728            _ => {}
729        }
730    }
731
732    let spec = specifier_node?;
733    let typedef_name = typedef_name?;
734
735    let mut layout = parse_struct_or_union_specifier(source, spec, arch, is_union)?;
736    if layout.name == "<anonymous>" {
737        layout.name = typedef_name;
738    }
739    Some(layout)
740}
741
742// Alias kept for the typedef pass in extract_structs_from_tree.
743#[allow(dead_code)]
744fn parse_typedef_struct(
745    source: &str,
746    node: Node<'_>,
747    arch: &'static ArchConfig,
748) -> Option<StructLayout> {
749    parse_typedef_struct_or_union(source, node, arch)
750}
751
/// Extract a lock guard name from the source text of a C/C++ field that
/// carries `__attribute__((guarded_by(X)))` or
/// `__attribute__((pt_guarded_by(X)))`.
///
/// Also recognises the common macro forms `GUARDED_BY(X)` and `PT_GUARDED_BY(X)`
/// which expand to the same attribute (Clang thread-safety analysis).
/// The match is done on raw text, so it works regardless of how tree-sitter
/// structures the inner tokens.
///
/// The guard expression runs up to the parenthesis that balances the opening
/// one, so `GUARDED_BY(obj.mu())` yields `obj.mu()`. Surrounding whitespace and
/// double quotes are removed. Every occurrence of a keyword is tried, so an
/// identifier that merely contains `guarded_by` does not hide a later real
/// attribute. Returns `None` when no non-empty guard is found.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    /// Index of the `)` that closes an already-opened `(`, given the text that
    /// follows that opening parenthesis.
    fn closing_paren(after_open: &str) -> Option<usize> {
        let mut depth = 1usize;
        for (idx, byte) in after_open.bytes().enumerate() {
            match byte {
                b'(' => depth += 1,
                b')' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(idx);
                    }
                }
                _ => {}
            }
        }
        None
    }

    // `pt_guarded_by` / `PT_GUARDED_BY` contain the shorter keywords as a
    // suffix, so these two spellings cover all four forms. Lowercase is tried
    // first, then uppercase.
    for kw in ["guarded_by", "GUARDED_BY"] {
        for (pos, _) in field_source.match_indices(kw) {
            // Expect `(` optionally preceded by whitespace
            let after = field_source[pos + kw.len()..].trim_start();
            if let Some(inner) = after.strip_prefix('(') {
                if let Some(end) = closing_paren(inner) {
                    let guard = inner[..end].trim().trim_matches('"');
                    if !guard.is_empty() {
                        return Some(guard.to_string());
                    }
                }
            }
        }
    }
    None
}
779
780/// Parse a numeric value from an `alignas_qualifier` node: `alignas(N)`.
781/// tree-sitter-cpp uses the node kind `alignas_qualifier` for C++11 `alignas`.
782/// Returns `None` when the specifier contains a type expression rather than
783/// an integer literal (e.g. `alignas(double)` — handled elsewhere by the
784/// compiler; we skip those conservatively).
785fn parse_alignas_value(source: &str, node: Node<'_>) -> Option<usize> {
786    for i in 0..node.child_count() {
787        if let Some(child) = node.child(i) {
788            match child.kind() {
789                "number_literal" | "integer_literal" | "integer" => {
790                    let text = source[child.byte_range()].trim();
791                    if let Ok(n) = text.parse::<usize>() {
792                        return Some(n);
793                    }
794                    // Hex literal: 0x40
795                    if let Some(hex) = text.strip_prefix("0x").or_else(|| text.strip_prefix("0X")) {
796                        return usize::from_str_radix(hex, 16).ok();
797                    }
798                }
799                // Recurse for nested nodes (parenthesised expression, etc.)
800                "parenthesized_expression" | "argument_list" | "alignas_qualifier" => {
801                    if let r @ Some(_) = parse_alignas_value(source, child) {
802                        return r;
803                    }
804                }
805                _ => {}
806            }
807        }
808    }
809    None
810}
811
812/// Returns `(ty, field_name, guard, alignas_override)`.
813/// `alignas_override` is `Some(N)` when the field carries `alignas(N)`.
814/// Detect and parse an anonymous nested struct/union field declaration, e.g.:
815///
816/// ```c
817/// struct Packet {
818///     union {                    // ← anonymous nested union
819///         uint32_t raw;
820///         struct { uint8_t a; uint8_t b; uint8_t c; uint8_t d; };
821///     };
822///     uint64_t timestamp;
823/// };
824/// ```
825///
826/// A `field_declaration` is anonymous if it contains a `struct_specifier` or
827/// `union_specifier` child that has a `field_declaration_list` (i.e. a body)
828/// but no `type_identifier` (i.e. no name). The fields of the nested
829/// struct/union are flattened into the parent.
830///
831/// Returns `None` if the declaration is not an anonymous nested struct/union
832/// (the caller should fall through to `parse_field_declaration`).
833type RawField = (String, String, Option<String>, Option<usize>);
834
835#[allow(clippy::only_used_in_recursion)]
836fn parse_anonymous_nested(
837    source: &str,
838    node: Node<'_>,
839    arch: &'static ArchConfig,
840    parent_is_union: bool,
841) -> Option<Vec<RawField>> {
842    // Find a struct_specifier or union_specifier child.
843    for i in 0..node.child_count() {
844        let child = node.child(i)?;
845        if child.kind() != "struct_specifier" && child.kind() != "union_specifier" {
846            continue;
847        }
848        let nested_is_union = child.kind() == "union_specifier";
849
850        // Must have a body (field_declaration_list) but no type_identifier.
851        let mut has_name = false;
852        let mut body_node: Option<Node> = None;
853        for j in 0..child.child_count() {
854            let sub = child.child(j)?;
855            match sub.kind() {
856                "type_identifier" => has_name = true,
857                "field_declaration_list" => body_node = Some(sub),
858                _ => {}
859            }
860        }
861
862        if has_name || body_node.is_none() {
863            // Named struct/union used as a field type — handled by parse_field_declaration.
864            continue;
865        }
866
867        let body = body_node?;
868        let mut nested_raw: Vec<RawField> = Vec::new();
869
870        for j in 0..body.child_count() {
871            let inner = body.child(j)?;
872            if inner.kind() == "field_declaration" {
873                // Recurse to handle doubly-nested anonymous structs.
874                if let Some(deeper) = parse_anonymous_nested(source, inner, arch, nested_is_union) {
875                    nested_raw.extend(deeper);
876                } else if let Some((ty, fname, guard, al)) = parse_field_declaration(source, inner)
877                {
878                    nested_raw.push((fname, ty, guard, al));
879                }
880            }
881        }
882
883        // If nested is a union, the fields all share offset 0 (relative to the
884        // union's placement in the parent). We can't easily track this through
885        // raw field lists, so we emit them as a synthetic __anon_union_N field
886        // when the parent cares about offsets, or just flatten for unions.
887        //
888        // For simplicity: flatten all fields — the layout simulator will compute
889        // correct offsets if the parent is a struct, and union semantics are
890        // preserved when the parent is a union.
891        let _ = (nested_is_union, parent_is_union);
892
893        if !nested_raw.is_empty() {
894            return Some(nested_raw);
895        }
896    }
897    None
898}
899
900fn parse_field_declaration(
901    source: &str,
902    node: Node<'_>,
903) -> Option<(String, String, Option<String>, Option<usize>)> {
904    let mut ty_parts: Vec<String> = Vec::new();
905    let mut field_name: Option<String> = None;
906    // Bit-field width, e.g. `int flags : 3;` → Some("3")
907    let mut bit_width: Option<String> = None;
908    // Collect attribute text for guard extraction
909    let mut attr_text = String::new();
910    // Field-level alignas override
911    let mut alignas_override: Option<usize> = None;
912
913    for i in 0..node.child_count() {
914        let child = node.child(i)?;
915        match child.kind() {
916            "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
917                ty_parts.push(source[child.byte_range()].trim().to_string());
918            }
919            // C++ qualified types: std::mutex, ns::Type, etc.
920            // C++ template types:  std::atomic<uint64_t>, std::vector<int>, etc.
921            "qualified_identifier" | "template_type" => {
922                ty_parts.push(source[child.byte_range()].trim().to_string());
923            }
924            // Nested struct/union used as a field type: `struct Vec2 tl;`
925            // Extract just the type_identifier name (e.g. "Vec2") so the
926            // nested-struct resolution pass can match it by name.
927            "struct_specifier" | "union_specifier" => {
928                for j in 0..child.child_count() {
929                    if let Some(sub) = child.child(j)
930                        && sub.kind() == "type_identifier"
931                    {
932                        ty_parts.push(source[sub.byte_range()].trim().to_string());
933                        break;
934                    }
935                }
936            }
937            "field_identifier" => {
938                field_name = Some(source[child.byte_range()].trim().to_string());
939            }
940            "pointer_declarator" => {
941                field_name = extract_identifier(source, child);
942                ty_parts.push("*".to_string());
943            }
944            // Bit-field clause: `: N`  (tree-sitter-c/cpp node)
945            "bitfield_clause" => {
946                let text = source[child.byte_range()].trim();
947                // Strip leading ':' and whitespace to get just the width digits
948                bit_width = Some(text.trim_start_matches(':').trim().to_string());
949            }
950            // GNU attribute specifier: __attribute__((...))
951            "attribute_specifier" | "attribute" => {
952                attr_text.push_str(source[child.byte_range()].trim());
953                attr_text.push(' ');
954            }
955            // C++11 alignas: tree-sitter-cpp wraps it as type_qualifier → alignas_qualifier
956            // Also handle the direct form in case grammar versions differ.
957            "alignas_qualifier" | "alignas_specifier" => {
958                if alignas_override.is_none() {
959                    alignas_override = parse_alignas_value(source, child);
960                }
961            }
962            // type_qualifier wraps alignas_qualifier for field declarations:
963            // `alignas(8) char c;` → type_qualifier { alignas_qualifier { ... } }
964            "type_qualifier" => {
965                if alignas_override.is_none() {
966                    for j in 0..child.child_count() {
967                        if let Some(sub) = child.child(j)
968                            && (sub.kind() == "alignas_qualifier"
969                                || sub.kind() == "alignas_specifier")
970                        {
971                            alignas_override = parse_alignas_value(source, sub);
972                            break;
973                        }
974                    }
975                }
976            }
977            _ => {}
978        }
979    }
980
981    let base_ty = ty_parts.join(" ");
982    let fname = field_name?;
983    if base_ty.is_empty() {
984        return None;
985    }
986    // Annotate bit-field types as "type:N" so callers can detect and report them;
987    // `strip_bitfield_suffix` recovers the base type for size/align lookup.
988    let ty = if let Some(w) = bit_width {
989        format!("{base_ty}:{w}")
990    } else {
991        base_ty
992    };
993
994    // Also check the full field source text (attribute_specifier may not always
995    // be a direct child depending on tree-sitter grammar version).
996    let field_src = source[node.byte_range()].to_string();
997    let guard = extract_guard_from_c_field_text(&attr_text)
998        .or_else(|| extract_guard_from_c_field_text(&field_src));
999
1000    Some((ty, fname, guard, alignas_override))
1001}
1002
1003fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
1004    if node.kind() == "field_identifier" || node.kind() == "identifier" {
1005        return Some(source[node.byte_range()].to_string());
1006    }
1007    for i in 0..node.child_count() {
1008        if let Some(child) = node.child(i)
1009            && let Some(name) = extract_identifier(source, child)
1010        {
1011            return Some(name);
1012        }
1013    }
1014    None
1015}
1016
1017// ── public API ────────────────────────────────────────────────────────────────
1018
1019pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1020    let mut parser = Parser::new();
1021    parser.set_language(&tree_sitter_c::LANGUAGE.into())?;
1022    let tree = parser
1023        .parse(source, None)
1024        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1025    let mut layouts = Vec::new();
1026    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1027    Ok(layouts)
1028}
1029
1030pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1031    let mut parser = Parser::new();
1032    parser.set_language(&tree_sitter_cpp::LANGUAGE.into())?;
1033    let tree = parser
1034        .parse(source, None)
1035        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1036    let mut layouts = Vec::new();
1037    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1038    Ok(layouts)
1039}
1040
1041// ── tests ─────────────────────────────────────────────────────────────────────
1042
1043#[cfg(test)]
1044mod tests {
1045    use super::*;
1046    use padlock_core::arch::X86_64_SYSV;
1047
1048    #[test]
1049    fn parse_simple_c_struct() {
1050        let src = r#"
1051struct Point {
1052    int x;
1053    int y;
1054};
1055"#;
1056        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1057        assert_eq!(layouts.len(), 1);
1058        assert_eq!(layouts[0].name, "Point");
1059        assert_eq!(layouts[0].fields.len(), 2);
1060        assert_eq!(layouts[0].fields[0].name, "x");
1061        assert_eq!(layouts[0].fields[1].name, "y");
1062    }
1063
1064    #[test]
1065    fn parse_typedef_struct() {
1066        let src = r#"
1067typedef struct {
1068    char  is_active;
1069    double timeout;
1070    int   port;
1071} Connection;
1072"#;
1073        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1074        assert_eq!(layouts.len(), 1);
1075        assert_eq!(layouts[0].name, "Connection");
1076        assert_eq!(layouts[0].fields.len(), 3);
1077    }
1078
1079    #[test]
1080    fn c_layout_computes_offsets() {
1081        let src = "struct T { char a; double b; };";
1082        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1083        assert_eq!(layouts.len(), 1);
1084        let layout = &layouts[0];
1085        // char at offset 0, double at offset 8 (7 bytes padding)
1086        assert_eq!(layout.fields[0].offset, 0);
1087        assert_eq!(layout.fields[1].offset, 8);
1088        assert_eq!(layout.total_size, 16);
1089    }
1090
1091    #[test]
1092    fn c_layout_detects_padding() {
1093        let src = "struct T { char a; int b; };";
1094        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1095        let gaps = padlock_core::ir::find_padding(&layouts[0]);
1096        assert!(!gaps.is_empty());
1097        assert_eq!(gaps[0].bytes, 3); // 3 bytes padding between char and int
1098    }
1099
1100    #[test]
1101    fn parse_cpp_struct() {
1102        let src = "struct Vec3 { float x; float y; float z; };";
1103        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1104        assert_eq!(layouts.len(), 1);
1105        assert_eq!(layouts[0].fields.len(), 3);
1106    }
1107
1108    // ── SIMD types ────────────────────────────────────────────────────────────
1109
1110    #[test]
1111    fn simd_sse_field_size_and_align() {
1112        let src = "struct Vecs { __m128 a; __m256 b; };";
1113        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1114        assert_eq!(layouts.len(), 1);
1115        let f = &layouts[0].fields;
1116        assert_eq!(f[0].size, 16); // __m128
1117        assert_eq!(f[0].align, 16);
1118        assert_eq!(f[1].size, 32); // __m256
1119        assert_eq!(f[1].align, 32);
1120    }
1121
1122    #[test]
1123    fn simd_avx512_size() {
1124        let src = "struct Wide { __m512 v; };";
1125        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1126        assert_eq!(layouts[0].fields[0].size, 64);
1127        assert_eq!(layouts[0].fields[0].align, 64);
1128    }
1129
1130    #[test]
1131    fn simd_padding_detected_when_small_field_before_avx() {
1132        // char(1) + [31 pad] + __m256(32) = 64 bytes, 31 wasted
1133        let src = "struct Mixed { char flag; __m256 data; };";
1134        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1135        let gaps = padlock_core::ir::find_padding(&layouts[0]);
1136        assert!(!gaps.is_empty());
1137        assert_eq!(gaps[0].bytes, 31);
1138    }
1139
1140    // ── union parsing ─────────────────────────────────────────────────────────
1141
1142    #[test]
1143    fn union_fields_all_at_offset_zero() {
1144        let src = "union Data { int i; float f; double d; };";
1145        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1146        assert_eq!(layouts.len(), 1);
1147        let u = &layouts[0];
1148        assert!(u.is_union);
1149        for field in &u.fields {
1150            assert_eq!(
1151                field.offset, 0,
1152                "union field '{}' should be at offset 0",
1153                field.name
1154            );
1155        }
1156    }
1157
1158    #[test]
1159    fn union_total_size_is_max_field() {
1160        // double is the largest (8 bytes); total should be 8
1161        let src = "union Data { int i; float f; double d; };";
1162        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1163        assert_eq!(layouts[0].total_size, 8);
1164    }
1165
1166    #[test]
1167    fn union_no_padding_finding() {
1168        let src = "union Data { int i; double d; };";
1169        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1170        let report = padlock_core::findings::Report::from_layouts(&layouts);
1171        let sr = &report.structs[0];
1172        assert!(
1173            !sr.findings
1174                .iter()
1175                .any(|f| matches!(f, padlock_core::findings::Finding::PaddingWaste { .. }))
1176        );
1177        assert!(
1178            !sr.findings
1179                .iter()
1180                .any(|f| matches!(f, padlock_core::findings::Finding::ReorderSuggestion { .. }))
1181        );
1182    }
1183
1184    #[test]
1185    fn typedef_union_parsed() {
1186        let src = "typedef union { int a; double b; } Value;";
1187        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1188        assert_eq!(layouts.len(), 1);
1189        assert_eq!(layouts[0].name, "Value");
1190        assert!(layouts[0].is_union);
1191    }
1192
1193    // ── attribute guard extraction ─────────────────────────────────────────────
1194
1195    #[test]
1196    fn extract_guard_from_c_guarded_by_macro() {
1197        let text = "int value GUARDED_BY(mu);";
1198        let guard = extract_guard_from_c_field_text(text);
1199        assert_eq!(guard.as_deref(), Some("mu"));
1200    }
1201
1202    #[test]
1203    fn extract_guard_from_c_attribute_specifier() {
1204        let text = "__attribute__((guarded_by(counter_lock))) uint64_t counter;";
1205        let guard = extract_guard_from_c_field_text(text);
1206        assert_eq!(guard.as_deref(), Some("counter_lock"));
1207    }
1208
1209    #[test]
1210    fn extract_guard_pt_guarded_by() {
1211        let text = "int *ptr PT_GUARDED_BY(ptr_lock);";
1212        let guard = extract_guard_from_c_field_text(text);
1213        assert_eq!(guard.as_deref(), Some("ptr_lock"));
1214    }
1215
1216    #[test]
1217    fn no_guard_returns_none() {
1218        let guard = extract_guard_from_c_field_text("int x;");
1219        assert!(guard.is_none());
1220    }
1221
1222    #[test]
1223    fn c_struct_guarded_by_sets_concurrent_access() {
1224        // Using GUARDED_BY macro style in comments/text — tree-sitter won't parse
1225        // macro expansions, so test the text-extraction path via parse_field_declaration
1226        // indirectly by checking extract_guard_from_c_field_text.
1227        let text = "uint64_t readers GUARDED_BY(lock_a);";
1228        assert_eq!(
1229            extract_guard_from_c_field_text(text).as_deref(),
1230            Some("lock_a")
1231        );
1232    }
1233
1234    #[test]
1235    fn c_struct_different_guards_detected_as_false_sharing() {
1236        use padlock_core::arch::X86_64_SYSV;
1237        use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
1238
1239        // Manually build a layout with two fields on the same cache line,
1240        // different guards — mirrors what the C frontend would produce for
1241        // __attribute__((guarded_by(...))) annotated fields.
1242        let mut layout = StructLayout {
1243            name: "S".into(),
1244            total_size: 128,
1245            align: 8,
1246            fields: vec![
1247                Field {
1248                    name: "readers".into(),
1249                    ty: TypeInfo::Primitive {
1250                        name: "uint64_t".into(),
1251                        size: 8,
1252                        align: 8,
1253                    },
1254                    offset: 0,
1255                    size: 8,
1256                    align: 8,
1257                    source_file: None,
1258                    source_line: None,
1259                    access: AccessPattern::Concurrent {
1260                        guard: Some("lock_a".into()),
1261                        is_atomic: false,
1262                    },
1263                },
1264                Field {
1265                    name: "writers".into(),
1266                    ty: TypeInfo::Primitive {
1267                        name: "uint64_t".into(),
1268                        size: 8,
1269                        align: 8,
1270                    },
1271                    offset: 8,
1272                    size: 8,
1273                    align: 8,
1274                    source_file: None,
1275                    source_line: None,
1276                    access: AccessPattern::Concurrent {
1277                        guard: Some("lock_b".into()),
1278                        is_atomic: false,
1279                    },
1280                },
1281            ],
1282            source_file: None,
1283            source_line: None,
1284            arch: &X86_64_SYSV,
1285            is_packed: false,
1286            is_union: false,
1287            is_repr_rust: false,
1288        };
1289        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
1290            &layout
1291        ));
1292        // Same guard → no false sharing
1293        layout.fields[1].access = AccessPattern::Concurrent {
1294            guard: Some("lock_a".into()),
1295            is_atomic: false,
1296        };
1297        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
1298            &layout
1299        ));
1300    }
1301
1302    // ── C++ class: vtable pointer ─────────────────────────────────────────────
1303
1304    #[test]
1305    fn cpp_class_with_virtual_method_has_vptr() {
1306        let src = r#"
1307class Widget {
1308    virtual void draw();
1309    int x;
1310    int y;
1311};
1312"#;
1313        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1314        assert_eq!(layouts.len(), 1);
1315        let l = &layouts[0];
1316        // First field must be __vptr
1317        assert_eq!(l.fields[0].name, "__vptr");
1318        assert_eq!(l.fields[0].size, 8); // pointer on x86_64
1319        // __vptr is at offset 0
1320        assert_eq!(l.fields[0].offset, 0);
1321        // int x should come after the pointer (at offset 8)
1322        let x = l.fields.iter().find(|f| f.name == "x").unwrap();
1323        assert_eq!(x.offset, 8);
1324    }
1325
1326    #[test]
1327    fn cpp_class_without_virtual_has_no_vptr() {
1328        let src = "class Plain { int a; int b; };";
1329        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1330        assert_eq!(layouts.len(), 1);
1331        assert!(!layouts[0].fields.iter().any(|f| f.name == "__vptr"));
1332    }
1333
1334    #[test]
1335    fn cpp_struct_keyword_with_virtual_has_vptr() {
1336        // `struct` in C++ can also have virtual methods
1337        let src = "struct IFoo { virtual ~IFoo(); virtual void bar(); };";
1338        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1339        // struct_specifier doesn't go through parse_class_specifier, so no __vptr
1340        // (vtable injection is only for `class` nodes)
1341        let _ = layouts; // just verify it parses without panic
1342    }
1343
1344    // ── C++ class: single inheritance ─────────────────────────────────────────
1345
1346    #[test]
1347    fn cpp_derived_class_has_base_slot() {
1348        let src = r#"
1349class Base {
1350    int x;
1351};
1352class Derived : public Base {
1353    int y;
1354};
1355"#;
1356        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1357        // Both Base and Derived should be parsed
1358        let derived = layouts.iter().find(|l| l.name == "Derived").unwrap();
1359        // Derived must have a __base_Base synthetic field
1360        assert!(
1361            derived.fields.iter().any(|f| f.name == "__base_Base"),
1362            "Derived should have a __base_Base field"
1363        );
1364        // The y field should come after __base_Base
1365        let base_field = derived
1366            .fields
1367            .iter()
1368            .find(|f| f.name == "__base_Base")
1369            .unwrap();
1370        let y_field = derived.fields.iter().find(|f| f.name == "y").unwrap();
1371        assert!(y_field.offset >= base_field.offset + base_field.size);
1372    }
1373
1374    #[test]
1375    fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
1376        let src = r#"
1377class A { int a; };
1378class B { int b; };
1379class C : public A, public B { int c; };
1380"#;
1381        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1382        let c = layouts.iter().find(|l| l.name == "C").unwrap();
1383        assert!(c.fields.iter().any(|f| f.name == "__base_A"));
1384        assert!(c.fields.iter().any(|f| f.name == "__base_B"));
1385    }
1386
1387    #[test]
1388    fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
1389        // class with virtual method: size = sizeof(__vptr) + member fields + padding
1390        let src = "class V { virtual void f(); int x; };";
1391        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1392        let l = &layouts[0];
1393        // __vptr(8) + int(4) + 4 pad = 16 bytes on x86_64
1394        assert_eq!(l.total_size, 16);
1395    }
1396
1397    // ── bitfield handling ─────────────────────────────────────────────────────
1398
1399    #[test]
1400    fn is_bitfield_type_detects_colon_n() {
1401        assert!(is_bitfield_type("int:3"));
1402        assert!(is_bitfield_type("unsigned int:16"));
1403        assert!(is_bitfield_type("uint32_t:1"));
1404        // Not bit-fields — contains ':' but not followed by pure digits
1405        assert!(!is_bitfield_type("std::atomic<int>"));
1406        assert!(!is_bitfield_type("ns::Type"));
1407        assert!(!is_bitfield_type("int"));
1408    }
1409
1410    #[test]
1411    fn struct_with_bitfields_is_skipped() {
1412        // Bit-field layout is compiler-controlled and cannot be accurately modelled
1413        // without a compiler. The struct must be skipped entirely.
1414        let src = r#"
1415struct Flags {
1416    unsigned int active : 1;
1417    unsigned int ready  : 1;
1418    unsigned int error  : 6;
1419    int value;
1420};
1421"#;
1422        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1423        // Flags must not appear — its layout cannot be accurately computed.
1424        assert!(
1425            layouts.iter().all(|l| l.name != "Flags"),
1426            "struct with bitfields should be skipped; got {:?}",
1427            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
1428        );
1429    }
1430
1431    #[test]
1432    fn struct_without_bitfields_is_still_parsed() {
1433        // Ensure the bitfield guard doesn't affect normal structs.
1434        let src = "struct Normal { int a; char b; double c; };";
1435        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1436        assert_eq!(layouts.len(), 1);
1437        assert_eq!(layouts[0].name, "Normal");
1438    }
1439
1440    #[test]
1441    fn cpp_class_with_bitfields_is_skipped() {
1442        let src = "class Packed { int x : 4; int y : 4; };";
1443        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1444        assert!(
1445            layouts.iter().all(|l| l.name != "Packed"),
1446            "C++ class with bitfields should be skipped"
1447        );
1448    }
1449
1450    // ── __attribute__((packed)) detection ─────────────────────────────────────
1451
1452    #[test]
1453    fn packed_struct_has_no_alignment_padding() {
1454        // Without packed: char(1) + 3-byte pad + int(4) + char(1) + 3-byte pad = 12 bytes
1455        // With packed:    char(1) + int(4) + char(1) = 6 bytes, align=1
1456        let src = r#"
1457struct __attribute__((packed)) Tight {
1458    char a;
1459    int  b;
1460    char c;
1461};
1462"#;
1463        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1464        let l = layouts.iter().find(|l| l.name == "Tight").expect("Tight");
1465        assert!(l.is_packed, "should be marked is_packed");
1466        assert_eq!(l.total_size, 6, "packed: no padding inserted");
1467        assert_eq!(l.fields[0].offset, 0);
1468        assert_eq!(l.fields[1].offset, 1); // immediately after char
1469        assert_eq!(l.fields[2].offset, 5);
1470    }
1471
1472    #[test]
1473    fn non_packed_struct_has_normal_alignment_padding() {
1474        // Confirm baseline: same struct without __attribute__((packed)) gets padded
1475        let src = r#"
1476struct Normal {
1477    char a;
1478    int  b;
1479    char c;
1480};
1481"#;
1482        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1483        let l = layouts.iter().find(|l| l.name == "Normal").expect("Normal");
1484        assert!(!l.is_packed);
1485        assert_eq!(l.total_size, 12);
1486        assert_eq!(l.fields[1].offset, 4); // aligned to 4
1487    }
1488
1489    #[test]
1490    fn cpp_class_packed_attribute_detected() {
1491        let src = r#"
1492class __attribute__((packed)) Dense {
1493    char a;
1494    int  b;
1495};
1496"#;
1497        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1498        let l = layouts.iter().find(|l| l.name == "Dense").expect("Dense");
1499        assert!(
1500            l.is_packed,
1501            "C++ class with __attribute__((packed)) must be marked packed"
1502        );
1503        assert_eq!(l.total_size, 5); // char(1) + int(4), no padding
1504    }
1505
1506    // ── alignas detection ─────────────────────────────────────────────────────
1507
1508    #[test]
1509    fn field_alignas_overrides_natural_alignment() {
1510        // char is normally align=1 but alignas(8) forces it to align-8.
1511        // Layout: c(1B at offset 0, align=8) + x(4B at offset 4, align=4)
1512        // c must start on an 8-byte boundary (trivially satisfied at offset 0).
1513        // After c (1 byte), x aligns to 4: offset = 1.next_multiple_of(4) = 4.
1514        // Struct align = max(8, 4) = 8. Total = 8 bytes (4+4 → 8 → ok for align 8).
1515        let src = r#"
1516struct S {
1517    alignas(8) char c;
1518    int x;
1519};
1520"#;
1521        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1522        let l = layouts.iter().find(|l| l.name == "S").expect("S");
1523        // c should be forced to align 8
1524        let c_field = l.fields.iter().find(|f| f.name == "c").unwrap();
1525        assert_eq!(c_field.align, 8);
1526        // x comes after c (1 byte) with natural alignment 4 → offset 4
1527        let x_field = l.fields.iter().find(|f| f.name == "x").unwrap();
1528        assert_eq!(x_field.offset, 4);
1529        // Struct alignment is max(alignas(8), int align 4) = 8
1530        assert_eq!(l.align, 8);
1531        // Total = 8 bytes (x at 4, size 4; 4+4=8; 8 is multiple of align 8)
1532        assert_eq!(l.total_size, 8);
1533    }
1534
1535    #[test]
1536    fn struct_level_alignas_increases_struct_alignment() {
1537        // alignas(64) on the struct means its alignment requirement is 64.
1538        // Total size must be a multiple of 64.
1539        let src = r#"
1540struct alignas(64) CacheLine {
1541    int x;
1542    int y;
1543};
1544"#;
1545        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1546        let l = layouts
1547            .iter()
1548            .find(|l| l.name == "CacheLine")
1549            .expect("CacheLine");
1550        assert_eq!(l.align, 64);
1551        assert_eq!(l.total_size % 64, 0);
1552    }
1553
1554    #[test]
1555    fn alignas_on_field_smaller_than_natural_is_ignored() {
1556        // alignas(1) on an int field: does NOT reduce alignment below 4.
1557        // In C++, alignas cannot reduce alignment below the natural alignment.
1558        // Our implementation stores the alignas value; natural alignment wins
1559        // because we take max(alignas, natural) in the caller.
1560        // Note: we currently store alignas directly; this test documents behaviour.
1561        let src = "struct S { int x; int y; };";
1562        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1563        let l = &layouts[0];
1564        assert_eq!(l.fields[0].align, 4); // natural alignment, not reduced
1565    }
1566
1567    #[test]
1568    fn cpp_class_alignas_detected() {
1569        let src = r#"
1570class alignas(32) Aligned {
1571    double x;
1572    double y;
1573};
1574"#;
1575        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1576        let l = layouts
1577            .iter()
1578            .find(|l| l.name == "Aligned")
1579            .expect("Aligned");
1580        assert_eq!(l.align, 32);
1581        assert_eq!(l.total_size % 32, 0);
1582    }
1583
1584    // ── bad weather: alignas edge cases ───────────────────────────────────────
1585
1586    #[test]
1587    fn struct_without_alignas_unchanged() {
1588        // Ensure the alignas detection path doesn't affect structs without it
1589        let src = "struct Plain { int a; char b; };";
1590        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1591        let l = &layouts[0];
1592        assert_eq!(l.align, 4); // max field alignment = int = 4
1593        assert_eq!(l.total_size, 8); // int(4) + char(1) + 3 pad
1594    }
1595
1596    // ── anonymous nested structs/unions ───────────────────────────────────────
1597
1598    #[test]
1599    fn anonymous_nested_union_fields_flattened() {
1600        let src = r#"
1601struct Packet {
1602    union {
1603        uint32_t raw;
1604        uint8_t bytes[4];
1605    };
1606    uint64_t timestamp;
1607};
1608"#;
1609        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1610        let l = layouts.iter().find(|l| l.name == "Packet").expect("Packet");
1611        // raw, bytes (or similar) and timestamp must all be present
1612        assert!(
1613            l.fields.iter().any(|f| f.name == "raw"),
1614            "raw field must be flattened into Packet"
1615        );
1616        assert!(
1617            l.fields.iter().any(|f| f.name == "timestamp"),
1618            "timestamp must be present"
1619        );
1620    }
1621
1622    #[test]
1623    fn anonymous_nested_struct_fields_flattened() {
1624        let src = r#"
1625struct Outer {
1626    struct {
1627        int x;
1628        int y;
1629    };
1630    double z;
1631};
1632"#;
1633        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1634        let l = layouts.iter().find(|l| l.name == "Outer").expect("Outer");
1635        assert!(
1636            l.fields.iter().any(|f| f.name == "x"),
1637            "x must be flattened"
1638        );
1639        assert!(
1640            l.fields.iter().any(|f| f.name == "y"),
1641            "y must be flattened"
1642        );
1643        assert!(l.fields.iter().any(|f| f.name == "z"), "z present");
1644        // Total: x(4) + y(4) + z(8) = 16 bytes, no padding
1645        assert_eq!(l.total_size, 16);
1646    }
1647
1648    #[test]
1649    fn named_nested_struct_not_flattened() {
1650        // A named struct used as a field type must NOT be flattened
1651        let src = r#"
1652struct Vec2 { float x; float y; };
1653struct Rect { struct Vec2 tl; struct Vec2 br; };
1654"#;
1655        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1656        let rect = layouts.iter().find(|l| l.name == "Rect").expect("Rect");
1657        // Should have tl and br as opaque fields, not x/y flattened
1658        assert_eq!(rect.fields.len(), 2);
1659        assert!(rect.fields.iter().any(|f| f.name == "tl"));
1660        assert!(rect.fields.iter().any(|f| f.name == "br"));
1661    }
1662
1663    // ── type-table tests ──────────────────────────────────────────────────────
1664
1665    #[test]
1666    fn linux_kernel_types_correct_size() {
1667        // u8/u16/u32/u64 and s8/s16/s32/s64 (linux/types.h)
1668        assert_eq!(c_type_size_align("u8", &X86_64_SYSV), (1, 1));
1669        assert_eq!(c_type_size_align("u16", &X86_64_SYSV), (2, 2));
1670        assert_eq!(c_type_size_align("u32", &X86_64_SYSV), (4, 4));
1671        assert_eq!(c_type_size_align("u64", &X86_64_SYSV), (8, 8));
1672        assert_eq!(c_type_size_align("s8", &X86_64_SYSV), (1, 1));
1673        assert_eq!(c_type_size_align("s16", &X86_64_SYSV), (2, 2));
1674        assert_eq!(c_type_size_align("s32", &X86_64_SYSV), (4, 4));
1675        assert_eq!(c_type_size_align("s64", &X86_64_SYSV), (8, 8));
1676    }
1677
1678    #[test]
1679    fn linux_kernel_dunder_types_correct_size() {
1680        assert_eq!(c_type_size_align("__u8", &X86_64_SYSV), (1, 1));
1681        assert_eq!(c_type_size_align("__u16", &X86_64_SYSV), (2, 2));
1682        assert_eq!(c_type_size_align("__u32", &X86_64_SYSV), (4, 4));
1683        assert_eq!(c_type_size_align("__u64", &X86_64_SYSV), (8, 8));
1684        assert_eq!(c_type_size_align("__s8", &X86_64_SYSV), (1, 1));
1685        assert_eq!(c_type_size_align("__s64", &X86_64_SYSV), (8, 8));
1686        // Endian-annotated types are same width as their base
1687        assert_eq!(c_type_size_align("__be16", &X86_64_SYSV), (2, 2));
1688        assert_eq!(c_type_size_align("__le32", &X86_64_SYSV), (4, 4));
1689        assert_eq!(c_type_size_align("__be64", &X86_64_SYSV), (8, 8));
1690    }
1691
1692    #[test]
1693    fn c99_fast_types_correct_size() {
1694        // fast8/16 are their natural width
1695        assert_eq!(c_type_size_align("uint_fast8_t", &X86_64_SYSV), (1, 1));
1696        assert_eq!(c_type_size_align("uint_fast16_t", &X86_64_SYSV), (2, 2));
1697        // fast32/64 are pointer-sized on 64-bit
1698        assert_eq!(c_type_size_align("uint_fast32_t", &X86_64_SYSV), (8, 8));
1699        assert_eq!(c_type_size_align("uint_fast64_t", &X86_64_SYSV), (8, 8));
1700        // least types are their minimum guaranteed width
1701        assert_eq!(c_type_size_align("uint_least8_t", &X86_64_SYSV), (1, 1));
1702        assert_eq!(c_type_size_align("uint_least32_t", &X86_64_SYSV), (4, 4));
1703        assert_eq!(c_type_size_align("uint_least64_t", &X86_64_SYSV), (8, 8));
1704        assert_eq!(c_type_size_align("intmax_t", &X86_64_SYSV), (8, 8));
1705        assert_eq!(c_type_size_align("uintmax_t", &X86_64_SYSV), (8, 8));
1706    }
1707
1708    #[test]
1709    fn gcc_int128_correct_size() {
1710        assert_eq!(c_type_size_align("__int128", &X86_64_SYSV), (16, 16));
1711        assert_eq!(c_type_size_align("__uint128", &X86_64_SYSV), (16, 16));
1712        assert_eq!(c_type_size_align("__int128_t", &X86_64_SYSV), (16, 16));
1713        // unsigned __int128 — "unsigned " prefix is stripped, then __int128 matched
1714        assert_eq!(
1715            c_type_size_align("unsigned __int128", &X86_64_SYSV),
1716            (16, 16)
1717        );
1718    }
1719
1720    #[test]
1721    fn windows_types_correct_size() {
1722        assert_eq!(c_type_size_align("BYTE", &X86_64_SYSV), (1, 1));
1723        assert_eq!(c_type_size_align("WORD", &X86_64_SYSV), (2, 2));
1724        assert_eq!(c_type_size_align("DWORD", &X86_64_SYSV), (4, 4));
1725        assert_eq!(c_type_size_align("QWORD", &X86_64_SYSV), (8, 8));
1726        assert_eq!(c_type_size_align("BOOL", &X86_64_SYSV), (4, 4));
1727        assert_eq!(c_type_size_align("UINT8", &X86_64_SYSV), (1, 1));
1728        assert_eq!(c_type_size_align("INT32", &X86_64_SYSV), (4, 4));
1729        assert_eq!(c_type_size_align("UINT64", &X86_64_SYSV), (8, 8));
1730        assert_eq!(c_type_size_align("HANDLE", &X86_64_SYSV), (8, 8));
1731        assert_eq!(c_type_size_align("LPVOID", &X86_64_SYSV), (8, 8));
1732    }
1733
1734    #[test]
1735    fn char_types_correct_size() {
1736        assert_eq!(c_type_size_align("wchar_t", &X86_64_SYSV), (4, 4));
1737        assert_eq!(c_type_size_align("char8_t", &X86_64_SYSV), (1, 1));
1738        assert_eq!(c_type_size_align("char16_t", &X86_64_SYSV), (2, 2));
1739        assert_eq!(c_type_size_align("char32_t", &X86_64_SYSV), (4, 4));
1740    }
1741
1742    #[test]
1743    fn half_precision_types_correct_size() {
1744        assert_eq!(c_type_size_align("_Float16", &X86_64_SYSV), (2, 2));
1745        assert_eq!(c_type_size_align("__fp16", &X86_64_SYSV), (2, 2));
1746        assert_eq!(c_type_size_align("__bf16", &X86_64_SYSV), (2, 2));
1747        assert_eq!(c_type_size_align("_Float128", &X86_64_SYSV), (16, 16));
1748    }
1749
1750    #[test]
1751    fn unsigned_prefix_stripped_correctly() {
1752        // "unsigned short" → "short" → (2, 2)
1753        assert_eq!(c_type_size_align("unsigned short", &X86_64_SYSV), (2, 2));
1754        assert_eq!(c_type_size_align("unsigned int", &X86_64_SYSV), (4, 4));
1755        assert_eq!(
1756            c_type_size_align("unsigned long long", &X86_64_SYSV),
1757            (8, 8)
1758        );
1759        assert_eq!(
1760            c_type_size_align("long int", &X86_64_SYSV),
1761            (X86_64_SYSV.pointer_size, X86_64_SYSV.pointer_size)
1762        );
1763    }
1764
1765    #[test]
1766    fn linux_kernel_struct_with_new_types() {
1767        // Representative kernel-style struct using __u32, __be16, u8
1768        let src = r#"
1769struct NetHeader {
1770    __be32 src_ip;
1771    __be32 dst_ip;
1772    __be16 src_port;
1773    __be16 dst_port;
1774    u8     protocol;
1775    u8     ttl;
1776};
1777"#;
1778        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1779        assert_eq!(layouts.len(), 1);
1780        let l = &layouts[0];
1781        // 4+4+2+2+1+1 = 14B; max align is 4 (__be32) → padded to 16B
1782        assert_eq!(l.total_size, 16);
1783        assert_eq!(l.fields[0].size, 4); // __be32 src_ip
1784        assert_eq!(l.fields[2].size, 2); // __be16 src_port
1785        assert_eq!(l.fields[4].size, 1); // u8 protocol
1786    }
1787
1788    // ── C++ stdlib type tests ─────────────────────────────────────────────────
1789
1790    #[test]
1791    fn cpp_string_is_32_bytes() {
1792        assert_eq!(c_type_size_align("std::string", &X86_64_SYSV), (32, 8));
1793        assert_eq!(c_type_size_align("std::wstring", &X86_64_SYSV), (32, 8));
1794    }
1795
1796    #[test]
1797    fn cpp_string_view_is_two_words() {
1798        assert_eq!(c_type_size_align("std::string_view", &X86_64_SYSV), (16, 8));
1799    }
1800
1801    #[test]
1802    fn cpp_vector_is_24_bytes() {
1803        assert_eq!(c_type_size_align("std::vector<int>", &X86_64_SYSV), (24, 8));
1804        assert_eq!(
1805            c_type_size_align("std::vector<uint64_t>", &X86_64_SYSV),
1806            (24, 8)
1807        );
1808        // Size is independent of T
1809        assert_eq!(
1810            c_type_size_align("std::vector<std::string>", &X86_64_SYSV),
1811            (24, 8)
1812        );
1813    }
1814
1815    #[test]
1816    fn cpp_smart_pointers_correct_size() {
1817        // unique_ptr: single pointer
1818        assert_eq!(
1819            c_type_size_align("std::unique_ptr<int>", &X86_64_SYSV),
1820            (8, 8)
1821        );
1822        // shared_ptr / weak_ptr: two pointers
1823        assert_eq!(
1824            c_type_size_align("std::shared_ptr<int>", &X86_64_SYSV),
1825            (16, 8)
1826        );
1827        assert_eq!(
1828            c_type_size_align("std::weak_ptr<int>", &X86_64_SYSV),
1829            (16, 8)
1830        );
1831    }
1832
1833    #[test]
1834    fn cpp_optional_recursive_size() {
1835        // std::optional<bool>: 1B (bool) + 1B (has_value flag) → 2B
1836        assert_eq!(
1837            c_type_size_align("std::optional<bool>", &X86_64_SYSV),
1838            (2, 1)
1839        );
1840        // std::optional<int>: 4B + 1B → padded to 4B → 8B total? Let's check:
1841        // t_size=4, t_align=4; (4+1).next_multiple_of(4) = 8
1842        assert_eq!(
1843            c_type_size_align("std::optional<int>", &X86_64_SYSV),
1844            (8, 4)
1845        );
1846        // std::optional<double>: 8B + 1B → padded to 8B → 16B
1847        assert_eq!(
1848            c_type_size_align("std::optional<double>", &X86_64_SYSV),
1849            (16, 8)
1850        );
1851    }
1852
1853    #[test]
1854    fn cpp_function_is_32_bytes() {
1855        assert_eq!(
1856            c_type_size_align("std::function<void()>", &X86_64_SYSV),
1857            (32, 8)
1858        );
1859        assert_eq!(
1860            c_type_size_align("std::function<int(int)>", &X86_64_SYSV),
1861            (32, 8)
1862        );
1863    }
1864
1865    #[test]
1866    fn cpp_stdlib_struct_with_string_field() {
1867        // A struct with std::string fields — used to get pointer-size (8B), now 32B
1868        let src = r#"
1869struct Config {
1870    std::string name;
1871    int         version;
1872    bool        enabled;
1873};
1874"#;
1875        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1876        let l = &layouts[0];
1877        assert_eq!(l.fields[0].size, 32); // std::string, not 8
1878        // int at offset 32, bool at 36; total padded to 8-byte align = 40
1879        assert_eq!(l.fields[1].offset, 32);
1880        assert_eq!(l.fields[1].size, 4);
1881    }
1882}