Skip to main content

padlock_source/frontends/
c_cpp.rs

1// padlock-source/src/frontends/c_cpp.rs
2//
3// Extracts struct layouts from C / C++ source using tree-sitter.
4// Sizes and alignments are computed from field type names + arch config;
5// there is no compiler involved so the results are approximate for complex types.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use tree_sitter::{Node, Parser};
10
11// ── type resolution ───────────────────────────────────────────────────────────
12
13/// Map a C/C++ type name to (size, align) using the target arch.
14fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
15    let ty = ty.trim();
16    // Strip qualifiers
17    for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
18        if let Some(rest) = ty.strip_prefix(qual) {
19            return c_type_size_align(rest, arch);
20        }
21    }
22    // x86 SSE / AVX / AVX-512 SIMD types
23    match ty {
24        "__m64" => return (8, 8),
25        "__m128" | "__m128d" | "__m128i" => return (16, 16),
26        "__m256" | "__m256d" | "__m256i" => return (32, 32),
27        "__m512" | "__m512d" | "__m512i" => return (64, 64),
28        // ARM NEON — 64-bit (double-word) vectors
29        "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
30        | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
31        // ARM NEON — 128-bit (quad-word) vectors
32        "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
33        | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
34        _ => {}
35    }
36    // C++ standard library synchronisation types (Linux/glibc x86-64 defaults).
37    // Sizes are platform-approximate; accuracy is "good enough" for cache-line
38    // bucketing and false-sharing detection.
39    match ty {
40        // Mutexes — all backed by pthread_mutex_t (40 bytes on Linux/glibc)
41        "std::mutex"
42        | "std::recursive_mutex"
43        | "std::timed_mutex"
44        | "std::recursive_timed_mutex"
45        | "pthread_mutex_t" => return (40, 8),
46        "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
47        // Condition variables
48        "std::condition_variable" | "pthread_cond_t" => return (48, 8),
49        // std::atomic<T> — same size as T; extract and recurse
50        ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
51            let inner = &ty[12..ty.len() - 1];
52            return c_type_size_align(inner.trim(), arch);
53        }
54        _ => {} // fall through to primitive types below
55    }
56    // Primitive / stdint / pointer types
57    match ty {
58        "char" | "_Bool" | "bool" => (1, 1),
59        "short" | "short int" => (2, 2),
60        "int" => (4, 4),
61        "long" => (arch.pointer_size, arch.pointer_size),
62        "long long" => (8, 8),
63        "float" => (4, 4),
64        "double" => (8, 8),
65        "long double" => (16, 16),
66        "int8_t" | "uint8_t" => (1, 1),
67        "int16_t" | "uint16_t" => (2, 2),
68        "int32_t" | "uint32_t" => (4, 4),
69        "int64_t" | "uint64_t" => (8, 8),
70        "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
71            (arch.pointer_size, arch.pointer_size)
72        }
73        // Pointer types
74        ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
75        // Unknown — use pointer size as a reasonable default
76        _ => (arch.pointer_size, arch.pointer_size),
77    }
78}
79
80// ── struct / union simulation ─────────────────────────────────────────────────
81
/// Remove a trailing bit-field width annotation (`:N`) from a type name.
///
/// Only the text after the *last* `:` is inspected, and it counts as a width
/// only when it is non-empty and made up solely of ASCII digits (surrounding
/// whitespace is ignored). So `"int:3"` becomes `"int"`, while `"std::atomic"`
/// is returned unchanged. Whitespace left before the colon is trimmed off.
fn strip_bitfield_suffix(ty: &str) -> &str {
    match ty.rsplit_once(':') {
        Some((base, width)) => {
            let width = width.trim();
            let is_width = !width.is_empty() && width.chars().all(|c| c.is_ascii_digit());
            if is_width { base.trim_end() } else { ty }
        }
        None => ty,
    }
}
93
94/// Return `true` when `ty` carries a bit-field width annotation (e.g. `"int:3"`).
95/// Bit-field packing is compiler-controlled and cannot be accurately modelled
96/// without a compiler, so structs containing bit-field members are skipped.
97fn is_bitfield_type(ty: &str) -> bool {
98    strip_bitfield_suffix(ty) != ty
99}
100
101/// Simulate C/C++ struct layout given ordered fields.
102///
103/// When `packed` is `true` the layout mirrors `__attribute__((packed))`:
104/// no inter-field alignment padding is inserted and the struct alignment
105/// is forced to 1. This matches GCC/Clang behaviour for packed structs.
106fn simulate_layout(
107    fields: &mut Vec<Field>,
108    struct_name: String,
109    arch: &'static ArchConfig,
110    source_line: Option<u32>,
111    packed: bool,
112) -> StructLayout {
113    let mut offset = 0usize;
114    let mut struct_align = 1usize;
115
116    for f in fields.iter_mut() {
117        if !packed && f.align > 0 {
118            offset = offset.next_multiple_of(f.align);
119        }
120        f.offset = offset;
121        offset += f.size;
122        if !packed {
123            struct_align = struct_align.max(f.align);
124        }
125    }
126    // Trailing padding (not present in packed structs)
127    if !packed && struct_align > 0 {
128        offset = offset.next_multiple_of(struct_align);
129    }
130
131    StructLayout {
132        name: struct_name,
133        total_size: offset,
134        align: struct_align,
135        fields: std::mem::take(fields),
136        source_file: None,
137        source_line,
138        arch,
139        is_packed: packed,
140        is_union: false,
141    }
142}
143
144/// Simulate a C/C++ union layout: all fields start at offset 0;
145/// total size is the largest field, rounded to max alignment.
146fn simulate_union_layout(
147    fields: &mut Vec<Field>,
148    name: String,
149    arch: &'static ArchConfig,
150    source_line: Option<u32>,
151) -> StructLayout {
152    for f in fields.iter_mut() {
153        f.offset = 0;
154    }
155    let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
156    let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
157    let total_size = if max_align > 0 {
158        max_size.next_multiple_of(max_align)
159    } else {
160        max_size
161    };
162
163    StructLayout {
164        name,
165        total_size,
166        align: max_align,
167        fields: std::mem::take(fields),
168        source_file: None,
169        source_line,
170        arch,
171        is_packed: false,
172        is_union: true,
173    }
174}
175
176// ── C++ class parsing (vtable + inheritance) ──────────────────────────────────
177
178/// Parse a `class_specifier` node, modelling:
179/// - A hidden vtable pointer (`__vptr`) when any method is `virtual`.
180/// - Base-class storage as a synthetic `__base_<Name>` field (size resolved
181///   later by the nested-struct resolution pass in `lib.rs`).
182fn parse_class_specifier(
183    source: &str,
184    node: Node<'_>,
185    arch: &'static ArchConfig,
186) -> Option<StructLayout> {
187    let mut class_name = "<anonymous>".to_string();
188    let mut base_names: Vec<String> = Vec::new();
189    let mut body_node: Option<Node> = None;
190    let mut is_packed = false;
191    let mut struct_alignas: Option<usize> = None;
192
193    for i in 0..node.child_count() {
194        let child = node.child(i)?;
195        match child.kind() {
196            "type_identifier" => class_name = source[child.byte_range()].to_string(),
197            "base_class_clause" => {
198                // tree-sitter-cpp structure: ':' [access_specifier] type_identifier
199                // type_identifier nodes are direct children of base_class_clause.
200                for j in 0..child.child_count() {
201                    if let Some(base) = child.child(j)
202                        && base.kind() == "type_identifier"
203                    {
204                        base_names.push(source[base.byte_range()].to_string());
205                    }
206                }
207            }
208            "field_declaration_list" => body_node = Some(child),
209            "attribute_specifier" => {
210                if source[child.byte_range()].contains("packed") {
211                    is_packed = true;
212                }
213            }
214            // C++11 class-level alignas: `class alignas(64) Name { ... };`
215            "alignas_qualifier" | "alignas_specifier" => {
216                if struct_alignas.is_none() {
217                    struct_alignas = parse_alignas_value(source, child);
218                }
219            }
220            _ => {}
221        }
222    }
223
224    let body = body_node?;
225
226    // Detect virtual methods: look for `virtual` keyword anywhere in body
227    let has_virtual = contains_virtual_keyword(source, body);
228
229    // Collect declared fields: (field_name, type_text, guard, alignas_override)
230    let mut raw_fields: Vec<(String, String, Option<String>, Option<usize>)> = Vec::new();
231    for i in 0..body.child_count() {
232        let Some(child) = body.child(i) else {
233            continue;
234        };
235        if child.kind() == "field_declaration" {
236            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, false) {
237                raw_fields.extend(anon_fields);
238            } else if let Some((ty, fname, guard, al)) = parse_field_declaration(source, child) {
239                raw_fields.push((fname, ty, guard, al));
240            }
241        }
242    }
243
244    // Build fields: vtable pointer, then base-class slots, then declared fields
245    let mut fields: Vec<Field> = Vec::new();
246
247    // Virtual dispatch pointer (hidden, at offset 0 for the first virtual class)
248    if has_virtual {
249        let ps = arch.pointer_size;
250        fields.push(Field {
251            name: "__vptr".to_string(),
252            ty: TypeInfo::Pointer {
253                size: ps,
254                align: ps,
255            },
256            offset: 0,
257            size: ps,
258            align: ps,
259            source_file: None,
260            source_line: None,
261            access: AccessPattern::Unknown,
262        });
263    }
264
265    // Base class storage (opaque until nested-struct resolver fills in sizes)
266    for base in &base_names {
267        let ps = arch.pointer_size;
268        fields.push(Field {
269            name: format!("__base_{base}"),
270            ty: TypeInfo::Opaque {
271                name: base.clone(),
272                size: ps,
273                align: ps,
274            },
275            offset: 0,
276            size: ps,
277            align: ps,
278            source_file: None,
279            source_line: None,
280            access: AccessPattern::Unknown,
281        });
282    }
283
284    // Skip classes with bit-field members (same reason as structs).
285    if raw_fields.iter().any(|(_, ty, _, _)| is_bitfield_type(ty)) {
286        return None;
287    }
288
289    // Declared member fields
290    for (fname, ty_name, guard, alignas) in raw_fields {
291        let (size, natural_align) = c_type_size_align(&ty_name, arch);
292        let align = alignas.unwrap_or(natural_align);
293        let access = if let Some(g) = guard {
294            AccessPattern::Concurrent {
295                guard: Some(g),
296                is_atomic: false,
297            }
298        } else {
299            AccessPattern::Unknown
300        };
301        fields.push(Field {
302            name: fname,
303            ty: TypeInfo::Primitive {
304                name: ty_name,
305                size,
306                align,
307            },
308            offset: 0,
309            size,
310            align,
311            source_file: None,
312            source_line: None,
313            access,
314        });
315    }
316
317    if fields.is_empty() {
318        return None;
319    }
320
321    let line = node.start_position().row as u32 + 1;
322    let mut layout = simulate_layout(&mut fields, class_name, arch, Some(line), is_packed);
323
324    if let Some(al) = struct_alignas
325        && al > layout.align
326    {
327        layout.align = al;
328        if !is_packed {
329            layout.total_size = layout.total_size.next_multiple_of(al);
330        }
331    }
332
333    Some(layout)
334}
335
336/// Return true if a `field_declaration_list` node contains any `virtual` keyword
337/// (indicating that the class needs a vtable pointer).
338fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
339    let mut stack = vec![node];
340    while let Some(n) = stack.pop() {
341        if n.kind() == "virtual" {
342            return true;
343        }
344        // Also check raw text for cases where tree-sitter may not produce a
345        // dedicated `virtual` node (e.g. inside complex declarations).
346        if n.child_count() == 0 {
347            let text = &source[n.byte_range()];
348            if text == "virtual" {
349                return true;
350            }
351        }
352        for i in (0..n.child_count()).rev() {
353            if let Some(child) = n.child(i) {
354                stack.push(child);
355            }
356        }
357    }
358    false
359}
360
361// ── tree-sitter walker ────────────────────────────────────────────────────────
362
363fn extract_structs_from_tree(
364    source: &str,
365    root: Node<'_>,
366    arch: &'static ArchConfig,
367    layouts: &mut Vec<StructLayout>,
368) {
369    let cursor = root.walk();
370    let mut stack = vec![root];
371
372    while let Some(node) = stack.pop() {
373        // Push children in reverse so we process left-to-right
374        for i in (0..node.child_count()).rev() {
375            if let Some(child) = node.child(i) {
376                stack.push(child);
377            }
378        }
379
380        match node.kind() {
381            "struct_specifier" => {
382                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, false) {
383                    layouts.push(layout);
384                }
385            }
386            "union_specifier" => {
387                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, true) {
388                    layouts.push(layout);
389                }
390            }
391            "class_specifier" => {
392                if let Some(layout) = parse_class_specifier(source, node, arch) {
393                    layouts.push(layout);
394                }
395            }
396            _ => {}
397        }
398    }
399
400    // Also handle `typedef struct/union { ... } Name;`
401    let cursor2 = root.walk();
402    let mut stack2 = vec![root];
403    while let Some(node) = stack2.pop() {
404        for i in (0..node.child_count()).rev() {
405            if let Some(child) = node.child(i) {
406                stack2.push(child);
407            }
408        }
409        if node.kind() == "type_definition"
410            && let Some(layout) = parse_typedef_struct_or_union(source, node, arch)
411        {
412            let existing = layouts
413                .iter()
414                .position(|l| l.name == layout.name || l.name == "<anonymous>");
415            match existing {
416                Some(i) if layouts[i].name == "<anonymous>" => {
417                    layouts[i] = layout;
418                }
419                None => layouts.push(layout),
420                _ => {}
421            }
422        }
423    }
424    let _ = cursor;
425    let _ = cursor2; // silence unused warnings
426}
427
428/// Parse a `struct_specifier` or `union_specifier` node into a `StructLayout`.
429fn parse_struct_or_union_specifier(
430    source: &str,
431    node: Node<'_>,
432    arch: &'static ArchConfig,
433    is_union: bool,
434) -> Option<StructLayout> {
435    let mut name = "<anonymous>".to_string();
436    let mut body_node: Option<Node> = None;
437    let mut is_packed = false;
438    // Struct-level alignas: `struct alignas(64) CacheAligned { ... };`
439    let mut struct_alignas: Option<usize> = None;
440
441    for i in 0..node.child_count() {
442        let child = node.child(i)?;
443        match child.kind() {
444            "type_identifier" => name = source[child.byte_range()].to_string(),
445            "field_declaration_list" => body_node = Some(child),
446            "attribute_specifier" => {
447                let text = &source[child.byte_range()];
448                if text.contains("packed") {
449                    is_packed = true;
450                }
451            }
452            // C++11 struct-level alignas: `struct alignas(64) Name { ... };`
453            // tree-sitter-cpp: `alignas_qualifier` as direct child of struct_specifier
454            "alignas_qualifier" | "alignas_specifier" => {
455                if struct_alignas.is_none() {
456                    struct_alignas = parse_alignas_value(source, child);
457                }
458            }
459            _ => {}
460        }
461    }
462
463    let body = body_node?;
464    // (field_name, type_text, guard, alignas_override)
465    let mut raw_fields: Vec<(String, String, Option<String>, Option<usize>)> = Vec::new();
466
467    for i in 0..body.child_count() {
468        let child = body.child(i)?;
469        if child.kind() == "field_declaration" {
470            // Check for anonymous nested struct/union: a field_declaration whose
471            // only non-field-identifier child is a struct_specifier/union_specifier
472            // with no type_identifier (i.e. `struct { int x; int y; };`).
473            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, is_union) {
474                raw_fields.extend(anon_fields);
475            } else if let Some((ty, fname, guard, al)) = parse_field_declaration(source, child) {
476                raw_fields.push((fname, ty, guard, al));
477            }
478        }
479    }
480
481    if raw_fields.is_empty() {
482        return None;
483    }
484
485    // Bit-field packing is compiler-controlled and cannot be accurately modelled
486    // without a compiler. Skip the entire struct to avoid producing wrong layout
487    // data. Use `padlock analyze` on the compiled binary for accurate results.
488    if raw_fields.iter().any(|(_, ty, _, _)| is_bitfield_type(ty)) {
489        return None;
490    }
491
492    let mut fields: Vec<Field> = raw_fields
493        .into_iter()
494        .map(|(fname, ty_name, guard, alignas)| {
495            let (size, natural_align) = c_type_size_align(&ty_name, arch);
496            // alignas(N) on a field overrides its alignment requirement.
497            let align = alignas.unwrap_or(natural_align);
498            let access = if let Some(g) = guard {
499                AccessPattern::Concurrent {
500                    guard: Some(g),
501                    is_atomic: false,
502                }
503            } else {
504                AccessPattern::Unknown
505            };
506            Field {
507                name: fname,
508                ty: TypeInfo::Primitive {
509                    name: ty_name,
510                    size,
511                    align,
512                },
513                offset: 0,
514                size,
515                align,
516                source_file: None,
517                source_line: None,
518                access,
519            }
520        })
521        .collect();
522
523    let line = node.start_position().row as u32 + 1;
524    let mut layout = if is_union {
525        simulate_union_layout(&mut fields, name, arch, Some(line))
526    } else {
527        simulate_layout(&mut fields, name, arch, Some(line), is_packed)
528    };
529
530    // Apply struct-level alignas: the struct's alignment requirement is at
531    // least N; trailing padding may grow to satisfy the new alignment.
532    if let Some(al) = struct_alignas
533        && al > layout.align
534    {
535        layout.align = al;
536        if !is_packed {
537            layout.total_size = layout.total_size.next_multiple_of(al);
538        }
539    }
540
541    Some(layout)
542}
543
544/// Parse a `typedef struct/union { ... } Name;` type_definition node.
545fn parse_typedef_struct_or_union(
546    source: &str,
547    node: Node<'_>,
548    arch: &'static ArchConfig,
549) -> Option<StructLayout> {
550    let mut specifier_node: Option<Node> = None;
551    let mut is_union = false;
552    let mut typedef_name: Option<String> = None;
553
554    for i in 0..node.child_count() {
555        let child = node.child(i)?;
556        match child.kind() {
557            "struct_specifier" => {
558                specifier_node = Some(child);
559                is_union = false;
560            }
561            "union_specifier" => {
562                specifier_node = Some(child);
563                is_union = true;
564            }
565            "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
566            _ => {}
567        }
568    }
569
570    let spec = specifier_node?;
571    let typedef_name = typedef_name?;
572
573    let mut layout = parse_struct_or_union_specifier(source, spec, arch, is_union)?;
574    if layout.name == "<anonymous>" {
575        layout.name = typedef_name;
576    }
577    Some(layout)
578}
579
580// Alias kept for the typedef pass in extract_structs_from_tree.
581#[allow(dead_code)]
582fn parse_typedef_struct(
583    source: &str,
584    node: Node<'_>,
585    arch: &'static ArchConfig,
586) -> Option<StructLayout> {
587    parse_typedef_struct_or_union(source, node, arch)
588}
589
/// Extract a lock guard name from the source text of a C/C++ field that
/// carries `__attribute__((guarded_by(X)))` or `__attribute__((pt_guarded_by(X)))`.
///
/// Also recognises the common macro forms `GUARDED_BY(X)` and `PT_GUARDED_BY(X)`
/// (Clang thread-safety analysis). Matching is done on raw text, so it works
/// regardless of how tree-sitter structures the inner tokens.
///
/// Details:
/// - Only the all-lowercase and all-uppercase spellings are recognised. The
///   `pt_` / `PT_` variants need no separate entry because they contain the
///   shorter keyword.
/// - The lowercase spelling is tried first; within one spelling, occurrences
///   are tried left to right, and one that is not followed by `(` (for
///   example as part of an identifier) is skipped.
/// - The argument runs to the *matching* `)`, so nested calls such as
///   `GUARDED_BY(owner.mutex())` are kept intact.
/// - Surrounding whitespace and double quotes are stripped from the argument.
///
/// Returns `None` when no annotation with a non-empty, properly closed
/// argument is found.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    for kw in ["guarded_by", "GUARDED_BY"] {
        for (pos, _) in field_source.match_indices(kw) {
            // Expect `(` optionally preceded by whitespace.
            let after = field_source[pos + kw.len()..].trim_start();
            let Some(args) = after.strip_prefix('(') else {
                continue;
            };

            // Find the `)` that closes the `(` just consumed.
            let mut depth = 1usize;
            let mut close = None;
            for (idx, ch) in args.char_indices() {
                match ch {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            close = Some(idx);
                            break;
                        }
                    }
                    _ => {}
                }
            }
            let Some(end) = close else {
                continue;
            };

            let guard = args[..end].trim().trim_matches('"');
            if !guard.is_empty() {
                return Some(guard.to_string());
            }
        }
    }
    None
}
617
618/// Parse a numeric value from an `alignas_qualifier` node: `alignas(N)`.
619/// tree-sitter-cpp uses the node kind `alignas_qualifier` for C++11 `alignas`.
620/// Returns `None` when the specifier contains a type expression rather than
621/// an integer literal (e.g. `alignas(double)` — handled elsewhere by the
622/// compiler; we skip those conservatively).
623fn parse_alignas_value(source: &str, node: Node<'_>) -> Option<usize> {
624    for i in 0..node.child_count() {
625        if let Some(child) = node.child(i) {
626            match child.kind() {
627                "number_literal" | "integer_literal" | "integer" => {
628                    let text = source[child.byte_range()].trim();
629                    if let Ok(n) = text.parse::<usize>() {
630                        return Some(n);
631                    }
632                    // Hex literal: 0x40
633                    if let Some(hex) = text.strip_prefix("0x").or_else(|| text.strip_prefix("0X")) {
634                        return usize::from_str_radix(hex, 16).ok();
635                    }
636                }
637                // Recurse for nested nodes (parenthesised expression, etc.)
638                "parenthesized_expression" | "argument_list" | "alignas_qualifier" => {
639                    if let r @ Some(_) = parse_alignas_value(source, child) {
640                        return r;
641                    }
642                }
643                _ => {}
644            }
645        }
646    }
647    None
648}
649
650/// Returns `(ty, field_name, guard, alignas_override)`.
651/// `alignas_override` is `Some(N)` when the field carries `alignas(N)`.
652/// Detect and parse an anonymous nested struct/union field declaration, e.g.:
653///
654/// ```c
655/// struct Packet {
656///     union {                    // ← anonymous nested union
657///         uint32_t raw;
658///         struct { uint8_t a; uint8_t b; uint8_t c; uint8_t d; };
659///     };
660///     uint64_t timestamp;
661/// };
662/// ```
663///
664/// A `field_declaration` is anonymous if it contains a `struct_specifier` or
665/// `union_specifier` child that has a `field_declaration_list` (i.e. a body)
666/// but no `type_identifier` (i.e. no name). The fields of the nested
667/// struct/union are flattened into the parent.
668///
669/// Returns `None` if the declaration is not an anonymous nested struct/union
670/// (the caller should fall through to `parse_field_declaration`).
671type RawField = (String, String, Option<String>, Option<usize>);
672
673#[allow(clippy::only_used_in_recursion)]
674fn parse_anonymous_nested(
675    source: &str,
676    node: Node<'_>,
677    arch: &'static ArchConfig,
678    parent_is_union: bool,
679) -> Option<Vec<RawField>> {
680    // Find a struct_specifier or union_specifier child.
681    for i in 0..node.child_count() {
682        let child = node.child(i)?;
683        if child.kind() != "struct_specifier" && child.kind() != "union_specifier" {
684            continue;
685        }
686        let nested_is_union = child.kind() == "union_specifier";
687
688        // Must have a body (field_declaration_list) but no type_identifier.
689        let mut has_name = false;
690        let mut body_node: Option<Node> = None;
691        for j in 0..child.child_count() {
692            let sub = child.child(j)?;
693            match sub.kind() {
694                "type_identifier" => has_name = true,
695                "field_declaration_list" => body_node = Some(sub),
696                _ => {}
697            }
698        }
699
700        if has_name || body_node.is_none() {
701            // Named struct/union used as a field type — handled by parse_field_declaration.
702            continue;
703        }
704
705        let body = body_node?;
706        let mut nested_raw: Vec<RawField> = Vec::new();
707
708        for j in 0..body.child_count() {
709            let inner = body.child(j)?;
710            if inner.kind() == "field_declaration" {
711                // Recurse to handle doubly-nested anonymous structs.
712                if let Some(deeper) = parse_anonymous_nested(source, inner, arch, nested_is_union) {
713                    nested_raw.extend(deeper);
714                } else if let Some((ty, fname, guard, al)) = parse_field_declaration(source, inner)
715                {
716                    nested_raw.push((fname, ty, guard, al));
717                }
718            }
719        }
720
721        // If nested is a union, the fields all share offset 0 (relative to the
722        // union's placement in the parent). We can't easily track this through
723        // raw field lists, so we emit them as a synthetic __anon_union_N field
724        // when the parent cares about offsets, or just flatten for unions.
725        //
726        // For simplicity: flatten all fields — the layout simulator will compute
727        // correct offsets if the parent is a struct, and union semantics are
728        // preserved when the parent is a union.
729        let _ = (nested_is_union, parent_is_union);
730
731        if !nested_raw.is_empty() {
732            return Some(nested_raw);
733        }
734    }
735    None
736}
737
738fn parse_field_declaration(
739    source: &str,
740    node: Node<'_>,
741) -> Option<(String, String, Option<String>, Option<usize>)> {
742    let mut ty_parts: Vec<String> = Vec::new();
743    let mut field_name: Option<String> = None;
744    // Bit-field width, e.g. `int flags : 3;` → Some("3")
745    let mut bit_width: Option<String> = None;
746    // Collect attribute text for guard extraction
747    let mut attr_text = String::new();
748    // Field-level alignas override
749    let mut alignas_override: Option<usize> = None;
750
751    for i in 0..node.child_count() {
752        let child = node.child(i)?;
753        match child.kind() {
754            "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
755                ty_parts.push(source[child.byte_range()].trim().to_string());
756            }
757            // C++ qualified types: std::mutex, ns::Type, etc.
758            // C++ template types:  std::atomic<uint64_t>, std::vector<int>, etc.
759            "qualified_identifier" | "template_type" => {
760                ty_parts.push(source[child.byte_range()].trim().to_string());
761            }
762            // Nested struct/union used as a field type: `struct Vec2 tl;`
763            // Extract just the type_identifier name (e.g. "Vec2") so the
764            // nested-struct resolution pass can match it by name.
765            "struct_specifier" | "union_specifier" => {
766                for j in 0..child.child_count() {
767                    if let Some(sub) = child.child(j)
768                        && sub.kind() == "type_identifier"
769                    {
770                        ty_parts.push(source[sub.byte_range()].trim().to_string());
771                        break;
772                    }
773                }
774            }
775            "field_identifier" => {
776                field_name = Some(source[child.byte_range()].trim().to_string());
777            }
778            "pointer_declarator" => {
779                field_name = extract_identifier(source, child);
780                ty_parts.push("*".to_string());
781            }
782            // Bit-field clause: `: N`  (tree-sitter-c/cpp node)
783            "bitfield_clause" => {
784                let text = source[child.byte_range()].trim();
785                // Strip leading ':' and whitespace to get just the width digits
786                bit_width = Some(text.trim_start_matches(':').trim().to_string());
787            }
788            // GNU attribute specifier: __attribute__((...))
789            "attribute_specifier" | "attribute" => {
790                attr_text.push_str(source[child.byte_range()].trim());
791                attr_text.push(' ');
792            }
793            // C++11 alignas: tree-sitter-cpp wraps it as type_qualifier → alignas_qualifier
794            // Also handle the direct form in case grammar versions differ.
795            "alignas_qualifier" | "alignas_specifier" => {
796                if alignas_override.is_none() {
797                    alignas_override = parse_alignas_value(source, child);
798                }
799            }
800            // type_qualifier wraps alignas_qualifier for field declarations:
801            // `alignas(8) char c;` → type_qualifier { alignas_qualifier { ... } }
802            "type_qualifier" => {
803                if alignas_override.is_none() {
804                    for j in 0..child.child_count() {
805                        if let Some(sub) = child.child(j)
806                            && (sub.kind() == "alignas_qualifier"
807                                || sub.kind() == "alignas_specifier")
808                        {
809                            alignas_override = parse_alignas_value(source, sub);
810                            break;
811                        }
812                    }
813                }
814            }
815            _ => {}
816        }
817    }
818
819    let base_ty = ty_parts.join(" ");
820    let fname = field_name?;
821    if base_ty.is_empty() {
822        return None;
823    }
824    // Annotate bit-field types as "type:N" so callers can detect and report them;
825    // `strip_bitfield_suffix` recovers the base type for size/align lookup.
826    let ty = if let Some(w) = bit_width {
827        format!("{base_ty}:{w}")
828    } else {
829        base_ty
830    };
831
832    // Also check the full field source text (attribute_specifier may not always
833    // be a direct child depending on tree-sitter grammar version).
834    let field_src = source[node.byte_range()].to_string();
835    let guard = extract_guard_from_c_field_text(&attr_text)
836        .or_else(|| extract_guard_from_c_field_text(&field_src));
837
838    Some((ty, fname, guard, alignas_override))
839}
840
841fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
842    if node.kind() == "field_identifier" || node.kind() == "identifier" {
843        return Some(source[node.byte_range()].to_string());
844    }
845    for i in 0..node.child_count() {
846        if let Some(child) = node.child(i)
847            && let Some(name) = extract_identifier(source, child)
848        {
849            return Some(name);
850        }
851    }
852    None
853}
854
855// ── public API ────────────────────────────────────────────────────────────────
856
857pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
858    let mut parser = Parser::new();
859    parser.set_language(&tree_sitter_c::LANGUAGE.into())?;
860    let tree = parser
861        .parse(source, None)
862        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
863    let mut layouts = Vec::new();
864    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
865    Ok(layouts)
866}
867
868pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
869    let mut parser = Parser::new();
870    parser.set_language(&tree_sitter_cpp::LANGUAGE.into())?;
871    let tree = parser
872        .parse(source, None)
873        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
874    let mut layouts = Vec::new();
875    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
876    Ok(layouts)
877}
878
879// ── tests ─────────────────────────────────────────────────────────────────────
880
#[cfg(test)]
mod tests {
    //! Frontend tests. Every parsing test targets the x86-64 SysV arch config.

    use super::*;
    use padlock_core::arch::X86_64_SYSV;
    use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};

    // ── helpers ───────────────────────────────────────────────────────────────

    /// Parse `src` as C, panicking if the frontend returns an error.
    fn c_layouts(src: &str) -> Vec<StructLayout> {
        parse_c(src, &X86_64_SYSV).unwrap()
    }

    /// Parse `src` as C++, panicking if the frontend returns an error.
    fn cpp_layouts(src: &str) -> Vec<StructLayout> {
        parse_cpp(src, &X86_64_SYSV).unwrap()
    }

    /// Find the layout called `name`, panicking with that name when absent.
    fn layout_named<'a>(layouts: &'a [StructLayout], name: &str) -> &'a StructLayout {
        layouts
            .iter()
            .find(|l| l.name == name)
            .unwrap_or_else(|| panic!("layout `{name}` not found"))
    }

    /// Find the field called `name` in `layout`, panicking with that name when absent.
    fn field_named<'a>(layout: &'a StructLayout, name: &str) -> &'a Field {
        layout
            .fields
            .iter()
            .find(|f| f.name == name)
            .unwrap_or_else(|| panic!("field `{name}` not found"))
    }

    /// Whether `layout` contains a field called `name`.
    fn has_field(layout: &StructLayout, name: &str) -> bool {
        layout.fields.iter().any(|f| f.name == name)
    }

    // ── basic parsing ─────────────────────────────────────────────────────────

    #[test]
    fn parse_simple_c_struct() {
        let src = r#"
struct Point {
    int x;
    int y;
};
"#;
        let layouts = c_layouts(src);
        assert_eq!(layouts.len(), 1);
        let point = &layouts[0];
        assert_eq!(point.name, "Point");
        assert_eq!(point.fields.len(), 2);
        assert_eq!(point.fields[0].name, "x");
        assert_eq!(point.fields[1].name, "y");
    }

    #[test]
    fn parse_typedef_struct() {
        // An anonymous struct takes its name from the typedef.
        let src = r#"
typedef struct {
    char  is_active;
    double timeout;
    int   port;
} Connection;
"#;
        let layouts = c_layouts(src);
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Connection");
        assert_eq!(layouts[0].fields.len(), 3);
    }

    #[test]
    fn c_layout_computes_offsets() {
        let layouts = c_layouts("struct T { char a; double b; };");
        assert_eq!(layouts.len(), 1);
        let layout = &layouts[0];
        // char at offset 0, double at offset 8 (7 bytes padding)
        assert_eq!(layout.fields[0].offset, 0);
        assert_eq!(layout.fields[1].offset, 8);
        assert_eq!(layout.total_size, 16);
    }

    #[test]
    fn c_layout_detects_padding() {
        let layouts = c_layouts("struct T { char a; int b; };");
        let gaps = padlock_core::ir::find_padding(&layouts[0]);
        assert!(!gaps.is_empty());
        assert_eq!(gaps[0].bytes, 3); // 3 bytes padding between char and int
    }

    #[test]
    fn parse_cpp_struct() {
        let layouts = cpp_layouts("struct Vec3 { float x; float y; float z; };");
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].fields.len(), 3);
    }

    // ── SIMD types ────────────────────────────────────────────────────────────

    #[test]
    fn simd_sse_field_size_and_align() {
        let layouts = c_layouts("struct Vecs { __m128 a; __m256 b; };");
        assert_eq!(layouts.len(), 1);
        let f = &layouts[0].fields;
        assert_eq!(f[0].size, 16); // __m128
        assert_eq!(f[0].align, 16);
        assert_eq!(f[1].size, 32); // __m256
        assert_eq!(f[1].align, 32);
    }

    #[test]
    fn simd_avx512_size() {
        let layouts = c_layouts("struct Wide { __m512 v; };");
        let v = &layouts[0].fields[0];
        assert_eq!(v.size, 64);
        assert_eq!(v.align, 64);
    }

    #[test]
    fn simd_padding_detected_when_small_field_before_avx() {
        // char(1) + [31 pad] + __m256(32) = 64 bytes, 31 wasted
        let layouts = c_layouts("struct Mixed { char flag; __m256 data; };");
        let gaps = padlock_core::ir::find_padding(&layouts[0]);
        assert!(!gaps.is_empty());
        assert_eq!(gaps[0].bytes, 31);
    }

    // ── union parsing ─────────────────────────────────────────────────────────

    #[test]
    fn union_fields_all_at_offset_zero() {
        let layouts = c_layouts("union Data { int i; float f; double d; };");
        assert_eq!(layouts.len(), 1);
        let u = &layouts[0];
        assert!(u.is_union);
        for field in &u.fields {
            assert_eq!(
                field.offset, 0,
                "union field '{}' should be at offset 0",
                field.name
            );
        }
    }

    #[test]
    fn union_total_size_is_max_field() {
        // double is the largest (8 bytes); total should be 8
        let layouts = c_layouts("union Data { int i; float f; double d; };");
        assert_eq!(layouts[0].total_size, 8);
    }

    #[test]
    fn union_no_padding_finding() {
        // Unions must produce neither padding-waste nor reorder findings.
        let layouts = c_layouts("union Data { int i; double d; };");
        let report = padlock_core::findings::Report::from_layouts(&layouts);
        let sr = &report.structs[0];
        assert!(
            !sr.findings
                .iter()
                .any(|f| matches!(f, padlock_core::findings::Finding::PaddingWaste { .. }))
        );
        assert!(
            !sr.findings
                .iter()
                .any(|f| matches!(f, padlock_core::findings::Finding::ReorderSuggestion { .. }))
        );
    }

    #[test]
    fn typedef_union_parsed() {
        let layouts = c_layouts("typedef union { int a; double b; } Value;");
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Value");
        assert!(layouts[0].is_union);
    }

    // ── attribute guard extraction ─────────────────────────────────────────────

    #[test]
    fn extract_guard_from_c_guarded_by_macro() {
        let guard = extract_guard_from_c_field_text("int value GUARDED_BY(mu);");
        assert_eq!(guard.as_deref(), Some("mu"));
    }

    #[test]
    fn extract_guard_from_c_attribute_specifier() {
        let guard = extract_guard_from_c_field_text(
            "__attribute__((guarded_by(counter_lock))) uint64_t counter;",
        );
        assert_eq!(guard.as_deref(), Some("counter_lock"));
    }

    #[test]
    fn extract_guard_pt_guarded_by() {
        let guard = extract_guard_from_c_field_text("int *ptr PT_GUARDED_BY(ptr_lock);");
        assert_eq!(guard.as_deref(), Some("ptr_lock"));
    }

    #[test]
    fn no_guard_returns_none() {
        assert!(extract_guard_from_c_field_text("int x;").is_none());
    }

    #[test]
    fn c_struct_guarded_by_sets_concurrent_access() {
        // Despite its name, this test does not run the parser and does not
        // inspect `AccessPattern`: tree-sitter won't parse macro expansions, so
        // only the text-extraction helper used by the field parser is checked.
        let text = "uint64_t readers GUARDED_BY(lock_a);";
        assert_eq!(
            extract_guard_from_c_field_text(text).as_deref(),
            Some("lock_a")
        );
    }

    #[test]
    fn c_struct_different_guards_detected_as_false_sharing() {
        // Manually build a layout with two fields on the same cache line,
        // different guards — mirrors what the C frontend would produce for
        // __attribute__((guarded_by(...))) annotated fields.
        let mut layout = StructLayout {
            name: "S".into(),
            total_size: 128,
            align: 8,
            fields: vec![
                Field {
                    name: "readers".into(),
                    ty: TypeInfo::Primitive {
                        name: "uint64_t".into(),
                        size: 8,
                        align: 8,
                    },
                    offset: 0,
                    size: 8,
                    align: 8,
                    source_file: None,
                    source_line: None,
                    access: AccessPattern::Concurrent {
                        guard: Some("lock_a".into()),
                        is_atomic: false,
                    },
                },
                Field {
                    name: "writers".into(),
                    ty: TypeInfo::Primitive {
                        name: "uint64_t".into(),
                        size: 8,
                        align: 8,
                    },
                    offset: 8,
                    size: 8,
                    align: 8,
                    source_file: None,
                    source_line: None,
                    access: AccessPattern::Concurrent {
                        guard: Some("lock_b".into()),
                        is_atomic: false,
                    },
                },
            ],
            source_file: None,
            source_line: None,
            arch: &X86_64_SYSV,
            is_packed: false,
            is_union: false,
        };
        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
            &layout
        ));
        // Same guard → no false sharing
        layout.fields[1].access = AccessPattern::Concurrent {
            guard: Some("lock_a".into()),
            is_atomic: false,
        };
        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
            &layout
        ));
    }

    // ── C++ class: vtable pointer ─────────────────────────────────────────────

    #[test]
    fn cpp_class_with_virtual_method_has_vptr() {
        let src = r#"
class Widget {
    virtual void draw();
    int x;
    int y;
};
"#;
        let layouts = cpp_layouts(src);
        assert_eq!(layouts.len(), 1);
        let l = &layouts[0];
        // First field must be __vptr, pointer-sized (8 on x86_64), at offset 0
        assert_eq!(l.fields[0].name, "__vptr");
        assert_eq!(l.fields[0].size, 8);
        assert_eq!(l.fields[0].offset, 0);
        // int x should come after the pointer (at offset 8)
        assert_eq!(field_named(l, "x").offset, 8);
    }

    #[test]
    fn cpp_class_without_virtual_has_no_vptr() {
        let layouts = cpp_layouts("class Plain { int a; int b; };");
        assert_eq!(layouts.len(), 1);
        assert!(!has_field(&layouts[0], "__vptr"));
    }

    #[test]
    fn cpp_struct_keyword_with_virtual_has_vptr() {
        // `struct` in C++ can also have virtual methods.
        // NOTE: despite the name, nothing about `__vptr` is asserted here. The
        // original author's note says struct_specifier doesn't go through
        // parse_class_specifier, so no __vptr is injected (vtable injection is
        // only for `class` nodes). This test only verifies that parsing
        // succeeds without panicking.
        let _layouts = cpp_layouts("struct IFoo { virtual ~IFoo(); virtual void bar(); };");
    }

    // ── C++ class: inheritance ────────────────────────────────────────────────

    #[test]
    fn cpp_derived_class_has_base_slot() {
        let src = r#"
class Base {
    int x;
};
class Derived : public Base {
    int y;
};
"#;
        let layouts = cpp_layouts(src);
        // Both Base and Derived should be parsed
        let derived = layout_named(&layouts, "Derived");
        // Derived must have a __base_Base synthetic field
        assert!(
            has_field(derived, "__base_Base"),
            "Derived should have a __base_Base field"
        );
        // The y field should come after __base_Base
        let base_field = field_named(derived, "__base_Base");
        let y_field = field_named(derived, "y");
        assert!(y_field.offset >= base_field.offset + base_field.size);
    }

    #[test]
    fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
        let src = r#"
class A { int a; };
class B { int b; };
class C : public A, public B { int c; };
"#;
        let layouts = cpp_layouts(src);
        let c = layout_named(&layouts, "C");
        assert!(has_field(c, "__base_A"));
        assert!(has_field(c, "__base_B"));
    }

    #[test]
    fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
        // class with virtual method: size = sizeof(__vptr) + member fields + padding
        let layouts = cpp_layouts("class V { virtual void f(); int x; };");
        // __vptr(8) + int(4) + 4 pad = 16 bytes on x86_64
        assert_eq!(layouts[0].total_size, 16);
    }

    // ── bitfield handling ─────────────────────────────────────────────────────

    #[test]
    fn is_bitfield_type_detects_colon_n() {
        assert!(is_bitfield_type("int:3"));
        assert!(is_bitfield_type("unsigned int:16"));
        assert!(is_bitfield_type("uint32_t:1"));
        // Not bit-fields — contains ':' but not followed by pure digits
        assert!(!is_bitfield_type("std::atomic<int>"));
        assert!(!is_bitfield_type("ns::Type"));
        assert!(!is_bitfield_type("int"));
    }

    #[test]
    fn struct_with_bitfields_is_skipped() {
        // Bit-field layout is compiler-controlled and cannot be accurately modelled
        // without a compiler. The struct must be skipped entirely.
        let src = r#"
struct Flags {
    unsigned int active : 1;
    unsigned int ready  : 1;
    unsigned int error  : 6;
    int value;
};
"#;
        let layouts = c_layouts(src);
        // Flags must not appear — its layout cannot be accurately computed.
        assert!(
            layouts.iter().all(|l| l.name != "Flags"),
            "struct with bitfields should be skipped; got {:?}",
            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn struct_without_bitfields_is_still_parsed() {
        // Ensure the bitfield guard doesn't affect normal structs.
        let layouts = c_layouts("struct Normal { int a; char b; double c; };");
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Normal");
    }

    #[test]
    fn cpp_class_with_bitfields_is_skipped() {
        let layouts = cpp_layouts("class Packed { int x : 4; int y : 4; };");
        assert!(
            layouts.iter().all(|l| l.name != "Packed"),
            "C++ class with bitfields should be skipped"
        );
    }

    // ── __attribute__((packed)) detection ─────────────────────────────────────

    #[test]
    fn packed_struct_has_no_alignment_padding() {
        // Without packed: char(1) + 3-byte pad + int(4) + char(1) + 3-byte pad = 12 bytes
        // With packed:    char(1) + int(4) + char(1) = 6 bytes, align=1
        let src = r#"
struct __attribute__((packed)) Tight {
    char a;
    int  b;
    char c;
};
"#;
        let layouts = c_layouts(src);
        let l = layout_named(&layouts, "Tight");
        assert!(l.is_packed, "should be marked is_packed");
        assert_eq!(l.total_size, 6, "packed: no padding inserted");
        assert_eq!(l.fields[0].offset, 0);
        assert_eq!(l.fields[1].offset, 1); // immediately after char
        assert_eq!(l.fields[2].offset, 5);
    }

    #[test]
    fn non_packed_struct_has_normal_alignment_padding() {
        // Confirm baseline: same struct without __attribute__((packed)) gets padded
        let src = r#"
struct Normal {
    char a;
    int  b;
    char c;
};
"#;
        let layouts = c_layouts(src);
        let l = layout_named(&layouts, "Normal");
        assert!(!l.is_packed);
        assert_eq!(l.total_size, 12);
        assert_eq!(l.fields[1].offset, 4); // aligned to 4
    }

    #[test]
    fn cpp_class_packed_attribute_detected() {
        let src = r#"
class __attribute__((packed)) Dense {
    char a;
    int  b;
};
"#;
        let layouts = cpp_layouts(src);
        let l = layout_named(&layouts, "Dense");
        assert!(
            l.is_packed,
            "C++ class with __attribute__((packed)) must be marked packed"
        );
        assert_eq!(l.total_size, 5); // char(1) + int(4), no padding
    }

    // ── alignas detection ─────────────────────────────────────────────────────

    #[test]
    fn field_alignas_overrides_natural_alignment() {
        // char is normally align=1 but alignas(8) forces it to align-8.
        // Layout: c (1 byte, offset 0, align 8), then x (4 bytes, align 4).
        // After c, x rounds up to the next multiple of 4 → offset 4.
        // Struct align = max(8, 4) = 8; end of x is 8, already a multiple of 8.
        let src = r#"
struct S {
    alignas(8) char c;
    int x;
};
"#;
        let layouts = cpp_layouts(src);
        let l = layout_named(&layouts, "S");
        // c should be forced to align 8
        assert_eq!(field_named(l, "c").align, 8);
        // x comes after c (1 byte) with natural alignment 4 → offset 4
        assert_eq!(field_named(l, "x").offset, 4);
        // Struct alignment is max(alignas(8), int align 4) = 8
        assert_eq!(l.align, 8);
        // Total = 8 bytes (x at 4, size 4; 4+4=8; 8 is multiple of align 8)
        assert_eq!(l.total_size, 8);
    }

    #[test]
    fn struct_level_alignas_increases_struct_alignment() {
        // alignas(64) on the struct means its alignment requirement is 64.
        // Total size must be a multiple of 64.
        let src = r#"
struct alignas(64) CacheLine {
    int x;
    int y;
};
"#;
        let layouts = cpp_layouts(src);
        let l = layout_named(&layouts, "CacheLine");
        assert_eq!(l.align, 64);
        assert_eq!(l.total_size % 64, 0);
    }

    #[test]
    fn alignas_on_field_smaller_than_natural_is_ignored() {
        // NOTE: the source below deliberately contains no `alignas`, so this
        // test only pins the natural alignment of `int` (4). It does not yet
        // exercise an under-aligned `alignas(1)` request; the original note
        // says the frontend currently stores the alignas value directly.
        // TODO: add an `alignas(1) int` case once the intended clamping
        // behaviour is confirmed.
        let layouts = cpp_layouts("struct S { int x; int y; };");
        assert_eq!(layouts[0].fields[0].align, 4); // natural alignment, not reduced
    }

    #[test]
    fn cpp_class_alignas_detected() {
        let src = r#"
class alignas(32) Aligned {
    double x;
    double y;
};
"#;
        let layouts = cpp_layouts(src);
        let l = layout_named(&layouts, "Aligned");
        assert_eq!(l.align, 32);
        assert_eq!(l.total_size % 32, 0);
    }

    // ── bad weather: alignas edge cases ───────────────────────────────────────

    #[test]
    fn struct_without_alignas_unchanged() {
        // Ensure the alignas detection path doesn't affect structs without it
        let layouts = c_layouts("struct Plain { int a; char b; };");
        let l = &layouts[0];
        assert_eq!(l.align, 4); // max field alignment = int = 4
        assert_eq!(l.total_size, 8); // int(4) + char(1) + 3 pad
    }

    // ── anonymous nested structs/unions ───────────────────────────────────────

    #[test]
    fn anonymous_nested_union_fields_flattened() {
        let src = r#"
struct Packet {
    union {
        uint32_t raw;
        uint8_t bytes[4];
    };
    uint64_t timestamp;
};
"#;
        let layouts = c_layouts(src);
        let l = layout_named(&layouts, "Packet");
        // Only `raw` and `timestamp` are checked; the array member `bytes` is
        // intentionally not asserted here.
        assert!(
            has_field(l, "raw"),
            "raw field must be flattened into Packet"
        );
        assert!(has_field(l, "timestamp"), "timestamp must be present");
    }

    #[test]
    fn anonymous_nested_struct_fields_flattened() {
        let src = r#"
struct Outer {
    struct {
        int x;
        int y;
    };
    double z;
};
"#;
        let layouts = c_layouts(src);
        let l = layout_named(&layouts, "Outer");
        assert!(has_field(l, "x"), "x must be flattened");
        assert!(has_field(l, "y"), "y must be flattened");
        assert!(has_field(l, "z"), "z present");
        // Total: x(4) + y(4) + z(8) = 16 bytes, no padding
        assert_eq!(l.total_size, 16);
    }

    #[test]
    fn named_nested_struct_not_flattened() {
        // A named struct used as a field type must NOT be flattened
        let src = r#"
struct Vec2 { float x; float y; };
struct Rect { struct Vec2 tl; struct Vec2 br; };
"#;
        let layouts = c_layouts(src);
        let rect = layout_named(&layouts, "Rect");
        // Should have tl and br as opaque fields, not x/y flattened
        assert_eq!(rect.fields.len(), 2);
        assert!(has_field(rect, "tl"));
        assert!(has_field(rect, "br"));
    }
}