Skip to main content

padlock_source/frontends/
c_cpp.rs

1// padlock-source/src/frontends/c_cpp.rs
2//
3// Extracts struct layouts from C / C++ source using tree-sitter.
4// Sizes and alignments are computed from field type names + arch config;
5// there is no compiler involved so the results are approximate for complex types.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use tree_sitter::{Node, Parser};
10
11// ── type resolution ───────────────────────────────────────────────────────────
12
13/// Map a C/C++ type name to (size, align) using the target arch.
14fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
15    let ty = ty.trim();
16    // Strip qualifiers
17    for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
18        if let Some(rest) = ty.strip_prefix(qual) {
19            return c_type_size_align(rest, arch);
20        }
21    }
22    // x86 SSE / AVX / AVX-512 SIMD types
23    match ty {
24        "__m64" => return (8, 8),
25        "__m128" | "__m128d" | "__m128i" => return (16, 16),
26        "__m256" | "__m256d" | "__m256i" => return (32, 32),
27        "__m512" | "__m512d" | "__m512i" => return (64, 64),
28        // ARM NEON — 64-bit (double-word) vectors
29        "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
30        | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
31        // ARM NEON — 128-bit (quad-word) vectors
32        "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
33        | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
34        _ => {}
35    }
36    // C++ standard library synchronisation types (Linux/glibc x86-64 defaults).
37    // Sizes are platform-approximate; accuracy is "good enough" for cache-line
38    // bucketing and false-sharing detection.
39    match ty {
40        // Mutexes — all backed by pthread_mutex_t (40 bytes on Linux/glibc)
41        "std::mutex"
42        | "std::recursive_mutex"
43        | "std::timed_mutex"
44        | "std::recursive_timed_mutex"
45        | "pthread_mutex_t" => return (40, 8),
46        "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
47        // Condition variables
48        "std::condition_variable" | "pthread_cond_t" => return (48, 8),
49        // std::atomic<T> — same size as T; extract and recurse
50        ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
51            let inner = &ty[12..ty.len() - 1];
52            return c_type_size_align(inner.trim(), arch);
53        }
54        _ => {} // fall through to primitive types below
55    }
56    // Primitive / stdint / pointer types
57    match ty {
58        "char" | "_Bool" | "bool" => (1, 1),
59        "short" | "short int" => (2, 2),
60        "int" => (4, 4),
61        "long" => (arch.pointer_size, arch.pointer_size),
62        "long long" => (8, 8),
63        "float" => (4, 4),
64        "double" => (8, 8),
65        "long double" => (16, 16),
66        "int8_t" | "uint8_t" => (1, 1),
67        "int16_t" | "uint16_t" => (2, 2),
68        "int32_t" | "uint32_t" => (4, 4),
69        "int64_t" | "uint64_t" => (8, 8),
70        "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
71            (arch.pointer_size, arch.pointer_size)
72        }
73        // Pointer types
74        ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
75        // Unknown — use pointer size as a reasonable default
76        _ => (arch.pointer_size, arch.pointer_size),
77    }
78}
79
80// ── struct / union simulation ─────────────────────────────────────────────────
81
/// Drop a trailing bit-field width annotation (`:N`) from a type name so the
/// remaining text can be used for size lookup.
///
/// Only the text after the *last* `:` is inspected, and it is removed only
/// when it consists of ASCII digits (surrounding whitespace ignored):
/// `"int:3"` → `"int"`, while `"std::atomic"` is returned unchanged.
fn strip_bitfield_suffix(ty: &str) -> &str {
    match ty.rsplit_once(':') {
        Some((head, tail)) => {
            let width = tail.trim();
            let looks_like_width = !width.is_empty() && width.bytes().all(|b| b.is_ascii_digit());
            if looks_like_width {
                head.trim_end()
            } else {
                ty
            }
        }
        None => ty,
    }
}
93
94/// Simulate C struct layout (no `__attribute__((packed))`) given ordered fields.
95fn simulate_layout(
96    fields: &mut Vec<Field>,
97    struct_name: String,
98    arch: &'static ArchConfig,
99) -> StructLayout {
100    let mut offset = 0usize;
101    let mut struct_align = 1usize;
102
103    for f in fields.iter_mut() {
104        if f.align > 0 {
105            offset = offset.next_multiple_of(f.align);
106        }
107        f.offset = offset;
108        offset += f.size;
109        struct_align = struct_align.max(f.align);
110    }
111    // Trailing padding
112    if struct_align > 0 {
113        offset = offset.next_multiple_of(struct_align);
114    }
115
116    StructLayout {
117        name: struct_name,
118        total_size: offset,
119        align: struct_align,
120        fields: std::mem::take(fields),
121        source_file: None,
122        source_line: None,
123        arch,
124        is_packed: false,
125        is_union: false,
126    }
127}
128
129/// Simulate a C/C++ union layout: all fields start at offset 0;
130/// total size is the largest field, rounded to max alignment.
131fn simulate_union_layout(
132    fields: &mut Vec<Field>,
133    name: String,
134    arch: &'static ArchConfig,
135) -> StructLayout {
136    for f in fields.iter_mut() {
137        f.offset = 0;
138    }
139    let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
140    let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
141    let total_size = if max_align > 0 {
142        max_size.next_multiple_of(max_align)
143    } else {
144        max_size
145    };
146
147    StructLayout {
148        name,
149        total_size,
150        align: max_align,
151        fields: std::mem::take(fields),
152        source_file: None,
153        source_line: None,
154        arch,
155        is_packed: false,
156        is_union: true,
157    }
158}
159
160// ── C++ class parsing (vtable + inheritance) ──────────────────────────────────
161
162/// Parse a `class_specifier` node, modelling:
163/// - A hidden vtable pointer (`__vptr`) when any method is `virtual`.
164/// - Base-class storage as a synthetic `__base_<Name>` field (size resolved
165///   later by the nested-struct resolution pass in `lib.rs`).
166fn parse_class_specifier(
167    source: &str,
168    node: Node<'_>,
169    arch: &'static ArchConfig,
170) -> Option<StructLayout> {
171    let mut class_name = "<anonymous>".to_string();
172    let mut base_names: Vec<String> = Vec::new();
173    let mut body_node: Option<Node> = None;
174
175    for i in 0..node.child_count() {
176        let child = node.child(i)?;
177        match child.kind() {
178            "type_identifier" => class_name = source[child.byte_range()].to_string(),
179            "base_class_clause" => {
180                // tree-sitter-cpp structure: ':' [access_specifier] type_identifier
181                // type_identifier nodes are direct children of base_class_clause.
182                for j in 0..child.child_count() {
183                    if let Some(base) = child.child(j) {
184                        if base.kind() == "type_identifier" {
185                            base_names.push(source[base.byte_range()].to_string());
186                        }
187                    }
188                }
189            }
190            "field_declaration_list" => body_node = Some(child),
191            _ => {}
192        }
193    }
194
195    let body = body_node?;
196
197    // Detect virtual methods: look for `virtual` keyword anywhere in body
198    let has_virtual = contains_virtual_keyword(source, body);
199
200    // Collect declared fields
201    let mut raw_fields: Vec<(String, String, Option<String>)> = Vec::new();
202    for i in 0..body.child_count() {
203        if let Some(child) = body.child(i) {
204            if child.kind() == "field_declaration" {
205                if let Some((ty, fname, guard)) = parse_field_declaration(source, child) {
206                    raw_fields.push((fname, ty, guard));
207                }
208            }
209        }
210    }
211
212    // Build fields: vtable pointer, then base-class slots, then declared fields
213    let mut fields: Vec<Field> = Vec::new();
214
215    // Virtual dispatch pointer (hidden, at offset 0 for the first virtual class)
216    if has_virtual {
217        let ps = arch.pointer_size;
218        fields.push(Field {
219            name: "__vptr".to_string(),
220            ty: TypeInfo::Pointer {
221                size: ps,
222                align: ps,
223            },
224            offset: 0,
225            size: ps,
226            align: ps,
227            source_file: None,
228            source_line: None,
229            access: AccessPattern::Unknown,
230        });
231    }
232
233    // Base class storage (opaque until nested-struct resolver fills in sizes)
234    for base in &base_names {
235        let ps = arch.pointer_size;
236        fields.push(Field {
237            name: format!("__base_{base}"),
238            ty: TypeInfo::Opaque {
239                name: base.clone(),
240                size: ps,
241                align: ps,
242            },
243            offset: 0,
244            size: ps,
245            align: ps,
246            source_file: None,
247            source_line: None,
248            access: AccessPattern::Unknown,
249        });
250    }
251
252    // Declared member fields
253    for (fname, ty_name, guard) in raw_fields {
254        let base_ty = strip_bitfield_suffix(&ty_name);
255        let (size, align) = c_type_size_align(base_ty, arch);
256        let access = if let Some(g) = guard {
257            AccessPattern::Concurrent {
258                guard: Some(g),
259                is_atomic: false,
260            }
261        } else {
262            AccessPattern::Unknown
263        };
264        fields.push(Field {
265            name: fname,
266            ty: TypeInfo::Primitive {
267                name: ty_name,
268                size,
269                align,
270            },
271            offset: 0,
272            size,
273            align,
274            source_file: None,
275            source_line: None,
276            access,
277        });
278    }
279
280    if fields.is_empty() {
281        return None;
282    }
283
284    Some(simulate_layout(&mut fields, class_name, arch))
285}
286
287/// Return true if a `field_declaration_list` node contains any `virtual` keyword
288/// (indicating that the class needs a vtable pointer).
289fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
290    let mut stack = vec![node];
291    while let Some(n) = stack.pop() {
292        if n.kind() == "virtual" {
293            return true;
294        }
295        // Also check raw text for cases where tree-sitter may not produce a
296        // dedicated `virtual` node (e.g. inside complex declarations).
297        if n.child_count() == 0 {
298            let text = &source[n.byte_range()];
299            if text == "virtual" {
300                return true;
301            }
302        }
303        for i in (0..n.child_count()).rev() {
304            if let Some(child) = n.child(i) {
305                stack.push(child);
306            }
307        }
308    }
309    false
310}
311
312// ── tree-sitter walker ────────────────────────────────────────────────────────
313
314fn extract_structs_from_tree(
315    source: &str,
316    root: Node<'_>,
317    arch: &'static ArchConfig,
318    layouts: &mut Vec<StructLayout>,
319) {
320    let cursor = root.walk();
321    let mut stack = vec![root];
322
323    while let Some(node) = stack.pop() {
324        // Push children in reverse so we process left-to-right
325        for i in (0..node.child_count()).rev() {
326            if let Some(child) = node.child(i) {
327                stack.push(child);
328            }
329        }
330
331        match node.kind() {
332            "struct_specifier" => {
333                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, false) {
334                    layouts.push(layout);
335                }
336            }
337            "union_specifier" => {
338                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, true) {
339                    layouts.push(layout);
340                }
341            }
342            "class_specifier" => {
343                if let Some(layout) = parse_class_specifier(source, node, arch) {
344                    layouts.push(layout);
345                }
346            }
347            _ => {}
348        }
349    }
350
351    // Also handle `typedef struct/union { ... } Name;`
352    let cursor2 = root.walk();
353    let mut stack2 = vec![root];
354    while let Some(node) = stack2.pop() {
355        for i in (0..node.child_count()).rev() {
356            if let Some(child) = node.child(i) {
357                stack2.push(child);
358            }
359        }
360        if node.kind() == "type_definition" {
361            if let Some(layout) = parse_typedef_struct_or_union(source, node, arch) {
362                let existing = layouts
363                    .iter()
364                    .position(|l| l.name == layout.name || l.name == "<anonymous>");
365                match existing {
366                    Some(i) if layouts[i].name == "<anonymous>" => {
367                        layouts[i] = layout;
368                    }
369                    None => layouts.push(layout),
370                    _ => {}
371                }
372            }
373        }
374    }
375    let _ = cursor;
376    let _ = cursor2; // silence unused warnings
377}
378
379/// Parse a `struct_specifier` or `union_specifier` node into a `StructLayout`.
380fn parse_struct_or_union_specifier(
381    source: &str,
382    node: Node<'_>,
383    arch: &'static ArchConfig,
384    is_union: bool,
385) -> Option<StructLayout> {
386    let mut name = "<anonymous>".to_string();
387    let mut body_node: Option<Node> = None;
388
389    for i in 0..node.child_count() {
390        let child = node.child(i)?;
391        match child.kind() {
392            "type_identifier" => name = source[child.byte_range()].to_string(),
393            "field_declaration_list" => body_node = Some(child),
394            _ => {}
395        }
396    }
397
398    let body = body_node?;
399    let mut raw_fields: Vec<(String, String, Option<String>)> = Vec::new();
400
401    for i in 0..body.child_count() {
402        let child = body.child(i)?;
403        if child.kind() == "field_declaration" {
404            if let Some((ty, fname, guard)) = parse_field_declaration(source, child) {
405                raw_fields.push((fname, ty, guard));
406            }
407        }
408    }
409
410    if raw_fields.is_empty() {
411        return None;
412    }
413
414    let mut fields: Vec<Field> = raw_fields
415        .into_iter()
416        .map(|(fname, ty_name, guard)| {
417            // Use the base type (without bit-field `:N` suffix) for size/align lookup.
418            let base = strip_bitfield_suffix(&ty_name);
419            let (size, align) = c_type_size_align(base, arch);
420            let access = if let Some(g) = guard {
421                AccessPattern::Concurrent {
422                    guard: Some(g),
423                    is_atomic: false,
424                }
425            } else {
426                AccessPattern::Unknown
427            };
428            Field {
429                name: fname,
430                ty: TypeInfo::Primitive {
431                    name: ty_name,
432                    size,
433                    align,
434                },
435                offset: 0,
436                size,
437                align,
438                source_file: None,
439                source_line: None,
440                access,
441            }
442        })
443        .collect();
444
445    if is_union {
446        Some(simulate_union_layout(&mut fields, name, arch))
447    } else {
448        Some(simulate_layout(&mut fields, name, arch))
449    }
450}
451
452/// Parse a `typedef struct/union { ... } Name;` type_definition node.
453fn parse_typedef_struct_or_union(
454    source: &str,
455    node: Node<'_>,
456    arch: &'static ArchConfig,
457) -> Option<StructLayout> {
458    let mut specifier_node: Option<Node> = None;
459    let mut is_union = false;
460    let mut typedef_name: Option<String> = None;
461
462    for i in 0..node.child_count() {
463        let child = node.child(i)?;
464        match child.kind() {
465            "struct_specifier" => {
466                specifier_node = Some(child);
467                is_union = false;
468            }
469            "union_specifier" => {
470                specifier_node = Some(child);
471                is_union = true;
472            }
473            "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
474            _ => {}
475        }
476    }
477
478    let spec = specifier_node?;
479    let typedef_name = typedef_name?;
480
481    let mut layout = parse_struct_or_union_specifier(source, spec, arch, is_union)?;
482    if layout.name == "<anonymous>" {
483        layout.name = typedef_name;
484    }
485    Some(layout)
486}
487
488// Alias kept for the typedef pass in extract_structs_from_tree.
489#[allow(dead_code)]
490fn parse_typedef_struct(
491    source: &str,
492    node: Node<'_>,
493    arch: &'static ArchConfig,
494) -> Option<StructLayout> {
495    parse_typedef_struct_or_union(source, node, arch)
496}
497
/// Extract a lock guard name from the raw text of a C/C++ field declaration
/// annotated with `__attribute__((guarded_by(X)))` or
/// `__attribute__((pt_guarded_by(X)))`.
///
/// Also recognises the common macro forms `GUARDED_BY(X)` and
/// `PT_GUARDED_BY(X)` (Clang thread-safety analysis). Matching is purely
/// textual, so it works regardless of how tree-sitter structures the tokens.
///
/// Details:
/// - The keyword must be all lower-case or all upper-case; lower-case
///   spellings are tried first. The `pt_` forms need no separate entry
///   because they contain the shorter keyword.
/// - Every occurrence of a keyword is examined, so an identifier that merely
///   contains the keyword (e.g. a field called `GUARDED_BY_count`) does not
///   hide a real annotation later in the text.
/// - The argument runs to the *matching* closing parenthesis, so nested
///   calls such as `GUARDED_BY(obj.lock())` are returned whole.
/// - Surrounding whitespace and double quotes are removed from the guard.
///
/// Returns `None` when no keyword is followed by a non-empty, properly
/// closed parenthesised argument.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    /// Byte offset, within `args`, of the `)` that closes a parenthesis
    /// opened immediately before `args`; `None` if it is never closed.
    fn closing_paren_offset(args: &str) -> Option<usize> {
        let mut depth = 1usize;
        for (idx, ch) in args.char_indices() {
            match ch {
                '(' => depth += 1,
                ')' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(idx);
                    }
                }
                _ => {}
            }
        }
        None
    }

    const KEYWORDS: [&str; 2] = ["guarded_by", "GUARDED_BY"];

    for kw in KEYWORDS.iter() {
        for (pos, _) in field_source.match_indices(kw) {
            // Expect `(` optionally preceded by whitespace.
            let after = field_source[pos + kw.len()..].trim_start();
            let Some(args) = after.strip_prefix('(') else {
                continue;
            };
            let Some(end) = closing_paren_offset(args) else {
                continue;
            };
            let guard = args[..end].trim().trim_matches('"');
            if !guard.is_empty() {
                return Some(guard.to_string());
            }
        }
    }
    None
}
525
526fn parse_field_declaration(
527    source: &str,
528    node: Node<'_>,
529) -> Option<(String, String, Option<String>)> {
530    let mut ty_parts: Vec<String> = Vec::new();
531    let mut field_name: Option<String> = None;
532    // Bit-field width, e.g. `int flags : 3;` → Some("3")
533    let mut bit_width: Option<String> = None;
534    // Collect attribute text for guard extraction
535    let mut attr_text = String::new();
536
537    for i in 0..node.child_count() {
538        let child = node.child(i)?;
539        match child.kind() {
540            "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
541                ty_parts.push(source[child.byte_range()].trim().to_string());
542            }
543            // C++ qualified types: std::mutex, ns::Type, etc.
544            // C++ template types:  std::atomic<uint64_t>, std::vector<int>, etc.
545            "qualified_identifier" | "template_type" => {
546                ty_parts.push(source[child.byte_range()].trim().to_string());
547            }
548            // Nested struct/union used as a field type: `struct Vec2 tl;`
549            // Extract just the type_identifier name (e.g. "Vec2") so the
550            // nested-struct resolution pass can match it by name.
551            "struct_specifier" | "union_specifier" => {
552                for j in 0..child.child_count() {
553                    if let Some(sub) = child.child(j) {
554                        if sub.kind() == "type_identifier" {
555                            ty_parts.push(source[sub.byte_range()].trim().to_string());
556                            break;
557                        }
558                    }
559                }
560            }
561            "field_identifier" => {
562                field_name = Some(source[child.byte_range()].trim().to_string());
563            }
564            "pointer_declarator" => {
565                field_name = extract_identifier(source, child);
566                ty_parts.push("*".to_string());
567            }
568            // Bit-field clause: `: N`  (tree-sitter-c/cpp node)
569            "bitfield_clause" => {
570                let text = source[child.byte_range()].trim();
571                // Strip leading ':' and whitespace to get just the width digits
572                bit_width = Some(text.trim_start_matches(':').trim().to_string());
573            }
574            // GNU attribute specifier: __attribute__((...))
575            "attribute_specifier" | "attribute" => {
576                attr_text.push_str(source[child.byte_range()].trim());
577                attr_text.push(' ');
578            }
579            _ => {}
580        }
581    }
582
583    let base_ty = ty_parts.join(" ");
584    let fname = field_name?;
585    if base_ty.is_empty() {
586        return None;
587    }
588    // Annotate bit-field types as "type:N" so callers can detect and report them;
589    // `strip_bitfield_suffix` recovers the base type for size/align lookup.
590    let ty = if let Some(w) = bit_width {
591        format!("{base_ty}:{w}")
592    } else {
593        base_ty
594    };
595
596    // Also check the full field source text (attribute_specifier may not always
597    // be a direct child depending on tree-sitter grammar version).
598    let field_src = source[node.byte_range()].to_string();
599    let guard = extract_guard_from_c_field_text(&attr_text)
600        .or_else(|| extract_guard_from_c_field_text(&field_src));
601
602    Some((ty, fname, guard))
603}
604
605fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
606    if node.kind() == "field_identifier" || node.kind() == "identifier" {
607        return Some(source[node.byte_range()].to_string());
608    }
609    for i in 0..node.child_count() {
610        if let Some(child) = node.child(i) {
611            if let Some(name) = extract_identifier(source, child) {
612                return Some(name);
613            }
614        }
615    }
616    None
617}
618
619// ── public API ────────────────────────────────────────────────────────────────
620
621pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
622    let mut parser = Parser::new();
623    parser.set_language(&tree_sitter_c::language())?;
624    let tree = parser
625        .parse(source, None)
626        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
627    let mut layouts = Vec::new();
628    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
629    Ok(layouts)
630}
631
632pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
633    let mut parser = Parser::new();
634    parser.set_language(&tree_sitter_cpp::language())?;
635    let tree = parser
636        .parse(source, None)
637        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
638    let mut layouts = Vec::new();
639    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
640    Ok(layouts)
641}
642
643// ── tests ─────────────────────────────────────────────────────────────────────
644
645#[cfg(test)]
646mod tests {
647    use super::*;
648    use padlock_core::arch::X86_64_SYSV;
649
650    #[test]
651    fn parse_simple_c_struct() {
652        let src = r#"
653struct Point {
654    int x;
655    int y;
656};
657"#;
658        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
659        assert_eq!(layouts.len(), 1);
660        assert_eq!(layouts[0].name, "Point");
661        assert_eq!(layouts[0].fields.len(), 2);
662        assert_eq!(layouts[0].fields[0].name, "x");
663        assert_eq!(layouts[0].fields[1].name, "y");
664    }
665
666    #[test]
667    fn parse_typedef_struct() {
668        let src = r#"
669typedef struct {
670    char  is_active;
671    double timeout;
672    int   port;
673} Connection;
674"#;
675        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
676        assert_eq!(layouts.len(), 1);
677        assert_eq!(layouts[0].name, "Connection");
678        assert_eq!(layouts[0].fields.len(), 3);
679    }
680
681    #[test]
682    fn c_layout_computes_offsets() {
683        let src = "struct T { char a; double b; };";
684        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
685        assert_eq!(layouts.len(), 1);
686        let layout = &layouts[0];
687        // char at offset 0, double at offset 8 (7 bytes padding)
688        assert_eq!(layout.fields[0].offset, 0);
689        assert_eq!(layout.fields[1].offset, 8);
690        assert_eq!(layout.total_size, 16);
691    }
692
693    #[test]
694    fn c_layout_detects_padding() {
695        let src = "struct T { char a; int b; };";
696        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
697        let gaps = padlock_core::ir::find_padding(&layouts[0]);
698        assert!(!gaps.is_empty());
699        assert_eq!(gaps[0].bytes, 3); // 3 bytes padding between char and int
700    }
701
702    #[test]
703    fn parse_cpp_struct() {
704        let src = "struct Vec3 { float x; float y; float z; };";
705        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
706        assert_eq!(layouts.len(), 1);
707        assert_eq!(layouts[0].fields.len(), 3);
708    }
709
710    // ── SIMD types ────────────────────────────────────────────────────────────
711
712    #[test]
713    fn simd_sse_field_size_and_align() {
714        let src = "struct Vecs { __m128 a; __m256 b; };";
715        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
716        assert_eq!(layouts.len(), 1);
717        let f = &layouts[0].fields;
718        assert_eq!(f[0].size, 16); // __m128
719        assert_eq!(f[0].align, 16);
720        assert_eq!(f[1].size, 32); // __m256
721        assert_eq!(f[1].align, 32);
722    }
723
724    #[test]
725    fn simd_avx512_size() {
726        let src = "struct Wide { __m512 v; };";
727        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
728        assert_eq!(layouts[0].fields[0].size, 64);
729        assert_eq!(layouts[0].fields[0].align, 64);
730    }
731
732    #[test]
733    fn simd_padding_detected_when_small_field_before_avx() {
734        // char(1) + [31 pad] + __m256(32) = 64 bytes, 31 wasted
735        let src = "struct Mixed { char flag; __m256 data; };";
736        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
737        let gaps = padlock_core::ir::find_padding(&layouts[0]);
738        assert!(!gaps.is_empty());
739        assert_eq!(gaps[0].bytes, 31);
740    }
741
742    // ── union parsing ─────────────────────────────────────────────────────────
743
744    #[test]
745    fn union_fields_all_at_offset_zero() {
746        let src = "union Data { int i; float f; double d; };";
747        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
748        assert_eq!(layouts.len(), 1);
749        let u = &layouts[0];
750        assert!(u.is_union);
751        for field in &u.fields {
752            assert_eq!(
753                field.offset, 0,
754                "union field '{}' should be at offset 0",
755                field.name
756            );
757        }
758    }
759
760    #[test]
761    fn union_total_size_is_max_field() {
762        // double is the largest (8 bytes); total should be 8
763        let src = "union Data { int i; float f; double d; };";
764        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
765        assert_eq!(layouts[0].total_size, 8);
766    }
767
768    #[test]
769    fn union_no_padding_finding() {
770        let src = "union Data { int i; double d; };";
771        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
772        let report = padlock_core::findings::Report::from_layouts(&layouts);
773        let sr = &report.structs[0];
774        assert!(!sr
775            .findings
776            .iter()
777            .any(|f| matches!(f, padlock_core::findings::Finding::PaddingWaste { .. })));
778        assert!(!sr
779            .findings
780            .iter()
781            .any(|f| matches!(f, padlock_core::findings::Finding::ReorderSuggestion { .. })));
782    }
783
784    #[test]
785    fn typedef_union_parsed() {
786        let src = "typedef union { int a; double b; } Value;";
787        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
788        assert_eq!(layouts.len(), 1);
789        assert_eq!(layouts[0].name, "Value");
790        assert!(layouts[0].is_union);
791    }
792
793    // ── bit fields ────────────────────────────────────────────────────────────
794
795    #[test]
796    fn bitfield_type_annotated_with_width() {
797        let src = "struct Flags { int a : 3; int b : 5; };";
798        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
799        assert_eq!(layouts.len(), 1);
800        // Both fields should be present; type names should contain the width
801        let names: Vec<&str> = layouts[0].fields.iter().map(|f| f.name.as_str()).collect();
802        assert!(names.contains(&"a") && names.contains(&"b"));
803        // Type name should encode the bit width
804        let a_ty = match &layouts[0].fields[0].ty {
805            padlock_core::ir::TypeInfo::Primitive { name, .. } => name.clone(),
806            _ => panic!("expected Primitive"),
807        };
808        assert!(
809            a_ty.contains(':'),
810            "bit field type should contain ':' width annotation"
811        );
812    }
813
814    #[test]
815    fn bitfield_uses_storage_unit_size() {
816        // `int a : 3` should report size = sizeof(int) = 4
817        let src = "struct S { int a : 3; };";
818        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
819        assert_eq!(layouts[0].fields[0].size, 4);
820    }
821
822    // ── attribute guard extraction ─────────────────────────────────────────────
823
824    #[test]
825    fn extract_guard_from_c_guarded_by_macro() {
826        let text = "int value GUARDED_BY(mu);";
827        let guard = extract_guard_from_c_field_text(text);
828        assert_eq!(guard.as_deref(), Some("mu"));
829    }
830
831    #[test]
832    fn extract_guard_from_c_attribute_specifier() {
833        let text = "__attribute__((guarded_by(counter_lock))) uint64_t counter;";
834        let guard = extract_guard_from_c_field_text(text);
835        assert_eq!(guard.as_deref(), Some("counter_lock"));
836    }
837
838    #[test]
839    fn extract_guard_pt_guarded_by() {
840        let text = "int *ptr PT_GUARDED_BY(ptr_lock);";
841        let guard = extract_guard_from_c_field_text(text);
842        assert_eq!(guard.as_deref(), Some("ptr_lock"));
843    }
844
845    #[test]
846    fn no_guard_returns_none() {
847        let guard = extract_guard_from_c_field_text("int x;");
848        assert!(guard.is_none());
849    }
850
851    #[test]
852    fn c_struct_guarded_by_sets_concurrent_access() {
853        // Using GUARDED_BY macro style in comments/text — tree-sitter won't parse
854        // macro expansions, so test the text-extraction path via parse_field_declaration
855        // indirectly by checking extract_guard_from_c_field_text.
856        let text = "uint64_t readers GUARDED_BY(lock_a);";
857        assert_eq!(
858            extract_guard_from_c_field_text(text).as_deref(),
859            Some("lock_a")
860        );
861    }
862
863    #[test]
864    fn c_struct_different_guards_detected_as_false_sharing() {
865        use padlock_core::arch::X86_64_SYSV;
866        use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
867
868        // Manually build a layout with two fields on the same cache line,
869        // different guards — mirrors what the C frontend would produce for
870        // __attribute__((guarded_by(...))) annotated fields.
871        let mut layout = StructLayout {
872            name: "S".into(),
873            total_size: 128,
874            align: 8,
875            fields: vec![
876                Field {
877                    name: "readers".into(),
878                    ty: TypeInfo::Primitive {
879                        name: "uint64_t".into(),
880                        size: 8,
881                        align: 8,
882                    },
883                    offset: 0,
884                    size: 8,
885                    align: 8,
886                    source_file: None,
887                    source_line: None,
888                    access: AccessPattern::Concurrent {
889                        guard: Some("lock_a".into()),
890                        is_atomic: false,
891                    },
892                },
893                Field {
894                    name: "writers".into(),
895                    ty: TypeInfo::Primitive {
896                        name: "uint64_t".into(),
897                        size: 8,
898                        align: 8,
899                    },
900                    offset: 8,
901                    size: 8,
902                    align: 8,
903                    source_file: None,
904                    source_line: None,
905                    access: AccessPattern::Concurrent {
906                        guard: Some("lock_b".into()),
907                        is_atomic: false,
908                    },
909                },
910            ],
911            source_file: None,
912            source_line: None,
913            arch: &X86_64_SYSV,
914            is_packed: false,
915            is_union: false,
916        };
917        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
918            &layout
919        ));
920        // Same guard → no false sharing
921        layout.fields[1].access = AccessPattern::Concurrent {
922            guard: Some("lock_a".into()),
923            is_atomic: false,
924        };
925        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
926            &layout
927        ));
928    }
929
930    // ── C++ class: vtable pointer ─────────────────────────────────────────────
931
932    #[test]
933    fn cpp_class_with_virtual_method_has_vptr() {
934        let src = r#"
935class Widget {
936    virtual void draw();
937    int x;
938    int y;
939};
940"#;
941        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
942        assert_eq!(layouts.len(), 1);
943        let l = &layouts[0];
944        // First field must be __vptr
945        assert_eq!(l.fields[0].name, "__vptr");
946        assert_eq!(l.fields[0].size, 8); // pointer on x86_64
947                                         // __vptr is at offset 0
948        assert_eq!(l.fields[0].offset, 0);
949        // int x should come after the pointer (at offset 8)
950        let x = l.fields.iter().find(|f| f.name == "x").unwrap();
951        assert_eq!(x.offset, 8);
952    }
953
954    #[test]
955    fn cpp_class_without_virtual_has_no_vptr() {
956        let src = "class Plain { int a; int b; };";
957        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
958        assert_eq!(layouts.len(), 1);
959        assert!(!layouts[0].fields.iter().any(|f| f.name == "__vptr"));
960    }
961
962    #[test]
963    fn cpp_struct_keyword_with_virtual_has_vptr() {
964        // `struct` in C++ can also have virtual methods
965        let src = "struct IFoo { virtual ~IFoo(); virtual void bar(); };";
966        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
967        // struct_specifier doesn't go through parse_class_specifier, so no __vptr
968        // (vtable injection is only for `class` nodes)
969        let _ = layouts; // just verify it parses without panic
970    }
971
972    // ── C++ class: single inheritance ─────────────────────────────────────────
973
974    #[test]
975    fn cpp_derived_class_has_base_slot() {
976        let src = r#"
977class Base {
978    int x;
979};
980class Derived : public Base {
981    int y;
982};
983"#;
984        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
985        // Both Base and Derived should be parsed
986        let derived = layouts.iter().find(|l| l.name == "Derived").unwrap();
987        // Derived must have a __base_Base synthetic field
988        assert!(
989            derived.fields.iter().any(|f| f.name == "__base_Base"),
990            "Derived should have a __base_Base field"
991        );
992        // The y field should come after __base_Base
993        let base_field = derived
994            .fields
995            .iter()
996            .find(|f| f.name == "__base_Base")
997            .unwrap();
998        let y_field = derived.fields.iter().find(|f| f.name == "y").unwrap();
999        assert!(y_field.offset >= base_field.offset + base_field.size);
1000    }
1001
1002    #[test]
1003    fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
1004        let src = r#"
1005class A { int a; };
1006class B { int b; };
1007class C : public A, public B { int c; };
1008"#;
1009        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1010        let c = layouts.iter().find(|l| l.name == "C").unwrap();
1011        assert!(c.fields.iter().any(|f| f.name == "__base_A"));
1012        assert!(c.fields.iter().any(|f| f.name == "__base_B"));
1013    }
1014
1015    #[test]
1016    fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
1017        // class with virtual method: size = sizeof(__vptr) + member fields + padding
1018        let src = "class V { virtual void f(); int x; };";
1019        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1020        let l = &layouts[0];
1021        // __vptr(8) + int(4) + 4 pad = 16 bytes on x86_64
1022        assert_eq!(l.total_size, 16);
1023    }
1024}