Skip to main content

padlock_source/frontends/
c_cpp.rs

1// padlock-source/src/frontends/c_cpp.rs
2//
3// Extracts struct layouts from C / C++ source using tree-sitter.
4// Sizes and alignments are computed from field type names + arch config;
5// there is no compiler involved so the results are approximate for complex types.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use tree_sitter::{Node, Parser};
10
11// ── type resolution ───────────────────────────────────────────────────────────
12
13/// Map a C/C++ type name to (size, align) using the target arch.
14fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
15    let ty = ty.trim();
16    // Strip qualifiers
17    for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
18        if let Some(rest) = ty.strip_prefix(qual) {
19            return c_type_size_align(rest, arch);
20        }
21    }
22    // x86 SSE / AVX / AVX-512 SIMD types
23    match ty {
24        "__m64" => return (8, 8),
25        "__m128" | "__m128d" | "__m128i" => return (16, 16),
26        "__m256" | "__m256d" | "__m256i" => return (32, 32),
27        "__m512" | "__m512d" | "__m512i" => return (64, 64),
28        // ARM NEON — 64-bit (double-word) vectors
29        "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
30        | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
31        // ARM NEON — 128-bit (quad-word) vectors
32        "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
33        | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
34        _ => {}
35    }
36    // C++ standard library synchronisation types (Linux/glibc x86-64 defaults).
37    // Sizes are platform-approximate; accuracy is "good enough" for cache-line
38    // bucketing and false-sharing detection.
39    match ty {
40        // Mutexes — all backed by pthread_mutex_t (40 bytes on Linux/glibc)
41        "std::mutex"
42        | "std::recursive_mutex"
43        | "std::timed_mutex"
44        | "std::recursive_timed_mutex"
45        | "pthread_mutex_t" => return (40, 8),
46        "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
47        // Condition variables
48        "std::condition_variable" | "pthread_cond_t" => return (48, 8),
49        // std::atomic<T> — same size as T; extract and recurse
50        ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
51            let inner = &ty[12..ty.len() - 1];
52            return c_type_size_align(inner.trim(), arch);
53        }
54        _ => {} // fall through to primitive types below
55    }
56    // Primitive / stdint / pointer types
57    match ty {
58        "char" | "_Bool" | "bool" => (1, 1),
59        "short" | "short int" => (2, 2),
60        "int" => (4, 4),
61        "long" => (arch.pointer_size, arch.pointer_size),
62        "long long" => (8, 8),
63        "float" => (4, 4),
64        "double" => (8, 8),
65        "long double" => (16, 16),
66        "int8_t" | "uint8_t" => (1, 1),
67        "int16_t" | "uint16_t" => (2, 2),
68        "int32_t" | "uint32_t" => (4, 4),
69        "int64_t" | "uint64_t" => (8, 8),
70        "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
71            (arch.pointer_size, arch.pointer_size)
72        }
73        // Pointer types
74        ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
75        // Unknown — use pointer size as a reasonable default
76        _ => (arch.pointer_size, arch.pointer_size),
77    }
78}
79
80// ── struct / union simulation ─────────────────────────────────────────────────
81
/// Drop a trailing bit-field width (`:N`) from a type name so the base type
/// can be used for size lookup.
///
/// Only the text after the *last* `:` is inspected, and it must consist solely
/// of ASCII digits (surrounding whitespace ignored). So `"int:3"` becomes
/// `"int"`, while `"std::atomic"` is returned unchanged.
fn strip_bitfield_suffix(ty: &str) -> &str {
    let Some((base, width)) = ty.rsplit_once(':') else {
        return ty;
    };
    let width = width.trim();
    let looks_like_width = !width.is_empty() && width.bytes().all(|b| b.is_ascii_digit());
    if looks_like_width {
        base.trim_end()
    } else {
        ty
    }
}
93
94/// Return `true` when `ty` carries a bit-field width annotation (e.g. `"int:3"`).
95/// Bit-field packing is compiler-controlled and cannot be accurately modelled
96/// without a compiler, so structs containing bit-field members are skipped.
97fn is_bitfield_type(ty: &str) -> bool {
98    strip_bitfield_suffix(ty) != ty
99}
100
101/// Simulate C/C++ struct layout given ordered fields.
102///
103/// When `packed` is `true` the layout mirrors `__attribute__((packed))`:
104/// no inter-field alignment padding is inserted and the struct alignment
105/// is forced to 1. This matches GCC/Clang behaviour for packed structs.
106fn simulate_layout(
107    fields: &mut Vec<Field>,
108    struct_name: String,
109    arch: &'static ArchConfig,
110    source_line: Option<u32>,
111    packed: bool,
112) -> StructLayout {
113    let mut offset = 0usize;
114    let mut struct_align = 1usize;
115
116    for f in fields.iter_mut() {
117        if !packed && f.align > 0 {
118            offset = offset.next_multiple_of(f.align);
119        }
120        f.offset = offset;
121        offset += f.size;
122        if !packed {
123            struct_align = struct_align.max(f.align);
124        }
125    }
126    // Trailing padding (not present in packed structs)
127    if !packed && struct_align > 0 {
128        offset = offset.next_multiple_of(struct_align);
129    }
130
131    StructLayout {
132        name: struct_name,
133        total_size: offset,
134        align: struct_align,
135        fields: std::mem::take(fields),
136        source_file: None,
137        source_line,
138        arch,
139        is_packed: packed,
140        is_union: false,
141    }
142}
143
144/// Simulate a C/C++ union layout: all fields start at offset 0;
145/// total size is the largest field, rounded to max alignment.
146fn simulate_union_layout(
147    fields: &mut Vec<Field>,
148    name: String,
149    arch: &'static ArchConfig,
150    source_line: Option<u32>,
151) -> StructLayout {
152    for f in fields.iter_mut() {
153        f.offset = 0;
154    }
155    let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
156    let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
157    let total_size = if max_align > 0 {
158        max_size.next_multiple_of(max_align)
159    } else {
160        max_size
161    };
162
163    StructLayout {
164        name,
165        total_size,
166        align: max_align,
167        fields: std::mem::take(fields),
168        source_file: None,
169        source_line,
170        arch,
171        is_packed: false,
172        is_union: true,
173    }
174}
175
176// ── C++ class parsing (vtable + inheritance) ──────────────────────────────────
177
178/// Parse a `class_specifier` node, modelling:
179/// - A hidden vtable pointer (`__vptr`) when any method is `virtual`.
180/// - Base-class storage as a synthetic `__base_<Name>` field (size resolved
181///   later by the nested-struct resolution pass in `lib.rs`).
182fn parse_class_specifier(
183    source: &str,
184    node: Node<'_>,
185    arch: &'static ArchConfig,
186) -> Option<StructLayout> {
187    let mut class_name = "<anonymous>".to_string();
188    let mut base_names: Vec<String> = Vec::new();
189    let mut body_node: Option<Node> = None;
190    let mut is_packed = false;
191
192    for i in 0..node.child_count() {
193        let child = node.child(i)?;
194        match child.kind() {
195            "type_identifier" => class_name = source[child.byte_range()].to_string(),
196            "base_class_clause" => {
197                // tree-sitter-cpp structure: ':' [access_specifier] type_identifier
198                // type_identifier nodes are direct children of base_class_clause.
199                for j in 0..child.child_count() {
200                    if let Some(base) = child.child(j) {
201                        if base.kind() == "type_identifier" {
202                            base_names.push(source[base.byte_range()].to_string());
203                        }
204                    }
205                }
206            }
207            "field_declaration_list" => body_node = Some(child),
208            "attribute_specifier" => {
209                if source[child.byte_range()].contains("packed") {
210                    is_packed = true;
211                }
212            }
213            _ => {}
214        }
215    }
216
217    let body = body_node?;
218
219    // Detect virtual methods: look for `virtual` keyword anywhere in body
220    let has_virtual = contains_virtual_keyword(source, body);
221
222    // Collect declared fields
223    let mut raw_fields: Vec<(String, String, Option<String>)> = Vec::new();
224    for i in 0..body.child_count() {
225        if let Some(child) = body.child(i) {
226            if child.kind() == "field_declaration" {
227                if let Some((ty, fname, guard)) = parse_field_declaration(source, child) {
228                    raw_fields.push((fname, ty, guard));
229                }
230            }
231        }
232    }
233
234    // Build fields: vtable pointer, then base-class slots, then declared fields
235    let mut fields: Vec<Field> = Vec::new();
236
237    // Virtual dispatch pointer (hidden, at offset 0 for the first virtual class)
238    if has_virtual {
239        let ps = arch.pointer_size;
240        fields.push(Field {
241            name: "__vptr".to_string(),
242            ty: TypeInfo::Pointer {
243                size: ps,
244                align: ps,
245            },
246            offset: 0,
247            size: ps,
248            align: ps,
249            source_file: None,
250            source_line: None,
251            access: AccessPattern::Unknown,
252        });
253    }
254
255    // Base class storage (opaque until nested-struct resolver fills in sizes)
256    for base in &base_names {
257        let ps = arch.pointer_size;
258        fields.push(Field {
259            name: format!("__base_{base}"),
260            ty: TypeInfo::Opaque {
261                name: base.clone(),
262                size: ps,
263                align: ps,
264            },
265            offset: 0,
266            size: ps,
267            align: ps,
268            source_file: None,
269            source_line: None,
270            access: AccessPattern::Unknown,
271        });
272    }
273
274    // Skip classes with bit-field members (same reason as structs).
275    if raw_fields.iter().any(|(_, ty, _)| is_bitfield_type(ty)) {
276        return None;
277    }
278
279    // Declared member fields
280    for (fname, ty_name, guard) in raw_fields {
281        let (size, align) = c_type_size_align(&ty_name, arch);
282        let access = if let Some(g) = guard {
283            AccessPattern::Concurrent {
284                guard: Some(g),
285                is_atomic: false,
286            }
287        } else {
288            AccessPattern::Unknown
289        };
290        fields.push(Field {
291            name: fname,
292            ty: TypeInfo::Primitive {
293                name: ty_name,
294                size,
295                align,
296            },
297            offset: 0,
298            size,
299            align,
300            source_file: None,
301            source_line: None,
302            access,
303        });
304    }
305
306    if fields.is_empty() {
307        return None;
308    }
309
310    let line = node.start_position().row as u32 + 1;
311    Some(simulate_layout(
312        &mut fields,
313        class_name,
314        arch,
315        Some(line),
316        is_packed,
317    ))
318}
319
320/// Return true if a `field_declaration_list` node contains any `virtual` keyword
321/// (indicating that the class needs a vtable pointer).
322fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
323    let mut stack = vec![node];
324    while let Some(n) = stack.pop() {
325        if n.kind() == "virtual" {
326            return true;
327        }
328        // Also check raw text for cases where tree-sitter may not produce a
329        // dedicated `virtual` node (e.g. inside complex declarations).
330        if n.child_count() == 0 {
331            let text = &source[n.byte_range()];
332            if text == "virtual" {
333                return true;
334            }
335        }
336        for i in (0..n.child_count()).rev() {
337            if let Some(child) = n.child(i) {
338                stack.push(child);
339            }
340        }
341    }
342    false
343}
344
345// ── tree-sitter walker ────────────────────────────────────────────────────────
346
347fn extract_structs_from_tree(
348    source: &str,
349    root: Node<'_>,
350    arch: &'static ArchConfig,
351    layouts: &mut Vec<StructLayout>,
352) {
353    let cursor = root.walk();
354    let mut stack = vec![root];
355
356    while let Some(node) = stack.pop() {
357        // Push children in reverse so we process left-to-right
358        for i in (0..node.child_count()).rev() {
359            if let Some(child) = node.child(i) {
360                stack.push(child);
361            }
362        }
363
364        match node.kind() {
365            "struct_specifier" => {
366                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, false) {
367                    layouts.push(layout);
368                }
369            }
370            "union_specifier" => {
371                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, true) {
372                    layouts.push(layout);
373                }
374            }
375            "class_specifier" => {
376                if let Some(layout) = parse_class_specifier(source, node, arch) {
377                    layouts.push(layout);
378                }
379            }
380            _ => {}
381        }
382    }
383
384    // Also handle `typedef struct/union { ... } Name;`
385    let cursor2 = root.walk();
386    let mut stack2 = vec![root];
387    while let Some(node) = stack2.pop() {
388        for i in (0..node.child_count()).rev() {
389            if let Some(child) = node.child(i) {
390                stack2.push(child);
391            }
392        }
393        if node.kind() == "type_definition" {
394            if let Some(layout) = parse_typedef_struct_or_union(source, node, arch) {
395                let existing = layouts
396                    .iter()
397                    .position(|l| l.name == layout.name || l.name == "<anonymous>");
398                match existing {
399                    Some(i) if layouts[i].name == "<anonymous>" => {
400                        layouts[i] = layout;
401                    }
402                    None => layouts.push(layout),
403                    _ => {}
404                }
405            }
406        }
407    }
408    let _ = cursor;
409    let _ = cursor2; // silence unused warnings
410}
411
412/// Parse a `struct_specifier` or `union_specifier` node into a `StructLayout`.
413fn parse_struct_or_union_specifier(
414    source: &str,
415    node: Node<'_>,
416    arch: &'static ArchConfig,
417    is_union: bool,
418) -> Option<StructLayout> {
419    let mut name = "<anonymous>".to_string();
420    let mut body_node: Option<Node> = None;
421    let mut is_packed = false;
422
423    for i in 0..node.child_count() {
424        let child = node.child(i)?;
425        match child.kind() {
426            "type_identifier" => name = source[child.byte_range()].to_string(),
427            "field_declaration_list" => body_node = Some(child),
428            "attribute_specifier" => {
429                if source[child.byte_range()].contains("packed") {
430                    is_packed = true;
431                }
432            }
433            _ => {}
434        }
435    }
436
437    let body = body_node?;
438    let mut raw_fields: Vec<(String, String, Option<String>)> = Vec::new();
439
440    for i in 0..body.child_count() {
441        let child = body.child(i)?;
442        if child.kind() == "field_declaration" {
443            if let Some((ty, fname, guard)) = parse_field_declaration(source, child) {
444                raw_fields.push((fname, ty, guard));
445            }
446        }
447    }
448
449    if raw_fields.is_empty() {
450        return None;
451    }
452
453    // Bit-field packing is compiler-controlled and cannot be accurately modelled
454    // without a compiler. Skip the entire struct to avoid producing wrong layout
455    // data. Use `padlock analyze` on the compiled binary for accurate results.
456    if raw_fields.iter().any(|(_, ty, _)| is_bitfield_type(ty)) {
457        return None;
458    }
459
460    let mut fields: Vec<Field> = raw_fields
461        .into_iter()
462        .map(|(fname, ty_name, guard)| {
463            let (size, align) = c_type_size_align(&ty_name, arch);
464            let access = if let Some(g) = guard {
465                AccessPattern::Concurrent {
466                    guard: Some(g),
467                    is_atomic: false,
468                }
469            } else {
470                AccessPattern::Unknown
471            };
472            Field {
473                name: fname,
474                ty: TypeInfo::Primitive {
475                    name: ty_name,
476                    size,
477                    align,
478                },
479                offset: 0,
480                size,
481                align,
482                source_file: None,
483                source_line: None,
484                access,
485            }
486        })
487        .collect();
488
489    let line = node.start_position().row as u32 + 1;
490    if is_union {
491        Some(simulate_union_layout(&mut fields, name, arch, Some(line)))
492    } else {
493        Some(simulate_layout(
494            &mut fields,
495            name,
496            arch,
497            Some(line),
498            is_packed,
499        ))
500    }
501}
502
503/// Parse a `typedef struct/union { ... } Name;` type_definition node.
504fn parse_typedef_struct_or_union(
505    source: &str,
506    node: Node<'_>,
507    arch: &'static ArchConfig,
508) -> Option<StructLayout> {
509    let mut specifier_node: Option<Node> = None;
510    let mut is_union = false;
511    let mut typedef_name: Option<String> = None;
512
513    for i in 0..node.child_count() {
514        let child = node.child(i)?;
515        match child.kind() {
516            "struct_specifier" => {
517                specifier_node = Some(child);
518                is_union = false;
519            }
520            "union_specifier" => {
521                specifier_node = Some(child);
522                is_union = true;
523            }
524            "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
525            _ => {}
526        }
527    }
528
529    let spec = specifier_node?;
530    let typedef_name = typedef_name?;
531
532    let mut layout = parse_struct_or_union_specifier(source, spec, arch, is_union)?;
533    if layout.name == "<anonymous>" {
534        layout.name = typedef_name;
535    }
536    Some(layout)
537}
538
539// Alias kept for the typedef pass in extract_structs_from_tree.
540#[allow(dead_code)]
541fn parse_typedef_struct(
542    source: &str,
543    node: Node<'_>,
544    arch: &'static ArchConfig,
545) -> Option<StructLayout> {
546    parse_typedef_struct_or_union(source, node, arch)
547}
548
/// Extract a lock guard name from the source text of a C/C++ field.
///
/// Recognised spellings are the attribute forms `guarded_by(X)` /
/// `pt_guarded_by(X)` (as in `__attribute__((guarded_by(X)))`) and the macro
/// forms `GUARDED_BY(X)` / `PT_GUARDED_BY(X)`. Matching is case-sensitive and
/// works on raw text, so it does not depend on how the parser tokenised the
/// attribute.
///
/// `X` is everything up to the parenthesis that *matches* the opening one, so
/// guard expressions containing calls (`GUARDED_BY(obj.mutex())`) are kept
/// whole. Surrounding whitespace and double quotes are removed. Every
/// occurrence of a keyword is tried, so an identifier that merely contains
/// `guarded_by` does not hide a later real annotation.
///
/// Returns `None` when no keyword is followed by a balanced, non-empty
/// argument.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    /// Byte index of the `)` that closes an already-opened `(`, if any.
    fn closing_paren(after_open: &str) -> Option<usize> {
        let mut depth = 0usize;
        for (idx, byte) in after_open.bytes().enumerate() {
            match byte {
                b'(' => depth += 1,
                b')' if depth == 0 => return Some(idx),
                b')' => depth -= 1,
                _ => {}
            }
        }
        None
    }

    // The `pt_` / `PT_` variants contain these as substrings, so two needles
    // cover all four spellings. Lowercase (attribute) forms are tried first.
    const KEYWORDS: [&str; 2] = ["guarded_by", "GUARDED_BY"];

    for kw in KEYWORDS.iter() {
        for (pos, _) in field_source.match_indices(*kw) {
            // Expect `(`, optionally preceded by whitespace.
            let after = field_source[pos + kw.len()..].trim_start();
            let Some(args) = after.strip_prefix('(') else {
                continue;
            };
            let Some(end) = closing_paren(args) else {
                continue;
            };
            let guard = args[..end].trim().trim_matches('"');
            if !guard.is_empty() {
                return Some(guard.to_string());
            }
        }
    }
    None
}
576
577fn parse_field_declaration(
578    source: &str,
579    node: Node<'_>,
580) -> Option<(String, String, Option<String>)> {
581    let mut ty_parts: Vec<String> = Vec::new();
582    let mut field_name: Option<String> = None;
583    // Bit-field width, e.g. `int flags : 3;` → Some("3")
584    let mut bit_width: Option<String> = None;
585    // Collect attribute text for guard extraction
586    let mut attr_text = String::new();
587
588    for i in 0..node.child_count() {
589        let child = node.child(i)?;
590        match child.kind() {
591            "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
592                ty_parts.push(source[child.byte_range()].trim().to_string());
593            }
594            // C++ qualified types: std::mutex, ns::Type, etc.
595            // C++ template types:  std::atomic<uint64_t>, std::vector<int>, etc.
596            "qualified_identifier" | "template_type" => {
597                ty_parts.push(source[child.byte_range()].trim().to_string());
598            }
599            // Nested struct/union used as a field type: `struct Vec2 tl;`
600            // Extract just the type_identifier name (e.g. "Vec2") so the
601            // nested-struct resolution pass can match it by name.
602            "struct_specifier" | "union_specifier" => {
603                for j in 0..child.child_count() {
604                    if let Some(sub) = child.child(j) {
605                        if sub.kind() == "type_identifier" {
606                            ty_parts.push(source[sub.byte_range()].trim().to_string());
607                            break;
608                        }
609                    }
610                }
611            }
612            "field_identifier" => {
613                field_name = Some(source[child.byte_range()].trim().to_string());
614            }
615            "pointer_declarator" => {
616                field_name = extract_identifier(source, child);
617                ty_parts.push("*".to_string());
618            }
619            // Bit-field clause: `: N`  (tree-sitter-c/cpp node)
620            "bitfield_clause" => {
621                let text = source[child.byte_range()].trim();
622                // Strip leading ':' and whitespace to get just the width digits
623                bit_width = Some(text.trim_start_matches(':').trim().to_string());
624            }
625            // GNU attribute specifier: __attribute__((...))
626            "attribute_specifier" | "attribute" => {
627                attr_text.push_str(source[child.byte_range()].trim());
628                attr_text.push(' ');
629            }
630            _ => {}
631        }
632    }
633
634    let base_ty = ty_parts.join(" ");
635    let fname = field_name?;
636    if base_ty.is_empty() {
637        return None;
638    }
639    // Annotate bit-field types as "type:N" so callers can detect and report them;
640    // `strip_bitfield_suffix` recovers the base type for size/align lookup.
641    let ty = if let Some(w) = bit_width {
642        format!("{base_ty}:{w}")
643    } else {
644        base_ty
645    };
646
647    // Also check the full field source text (attribute_specifier may not always
648    // be a direct child depending on tree-sitter grammar version).
649    let field_src = source[node.byte_range()].to_string();
650    let guard = extract_guard_from_c_field_text(&attr_text)
651        .or_else(|| extract_guard_from_c_field_text(&field_src));
652
653    Some((ty, fname, guard))
654}
655
656fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
657    if node.kind() == "field_identifier" || node.kind() == "identifier" {
658        return Some(source[node.byte_range()].to_string());
659    }
660    for i in 0..node.child_count() {
661        if let Some(child) = node.child(i) {
662            if let Some(name) = extract_identifier(source, child) {
663                return Some(name);
664            }
665        }
666    }
667    None
668}
669
670// ── public API ────────────────────────────────────────────────────────────────
671
672pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
673    let mut parser = Parser::new();
674    parser.set_language(&tree_sitter_c::language())?;
675    let tree = parser
676        .parse(source, None)
677        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
678    let mut layouts = Vec::new();
679    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
680    Ok(layouts)
681}
682
683pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
684    let mut parser = Parser::new();
685    parser.set_language(&tree_sitter_cpp::language())?;
686    let tree = parser
687        .parse(source, None)
688        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
689    let mut layouts = Vec::new();
690    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
691    Ok(layouts)
692}
693
694// ── tests ─────────────────────────────────────────────────────────────────────
695
696#[cfg(test)]
697mod tests {
698    use super::*;
699    use padlock_core::arch::X86_64_SYSV;
700
701    #[test]
702    fn parse_simple_c_struct() {
703        let src = r#"
704struct Point {
705    int x;
706    int y;
707};
708"#;
709        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
710        assert_eq!(layouts.len(), 1);
711        assert_eq!(layouts[0].name, "Point");
712        assert_eq!(layouts[0].fields.len(), 2);
713        assert_eq!(layouts[0].fields[0].name, "x");
714        assert_eq!(layouts[0].fields[1].name, "y");
715    }
716
717    #[test]
718    fn parse_typedef_struct() {
719        let src = r#"
720typedef struct {
721    char  is_active;
722    double timeout;
723    int   port;
724} Connection;
725"#;
726        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
727        assert_eq!(layouts.len(), 1);
728        assert_eq!(layouts[0].name, "Connection");
729        assert_eq!(layouts[0].fields.len(), 3);
730    }
731
732    #[test]
733    fn c_layout_computes_offsets() {
734        let src = "struct T { char a; double b; };";
735        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
736        assert_eq!(layouts.len(), 1);
737        let layout = &layouts[0];
738        // char at offset 0, double at offset 8 (7 bytes padding)
739        assert_eq!(layout.fields[0].offset, 0);
740        assert_eq!(layout.fields[1].offset, 8);
741        assert_eq!(layout.total_size, 16);
742    }
743
744    #[test]
745    fn c_layout_detects_padding() {
746        let src = "struct T { char a; int b; };";
747        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
748        let gaps = padlock_core::ir::find_padding(&layouts[0]);
749        assert!(!gaps.is_empty());
750        assert_eq!(gaps[0].bytes, 3); // 3 bytes padding between char and int
751    }
752
753    #[test]
754    fn parse_cpp_struct() {
755        let src = "struct Vec3 { float x; float y; float z; };";
756        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
757        assert_eq!(layouts.len(), 1);
758        assert_eq!(layouts[0].fields.len(), 3);
759    }
760
761    // ── SIMD types ────────────────────────────────────────────────────────────
762
763    #[test]
764    fn simd_sse_field_size_and_align() {
765        let src = "struct Vecs { __m128 a; __m256 b; };";
766        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
767        assert_eq!(layouts.len(), 1);
768        let f = &layouts[0].fields;
769        assert_eq!(f[0].size, 16); // __m128
770        assert_eq!(f[0].align, 16);
771        assert_eq!(f[1].size, 32); // __m256
772        assert_eq!(f[1].align, 32);
773    }
774
775    #[test]
776    fn simd_avx512_size() {
777        let src = "struct Wide { __m512 v; };";
778        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
779        assert_eq!(layouts[0].fields[0].size, 64);
780        assert_eq!(layouts[0].fields[0].align, 64);
781    }
782
783    #[test]
784    fn simd_padding_detected_when_small_field_before_avx() {
785        // char(1) + [31 pad] + __m256(32) = 64 bytes, 31 wasted
786        let src = "struct Mixed { char flag; __m256 data; };";
787        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
788        let gaps = padlock_core::ir::find_padding(&layouts[0]);
789        assert!(!gaps.is_empty());
790        assert_eq!(gaps[0].bytes, 31);
791    }
792
793    // ── union parsing ─────────────────────────────────────────────────────────
794
795    #[test]
796    fn union_fields_all_at_offset_zero() {
797        let src = "union Data { int i; float f; double d; };";
798        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
799        assert_eq!(layouts.len(), 1);
800        let u = &layouts[0];
801        assert!(u.is_union);
802        for field in &u.fields {
803            assert_eq!(
804                field.offset, 0,
805                "union field '{}' should be at offset 0",
806                field.name
807            );
808        }
809    }
810
811    #[test]
812    fn union_total_size_is_max_field() {
813        // double is the largest (8 bytes); total should be 8
814        let src = "union Data { int i; float f; double d; };";
815        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
816        assert_eq!(layouts[0].total_size, 8);
817    }
818
819    #[test]
820    fn union_no_padding_finding() {
821        let src = "union Data { int i; double d; };";
822        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
823        let report = padlock_core::findings::Report::from_layouts(&layouts);
824        let sr = &report.structs[0];
825        assert!(!sr
826            .findings
827            .iter()
828            .any(|f| matches!(f, padlock_core::findings::Finding::PaddingWaste { .. })));
829        assert!(!sr
830            .findings
831            .iter()
832            .any(|f| matches!(f, padlock_core::findings::Finding::ReorderSuggestion { .. })));
833    }
834
835    #[test]
836    fn typedef_union_parsed() {
837        let src = "typedef union { int a; double b; } Value;";
838        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
839        assert_eq!(layouts.len(), 1);
840        assert_eq!(layouts[0].name, "Value");
841        assert!(layouts[0].is_union);
842    }
843
844    // ── attribute guard extraction ─────────────────────────────────────────────
845
846    #[test]
847    fn extract_guard_from_c_guarded_by_macro() {
848        let text = "int value GUARDED_BY(mu);";
849        let guard = extract_guard_from_c_field_text(text);
850        assert_eq!(guard.as_deref(), Some("mu"));
851    }
852
853    #[test]
854    fn extract_guard_from_c_attribute_specifier() {
855        let text = "__attribute__((guarded_by(counter_lock))) uint64_t counter;";
856        let guard = extract_guard_from_c_field_text(text);
857        assert_eq!(guard.as_deref(), Some("counter_lock"));
858    }
859
860    #[test]
861    fn extract_guard_pt_guarded_by() {
862        let text = "int *ptr PT_GUARDED_BY(ptr_lock);";
863        let guard = extract_guard_from_c_field_text(text);
864        assert_eq!(guard.as_deref(), Some("ptr_lock"));
865    }
866
867    #[test]
868    fn no_guard_returns_none() {
869        let guard = extract_guard_from_c_field_text("int x;");
870        assert!(guard.is_none());
871    }
872
873    #[test]
874    fn c_struct_guarded_by_sets_concurrent_access() {
875        // Using GUARDED_BY macro style in comments/text — tree-sitter won't parse
876        // macro expansions, so test the text-extraction path via parse_field_declaration
877        // indirectly by checking extract_guard_from_c_field_text.
878        let text = "uint64_t readers GUARDED_BY(lock_a);";
879        assert_eq!(
880            extract_guard_from_c_field_text(text).as_deref(),
881            Some("lock_a")
882        );
883    }
884
885    #[test]
886    fn c_struct_different_guards_detected_as_false_sharing() {
887        use padlock_core::arch::X86_64_SYSV;
888        use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
889
890        // Manually build a layout with two fields on the same cache line,
891        // different guards — mirrors what the C frontend would produce for
892        // __attribute__((guarded_by(...))) annotated fields.
893        let mut layout = StructLayout {
894            name: "S".into(),
895            total_size: 128,
896            align: 8,
897            fields: vec![
898                Field {
899                    name: "readers".into(),
900                    ty: TypeInfo::Primitive {
901                        name: "uint64_t".into(),
902                        size: 8,
903                        align: 8,
904                    },
905                    offset: 0,
906                    size: 8,
907                    align: 8,
908                    source_file: None,
909                    source_line: None,
910                    access: AccessPattern::Concurrent {
911                        guard: Some("lock_a".into()),
912                        is_atomic: false,
913                    },
914                },
915                Field {
916                    name: "writers".into(),
917                    ty: TypeInfo::Primitive {
918                        name: "uint64_t".into(),
919                        size: 8,
920                        align: 8,
921                    },
922                    offset: 8,
923                    size: 8,
924                    align: 8,
925                    source_file: None,
926                    source_line: None,
927                    access: AccessPattern::Concurrent {
928                        guard: Some("lock_b".into()),
929                        is_atomic: false,
930                    },
931                },
932            ],
933            source_file: None,
934            source_line: None,
935            arch: &X86_64_SYSV,
936            is_packed: false,
937            is_union: false,
938        };
939        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
940            &layout
941        ));
942        // Same guard → no false sharing
943        layout.fields[1].access = AccessPattern::Concurrent {
944            guard: Some("lock_a".into()),
945            is_atomic: false,
946        };
947        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
948            &layout
949        ));
950    }
951
952    // ── C++ class: vtable pointer ─────────────────────────────────────────────
953
954    #[test]
955    fn cpp_class_with_virtual_method_has_vptr() {
956        let src = r#"
957class Widget {
958    virtual void draw();
959    int x;
960    int y;
961};
962"#;
963        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
964        assert_eq!(layouts.len(), 1);
965        let l = &layouts[0];
966        // First field must be __vptr
967        assert_eq!(l.fields[0].name, "__vptr");
968        assert_eq!(l.fields[0].size, 8); // pointer on x86_64
969                                         // __vptr is at offset 0
970        assert_eq!(l.fields[0].offset, 0);
971        // int x should come after the pointer (at offset 8)
972        let x = l.fields.iter().find(|f| f.name == "x").unwrap();
973        assert_eq!(x.offset, 8);
974    }
975
976    #[test]
977    fn cpp_class_without_virtual_has_no_vptr() {
978        let src = "class Plain { int a; int b; };";
979        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
980        assert_eq!(layouts.len(), 1);
981        assert!(!layouts[0].fields.iter().any(|f| f.name == "__vptr"));
982    }
983
984    #[test]
985    fn cpp_struct_keyword_with_virtual_has_vptr() {
986        // `struct` in C++ can also have virtual methods
987        let src = "struct IFoo { virtual ~IFoo(); virtual void bar(); };";
988        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
989        // struct_specifier doesn't go through parse_class_specifier, so no __vptr
990        // (vtable injection is only for `class` nodes)
991        let _ = layouts; // just verify it parses without panic
992    }
993
994    // ── C++ class: single inheritance ─────────────────────────────────────────
995
996    #[test]
997    fn cpp_derived_class_has_base_slot() {
998        let src = r#"
999class Base {
1000    int x;
1001};
1002class Derived : public Base {
1003    int y;
1004};
1005"#;
1006        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1007        // Both Base and Derived should be parsed
1008        let derived = layouts.iter().find(|l| l.name == "Derived").unwrap();
1009        // Derived must have a __base_Base synthetic field
1010        assert!(
1011            derived.fields.iter().any(|f| f.name == "__base_Base"),
1012            "Derived should have a __base_Base field"
1013        );
1014        // The y field should come after __base_Base
1015        let base_field = derived
1016            .fields
1017            .iter()
1018            .find(|f| f.name == "__base_Base")
1019            .unwrap();
1020        let y_field = derived.fields.iter().find(|f| f.name == "y").unwrap();
1021        assert!(y_field.offset >= base_field.offset + base_field.size);
1022    }
1023
1024    #[test]
1025    fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
1026        let src = r#"
1027class A { int a; };
1028class B { int b; };
1029class C : public A, public B { int c; };
1030"#;
1031        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1032        let c = layouts.iter().find(|l| l.name == "C").unwrap();
1033        assert!(c.fields.iter().any(|f| f.name == "__base_A"));
1034        assert!(c.fields.iter().any(|f| f.name == "__base_B"));
1035    }
1036
1037    #[test]
1038    fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
1039        // class with virtual method: size = sizeof(__vptr) + member fields + padding
1040        let src = "class V { virtual void f(); int x; };";
1041        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1042        let l = &layouts[0];
1043        // __vptr(8) + int(4) + 4 pad = 16 bytes on x86_64
1044        assert_eq!(l.total_size, 16);
1045    }
1046
1047    // ── bitfield handling ─────────────────────────────────────────────────────
1048
1049    #[test]
1050    fn is_bitfield_type_detects_colon_n() {
1051        assert!(is_bitfield_type("int:3"));
1052        assert!(is_bitfield_type("unsigned int:16"));
1053        assert!(is_bitfield_type("uint32_t:1"));
1054        // Not bit-fields — contains ':' but not followed by pure digits
1055        assert!(!is_bitfield_type("std::atomic<int>"));
1056        assert!(!is_bitfield_type("ns::Type"));
1057        assert!(!is_bitfield_type("int"));
1058    }
1059
1060    #[test]
1061    fn struct_with_bitfields_is_skipped() {
1062        // Bit-field layout is compiler-controlled and cannot be accurately modelled
1063        // without a compiler. The struct must be skipped entirely.
1064        let src = r#"
1065struct Flags {
1066    unsigned int active : 1;
1067    unsigned int ready  : 1;
1068    unsigned int error  : 6;
1069    int value;
1070};
1071"#;
1072        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1073        // Flags must not appear — its layout cannot be accurately computed.
1074        assert!(
1075            layouts.iter().all(|l| l.name != "Flags"),
1076            "struct with bitfields should be skipped; got {:?}",
1077            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
1078        );
1079    }
1080
1081    #[test]
1082    fn struct_without_bitfields_is_still_parsed() {
1083        // Ensure the bitfield guard doesn't affect normal structs.
1084        let src = "struct Normal { int a; char b; double c; };";
1085        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1086        assert_eq!(layouts.len(), 1);
1087        assert_eq!(layouts[0].name, "Normal");
1088    }
1089
1090    #[test]
1091    fn cpp_class_with_bitfields_is_skipped() {
1092        let src = "class Packed { int x : 4; int y : 4; };";
1093        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1094        assert!(
1095            layouts.iter().all(|l| l.name != "Packed"),
1096            "C++ class with bitfields should be skipped"
1097        );
1098    }
1099
1100    // ── __attribute__((packed)) detection ─────────────────────────────────────
1101
1102    #[test]
1103    fn packed_struct_has_no_alignment_padding() {
1104        // Without packed: char(1) + 3-byte pad + int(4) + char(1) + 3-byte pad = 12 bytes
1105        // With packed:    char(1) + int(4) + char(1) = 6 bytes, align=1
1106        let src = r#"
1107struct __attribute__((packed)) Tight {
1108    char a;
1109    int  b;
1110    char c;
1111};
1112"#;
1113        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1114        let l = layouts.iter().find(|l| l.name == "Tight").expect("Tight");
1115        assert!(l.is_packed, "should be marked is_packed");
1116        assert_eq!(l.total_size, 6, "packed: no padding inserted");
1117        assert_eq!(l.fields[0].offset, 0);
1118        assert_eq!(l.fields[1].offset, 1); // immediately after char
1119        assert_eq!(l.fields[2].offset, 5);
1120    }
1121
1122    #[test]
1123    fn non_packed_struct_has_normal_alignment_padding() {
1124        // Confirm baseline: same struct without __attribute__((packed)) gets padded
1125        let src = r#"
1126struct Normal {
1127    char a;
1128    int  b;
1129    char c;
1130};
1131"#;
1132        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1133        let l = layouts.iter().find(|l| l.name == "Normal").expect("Normal");
1134        assert!(!l.is_packed);
1135        assert_eq!(l.total_size, 12);
1136        assert_eq!(l.fields[1].offset, 4); // aligned to 4
1137    }
1138
1139    #[test]
1140    fn cpp_class_packed_attribute_detected() {
1141        let src = r#"
1142class __attribute__((packed)) Dense {
1143    char a;
1144    int  b;
1145};
1146"#;
1147        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1148        let l = layouts.iter().find(|l| l.name == "Dense").expect("Dense");
1149        assert!(
1150            l.is_packed,
1151            "C++ class with __attribute__((packed)) must be marked packed"
1152        );
1153        assert_eq!(l.total_size, 5); // char(1) + int(4), no padding
1154    }
1155}