Skip to main content

padlock_source/frontends/
c_cpp.rs

1// padlock-source/src/frontends/c_cpp.rs
2//
3// Extracts struct layouts from C / C++ source using tree-sitter.
4// Sizes and alignments are computed from field type names + arch config;
5// there is no compiler involved so the results are approximate for complex types.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use tree_sitter::{Node, Parser};
10
11// ── type resolution ───────────────────────────────────────────────────────────
12
13/// Map a C/C++ type name to (size, align) using the target arch.
14fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
15    let ty = ty.trim();
16    // Strip qualifiers
17    for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
18        if let Some(rest) = ty.strip_prefix(qual) {
19            return c_type_size_align(rest, arch);
20        }
21    }
22    // x86 SSE / AVX / AVX-512 SIMD types
23    match ty {
24        "__m64" => return (8, 8),
25        "__m128" | "__m128d" | "__m128i" => return (16, 16),
26        "__m256" | "__m256d" | "__m256i" => return (32, 32),
27        "__m512" | "__m512d" | "__m512i" => return (64, 64),
28        // ARM NEON — 64-bit (double-word) vectors
29        "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
30        | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
31        // ARM NEON — 128-bit (quad-word) vectors
32        "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
33        | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
34        _ => {}
35    }
36    // C++ standard library synchronisation types (Linux/glibc x86-64 defaults).
37    // Sizes are platform-approximate; accuracy is "good enough" for cache-line
38    // bucketing and false-sharing detection.
39    match ty {
40        // Mutexes — all backed by pthread_mutex_t (40 bytes on Linux/glibc)
41        "std::mutex"
42        | "std::recursive_mutex"
43        | "std::timed_mutex"
44        | "std::recursive_timed_mutex"
45        | "pthread_mutex_t" => return (40, 8),
46        "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
47        // Condition variables
48        "std::condition_variable" | "pthread_cond_t" => return (48, 8),
49        // std::atomic<T> — same size as T; extract and recurse
50        ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
51            let inner = &ty[12..ty.len() - 1];
52            return c_type_size_align(inner.trim(), arch);
53        }
54        _ => {} // fall through to primitive types below
55    }
56    // Primitive / stdint / pointer types
57    match ty {
58        "char" | "_Bool" | "bool" => (1, 1),
59        "short" | "short int" => (2, 2),
60        "int" => (4, 4),
61        "long" => (arch.pointer_size, arch.pointer_size),
62        "long long" => (8, 8),
63        "float" => (4, 4),
64        "double" => (8, 8),
65        "long double" => (16, 16),
66        "int8_t" | "uint8_t" => (1, 1),
67        "int16_t" | "uint16_t" => (2, 2),
68        "int32_t" | "uint32_t" => (4, 4),
69        "int64_t" | "uint64_t" => (8, 8),
70        "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
71            (arch.pointer_size, arch.pointer_size)
72        }
73        // Pointer types
74        ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
75        // Unknown — use pointer size as a reasonable default
76        _ => (arch.pointer_size, arch.pointer_size),
77    }
78}
79
80// ── struct / union simulation ─────────────────────────────────────────────────
81
/// Remove a trailing bit-field width (`:N`) from a type name.
///
/// The text after the last `:` must be a non-empty run of ASCII digits
/// (surrounding whitespace ignored) for the suffix to be removed; the part
/// before it is returned with trailing whitespace trimmed. Otherwise `ty` is
/// returned unchanged, so `"int:3"` → `"int"` while `"std::atomic"` stays as is.
fn strip_bitfield_suffix(ty: &str) -> &str {
    match ty.rsplit_once(':') {
        Some((base, width)) => {
            let digits = width.trim();
            let is_width = !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit());
            if is_width { base.trim_end() } else { ty }
        }
        None => ty,
    }
}
93
94/// Return `true` when `ty` carries a bit-field width annotation (e.g. `"int:3"`).
95/// Bit-field packing is compiler-controlled and cannot be accurately modelled
96/// without a compiler, so structs containing bit-field members are skipped.
97fn is_bitfield_type(ty: &str) -> bool {
98    strip_bitfield_suffix(ty) != ty
99}
100
101/// Simulate C/C++ struct layout given ordered fields.
102///
103/// When `packed` is `true` the layout mirrors `__attribute__((packed))`:
104/// no inter-field alignment padding is inserted and the struct alignment
105/// is forced to 1. This matches GCC/Clang behaviour for packed structs.
106fn simulate_layout(
107    fields: &mut Vec<Field>,
108    struct_name: String,
109    arch: &'static ArchConfig,
110    source_line: Option<u32>,
111    packed: bool,
112) -> StructLayout {
113    let mut offset = 0usize;
114    let mut struct_align = 1usize;
115
116    for f in fields.iter_mut() {
117        if !packed && f.align > 0 {
118            offset = offset.next_multiple_of(f.align);
119        }
120        f.offset = offset;
121        offset += f.size;
122        if !packed {
123            struct_align = struct_align.max(f.align);
124        }
125    }
126    // Trailing padding (not present in packed structs)
127    if !packed && struct_align > 0 {
128        offset = offset.next_multiple_of(struct_align);
129    }
130
131    StructLayout {
132        name: struct_name,
133        total_size: offset,
134        align: struct_align,
135        fields: std::mem::take(fields),
136        source_file: None,
137        source_line,
138        arch,
139        is_packed: packed,
140        is_union: false,
141    }
142}
143
144/// Simulate a C/C++ union layout: all fields start at offset 0;
145/// total size is the largest field, rounded to max alignment.
146fn simulate_union_layout(
147    fields: &mut Vec<Field>,
148    name: String,
149    arch: &'static ArchConfig,
150    source_line: Option<u32>,
151) -> StructLayout {
152    for f in fields.iter_mut() {
153        f.offset = 0;
154    }
155    let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
156    let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
157    let total_size = if max_align > 0 {
158        max_size.next_multiple_of(max_align)
159    } else {
160        max_size
161    };
162
163    StructLayout {
164        name,
165        total_size,
166        align: max_align,
167        fields: std::mem::take(fields),
168        source_file: None,
169        source_line,
170        arch,
171        is_packed: false,
172        is_union: true,
173    }
174}
175
176// ── C++ class parsing (vtable + inheritance) ──────────────────────────────────
177
178/// Parse a `class_specifier` node, modelling:
179/// - A hidden vtable pointer (`__vptr`) when any method is `virtual`.
180/// - Base-class storage as a synthetic `__base_<Name>` field (size resolved
181///   later by the nested-struct resolution pass in `lib.rs`).
182fn parse_class_specifier(
183    source: &str,
184    node: Node<'_>,
185    arch: &'static ArchConfig,
186) -> Option<StructLayout> {
187    let mut class_name = "<anonymous>".to_string();
188    let mut base_names: Vec<String> = Vec::new();
189    let mut body_node: Option<Node> = None;
190    let mut is_packed = false;
191
192    for i in 0..node.child_count() {
193        let child = node.child(i)?;
194        match child.kind() {
195            "type_identifier" => class_name = source[child.byte_range()].to_string(),
196            "base_class_clause" => {
197                // tree-sitter-cpp structure: ':' [access_specifier] type_identifier
198                // type_identifier nodes are direct children of base_class_clause.
199                for j in 0..child.child_count() {
200                    if let Some(base) = child.child(j)
201                        && base.kind() == "type_identifier"
202                    {
203                        base_names.push(source[base.byte_range()].to_string());
204                    }
205                }
206            }
207            "field_declaration_list" => body_node = Some(child),
208            "attribute_specifier" => {
209                if source[child.byte_range()].contains("packed") {
210                    is_packed = true;
211                }
212            }
213            _ => {}
214        }
215    }
216
217    let body = body_node?;
218
219    // Detect virtual methods: look for `virtual` keyword anywhere in body
220    let has_virtual = contains_virtual_keyword(source, body);
221
222    // Collect declared fields
223    let mut raw_fields: Vec<(String, String, Option<String>)> = Vec::new();
224    for i in 0..body.child_count() {
225        if let Some(child) = body.child(i)
226            && child.kind() == "field_declaration"
227            && let Some((ty, fname, guard)) = parse_field_declaration(source, child)
228        {
229            raw_fields.push((fname, ty, guard));
230        }
231    }
232
233    // Build fields: vtable pointer, then base-class slots, then declared fields
234    let mut fields: Vec<Field> = Vec::new();
235
236    // Virtual dispatch pointer (hidden, at offset 0 for the first virtual class)
237    if has_virtual {
238        let ps = arch.pointer_size;
239        fields.push(Field {
240            name: "__vptr".to_string(),
241            ty: TypeInfo::Pointer {
242                size: ps,
243                align: ps,
244            },
245            offset: 0,
246            size: ps,
247            align: ps,
248            source_file: None,
249            source_line: None,
250            access: AccessPattern::Unknown,
251        });
252    }
253
254    // Base class storage (opaque until nested-struct resolver fills in sizes)
255    for base in &base_names {
256        let ps = arch.pointer_size;
257        fields.push(Field {
258            name: format!("__base_{base}"),
259            ty: TypeInfo::Opaque {
260                name: base.clone(),
261                size: ps,
262                align: ps,
263            },
264            offset: 0,
265            size: ps,
266            align: ps,
267            source_file: None,
268            source_line: None,
269            access: AccessPattern::Unknown,
270        });
271    }
272
273    // Skip classes with bit-field members (same reason as structs).
274    if raw_fields.iter().any(|(_, ty, _)| is_bitfield_type(ty)) {
275        return None;
276    }
277
278    // Declared member fields
279    for (fname, ty_name, guard) in raw_fields {
280        let (size, align) = c_type_size_align(&ty_name, arch);
281        let access = if let Some(g) = guard {
282            AccessPattern::Concurrent {
283                guard: Some(g),
284                is_atomic: false,
285            }
286        } else {
287            AccessPattern::Unknown
288        };
289        fields.push(Field {
290            name: fname,
291            ty: TypeInfo::Primitive {
292                name: ty_name,
293                size,
294                align,
295            },
296            offset: 0,
297            size,
298            align,
299            source_file: None,
300            source_line: None,
301            access,
302        });
303    }
304
305    if fields.is_empty() {
306        return None;
307    }
308
309    let line = node.start_position().row as u32 + 1;
310    Some(simulate_layout(
311        &mut fields,
312        class_name,
313        arch,
314        Some(line),
315        is_packed,
316    ))
317}
318
319/// Return true if a `field_declaration_list` node contains any `virtual` keyword
320/// (indicating that the class needs a vtable pointer).
321fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
322    let mut stack = vec![node];
323    while let Some(n) = stack.pop() {
324        if n.kind() == "virtual" {
325            return true;
326        }
327        // Also check raw text for cases where tree-sitter may not produce a
328        // dedicated `virtual` node (e.g. inside complex declarations).
329        if n.child_count() == 0 {
330            let text = &source[n.byte_range()];
331            if text == "virtual" {
332                return true;
333            }
334        }
335        for i in (0..n.child_count()).rev() {
336            if let Some(child) = n.child(i) {
337                stack.push(child);
338            }
339        }
340    }
341    false
342}
343
344// ── tree-sitter walker ────────────────────────────────────────────────────────
345
346fn extract_structs_from_tree(
347    source: &str,
348    root: Node<'_>,
349    arch: &'static ArchConfig,
350    layouts: &mut Vec<StructLayout>,
351) {
352    let cursor = root.walk();
353    let mut stack = vec![root];
354
355    while let Some(node) = stack.pop() {
356        // Push children in reverse so we process left-to-right
357        for i in (0..node.child_count()).rev() {
358            if let Some(child) = node.child(i) {
359                stack.push(child);
360            }
361        }
362
363        match node.kind() {
364            "struct_specifier" => {
365                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, false) {
366                    layouts.push(layout);
367                }
368            }
369            "union_specifier" => {
370                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, true) {
371                    layouts.push(layout);
372                }
373            }
374            "class_specifier" => {
375                if let Some(layout) = parse_class_specifier(source, node, arch) {
376                    layouts.push(layout);
377                }
378            }
379            _ => {}
380        }
381    }
382
383    // Also handle `typedef struct/union { ... } Name;`
384    let cursor2 = root.walk();
385    let mut stack2 = vec![root];
386    while let Some(node) = stack2.pop() {
387        for i in (0..node.child_count()).rev() {
388            if let Some(child) = node.child(i) {
389                stack2.push(child);
390            }
391        }
392        if node.kind() == "type_definition"
393            && let Some(layout) = parse_typedef_struct_or_union(source, node, arch)
394        {
395            let existing = layouts
396                .iter()
397                .position(|l| l.name == layout.name || l.name == "<anonymous>");
398            match existing {
399                Some(i) if layouts[i].name == "<anonymous>" => {
400                    layouts[i] = layout;
401                }
402                None => layouts.push(layout),
403                _ => {}
404            }
405        }
406    }
407    let _ = cursor;
408    let _ = cursor2; // silence unused warnings
409}
410
411/// Parse a `struct_specifier` or `union_specifier` node into a `StructLayout`.
412fn parse_struct_or_union_specifier(
413    source: &str,
414    node: Node<'_>,
415    arch: &'static ArchConfig,
416    is_union: bool,
417) -> Option<StructLayout> {
418    let mut name = "<anonymous>".to_string();
419    let mut body_node: Option<Node> = None;
420    let mut is_packed = false;
421
422    for i in 0..node.child_count() {
423        let child = node.child(i)?;
424        match child.kind() {
425            "type_identifier" => name = source[child.byte_range()].to_string(),
426            "field_declaration_list" => body_node = Some(child),
427            "attribute_specifier" => {
428                if source[child.byte_range()].contains("packed") {
429                    is_packed = true;
430                }
431            }
432            _ => {}
433        }
434    }
435
436    let body = body_node?;
437    let mut raw_fields: Vec<(String, String, Option<String>)> = Vec::new();
438
439    for i in 0..body.child_count() {
440        let child = body.child(i)?;
441        if child.kind() == "field_declaration"
442            && let Some((ty, fname, guard)) = parse_field_declaration(source, child)
443        {
444            raw_fields.push((fname, ty, guard));
445        }
446    }
447
448    if raw_fields.is_empty() {
449        return None;
450    }
451
452    // Bit-field packing is compiler-controlled and cannot be accurately modelled
453    // without a compiler. Skip the entire struct to avoid producing wrong layout
454    // data. Use `padlock analyze` on the compiled binary for accurate results.
455    if raw_fields.iter().any(|(_, ty, _)| is_bitfield_type(ty)) {
456        return None;
457    }
458
459    let mut fields: Vec<Field> = raw_fields
460        .into_iter()
461        .map(|(fname, ty_name, guard)| {
462            let (size, align) = c_type_size_align(&ty_name, arch);
463            let access = if let Some(g) = guard {
464                AccessPattern::Concurrent {
465                    guard: Some(g),
466                    is_atomic: false,
467                }
468            } else {
469                AccessPattern::Unknown
470            };
471            Field {
472                name: fname,
473                ty: TypeInfo::Primitive {
474                    name: ty_name,
475                    size,
476                    align,
477                },
478                offset: 0,
479                size,
480                align,
481                source_file: None,
482                source_line: None,
483                access,
484            }
485        })
486        .collect();
487
488    let line = node.start_position().row as u32 + 1;
489    if is_union {
490        Some(simulate_union_layout(&mut fields, name, arch, Some(line)))
491    } else {
492        Some(simulate_layout(
493            &mut fields,
494            name,
495            arch,
496            Some(line),
497            is_packed,
498        ))
499    }
500}
501
502/// Parse a `typedef struct/union { ... } Name;` type_definition node.
503fn parse_typedef_struct_or_union(
504    source: &str,
505    node: Node<'_>,
506    arch: &'static ArchConfig,
507) -> Option<StructLayout> {
508    let mut specifier_node: Option<Node> = None;
509    let mut is_union = false;
510    let mut typedef_name: Option<String> = None;
511
512    for i in 0..node.child_count() {
513        let child = node.child(i)?;
514        match child.kind() {
515            "struct_specifier" => {
516                specifier_node = Some(child);
517                is_union = false;
518            }
519            "union_specifier" => {
520                specifier_node = Some(child);
521                is_union = true;
522            }
523            "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
524            _ => {}
525        }
526    }
527
528    let spec = specifier_node?;
529    let typedef_name = typedef_name?;
530
531    let mut layout = parse_struct_or_union_specifier(source, spec, arch, is_union)?;
532    if layout.name == "<anonymous>" {
533        layout.name = typedef_name;
534    }
535    Some(layout)
536}
537
538// Alias kept for the typedef pass in extract_structs_from_tree.
539#[allow(dead_code)]
540fn parse_typedef_struct(
541    source: &str,
542    node: Node<'_>,
543    arch: &'static ArchConfig,
544) -> Option<StructLayout> {
545    parse_typedef_struct_or_union(source, node, arch)
546}
547
/// Extract a lock guard name from the text of a C/C++ field declaration or
/// attribute, e.g. `__attribute__((guarded_by(X)))` or `GUARDED_BY(X)`.
///
/// Recognised keywords are `guarded_by` and `GUARDED_BY`, matched exactly
/// (the match is case-sensitive). Because they are searched as substrings,
/// the `pt_guarded_by` / `PT_GUARDED_BY` forms are covered too. The lowercase
/// spelling takes precedence when both appear.
///
/// The guard is the text between the parenthesis that follows the keyword and
/// its matching close parenthesis, so call expressions such as
/// `GUARDED_BY(obj.mutex())` are returned whole. Surrounding whitespace and
/// double quotes are removed. Occurrences that are not followed by a
/// parenthesised, non-empty argument are skipped and the search continues.
///
/// Returns `None` when no usable annotation is found.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    for kw in ["guarded_by", "GUARDED_BY"] {
        for (pos, _) in field_source.match_indices(kw) {
            // Expect `(` optionally preceded by whitespace
            let after = field_source[pos + kw.len()..].trim_start();
            let Some(inner) = after.strip_prefix('(') else {
                continue;
            };

            // Find the `)` that balances the one just consumed.
            let mut depth = 1usize;
            let mut close = None;
            for (idx, ch) in inner.char_indices() {
                match ch {
                    '(' => depth += 1,
                    ')' => {
                        depth -= 1;
                        if depth == 0 {
                            close = Some(idx);
                            break;
                        }
                    }
                    _ => {}
                }
            }
            let Some(end) = close else {
                continue;
            };

            let guard = inner[..end].trim().trim_matches('"');
            if !guard.is_empty() {
                return Some(guard.to_string());
            }
        }
    }
    None
}
575
576fn parse_field_declaration(
577    source: &str,
578    node: Node<'_>,
579) -> Option<(String, String, Option<String>)> {
580    let mut ty_parts: Vec<String> = Vec::new();
581    let mut field_name: Option<String> = None;
582    // Bit-field width, e.g. `int flags : 3;` → Some("3")
583    let mut bit_width: Option<String> = None;
584    // Collect attribute text for guard extraction
585    let mut attr_text = String::new();
586
587    for i in 0..node.child_count() {
588        let child = node.child(i)?;
589        match child.kind() {
590            "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
591                ty_parts.push(source[child.byte_range()].trim().to_string());
592            }
593            // C++ qualified types: std::mutex, ns::Type, etc.
594            // C++ template types:  std::atomic<uint64_t>, std::vector<int>, etc.
595            "qualified_identifier" | "template_type" => {
596                ty_parts.push(source[child.byte_range()].trim().to_string());
597            }
598            // Nested struct/union used as a field type: `struct Vec2 tl;`
599            // Extract just the type_identifier name (e.g. "Vec2") so the
600            // nested-struct resolution pass can match it by name.
601            "struct_specifier" | "union_specifier" => {
602                for j in 0..child.child_count() {
603                    if let Some(sub) = child.child(j)
604                        && sub.kind() == "type_identifier"
605                    {
606                        ty_parts.push(source[sub.byte_range()].trim().to_string());
607                        break;
608                    }
609                }
610            }
611            "field_identifier" => {
612                field_name = Some(source[child.byte_range()].trim().to_string());
613            }
614            "pointer_declarator" => {
615                field_name = extract_identifier(source, child);
616                ty_parts.push("*".to_string());
617            }
618            // Bit-field clause: `: N`  (tree-sitter-c/cpp node)
619            "bitfield_clause" => {
620                let text = source[child.byte_range()].trim();
621                // Strip leading ':' and whitespace to get just the width digits
622                bit_width = Some(text.trim_start_matches(':').trim().to_string());
623            }
624            // GNU attribute specifier: __attribute__((...))
625            "attribute_specifier" | "attribute" => {
626                attr_text.push_str(source[child.byte_range()].trim());
627                attr_text.push(' ');
628            }
629            _ => {}
630        }
631    }
632
633    let base_ty = ty_parts.join(" ");
634    let fname = field_name?;
635    if base_ty.is_empty() {
636        return None;
637    }
638    // Annotate bit-field types as "type:N" so callers can detect and report them;
639    // `strip_bitfield_suffix` recovers the base type for size/align lookup.
640    let ty = if let Some(w) = bit_width {
641        format!("{base_ty}:{w}")
642    } else {
643        base_ty
644    };
645
646    // Also check the full field source text (attribute_specifier may not always
647    // be a direct child depending on tree-sitter grammar version).
648    let field_src = source[node.byte_range()].to_string();
649    let guard = extract_guard_from_c_field_text(&attr_text)
650        .or_else(|| extract_guard_from_c_field_text(&field_src));
651
652    Some((ty, fname, guard))
653}
654
655fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
656    if node.kind() == "field_identifier" || node.kind() == "identifier" {
657        return Some(source[node.byte_range()].to_string());
658    }
659    for i in 0..node.child_count() {
660        if let Some(child) = node.child(i)
661            && let Some(name) = extract_identifier(source, child)
662        {
663            return Some(name);
664        }
665    }
666    None
667}
668
669// ── public API ────────────────────────────────────────────────────────────────
670
671pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
672    let mut parser = Parser::new();
673    parser.set_language(&tree_sitter_c::LANGUAGE.into())?;
674    let tree = parser
675        .parse(source, None)
676        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
677    let mut layouts = Vec::new();
678    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
679    Ok(layouts)
680}
681
682pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
683    let mut parser = Parser::new();
684    parser.set_language(&tree_sitter_cpp::LANGUAGE.into())?;
685    let tree = parser
686        .parse(source, None)
687        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
688    let mut layouts = Vec::new();
689    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
690    Ok(layouts)
691}
692
693// ── tests ─────────────────────────────────────────────────────────────────────
694
695#[cfg(test)]
696mod tests {
697    use super::*;
698    use padlock_core::arch::X86_64_SYSV;
699
700    #[test]
701    fn parse_simple_c_struct() {
702        let src = r#"
703struct Point {
704    int x;
705    int y;
706};
707"#;
708        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
709        assert_eq!(layouts.len(), 1);
710        assert_eq!(layouts[0].name, "Point");
711        assert_eq!(layouts[0].fields.len(), 2);
712        assert_eq!(layouts[0].fields[0].name, "x");
713        assert_eq!(layouts[0].fields[1].name, "y");
714    }
715
716    #[test]
717    fn parse_typedef_struct() {
718        let src = r#"
719typedef struct {
720    char  is_active;
721    double timeout;
722    int   port;
723} Connection;
724"#;
725        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
726        assert_eq!(layouts.len(), 1);
727        assert_eq!(layouts[0].name, "Connection");
728        assert_eq!(layouts[0].fields.len(), 3);
729    }
730
731    #[test]
732    fn c_layout_computes_offsets() {
733        let src = "struct T { char a; double b; };";
734        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
735        assert_eq!(layouts.len(), 1);
736        let layout = &layouts[0];
737        // char at offset 0, double at offset 8 (7 bytes padding)
738        assert_eq!(layout.fields[0].offset, 0);
739        assert_eq!(layout.fields[1].offset, 8);
740        assert_eq!(layout.total_size, 16);
741    }
742
743    #[test]
744    fn c_layout_detects_padding() {
745        let src = "struct T { char a; int b; };";
746        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
747        let gaps = padlock_core::ir::find_padding(&layouts[0]);
748        assert!(!gaps.is_empty());
749        assert_eq!(gaps[0].bytes, 3); // 3 bytes padding between char and int
750    }
751
752    #[test]
753    fn parse_cpp_struct() {
754        let src = "struct Vec3 { float x; float y; float z; };";
755        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
756        assert_eq!(layouts.len(), 1);
757        assert_eq!(layouts[0].fields.len(), 3);
758    }
759
760    // ── SIMD types ────────────────────────────────────────────────────────────
761
762    #[test]
763    fn simd_sse_field_size_and_align() {
764        let src = "struct Vecs { __m128 a; __m256 b; };";
765        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
766        assert_eq!(layouts.len(), 1);
767        let f = &layouts[0].fields;
768        assert_eq!(f[0].size, 16); // __m128
769        assert_eq!(f[0].align, 16);
770        assert_eq!(f[1].size, 32); // __m256
771        assert_eq!(f[1].align, 32);
772    }
773
774    #[test]
775    fn simd_avx512_size() {
776        let src = "struct Wide { __m512 v; };";
777        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
778        assert_eq!(layouts[0].fields[0].size, 64);
779        assert_eq!(layouts[0].fields[0].align, 64);
780    }
781
782    #[test]
783    fn simd_padding_detected_when_small_field_before_avx() {
784        // char(1) + [31 pad] + __m256(32) = 64 bytes, 31 wasted
785        let src = "struct Mixed { char flag; __m256 data; };";
786        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
787        let gaps = padlock_core::ir::find_padding(&layouts[0]);
788        assert!(!gaps.is_empty());
789        assert_eq!(gaps[0].bytes, 31);
790    }
791
792    // ── union parsing ─────────────────────────────────────────────────────────
793
794    #[test]
795    fn union_fields_all_at_offset_zero() {
796        let src = "union Data { int i; float f; double d; };";
797        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
798        assert_eq!(layouts.len(), 1);
799        let u = &layouts[0];
800        assert!(u.is_union);
801        for field in &u.fields {
802            assert_eq!(
803                field.offset, 0,
804                "union field '{}' should be at offset 0",
805                field.name
806            );
807        }
808    }
809
810    #[test]
811    fn union_total_size_is_max_field() {
812        // double is the largest (8 bytes); total should be 8
813        let src = "union Data { int i; float f; double d; };";
814        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
815        assert_eq!(layouts[0].total_size, 8);
816    }
817
818    #[test]
819    fn union_no_padding_finding() {
820        let src = "union Data { int i; double d; };";
821        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
822        let report = padlock_core::findings::Report::from_layouts(&layouts);
823        let sr = &report.structs[0];
824        assert!(
825            !sr.findings
826                .iter()
827                .any(|f| matches!(f, padlock_core::findings::Finding::PaddingWaste { .. }))
828        );
829        assert!(
830            !sr.findings
831                .iter()
832                .any(|f| matches!(f, padlock_core::findings::Finding::ReorderSuggestion { .. }))
833        );
834    }
835
836    #[test]
837    fn typedef_union_parsed() {
838        let src = "typedef union { int a; double b; } Value;";
839        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
840        assert_eq!(layouts.len(), 1);
841        assert_eq!(layouts[0].name, "Value");
842        assert!(layouts[0].is_union);
843    }
844
845    // ── attribute guard extraction ─────────────────────────────────────────────
846
847    #[test]
848    fn extract_guard_from_c_guarded_by_macro() {
849        let text = "int value GUARDED_BY(mu);";
850        let guard = extract_guard_from_c_field_text(text);
851        assert_eq!(guard.as_deref(), Some("mu"));
852    }
853
854    #[test]
855    fn extract_guard_from_c_attribute_specifier() {
856        let text = "__attribute__((guarded_by(counter_lock))) uint64_t counter;";
857        let guard = extract_guard_from_c_field_text(text);
858        assert_eq!(guard.as_deref(), Some("counter_lock"));
859    }
860
861    #[test]
862    fn extract_guard_pt_guarded_by() {
863        let text = "int *ptr PT_GUARDED_BY(ptr_lock);";
864        let guard = extract_guard_from_c_field_text(text);
865        assert_eq!(guard.as_deref(), Some("ptr_lock"));
866    }
867
868    #[test]
869    fn no_guard_returns_none() {
870        let guard = extract_guard_from_c_field_text("int x;");
871        assert!(guard.is_none());
872    }
873
874    #[test]
875    fn c_struct_guarded_by_sets_concurrent_access() {
876        // Using GUARDED_BY macro style in comments/text — tree-sitter won't parse
877        // macro expansions, so test the text-extraction path via parse_field_declaration
878        // indirectly by checking extract_guard_from_c_field_text.
879        let text = "uint64_t readers GUARDED_BY(lock_a);";
880        assert_eq!(
881            extract_guard_from_c_field_text(text).as_deref(),
882            Some("lock_a")
883        );
884    }
885
886    #[test]
887    fn c_struct_different_guards_detected_as_false_sharing() {
888        use padlock_core::arch::X86_64_SYSV;
889        use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
890
891        // Manually build a layout with two fields on the same cache line,
892        // different guards — mirrors what the C frontend would produce for
893        // __attribute__((guarded_by(...))) annotated fields.
894        let mut layout = StructLayout {
895            name: "S".into(),
896            total_size: 128,
897            align: 8,
898            fields: vec![
899                Field {
900                    name: "readers".into(),
901                    ty: TypeInfo::Primitive {
902                        name: "uint64_t".into(),
903                        size: 8,
904                        align: 8,
905                    },
906                    offset: 0,
907                    size: 8,
908                    align: 8,
909                    source_file: None,
910                    source_line: None,
911                    access: AccessPattern::Concurrent {
912                        guard: Some("lock_a".into()),
913                        is_atomic: false,
914                    },
915                },
916                Field {
917                    name: "writers".into(),
918                    ty: TypeInfo::Primitive {
919                        name: "uint64_t".into(),
920                        size: 8,
921                        align: 8,
922                    },
923                    offset: 8,
924                    size: 8,
925                    align: 8,
926                    source_file: None,
927                    source_line: None,
928                    access: AccessPattern::Concurrent {
929                        guard: Some("lock_b".into()),
930                        is_atomic: false,
931                    },
932                },
933            ],
934            source_file: None,
935            source_line: None,
936            arch: &X86_64_SYSV,
937            is_packed: false,
938            is_union: false,
939        };
940        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
941            &layout
942        ));
943        // Same guard → no false sharing
944        layout.fields[1].access = AccessPattern::Concurrent {
945            guard: Some("lock_a".into()),
946            is_atomic: false,
947        };
948        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
949            &layout
950        ));
951    }
952
953    // ── C++ class: vtable pointer ─────────────────────────────────────────────
954
955    #[test]
956    fn cpp_class_with_virtual_method_has_vptr() {
957        let src = r#"
958class Widget {
959    virtual void draw();
960    int x;
961    int y;
962};
963"#;
964        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
965        assert_eq!(layouts.len(), 1);
966        let l = &layouts[0];
967        // First field must be __vptr
968        assert_eq!(l.fields[0].name, "__vptr");
969        assert_eq!(l.fields[0].size, 8); // pointer on x86_64
970        // __vptr is at offset 0
971        assert_eq!(l.fields[0].offset, 0);
972        // int x should come after the pointer (at offset 8)
973        let x = l.fields.iter().find(|f| f.name == "x").unwrap();
974        assert_eq!(x.offset, 8);
975    }
976
977    #[test]
978    fn cpp_class_without_virtual_has_no_vptr() {
979        let src = "class Plain { int a; int b; };";
980        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
981        assert_eq!(layouts.len(), 1);
982        assert!(!layouts[0].fields.iter().any(|f| f.name == "__vptr"));
983    }
984
985    #[test]
986    fn cpp_struct_keyword_with_virtual_has_vptr() {
987        // `struct` in C++ can also have virtual methods
988        let src = "struct IFoo { virtual ~IFoo(); virtual void bar(); };";
989        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
990        // struct_specifier doesn't go through parse_class_specifier, so no __vptr
991        // (vtable injection is only for `class` nodes)
992        let _ = layouts; // just verify it parses without panic
993    }
994
995    // ── C++ class: single inheritance ─────────────────────────────────────────
996
997    #[test]
998    fn cpp_derived_class_has_base_slot() {
999        let src = r#"
1000class Base {
1001    int x;
1002};
1003class Derived : public Base {
1004    int y;
1005};
1006"#;
1007        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1008        // Both Base and Derived should be parsed
1009        let derived = layouts.iter().find(|l| l.name == "Derived").unwrap();
1010        // Derived must have a __base_Base synthetic field
1011        assert!(
1012            derived.fields.iter().any(|f| f.name == "__base_Base"),
1013            "Derived should have a __base_Base field"
1014        );
1015        // The y field should come after __base_Base
1016        let base_field = derived
1017            .fields
1018            .iter()
1019            .find(|f| f.name == "__base_Base")
1020            .unwrap();
1021        let y_field = derived.fields.iter().find(|f| f.name == "y").unwrap();
1022        assert!(y_field.offset >= base_field.offset + base_field.size);
1023    }
1024
1025    #[test]
1026    fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
1027        let src = r#"
1028class A { int a; };
1029class B { int b; };
1030class C : public A, public B { int c; };
1031"#;
1032        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1033        let c = layouts.iter().find(|l| l.name == "C").unwrap();
1034        assert!(c.fields.iter().any(|f| f.name == "__base_A"));
1035        assert!(c.fields.iter().any(|f| f.name == "__base_B"));
1036    }
1037
1038    #[test]
1039    fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
1040        // class with virtual method: size = sizeof(__vptr) + member fields + padding
1041        let src = "class V { virtual void f(); int x; };";
1042        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1043        let l = &layouts[0];
1044        // __vptr(8) + int(4) + 4 pad = 16 bytes on x86_64
1045        assert_eq!(l.total_size, 16);
1046    }
1047
1048    // ── bitfield handling ─────────────────────────────────────────────────────
1049
1050    #[test]
1051    fn is_bitfield_type_detects_colon_n() {
1052        assert!(is_bitfield_type("int:3"));
1053        assert!(is_bitfield_type("unsigned int:16"));
1054        assert!(is_bitfield_type("uint32_t:1"));
1055        // Not bit-fields — contains ':' but not followed by pure digits
1056        assert!(!is_bitfield_type("std::atomic<int>"));
1057        assert!(!is_bitfield_type("ns::Type"));
1058        assert!(!is_bitfield_type("int"));
1059    }
1060
1061    #[test]
1062    fn struct_with_bitfields_is_skipped() {
1063        // Bit-field layout is compiler-controlled and cannot be accurately modelled
1064        // without a compiler. The struct must be skipped entirely.
1065        let src = r#"
1066struct Flags {
1067    unsigned int active : 1;
1068    unsigned int ready  : 1;
1069    unsigned int error  : 6;
1070    int value;
1071};
1072"#;
1073        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1074        // Flags must not appear — its layout cannot be accurately computed.
1075        assert!(
1076            layouts.iter().all(|l| l.name != "Flags"),
1077            "struct with bitfields should be skipped; got {:?}",
1078            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
1079        );
1080    }
1081
1082    #[test]
1083    fn struct_without_bitfields_is_still_parsed() {
1084        // Ensure the bitfield guard doesn't affect normal structs.
1085        let src = "struct Normal { int a; char b; double c; };";
1086        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1087        assert_eq!(layouts.len(), 1);
1088        assert_eq!(layouts[0].name, "Normal");
1089    }
1090
1091    #[test]
1092    fn cpp_class_with_bitfields_is_skipped() {
1093        let src = "class Packed { int x : 4; int y : 4; };";
1094        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1095        assert!(
1096            layouts.iter().all(|l| l.name != "Packed"),
1097            "C++ class with bitfields should be skipped"
1098        );
1099    }
1100
1101    // ── __attribute__((packed)) detection ─────────────────────────────────────
1102
1103    #[test]
1104    fn packed_struct_has_no_alignment_padding() {
1105        // Without packed: char(1) + 3-byte pad + int(4) + char(1) + 3-byte pad = 12 bytes
1106        // With packed:    char(1) + int(4) + char(1) = 6 bytes, align=1
1107        let src = r#"
1108struct __attribute__((packed)) Tight {
1109    char a;
1110    int  b;
1111    char c;
1112};
1113"#;
1114        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1115        let l = layouts.iter().find(|l| l.name == "Tight").expect("Tight");
1116        assert!(l.is_packed, "should be marked is_packed");
1117        assert_eq!(l.total_size, 6, "packed: no padding inserted");
1118        assert_eq!(l.fields[0].offset, 0);
1119        assert_eq!(l.fields[1].offset, 1); // immediately after char
1120        assert_eq!(l.fields[2].offset, 5);
1121    }
1122
1123    #[test]
1124    fn non_packed_struct_has_normal_alignment_padding() {
1125        // Confirm baseline: same struct without __attribute__((packed)) gets padded
1126        let src = r#"
1127struct Normal {
1128    char a;
1129    int  b;
1130    char c;
1131};
1132"#;
1133        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1134        let l = layouts.iter().find(|l| l.name == "Normal").expect("Normal");
1135        assert!(!l.is_packed);
1136        assert_eq!(l.total_size, 12);
1137        assert_eq!(l.fields[1].offset, 4); // aligned to 4
1138    }
1139
1140    #[test]
1141    fn cpp_class_packed_attribute_detected() {
1142        let src = r#"
1143class __attribute__((packed)) Dense {
1144    char a;
1145    int  b;
1146};
1147"#;
1148        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1149        let l = layouts.iter().find(|l| l.name == "Dense").expect("Dense");
1150        assert!(
1151            l.is_packed,
1152            "C++ class with __attribute__((packed)) must be marked packed"
1153        );
1154        assert_eq!(l.total_size, 5); // char(1) + int(4), no padding
1155    }
1156}