Skip to main content

padlock_source/frontends/
c_cpp.rs

1// padlock-source/src/frontends/c_cpp.rs
2//
3// Extracts struct layouts from C / C++ source using tree-sitter.
4// Sizes and alignments are computed from field type names + arch config;
5// there is no compiler involved so the results are approximate for complex types.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use tree_sitter::{Node, Parser};
10
11// ── type resolution ───────────────────────────────────────────────────────────
12
13/// Map a C/C++ type name to (size, align) using the target arch.
14fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
15    let ty = ty.trim();
16    // Strip qualifiers
17    for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
18        if let Some(rest) = ty.strip_prefix(qual) {
19            return c_type_size_align(rest, arch);
20        }
21    }
22    // x86 SSE / AVX / AVX-512 SIMD types
23    match ty {
24        "__m64" => return (8, 8),
25        "__m128" | "__m128d" | "__m128i" => return (16, 16),
26        "__m256" | "__m256d" | "__m256i" => return (32, 32),
27        "__m512" | "__m512d" | "__m512i" => return (64, 64),
28        // ARM NEON — 64-bit (double-word) vectors
29        "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
30        | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
31        // ARM NEON — 128-bit (quad-word) vectors
32        "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
33        | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
34        _ => {}
35    }
36    // C++ standard library synchronisation types (Linux/glibc x86-64 defaults).
37    // Sizes are platform-approximate; accuracy is "good enough" for cache-line
38    // bucketing and false-sharing detection.
39    match ty {
40        // Mutexes — all backed by pthread_mutex_t (40 bytes on Linux/glibc)
41        "std::mutex"
42        | "std::recursive_mutex"
43        | "std::timed_mutex"
44        | "std::recursive_timed_mutex"
45        | "pthread_mutex_t" => return (40, 8),
46        "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
47        // Condition variables
48        "std::condition_variable" | "pthread_cond_t" => return (48, 8),
49        // std::atomic<T> — same size as T; extract and recurse
50        ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
51            let inner = &ty[12..ty.len() - 1];
52            return c_type_size_align(inner.trim(), arch);
53        }
54        _ => {} // fall through to primitive types below
55    }
56    // Primitive / stdint / pointer types
57    match ty {
58        "char" | "_Bool" | "bool" => (1, 1),
59        "short" | "short int" => (2, 2),
60        "int" => (4, 4),
61        "long" => (arch.pointer_size, arch.pointer_size),
62        "long long" => (8, 8),
63        "float" => (4, 4),
64        "double" => (8, 8),
65        "long double" => (16, 16),
66        "int8_t" | "uint8_t" => (1, 1),
67        "int16_t" | "uint16_t" => (2, 2),
68        "int32_t" | "uint32_t" => (4, 4),
69        "int64_t" | "uint64_t" => (8, 8),
70        "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
71            (arch.pointer_size, arch.pointer_size)
72        }
73        // Pointer types
74        ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
75        // Unknown — use pointer size as a reasonable default
76        _ => (arch.pointer_size, arch.pointer_size),
77    }
78}
79
80// ── struct / union simulation ─────────────────────────────────────────────────
81
/// Strip a bit-field width annotation (`:WIDTH`) from a type name for size lookup.
///
/// `"int:3"` → `"int"`, `"unsigned:FLAG_BITS"` → `"unsigned"`;
/// `"std::atomic"` is unchanged because its colons are part of a `::` scope
/// operator.
///
/// The width does not have to be a plain decimal number: it is whatever text
/// follows the first lone `:` (one that is neither preceded nor followed by
/// another `:`) found outside `<…>`, `(…)` and `[…]`. A lone `:` with nothing
/// but whitespace after it is not treated as a width and `ty` is returned
/// unchanged.
fn strip_bitfield_suffix(ty: &str) -> &str {
    let bytes = ty.as_bytes();
    // Nesting depth of brackets, so a `:` inside template arguments or a
    // parenthesised expression is never mistaken for the width separator.
    let mut depth = 0usize;
    for (i, &b) in bytes.iter().enumerate() {
        match b {
            b'<' | b'(' | b'[' => depth += 1,
            b'>' | b')' | b']' => depth = depth.saturating_sub(1),
            b':' if depth == 0 => {
                let after_colon = i > 0 && bytes[i - 1] == b':';
                let before_colon = bytes.get(i + 1) == Some(&b':');
                if !after_colon && !before_colon {
                    // `:` is ASCII, so `i` and `i + 1` are char boundaries.
                    if ty[i + 1..].trim().is_empty() {
                        return ty;
                    }
                    return ty[..i].trim_end();
                }
            }
            _ => {}
        }
    }
    ty
}
93
94/// Return `true` when `ty` carries a bit-field width annotation (e.g. `"int:3"`).
95/// Bit-field packing is compiler-controlled and cannot be accurately modelled
96/// without a compiler, so structs containing bit-field members are skipped.
97fn is_bitfield_type(ty: &str) -> bool {
98    strip_bitfield_suffix(ty) != ty
99}
100
101/// Simulate C/C++ struct layout given ordered fields.
102///
103/// When `packed` is `true` the layout mirrors `__attribute__((packed))`:
104/// no inter-field alignment padding is inserted and the struct alignment
105/// is forced to 1. This matches GCC/Clang behaviour for packed structs.
106fn simulate_layout(
107    fields: &mut Vec<Field>,
108    struct_name: String,
109    arch: &'static ArchConfig,
110    source_line: Option<u32>,
111    packed: bool,
112) -> StructLayout {
113    let mut offset = 0usize;
114    let mut struct_align = 1usize;
115
116    for f in fields.iter_mut() {
117        if !packed && f.align > 0 {
118            offset = offset.next_multiple_of(f.align);
119        }
120        f.offset = offset;
121        offset += f.size;
122        if !packed {
123            struct_align = struct_align.max(f.align);
124        }
125    }
126    // Trailing padding (not present in packed structs)
127    if !packed && struct_align > 0 {
128        offset = offset.next_multiple_of(struct_align);
129    }
130
131    StructLayout {
132        name: struct_name,
133        total_size: offset,
134        align: struct_align,
135        fields: std::mem::take(fields),
136        source_file: None,
137        source_line,
138        arch,
139        is_packed: packed,
140        is_union: false,
141    }
142}
143
144/// Simulate a C/C++ union layout: all fields start at offset 0;
145/// total size is the largest field, rounded to max alignment.
146fn simulate_union_layout(
147    fields: &mut Vec<Field>,
148    name: String,
149    arch: &'static ArchConfig,
150    source_line: Option<u32>,
151) -> StructLayout {
152    for f in fields.iter_mut() {
153        f.offset = 0;
154    }
155    let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
156    let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
157    let total_size = if max_align > 0 {
158        max_size.next_multiple_of(max_align)
159    } else {
160        max_size
161    };
162
163    StructLayout {
164        name,
165        total_size,
166        align: max_align,
167        fields: std::mem::take(fields),
168        source_file: None,
169        source_line,
170        arch,
171        is_packed: false,
172        is_union: true,
173    }
174}
175
176// ── C++ class parsing (vtable + inheritance) ──────────────────────────────────
177
178/// Parse a `class_specifier` node, modelling:
179/// - A hidden vtable pointer (`__vptr`) when any method is `virtual`.
180/// - Base-class storage as a synthetic `__base_<Name>` field (size resolved
181///   later by the nested-struct resolution pass in `lib.rs`).
182fn parse_class_specifier(
183    source: &str,
184    node: Node<'_>,
185    arch: &'static ArchConfig,
186) -> Option<StructLayout> {
187    let mut class_name = "<anonymous>".to_string();
188    let mut base_names: Vec<String> = Vec::new();
189    let mut body_node: Option<Node> = None;
190    let mut is_packed = false;
191    let mut struct_alignas: Option<usize> = None;
192
193    for i in 0..node.child_count() {
194        let child = node.child(i)?;
195        match child.kind() {
196            "type_identifier" => class_name = source[child.byte_range()].to_string(),
197            "base_class_clause" => {
198                // tree-sitter-cpp structure: ':' [access_specifier] type_identifier
199                // type_identifier nodes are direct children of base_class_clause.
200                for j in 0..child.child_count() {
201                    if let Some(base) = child.child(j)
202                        && base.kind() == "type_identifier"
203                    {
204                        base_names.push(source[base.byte_range()].to_string());
205                    }
206                }
207            }
208            "field_declaration_list" => body_node = Some(child),
209            "attribute_specifier" => {
210                if source[child.byte_range()].contains("packed") {
211                    is_packed = true;
212                }
213            }
214            // C++11 class-level alignas: `class alignas(64) Name { ... };`
215            "alignas_qualifier" | "alignas_specifier" => {
216                if struct_alignas.is_none() {
217                    struct_alignas = parse_alignas_value(source, child);
218                }
219            }
220            _ => {}
221        }
222    }
223
224    let body = body_node?;
225
226    // Detect virtual methods: look for `virtual` keyword anywhere in body
227    let has_virtual = contains_virtual_keyword(source, body);
228
229    // Collect declared fields: (field_name, type_text, guard, alignas_override)
230    let mut raw_fields: Vec<(String, String, Option<String>, Option<usize>)> = Vec::new();
231    for i in 0..body.child_count() {
232        if let Some(child) = body.child(i)
233            && child.kind() == "field_declaration"
234            && let Some((ty, fname, guard, al)) = parse_field_declaration(source, child)
235        {
236            raw_fields.push((fname, ty, guard, al));
237        }
238    }
239
240    // Build fields: vtable pointer, then base-class slots, then declared fields
241    let mut fields: Vec<Field> = Vec::new();
242
243    // Virtual dispatch pointer (hidden, at offset 0 for the first virtual class)
244    if has_virtual {
245        let ps = arch.pointer_size;
246        fields.push(Field {
247            name: "__vptr".to_string(),
248            ty: TypeInfo::Pointer {
249                size: ps,
250                align: ps,
251            },
252            offset: 0,
253            size: ps,
254            align: ps,
255            source_file: None,
256            source_line: None,
257            access: AccessPattern::Unknown,
258        });
259    }
260
261    // Base class storage (opaque until nested-struct resolver fills in sizes)
262    for base in &base_names {
263        let ps = arch.pointer_size;
264        fields.push(Field {
265            name: format!("__base_{base}"),
266            ty: TypeInfo::Opaque {
267                name: base.clone(),
268                size: ps,
269                align: ps,
270            },
271            offset: 0,
272            size: ps,
273            align: ps,
274            source_file: None,
275            source_line: None,
276            access: AccessPattern::Unknown,
277        });
278    }
279
280    // Skip classes with bit-field members (same reason as structs).
281    if raw_fields.iter().any(|(_, ty, _, _)| is_bitfield_type(ty)) {
282        return None;
283    }
284
285    // Declared member fields
286    for (fname, ty_name, guard, alignas) in raw_fields {
287        let (size, natural_align) = c_type_size_align(&ty_name, arch);
288        let align = alignas.unwrap_or(natural_align);
289        let access = if let Some(g) = guard {
290            AccessPattern::Concurrent {
291                guard: Some(g),
292                is_atomic: false,
293            }
294        } else {
295            AccessPattern::Unknown
296        };
297        fields.push(Field {
298            name: fname,
299            ty: TypeInfo::Primitive {
300                name: ty_name,
301                size,
302                align,
303            },
304            offset: 0,
305            size,
306            align,
307            source_file: None,
308            source_line: None,
309            access,
310        });
311    }
312
313    if fields.is_empty() {
314        return None;
315    }
316
317    let line = node.start_position().row as u32 + 1;
318    let mut layout = simulate_layout(&mut fields, class_name, arch, Some(line), is_packed);
319
320    if let Some(al) = struct_alignas
321        && al > layout.align
322    {
323        layout.align = al;
324        if !is_packed {
325            layout.total_size = layout.total_size.next_multiple_of(al);
326        }
327    }
328
329    Some(layout)
330}
331
332/// Return true if a `field_declaration_list` node contains any `virtual` keyword
333/// (indicating that the class needs a vtable pointer).
334fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
335    let mut stack = vec![node];
336    while let Some(n) = stack.pop() {
337        if n.kind() == "virtual" {
338            return true;
339        }
340        // Also check raw text for cases where tree-sitter may not produce a
341        // dedicated `virtual` node (e.g. inside complex declarations).
342        if n.child_count() == 0 {
343            let text = &source[n.byte_range()];
344            if text == "virtual" {
345                return true;
346            }
347        }
348        for i in (0..n.child_count()).rev() {
349            if let Some(child) = n.child(i) {
350                stack.push(child);
351            }
352        }
353    }
354    false
355}
356
357// ── tree-sitter walker ────────────────────────────────────────────────────────
358
359fn extract_structs_from_tree(
360    source: &str,
361    root: Node<'_>,
362    arch: &'static ArchConfig,
363    layouts: &mut Vec<StructLayout>,
364) {
365    let cursor = root.walk();
366    let mut stack = vec![root];
367
368    while let Some(node) = stack.pop() {
369        // Push children in reverse so we process left-to-right
370        for i in (0..node.child_count()).rev() {
371            if let Some(child) = node.child(i) {
372                stack.push(child);
373            }
374        }
375
376        match node.kind() {
377            "struct_specifier" => {
378                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, false) {
379                    layouts.push(layout);
380                }
381            }
382            "union_specifier" => {
383                if let Some(layout) = parse_struct_or_union_specifier(source, node, arch, true) {
384                    layouts.push(layout);
385                }
386            }
387            "class_specifier" => {
388                if let Some(layout) = parse_class_specifier(source, node, arch) {
389                    layouts.push(layout);
390                }
391            }
392            _ => {}
393        }
394    }
395
396    // Also handle `typedef struct/union { ... } Name;`
397    let cursor2 = root.walk();
398    let mut stack2 = vec![root];
399    while let Some(node) = stack2.pop() {
400        for i in (0..node.child_count()).rev() {
401            if let Some(child) = node.child(i) {
402                stack2.push(child);
403            }
404        }
405        if node.kind() == "type_definition"
406            && let Some(layout) = parse_typedef_struct_or_union(source, node, arch)
407        {
408            let existing = layouts
409                .iter()
410                .position(|l| l.name == layout.name || l.name == "<anonymous>");
411            match existing {
412                Some(i) if layouts[i].name == "<anonymous>" => {
413                    layouts[i] = layout;
414                }
415                None => layouts.push(layout),
416                _ => {}
417            }
418        }
419    }
420    let _ = cursor;
421    let _ = cursor2; // silence unused warnings
422}
423
424/// Parse a `struct_specifier` or `union_specifier` node into a `StructLayout`.
425fn parse_struct_or_union_specifier(
426    source: &str,
427    node: Node<'_>,
428    arch: &'static ArchConfig,
429    is_union: bool,
430) -> Option<StructLayout> {
431    let mut name = "<anonymous>".to_string();
432    let mut body_node: Option<Node> = None;
433    let mut is_packed = false;
434    // Struct-level alignas: `struct alignas(64) CacheAligned { ... };`
435    let mut struct_alignas: Option<usize> = None;
436
437    for i in 0..node.child_count() {
438        let child = node.child(i)?;
439        match child.kind() {
440            "type_identifier" => name = source[child.byte_range()].to_string(),
441            "field_declaration_list" => body_node = Some(child),
442            "attribute_specifier" => {
443                let text = &source[child.byte_range()];
444                if text.contains("packed") {
445                    is_packed = true;
446                }
447            }
448            // C++11 struct-level alignas: `struct alignas(64) Name { ... };`
449            // tree-sitter-cpp: `alignas_qualifier` as direct child of struct_specifier
450            "alignas_qualifier" | "alignas_specifier" => {
451                if struct_alignas.is_none() {
452                    struct_alignas = parse_alignas_value(source, child);
453                }
454            }
455            _ => {}
456        }
457    }
458
459    let body = body_node?;
460    // (field_name, type_text, guard, alignas_override)
461    let mut raw_fields: Vec<(String, String, Option<String>, Option<usize>)> = Vec::new();
462
463    for i in 0..body.child_count() {
464        let child = body.child(i)?;
465        if child.kind() == "field_declaration"
466            && let Some((ty, fname, guard, al)) = parse_field_declaration(source, child)
467        {
468            raw_fields.push((fname, ty, guard, al));
469        }
470    }
471
472    if raw_fields.is_empty() {
473        return None;
474    }
475
476    // Bit-field packing is compiler-controlled and cannot be accurately modelled
477    // without a compiler. Skip the entire struct to avoid producing wrong layout
478    // data. Use `padlock analyze` on the compiled binary for accurate results.
479    if raw_fields.iter().any(|(_, ty, _, _)| is_bitfield_type(ty)) {
480        return None;
481    }
482
483    let mut fields: Vec<Field> = raw_fields
484        .into_iter()
485        .map(|(fname, ty_name, guard, alignas)| {
486            let (size, natural_align) = c_type_size_align(&ty_name, arch);
487            // alignas(N) on a field overrides its alignment requirement.
488            let align = alignas.unwrap_or(natural_align);
489            let access = if let Some(g) = guard {
490                AccessPattern::Concurrent {
491                    guard: Some(g),
492                    is_atomic: false,
493                }
494            } else {
495                AccessPattern::Unknown
496            };
497            Field {
498                name: fname,
499                ty: TypeInfo::Primitive {
500                    name: ty_name,
501                    size,
502                    align,
503                },
504                offset: 0,
505                size,
506                align,
507                source_file: None,
508                source_line: None,
509                access,
510            }
511        })
512        .collect();
513
514    let line = node.start_position().row as u32 + 1;
515    let mut layout = if is_union {
516        simulate_union_layout(&mut fields, name, arch, Some(line))
517    } else {
518        simulate_layout(&mut fields, name, arch, Some(line), is_packed)
519    };
520
521    // Apply struct-level alignas: the struct's alignment requirement is at
522    // least N; trailing padding may grow to satisfy the new alignment.
523    if let Some(al) = struct_alignas
524        && al > layout.align
525    {
526        layout.align = al;
527        if !is_packed {
528            layout.total_size = layout.total_size.next_multiple_of(al);
529        }
530    }
531
532    Some(layout)
533}
534
535/// Parse a `typedef struct/union { ... } Name;` type_definition node.
536fn parse_typedef_struct_or_union(
537    source: &str,
538    node: Node<'_>,
539    arch: &'static ArchConfig,
540) -> Option<StructLayout> {
541    let mut specifier_node: Option<Node> = None;
542    let mut is_union = false;
543    let mut typedef_name: Option<String> = None;
544
545    for i in 0..node.child_count() {
546        let child = node.child(i)?;
547        match child.kind() {
548            "struct_specifier" => {
549                specifier_node = Some(child);
550                is_union = false;
551            }
552            "union_specifier" => {
553                specifier_node = Some(child);
554                is_union = true;
555            }
556            "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
557            _ => {}
558        }
559    }
560
561    let spec = specifier_node?;
562    let typedef_name = typedef_name?;
563
564    let mut layout = parse_struct_or_union_specifier(source, spec, arch, is_union)?;
565    if layout.name == "<anonymous>" {
566        layout.name = typedef_name;
567    }
568    Some(layout)
569}
570
// Thin alias that forwards unchanged to `parse_typedef_struct_or_union`.
// The typedef pass in `extract_structs_from_tree` calls
// `parse_typedef_struct_or_union` directly, so nothing in the visible part of
// this file uses this wrapper — hence the `dead_code` allowance.
#[allow(dead_code)]
fn parse_typedef_struct(
    source: &str,
    node: Node<'_>,
    arch: &'static ArchConfig,
) -> Option<StructLayout> {
    parse_typedef_struct_or_union(source, node, arch)
}
580
/// Return the byte index of the `)` that closes an argument list, given the
/// text that follows its opening `(`. Nested parentheses are skipped.
/// Returns `None` when the list is never closed.
fn guard_argument_end(inner: &str) -> Option<usize> {
    let mut depth = 0usize;
    for (idx, byte) in inner.bytes().enumerate() {
        match byte {
            b'(' => depth += 1,
            b')' if depth == 0 => return Some(idx),
            b')' => depth -= 1,
            _ => {}
        }
    }
    None
}

/// Extract a lock guard name from the text of a C/C++ field declaration that
/// carries `__attribute__((guarded_by(X)))` or `__attribute__((pt_guarded_by(X)))`.
///
/// Also recognises the common macro forms `GUARDED_BY(X)` and `PT_GUARDED_BY(X)`
/// (Clang thread-safety analysis). Matching is done on raw text, so it works
/// regardless of how tree-sitter structures the inner tokens.
///
/// Details:
/// - The keyword is matched in all-lowercase or all-uppercase spelling only;
///   lowercase occurrences are tried first. The `pt_` / `PT_` variants need no
///   separate entry because they contain the plain keyword.
/// - Every occurrence is inspected, so an identifier that merely contains the
///   keyword (e.g. a field called `guarded_by_hits`) does not hide a real
///   attribute later in the text.
/// - The argument runs to the matching `)`, so guard expressions containing
///   calls such as `obj.mutex()` are returned whole.
/// - Surrounding whitespace and double quotes are removed; an empty argument
///   is not a match.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    const KEYWORDS: [&str; 2] = ["guarded_by", "GUARDED_BY"];

    for kw in KEYWORDS.iter() {
        for (pos, _) in field_source.match_indices(*kw) {
            // Expect `(` optionally preceded by whitespace
            let after = field_source[pos + kw.len()..].trim_start();
            let Some(inner) = after.strip_prefix('(') else {
                continue;
            };
            let Some(end) = guard_argument_end(inner) else {
                continue;
            };
            let guard = inner[..end].trim().trim_matches('"');
            if !guard.is_empty() {
                return Some(guard.to_string());
            }
        }
    }
    None
}
608
609/// Parse a numeric value from an `alignas_qualifier` node: `alignas(N)`.
610/// tree-sitter-cpp uses the node kind `alignas_qualifier` for C++11 `alignas`.
611/// Returns `None` when the specifier contains a type expression rather than
612/// an integer literal (e.g. `alignas(double)` — handled elsewhere by the
613/// compiler; we skip those conservatively).
614fn parse_alignas_value(source: &str, node: Node<'_>) -> Option<usize> {
615    for i in 0..node.child_count() {
616        if let Some(child) = node.child(i) {
617            match child.kind() {
618                "number_literal" | "integer_literal" | "integer" => {
619                    let text = source[child.byte_range()].trim();
620                    if let Ok(n) = text.parse::<usize>() {
621                        return Some(n);
622                    }
623                    // Hex literal: 0x40
624                    if let Some(hex) = text.strip_prefix("0x").or_else(|| text.strip_prefix("0X")) {
625                        return usize::from_str_radix(hex, 16).ok();
626                    }
627                }
628                // Recurse for nested nodes (parenthesised expression, etc.)
629                "parenthesized_expression" | "argument_list" | "alignas_qualifier" => {
630                    if let r @ Some(_) = parse_alignas_value(source, child) {
631                        return r;
632                    }
633                }
634                _ => {}
635            }
636        }
637    }
638    None
639}
640
641/// Returns `(ty, field_name, guard, alignas_override)`.
642/// `alignas_override` is `Some(N)` when the field carries `alignas(N)`.
643fn parse_field_declaration(
644    source: &str,
645    node: Node<'_>,
646) -> Option<(String, String, Option<String>, Option<usize>)> {
647    let mut ty_parts: Vec<String> = Vec::new();
648    let mut field_name: Option<String> = None;
649    // Bit-field width, e.g. `int flags : 3;` → Some("3")
650    let mut bit_width: Option<String> = None;
651    // Collect attribute text for guard extraction
652    let mut attr_text = String::new();
653    // Field-level alignas override
654    let mut alignas_override: Option<usize> = None;
655
656    for i in 0..node.child_count() {
657        let child = node.child(i)?;
658        match child.kind() {
659            "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
660                ty_parts.push(source[child.byte_range()].trim().to_string());
661            }
662            // C++ qualified types: std::mutex, ns::Type, etc.
663            // C++ template types:  std::atomic<uint64_t>, std::vector<int>, etc.
664            "qualified_identifier" | "template_type" => {
665                ty_parts.push(source[child.byte_range()].trim().to_string());
666            }
667            // Nested struct/union used as a field type: `struct Vec2 tl;`
668            // Extract just the type_identifier name (e.g. "Vec2") so the
669            // nested-struct resolution pass can match it by name.
670            "struct_specifier" | "union_specifier" => {
671                for j in 0..child.child_count() {
672                    if let Some(sub) = child.child(j)
673                        && sub.kind() == "type_identifier"
674                    {
675                        ty_parts.push(source[sub.byte_range()].trim().to_string());
676                        break;
677                    }
678                }
679            }
680            "field_identifier" => {
681                field_name = Some(source[child.byte_range()].trim().to_string());
682            }
683            "pointer_declarator" => {
684                field_name = extract_identifier(source, child);
685                ty_parts.push("*".to_string());
686            }
687            // Bit-field clause: `: N`  (tree-sitter-c/cpp node)
688            "bitfield_clause" => {
689                let text = source[child.byte_range()].trim();
690                // Strip leading ':' and whitespace to get just the width digits
691                bit_width = Some(text.trim_start_matches(':').trim().to_string());
692            }
693            // GNU attribute specifier: __attribute__((...))
694            "attribute_specifier" | "attribute" => {
695                attr_text.push_str(source[child.byte_range()].trim());
696                attr_text.push(' ');
697            }
698            // C++11 alignas: tree-sitter-cpp wraps it as type_qualifier → alignas_qualifier
699            // Also handle the direct form in case grammar versions differ.
700            "alignas_qualifier" | "alignas_specifier" => {
701                if alignas_override.is_none() {
702                    alignas_override = parse_alignas_value(source, child);
703                }
704            }
705            // type_qualifier wraps alignas_qualifier for field declarations:
706            // `alignas(8) char c;` → type_qualifier { alignas_qualifier { ... } }
707            "type_qualifier" => {
708                if alignas_override.is_none() {
709                    for j in 0..child.child_count() {
710                        if let Some(sub) = child.child(j)
711                            && (sub.kind() == "alignas_qualifier"
712                                || sub.kind() == "alignas_specifier")
713                        {
714                            alignas_override = parse_alignas_value(source, sub);
715                            break;
716                        }
717                    }
718                }
719            }
720            _ => {}
721        }
722    }
723
724    let base_ty = ty_parts.join(" ");
725    let fname = field_name?;
726    if base_ty.is_empty() {
727        return None;
728    }
729    // Annotate bit-field types as "type:N" so callers can detect and report them;
730    // `strip_bitfield_suffix` recovers the base type for size/align lookup.
731    let ty = if let Some(w) = bit_width {
732        format!("{base_ty}:{w}")
733    } else {
734        base_ty
735    };
736
737    // Also check the full field source text (attribute_specifier may not always
738    // be a direct child depending on tree-sitter grammar version).
739    let field_src = source[node.byte_range()].to_string();
740    let guard = extract_guard_from_c_field_text(&attr_text)
741        .or_else(|| extract_guard_from_c_field_text(&field_src));
742
743    Some((ty, fname, guard, alignas_override))
744}
745
746fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
747    if node.kind() == "field_identifier" || node.kind() == "identifier" {
748        return Some(source[node.byte_range()].to_string());
749    }
750    for i in 0..node.child_count() {
751        if let Some(child) = node.child(i)
752            && let Some(name) = extract_identifier(source, child)
753        {
754            return Some(name);
755        }
756    }
757    None
758}
759
760// ── public API ────────────────────────────────────────────────────────────────
761
762pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
763    let mut parser = Parser::new();
764    parser.set_language(&tree_sitter_c::LANGUAGE.into())?;
765    let tree = parser
766        .parse(source, None)
767        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
768    let mut layouts = Vec::new();
769    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
770    Ok(layouts)
771}
772
773pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
774    let mut parser = Parser::new();
775    parser.set_language(&tree_sitter_cpp::LANGUAGE.into())?;
776    let tree = parser
777        .parse(source, None)
778        .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
779    let mut layouts = Vec::new();
780    extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
781    Ok(layouts)
782}
783
784// ── tests ─────────────────────────────────────────────────────────────────────
785
786#[cfg(test)]
787mod tests {
788    use super::*;
789    use padlock_core::arch::X86_64_SYSV;
790
791    #[test]
792    fn parse_simple_c_struct() {
793        let src = r#"
794struct Point {
795    int x;
796    int y;
797};
798"#;
799        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
800        assert_eq!(layouts.len(), 1);
801        assert_eq!(layouts[0].name, "Point");
802        assert_eq!(layouts[0].fields.len(), 2);
803        assert_eq!(layouts[0].fields[0].name, "x");
804        assert_eq!(layouts[0].fields[1].name, "y");
805    }
806
807    #[test]
808    fn parse_typedef_struct() {
809        let src = r#"
810typedef struct {
811    char  is_active;
812    double timeout;
813    int   port;
814} Connection;
815"#;
816        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
817        assert_eq!(layouts.len(), 1);
818        assert_eq!(layouts[0].name, "Connection");
819        assert_eq!(layouts[0].fields.len(), 3);
820    }
821
822    #[test]
823    fn c_layout_computes_offsets() {
824        let src = "struct T { char a; double b; };";
825        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
826        assert_eq!(layouts.len(), 1);
827        let layout = &layouts[0];
828        // char at offset 0, double at offset 8 (7 bytes padding)
829        assert_eq!(layout.fields[0].offset, 0);
830        assert_eq!(layout.fields[1].offset, 8);
831        assert_eq!(layout.total_size, 16);
832    }
833
834    #[test]
835    fn c_layout_detects_padding() {
836        let src = "struct T { char a; int b; };";
837        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
838        let gaps = padlock_core::ir::find_padding(&layouts[0]);
839        assert!(!gaps.is_empty());
840        assert_eq!(gaps[0].bytes, 3); // 3 bytes padding between char and int
841    }
842
843    #[test]
844    fn parse_cpp_struct() {
845        let src = "struct Vec3 { float x; float y; float z; };";
846        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
847        assert_eq!(layouts.len(), 1);
848        assert_eq!(layouts[0].fields.len(), 3);
849    }
850
851    // ── SIMD types ────────────────────────────────────────────────────────────
852
853    #[test]
854    fn simd_sse_field_size_and_align() {
855        let src = "struct Vecs { __m128 a; __m256 b; };";
856        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
857        assert_eq!(layouts.len(), 1);
858        let f = &layouts[0].fields;
859        assert_eq!(f[0].size, 16); // __m128
860        assert_eq!(f[0].align, 16);
861        assert_eq!(f[1].size, 32); // __m256
862        assert_eq!(f[1].align, 32);
863    }
864
865    #[test]
866    fn simd_avx512_size() {
867        let src = "struct Wide { __m512 v; };";
868        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
869        assert_eq!(layouts[0].fields[0].size, 64);
870        assert_eq!(layouts[0].fields[0].align, 64);
871    }
872
873    #[test]
874    fn simd_padding_detected_when_small_field_before_avx() {
875        // char(1) + [31 pad] + __m256(32) = 64 bytes, 31 wasted
876        let src = "struct Mixed { char flag; __m256 data; };";
877        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
878        let gaps = padlock_core::ir::find_padding(&layouts[0]);
879        assert!(!gaps.is_empty());
880        assert_eq!(gaps[0].bytes, 31);
881    }
882
883    // ── union parsing ─────────────────────────────────────────────────────────
884
885    #[test]
886    fn union_fields_all_at_offset_zero() {
887        let src = "union Data { int i; float f; double d; };";
888        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
889        assert_eq!(layouts.len(), 1);
890        let u = &layouts[0];
891        assert!(u.is_union);
892        for field in &u.fields {
893            assert_eq!(
894                field.offset, 0,
895                "union field '{}' should be at offset 0",
896                field.name
897            );
898        }
899    }
900
901    #[test]
902    fn union_total_size_is_max_field() {
903        // double is the largest (8 bytes); total should be 8
904        let src = "union Data { int i; float f; double d; };";
905        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
906        assert_eq!(layouts[0].total_size, 8);
907    }
908
909    #[test]
910    fn union_no_padding_finding() {
911        let src = "union Data { int i; double d; };";
912        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
913        let report = padlock_core::findings::Report::from_layouts(&layouts);
914        let sr = &report.structs[0];
915        assert!(
916            !sr.findings
917                .iter()
918                .any(|f| matches!(f, padlock_core::findings::Finding::PaddingWaste { .. }))
919        );
920        assert!(
921            !sr.findings
922                .iter()
923                .any(|f| matches!(f, padlock_core::findings::Finding::ReorderSuggestion { .. }))
924        );
925    }
926
927    #[test]
928    fn typedef_union_parsed() {
929        let src = "typedef union { int a; double b; } Value;";
930        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
931        assert_eq!(layouts.len(), 1);
932        assert_eq!(layouts[0].name, "Value");
933        assert!(layouts[0].is_union);
934    }
935
936    // ── attribute guard extraction ─────────────────────────────────────────────
937
938    #[test]
939    fn extract_guard_from_c_guarded_by_macro() {
940        let text = "int value GUARDED_BY(mu);";
941        let guard = extract_guard_from_c_field_text(text);
942        assert_eq!(guard.as_deref(), Some("mu"));
943    }
944
945    #[test]
946    fn extract_guard_from_c_attribute_specifier() {
947        let text = "__attribute__((guarded_by(counter_lock))) uint64_t counter;";
948        let guard = extract_guard_from_c_field_text(text);
949        assert_eq!(guard.as_deref(), Some("counter_lock"));
950    }
951
952    #[test]
953    fn extract_guard_pt_guarded_by() {
954        let text = "int *ptr PT_GUARDED_BY(ptr_lock);";
955        let guard = extract_guard_from_c_field_text(text);
956        assert_eq!(guard.as_deref(), Some("ptr_lock"));
957    }
958
959    #[test]
960    fn no_guard_returns_none() {
961        let guard = extract_guard_from_c_field_text("int x;");
962        assert!(guard.is_none());
963    }
964
965    #[test]
966    fn c_struct_guarded_by_sets_concurrent_access() {
967        // Using GUARDED_BY macro style in comments/text — tree-sitter won't parse
968        // macro expansions, so test the text-extraction path via parse_field_declaration
969        // indirectly by checking extract_guard_from_c_field_text.
970        let text = "uint64_t readers GUARDED_BY(lock_a);";
971        assert_eq!(
972            extract_guard_from_c_field_text(text).as_deref(),
973            Some("lock_a")
974        );
975    }
976
977    #[test]
978    fn c_struct_different_guards_detected_as_false_sharing() {
979        use padlock_core::arch::X86_64_SYSV;
980        use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
981
982        // Manually build a layout with two fields on the same cache line,
983        // different guards — mirrors what the C frontend would produce for
984        // __attribute__((guarded_by(...))) annotated fields.
985        let mut layout = StructLayout {
986            name: "S".into(),
987            total_size: 128,
988            align: 8,
989            fields: vec![
990                Field {
991                    name: "readers".into(),
992                    ty: TypeInfo::Primitive {
993                        name: "uint64_t".into(),
994                        size: 8,
995                        align: 8,
996                    },
997                    offset: 0,
998                    size: 8,
999                    align: 8,
1000                    source_file: None,
1001                    source_line: None,
1002                    access: AccessPattern::Concurrent {
1003                        guard: Some("lock_a".into()),
1004                        is_atomic: false,
1005                    },
1006                },
1007                Field {
1008                    name: "writers".into(),
1009                    ty: TypeInfo::Primitive {
1010                        name: "uint64_t".into(),
1011                        size: 8,
1012                        align: 8,
1013                    },
1014                    offset: 8,
1015                    size: 8,
1016                    align: 8,
1017                    source_file: None,
1018                    source_line: None,
1019                    access: AccessPattern::Concurrent {
1020                        guard: Some("lock_b".into()),
1021                        is_atomic: false,
1022                    },
1023                },
1024            ],
1025            source_file: None,
1026            source_line: None,
1027            arch: &X86_64_SYSV,
1028            is_packed: false,
1029            is_union: false,
1030        };
1031        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
1032            &layout
1033        ));
1034        // Same guard → no false sharing
1035        layout.fields[1].access = AccessPattern::Concurrent {
1036            guard: Some("lock_a".into()),
1037            is_atomic: false,
1038        };
1039        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
1040            &layout
1041        ));
1042    }
1043
1044    // ── C++ class: vtable pointer ─────────────────────────────────────────────
1045
1046    #[test]
1047    fn cpp_class_with_virtual_method_has_vptr() {
1048        let src = r#"
1049class Widget {
1050    virtual void draw();
1051    int x;
1052    int y;
1053};
1054"#;
1055        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1056        assert_eq!(layouts.len(), 1);
1057        let l = &layouts[0];
1058        // First field must be __vptr
1059        assert_eq!(l.fields[0].name, "__vptr");
1060        assert_eq!(l.fields[0].size, 8); // pointer on x86_64
1061        // __vptr is at offset 0
1062        assert_eq!(l.fields[0].offset, 0);
1063        // int x should come after the pointer (at offset 8)
1064        let x = l.fields.iter().find(|f| f.name == "x").unwrap();
1065        assert_eq!(x.offset, 8);
1066    }
1067
1068    #[test]
1069    fn cpp_class_without_virtual_has_no_vptr() {
1070        let src = "class Plain { int a; int b; };";
1071        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1072        assert_eq!(layouts.len(), 1);
1073        assert!(!layouts[0].fields.iter().any(|f| f.name == "__vptr"));
1074    }
1075
1076    #[test]
1077    fn cpp_struct_keyword_with_virtual_has_vptr() {
1078        // `struct` in C++ can also have virtual methods
1079        let src = "struct IFoo { virtual ~IFoo(); virtual void bar(); };";
1080        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1081        // struct_specifier doesn't go through parse_class_specifier, so no __vptr
1082        // (vtable injection is only for `class` nodes)
1083        let _ = layouts; // just verify it parses without panic
1084    }
1085
1086    // ── C++ class: single inheritance ─────────────────────────────────────────
1087
1088    #[test]
1089    fn cpp_derived_class_has_base_slot() {
1090        let src = r#"
1091class Base {
1092    int x;
1093};
1094class Derived : public Base {
1095    int y;
1096};
1097"#;
1098        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1099        // Both Base and Derived should be parsed
1100        let derived = layouts.iter().find(|l| l.name == "Derived").unwrap();
1101        // Derived must have a __base_Base synthetic field
1102        assert!(
1103            derived.fields.iter().any(|f| f.name == "__base_Base"),
1104            "Derived should have a __base_Base field"
1105        );
1106        // The y field should come after __base_Base
1107        let base_field = derived
1108            .fields
1109            .iter()
1110            .find(|f| f.name == "__base_Base")
1111            .unwrap();
1112        let y_field = derived.fields.iter().find(|f| f.name == "y").unwrap();
1113        assert!(y_field.offset >= base_field.offset + base_field.size);
1114    }
1115
1116    #[test]
1117    fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
1118        let src = r#"
1119class A { int a; };
1120class B { int b; };
1121class C : public A, public B { int c; };
1122"#;
1123        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1124        let c = layouts.iter().find(|l| l.name == "C").unwrap();
1125        assert!(c.fields.iter().any(|f| f.name == "__base_A"));
1126        assert!(c.fields.iter().any(|f| f.name == "__base_B"));
1127    }
1128
1129    #[test]
1130    fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
1131        // class with virtual method: size = sizeof(__vptr) + member fields + padding
1132        let src = "class V { virtual void f(); int x; };";
1133        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1134        let l = &layouts[0];
1135        // __vptr(8) + int(4) + 4 pad = 16 bytes on x86_64
1136        assert_eq!(l.total_size, 16);
1137    }
1138
1139    // ── bitfield handling ─────────────────────────────────────────────────────
1140
1141    #[test]
1142    fn is_bitfield_type_detects_colon_n() {
1143        assert!(is_bitfield_type("int:3"));
1144        assert!(is_bitfield_type("unsigned int:16"));
1145        assert!(is_bitfield_type("uint32_t:1"));
1146        // Not bit-fields — contains ':' but not followed by pure digits
1147        assert!(!is_bitfield_type("std::atomic<int>"));
1148        assert!(!is_bitfield_type("ns::Type"));
1149        assert!(!is_bitfield_type("int"));
1150    }
1151
1152    #[test]
1153    fn struct_with_bitfields_is_skipped() {
1154        // Bit-field layout is compiler-controlled and cannot be accurately modelled
1155        // without a compiler. The struct must be skipped entirely.
1156        let src = r#"
1157struct Flags {
1158    unsigned int active : 1;
1159    unsigned int ready  : 1;
1160    unsigned int error  : 6;
1161    int value;
1162};
1163"#;
1164        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1165        // Flags must not appear — its layout cannot be accurately computed.
1166        assert!(
1167            layouts.iter().all(|l| l.name != "Flags"),
1168            "struct with bitfields should be skipped; got {:?}",
1169            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
1170        );
1171    }
1172
1173    #[test]
1174    fn struct_without_bitfields_is_still_parsed() {
1175        // Ensure the bitfield guard doesn't affect normal structs.
1176        let src = "struct Normal { int a; char b; double c; };";
1177        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1178        assert_eq!(layouts.len(), 1);
1179        assert_eq!(layouts[0].name, "Normal");
1180    }
1181
1182    #[test]
1183    fn cpp_class_with_bitfields_is_skipped() {
1184        let src = "class Packed { int x : 4; int y : 4; };";
1185        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1186        assert!(
1187            layouts.iter().all(|l| l.name != "Packed"),
1188            "C++ class with bitfields should be skipped"
1189        );
1190    }
1191
1192    // ── __attribute__((packed)) detection ─────────────────────────────────────
1193
1194    #[test]
1195    fn packed_struct_has_no_alignment_padding() {
1196        // Without packed: char(1) + 3-byte pad + int(4) + char(1) + 3-byte pad = 12 bytes
1197        // With packed:    char(1) + int(4) + char(1) = 6 bytes, align=1
1198        let src = r#"
1199struct __attribute__((packed)) Tight {
1200    char a;
1201    int  b;
1202    char c;
1203};
1204"#;
1205        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1206        let l = layouts.iter().find(|l| l.name == "Tight").expect("Tight");
1207        assert!(l.is_packed, "should be marked is_packed");
1208        assert_eq!(l.total_size, 6, "packed: no padding inserted");
1209        assert_eq!(l.fields[0].offset, 0);
1210        assert_eq!(l.fields[1].offset, 1); // immediately after char
1211        assert_eq!(l.fields[2].offset, 5);
1212    }
1213
1214    #[test]
1215    fn non_packed_struct_has_normal_alignment_padding() {
1216        // Confirm baseline: same struct without __attribute__((packed)) gets padded
1217        let src = r#"
1218struct Normal {
1219    char a;
1220    int  b;
1221    char c;
1222};
1223"#;
1224        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1225        let l = layouts.iter().find(|l| l.name == "Normal").expect("Normal");
1226        assert!(!l.is_packed);
1227        assert_eq!(l.total_size, 12);
1228        assert_eq!(l.fields[1].offset, 4); // aligned to 4
1229    }
1230
1231    #[test]
1232    fn cpp_class_packed_attribute_detected() {
1233        let src = r#"
1234class __attribute__((packed)) Dense {
1235    char a;
1236    int  b;
1237};
1238"#;
1239        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1240        let l = layouts.iter().find(|l| l.name == "Dense").expect("Dense");
1241        assert!(
1242            l.is_packed,
1243            "C++ class with __attribute__((packed)) must be marked packed"
1244        );
1245        assert_eq!(l.total_size, 5); // char(1) + int(4), no padding
1246    }
1247
1248    // ── alignas detection ─────────────────────────────────────────────────────
1249
1250    #[test]
1251    fn field_alignas_overrides_natural_alignment() {
1252        // char is normally align=1 but alignas(8) forces it to align-8.
1253        // Layout: c(1B at offset 0, align=8) + x(4B at offset 4, align=4)
1254        // c must start on an 8-byte boundary (trivially satisfied at offset 0).
1255        // After c (1 byte), x aligns to 4: offset = 1.next_multiple_of(4) = 4.
1256        // Struct align = max(8, 4) = 8. Total = 8 bytes (4+4 → 8 → ok for align 8).
1257        let src = r#"
1258struct S {
1259    alignas(8) char c;
1260    int x;
1261};
1262"#;
1263        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1264        let l = layouts.iter().find(|l| l.name == "S").expect("S");
1265        // c should be forced to align 8
1266        let c_field = l.fields.iter().find(|f| f.name == "c").unwrap();
1267        assert_eq!(c_field.align, 8);
1268        // x comes after c (1 byte) with natural alignment 4 → offset 4
1269        let x_field = l.fields.iter().find(|f| f.name == "x").unwrap();
1270        assert_eq!(x_field.offset, 4);
1271        // Struct alignment is max(alignas(8), int align 4) = 8
1272        assert_eq!(l.align, 8);
1273        // Total = 8 bytes (x at 4, size 4; 4+4=8; 8 is multiple of align 8)
1274        assert_eq!(l.total_size, 8);
1275    }
1276
1277    #[test]
1278    fn struct_level_alignas_increases_struct_alignment() {
1279        // alignas(64) on the struct means its alignment requirement is 64.
1280        // Total size must be a multiple of 64.
1281        let src = r#"
1282struct alignas(64) CacheLine {
1283    int x;
1284    int y;
1285};
1286"#;
1287        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1288        let l = layouts
1289            .iter()
1290            .find(|l| l.name == "CacheLine")
1291            .expect("CacheLine");
1292        assert_eq!(l.align, 64);
1293        assert_eq!(l.total_size % 64, 0);
1294    }
1295
1296    #[test]
1297    fn alignas_on_field_smaller_than_natural_is_ignored() {
1298        // alignas(1) on an int field: does NOT reduce alignment below 4.
1299        // In C++, alignas cannot reduce alignment below the natural alignment.
1300        // Our implementation stores the alignas value; natural alignment wins
1301        // because we take max(alignas, natural) in the caller.
1302        // Note: we currently store alignas directly; this test documents behaviour.
1303        let src = "struct S { int x; int y; };";
1304        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1305        let l = &layouts[0];
1306        assert_eq!(l.fields[0].align, 4); // natural alignment, not reduced
1307    }
1308
1309    #[test]
1310    fn cpp_class_alignas_detected() {
1311        let src = r#"
1312class alignas(32) Aligned {
1313    double x;
1314    double y;
1315};
1316"#;
1317        let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1318        let l = layouts
1319            .iter()
1320            .find(|l| l.name == "Aligned")
1321            .expect("Aligned");
1322        assert_eq!(l.align, 32);
1323        assert_eq!(l.total_size % 32, 0);
1324    }
1325
1326    // ── bad weather: alignas edge cases ───────────────────────────────────────
1327
1328    #[test]
1329    fn struct_without_alignas_unchanged() {
1330        // Ensure the alignas detection path doesn't affect structs without it
1331        let src = "struct Plain { int a; char b; };";
1332        let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1333        let l = &layouts[0];
1334        assert_eq!(l.align, 4); // max field alignment = int = 4
1335        assert_eq!(l.total_size, 8); // int(4) + char(1) + 3 pad
1336    }
1337}