1use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use std::cell::Cell;
10use std::collections::HashMap;
11use tree_sitter::{Node, Parser};
12
13use crate::CppStdlib;
14
// Per-thread choice of C++ standard library, read by `active_stdlib` and
// written by `set_stdlib`. Every thread starts out with `CppStdlib::LibStdCpp`.
thread_local! {
    static STDLIB: Cell<CppStdlib> = const { Cell::new(CppStdlib::LibStdCpp) };
}
18
19pub(crate) fn set_stdlib(s: CppStdlib) {
21 STDLIB.with(|c| c.set(s));
22}
23
24fn active_stdlib() -> CppStdlib {
25 STDLIB.with(|c| c.get())
26}
27
28fn c_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
32 let ty = ty.trim();
33 for qual in &["const ", "volatile ", "restrict ", "unsigned ", "signed "] {
35 if let Some(rest) = ty.strip_prefix(qual) {
36 return c_type_size_align(rest, arch);
37 }
38 }
39 match ty {
41 "__m64" => return (8, 8),
42 "__m128" | "__m128d" | "__m128i" => return (16, 16),
43 "__m256" | "__m256d" | "__m256i" => return (32, 32),
44 "__m512" | "__m512d" | "__m512i" => return (64, 64),
45 "float32x2_t" | "int32x2_t" | "uint32x2_t" | "int8x8_t" | "uint8x8_t" | "int16x4_t"
47 | "uint16x4_t" | "float64x1_t" | "int64x1_t" | "uint64x1_t" => return (8, 8),
48 "float32x4_t" | "int32x4_t" | "uint32x4_t" | "float64x2_t" | "int64x2_t" | "uint64x2_t"
50 | "int8x16_t" | "uint8x16_t" | "int16x8_t" | "uint16x8_t" => return (16, 16),
51 _ => {}
52 }
53 let stdlib = active_stdlib();
55 match ty {
56 "std::mutex"
61 | "std::recursive_mutex"
62 | "std::timed_mutex"
63 | "std::recursive_timed_mutex"
64 | "pthread_mutex_t" => {
65 return match stdlib {
66 CppStdlib::LibCpp => (56, 8), _ => (40, 8),
68 };
69 }
70 "std::shared_mutex" | "std::shared_timed_mutex" => return (56, 8),
71 "std::condition_variable" | "pthread_cond_t" => {
72 return match stdlib {
73 CppStdlib::LibCpp => (40, 8), _ => (48, 8),
75 };
76 }
77
78 "std::string" | "std::wstring" | "std::u8string" | "std::u16string" | "std::u32string"
83 | "std::pmr::string" => {
84 return match stdlib {
85 CppStdlib::LibCpp => (24, 8),
86 _ => (32, 8), };
88 }
89 "std::string_view"
91 | "std::wstring_view"
92 | "std::u8string_view"
93 | "std::u16string_view"
94 | "std::u32string_view" => return (arch.pointer_size * 2, arch.pointer_size),
95
96 ty if ty.starts_with("std::vector<") || ty == "std::vector" => {
100 return (arch.pointer_size * 3, arch.pointer_size);
101 }
102 ty if ty.starts_with("std::deque<") || ty == "std::deque" => return (80, 8),
104 ty if ty.starts_with("std::list<") || ty == "std::list" => {
107 return (arch.pointer_size * 3, arch.pointer_size);
108 }
109 ty if ty.starts_with("std::forward_list<") || ty == "std::forward_list" => {
111 return (arch.pointer_size, arch.pointer_size);
112 }
113 ty if ty.starts_with("std::map<")
120 || ty.starts_with("std::multimap<")
121 || ty.starts_with("std::set<")
122 || ty.starts_with("std::multiset<") =>
123 {
124 return (48, 8);
125 }
126 ty if ty.starts_with("std::unordered_map<")
129 || ty.starts_with("std::unordered_multimap<")
130 || ty.starts_with("std::unordered_set<")
131 || ty.starts_with("std::unordered_multiset<") =>
132 {
133 return (56, 8);
134 }
135
136 ty if ty.starts_with("std::unique_ptr<") || ty == "std::unique_ptr" => {
139 return (arch.pointer_size, arch.pointer_size);
140 }
141 ty if ty.starts_with("std::shared_ptr<")
143 || ty == "std::shared_ptr"
144 || ty.starts_with("std::weak_ptr<")
145 || ty == "std::weak_ptr" =>
146 {
147 return (arch.pointer_size * 2, arch.pointer_size);
148 }
149
150 ty if ty.starts_with("std::function<") || ty == "std::function" => return (32, 8),
154 "std::any" => return (32, 8),
156 "std::error_code" | "std::error_condition" => return (16, 8),
158 "std::exception_ptr" => return (arch.pointer_size, arch.pointer_size),
160 "std::type_index" => return (arch.pointer_size, arch.pointer_size),
162 ty if ty.starts_with("std::span<") || ty == "std::span" => {
164 return (arch.pointer_size * 2, arch.pointer_size);
165 }
166 ty if ty.starts_with("std::optional<") && ty.ends_with('>') => {
169 let inner = &ty["std::optional<".len()..ty.len() - 1];
170 let (t_size, t_align) = c_type_size_align(inner.trim(), arch);
171 let total = (t_size + 1).next_multiple_of(t_align.max(1));
172 return (total, t_align.max(1));
173 }
174
175 ty if ty.starts_with("std::atomic<") && ty.ends_with('>') => {
178 let inner = &ty[12..ty.len() - 1];
179 return c_type_size_align(inner.trim(), arch);
180 }
181 "std::atomic_flag" => return (4, 4),
183
184 _ => {} }
186 match ty {
188 "char" | "_Bool" | "bool" => (1, 1),
189 "short" | "short int" => (2, 2),
190 "int" => (4, 4),
191 "long" | "long int" => (arch.pointer_size, arch.pointer_size),
192 "long long" | "long long int" => (8, 8),
193 "float" => (4, 4),
194 "double" => (8, 8),
195 "long double" => (16, 16),
196
197 "int8_t" | "uint8_t" => (1, 1),
199 "int16_t" | "uint16_t" => (2, 2),
200 "int32_t" | "uint32_t" => (4, 4),
201 "int64_t" | "uint64_t" => (8, 8),
202 "intmax_t" | "uintmax_t" => (8, 8),
203 "size_t" | "ssize_t" | "ptrdiff_t" | "intptr_t" | "uintptr_t" => {
204 (arch.pointer_size, arch.pointer_size)
205 }
206
207 "int_fast8_t" | "uint_fast8_t" => (1, 1),
210 "int_fast16_t" | "uint_fast16_t" => (2, 2),
211 "int_fast32_t" | "uint_fast32_t" | "int_fast64_t" | "uint_fast64_t" => {
212 (arch.pointer_size, arch.pointer_size)
213 }
214
215 "int_least8_t" | "uint_least8_t" => (1, 1),
217 "int_least16_t" | "uint_least16_t" => (2, 2),
218 "int_least32_t" | "uint_least32_t" => (4, 4),
219 "int_least64_t" | "uint_least64_t" => (8, 8),
220
221 "__int128" | "__uint128" | "__int128_t" | "__uint128_t" => (16, 16),
223
224 "u8" | "s8" => (1, 1),
226 "u16" | "s16" => (2, 2),
227 "u32" | "s32" => (4, 4),
228 "u64" | "s64" => (8, 8),
229
230 "__u8" | "__s8" | "__u8__" | "__s8__" => (1, 1),
232 "__u16" | "__s16" | "__be16" | "__le16" => (2, 2),
233 "__u32" | "__s32" | "__be32" | "__le32" => (4, 4),
234 "__u64" | "__s64" | "__be64" | "__le64" => (8, 8),
235
236 "__int8" => (1, 1),
238 "__int16" => (2, 2),
239 "__int32" => (4, 4),
240 "__int64" => (8, 8),
241
242 "BYTE" | "BOOLEAN" | "CHAR" | "INT8" | "UINT8" => (1, 1),
244 "WORD" | "WCHAR" | "SHORT" | "USHORT" | "INT16" | "UINT16" => (2, 2),
245 "DWORD" | "LONG" | "ULONG" | "INT" | "UINT" | "BOOL" | "FLOAT" | "INT32" | "UINT32" => {
246 (4, 4)
247 }
248 "QWORD" | "LONGLONG" | "ULONGLONG" | "INT64" | "UINT64" | "LARGE_INTEGER" => (8, 8),
249 "DWORD64" | "ULONG64" | "LONG64" => (8, 8),
250 "HANDLE" | "LPVOID" | "PVOID" | "LPCVOID" | "LPSTR" | "LPCSTR" | "LPWSTR" | "LPCWSTR"
251 | "SIZE_T" | "SSIZE_T" | "ULONG_PTR" | "LONG_PTR" | "DWORD_PTR" | "INT_PTR"
252 | "UINT_PTR" => (arch.pointer_size, arch.pointer_size),
253
254 "wchar_t" => (4, 4),
258 "char8_t" => (1, 1),
259 "char16_t" => (2, 2),
260 "char32_t" => (4, 4),
261
262 "_Float16" | "__fp16" | "__bf16" => (2, 2),
264 "_Float128" | "__float128" => (16, 16),
266
267 ty if ty.ends_with('*') => (arch.pointer_size, arch.pointer_size),
269 _ => (arch.pointer_size, arch.pointer_size),
271 }
272}
273
/// Removes a trailing `:<digits>` bit-width from a type spelling.
///
/// The text after the last `:` must be non-empty and consist only of ASCII
/// digits (surrounding whitespace is ignored); otherwise `ty` is returned
/// unchanged, which keeps `::`-qualified names such as `std::string` intact.
fn strip_bitfield_suffix(ty: &str) -> &str {
    let Some((base, width)) = ty.rsplit_once(':') else {
        return ty;
    };
    let width = width.trim();
    let looks_like_width = !width.is_empty() && width.bytes().all(|b| b.is_ascii_digit());
    if looks_like_width { base.trim_end() } else { ty }
}
287
288fn is_bitfield_type(ty: &str) -> bool {
292 strip_bitfield_suffix(ty) != ty
293}
294
295fn simulate_layout(
305 fields: &mut Vec<Field>,
306 struct_name: String,
307 arch: &'static ArchConfig,
308 source_line: Option<u32>,
309 pack_n: usize,
310) -> StructLayout {
311 let mut offset = 0usize;
312 let mut struct_align = 1usize;
313
314 for f in fields.iter_mut() {
315 let eff_align = if pack_n > 0 {
316 f.align.min(pack_n)
317 } else {
318 f.align
319 };
320 if eff_align > 0 {
321 offset = offset.next_multiple_of(eff_align);
322 }
323 f.offset = offset;
324 offset += f.size;
325 struct_align = struct_align.max(eff_align);
326 }
327 if pack_n != 1 && struct_align > 0 {
329 offset = offset.next_multiple_of(struct_align);
330 }
331
332 StructLayout {
333 name: struct_name,
334 total_size: offset,
335 align: struct_align,
336 fields: std::mem::take(fields),
337 source_file: None,
338 source_line,
339 arch,
340 is_packed: pack_n == 1,
341 is_union: false,
342 is_repr_rust: false,
343 suppressed_findings: Vec::new(),
344 uncertain_fields: Vec::new(),
345 }
346}
347
348fn simulate_union_layout(
351 fields: &mut Vec<Field>,
352 name: String,
353 arch: &'static ArchConfig,
354 source_line: Option<u32>,
355) -> StructLayout {
356 for f in fields.iter_mut() {
357 f.offset = 0;
358 }
359 let max_size = fields.iter().map(|f| f.size).max().unwrap_or(0);
360 let max_align = fields.iter().map(|f| f.align).max().unwrap_or(1);
361 let total_size = if max_align > 0 {
362 max_size.next_multiple_of(max_align)
363 } else {
364 max_size
365 };
366
367 StructLayout {
368 name,
369 total_size,
370 align: max_align,
371 fields: std::mem::take(fields),
372 source_file: None,
373 source_line,
374 arch,
375 is_packed: false,
376 is_union: true,
377 is_repr_rust: false,
378 suppressed_findings: Vec::new(),
379 uncertain_fields: Vec::new(),
380 }
381}
382
/// Builds a `StructLayout` from a tree-sitter `class_specifier` node.
///
/// `aliases` maps typedef names to their underlying type spelling and
/// `pragma_pack` is the `#pragma pack` value in effect (`0` = none).
///
/// Returns `None` when the class has no body, when any member is a bit-field
/// (a note is printed to stderr), when no fields at all were produced, or when
/// a child of `node` cannot be fetched.
fn parse_class_specifier(
    source: &str,
    node: Node<'_>,
    arch: &'static ArchConfig,
    aliases: &HashMap<String, String>,
    pragma_pack: usize,
) -> Option<StructLayout> {
    let mut class_name = "<anonymous>".to_string();
    let mut base_names: Vec<String> = Vec::new();
    let mut body_node: Option<Node> = None;
    let mut is_packed = false;
    let mut struct_alignas: Option<usize> = None;

    // Gather the class name, base classes, body and class-level attributes.
    for i in 0..node.child_count() {
        let child = node.child(i)?;
        match child.kind() {
            "type_identifier" => class_name = source[child.byte_range()].to_string(),
            "base_class_clause" => {
                // Only bases that appear as a plain `type_identifier` are recorded.
                for j in 0..child.child_count() {
                    if let Some(base) = child.child(j)
                        && base.kind() == "type_identifier"
                    {
                        base_names.push(source[base.byte_range()].to_string());
                    }
                }
            }
            "field_declaration_list" => body_node = Some(child),
            "attribute_specifier" if source[child.byte_range()].contains("packed") => {
                is_packed = true;
            }
            // Only the first alignas on the class is considered.
            "alignas_qualifier" | "alignas_specifier" if struct_alignas.is_none() => {
                struct_alignas = parse_alignas_value(source, child);
            }
            _ => {}
        }
    }

    let body = body_node?;

    // Any `virtual` token anywhere under the body counts.
    let has_virtual = contains_virtual_keyword(source, body);

    // Collect member declarations; members of anonymous nested records are
    // spliced in directly.
    let mut raw_fields: Vec<RawField> = Vec::new();
    for i in 0..body.child_count() {
        let Some(child) = body.child(i) else {
            continue;
        };
        if child.kind() == "field_declaration" {
            if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, false) {
                raw_fields.extend(anon_fields);
            } else if let Some((ty, fname, guard, al, ln)) = parse_field_declaration(source, child)
            {
                // Reordered to (name, type, ...) for storage in `raw_fields`.
                raw_fields.push((fname, ty, guard, al, ln));
            }
        }
    }

    let mut fields: Vec<Field> = Vec::new();

    // Synthetic pointer-sized `__vptr` field, placed first.
    if has_virtual {
        let ps = arch.pointer_size;
        fields.push(Field {
            name: "__vptr".to_string(),
            ty: TypeInfo::Pointer {
                size: ps,
                align: ps,
            },
            offset: 0,
            size: ps,
            align: ps,
            source_file: None,
            source_line: None,
            access: AccessPattern::Unknown,
        });
    }

    // Each base class is represented by a pointer-sized opaque placeholder
    // named `__base_<name>`; the base's own size is not looked up here.
    for base in &base_names {
        let ps = arch.pointer_size;
        fields.push(Field {
            name: format!("__base_{base}"),
            ty: TypeInfo::Opaque {
                name: base.clone(),
                size: ps,
                align: ps,
            },
            offset: 0,
            size: ps,
            align: ps,
            source_file: None,
            source_line: None,
            access: AccessPattern::Unknown,
        });
    }

    // Classes containing any bit-field are skipped entirely.
    if raw_fields
        .iter()
        .any(|(_, ty, _, _, _)| is_bitfield_type(ty))
    {
        eprintln!(
            "padlock: note: skipping '{class_name}' — contains bit-fields \
             (bit-field layout is compiler-controlled; use binary analysis for accurate results)"
        );
        return None;
    }

    for (fname, ty_name, guard, alignas, field_line) in raw_fields {
        // Size the field by its typedef target when one is known; the field
        // still records the spelling used in the source.
        let resolved = aliases
            .get(&ty_name)
            .map(String::as_str)
            .unwrap_or(&ty_name);
        let (size, natural_align) = c_type_size_align(resolved, arch);
        // A field-level alignas replaces the natural alignment.
        let align = alignas.unwrap_or(natural_align);
        let access = if let Some(g) = guard {
            AccessPattern::Concurrent {
                guard: Some(g),
                is_atomic: false,
                is_annotated: true,
            }
        } else {
            AccessPattern::Unknown
        };
        fields.push(Field {
            name: fname,
            ty: TypeInfo::Primitive {
                name: ty_name,
                size,
                align,
            },
            offset: 0,
            size,
            align,
            source_file: None,
            source_line: Some(field_line),
            access,
        });
    }

    if fields.is_empty() {
        return None;
    }

    // tree-sitter rows are 0-based; the layout stores a 1-based line.
    let line = node.start_position().row as u32 + 1;
    // A `packed` attribute wins over `#pragma pack`; 0 means no packing.
    let pack_n = if is_packed {
        1
    } else if pragma_pack > 0 {
        pragma_pack
    } else {
        0
    };
    let mut layout = simulate_layout(&mut fields, class_name, arch, Some(line), pack_n);

    // A class-level alignas can only raise the alignment; the size is
    // re-rounded only when no packing is in effect.
    if let Some(al) = struct_alignas
        && al > layout.align
    {
        layout.align = al;
        if pack_n == 0 {
            layout.total_size = layout.total_size.next_multiple_of(al);
        }
    }

    layout.suppressed_findings =
        super::suppress::suppressed_from_preceding_source(source, node.start_byte());

    Some(layout)
}
563
564fn contains_virtual_keyword(source: &str, node: Node<'_>) -> bool {
567 let mut stack = vec![node];
568 while let Some(n) = stack.pop() {
569 if n.kind() == "virtual" {
570 return true;
571 }
572 if n.child_count() == 0 {
575 let text = &source[n.byte_range()];
576 if text == "virtual" {
577 return true;
578 }
579 }
580 for i in (0..n.child_count()).rev() {
581 if let Some(child) = n.child(i) {
582 stack.push(child);
583 }
584 }
585 }
586 false
587}
588
589fn collect_typedef_aliases(source: &str, root: Node<'_>) -> HashMap<String, String> {
603 let mut aliases = HashMap::new();
604 let mut stack = vec![root];
605 while let Some(node) = stack.pop() {
606 for i in (0..node.child_count()).rev() {
607 if let Some(child) = node.child(i) {
608 stack.push(child);
609 }
610 }
611 if node.kind() != "type_definition" {
612 continue;
613 }
614 let has_record = (0..node.child_count()).any(|i| {
616 node.child(i)
617 .map(|c| {
618 matches!(
619 c.kind(),
620 "struct_specifier" | "union_specifier" | "class_specifier"
621 )
622 })
623 .unwrap_or(false)
624 });
625 if has_record {
626 continue;
627 }
628 let mut type_parts: Vec<String> = Vec::new();
634 for i in 0..node.child_count() {
635 let Some(child) = node.child(i) else {
636 continue;
637 };
638 match child.kind() {
639 "typedef" | ";" => {}
640 "type_identifier" | "primitive_type" | "sized_type_specifier" => {
641 type_parts.push(source[child.byte_range()].trim().to_string());
642 }
643 _ => {}
644 }
645 }
646 if type_parts.len() < 2 {
647 continue;
648 }
649 let alias_name = type_parts.pop().unwrap();
651 let base_type = type_parts.join(" ");
652 aliases.entry(alias_name).or_insert(base_type);
653 }
654 aliases
655}
656
657fn extract_structs_from_tree(
658 source: &str,
659 root: Node<'_>,
660 arch: &'static ArchConfig,
661 layouts: &mut Vec<StructLayout>,
662) {
663 let aliases = collect_typedef_aliases(source, root);
665
666 let mut pack_stack: Vec<usize> = Vec::new();
674 let mut current_pack: usize = 0;
675
676 let cursor = root.walk();
677 let mut stack = vec![root];
678
679 while let Some(node) = stack.pop() {
680 for i in (0..node.child_count()).rev() {
682 if let Some(child) = node.child(i) {
683 stack.push(child);
684 }
685 }
686
687 if node.kind() == "preproc_call" {
689 let text = &source[node.byte_range()];
690 if text.contains("#pragma") && text.contains("pack(") {
691 current_pack = parse_pragma_pack(text, &mut pack_stack, current_pack);
692 }
693 }
694
695 let in_template = node
699 .parent()
700 .map(|p| p.kind() == "template_declaration")
701 .unwrap_or(false);
702 if in_template {
703 let tpl_name = (0..node.child_count())
704 .filter_map(|i| node.child(i))
705 .find(|c| c.kind() == "type_identifier")
706 .map(|c| source[c.byte_range()].to_string())
707 .unwrap_or_else(|| "(unknown)".to_string());
708 eprintln!(
709 "padlock: note: skipping '{tpl_name}' — template \
710 (layout depends on type arguments; use binary analysis for accurate results)"
711 );
712 continue;
713 }
714
715 match node.kind() {
716 "struct_specifier" => {
717 if let Some(layout) = parse_struct_or_union_specifier(
718 source,
719 node,
720 arch,
721 false,
722 &aliases,
723 current_pack,
724 ) {
725 layouts.push(layout);
726 }
727 }
728 "union_specifier" => {
729 if let Some(layout) = parse_struct_or_union_specifier(
730 source,
731 node,
732 arch,
733 true,
734 &aliases,
735 current_pack,
736 ) {
737 layouts.push(layout);
738 }
739 }
740 "class_specifier" => {
741 if let Some(layout) =
742 parse_class_specifier(source, node, arch, &aliases, current_pack)
743 {
744 layouts.push(layout);
745 }
746 }
747 _ => {}
748 }
749 }
750
751 let cursor2 = root.walk();
758 let mut stack2 = vec![root];
759 while let Some(node) = stack2.pop() {
760 for i in (0..node.child_count()).rev() {
761 if let Some(child) = node.child(i) {
762 stack2.push(child);
763 }
764 }
765 if node.kind() == "type_definition"
766 && let Some(layout) =
767 parse_typedef_struct_or_union(source, node, arch, &aliases, current_pack)
768 {
769 let existing = layouts
770 .iter()
771 .position(|l| l.name == layout.name || l.name == "<anonymous>");
772 match existing {
773 Some(i) if layouts[i].name == "<anonymous>" => {
774 layouts[i] = layout;
775 }
776 None => layouts.push(layout),
777 _ => {}
778 }
779 }
780 }
781 let _ = cursor;
782 let _ = cursor2; }
784
/// Interprets the argument list of a `#pragma pack(...)` directive and returns
/// the pack value that is in effect afterwards.
///
/// * no `pack(` or no closing `)` -> `current` unchanged
/// * `pack()` -> `0`
/// * `pack(pop)` -> the value popped from `stack`, or `0` when it is empty
/// * `pack(push)` / `pack(push, n)` -> pushes `current`; returns `n` when it
///   parses as an integer, otherwise `current`
/// * `pack(n)` -> `n` when it parses as an integer, otherwise `current`
fn parse_pragma_pack(text: &str, stack: &mut Vec<usize>, current: usize) -> usize {
    let Some((_, after_open)) = text.split_once("pack(") else {
        return current;
    };
    let Some((inside, _)) = after_open.split_once(')') else {
        return current;
    };
    let args = inside.trim();
    if args.is_empty() {
        return 0;
    }

    // Split into the first argument and everything after the first comma.
    let (head, operand) = match args.split_once(',') {
        Some((first, rest)) => (first.trim(), Some(rest.trim())),
        None => (args, None),
    };
    let parse_or_current = |s: &str| s.parse::<usize>().unwrap_or(current);

    match head {
        "pop" => stack.pop().unwrap_or(0),
        "push" => {
            stack.push(current);
            operand.map_or(current, parse_or_current)
        }
        width => parse_or_current(width),
    }
}
826
827fn parse_struct_or_union_specifier(
832 source: &str,
833 node: Node<'_>,
834 arch: &'static ArchConfig,
835 is_union: bool,
836 aliases: &HashMap<String, String>,
837 pragma_pack: usize,
838) -> Option<StructLayout> {
839 let mut name = "<anonymous>".to_string();
840 let mut body_node: Option<Node> = None;
841 let mut is_packed = false;
842 let mut struct_alignas: Option<usize> = None;
844
845 for i in 0..node.child_count() {
846 let child = node.child(i)?;
847 match child.kind() {
848 "type_identifier" => name = source[child.byte_range()].to_string(),
849 "field_declaration_list" => body_node = Some(child),
850 "attribute_specifier" => {
851 let text = &source[child.byte_range()];
852 if text.contains("packed") {
853 is_packed = true;
854 }
855 }
856 "alignas_qualifier" | "alignas_specifier" if struct_alignas.is_none() => {
859 struct_alignas = parse_alignas_value(source, child);
860 }
861 _ => {}
862 }
863 }
864
865 let body = body_node?;
866 let mut raw_fields: Vec<RawField> = Vec::new();
867
868 for i in 0..body.child_count() {
869 let child = body.child(i)?;
870 if child.kind() == "field_declaration" {
871 if let Some(anon_fields) = parse_anonymous_nested(source, child, arch, is_union) {
875 raw_fields.extend(anon_fields);
876 } else if let Some((ty, fname, guard, al, ln)) = parse_field_declaration(source, child)
877 {
878 raw_fields.push((fname, ty, guard, al, ln));
879 }
880 }
881 }
882
883 if raw_fields.is_empty() {
884 return None;
885 }
886
887 if raw_fields
891 .iter()
892 .any(|(_, ty, _, _, _)| is_bitfield_type(ty))
893 {
894 eprintln!(
895 "padlock: note: skipping '{name}' — contains bit-fields \
896 (bit-field layout is compiler-controlled; use binary analysis for accurate results)"
897 );
898 return None;
899 }
900
901 let mut fields: Vec<Field> = raw_fields
902 .into_iter()
903 .map(|(fname, ty_name, guard, alignas, field_line)| {
904 let resolved = aliases
905 .get(&ty_name)
906 .map(String::as_str)
907 .unwrap_or(&ty_name);
908 let (size, natural_align) = c_type_size_align(resolved, arch);
909 let align = alignas.unwrap_or(natural_align);
911 let access = if let Some(g) = guard {
912 AccessPattern::Concurrent {
913 guard: Some(g),
914 is_atomic: false,
915 is_annotated: true,
916 }
917 } else {
918 AccessPattern::Unknown
919 };
920 Field {
921 name: fname,
922 ty: TypeInfo::Primitive {
923 name: ty_name,
924 size,
925 align,
926 },
927 offset: 0,
928 size,
929 align,
930 source_file: None,
931 source_line: Some(field_line),
932 access,
933 }
934 })
935 .collect();
936
937 let line = node.start_position().row as u32 + 1;
938 let pack_n = if is_packed {
941 1
942 } else if pragma_pack > 0 {
943 pragma_pack
944 } else {
945 0
946 };
947 let mut layout = if is_union {
948 simulate_union_layout(&mut fields, name, arch, Some(line))
949 } else {
950 simulate_layout(&mut fields, name, arch, Some(line), pack_n)
951 };
952
953 if let Some(al) = struct_alignas
956 && al > layout.align
957 {
958 layout.align = al;
959 if pack_n == 0 {
960 layout.total_size = layout.total_size.next_multiple_of(al);
961 }
962 }
963
964 layout.suppressed_findings =
965 super::suppress::suppressed_from_preceding_source(source, node.start_byte());
966
967 Some(layout)
968}
969
970fn parse_typedef_struct_or_union(
972 source: &str,
973 node: Node<'_>,
974 arch: &'static ArchConfig,
975 aliases: &HashMap<String, String>,
976 pragma_pack: usize,
977) -> Option<StructLayout> {
978 let mut specifier_node: Option<Node> = None;
979 let mut is_union = false;
980 let mut typedef_name: Option<String> = None;
981
982 for i in 0..node.child_count() {
983 let child = node.child(i)?;
984 match child.kind() {
985 "struct_specifier" => {
986 specifier_node = Some(child);
987 is_union = false;
988 }
989 "union_specifier" => {
990 specifier_node = Some(child);
991 is_union = true;
992 }
993 "type_identifier" => typedef_name = Some(source[child.byte_range()].to_string()),
994 _ => {}
995 }
996 }
997
998 let spec = specifier_node?;
999 let typedef_name = typedef_name?;
1000
1001 let mut layout =
1002 parse_struct_or_union_specifier(source, spec, arch, is_union, aliases, pragma_pack)?;
1003 if layout.name == "<anonymous>" {
1004 layout.name = typedef_name;
1005 }
1006 Some(layout)
1007}
1008
/// Extracts the lock name from a `guarded_by(...)`-style annotation in
/// `field_source`.
///
/// The keywords are tried in a fixed order and only the first occurrence of
/// each is examined. The argument is the text up to the first `)`, trimmed of
/// whitespace and surrounding double quotes; an empty argument is treated as
/// no match.
fn extract_guard_from_c_field_text(field_source: &str) -> Option<String> {
    const KEYWORDS: [&str; 4] = ["guarded_by", "pt_guarded_by", "GUARDED_BY", "PT_GUARDED_BY"];

    KEYWORDS.iter().find_map(|&kw| {
        let (_, after_kw) = field_source.split_once(kw)?;
        let after_paren = after_kw.trim_start().strip_prefix('(')?;
        let (argument, _) = after_paren.split_once(')')?;
        let guard = argument.trim().trim_matches('"');
        if guard.is_empty() {
            None
        } else {
            Some(guard.to_string())
        }
    })
}
1036
1037fn parse_alignas_value(source: &str, node: Node<'_>) -> Option<usize> {
1043 for i in 0..node.child_count() {
1044 if let Some(child) = node.child(i) {
1045 match child.kind() {
1046 "number_literal" | "integer_literal" | "integer" => {
1047 let text = source[child.byte_range()].trim();
1048 if let Ok(n) = text.parse::<usize>() {
1049 return Some(n);
1050 }
1051 if let Some(hex) = text.strip_prefix("0x").or_else(|| text.strip_prefix("0X")) {
1053 return usize::from_str_radix(hex, 16).ok();
1054 }
1055 }
1056 "parenthesized_expression" | "argument_list" | "alignas_qualifier" => {
1058 if let r @ Some(_) = parse_alignas_value(source, child) {
1059 return r;
1060 }
1061 }
1062 _ => {}
1063 }
1064 }
1065 }
1066 None
1067}
1068
// Field data gathered from the syntax tree before sizes are resolved: two
// strings (field name and type spelling), an optional guard name, an optional
// alignas override and the 1-based source line.
//
// NOTE(review): the two string slots are used in both orders in this file.
// `parse_field_declaration` returns (type, name, ...), while the `raw_fields`
// vectors built by its callers hold (name, type, ...).
type RawField = (String, String, Option<String>, Option<usize>, u32);
1092
1093#[allow(clippy::only_used_in_recursion)]
1094fn parse_anonymous_nested(
1095 source: &str,
1096 node: Node<'_>,
1097 arch: &'static ArchConfig,
1098 parent_is_union: bool,
1099) -> Option<Vec<RawField>> {
1100 for i in 0..node.child_count() {
1102 let child = node.child(i)?;
1103 if child.kind() != "struct_specifier" && child.kind() != "union_specifier" {
1104 continue;
1105 }
1106 let nested_is_union = child.kind() == "union_specifier";
1107
1108 let mut has_name = false;
1110 let mut body_node: Option<Node> = None;
1111 for j in 0..child.child_count() {
1112 let sub = child.child(j)?;
1113 match sub.kind() {
1114 "type_identifier" => has_name = true,
1115 "field_declaration_list" => body_node = Some(sub),
1116 _ => {}
1117 }
1118 }
1119
1120 if has_name || body_node.is_none() {
1121 continue;
1123 }
1124
1125 let body = body_node?;
1126 let mut nested_raw: Vec<RawField> = Vec::new();
1127
1128 for j in 0..body.child_count() {
1129 let inner = body.child(j)?;
1130 if inner.kind() == "field_declaration" {
1131 if let Some(deeper) = parse_anonymous_nested(source, inner, arch, nested_is_union) {
1133 nested_raw.extend(deeper);
1134 } else if let Some((ty, fname, guard, al, ln)) =
1135 parse_field_declaration(source, inner)
1136 {
1137 nested_raw.push((fname, ty, guard, al, ln));
1138 }
1139 }
1140 }
1141
1142 let _ = (nested_is_union, parent_is_union);
1151
1152 if !nested_raw.is_empty() {
1153 return Some(nested_raw);
1154 }
1155 }
1156 None
1157}
1158
1159fn parse_field_declaration(source: &str, node: Node<'_>) -> Option<RawField> {
1160 let mut ty_parts: Vec<String> = Vec::new();
1161 let mut field_name: Option<String> = None;
1162 let mut bit_width: Option<String> = None;
1164 let mut attr_text = String::new();
1166 let mut alignas_override: Option<usize> = None;
1168
1169 for i in 0..node.child_count() {
1170 let child = node.child(i)?;
1171 match child.kind() {
1172 "type_specifier" | "primitive_type" | "type_identifier" | "sized_type_specifier" => {
1173 ty_parts.push(source[child.byte_range()].trim().to_string());
1174 }
1175 "qualified_identifier" | "template_type" => {
1178 ty_parts.push(source[child.byte_range()].trim().to_string());
1179 }
1180 "struct_specifier" | "union_specifier" => {
1184 for j in 0..child.child_count() {
1185 if let Some(sub) = child.child(j)
1186 && sub.kind() == "type_identifier"
1187 {
1188 ty_parts.push(source[sub.byte_range()].trim().to_string());
1189 break;
1190 }
1191 }
1192 }
1193 "field_identifier" => {
1194 field_name = Some(source[child.byte_range()].trim().to_string());
1195 }
1196 "pointer_declarator" => {
1197 field_name = extract_identifier(source, child);
1198 ty_parts.push("*".to_string());
1199 }
1200 "bitfield_clause" => {
1202 let text = source[child.byte_range()].trim();
1203 bit_width = Some(text.trim_start_matches(':').trim().to_string());
1205 }
1206 "attribute_specifier" | "attribute" => {
1208 attr_text.push_str(source[child.byte_range()].trim());
1209 attr_text.push(' ');
1210 }
1211 "alignas_qualifier" | "alignas_specifier" if alignas_override.is_none() => {
1214 alignas_override = parse_alignas_value(source, child);
1215 }
1216 "type_qualifier" if alignas_override.is_none() => {
1219 for j in 0..child.child_count() {
1220 if let Some(sub) = child.child(j)
1221 && (sub.kind() == "alignas_qualifier" || sub.kind() == "alignas_specifier")
1222 {
1223 alignas_override = parse_alignas_value(source, sub);
1224 break;
1225 }
1226 }
1227 }
1228 _ => {}
1229 }
1230 }
1231
1232 let base_ty = ty_parts.join(" ");
1233 let fname = field_name?;
1234 if base_ty.is_empty() {
1235 return None;
1236 }
1237 let ty = if let Some(w) = bit_width {
1240 format!("{base_ty}:{w}")
1241 } else {
1242 base_ty
1243 };
1244
1245 let field_src = source[node.byte_range()].to_string();
1248 let guard = extract_guard_from_c_field_text(&attr_text)
1249 .or_else(|| extract_guard_from_c_field_text(&field_src));
1250
1251 let line = node.start_position().row as u32 + 1;
1252 Some((ty, fname, guard, alignas_override, line))
1253}
1254
1255fn extract_identifier(source: &str, node: Node<'_>) -> Option<String> {
1256 if node.kind() == "field_identifier" || node.kind() == "identifier" {
1257 return Some(source[node.byte_range()].to_string());
1258 }
1259 for i in 0..node.child_count() {
1260 if let Some(child) = node.child(i)
1261 && let Some(name) = extract_identifier(source, child)
1262 {
1263 return Some(name);
1264 }
1265 }
1266 None
1267}
1268
1269pub fn parse_c(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1272 let mut parser = Parser::new();
1273 parser.set_language(&tree_sitter_c::LANGUAGE.into())?;
1274 let tree = parser
1275 .parse(source, None)
1276 .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1277 let mut layouts = Vec::new();
1278 extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1279 Ok(layouts)
1280}
1281
1282pub fn parse_cpp(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
1283 let mut parser = Parser::new();
1284 parser.set_language(&tree_sitter_cpp::LANGUAGE.into())?;
1285 let tree = parser
1286 .parse(source, None)
1287 .ok_or_else(|| anyhow::anyhow!("tree-sitter parse failed"))?;
1288 let mut layouts = Vec::new();
1289 extract_structs_from_tree(source, tree.root_node(), arch, &mut layouts);
1290 Ok(layouts)
1291}
1292
1293#[cfg(test)]
1296mod tests {
1297 use super::*;
1298 use padlock_core::arch::X86_64_SYSV;
1299
1300 #[test]
1301 fn parse_simple_c_struct() {
1302 let src = r#"
1303struct Point {
1304 int x;
1305 int y;
1306};
1307"#;
1308 let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1309 assert_eq!(layouts.len(), 1);
1310 assert_eq!(layouts[0].name, "Point");
1311 assert_eq!(layouts[0].fields.len(), 2);
1312 assert_eq!(layouts[0].fields[0].name, "x");
1313 assert_eq!(layouts[0].fields[1].name, "y");
1314 }
1315
1316 #[test]
1317 fn parse_typedef_struct() {
1318 let src = r#"
1319typedef struct {
1320 char is_active;
1321 double timeout;
1322 int port;
1323} Connection;
1324"#;
1325 let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1326 assert_eq!(layouts.len(), 1);
1327 assert_eq!(layouts[0].name, "Connection");
1328 assert_eq!(layouts[0].fields.len(), 3);
1329 }
1330
1331 #[test]
1332 fn c_layout_computes_offsets() {
1333 let src = "struct T { char a; double b; };";
1334 let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1335 assert_eq!(layouts.len(), 1);
1336 let layout = &layouts[0];
1337 assert_eq!(layout.fields[0].offset, 0);
1339 assert_eq!(layout.fields[1].offset, 8);
1340 assert_eq!(layout.total_size, 16);
1341 }
1342
1343 #[test]
1344 fn c_layout_detects_padding() {
1345 let src = "struct T { char a; int b; };";
1346 let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1347 let gaps = padlock_core::ir::find_padding(&layouts[0]);
1348 assert!(!gaps.is_empty());
1349 assert_eq!(gaps[0].bytes, 3); }
1351
1352 #[test]
1353 fn parse_cpp_struct() {
1354 let src = "struct Vec3 { float x; float y; float z; };";
1355 let layouts = parse_cpp(src, &X86_64_SYSV).unwrap();
1356 assert_eq!(layouts.len(), 1);
1357 assert_eq!(layouts[0].fields.len(), 3);
1358 }
1359
1360 #[test]
1363 fn simd_sse_field_size_and_align() {
1364 let src = "struct Vecs { __m128 a; __m256 b; };";
1365 let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1366 assert_eq!(layouts.len(), 1);
1367 let f = &layouts[0].fields;
1368 assert_eq!(f[0].size, 16); assert_eq!(f[0].align, 16);
1370 assert_eq!(f[1].size, 32); assert_eq!(f[1].align, 32);
1372 }
1373
1374 #[test]
1375 fn simd_avx512_size() {
1376 let src = "struct Wide { __m512 v; };";
1377 let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1378 assert_eq!(layouts[0].fields[0].size, 64);
1379 assert_eq!(layouts[0].fields[0].align, 64);
1380 }
1381
1382 #[test]
1383 fn simd_padding_detected_when_small_field_before_avx() {
1384 let src = "struct Mixed { char flag; __m256 data; };";
1386 let layouts = parse_c(src, &X86_64_SYSV).unwrap();
1387 let gaps = padlock_core::ir::find_padding(&layouts[0]);
1388 assert!(!gaps.is_empty());
1389 assert_eq!(gaps[0].bytes, 31);
1390 }
1391
/// Every member of a parsed union must sit at offset 0.
#[test]
fn union_fields_all_at_offset_zero() {
    let parsed = parse_c("union Data { int i; float f; double d; };", &X86_64_SYSV).unwrap();
    assert_eq!(parsed.len(), 1);

    let data = &parsed[0];
    assert!(data.is_union);
    data.fields.iter().for_each(|member| {
        assert_eq!(
            member.offset, 0,
            "union field '{}' should be at offset 0",
            member.name
        )
    });
}
1409
/// A union of `int`, `float` and `double` is expected to be 8 bytes in total.
#[test]
fn union_total_size_is_max_field() {
    let parsed = parse_c("union Data { int i; float f; double d; };", &X86_64_SYSV).unwrap();
    let data = &parsed[0];
    assert_eq!(data.total_size, 8);
}
1417
/// The report built for a union must contain neither a `PaddingWaste` nor a
/// `ReorderSuggestion` finding.
#[test]
fn union_no_padding_finding() {
    use padlock_core::findings::{Finding, Report};

    let parsed = parse_c("union Data { int i; double d; };", &X86_64_SYSV).unwrap();
    let report = Report::from_layouts(&parsed);
    let findings = &report.structs[0].findings;

    assert!(findings
        .iter()
        .all(|f| !matches!(f, Finding::PaddingWaste { .. })));
    assert!(findings
        .iter()
        .all(|f| !matches!(f, Finding::ReorderSuggestion { .. })));
}
1435
/// An anonymous union behind a `typedef` is emitted under the typedef name and
/// flagged as a union.
#[test]
fn typedef_union_parsed() {
    let parsed = parse_c("typedef union { int a; double b; } Value;", &X86_64_SYSV).unwrap();
    assert_eq!(parsed.len(), 1);

    let value = &parsed[0];
    assert_eq!(value.name, "Value");
    assert!(value.is_union);
}
1444
/// The `GUARDED_BY(mu)` macro form yields the guard name `mu`.
#[test]
fn extract_guard_from_c_guarded_by_macro() {
    let found = extract_guard_from_c_field_text("int value GUARDED_BY(mu);");
    assert_eq!(found.as_deref(), Some("mu"));
}
1453
/// The `__attribute__((guarded_by(...)))` form yields the named lock.
#[test]
fn extract_guard_from_c_attribute_specifier() {
    let declaration = "__attribute__((guarded_by(counter_lock))) uint64_t counter;";
    let found = extract_guard_from_c_field_text(declaration);
    assert_eq!(found.as_deref(), Some("counter_lock"));
}
1460
/// The `PT_GUARDED_BY(...)` macro form yields the named lock.
#[test]
fn extract_guard_pt_guarded_by() {
    let found = extract_guard_from_c_field_text("int *ptr PT_GUARDED_BY(ptr_lock);");
    assert_eq!(found.as_deref(), Some("ptr_lock"));
}
1467
/// A declaration without any guard annotation yields no guard.
#[test]
fn no_guard_returns_none() {
    assert!(extract_guard_from_c_field_text("int x;").is_none());
}
1473
/// Guard extraction on a `GUARDED_BY(lock_a)` field declaration returns `lock_a`.
#[test]
fn c_struct_guarded_by_sets_concurrent_access() {
    let found = extract_guard_from_c_field_text("uint64_t readers GUARDED_BY(lock_a);");
    assert_eq!(found.as_deref(), Some("lock_a"));
}
1485
/// Two annotated 8-byte fields in one hand-built 128-byte layout: reported as false
/// sharing while their guards differ, and no longer reported once the second field
/// is switched to the first field's guard.
#[test]
fn c_struct_different_guards_detected_as_false_sharing() {
    use padlock_core::analysis::false_sharing::has_false_sharing;
    use padlock_core::arch::X86_64_SYSV;
    use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};

    // Access pattern of a non-atomic field annotated as protected by `guard`.
    let guarded_by = |guard: &str| AccessPattern::Concurrent {
        guard: Some(guard.into()),
        is_atomic: false,
        is_annotated: true,
    };
    // A `uint64_t` member placed at `offset` and protected by `guard`.
    let counter = |name: &str, offset, guard: &str| Field {
        name: name.into(),
        ty: TypeInfo::Primitive {
            name: "uint64_t".into(),
            size: 8,
            align: 8,
        },
        offset,
        size: 8,
        align: 8,
        source_file: None,
        source_line: None,
        access: guarded_by(guard),
    };

    let mut layout = StructLayout {
        name: "S".into(),
        total_size: 128,
        align: 8,
        fields: vec![
            counter("readers", 0, "lock_a"),
            counter("writers", 8, "lock_b"),
        ],
        source_file: None,
        source_line: None,
        arch: &X86_64_SYSV,
        is_packed: false,
        is_union: false,
        is_repr_rust: false,
        suppressed_findings: Vec::new(),
        uncertain_fields: Vec::new(),
    };
    assert!(has_false_sharing(&layout));

    // Same guard on both fields: the analysis must stop reporting false sharing.
    layout.fields[1].access = guarded_by("lock_a");
    assert!(!has_false_sharing(&layout));
}
1558
/// A class with a virtual method gets a leading 8-byte `__vptr` field at offset 0,
/// which pushes `x` to offset 8.
#[test]
fn cpp_class_with_virtual_method_has_vptr() {
    let code = r#"
class Widget {
    virtual void draw();
    int x;
    int y;
};
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
    assert_eq!(parsed.len(), 1);

    let widget = &parsed[0];
    let vptr = &widget.fields[0];
    assert_eq!(vptr.name, "__vptr");
    assert_eq!(vptr.size, 8);
    assert_eq!(vptr.offset, 0);

    let x = widget.fields.iter().find(|f| f.name == "x").unwrap();
    assert_eq!(x.offset, 8);
}
1582
/// A class without virtual methods must not contain a `__vptr` field.
#[test]
fn cpp_class_without_virtual_has_no_vptr() {
    let parsed = parse_cpp("class Plain { int a; int b; };", &X86_64_SYSV).unwrap();
    assert_eq!(parsed.len(), 1);
    assert!(parsed[0].fields.iter().all(|f| f.name != "__vptr"));
}
1590
/// Parsing a `struct` that declares only virtual members must succeed.
///
/// NOTE(review): despite the name, nothing is asserted about the resulting layouts
/// (no `__vptr` check); only the `unwrap()` on the parse result can fail. Confirm
/// whether a stronger assertion is intended.
#[test]
fn cpp_struct_keyword_with_virtual_has_vptr() {
    let code = "struct IFoo { virtual ~IFoo(); virtual void bar(); };";
    let _parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
}
1600
/// A derived class carries a synthetic `__base_Base` field, and its own member `y`
/// starts at or after the end of that base slot.
#[test]
fn cpp_derived_class_has_base_slot() {
    let code = r#"
class Base {
    int x;
};
class Derived : public Base {
    int y;
};
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
    let derived = parsed.iter().find(|l| l.name == "Derived").unwrap();

    let base_slot = derived
        .fields
        .iter()
        .find(|f| f.name == "__base_Base")
        .expect("Derived should have a __base_Base field");
    let y = derived.fields.iter().find(|f| f.name == "y").unwrap();

    let base_end = base_slot.offset + base_slot.size;
    assert!(y.offset >= base_end);
}
1630
/// With two public bases, the derived class carries one `__base_*` field per base.
#[test]
fn cpp_class_multiple_inheritance_has_multiple_base_slots() {
    let code = r#"
class A { int a; };
class B { int b; };
class C : public A, public B { int c; };
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
    let derived = parsed.iter().find(|l| l.name == "C").unwrap();

    for slot in ["__base_A", "__base_B"] {
        assert!(derived.fields.iter().any(|f| f.name == slot));
    }
}
1643
/// A class with one virtual method and one `int` is expected to total 16 bytes.
#[test]
fn cpp_virtual_base_class_total_size_accounts_for_vptr() {
    let parsed = parse_cpp("class V { virtual void f(); int x; };", &X86_64_SYSV).unwrap();
    let v = &parsed[0];
    assert_eq!(v.total_size, 16);
}
1653
/// `is_bitfield_type` accepts `type:N` spellings and rejects `::`-qualified or
/// plain type names.
#[test]
fn is_bitfield_type_detects_colon_n() {
    let cases = [
        ("int:3", true),
        ("unsigned int:16", true),
        ("uint32_t:1", true),
        ("std::atomic<int>", false),
        ("ns::Type", false),
        ("int", false),
    ];
    for (ty, expected) in cases {
        assert_eq!(is_bitfield_type(ty), expected, "type `{}`", ty);
    }
}
1666
/// A struct mixing bitfields with a regular member must not be emitted at all.
#[test]
fn struct_with_bitfields_is_skipped() {
    let code = r#"
struct Flags {
    unsigned int active : 1;
    unsigned int ready : 1;
    unsigned int error : 6;
    int value;
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();

    let names = parsed.iter().map(|l| &l.name).collect::<Vec<_>>();
    assert!(
        names.iter().all(|n| n.as_str() != "Flags"),
        "struct with bitfields should be skipped; got {:?}",
        names
    );
}
1687
/// A struct with no bitfields is still emitted as a single layout.
#[test]
fn struct_without_bitfields_is_still_parsed() {
    let code = "struct Normal { int a; char b; double c; };";
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();

    assert_eq!(parsed.len(), 1);
    assert_eq!(parsed[0].name, "Normal");
}
1696
/// Fields record the 1-based source line they were declared on.
#[test]
fn c_struct_fields_have_source_lines() {
    let parsed = parse_c("struct Point {\n    int x;\n    int y;\n};", &X86_64_SYSV).unwrap();
    assert_eq!(parsed.len(), 1);

    let point = &parsed[0];
    assert_eq!(point.fields[0].source_line, Some(2), "x should be line 2");
    assert_eq!(point.fields[1].source_line, Some(3), "y should be line 3");
}
1707
/// A C++ class made of bitfields must not be emitted.
#[test]
fn cpp_class_with_bitfields_is_skipped() {
    let parsed = parse_cpp("class Packed { int x : 4; int y : 4; };", &X86_64_SYSV).unwrap();
    let emitted = parsed.iter().any(|l| l.name == "Packed");
    assert!(!emitted, "C++ class with bitfields should be skipped");
}
1717
/// A struct consisting solely of bitfields must not be emitted.
#[test]
fn all_bitfield_struct_is_skipped() {
    let parsed = parse_c("struct BitPacked { int x:4; int y:4; };", &X86_64_SYSV).unwrap();

    let names = parsed.iter().map(|l| &l.name).collect::<Vec<_>>();
    assert!(
        names.iter().all(|n| n.as_str() != "BitPacked"),
        "all-bitfield struct should be skipped; got {:?}",
        names
    );
}
1731
/// `__attribute__((packed))` marks the layout as packed and places the fields
/// back to back (offsets 0, 1, 5; total size 6).
#[test]
fn packed_struct_has_no_alignment_padding() {
    let code = r#"
struct __attribute__((packed)) Tight {
    char a;
    int b;
    char c;
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();
    let tight = parsed.iter().find(|l| l.name == "Tight").expect("Tight");

    assert!(tight.is_packed, "should be marked is_packed");
    assert_eq!(tight.total_size, 6, "packed: no padding inserted");

    let fields = &tight.fields;
    assert_eq!(fields[0].offset, 0);
    assert_eq!(fields[1].offset, 1);
    assert_eq!(fields[2].offset, 5);
}
1753
/// Without a packed attribute the same field set keeps natural alignment:
/// `b` at offset 4 and a total size of 12.
#[test]
fn non_packed_struct_has_normal_alignment_padding() {
    let code = r#"
struct Normal {
    char a;
    int b;
    char c;
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();
    let normal = parsed.iter().find(|l| l.name == "Normal").expect("Normal");

    assert!(!normal.is_packed);
    assert_eq!(normal.total_size, 12);
    assert_eq!(normal.fields[1].offset, 4);
}
1770
/// The packed attribute is honoured on a C++ `class` as well: the layout is
/// marked packed and totals 5 bytes.
#[test]
fn cpp_class_packed_attribute_detected() {
    let code = r#"
class __attribute__((packed)) Dense {
    char a;
    int b;
};
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
    let dense = parsed.iter().find(|l| l.name == "Dense").expect("Dense");

    assert!(
        dense.is_packed,
        "C++ class with __attribute__((packed)) must be marked packed"
    );
    assert_eq!(dense.total_size, 5);
}
1787
/// `alignas(8)` on a `char` field raises that field's alignment to 8 and the
/// struct's alignment with it; `x` still lands at offset 4 and the total is 8.
#[test]
fn field_alignas_overrides_natural_alignment() {
    let code = r#"
struct S {
    alignas(8) char c;
    int x;
};
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
    let s = parsed.iter().find(|l| l.name == "S").expect("S");
    let field = |wanted: &str| s.fields.iter().find(|f| f.name == wanted).unwrap();

    assert_eq!(field("c").align, 8);
    assert_eq!(field("x").offset, 4);
    assert_eq!(s.align, 8);
    assert_eq!(s.total_size, 8);
}
1816
/// `alignas(64)` on the struct sets its alignment to 64 and makes the total size
/// a multiple of 64.
#[test]
fn struct_level_alignas_increases_struct_alignment() {
    let code = r#"
struct alignas(64) CacheLine {
    int x;
    int y;
};
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
    let cache_line = parsed
        .iter()
        .find(|l| l.name == "CacheLine")
        .expect("CacheLine");

    assert_eq!(cache_line.align, 64);
    let remainder = cache_line.total_size % 64;
    assert_eq!(remainder, 0);
}
1835
/// The first `int` field keeps an alignment of 4.
///
/// NOTE(review): the parsed snippet contains no `alignas` specifier, so this only
/// checks the natural alignment of `int` — confirm whether that matches the intent
/// expressed by the test name.
#[test]
fn alignas_on_field_smaller_than_natural_is_ignored() {
    let parsed = parse_cpp("struct S { int x; int y; };", &X86_64_SYSV).unwrap();
    let first_field = &parsed[0].fields[0];
    assert_eq!(first_field.align, 4);
}
1848
/// `alignas(32)` on a `class` sets its alignment to 32 and makes the total size
/// a multiple of 32.
#[test]
fn cpp_class_alignas_detected() {
    let code = r#"
class alignas(32) Aligned {
    double x;
    double y;
};
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
    let aligned = parsed
        .iter()
        .find(|l| l.name == "Aligned")
        .expect("Aligned");

    assert_eq!(aligned.align, 32);
    let remainder = aligned.total_size % 32;
    assert_eq!(remainder, 0);
}
1865
/// A plain `int` + `char` struct has alignment 4 and a total size of 8.
#[test]
fn struct_without_alignas_unchanged() {
    let parsed = parse_c("struct Plain { int a; char b; };", &X86_64_SYSV).unwrap();
    let plain = &parsed[0];
    assert_eq!(plain.align, 4);
    assert_eq!(plain.total_size, 8);
}
1877
/// Members of an anonymous nested union show up directly in the enclosing struct,
/// alongside its regular members.
#[test]
fn anonymous_nested_union_fields_flattened() {
    let code = r#"
struct Packet {
    union {
        uint32_t raw;
        uint8_t bytes[4];
    };
    uint64_t timestamp;
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();
    let packet = parsed.iter().find(|l| l.name == "Packet").expect("Packet");
    let has_field = |wanted: &str| packet.fields.iter().any(|f| f.name == wanted);

    assert!(has_field("raw"), "raw field must be flattened into Packet");
    assert!(has_field("timestamp"), "timestamp must be present");
}
1903
/// Members of an anonymous nested struct are flattened into the parent, which is
/// expected to total 16 bytes.
#[test]
fn anonymous_nested_struct_fields_flattened() {
    let code = r#"
struct Outer {
    struct {
        int x;
        int y;
    };
    double z;
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();
    let outer = parsed.iter().find(|l| l.name == "Outer").expect("Outer");
    let has_field = |wanted: &str| outer.fields.iter().any(|f| f.name == wanted);

    assert!(has_field("x"), "x must be flattened");
    assert!(has_field("y"), "y must be flattened");
    assert!(has_field("z"), "z present");
    assert_eq!(outer.total_size, 16);
}
1929
/// Fields whose type is a named struct stay as single fields (`tl`, `br`) rather
/// than being expanded into their members.
#[test]
fn named_nested_struct_not_flattened() {
    let code = r#"
struct Vec2 { float x; float y; };
struct Rect { struct Vec2 tl; struct Vec2 br; };
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();
    let rect = parsed.iter().find(|l| l.name == "Rect").expect("Rect");

    assert_eq!(rect.fields.len(), 2);
    for corner in ["tl", "br"] {
        assert!(rect.fields.iter().any(|f| f.name == corner));
    }
}
1944
/// Short kernel integer aliases (`u8`..`u64`, `s8`..`s64`) map to their bit width.
#[test]
fn linux_kernel_types_correct_size() {
    let expected = [
        ("u8", (1, 1)),
        ("u16", (2, 2)),
        ("u32", (4, 4)),
        ("u64", (8, 8)),
        ("s8", (1, 1)),
        ("s16", (2, 2)),
        ("s32", (4, 4)),
        ("s64", (8, 8)),
    ];
    for (ty, size_align) in expected {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), size_align, "type `{}`", ty);
    }
}
1959
/// Double-underscore kernel aliases, including the `__be*`/`__le*` spellings, map
/// to their bit width.
#[test]
fn linux_kernel_dunder_types_correct_size() {
    let expected = [
        ("__u8", (1, 1)),
        ("__u16", (2, 2)),
        ("__u32", (4, 4)),
        ("__u64", (8, 8)),
        ("__s8", (1, 1)),
        ("__s64", (8, 8)),
        ("__be16", (2, 2)),
        ("__le32", (4, 4)),
        ("__be64", (8, 8)),
    ];
    for (ty, size_align) in expected {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), size_align, "type `{}`", ty);
    }
}
1973
/// Pins the sizes reported for the C99 `fast`, `least` and `max` integer aliases.
#[test]
fn c99_fast_types_correct_size() {
    let expected = [
        ("uint_fast8_t", (1, 1)),
        ("uint_fast16_t", (2, 2)),
        ("uint_fast32_t", (8, 8)),
        ("uint_fast64_t", (8, 8)),
        ("uint_least8_t", (1, 1)),
        ("uint_least32_t", (4, 4)),
        ("uint_least64_t", (8, 8)),
        ("intmax_t", (8, 8)),
        ("uintmax_t", (8, 8)),
    ];
    for (ty, size_align) in expected {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), size_align, "type `{}`", ty);
    }
}
1989
/// Every spelling of the 128-bit integer extension reports 16 bytes, 16-aligned.
#[test]
fn gcc_int128_correct_size() {
    let spellings = ["__int128", "__uint128", "__int128_t", "unsigned __int128"];
    for ty in spellings {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), (16, 16), "type `{}`", ty);
    }
}
2001
/// Pins the sizes reported for common Windows typedef names.
#[test]
fn windows_types_correct_size() {
    let expected = [
        ("BYTE", (1, 1)),
        ("WORD", (2, 2)),
        ("DWORD", (4, 4)),
        ("QWORD", (8, 8)),
        ("BOOL", (4, 4)),
        ("UINT8", (1, 1)),
        ("INT32", (4, 4)),
        ("UINT64", (8, 8)),
        ("HANDLE", (8, 8)),
        ("LPVOID", (8, 8)),
    ];
    for (ty, size_align) in expected {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), size_align, "type `{}`", ty);
    }
}
2015
/// Pins the sizes reported for `wchar_t` and the fixed-width character types.
#[test]
fn char_types_correct_size() {
    let expected = [
        ("wchar_t", (4, 4)),
        ("char8_t", (1, 1)),
        ("char16_t", (2, 2)),
        ("char32_t", (4, 4)),
    ];
    for (ty, size_align) in expected {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), size_align, "type `{}`", ty);
    }
}
2023
/// Pins the sizes reported for half-precision float types and `_Float128`.
#[test]
fn half_precision_types_correct_size() {
    let expected = [
        ("_Float16", (2, 2)),
        ("__fp16", (2, 2)),
        ("__bf16", (2, 2)),
        ("_Float128", (16, 16)),
    ];
    for (ty, size_align) in expected {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), size_align, "type `{}`", ty);
    }
}
2031
/// Multi-word integer spellings resolve to the size of the underlying type;
/// `long int` is expected to match the architecture's pointer size.
#[test]
fn unsigned_prefix_stripped_correctly() {
    let unsigned_cases = [
        ("unsigned short", (2, 2)),
        ("unsigned int", (4, 4)),
        ("unsigned long long", (8, 8)),
    ];
    for (ty, size_align) in unsigned_cases {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), size_align, "type `{}`", ty);
    }

    let word = X86_64_SYSV.pointer_size;
    assert_eq!(c_type_size_align("long int", &X86_64_SYSV), (word, word));
}
2046
/// A struct built from kernel aliases (`__be32`, `__be16`, `u8`) totals 16 bytes
/// with per-field sizes of 4, 2 and 1.
#[test]
fn linux_kernel_struct_with_new_types() {
    let code = r#"
struct NetHeader {
    __be32 src_ip;
    __be32 dst_ip;
    __be16 src_port;
    __be16 dst_port;
    u8 protocol;
    u8 ttl;
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();
    assert_eq!(parsed.len(), 1);

    let header = &parsed[0];
    assert_eq!(header.total_size, 16);

    let fields = &header.fields;
    assert_eq!(fields[0].size, 4);
    assert_eq!(fields[2].size, 2);
    assert_eq!(fields[4].size, 1);
}
2069
/// `std::string` and `std::wstring` report 32 bytes, 8-aligned, under the stdlib
/// selection active for this test thread.
#[test]
fn cpp_string_is_32_bytes() {
    for ty in ["std::string", "std::wstring"] {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), (32, 8), "type `{}`", ty);
    }
}
2077
/// `std::string_view` reports 16 bytes, 8-aligned.
#[test]
fn cpp_string_view_is_two_words() {
    let size_align = c_type_size_align("std::string_view", &X86_64_SYSV);
    assert_eq!(size_align, (16, 8));
}
2082
/// `std::vector<T>` reports 24 bytes, 8-aligned, regardless of the element type.
#[test]
fn cpp_vector_is_24_bytes() {
    let instantiations = [
        "std::vector<int>",
        "std::vector<uint64_t>",
        "std::vector<std::string>",
    ];
    for ty in instantiations {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), (24, 8), "type `{}`", ty);
    }
}
2096
/// `unique_ptr` is one word; `shared_ptr` and `weak_ptr` are two words.
#[test]
fn cpp_smart_pointers_correct_size() {
    let expected = [
        ("std::unique_ptr<int>", (8, 8)),
        ("std::shared_ptr<int>", (16, 8)),
        ("std::weak_ptr<int>", (16, 8)),
    ];
    for (ty, size_align) in expected {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), size_align, "type `{}`", ty);
    }
}
2114
/// `std::optional<T>` size and alignment depend on the wrapped type.
#[test]
fn cpp_optional_recursive_size() {
    let expected = [
        ("std::optional<bool>", (2, 1)),
        ("std::optional<int>", (8, 4)),
        ("std::optional<double>", (16, 8)),
    ];
    for (ty, size_align) in expected {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), size_align, "type `{}`", ty);
    }
}
2134
/// `std::function<...>` reports 32 bytes, 8-aligned, for any signature.
#[test]
fn cpp_function_is_32_bytes() {
    for ty in ["std::function<void()>", "std::function<int(int)>"] {
        assert_eq!(c_type_size_align(ty, &X86_64_SYSV), (32, 8), "type `{}`", ty);
    }
}
2146
/// A leading `std::string` member occupies 32 bytes, so the following `int`
/// starts at offset 32.
#[test]
fn cpp_stdlib_struct_with_string_field() {
    let code = r#"
struct Config {
    std::string name;
    int version;
    bool enabled;
};
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
    let config = &parsed[0];

    let name = &config.fields[0];
    assert_eq!(name.size, 32);

    let version = &config.fields[1];
    assert_eq!(version.offset, 32);
    assert_eq!(version.size, 4);
}
2164
/// A field typed with a `typedef uint32_t` alias gets the aliased type's size
/// and alignment.
#[test]
fn typedef_scalar_alias_resolves_correct_size() {
    let code = r#"
typedef uint32_t UserId;

struct User {
    UserId id;
    char name[16];
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();
    let user = parsed.iter().find(|l| l.name == "User").expect("User");
    let id = user
        .fields
        .iter()
        .find(|f| f.name == "id")
        .expect("id field");

    assert_eq!(id.size, 4, "UserId alias of uint32_t must be 4 bytes");
    assert_eq!(id.align, 4);
}
2184
/// With the alias resolved to `uint32_t`, `char` + `Token` totals 8 bytes.
#[test]
fn typedef_alias_layout_correct_total_size() {
    let code = r#"
typedef uint32_t Token;

struct Auth {
    char prefix;
    Token token;
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();
    let auth = parsed.iter().find(|l| l.name == "Auth").expect("Auth");

    assert_eq!(auth.total_size, 8, "alias-resolved layout should be 8 bytes");
}
2203
/// A field typed with a pointer typedef (`typedef int *IntPtr`) is sized as a
/// pointer (8 bytes), not as the pointee.
#[test]
fn typedef_pointer_not_confused_with_scalar_alias() {
    let code = r#"
typedef int *IntPtr;

struct S {
    IntPtr p;
    int x;
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();
    let s = parsed.iter().find(|l| l.name == "S").expect("S");
    let pointer = s.fields.iter().find(|f| f.name == "p").expect("p field");

    assert_eq!(pointer.size, 8, "pointer typedef should be 8 bytes on x86_64");
}
2223
/// A `typedef struct { ... } Point;` is emitted as a layout named `Point`.
#[test]
fn typedef_struct_not_collected_as_scalar_alias() {
    let code = r#"
typedef struct {
    int x;
    int y;
} Point;

struct Line {
    Point a;
    Point b;
};
"#;
    let parsed = parse_c(code, &X86_64_SYSV).unwrap();

    let point_emitted = parsed.iter().any(|l| l.name == "Point");
    assert!(point_emitted, "typedef struct should emit a StructLayout");
}
2246
/// Typedef aliases are resolved for C++ class members too: a `Timestamp`
/// (`uint64_t`) field is 8 bytes, 8-aligned.
#[test]
fn cpp_class_typedef_alias_resolved() {
    let code = r#"
typedef uint64_t Timestamp;

class Event {
    Timestamp when;
    int kind;
};
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();
    let event = parsed.iter().find(|l| l.name == "Event").expect("Event");
    let when = event
        .fields
        .iter()
        .find(|f| f.name == "when")
        .expect("when");

    assert_eq!(when.size, 8, "Timestamp alias of uint64_t must be 8 bytes");
    assert_eq!(when.align, 8);
}
2264
/// A templated struct must not be emitted as a layout.
#[test]
fn cpp_template_struct_is_skipped() {
    let code = "template<typename T> struct Wrapper { T value; int count; };";
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();

    let emitted = parsed.iter().any(|l| l.name == "Wrapper");
    assert!(
        !emitted,
        "template struct must be skipped, not emitted with wrong sizes"
    );
}
2277
/// A templated class must not be emitted as a layout.
#[test]
fn cpp_template_class_is_skipped() {
    let code = "template<typename T, typename U> class Pair { T first; U second; };";
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();

    let emitted = parsed.iter().any(|l| l.name == "Pair");
    assert!(!emitted, "template class must be skipped");
}
2287
/// Skipping a template must not affect a concrete struct declared next to it.
#[test]
fn cpp_non_template_struct_alongside_template_is_parsed() {
    let code = r#"
template<typename T> struct Generic { T val; };
struct Concrete { int x; double y; };
"#;
    let parsed = parse_cpp(code, &X86_64_SYSV).unwrap();

    let generic_emitted = parsed.iter().any(|l| l.name == "Generic");
    assert!(!generic_emitted, "Generic template must be skipped");

    let concrete = parsed
        .iter()
        .find(|l| l.name == "Concrete")
        .expect("Concrete must be parsed");
    assert_eq!(concrete.fields.len(), 2);
}
2306}