Skip to main content

padlock_source/frontends/
rust.rs

1// padlock-source/src/frontends/rust.rs
2//
3// Extracts struct layouts from Rust source using syn + the Visit API.
4// Sizes are approximated from type names using the target arch config.
// Only repr(C) / repr(packed) / plain structs are handled; structs with generic type
// parameters are skipped, and generic arguments on field types are ignored.
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{AccessPattern, Field, StructLayout, TypeInfo};
9use quote::ToTokens;
10use syn::{visit::Visit, Fields, ItemStruct, Type};
11
12// ── attribute guard extraction ────────────────────────────────────────────────
13
14/// Extract a lock guard name from field attributes.
15///
16/// Recognised forms:
17/// - `#[lock_protected_by = "mu"]`
18/// - `#[protected_by = "mu"]`
19/// - `#[guarded_by("mu")]` or `#[guarded_by(mu)]`
20/// - `#[pt_guarded_by("mu")]` or `#[pt_guarded_by(mu)]` (pointer variant)
21pub fn extract_guard_from_attrs(attrs: &[syn::Attribute]) -> Option<String> {
22    for attr in attrs {
23        let path = attr.path();
24        // Name-value form: #[lock_protected_by = "mu"] / #[protected_by = "mu"]
25        if path.is_ident("lock_protected_by") || path.is_ident("protected_by") {
26            if let syn::Meta::NameValue(nv) = &attr.meta {
27                if let syn::Expr::Lit(syn::ExprLit {
28                    lit: syn::Lit::Str(s),
29                    ..
30                }) = &nv.value
31                {
32                    return Some(s.value());
33                }
34            }
35        }
36        // List form: #[guarded_by("mu")] / #[guarded_by(mu)] / #[pt_guarded_by(...)]
37        if path.is_ident("guarded_by") || path.is_ident("pt_guarded_by") {
38            // Try string literal first
39            if let Ok(s) = attr.parse_args::<syn::LitStr>() {
40                return Some(s.value());
41            }
42            // Fall back to bare identifier
43            if let Ok(id) = attr.parse_args::<syn::Ident>() {
44                return Some(id.to_string());
45            }
46        }
47    }
48    None
49}
50
51// ── type resolution ───────────────────────────────────────────────────────────
52
53fn rust_type_size_align(ty: &Type, arch: &'static ArchConfig) -> (usize, usize, TypeInfo) {
54    match ty {
55        Type::Path(tp) => {
56            let name = tp
57                .path
58                .segments
59                .last()
60                .map(|s| s.ident.to_string())
61                .unwrap_or_default();
62            let (size, align) = primitive_size_align(&name, arch);
63            (size, align, TypeInfo::Primitive { name, size, align })
64        }
65        Type::Ptr(_) | Type::Reference(_) => {
66            let s = arch.pointer_size;
67            (s, s, TypeInfo::Pointer { size: s, align: s })
68        }
69        Type::Array(arr) => {
70            let (elem_size, elem_align, elem_ty) = rust_type_size_align(&arr.elem, arch);
71            let count = array_len_from_expr(&arr.len);
72            let size = elem_size * count;
73            (
74                size,
75                elem_align,
76                TypeInfo::Array {
77                    element: Box::new(elem_ty),
78                    count,
79                    size,
80                    align: elem_align,
81                },
82            )
83        }
84        _ => {
85            let s = arch.pointer_size;
86            (
87                s,
88                s,
89                TypeInfo::Opaque {
90                    name: "(unknown)".into(),
91                    size: s,
92                    align: s,
93                },
94            )
95        }
96    }
97}
98
99fn primitive_size_align(name: &str, arch: &'static ArchConfig) -> (usize, usize) {
100    let ps = arch.pointer_size;
101    match name {
102        // ── language primitives ───────────────────────────────────────────────
103        "bool" | "u8" | "i8" => (1, 1),
104        "u16" | "i16" => (2, 2),
105        "u32" | "i32" | "f32" => (4, 4),
106        "u64" | "i64" | "f64" => (8, 8),
107        "u128" | "i128" => (16, 16),
108        "usize" | "isize" => (ps, ps),
109        "char" => (4, 4), // Rust char is a Unicode scalar (4 bytes)
110
111        // ── std atomics ───────────────────────────────────────────────────────
112        "AtomicBool" | "AtomicU8" | "AtomicI8" => (1, 1),
113        "AtomicU16" | "AtomicI16" => (2, 2),
114        "AtomicU32" | "AtomicI32" => (4, 4),
115        "AtomicU64" | "AtomicI64" => (8, 8),
116        "AtomicUsize" | "AtomicIsize" | "AtomicPtr" => (ps, ps),
117
118        // ── heap-allocated collections: ptr + len + cap (3 words) ────────────
119        // Size is independent of the element type T (generic arg already stripped).
120        "Vec" | "String" | "OsString" | "CString" | "PathBuf" => (3 * ps, ps),
121        "VecDeque" | "LinkedList" | "BinaryHeap" => (3 * ps, ps),
122        "HashMap" | "HashSet" | "BTreeMap" | "BTreeSet" => (3 * ps, ps),
123
124        // ── single-pointer smart pointers ─────────────────────────────────────
125        "Box" | "Rc" | "Arc" | "Weak" | "NonNull" | "Cell" => (ps, ps),
126
127        // ── interior-mutability / sync wrappers ───────────────────────────────
128        // Size depends on T but pointer-size is a reasonable approximation for
129        // display purposes; use binary analysis for precise results.
130        "RefCell" | "Mutex" | "RwLock" => (ps, ps),
131
132        // ── channels ─────────────────────────────────────────────────────────
133        "Sender" | "Receiver" | "SyncSender" => (ps, ps),
134
135        // ── zero-sized types ──────────────────────────────────────────────────
136        "PhantomData" | "PhantomPinned" => (0, 1),
137
138        // ── common fixed-size stdlib types ────────────────────────────────────
139        // Duration: u64 secs (8B) + u32 nanos (4B) → 12B + 4B trailing = 16B
140        "Duration" => (16, 8),
141        "Instant" | "SystemTime" => (16, 8),
142
143        // ── Pin<T> wraps T, pointer-size approximation ────────────────────────
144        "Pin" => (ps, ps),
145
146        // ── x86 SSE / AVX / AVX-512 SIMD types ───────────────────────────────
147        "__m64" => (8, 8),
148        "__m128" | "__m128d" | "__m128i" => (16, 16),
149        "__m256" | "__m256d" | "__m256i" => (32, 32),
150        "__m512" | "__m512d" | "__m512i" => (64, 64),
151
152        // ── Rust portable SIMD / packed_simd types ────────────────────────────
153        "f32x4" | "i32x4" | "u32x4" => (16, 16),
154        "f64x2" | "i64x2" | "u64x2" => (16, 16),
155        "f32x8" | "i32x8" | "u32x8" => (32, 32),
156        "f64x4" | "i64x4" | "u64x4" => (32, 32),
157        "f32x16" | "i32x16" | "u32x16" => (64, 64),
158
159        // ── unknown / third-party / generic type params (T, E, …) ────────────
160        _ => (ps, ps),
161    }
162}
163
164fn array_len_from_expr(expr: &syn::Expr) -> usize {
165    if let syn::Expr::Lit(syn::ExprLit {
166        lit: syn::Lit::Int(n),
167        ..
168    }) = expr
169    {
170        n.base10_parse::<usize>().unwrap_or(0)
171    } else {
172        0
173    }
174}
175
176// ── struct repr detection ─────────────────────────────────────────────────────
177
178fn is_packed(attrs: &[syn::Attribute]) -> bool {
179    attrs
180        .iter()
181        .any(|a| a.path().is_ident("repr") && a.to_token_stream().to_string().contains("packed"))
182}
183
184fn simulate_rust_layout(
185    name: String,
186    fields: &[(String, Type)],
187    packed: bool,
188    arch: &'static ArchConfig,
189) -> StructLayout {
190    let mut offset = 0usize;
191    let mut struct_align = 1usize;
192    let mut out_fields: Vec<Field> = Vec::new();
193
194    for (fname, ty) in fields {
195        let (size, align, type_info) = rust_type_size_align(ty, arch);
196        let effective_align = if packed { 1 } else { align };
197
198        if effective_align > 0 {
199            offset = offset.next_multiple_of(effective_align);
200        }
201        struct_align = struct_align.max(effective_align);
202
203        out_fields.push(Field {
204            name: fname.clone(),
205            ty: type_info,
206            offset,
207            size,
208            align: effective_align,
209            source_file: None,
210            source_line: None,
211            access: AccessPattern::Unknown,
212        });
213        offset += size;
214    }
215
216    if !packed && struct_align > 0 {
217        offset = offset.next_multiple_of(struct_align);
218    }
219
220    StructLayout {
221        name,
222        total_size: offset,
223        align: struct_align,
224        fields: out_fields,
225        source_file: None,
226        source_line: None,
227        arch,
228        is_packed: packed,
229        is_union: false,
230    }
231}
232
233// ── visitor ───────────────────────────────────────────────────────────────────
234
/// AST visitor that accumulates one `StructLayout` per struct it accepts.
struct StructVisitor {
    /// Target architecture passed through to the layout simulation.
    arch: &'static ArchConfig,
    /// Layouts collected so far, in the order the visitor pushed them.
    layouts: Vec<StructLayout>,
}
239
240impl<'ast> Visit<'ast> for StructVisitor {
241    fn visit_item_struct(&mut self, node: &'ast ItemStruct) {
242        syn::visit::visit_item_struct(self, node); // recurse into nested items
243
244        // Generic structs (e.g. `struct Foo<T>`) cannot be accurately laid out
245        // without knowing the concrete type arguments. Skip them rather than
246        // producing wrong field sizes for the type parameters.
247        if !node.generics.params.is_empty() {
248            return;
249        }
250
251        let name = node.ident.to_string();
252        let packed = is_packed(&node.attrs);
253
254        // Collect (field_name, type, optional_guard)
255        let fields: Vec<(String, Type, Option<String>)> = match &node.fields {
256            Fields::Named(nf) => nf
257                .named
258                .iter()
259                .map(|f| {
260                    let fname = f.ident.as_ref().map(|i| i.to_string()).unwrap_or_default();
261                    let guard = extract_guard_from_attrs(&f.attrs);
262                    (fname, f.ty.clone(), guard)
263                })
264                .collect(),
265            Fields::Unnamed(uf) => uf
266                .unnamed
267                .iter()
268                .enumerate()
269                .map(|(i, f)| {
270                    let guard = extract_guard_from_attrs(&f.attrs);
271                    (format!("_{i}"), f.ty.clone(), guard)
272                })
273                .collect(),
274            Fields::Unit => vec![],
275        };
276
277        let name_ty: Vec<(String, Type)> = fields
278            .iter()
279            .map(|(n, t, _)| (n.clone(), t.clone()))
280            .collect();
281        let mut layout = simulate_rust_layout(name, &name_ty, packed, self.arch);
282        layout.source_line = Some(node.ident.span().start().line as u32);
283
284        // Apply explicit guard annotations; these take precedence over the
285        // heuristic type-name pass in concurrency.rs (which skips non-Unknown fields).
286        for (i, (_, _, guard)) in fields.iter().enumerate() {
287            if let Some(g) = guard {
288                layout.fields[i].access = AccessPattern::Concurrent {
289                    guard: Some(g.clone()),
290                    is_atomic: false,
291                };
292            }
293        }
294
295        self.layouts.push(layout);
296    }
297}
298
299// ── public API ────────────────────────────────────────────────────────────────
300
301pub fn parse_rust(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
302    let file: syn::File = syn::parse_str(source)?;
303    let mut visitor = StructVisitor {
304        arch,
305        layouts: Vec::new(),
306    };
307    visitor.visit_file(&file);
308    Ok(visitor.layouts)
309}
310
311// ── tests ─────────────────────────────────────────────────────────────────────
312
#[cfg(test)]
mod tests {
    use super::*;
    use padlock_core::arch::X86_64_SYSV;

    /// Parse `src` for x86-64 SysV; a parse failure fails the calling test.
    fn layouts_for(src: &str) -> Vec<StructLayout> {
        parse_rust(src, &X86_64_SYSV).unwrap()
    }

    /// Size of the first field of the first struct in `src`.
    fn first_field_size(src: &str) -> usize {
        layouts_for(src)[0].fields[0].size
    }

    // ── basic layout ──────────────────────────────────────────────────────────

    #[test]
    fn parse_simple_struct() {
        let layouts = layouts_for("struct Foo { a: u8, b: u64, c: u32 }");
        assert_eq!(layouts.len(), 1);

        let foo = &layouts[0];
        assert_eq!(foo.name, "Foo");
        assert_eq!(foo.fields.len(), 3);
        assert_eq!(foo.fields[0].size, 1); // u8
        assert_eq!(foo.fields[1].size, 8); // u64
        assert_eq!(foo.fields[2].size, 4); // u32
    }

    #[test]
    fn layout_includes_padding() {
        // A u8 followed by a u64 needs 7 bytes of padding in between.
        let layouts = layouts_for("struct T { a: u8, b: u64 }");
        let t = &layouts[0];
        assert_eq!(t.fields[0].offset, 0);
        assert_eq!(t.fields[1].offset, 8); // u64 aligned to 8
        assert_eq!(t.total_size, 16);

        let gaps = padlock_core::ir::find_padding(t);
        assert_eq!(gaps[0].bytes, 7);
    }

    #[test]
    fn multiple_structs_parsed() {
        let layouts = layouts_for("struct A { x: u32 } struct B { y: u64 }");
        assert_eq!(layouts.len(), 2);
    }

    #[test]
    fn packed_struct_no_padding() {
        let layouts = layouts_for("#[repr(packed)] struct P { a: u8, b: u64 }");
        let p = &layouts[0];
        assert!(p.is_packed);
        assert_eq!(p.fields[1].offset, 1); // b sits directly after a

        let gaps = padlock_core::ir::find_padding(p);
        assert!(gaps.is_empty());
    }

    #[test]
    fn pointer_field_uses_arch_size() {
        // Pointers are 64-bit on this target.
        assert_eq!(first_field_size("struct S { p: *const u8 }"), 8);
    }

    // ── attribute guard extraction ─────────────────────────────────────────────

    #[test]
    fn lock_protected_by_attr_sets_guard() {
        let src = r#"
struct Cache {
    #[lock_protected_by = "mu"]
    readers: u64,
    mu: u64,
}
"#;
        let layouts = layouts_for(src);
        let readers = &layouts[0].fields[0];
        assert_eq!(readers.name, "readers");
        match &readers.access {
            AccessPattern::Concurrent { guard, .. } => {
                assert_eq!(guard.as_deref(), Some("mu"));
            }
            other => panic!("expected Concurrent, got {:?}", other),
        }
    }

    #[test]
    fn guarded_by_string_attr_sets_guard() {
        let src = r#"
struct S {
    #[guarded_by("lock")]
    value: u32,
}
"#;
        let layouts = layouts_for(src);
        match &layouts[0].fields[0].access {
            AccessPattern::Concurrent { guard, .. } => {
                assert_eq!(guard.as_deref(), Some("lock"));
            }
            _ => panic!("expected Concurrent"),
        }
    }

    #[test]
    fn guarded_by_ident_attr_sets_guard() {
        let src = r#"
struct S {
    #[guarded_by(mu)]
    count: u64,
}
"#;
        let layouts = layouts_for(src);
        match &layouts[0].fields[0].access {
            AccessPattern::Concurrent { guard, .. } => {
                assert_eq!(guard.as_deref(), Some("mu"));
            }
            _ => panic!("expected Concurrent"),
        }
    }

    #[test]
    fn protected_by_attr_sets_guard() {
        let src = r#"
struct S {
    #[protected_by = "lock_a"]
    x: u64,
}
"#;
        let layouts = layouts_for(src);
        match &layouts[0].fields[0].access {
            AccessPattern::Concurrent { guard, .. } => {
                assert_eq!(guard.as_deref(), Some("lock_a"));
            }
            _ => panic!("expected Concurrent"),
        }
    }

    #[test]
    fn different_guards_on_same_cache_line_is_false_sharing() {
        // `readers` (offset 0) and `writers` (offset 8) share cache line 0 and
        // carry different explicit guards, which is confirmed false sharing.
        let src = r#"
struct HotPath {
    #[lock_protected_by = "mu_a"]
    readers: u64,
    #[lock_protected_by = "mu_b"]
    writers: u64,
}
"#;
        let layouts = layouts_for(src);
        let hot_path = &layouts[0];
        assert!(padlock_core::analysis::false_sharing::has_false_sharing(
            hot_path
        ));
    }

    #[test]
    fn same_guard_on_same_cache_line_is_not_false_sharing() {
        let src = r#"
struct Safe {
    #[lock_protected_by = "mu"]
    a: u64,
    #[lock_protected_by = "mu"]
    b: u64,
}
"#;
        let layouts = layouts_for(src);
        let safe = &layouts[0];
        assert!(!padlock_core::analysis::false_sharing::has_false_sharing(
            safe
        ));
    }

    #[test]
    fn unannotated_field_stays_unknown() {
        let layouts = layouts_for("struct S { x: u64 }");
        assert!(matches!(
            layouts[0].fields[0].access,
            AccessPattern::Unknown
        ));
    }

    // ── stdlib type sizes ─────────────────────────────────────────────────────

    #[test]
    fn vec_field_has_three_pointer_size() {
        // Vec<T> is ptr + len + cap whatever T is: 3 × 8 on x86-64.
        assert_eq!(first_field_size("struct S { items: Vec<u64> }"), 24);
    }

    #[test]
    fn string_field_has_three_pointer_size() {
        assert_eq!(first_field_size("struct S { name: String }"), 24);
    }

    #[test]
    fn box_field_has_pointer_size() {
        assert_eq!(first_field_size("struct S { inner: Box<u64> }"), 8);
    }

    #[test]
    fn arc_field_has_pointer_size() {
        assert_eq!(first_field_size("struct S { shared: Arc<Vec<u8>> }"), 8);
    }

    #[test]
    fn phantom_data_is_zero_sized() {
        let layouts = layouts_for("struct S { a: u64, _marker: PhantomData<u8> }");
        let marker = layouts[0]
            .fields
            .iter()
            .find(|field| field.name == "_marker")
            .unwrap();
        assert_eq!(marker.size, 0);
    }

    #[test]
    fn duration_field_is_16_bytes() {
        assert_eq!(first_field_size("struct S { timeout: Duration }"), 16);
    }

    #[test]
    fn atomic_u64_has_correct_size() {
        assert_eq!(first_field_size("struct S { counter: AtomicU64 }"), 8);
    }

    #[test]
    fn atomic_bool_has_correct_size() {
        assert_eq!(first_field_size("struct S { flag: AtomicBool }"), 1);
    }

    // ── generic struct skipping ───────────────────────────────────────────────

    #[test]
    fn generic_struct_is_skipped() {
        // `struct Wrapper<T>` has no definite layout until T is known.
        let layouts = layouts_for("struct Wrapper<T> { value: T, count: usize }");
        assert!(
            layouts.is_empty(),
            "generic structs should be skipped; got {:?}",
            layouts.iter().map(|l| &l.name).collect::<Vec<_>>()
        );
    }

    #[test]
    fn generic_struct_with_multiple_params_is_skipped() {
        let layouts = layouts_for("struct Pair<A, B> { first: A, second: B }");
        assert!(layouts.is_empty());
    }

    #[test]
    fn non_generic_struct_still_parsed_when_generic_sibling_exists() {
        let src = r#"
struct Generic<T> { value: T }
struct Concrete { a: u32, b: u64 }
"#;
        let layouts = layouts_for(src);
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Concrete");
    }
}