Skip to main content

padlock_source/
lib.rs

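// padlock-source/src/lib.rs
//
// Entry points for turning source text into struct layouts: language detection
// by file extension, dispatch to the per-language frontends, nested-struct size
// resolution, concurrency annotation, and `// padlock:ignore` filtering.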
2
3pub mod concurrency;
4pub mod fixgen;
5pub mod frontends;
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use padlock_core::arch::ArchConfig;
11use padlock_core::ir::{StructLayout, TypeInfo};
12
/// Source languages that padlock can parse struct layouts from.
///
/// The variant selects which frontend [`parse_source_str`] dispatches to.
/// The enum is fieldless, so it is `Copy` and usable as a hash-map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SourceLanguage {
    /// C, parsed by `frontends::c_cpp::parse_c`.
    C,
    /// C++, parsed by `frontends::c_cpp::parse_cpp`.
    Cpp,
    /// Rust, parsed by `frontends::rust::parse_rust`.
    Rust,
    /// Go, parsed by `frontends::go::parse_go`.
    Go,
}
20
21/// Detect language from file extension.
22pub fn detect_language(path: &Path) -> Option<SourceLanguage> {
23    match path.extension().and_then(|e| e.to_str()) {
24        Some("c") | Some("h") => Some(SourceLanguage::C),
25        Some("cpp") | Some("cc") | Some("cxx") | Some("hpp") => Some(SourceLanguage::Cpp),
26        Some("rs") => Some(SourceLanguage::Rust),
27        Some("go") => Some(SourceLanguage::Go),
28        _ => None,
29    }
30}
31
32/// Parse a source file and return struct layouts.
33pub fn parse_source(path: &Path, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
34    let lang = detect_language(path)
35        .ok_or_else(|| anyhow::anyhow!("unsupported file type: {}", path.display()))?;
36    let source = std::fs::read_to_string(path)?;
37    parse_source_str(&source, &lang, arch)
38}
39
40/// Parse source text directly (useful for tests and piped input).
41pub fn parse_source_str(
42    source: &str,
43    lang: &SourceLanguage,
44    arch: &'static ArchConfig,
45) -> anyhow::Result<Vec<StructLayout>> {
46    let mut layouts = match lang {
47        SourceLanguage::C => frontends::c_cpp::parse_c(source, arch)?,
48        SourceLanguage::Cpp => frontends::c_cpp::parse_cpp(source, arch)?,
49        SourceLanguage::Rust => frontends::rust::parse_rust(source, arch)?,
50        SourceLanguage::Go => frontends::go::parse_go(source, arch)?,
51    };
52
53    // Resolve fields whose type names match other structs in this file.
54    // This makes nested struct sizes accurate (instead of defaulting to pointer size).
55    resolve_nested_structs(&mut layouts);
56
57    // Annotate concurrency patterns
58    for layout in &mut layouts {
59        concurrency::annotate_concurrency(layout, lang);
60    }
61
62    // Remove structs explicitly opted out via `// padlock:ignore`
63    layouts.retain(|layout| !is_padlock_ignored(source, &layout.name));
64
65    Ok(layouts)
66}
67
68// ── nested struct resolution ──────────────────────────────────────────────────
69
/// Returns `true` if `name` is a well-known primitive type name in any
/// supported language (Rust, C/C++, Go) or a common x86 SIMD vector type.
///
/// These names must never be shadowed by a user-defined struct name during
/// nested-struct resolution. The comparison is exact and case-sensitive.
fn is_known_primitive(name: &str) -> bool {
    matches!(
        name,
        // Rust primitives
        "bool" | "u8" | "i8" | "u16" | "i16" | "u32" | "i32" | "f32" | "u64" | "i64" | "f64"
            | "u128" | "i128" | "usize" | "isize" | "char" | "str"
            // C/C++ primitives
            | "int" | "long" | "short" | "float" | "double" | "void"
            | "int8_t" | "uint8_t" | "int16_t" | "uint16_t" | "int32_t" | "uint32_t"
            | "int64_t" | "uint64_t" | "size_t" | "ssize_t" | "ptrdiff_t"
            | "intptr_t" | "uintptr_t" | "_Bool"
            // Go primitives (`int` and `bool` are already covered above)
            | "uint" | "uintptr"
            | "int8" | "uint8" | "byte" | "int16" | "uint16" | "int32" | "uint32"
            | "int64" | "uint64" | "float32" | "float64" | "complex64" | "complex128"
            | "rune" | "string" | "error"
            // SIMD
            | "__m64" | "__m128" | "__m128d" | "__m128i"
            | "__m256" | "__m256d" | "__m256i"
            | "__m512" | "__m512d" | "__m512i"
    )
}
93
94/// Resolve fields whose type name matches another parsed struct.
95///
96/// Runs in a loop until stable to handle transitive nesting (struct A contains
97/// B which contains C). In practice, 2–3 iterations suffice for typical code.
98fn resolve_nested_structs(layouts: &mut Vec<StructLayout>) {
99    loop {
100        // Build name → (total_size, align) from whatever we have so far.
101        let known: HashMap<String, (usize, usize)> = layouts
102            .iter()
103            .map(|l| (l.name.clone(), (l.total_size, l.align)))
104            .collect();
105
106        let mut changed_any = false;
107
108        for layout in layouts.iter_mut() {
109            let mut changed = false;
110
111            for field in layout.fields.iter_mut() {
112                // Extract the type name from Primitive or Opaque variants.
113                // Struct/Pointer/Array variants are already correctly sized.
114                let type_name: String = match &field.ty {
115                    TypeInfo::Primitive { name, .. } | TypeInfo::Opaque { name, .. } => {
116                        name.clone()
117                    }
118                    _ => continue,
119                };
120
121                // Never shadow built-in primitives.
122                if is_known_primitive(&type_name) {
123                    continue;
124                }
125
126                // Don't resolve a struct to itself (circular).
127                if type_name == layout.name {
128                    continue;
129                }
130
131                if let Some(&(struct_size, struct_align)) = known.get(&type_name) {
132                    // Only update if the size would change — avoids infinite loops
133                    // for pointer-sized structs that already have the right size.
134                    if field.size == struct_size && field.align == struct_align {
135                        continue;
136                    }
137                    let eff_align = if layout.is_packed { 1 } else { struct_align };
138                    field.ty = TypeInfo::Opaque {
139                        name: type_name,
140                        size: struct_size,
141                        align: struct_align,
142                    };
143                    field.size = struct_size;
144                    field.align = eff_align;
145                    changed = true;
146                }
147            }
148
149            if changed {
150                resimulate_layout(layout);
151                changed_any = true;
152            }
153        }
154
155        if !changed_any {
156            break;
157        }
158    }
159}
160
161/// Re-simulate field offsets and total_size after field sizes have been updated.
162fn resimulate_layout(layout: &mut StructLayout) {
163    if layout.is_union {
164        for field in layout.fields.iter_mut() {
165            field.offset = 0;
166        }
167        let max_size = layout.fields.iter().map(|f| f.size).max().unwrap_or(0);
168        let max_align = layout.fields.iter().map(|f| f.align).max().unwrap_or(1);
169        layout.total_size = if max_align > 0 {
170            max_size.next_multiple_of(max_align)
171        } else {
172            max_size
173        };
174        layout.align = max_align;
175        return;
176    }
177
178    let packed = layout.is_packed;
179    let mut offset = 0usize;
180    let mut struct_align = 1usize;
181
182    for field in layout.fields.iter_mut() {
183        let eff_align = if packed { 1 } else { field.align };
184        if eff_align > 0 {
185            offset = offset.next_multiple_of(eff_align);
186        }
187        field.offset = offset;
188        offset += field.size;
189        struct_align = struct_align.max(eff_align);
190    }
191
192    if !packed && struct_align > 0 {
193        offset = offset.next_multiple_of(struct_align);
194    }
195
196    layout.total_size = offset;
197    layout.align = struct_align;
198}
199
/// Returns `true` if a `padlock:ignore` annotation applies to the
/// struct/union/type declaration for `struct_name`.
///
/// An annotation applies when it appears either
///
/// * anywhere on the same line as `<keyword> <struct_name>`, or
/// * on the immediately preceding line, provided that line is a pure `//`
///   comment (so an inline annotation on a previous struct's closing line does
///   not leak onto the struct that follows).
///
/// `<keyword>` is one of `struct`, `union` or `type`, separated from the name by
/// exactly one space. Both ends of the match must sit on identifier boundaries:
/// `struct Foo` does not match `struct FooBar`, and `type Foo` does not match
/// inside `prototype Foo`.
///
/// This allows callers to suppress analysis for a specific struct by writing:
/// ```c
/// // padlock:ignore
/// struct MySpecialLayout { ... };
/// ```
///
/// NOTE(review): matching is purely textual, so any occurrence of
/// `<keyword> <struct_name>` is considered — including uses such as a C field
/// declaration `struct Foo f;` on an annotated line, not only the definition.
fn is_padlock_ignored(source: &str, struct_name: &str) -> bool {
    const MARKER: &str = "padlock:ignore";
    let is_ident_char = |c: char| c.is_alphanumeric() || c == '_';

    // Keywords that introduce named type definitions across all supported languages.
    for keyword in ["struct", "union", "type"] {
        let needle = format!("{keyword} {struct_name}");
        let mut search = 0usize;

        while let Some(rel) = source[search..].find(&needle) {
            let abs = search + rel;
            // The needle starts with an ASCII keyword, so `abs + 1` is always a
            // char boundary; stepping one byte also catches overlapping matches.
            search = abs + 1;

            // Reject matches embedded in a longer identifier on either side.
            let after_name = abs + needle.len();
            let glued_before = source[..abs].chars().next_back().is_some_and(is_ident_char);
            let glued_after = source[after_name..].chars().next().is_some_and(is_ident_char);
            if glued_before || glued_after {
                continue;
            }

            // Inline annotation on the line that holds the keyword.
            let line_start = source[..abs].rfind('\n').map_or(0, |i| i + 1);
            let line_end = source[abs..].find('\n').map_or(source.len(), |i| abs + i);
            if source[line_start..line_end].contains(MARKER) {
                return true;
            }

            // Annotation on the immediately preceding line, accepted only when
            // that line is a pure `//` comment.
            if line_start > 0 {
                let prev_end = line_start - 1;
                let prev_start = source[..prev_end].rfind('\n').map_or(0, |i| i + 1);
                let prev = source[prev_start..prev_end].trim();
                if prev.starts_with("//") && prev.contains(MARKER) {
                    return true;
                }
            }
        }
    }
    false
}
250
251// ── tests ─────────────────────────────────────────────────────────────────────
252
/// Unit tests for language detection, `// padlock:ignore` filtering and
/// nested-struct resolution. All parsing tests target the x86-64 System V
/// architecture configuration.
#[cfg(test)]
mod tests {
    use super::*;
    use padlock_core::arch::X86_64_SYSV;

    // ── language detection ─────────────────────────────────────────────────────

    #[test]
    fn detect_c_extensions() {
        assert_eq!(detect_language(Path::new("foo.c")), Some(SourceLanguage::C));
        assert_eq!(detect_language(Path::new("foo.h")), Some(SourceLanguage::C));
    }

    #[test]
    fn detect_cpp_extensions() {
        assert_eq!(
            detect_language(Path::new("foo.cpp")),
            Some(SourceLanguage::Cpp)
        );
        assert_eq!(
            detect_language(Path::new("foo.cc")),
            Some(SourceLanguage::Cpp)
        );
        assert_eq!(
            detect_language(Path::new("foo.hpp")),
            Some(SourceLanguage::Cpp)
        );
    }

    #[test]
    fn detect_rust_extension() {
        assert_eq!(
            detect_language(Path::new("foo.rs")),
            Some(SourceLanguage::Rust)
        );
    }

    #[test]
    fn detect_go_extension() {
        assert_eq!(
            detect_language(Path::new("foo.go")),
            Some(SourceLanguage::Go)
        );
    }

    #[test]
    fn detect_unknown_is_none() {
        // Covers both an unsupported extension and a path with no extension.
        assert_eq!(detect_language(Path::new("foo.py")), None);
        assert_eq!(detect_language(Path::new("foo")), None);
    }

    // ── parsing from a string ──────────────────────────────────────────────────

    #[test]
    fn parse_source_str_c_roundtrip() {
        let src = "struct Point { int x; int y; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Point");
    }

    #[test]
    fn parse_source_str_rust_roundtrip() {
        let src = "struct Foo { x: u32, y: u64 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Foo");
    }

    // ── padlock:ignore filtering ───────────────────────────────────────────────

    #[test]
    fn padlock_ignore_suppresses_c_struct() {
        // The annotation sits on its own comment line directly above `Hidden`.
        let src = "// padlock:ignore\nstruct Hidden { int x; int y; };\nstruct Visible { int a; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Visible");
    }

    #[test]
    fn padlock_ignore_inline_suppresses_c_struct() {
        // Inline annotation on the struct's own line suppresses it, but must NOT
        // suppress the struct that follows (the next struct's preceding line is a
        // code line with a trailing comment, not a pure `//` comment line).
        let src = "struct Hidden { int x; }; // padlock:ignore\nstruct Visible { int a; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1, "only Visible should remain");
        assert_eq!(layouts[0].name, "Visible");
    }

    #[test]
    fn padlock_ignore_suppresses_rust_struct() {
        let src = "// padlock:ignore\nstruct Hidden { x: u32 }\nstruct Visible { a: u32 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Visible");
    }

    #[test]
    fn padlock_ignore_without_annotation_keeps_struct() {
        let src = "struct Visible { int x; int y; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Visible");
    }

    // ── nested struct resolution ───────────────────────────────────────────────

    #[test]
    fn nested_rust_struct_size_resolved() {
        // Inner is 8 bytes. Outer has a field of type Inner.
        // Without resolution, Inner's field size would be pointer_size (8) — coincidentally
        // correct here, but offset placement still validates the pass runs.
        let src = "struct Inner { x: u64 }\nstruct Outer { a: u8, b: Inner }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let outer = layouts.iter().find(|l| l.name == "Outer").unwrap();
        let b = outer.fields.iter().find(|f| f.name == "b").unwrap();
        assert_eq!(b.size, 8, "Inner is 8 bytes");
        assert_eq!(b.align, 8, "Inner aligns to 8");
        // Outer: u8 at 0, [7 pad], Inner at 8 → total 16
        assert_eq!(outer.total_size, 16);
    }

    #[test]
    fn nested_rust_struct_non_pointer_size_resolved() {
        // Point is 8 bytes (two i32), so Line holding two Points is 16 bytes.
        // That total happens to equal 2 * pointer_size on this target, so the
        // per-field sizes and the offset of the second field are checked as well.
        let src = "struct Point { x: i32, y: i32 }\nstruct Line { a: Point, b: Point }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let line = layouts.iter().find(|l| l.name == "Line").unwrap();
        assert_eq!(line.total_size, 16);
        assert_eq!(line.fields[0].size, 8);
        assert_eq!(line.fields[1].size, 8);
        assert_eq!(line.fields[1].offset, 8);
    }

    #[test]
    fn nested_rust_struct_large_inner_triggers_padding() {
        // SmallHeader is a single bool: 1 byte, align 1.
        // Wrapper { h: SmallHeader, data: u64 }
        // With resolution, `h` is 1 byte rather than a pointer-sized placeholder;
        // `data` (u64, align 8) then lands at offset 8 after 7 bytes of padding,
        // for a total of 16 bytes.
        let src = "struct SmallHeader { flag: bool }\nstruct Wrapper { h: SmallHeader, data: u64 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let wrapper = layouts.iter().find(|l| l.name == "Wrapper").unwrap();
        let h = wrapper.fields.iter().find(|f| f.name == "h").unwrap();
        // SmallHeader has total_size=1, align=1
        assert_eq!(h.size, 1, "SmallHeader resolved to 1 byte");
        assert_eq!(h.align, 1);
        // data (u64, align 8) should be at offset 8 (7 bytes padding after SmallHeader)
        let data = wrapper.fields.iter().find(|f| f.name == "data").unwrap();
        assert_eq!(data.offset, 8);
        assert_eq!(wrapper.total_size, 16);
    }

    #[test]
    fn nested_c_struct_resolved() {
        let src = "struct Vec2 { float x; float y; };\nstruct Rect { struct Vec2 tl; struct Vec2 br; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        let rect = layouts.iter().find(|l| l.name == "Rect").unwrap();
        // Each Vec2 is 8 bytes (two floats). Rect = 16 bytes, no padding.
        assert_eq!(rect.total_size, 16, "Rect should be 16 bytes");
        assert_eq!(rect.fields[0].size, 8);
        assert_eq!(rect.fields[1].size, 8);
        assert_eq!(rect.fields[1].offset, 8);
    }

    #[test]
    fn nested_go_struct_resolved() {
        // Same shape as the C case: two 8-byte Vec2 values back to back.
        let src = "package p\ntype Vec2 struct { X float32; Y float32 }\ntype Rect struct { TL Vec2; BR Vec2 }";
        let layouts = parse_source_str(src, &SourceLanguage::Go, &X86_64_SYSV).unwrap();
        let rect = layouts.iter().find(|l| l.name == "Rect").unwrap();
        assert_eq!(rect.total_size, 16);
        assert_eq!(rect.fields[0].size, 8);
        assert_eq!(rect.fields[1].size, 8);
        assert_eq!(rect.fields[1].offset, 8);
    }

    #[test]
    fn primitive_types_not_shadowed_by_struct_resolution() {
        // A struct named "u64" would be very unusual, but primitives must not be overwritten.
        // This input has no such struct; it checks that a primitive field keeps its size
        // after the resolution pass has run.
        let src = "struct Wrapper { x: u64, y: bool }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let w = &layouts[0];
        let x = w.fields.iter().find(|f| f.name == "x").unwrap();
        assert_eq!(x.size, 8, "u64 must stay 8 bytes");
    }

    #[test]
    fn is_padlock_ignored_does_not_match_partial_names() {
        // "struct Foo" annotation must not suppress "struct FooBar"
        assert!(!is_padlock_ignored(
            "// padlock:ignore\nstruct FooBar { int x; };",
            "Foo"
        ));
    }
}