Skip to main content

padlock_source/
lib.rs

1// padlock-source/src/lib.rs
2
3pub mod concurrency;
4pub mod fixgen;
5pub mod frontends;
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use padlock_core::arch::ArchConfig;
11use padlock_core::ir::{StructLayout, TypeInfo};
12
/// Source languages for which a parsing frontend exists.
///
/// The variant decides which frontend [`parse_source_str`] dispatches to.
/// `Eq` and `Hash` are derived so the language can be used as a map or set key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SourceLanguage {
    /// C source or header.
    C,
    /// C++ source or header.
    Cpp,
    /// Rust source.
    Rust,
    /// Go source.
    Go,
    /// Zig source.
    Zig,
}
21
22/// Detect language from file extension.
23pub fn detect_language(path: &Path) -> Option<SourceLanguage> {
24    match path.extension().and_then(|e| e.to_str()) {
25        Some("c") | Some("h") => Some(SourceLanguage::C),
26        Some("cpp") | Some("cc") | Some("cxx") | Some("hpp") => Some(SourceLanguage::Cpp),
27        Some("rs") => Some(SourceLanguage::Rust),
28        Some("go") => Some(SourceLanguage::Go),
29        Some("zig") => Some(SourceLanguage::Zig),
30        _ => None,
31    }
32}
33
34/// Parse a source file and return struct layouts.
35pub fn parse_source(path: &Path, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
36    let lang = detect_language(path)
37        .ok_or_else(|| anyhow::anyhow!("unsupported file type: {}", path.display()))?;
38    let source = std::fs::read_to_string(path)?;
39    let mut layouts = parse_source_str(&source, &lang, arch)?;
40    let file_str = path.to_string_lossy().into_owned();
41    for layout in &mut layouts {
42        layout.source_file = Some(file_str.clone());
43    }
44    Ok(layouts)
45}
46
47/// Parse source text directly (useful for tests and piped input).
48pub fn parse_source_str(
49    source: &str,
50    lang: &SourceLanguage,
51    arch: &'static ArchConfig,
52) -> anyhow::Result<Vec<StructLayout>> {
53    let mut layouts = match lang {
54        SourceLanguage::C => frontends::c_cpp::parse_c(source, arch)?,
55        SourceLanguage::Cpp => frontends::c_cpp::parse_cpp(source, arch)?,
56        SourceLanguage::Rust => frontends::rust::parse_rust(source, arch)?,
57        SourceLanguage::Go => frontends::go::parse_go(source, arch)?,
58        SourceLanguage::Zig => frontends::zig::parse_zig(source, arch)?,
59    };
60
61    // Resolve fields whose type names match other structs in this file.
62    // This makes nested struct sizes accurate (instead of defaulting to pointer size).
63    resolve_nested_structs(&mut layouts);
64
65    // Annotate concurrency patterns
66    for layout in &mut layouts {
67        concurrency::annotate_concurrency(layout, lang);
68    }
69
70    // Remove structs explicitly opted out via `// padlock:ignore`
71    layouts.retain(|layout| !is_padlock_ignored(source, &layout.name));
72
73    Ok(layouts)
74}
75
76// ── nested struct resolution ──────────────────────────────────────────────────
77
/// Returns true if `name` is a well-known primitive type name in any supported
/// language. These must never be shadowed by a user-defined struct name.
///
/// The comparison is an exact, case-sensitive string match. Names shared by
/// several languages (for example `int`, `bool`, `char`) are listed once,
/// under the first language that introduces them.
fn is_known_primitive(name: &str) -> bool {
    matches!(
        name,
        // Rust primitives
        "bool" | "u8" | "i8" | "u16" | "i16" | "u32" | "i32" | "f32" | "u64" | "i64" | "f64"
            | "u128" | "i128" | "usize" | "isize" | "char" | "str"
            // C/C++ primitives
            | "int" | "long" | "short" | "float" | "double" | "void"
            | "int8_t" | "uint8_t" | "int16_t" | "uint16_t" | "int32_t" | "uint32_t"
            | "int64_t" | "uint64_t" | "size_t" | "ssize_t" | "ptrdiff_t"
            | "intptr_t" | "uintptr_t" | "_Bool"
            // Go primitives (`int` and `bool` are already covered above)
            | "uint" | "uintptr"
            | "int8" | "uint8" | "byte" | "int16" | "uint16" | "int32" | "uint32"
            | "int64" | "uint64" | "float32" | "float64" | "complex64" | "complex128"
            | "rune" | "string" | "error"
            // SIMD
            | "__m64" | "__m128" | "__m128d" | "__m128i"
            | "__m256" | "__m256d" | "__m256i"
            | "__m512" | "__m512d" | "__m512i"
    )
}
101
102/// Resolve fields whose type name matches another parsed struct.
103///
104/// Runs in a loop until stable to handle transitive nesting (struct A contains
105/// B which contains C). In practice, 2–3 iterations suffice for typical code.
106fn resolve_nested_structs(layouts: &mut [StructLayout]) {
107    loop {
108        // Build name → (total_size, align) from whatever we have so far.
109        let known: HashMap<String, (usize, usize)> = layouts
110            .iter()
111            .map(|l| (l.name.clone(), (l.total_size, l.align)))
112            .collect();
113
114        let mut changed_any = false;
115
116        for layout in layouts.iter_mut() {
117            let mut changed = false;
118
119            for field in layout.fields.iter_mut() {
120                // Extract the type name from Primitive or Opaque variants.
121                // Struct/Pointer/Array variants are already correctly sized.
122                let type_name: String = match &field.ty {
123                    TypeInfo::Primitive { name, .. } | TypeInfo::Opaque { name, .. } => {
124                        name.clone()
125                    }
126                    _ => continue,
127                };
128
129                // Never shadow built-in primitives.
130                if is_known_primitive(&type_name) {
131                    continue;
132                }
133
134                // Don't resolve a struct to itself (circular).
135                if type_name == layout.name {
136                    continue;
137                }
138
139                if let Some(&(struct_size, struct_align)) = known.get(&type_name) {
140                    // Only update if the size would change — avoids infinite loops
141                    // for pointer-sized structs that already have the right size.
142                    if field.size == struct_size && field.align == struct_align {
143                        continue;
144                    }
145                    let eff_align = if layout.is_packed { 1 } else { struct_align };
146                    field.ty = TypeInfo::Opaque {
147                        name: type_name,
148                        size: struct_size,
149                        align: struct_align,
150                    };
151                    field.size = struct_size;
152                    field.align = eff_align;
153                    changed = true;
154                }
155            }
156
157            if changed {
158                resimulate_layout(layout);
159                changed_any = true;
160            }
161        }
162
163        if !changed_any {
164            break;
165        }
166    }
167}
168
169/// Re-simulate field offsets and total_size after field sizes have been updated.
170fn resimulate_layout(layout: &mut StructLayout) {
171    if layout.is_union {
172        for field in layout.fields.iter_mut() {
173            field.offset = 0;
174        }
175        let max_size = layout.fields.iter().map(|f| f.size).max().unwrap_or(0);
176        let max_align = layout.fields.iter().map(|f| f.align).max().unwrap_or(1);
177        layout.total_size = if max_align > 0 {
178            max_size.next_multiple_of(max_align)
179        } else {
180            max_size
181        };
182        layout.align = max_align;
183        return;
184    }
185
186    let packed = layout.is_packed;
187    let mut offset = 0usize;
188    let mut struct_align = 1usize;
189
190    for field in layout.fields.iter_mut() {
191        let eff_align = if packed { 1 } else { field.align };
192        if eff_align > 0 {
193            offset = offset.next_multiple_of(eff_align);
194        }
195        field.offset = offset;
196        offset += field.size;
197        struct_align = struct_align.max(eff_align);
198    }
199
200    if !packed && struct_align > 0 {
201        offset = offset.next_multiple_of(struct_align);
202    }
203
204    layout.total_size = offset;
205    layout.align = struct_align;
206}
207
/// Returns `true` if a `// padlock:ignore` comment appears on the line
/// immediately before (or inline on the same line as) the declaration of the
/// type named `struct_name`.
///
/// Recognised declaration forms:
/// - `struct Name`, `union Name`, `class Name`, `type Name`
/// - Zig's `Name = struct` / `Name = union`, optionally preceded by `packed`
///   or `extern`
///
/// Both the keyword and the name must sit on word boundaries, so `struct Foo`
/// matches neither `struct FooBar` nor `mystruct Foo`.
///
/// This allows callers to suppress analysis for a specific struct by writing:
/// ```c
/// // padlock:ignore
/// struct MySpecialLayout { ... };
/// ```
fn is_padlock_ignored(source: &str, struct_name: &str) -> bool {
    // `keyword Name` declarations.
    for keyword in &["struct", "union", "class", "type"] {
        let needle = format!("{keyword} {struct_name}");
        let mut search = 0usize;
        while let Some(rel) = source[search..].find(&needle) {
            let abs = search + rel;
            let name_end = abs + needle.len();
            if is_word_start(source, abs)
                && is_word_end(source, name_end)
                && has_ignore_annotation(source, abs)
            {
                return true;
            }
            // The needle begins with an ASCII keyword, so `abs + 1` is always
            // a char boundary.
            search = abs + 1;
        }
    }

    // Zig declares containers as `const Name = struct { ... }`, which the
    // keyword-first needles above can never match. An empty name would match
    // at every position, so it is excluded here.
    if struct_name.is_empty() {
        return false;
    }
    source.match_indices(struct_name).any(|(abs, matched)| {
        let name_end = abs + matched.len();
        is_word_start(source, abs)
            && is_word_end(source, name_end)
            && is_zig_container_rhs(&source[name_end..])
            && has_ignore_annotation(source, abs)
    })
}

/// True for characters that can be part of an identifier.
fn is_ident_char(c: char) -> bool {
    c.is_alphanumeric() || c == '_'
}

/// True if the character just before byte offset `pos` is not an identifier
/// character (or `pos` is the start of `source`).
fn is_word_start(source: &str, pos: usize) -> bool {
    !source[..pos].chars().next_back().is_some_and(is_ident_char)
}

/// True if the character at byte offset `pos` is not an identifier character
/// (or `pos` is the end of `source`).
fn is_word_end(source: &str, pos: usize) -> bool {
    !source[pos..].chars().next().is_some_and(is_ident_char)
}

/// True if `rest` (the text right after a type name) reads `= struct` or
/// `= union`, optionally with a `packed` / `extern` modifier, all on one line.
fn is_zig_container_rhs(rest: &str) -> bool {
    fn is_inline_ws(c: char) -> bool {
        c == ' ' || c == '\t'
    }

    let mut rhs = match rest.trim_start_matches(is_inline_ws).strip_prefix('=') {
        Some(after_eq) => after_eq.trim_start_matches(is_inline_ws),
        None => return false,
    };

    for modifier in &["packed", "extern"] {
        let stripped = rhs
            .strip_prefix(*modifier)
            .filter(|tail| tail.starts_with(is_inline_ws));
        if let Some(tail) = stripped {
            rhs = tail.trim_start_matches(is_inline_ws);
            break;
        }
    }

    ["struct", "union"].iter().any(|container| {
        rhs.strip_prefix(*container)
            .is_some_and(|tail| is_word_end(tail, 0))
    })
}

/// True if the line containing byte offset `decl_pos` mentions
/// `padlock:ignore`, or if the line directly above it is a pure `//` comment
/// that does.
///
/// The line above must start with `//` after trimming, so an inline annotation
/// on a previous declaration's closing line does not leak onto the next one.
fn has_ignore_annotation(source: &str, decl_pos: usize) -> bool {
    const MARKER: &str = "padlock:ignore";

    let line_start = source[..decl_pos].rfind('\n').map_or(0, |i| i + 1);
    let line_end = source[decl_pos..]
        .find('\n')
        .map_or(source.len(), |i| decl_pos + i);
    if source[line_start..line_end].contains(MARKER) {
        return true;
    }

    // No line above the first line.
    if line_start == 0 {
        return false;
    }
    let prev_end = line_start - 1;
    let prev_start = source[..prev_end].rfind('\n').map_or(0, |i| i + 1);
    let prev_line = source[prev_start..prev_end].trim();
    prev_line.starts_with("//") && prev_line.contains(MARKER)
}
258
259// ── tests ─────────────────────────────────────────────────────────────────────
260
// Unit tests for extension detection, `// padlock:ignore` filtering and
// nested-struct resolution. All parsing tests target the x86-64 SysV config.
#[cfg(test)]
mod tests {
    use super::*;
    use padlock_core::arch::X86_64_SYSV;

    // ── language detection ─────────────────────────────────────────────────────

    #[test]
    fn detect_c_extensions() {
        assert_eq!(detect_language(Path::new("foo.c")), Some(SourceLanguage::C));
        assert_eq!(detect_language(Path::new("foo.h")), Some(SourceLanguage::C));
    }

    #[test]
    fn detect_cpp_extensions() {
        assert_eq!(
            detect_language(Path::new("foo.cpp")),
            Some(SourceLanguage::Cpp)
        );
        assert_eq!(
            detect_language(Path::new("foo.cc")),
            Some(SourceLanguage::Cpp)
        );
        assert_eq!(
            detect_language(Path::new("foo.hpp")),
            Some(SourceLanguage::Cpp)
        );
    }

    #[test]
    fn detect_rust_extension() {
        assert_eq!(
            detect_language(Path::new("foo.rs")),
            Some(SourceLanguage::Rust)
        );
    }

    #[test]
    fn detect_go_extension() {
        assert_eq!(
            detect_language(Path::new("foo.go")),
            Some(SourceLanguage::Go)
        );
    }

    #[test]
    fn detect_zig_extension() {
        assert_eq!(
            detect_language(Path::new("foo.zig")),
            Some(SourceLanguage::Zig)
        );
    }

    #[test]
    fn detect_unknown_is_none() {
        // Covers both an unrecognised extension and a path with no extension.
        assert_eq!(detect_language(Path::new("foo.py")), None);
        assert_eq!(detect_language(Path::new("foo")), None);
    }

    // ── parsing smoke tests ────────────────────────────────────────────────────

    #[test]
    fn parse_source_str_c_roundtrip() {
        let src = "struct Point { int x; int y; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Point");
    }

    #[test]
    fn parse_source_str_rust_roundtrip() {
        let src = "struct Foo { x: u32, y: u64 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Foo");
    }

    // ── padlock:ignore filtering ───────────────────────────────────────────────

    #[test]
    fn padlock_ignore_suppresses_c_struct() {
        // Annotation on its own comment line directly above the struct.
        let src = "// padlock:ignore\nstruct Hidden { int x; int y; };\nstruct Visible { int a; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Visible");
    }

    #[test]
    fn padlock_ignore_inline_suppresses_c_struct() {
        // Inline annotation on the struct's own line suppresses it, but must NOT
        // suppress the struct that follows (the next struct's preceding line is a
        // code line with a trailing comment, not a pure `//` comment line).
        let src = "struct Hidden { int x; }; // padlock:ignore\nstruct Visible { int a; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1, "only Visible should remain");
        assert_eq!(layouts[0].name, "Visible");
    }

    #[test]
    fn padlock_ignore_suppresses_rust_struct() {
        let src = "// padlock:ignore\nstruct Hidden { x: u32 }\nstruct Visible { a: u32 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Visible");
    }

    #[test]
    fn padlock_ignore_without_annotation_keeps_struct() {
        let src = "struct Visible { int x; int y; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Visible");
    }

    // ── nested struct resolution ───────────────────────────────────────────────

    #[test]
    fn nested_rust_struct_size_resolved() {
        // Inner is 8 bytes. Outer has a field of type Inner.
        // Without resolution, Inner's field size would be pointer_size (8) — coincidentally
        // correct here, but offset placement still validates the pass runs.
        let src = "struct Inner { x: u64 }\nstruct Outer { a: u8, b: Inner }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let outer = layouts.iter().find(|l| l.name == "Outer").unwrap();
        let b = outer.fields.iter().find(|f| f.name == "b").unwrap();
        assert_eq!(b.size, 8, "Inner is 8 bytes");
        assert_eq!(b.align, 8, "Inner aligns to 8");
        // Outer: u8 at 0, [7 pad], Inner at 8 → total 16
        assert_eq!(outer.total_size, 16);
    }

    #[test]
    fn nested_rust_struct_non_pointer_size_resolved() {
        // Point is 8 bytes (two i32), so Line with two Points is 16 bytes.
        // Two pointer-sized placeholders would also total 16 on this target, so the
        // per-field sizes and the second field's offset are asserted as well.
        let src = "struct Point { x: i32, y: i32 }\nstruct Line { a: Point, b: Point }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let line = layouts.iter().find(|l| l.name == "Line").unwrap();
        assert_eq!(line.total_size, 16);
        assert_eq!(line.fields[0].size, 8);
        assert_eq!(line.fields[1].size, 8);
        assert_eq!(line.fields[1].offset, 8);
    }

    #[test]
    fn nested_rust_struct_large_inner_triggers_padding() {
        // SmallHeader holds a single bool: 1 byte, align 1.
        // Wrapper { h: SmallHeader, data: u64 }:
        // once `h` is resolved to 1 byte, `data` (align 8) lands at offset 8 after
        // 7 bytes of padding, and the struct totals 16 bytes.
        let src = "struct SmallHeader { flag: bool }\nstruct Wrapper { h: SmallHeader, data: u64 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let wrapper = layouts.iter().find(|l| l.name == "Wrapper").unwrap();
        let h = wrapper.fields.iter().find(|f| f.name == "h").unwrap();
        // SmallHeader has total_size=1, align=1
        assert_eq!(h.size, 1, "SmallHeader resolved to 1 byte");
        assert_eq!(h.align, 1);
        // data (u64, align 8) should be at offset 8 (7 bytes padding after SmallHeader)
        let data = wrapper.fields.iter().find(|f| f.name == "data").unwrap();
        assert_eq!(data.offset, 8);
        assert_eq!(wrapper.total_size, 16);
    }

    #[test]
    fn nested_c_struct_resolved() {
        let src =
            "struct Vec2 { float x; float y; };\nstruct Rect { struct Vec2 tl; struct Vec2 br; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        let rect = layouts.iter().find(|l| l.name == "Rect").unwrap();
        // Each Vec2 is 8 bytes (two floats). Rect = 16 bytes, no padding.
        assert_eq!(rect.total_size, 16, "Rect should be 16 bytes");
        assert_eq!(rect.fields[0].size, 8);
        assert_eq!(rect.fields[1].size, 8);
        assert_eq!(rect.fields[1].offset, 8);
    }

    #[test]
    fn nested_go_struct_resolved() {
        // Same shape as the C case: two 8-byte Vec2 values, back to back.
        let src = "package p\ntype Vec2 struct { X float32; Y float32 }\ntype Rect struct { TL Vec2; BR Vec2 }";
        let layouts = parse_source_str(src, &SourceLanguage::Go, &X86_64_SYSV).unwrap();
        let rect = layouts.iter().find(|l| l.name == "Rect").unwrap();
        assert_eq!(rect.total_size, 16);
        assert_eq!(rect.fields[0].size, 8);
        assert_eq!(rect.fields[1].size, 8);
        assert_eq!(rect.fields[1].offset, 8);
    }

    #[test]
    fn primitive_types_not_shadowed_by_struct_resolution() {
        // A struct named "u64" would be very unusual, but primitives must not be overwritten.
        // This source defines no such struct; it only checks that a u64 field keeps
        // its 8-byte size after the resolution pass has run.
        let src = "struct Wrapper { x: u64, y: bool }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let w = &layouts[0];
        let x = w.fields.iter().find(|f| f.name == "x").unwrap();
        assert_eq!(x.size, 8, "u64 must stay 8 bytes");
    }

    #[test]
    fn is_padlock_ignored_does_not_match_partial_names() {
        // "struct Foo" annotation must not suppress "struct FooBar"
        assert!(!is_padlock_ignored(
            "// padlock:ignore\nstruct FooBar { int x; };",
            "Foo"
        ));
    }
}
461}