Skip to main content

padlock_source/
lib.rs

1// padlock-source/src/lib.rs
2
3pub mod concurrency;
4pub mod fixgen;
5pub mod frontends;
6
7use std::collections::HashMap;
8use std::path::Path;
9
10use padlock_core::arch::ArchConfig;
11use padlock_core::ir::{StructLayout, TypeInfo};
12
/// Which C++ standard library implementation the analysed code targets.
///
/// The choice determines the hardcoded sizes used for library types such as
/// `std::string` and `std::mutex`. `LibStdCpp` (GCC / Linux / glibc) is the
/// default variant.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CppStdlib {
    /// GCC libstdc++, the Linux/glibc default; `std::string` is 32 bytes.
    #[default]
    LibStdCpp,
    /// LLVM libc++ (Clang / macOS / Android); `std::string` is 24 bytes.
    LibCpp,
    /// Microsoft MSVC STL (Windows); `std::string` is 32 bytes (SSO = 16 chars).
    Msvc,
}
27
/// Set the C++ stdlib variant used for type-size lookups during source analysis.
///
/// This is a thread-local setting; it takes effect for all subsequent calls to
/// `parse_source` / `parse_source_str` on the current thread.  The default is
/// `CppStdlib::LibStdCpp`.  Call this from the CLI before invoking analysis.
///
/// `stdlib` is the variant to select; it is handed unchanged to the C/C++
/// frontend.
pub fn set_cpp_stdlib(stdlib: CppStdlib) {
    // Public entry point only: the value is forwarded to the C/C++ frontend.
    frontends::c_cpp::set_stdlib(stdlib);
}
36
/// Source language of a file or snippet handed to the parser.
///
/// Each variant selects the frontend that `parse_source_str` dispatches to.
/// The enum is a plain fieldless tag, so it is `Copy` and can be used as a
/// hash-map key or set member.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum SourceLanguage {
    /// C source or header.
    C,
    /// C++ source or header.
    Cpp,
    /// Rust source.
    Rust,
    /// Go source.
    Go,
    /// Zig source.
    Zig,
}
45
46/// Detect language from file extension.
47pub fn detect_language(path: &Path) -> Option<SourceLanguage> {
48    match path.extension().and_then(|e| e.to_str()) {
49        Some("c") | Some("h") => Some(SourceLanguage::C),
50        Some("cpp") | Some("cc") | Some("cxx") | Some("hpp") => Some(SourceLanguage::Cpp),
51        Some("rs") => Some(SourceLanguage::Rust),
52        Some("go") => Some(SourceLanguage::Go),
53        Some("zig") => Some(SourceLanguage::Zig),
54        _ => None,
55    }
56}
57
58/// Parse a source file and return struct layouts.
59pub fn parse_source(path: &Path, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
60    let lang = detect_language(path)
61        .ok_or_else(|| anyhow::anyhow!("unsupported file type: {}", path.display()))?;
62    let source = std::fs::read_to_string(path)?;
63    let mut layouts = parse_source_str(&source, &lang, arch)?;
64    let file_str = path.to_string_lossy().into_owned();
65    for layout in &mut layouts {
66        layout.source_file = Some(file_str.clone());
67    }
68    Ok(layouts)
69}
70
71/// Parse source text directly (useful for tests and piped input).
72pub fn parse_source_str(
73    source: &str,
74    lang: &SourceLanguage,
75    arch: &'static ArchConfig,
76) -> anyhow::Result<Vec<StructLayout>> {
77    let mut layouts = match lang {
78        SourceLanguage::C => frontends::c_cpp::parse_c(source, arch)?,
79        SourceLanguage::Cpp => frontends::c_cpp::parse_cpp(source, arch)?,
80        SourceLanguage::Rust => frontends::rust::parse_rust(source, arch)?,
81        SourceLanguage::Go => frontends::go::parse_go(source, arch)?,
82        SourceLanguage::Zig => frontends::zig::parse_zig(source, arch)?,
83    };
84
85    // Resolve fields whose type names match other structs in this file.
86    // This makes nested struct sizes accurate (instead of defaulting to pointer size).
87    resolve_nested_structs(&mut layouts);
88
89    // Annotate concurrency patterns
90    for layout in &mut layouts {
91        concurrency::annotate_concurrency(layout, lang);
92    }
93
94    // Remove structs explicitly opted out via `// padlock:ignore`
95    layouts.retain(|layout| !is_padlock_ignored(source, &layout.name));
96
97    Ok(layouts)
98}
99
100// ── nested struct resolution ──────────────────────────────────────────────────
101
/// Returns `true` if `name` is a well-known primitive type name in any
/// supported language.
///
/// Nested-struct resolution consults this so that a user-defined struct which
/// happens to share a primitive's name never overrides the primitive's size.
/// The comparison is exact and case-sensitive.
fn is_known_primitive(name: &str) -> bool {
    matches!(
        name,
        // Rust primitives
        "bool" | "u8" | "i8" | "u16" | "i16" | "u32" | "i32" | "f32" | "u64" | "i64" | "f64"
            | "u128" | "i128" | "usize" | "isize" | "char" | "str"
            // C/C++ primitives
            | "int" | "long" | "short" | "float" | "double" | "void"
            | "int8_t" | "uint8_t" | "int16_t" | "uint16_t" | "int32_t" | "uint32_t"
            | "int64_t" | "uint64_t" | "size_t" | "ssize_t" | "ptrdiff_t"
            | "intptr_t" | "uintptr_t" | "_Bool"
            // Go primitives (`int` and `bool` are covered by the lists above)
            | "uint" | "uintptr"
            | "int8" | "uint8" | "byte" | "int16" | "uint16" | "int32" | "uint32"
            | "int64" | "uint64" | "float32" | "float64" | "complex64" | "complex128"
            | "rune" | "string" | "error"
            // SIMD
            | "__m64" | "__m128" | "__m128d" | "__m128i"
            | "__m256" | "__m256d" | "__m256i"
            | "__m512" | "__m512d" | "__m512i"
    )
}
125
126/// Resolve fields whose type name matches another parsed struct.
127///
128/// Runs in a loop until stable to handle transitive nesting (struct A contains
129/// B which contains C). In practice, 2–3 iterations suffice for typical code.
130fn resolve_nested_structs(layouts: &mut [StructLayout]) {
131    loop {
132        // Build name → (total_size, align) from whatever we have so far.
133        let known: HashMap<String, (usize, usize)> = layouts
134            .iter()
135            .map(|l| (l.name.clone(), (l.total_size, l.align)))
136            .collect();
137
138        let mut changed_any = false;
139
140        for layout in layouts.iter_mut() {
141            let mut changed = false;
142
143            for field in layout.fields.iter_mut() {
144                // Extract the type name from Primitive or Opaque variants.
145                // Struct/Pointer/Array variants are already correctly sized.
146                let type_name: String = match &field.ty {
147                    TypeInfo::Primitive { name, .. } | TypeInfo::Opaque { name, .. } => {
148                        name.clone()
149                    }
150                    _ => continue,
151                };
152
153                // Never shadow built-in primitives.
154                if is_known_primitive(&type_name) {
155                    continue;
156                }
157
158                // Don't resolve a struct to itself (circular).
159                if type_name == layout.name {
160                    continue;
161                }
162
163                if let Some(&(struct_size, struct_align)) = known.get(&type_name) {
164                    // Only update if the size would change — avoids infinite loops
165                    // for pointer-sized structs that already have the right size.
166                    if field.size == struct_size && field.align == struct_align {
167                        continue;
168                    }
169                    let eff_align = if layout.is_packed { 1 } else { struct_align };
170                    field.ty = TypeInfo::Opaque {
171                        name: type_name,
172                        size: struct_size,
173                        align: struct_align,
174                    };
175                    field.size = struct_size;
176                    field.align = eff_align;
177                    changed = true;
178                }
179            }
180
181            if changed {
182                resimulate_layout(layout);
183                changed_any = true;
184            }
185        }
186
187        if !changed_any {
188            break;
189        }
190    }
191}
192
193/// Re-simulate field offsets and total_size after field sizes have been updated.
194fn resimulate_layout(layout: &mut StructLayout) {
195    if layout.is_union {
196        for field in layout.fields.iter_mut() {
197            field.offset = 0;
198        }
199        let max_size = layout.fields.iter().map(|f| f.size).max().unwrap_or(0);
200        let max_align = layout.fields.iter().map(|f| f.align).max().unwrap_or(1);
201        layout.total_size = if max_align > 0 {
202            max_size.next_multiple_of(max_align)
203        } else {
204            max_size
205        };
206        layout.align = max_align;
207        return;
208    }
209
210    let packed = layout.is_packed;
211    let mut offset = 0usize;
212    let mut struct_align = 1usize;
213
214    for field in layout.fields.iter_mut() {
215        let eff_align = if packed { 1 } else { field.align };
216        if eff_align > 0 {
217            offset = offset.next_multiple_of(eff_align);
218        }
219        field.offset = offset;
220        offset += field.size;
221        struct_align = struct_align.max(eff_align);
222    }
223
224    if !packed && struct_align > 0 {
225        offset = offset.next_multiple_of(struct_align);
226    }
227
228    layout.total_size = offset;
229    layout.align = struct_align;
230}
231
/// Returns `true` if a `padlock:ignore` annotation is attached to the
/// declaration of `struct_name` in `source`.
///
/// A declaration is any occurrence of `struct`, `union`, `class` or `type`
/// followed by a single space and `struct_name`, where both the keyword and
/// the name are whole words. The annotation is honoured when it appears
///
/// * anywhere on the same line as the declaration, or
/// * on the immediately preceding line, provided that line is a pure `//`
///   comment.
///
/// This allows callers to suppress analysis for a specific type by writing:
/// ```c
/// // padlock:ignore
/// struct MySpecialLayout { ... };
/// ```
///
/// NOTE(review): declarations that do not use one of the keywords above
/// directly before the name (for example Zig's `const Name = struct`) are not
/// matched here — confirm whether the corresponding frontend needs support.
fn is_padlock_ignored(source: &str, struct_name: &str) -> bool {
    // Keywords that introduce named type definitions across the supported
    // languages. `class` is required for C++ classes to be suppressible.
    const KEYWORDS: [&str; 4] = ["struct", "union", "class", "type"];
    const MARKER: &str = "padlock:ignore";

    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    for keyword in KEYWORDS {
        let needle = format!("{keyword} {struct_name}");
        let mut search = 0usize;

        while let Some(rel) = source[search..].find(&needle) {
            let abs = search + rel;
            // The needle starts with an ASCII keyword, so `abs + 1` is always
            // a valid char boundary for the next search window.
            search = abs + 1;

            // The keyword must not be the tail of a longer identifier
            // (`prototype Foo` is not `type Foo`) ...
            let starts_word = !source[..abs].chars().next_back().is_some_and(is_ident_char);
            // ... and the name must not be the head of a longer one
            // (`struct FooBar` is not `struct Foo`).
            let ends_word = !source[abs + needle.len()..]
                .chars()
                .next()
                .is_some_and(is_ident_char);
            if !(starts_word && ends_word) {
                continue;
            }

            // Inline annotation on the declaration's own line.
            let line_start = source[..abs].rfind('\n').map_or(0, |i| i + 1);
            let line_end = source[abs..].find('\n').map_or(source.len(), |i| abs + i);
            if source[line_start..line_end].contains(MARKER) {
                return true;
            }

            // Annotation on the immediately preceding line. Only a pure
            // comment line (starting with `//` after trimming) counts, so an
            // inline annotation on a prior declaration's closing line does not
            // suppress the declaration that follows it.
            if line_start > 0 {
                let prev_end = line_start - 1;
                let prev_start = source[..prev_end].rfind('\n').map_or(0, |i| i + 1);
                let prev_line = source[prev_start..prev_end].trim();
                if prev_line.starts_with("//") && prev_line.contains(MARKER) {
                    return true;
                }
            }
        }
    }

    false
}
282
283// ── tests ─────────────────────────────────────────────────────────────────────
284
#[cfg(test)]
mod tests {
    // Unit tests for language detection, `padlock:ignore` handling, nested
    // struct resolution, per-finding suppression and C++ base-class sizing.
    // All parsing tests target the x86-64 System V configuration.
    use super::*;
    use padlock_core::arch::X86_64_SYSV;

    // ── language detection ────────────────────────────────────────────────────

    #[test]
    fn detect_c_extensions() {
        assert_eq!(detect_language(Path::new("foo.c")), Some(SourceLanguage::C));
        assert_eq!(detect_language(Path::new("foo.h")), Some(SourceLanguage::C));
    }

    #[test]
    fn detect_cpp_extensions() {
        assert_eq!(
            detect_language(Path::new("foo.cpp")),
            Some(SourceLanguage::Cpp)
        );
        assert_eq!(
            detect_language(Path::new("foo.cc")),
            Some(SourceLanguage::Cpp)
        );
        assert_eq!(
            detect_language(Path::new("foo.hpp")),
            Some(SourceLanguage::Cpp)
        );
    }

    #[test]
    fn detect_rust_extension() {
        assert_eq!(
            detect_language(Path::new("foo.rs")),
            Some(SourceLanguage::Rust)
        );
    }

    #[test]
    fn detect_go_extension() {
        assert_eq!(
            detect_language(Path::new("foo.go")),
            Some(SourceLanguage::Go)
        );
    }

    #[test]
    fn detect_zig_extension() {
        assert_eq!(
            detect_language(Path::new("foo.zig")),
            Some(SourceLanguage::Zig)
        );
    }

    #[test]
    fn detect_unknown_is_none() {
        // Both an unrecognised extension and a missing extension yield `None`.
        assert_eq!(detect_language(Path::new("foo.py")), None);
        assert_eq!(detect_language(Path::new("foo")), None);
    }

    // ── basic parsing ─────────────────────────────────────────────────────────

    #[test]
    fn parse_source_str_c_roundtrip() {
        let src = "struct Point { int x; int y; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Point");
    }

    #[test]
    fn parse_source_str_rust_roundtrip() {
        let src = "struct Foo { x: u32, y: u64 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Foo");
    }

    // ── `padlock:ignore` handling ─────────────────────────────────────────────

    #[test]
    fn padlock_ignore_suppresses_c_struct() {
        // Annotation on the line directly above `Hidden` removes only `Hidden`.
        let src = "// padlock:ignore\nstruct Hidden { int x; int y; };\nstruct Visible { int a; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Visible");
    }

    #[test]
    fn padlock_ignore_inline_suppresses_c_struct() {
        // Inline annotation on the struct's own line suppresses it, but must NOT
        // suppress the struct that follows (the next struct's preceding line is a
        // code line with a trailing comment, not a pure `//` comment line).
        let src = "struct Hidden { int x; }; // padlock:ignore\nstruct Visible { int a; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1, "only Visible should remain");
        assert_eq!(layouts[0].name, "Visible");
    }

    #[test]
    fn padlock_ignore_suppresses_rust_struct() {
        let src = "// padlock:ignore\nstruct Hidden { x: u32 }\nstruct Visible { a: u32 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Visible");
    }

    #[test]
    fn padlock_ignore_without_annotation_keeps_struct() {
        let src = "struct Visible { int x; int y; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].name, "Visible");
    }

    // ── nested struct resolution ───────────────────────────────────────────────

    #[test]
    fn nested_rust_struct_size_resolved() {
        // Inner is 8 bytes. Outer has a field of type Inner.
        // Without resolution, Inner's field size would be pointer_size (8) — coincidentally
        // correct here, but offset placement still validates the pass runs.
        let src = "struct Inner { x: u64 }\nstruct Outer { a: u8, b: Inner }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let outer = layouts.iter().find(|l| l.name == "Outer").unwrap();
        let b = outer.fields.iter().find(|f| f.name == "b").unwrap();
        assert_eq!(b.size, 8, "Inner is 8 bytes");
        assert_eq!(b.align, 8, "Inner aligns to 8");
        // Outer: u8 at 0, [7 pad], Inner at 8 → total 16
        assert_eq!(outer.total_size, 16);
    }

    #[test]
    fn nested_rust_struct_non_pointer_size_resolved() {
        // Point is 8 bytes (two i32). Line holds two Points by value, so it must be
        // 16 bytes with the second Point at offset 8. The total happens to equal
        // 2 * pointer_size, so the per-field sizes and offset are asserted as well.
        let src = "struct Point { x: i32, y: i32 }\nstruct Line { a: Point, b: Point }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let line = layouts.iter().find(|l| l.name == "Line").unwrap();
        assert_eq!(line.total_size, 16);
        assert_eq!(line.fields[0].size, 8);
        assert_eq!(line.fields[1].size, 8);
        assert_eq!(line.fields[1].offset, 8);
    }

    #[test]
    fn nested_rust_struct_large_inner_triggers_padding() {
        // SmallHeader is a single bool (1 byte, align 1).
        // Wrapper { h: SmallHeader, data: u64 }
        // Without resolution `h` would keep the pointer-size default (8 bytes);
        // with resolution it is 1 byte, followed by 7 bytes of padding so that
        // `data` (u64, align 8) lands at offset 8 → total 16.
        let src = "struct SmallHeader { flag: bool }\nstruct Wrapper { h: SmallHeader, data: u64 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let wrapper = layouts.iter().find(|l| l.name == "Wrapper").unwrap();
        let h = wrapper.fields.iter().find(|f| f.name == "h").unwrap();
        // SmallHeader has total_size=1, align=1
        assert_eq!(h.size, 1, "SmallHeader resolved to 1 byte");
        assert_eq!(h.align, 1);
        // data (u64, align 8) should be at offset 8 (7 bytes padding after SmallHeader)
        let data = wrapper.fields.iter().find(|f| f.name == "data").unwrap();
        assert_eq!(data.offset, 8);
        assert_eq!(wrapper.total_size, 16);
    }

    #[test]
    fn nested_c_struct_resolved() {
        let src =
            "struct Vec2 { float x; float y; };\nstruct Rect { struct Vec2 tl; struct Vec2 br; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        let rect = layouts.iter().find(|l| l.name == "Rect").unwrap();
        // Each Vec2 is 8 bytes (two floats). Rect = 16 bytes, no padding.
        assert_eq!(rect.total_size, 16, "Rect should be 16 bytes");
        assert_eq!(rect.fields[0].size, 8);
        assert_eq!(rect.fields[1].size, 8);
        assert_eq!(rect.fields[1].offset, 8);
    }

    #[test]
    fn nested_go_struct_resolved() {
        // Same shape as the C test above, expressed with Go type declarations.
        let src = "package p\ntype Vec2 struct { X float32; Y float32 }\ntype Rect struct { TL Vec2; BR Vec2 }";
        let layouts = parse_source_str(src, &SourceLanguage::Go, &X86_64_SYSV).unwrap();
        let rect = layouts.iter().find(|l| l.name == "Rect").unwrap();
        assert_eq!(rect.total_size, 16);
        assert_eq!(rect.fields[0].size, 8);
        assert_eq!(rect.fields[1].size, 8);
        assert_eq!(rect.fields[1].offset, 8);
    }

    #[test]
    fn primitive_types_not_shadowed_by_struct_resolution() {
        // A struct named "u64" would be very unusual, but primitives must not be overwritten.
        let src = "struct Wrapper { x: u64, y: bool }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        let w = &layouts[0];
        let x = w.fields.iter().find(|f| f.name == "x").unwrap();
        assert_eq!(x.size, 8, "u64 must stay 8 bytes");
    }

    #[test]
    fn is_padlock_ignored_does_not_match_partial_names() {
        // Looking up "Foo" must not match the annotated declaration of "FooBar":
        // the name has to end at a word boundary.
        assert!(!is_padlock_ignored(
            "// padlock:ignore\nstruct FooBar { int x; };",
            "Foo"
        ));
    }

    // ── per-finding suppression integration ───────────────────────────────────

    #[test]
    fn per_finding_suppress_reorder_in_c() {
        // The struct has padding waste (char before long) — without suppression
        // this would produce PaddingWaste + ReorderSuggestion. With the annotation
        // on the preceding line, only PaddingWaste should survive.
        // This test checks only that the suppression list is recorded on the layout.
        let src = "// padlock: ignore[ReorderSuggestion]\nstruct Foo { char a; long b; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].suppressed_findings, vec!["ReorderSuggestion"]);
    }

    #[test]
    fn per_finding_suppress_multiple_kinds_in_c() {
        // Several finding kinds may be listed; they are recorded in source order.
        let src =
            "// padlock: ignore[PaddingWaste, ReorderSuggestion]\nstruct Bar { char a; long b; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(
            layouts[0].suppressed_findings,
            vec!["PaddingWaste", "ReorderSuggestion"]
        );
    }

    #[test]
    fn per_finding_suppress_in_rust() {
        let src = "// padlock: ignore[FalseSharing]\nstruct Foo { x: u64, y: u64 }";
        let layouts = parse_source_str(src, &SourceLanguage::Rust, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].suppressed_findings, vec!["FalseSharing"]);
    }

    #[test]
    fn per_finding_suppress_in_go() {
        let src =
            "package p\n// padlock: ignore[LocalityIssue]\ntype Foo struct { X int64; Y int64 }";
        let layouts = parse_source_str(src, &SourceLanguage::Go, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert_eq!(layouts[0].suppressed_findings, vec!["LocalityIssue"]);
    }

    #[test]
    fn unannotated_struct_has_no_suppressed_findings() {
        let src = "struct Clean { int x; int y; };";
        let layouts = parse_source_str(src, &SourceLanguage::C, &X86_64_SYSV).unwrap();
        assert_eq!(layouts.len(), 1);
        assert!(layouts[0].suppressed_findings.is_empty());
    }

    // ── C++ inheritance base-size resolution ─────────────────────────────────

    #[test]
    fn cpp_inheritance_base_size_resolved_via_parse_source_str() {
        // SmallBase has a single int = 4 bytes. BigDerived inherits SmallBase and
        // adds three ints. After resolve_nested_structs, __base_SmallBase must be
        // 4 bytes — a 4-byte base is used deliberately so that a pointer-sized
        // (8-byte) placeholder cannot pass by coincidence.
        let src = r#"
class SmallBase { int x; };
class BigDerived : public SmallBase { int a; int b; int c; };
"#;
        let layouts = parse_source_str(src, &SourceLanguage::Cpp, &X86_64_SYSV).unwrap();
        let derived = layouts.iter().find(|l| l.name == "BigDerived").unwrap();
        let base_field = derived
            .fields
            .iter()
            .find(|f| f.name == "__base_SmallBase")
            .unwrap();
        // SmallBase is 4 bytes (single int, no padding), so after resolution
        // the synthetic field must be 4 bytes, not 8 (pointer size).
        assert_eq!(
            base_field.size, 4,
            "__base_SmallBase should be resolved to 4 bytes (sizeof SmallBase)"
        );
        // BigDerived total: 4 (base) + 4*3 (a,b,c) = 16 bytes
        assert_eq!(derived.total_size, 16);
    }

    #[test]
    fn cpp_multi_level_inheritance_resolved() {
        // Three-level chain A → B → C exercises transitive resolution.
        let src = r#"
class A { int x; };
class B : public A { int y; };
class C : public B { int z; };
"#;
        let layouts = parse_source_str(src, &SourceLanguage::Cpp, &X86_64_SYSV).unwrap();
        let c = layouts.iter().find(|l| l.name == "C").unwrap();
        // C has __base_B (which is 8 bytes: A(4)+y(4)) + z(4) = 12 bytes
        let base_b = c.fields.iter().find(|f| f.name == "__base_B").unwrap();
        assert_eq!(base_b.size, 8, "B is 8 bytes (A's int + B's int)");
        assert_eq!(c.total_size, 12);
    }
}