Skip to main content

padlock_source/frontends/
zig.rs

1// padlock-source/src/frontends/zig.rs
2//
3// Extracts struct layouts from Zig source using tree-sitter-zig.
4// Handles regular, extern, and packed struct variants.
5// Sizes use Zig's platform-native alignment rules (same as C on the target arch).
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{Field, StructLayout, TypeInfo};
9use tree_sitter::{Node, Parser};
10
11// ── type resolution ───────────────────────────────────────────────────────────
12
13fn zig_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
14    match ty.trim() {
15        "bool" => (1, 1),
16        "u8" | "i8" => (1, 1),
17        "u16" | "i16" | "f16" => (2, 2),
18        "u32" | "i32" | "f32" => (4, 4),
19        "u64" | "i64" | "f64" => (8, 8),
20        "u128" | "i128" | "f128" => (16, 16),
21        // f80 is the x87 80-bit float; stored as 10 bytes, aligned to 16 on x86-64
22        "f80" => (10, 16),
23        "usize" | "isize" => (arch.pointer_size, arch.pointer_size),
24        "void" | "anyopaque" => (0, 1),
25        // comptime-only or type-erased — treat as pointer-sized
26        "type" | "anytype" | "comptime_int" | "comptime_float" => {
27            (arch.pointer_size, arch.pointer_size)
28        }
29        _ => (arch.pointer_size, arch.pointer_size),
30    }
31}
32
33/// Determine size/align of a type node, dispatching by node kind.
34fn type_node_size_align(source: &str, node: Node<'_>, arch: &'static ArchConfig) -> (usize, usize) {
35    match node.kind() {
36        "builtin_type" | "identifier" => {
37            let text = source[node.byte_range()].trim();
38            zig_type_size_align(text, arch)
39        }
40        // *T — single pointer
41        "pointer_type" => (arch.pointer_size, arch.pointer_size),
42        // ?T — optional; if T is a pointer the optional is pointer-sized (null = 0),
43        // otherwise it is T + 1 byte tag, rounded up. Approximate as pointer-sized.
44        "nullable_type" => {
45            // Check if the inner type is a pointer — if so, null-pointer optimisation applies
46            if let Some(inner) = find_child_by_kinds(node, &["pointer_type"]) {
47                let _ = inner; // pointer optionals are pointer-sized
48                (arch.pointer_size, arch.pointer_size)
49            } else if let Some(inner) = find_first_type_child(source, node) {
50                let (sz, al) = type_node_size_align(source, inner, arch);
51                // Add 1 byte tag, round up to alignment
52                let tagged = (sz + 1).next_multiple_of(al.max(1));
53                (tagged, al.max(1))
54            } else {
55                (arch.pointer_size, arch.pointer_size)
56            }
57        }
58        // []T — slice = (ptr, len)
59        "slice_type" => (arch.pointer_size * 2, arch.pointer_size),
60        // [N]T — array; try to parse N and recursively get element size
61        "array_type" => {
62            if let Some((count, elem_sz, elem_al)) = parse_array_type(source, node, arch) {
63                (elem_sz * count, elem_al)
64            } else {
65                (arch.pointer_size, arch.pointer_size)
66            }
67        }
68        // error union E!T — approximate as two words
69        "error_union" => (arch.pointer_size * 2, arch.pointer_size),
70        _ => (arch.pointer_size, arch.pointer_size),
71    }
72}
73
74/// For `[N]T` nodes, return `Some((count, elem_size, elem_align))`.
75fn parse_array_type(
76    source: &str,
77    node: Node<'_>,
78    arch: &'static ArchConfig,
79) -> Option<(usize, usize, usize)> {
80    // array_type children: [ integer_literal ] type_expr
81    let mut count: Option<usize> = None;
82    let mut elem: Option<(usize, usize)> = None;
83
84    for i in 0..node.child_count() {
85        let child = node.child(i)?;
86        match child.kind() {
87            "integer" | "integer_literal" => {
88                let text = source[child.byte_range()].trim();
89                count = text.parse::<usize>().ok();
90            }
91            "builtin_type" | "identifier" | "pointer_type" | "slice_type" | "array_type"
92            | "nullable_type" => {
93                elem = Some(type_node_size_align(source, child, arch));
94            }
95            _ => {}
96        }
97    }
98
99    let count = count?;
100    let (esz, eal) = elem.unwrap_or((arch.pointer_size, arch.pointer_size));
101    Some((count, esz, eal))
102}
103
104fn find_child_by_kinds<'a>(node: Node<'a>, kinds: &[&str]) -> Option<Node<'a>> {
105    for i in 0..node.child_count() {
106        if let Some(c) = node.child(i)
107            && kinds.contains(&c.kind())
108        {
109            return Some(c);
110        }
111    }
112    None
113}
114
115fn find_first_type_child<'a>(source: &str, node: Node<'a>) -> Option<Node<'a>> {
116    let _ = source;
117    for i in 0..node.child_count() {
118        if let Some(c) = node.child(i) {
119            match c.kind() {
120                "builtin_type" | "identifier" | "pointer_type" | "slice_type" | "array_type"
121                | "nullable_type" | "error_union" => return Some(c),
122                _ => {}
123            }
124        }
125    }
126    None
127}
128
129// ── tree-sitter walker ────────────────────────────────────────────────────────
130
131fn extract_structs(source: &str, root: Node<'_>, arch: &'static ArchConfig) -> Vec<StructLayout> {
132    let mut layouts = Vec::new();
133    let mut stack = vec![root];
134
135    while let Some(node) = stack.pop() {
136        for i in (0..node.child_count()).rev() {
137            if let Some(c) = node.child(i) {
138                stack.push(c);
139            }
140        }
141
142        if node.kind() == "variable_declaration"
143            && let Some(layout) = parse_variable_declaration(source, node, arch)
144        {
145            layouts.push(layout);
146        }
147    }
148    layouts
149}
150
151fn parse_variable_declaration(
152    source: &str,
153    node: Node<'_>,
154    arch: &'static ArchConfig,
155) -> Option<StructLayout> {
156    let source_line = node.start_position().row as u32 + 1;
157    let mut name: Option<String> = None;
158    let mut struct_node: Option<Node> = None;
159
160    for i in 0..node.child_count() {
161        let child = node.child(i)?;
162        match child.kind() {
163            "identifier" => {
164                // The first identifier after `const`/`var` is the name
165                if name.is_none() {
166                    name = Some(source[child.byte_range()].to_string());
167                }
168            }
169            "struct_declaration" => struct_node = Some(child),
170            _ => {}
171        }
172    }
173
174    let name = name?;
175    let struct_node = struct_node?;
176    parse_struct_declaration(source, struct_node, name, arch, source_line)
177}
178
179fn parse_struct_declaration(
180    source: &str,
181    node: Node<'_>,
182    name: String,
183    arch: &'static ArchConfig,
184    source_line: u32,
185) -> Option<StructLayout> {
186    let mut is_packed = false;
187    let mut is_extern = false;
188    // (field_name, type_text, size, align)
189    let mut raw_fields: Vec<(String, String, usize, usize)> = Vec::new();
190
191    for i in 0..node.child_count() {
192        let child = node.child(i)?;
193        match child.kind() {
194            "packed" => is_packed = true,
195            "extern" => is_extern = true,
196            "container_field" => {
197                if let Some(f) = parse_container_field(source, child, arch, is_packed) {
198                    raw_fields.push(f);
199                }
200            }
201            _ => {}
202        }
203    }
204
205    if raw_fields.is_empty() {
206        return None;
207    }
208
209    // Regular Zig structs have implementation-defined layout (reordering allowed).
210    // Only extern and packed structs have stable C-compatible / bit-exact layout.
211    // For analysis purposes we simulate the declared order for all variants,
212    // since that is what the developer sees and intends to reason about.
213    let mut offset = 0usize;
214    let mut struct_align = 1usize;
215    let mut fields: Vec<Field> = Vec::new();
216
217    for (fname, type_text, size, align) in raw_fields {
218        let eff_align = if is_packed { 1 } else { align };
219        if eff_align > 0 {
220            offset = offset.next_multiple_of(eff_align);
221        }
222        struct_align = struct_align.max(eff_align);
223        fields.push(Field {
224            name: fname,
225            ty: TypeInfo::Primitive {
226                name: type_text,
227                size,
228                align,
229            },
230            offset,
231            size,
232            align: eff_align,
233            source_file: None,
234            source_line: None,
235            access: padlock_core::ir::AccessPattern::Unknown,
236        });
237        offset += size;
238    }
239
240    if !is_packed && struct_align > 0 {
241        offset = offset.next_multiple_of(struct_align);
242    }
243
244    let _ = is_extern; // affects ABI guarantees, not layout simulation
245
246    Some(StructLayout {
247        name,
248        total_size: offset,
249        align: struct_align,
250        fields,
251        source_file: None,
252        source_line: Some(source_line),
253        arch,
254        is_packed,
255        is_union: false,
256    })
257}
258
259/// Parse a `container_field` node and return `(name, type_text, size, align)`.
260fn parse_container_field(
261    source: &str,
262    node: Node<'_>,
263    arch: &'static ArchConfig,
264    is_packed: bool,
265) -> Option<(String, String, usize, usize)> {
266    let mut field_name: Option<String> = None;
267    let mut type_text: Option<String> = None;
268    let mut size_align: Option<(usize, usize)> = None;
269
270    for i in 0..node.child_count() {
271        let child = node.child(i)?;
272        match child.kind() {
273            "identifier" if field_name.is_none() => {
274                field_name = Some(source[child.byte_range()].to_string());
275            }
276            "builtin_type" | "pointer_type" | "nullable_type" | "slice_type" | "array_type"
277            | "error_union" => {
278                let text = source[child.byte_range()].to_string();
279                size_align = Some(type_node_size_align(source, child, arch));
280                type_text = Some(text);
281            }
282            "identifier" => {
283                // Second identifier = type name (e.g. a named struct type)
284                let text = source[child.byte_range()].trim().to_string();
285                size_align = Some(zig_type_size_align(&text, arch));
286                type_text = Some(text);
287            }
288            _ => {}
289        }
290    }
291
292    let name = field_name?;
293    let ty = type_text.unwrap_or_else(|| "anyopaque".to_string());
294    let (mut size, align) = size_align.unwrap_or((arch.pointer_size, arch.pointer_size));
295
296    if is_packed && size == 0 {
297        size = 0; // void fields in packed structs stay 0
298    }
299
300    Some((name, ty, size, align))
301}
302
303// ── public API ────────────────────────────────────────────────────────────────
304
305pub fn parse_zig(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
306    let mut parser = Parser::new();
307    parser.set_language(&tree_sitter_zig::LANGUAGE.into())?;
308    let tree = parser
309        .parse(source, None)
310        .ok_or_else(|| anyhow::anyhow!("tree-sitter-zig parse failed"))?;
311    Ok(extract_structs(source, tree.root_node(), arch))
312}
313
314// ── tests ─────────────────────────────────────────────────────────────────────
315
#[cfg(test)]
mod tests {
    use super::*;
    use padlock_core::arch::X86_64_SYSV;

    /// Parse `src` for x86-64 SysV, panicking if the parser itself fails.
    fn layouts_for(src: &str) -> Vec<StructLayout> {
        parse_zig(src, &X86_64_SYSV).unwrap()
    }

    /// `(size, align)` of the first field of the first struct in `src`.
    fn first_field(src: &str) -> (usize, usize) {
        let layouts = layouts_for(src);
        let field = &layouts[0].fields[0];
        (field.size, field.align)
    }

    #[test]
    fn parse_simple_zig_struct() {
        let layouts = layouts_for("const Point = struct { x: u32, y: u32 };");
        assert_eq!(layouts.len(), 1);
        let point = &layouts[0];
        assert_eq!(point.name, "Point");
        assert_eq!(point.fields.len(), 2);
        assert_eq!(point.total_size, 8);
    }

    #[test]
    fn zig_layout_with_padding() {
        let layouts = layouts_for("const T = struct { a: bool, b: u64 };");
        assert_eq!(layouts.len(), 1);
        let t = &layouts[0];
        // bool sits at 0; u64 is pushed to 8 by seven bytes of padding.
        assert_eq!(t.fields[0].offset, 0);
        assert_eq!(t.fields[1].offset, 8);
        assert_eq!(t.total_size, 16);
    }

    #[test]
    fn zig_packed_struct_no_padding() {
        let layouts = layouts_for("const Packed = packed struct { a: u8, b: u32 };");
        assert_eq!(layouts.len(), 1);
        let packed = &layouts[0];
        assert!(packed.is_packed);
        assert_eq!(packed.fields[0].offset, 0);
        // The u32 follows the u8 directly, with no padding in between.
        assert_eq!(packed.fields[1].offset, 1);
        assert_eq!(packed.total_size, 5);
    }

    #[test]
    fn zig_extern_struct_detected() {
        let layouts = layouts_for("const Extern = extern struct { x: i32, y: f64 };");
        assert_eq!(layouts.len(), 1);
        let ext = &layouts[0];
        // C layout: x at 0 (4 bytes), 4 bytes of padding, y at 8 (8 bytes).
        assert_eq!(ext.fields[0].offset, 0);
        assert_eq!(ext.fields[1].offset, 8);
        assert_eq!(ext.total_size, 16);
    }

    #[test]
    fn zig_pointer_field_is_pointer_sized() {
        let (size, align) = first_field("const S = struct { ptr: *u8 };");
        assert_eq!(size, 8);
        assert_eq!(align, 8);
    }

    #[test]
    fn zig_optional_pointer_is_pointer_sized() {
        let (size, _) = first_field("const S = struct { opt: ?*u8 };");
        assert_eq!(size, 8);
    }

    #[test]
    fn zig_slice_is_two_words() {
        // A slice is a pointer plus a length.
        let (size, _) = first_field("const S = struct { buf: []u8 };");
        assert_eq!(size, 16);
    }

    #[test]
    fn zig_usize_follows_arch() {
        let (size, _) = first_field("const S = struct { n: usize };");
        assert_eq!(size, 8);
    }

    #[test]
    fn zig_multiple_structs_parsed() {
        let layouts = layouts_for("const A = struct { x: u8 };\nconst B = struct { y: u64 };");
        assert_eq!(layouts.len(), 2);
        for expected in ["A", "B"] {
            assert!(layouts.iter().any(|l| l.name == expected));
        }
    }

    #[test]
    fn zig_array_field_size() {
        // Four elements of four bytes each.
        let (size, _) = first_field("const S = struct { buf: [4]u32 };");
        assert_eq!(size, 16);
    }
}