Skip to main content

padlock_source/frontends/
zig.rs

1// padlock-source/src/frontends/zig.rs
2//
3// Extracts struct layouts from Zig source using tree-sitter-zig.
4// Handles regular, extern, and packed struct variants.
5// Sizes use Zig's platform-native alignment rules (same as C on the target arch).
6
7use padlock_core::arch::ArchConfig;
8use padlock_core::ir::{Field, StructLayout, TypeInfo};
9use tree_sitter::{Node, Parser};
10
11// ── type resolution ───────────────────────────────────────────────────────────
12
13fn zig_type_size_align(ty: &str, arch: &'static ArchConfig) -> (usize, usize) {
14    match ty.trim() {
15        "bool" => (1, 1),
16        "u8" | "i8" => (1, 1),
17        "u16" | "i16" | "f16" => (2, 2),
18        "u32" | "i32" | "f32" => (4, 4),
19        "u64" | "i64" | "f64" => (8, 8),
20        "u128" | "i128" | "f128" => (16, 16),
21        // f80 is the x87 80-bit float; stored as 10 bytes, aligned to 16 on x86-64
22        "f80" => (10, 16),
23        "usize" | "isize" => (arch.pointer_size, arch.pointer_size),
24        "void" | "anyopaque" => (0, 1),
25        // comptime-only or type-erased — treat as pointer-sized
26        "type" | "anytype" | "comptime_int" | "comptime_float" => {
27            (arch.pointer_size, arch.pointer_size)
28        }
29        _ => (arch.pointer_size, arch.pointer_size),
30    }
31}
32
33/// Determine size/align of a type node, dispatching by node kind.
34fn type_node_size_align(
35    source: &str,
36    node: Node<'_>,
37    arch: &'static ArchConfig,
38) -> (usize, usize) {
39    match node.kind() {
40        "builtin_type" | "identifier" => {
41            let text = source[node.byte_range()].trim();
42            zig_type_size_align(text, arch)
43        }
44        // *T — single pointer
45        "pointer_type" => (arch.pointer_size, arch.pointer_size),
46        // ?T — optional; if T is a pointer the optional is pointer-sized (null = 0),
47        // otherwise it is T + 1 byte tag, rounded up. Approximate as pointer-sized.
48        "nullable_type" => {
49            // Check if the inner type is a pointer — if so, null-pointer optimisation applies
50            if let Some(inner) = find_child_by_kinds(node, &["pointer_type"]) {
51                let _ = inner; // pointer optionals are pointer-sized
52                (arch.pointer_size, arch.pointer_size)
53            } else if let Some(inner) = find_first_type_child(source, node) {
54                let (sz, al) = type_node_size_align(source, inner, arch);
55                // Add 1 byte tag, round up to alignment
56                let tagged = (sz + 1).next_multiple_of(al.max(1));
57                (tagged, al.max(1))
58            } else {
59                (arch.pointer_size, arch.pointer_size)
60            }
61        }
62        // []T — slice = (ptr, len)
63        "slice_type" => (arch.pointer_size * 2, arch.pointer_size),
64        // [N]T — array; try to parse N and recursively get element size
65        "array_type" => {
66            if let Some((count, elem_sz, elem_al)) = parse_array_type(source, node, arch) {
67                (elem_sz * count, elem_al)
68            } else {
69                (arch.pointer_size, arch.pointer_size)
70            }
71        }
72        // error union E!T — approximate as two words
73        "error_union" => (arch.pointer_size * 2, arch.pointer_size),
74        _ => (arch.pointer_size, arch.pointer_size),
75    }
76}
77
78/// For `[N]T` nodes, return `Some((count, elem_size, elem_align))`.
79fn parse_array_type(
80    source: &str,
81    node: Node<'_>,
82    arch: &'static ArchConfig,
83) -> Option<(usize, usize, usize)> {
84    // array_type children: [ integer_literal ] type_expr
85    let mut count: Option<usize> = None;
86    let mut elem: Option<(usize, usize)> = None;
87
88    for i in 0..node.child_count() {
89        let child = node.child(i)?;
90        match child.kind() {
91            "integer" | "integer_literal" => {
92                let text = source[child.byte_range()].trim();
93                count = text.parse::<usize>().ok();
94            }
95            "builtin_type" | "identifier" | "pointer_type" | "slice_type" | "array_type"
96            | "nullable_type" => {
97                elem = Some(type_node_size_align(source, child, arch));
98            }
99            _ => {}
100        }
101    }
102
103    let count = count?;
104    let (esz, eal) = elem.unwrap_or((arch.pointer_size, arch.pointer_size));
105    Some((count, esz, eal))
106}
107
108fn find_child_by_kinds<'a>(node: Node<'a>, kinds: &[&str]) -> Option<Node<'a>> {
109    for i in 0..node.child_count() {
110        if let Some(c) = node.child(i) {
111            if kinds.contains(&c.kind()) {
112                return Some(c);
113            }
114        }
115    }
116    None
117}
118
119fn find_first_type_child<'a>(source: &str, node: Node<'a>) -> Option<Node<'a>> {
120    let _ = source;
121    for i in 0..node.child_count() {
122        if let Some(c) = node.child(i) {
123            match c.kind() {
124                "builtin_type" | "identifier" | "pointer_type" | "slice_type" | "array_type"
125                | "nullable_type" | "error_union" => return Some(c),
126                _ => {}
127            }
128        }
129    }
130    None
131}
132
133// ── tree-sitter walker ────────────────────────────────────────────────────────
134
135fn extract_structs(source: &str, root: Node<'_>, arch: &'static ArchConfig) -> Vec<StructLayout> {
136    let mut layouts = Vec::new();
137    let mut stack = vec![root];
138
139    while let Some(node) = stack.pop() {
140        for i in (0..node.child_count()).rev() {
141            if let Some(c) = node.child(i) {
142                stack.push(c);
143            }
144        }
145
146        if node.kind() == "variable_declaration" {
147            if let Some(layout) = parse_variable_declaration(source, node, arch) {
148                layouts.push(layout);
149            }
150        }
151    }
152    layouts
153}
154
155fn parse_variable_declaration(
156    source: &str,
157    node: Node<'_>,
158    arch: &'static ArchConfig,
159) -> Option<StructLayout> {
160    let source_line = node.start_position().row as u32 + 1;
161    let mut name: Option<String> = None;
162    let mut struct_node: Option<Node> = None;
163
164    for i in 0..node.child_count() {
165        let child = node.child(i)?;
166        match child.kind() {
167            "identifier" => {
168                // The first identifier after `const`/`var` is the name
169                if name.is_none() {
170                    name = Some(source[child.byte_range()].to_string());
171                }
172            }
173            "struct_declaration" => struct_node = Some(child),
174            _ => {}
175        }
176    }
177
178    let name = name?;
179    let struct_node = struct_node?;
180    parse_struct_declaration(source, struct_node, name, arch, source_line)
181}
182
183fn parse_struct_declaration(
184    source: &str,
185    node: Node<'_>,
186    name: String,
187    arch: &'static ArchConfig,
188    source_line: u32,
189) -> Option<StructLayout> {
190    let mut is_packed = false;
191    let mut is_extern = false;
192    // (field_name, type_text, size, align)
193    let mut raw_fields: Vec<(String, String, usize, usize)> = Vec::new();
194
195    for i in 0..node.child_count() {
196        let child = node.child(i)?;
197        match child.kind() {
198            "packed" => is_packed = true,
199            "extern" => is_extern = true,
200            "container_field" => {
201                if let Some(f) = parse_container_field(source, child, arch, is_packed) {
202                    raw_fields.push(f);
203                }
204            }
205            _ => {}
206        }
207    }
208
209    if raw_fields.is_empty() {
210        return None;
211    }
212
213    // Regular Zig structs have implementation-defined layout (reordering allowed).
214    // Only extern and packed structs have stable C-compatible / bit-exact layout.
215    // For analysis purposes we simulate the declared order for all variants,
216    // since that is what the developer sees and intends to reason about.
217    let mut offset = 0usize;
218    let mut struct_align = 1usize;
219    let mut fields: Vec<Field> = Vec::new();
220
221    for (fname, type_text, size, align) in raw_fields {
222        let eff_align = if is_packed { 1 } else { align };
223        if eff_align > 0 {
224            offset = offset.next_multiple_of(eff_align);
225        }
226        struct_align = struct_align.max(eff_align);
227        fields.push(Field {
228            name: fname,
229            ty: TypeInfo::Primitive {
230                name: type_text,
231                size,
232                align,
233            },
234            offset,
235            size,
236            align: eff_align,
237            source_file: None,
238            source_line: None,
239            access: padlock_core::ir::AccessPattern::Unknown,
240        });
241        offset += size;
242    }
243
244    if !is_packed && struct_align > 0 {
245        offset = offset.next_multiple_of(struct_align);
246    }
247
248    let _ = is_extern; // affects ABI guarantees, not layout simulation
249
250    Some(StructLayout {
251        name,
252        total_size: offset,
253        align: struct_align,
254        fields,
255        source_file: None,
256        source_line: Some(source_line),
257        arch,
258        is_packed,
259        is_union: false,
260    })
261}
262
263/// Parse a `container_field` node and return `(name, type_text, size, align)`.
264fn parse_container_field(
265    source: &str,
266    node: Node<'_>,
267    arch: &'static ArchConfig,
268    is_packed: bool,
269) -> Option<(String, String, usize, usize)> {
270    let mut field_name: Option<String> = None;
271    let mut type_text: Option<String> = None;
272    let mut size_align: Option<(usize, usize)> = None;
273
274    for i in 0..node.child_count() {
275        let child = node.child(i)?;
276        match child.kind() {
277            "identifier" if field_name.is_none() => {
278                field_name = Some(source[child.byte_range()].to_string());
279            }
280            "builtin_type" | "pointer_type" | "nullable_type" | "slice_type" | "array_type"
281            | "error_union" => {
282                let text = source[child.byte_range()].to_string();
283                size_align = Some(type_node_size_align(source, child, arch));
284                type_text = Some(text);
285            }
286            "identifier" => {
287                // Second identifier = type name (e.g. a named struct type)
288                let text = source[child.byte_range()].trim().to_string();
289                size_align = Some(zig_type_size_align(&text, arch));
290                type_text = Some(text);
291            }
292            _ => {}
293        }
294    }
295
296    let name = field_name?;
297    let ty = type_text.unwrap_or_else(|| "anyopaque".to_string());
298    let (mut size, align) = size_align.unwrap_or((arch.pointer_size, arch.pointer_size));
299
300    if is_packed && size == 0 {
301        size = 0; // void fields in packed structs stay 0
302    }
303
304    Some((name, ty, size, align))
305}
306
307// ── public API ────────────────────────────────────────────────────────────────
308
309pub fn parse_zig(source: &str, arch: &'static ArchConfig) -> anyhow::Result<Vec<StructLayout>> {
310    let mut parser = Parser::new();
311    parser.set_language(&tree_sitter_zig::LANGUAGE.into())?;
312    let tree = parser
313        .parse(source, None)
314        .ok_or_else(|| anyhow::anyhow!("tree-sitter-zig parse failed"))?;
315    Ok(extract_structs(source, tree.root_node(), arch))
316}
317
318// ── tests ─────────────────────────────────────────────────────────────────────
319
#[cfg(test)]
mod tests {
    use super::*;
    use padlock_core::arch::X86_64_SYSV;

    /// Parses `src` for x86-64 SysV, panicking if the frontend returns an error.
    fn layouts_of(src: &str) -> Vec<StructLayout> {
        parse_zig(src, &X86_64_SYSV).unwrap()
    }

    #[test]
    fn parse_simple_zig_struct() {
        let layouts = layouts_of("const Point = struct { x: u32, y: u32 };");
        assert_eq!(layouts.len(), 1);

        let point = &layouts[0];
        assert_eq!(point.name, "Point");
        assert_eq!(point.fields.len(), 2);
        assert_eq!(point.total_size, 8);
    }

    #[test]
    fn zig_layout_with_padding() {
        let layouts = layouts_of("const T = struct { a: bool, b: u64 };");
        assert_eq!(layouts.len(), 1);

        let layout = &layouts[0];
        // bool sits at 0; u64 is pushed to 8 by 7 bytes of padding.
        assert_eq!(layout.fields[0].offset, 0);
        assert_eq!(layout.fields[1].offset, 8);
        assert_eq!(layout.total_size, 16);
    }

    #[test]
    fn zig_packed_struct_no_padding() {
        let layouts = layouts_of("const Packed = packed struct { a: u8, b: u32 };");
        assert_eq!(layouts.len(), 1);

        let layout = &layouts[0];
        assert!(layout.is_packed);
        assert_eq!(layout.fields[0].offset, 0);
        // The u32 follows the u8 immediately, with no padding in between.
        assert_eq!(layout.fields[1].offset, 1);
        assert_eq!(layout.total_size, 5);
    }

    #[test]
    fn zig_extern_struct_detected() {
        let layouts = layouts_of("const Extern = extern struct { x: i32, y: f64 };");
        assert_eq!(layouts.len(), 1);

        let layout = &layouts[0];
        // C layout: x at 0 (4 bytes), 4 bytes of padding, y at 8 (8 bytes).
        assert_eq!(layout.fields[0].offset, 0);
        assert_eq!(layout.fields[1].offset, 8);
        assert_eq!(layout.total_size, 16);
    }

    #[test]
    fn zig_pointer_field_is_pointer_sized() {
        let layouts = layouts_of("const S = struct { ptr: *u8 };");
        let field = &layouts[0].fields[0];
        assert_eq!(field.size, 8);
        assert_eq!(field.align, 8);
    }

    #[test]
    fn zig_optional_pointer_is_pointer_sized() {
        let layouts = layouts_of("const S = struct { opt: ?*u8 };");
        assert_eq!(layouts[0].fields[0].size, 8);
    }

    #[test]
    fn zig_slice_is_two_words() {
        let layouts = layouts_of("const S = struct { buf: []u8 };");
        // A slice is a pointer plus a length.
        assert_eq!(layouts[0].fields[0].size, 16);
    }

    #[test]
    fn zig_usize_follows_arch() {
        let layouts = layouts_of("const S = struct { n: usize };");
        assert_eq!(layouts[0].fields[0].size, 8);
    }

    #[test]
    fn zig_multiple_structs_parsed() {
        let layouts = layouts_of("const A = struct { x: u8 };\nconst B = struct { y: u64 };");
        assert_eq!(layouts.len(), 2);
        for expected in ["A", "B"] {
            assert!(layouts.iter().any(|l| l.name == expected));
        }
    }

    #[test]
    fn zig_array_field_size() {
        let layouts = layouts_of("const S = struct { buf: [4]u32 };");
        // Four elements of four bytes each.
        assert_eq!(layouts[0].fields[0].size, 16);
    }
}
414}