Skip to main content

mangle_ir/
lib.rs

1// Copyright 2025 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Intermediate Representation (IR) for Mangle.
16//!
17//! This IR uses a flat, indexed representation (similar to Carbon's SemIR).
18//! Instructions are stored in a vector and referenced by `InstId`.
19//!
20//! # Relation Representation
21//!
22//! Relations (predicates) are primarily identified by `NameId`.
23//! In the Logical IR, they appear in `Inst::Atom` and `Inst::Decl`.
24//!
25//! In the Physical IR (`physical::Op`), relations are abstract data sources.
26//! Operations like `Scan` and `Insert` refer to relations by name/ID, but the
27//! actual storage format (row-oriented, column-oriented, B-Tree, etc.) is
28//! determined by the runtime `Host` implementation.
29//!
30//! # Physical Operations
31//!
32//! The `physical` module defines the imperative operations for execution:
33//!
34//! *   **Iterate/Scan**: Provides an iterator over a relation.
35//! *   **Filter**: Selects tuples matching a condition.
36//! *   **Insert**: Adds derived facts to a relation.
37//! *   **Let**: Binds values to variables for projection or calculation.
38//!
39//! The Planner transforms declarative rules into trees of these operations.
40
41pub mod physical;
42
43use std::collections::HashMap;
44use std::hash::Hash;
45use std::num::NonZeroU32;
46
47/// Index of an instruction in the IR.
48#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
49pub struct InstId(pub NonZeroU32);
50
51impl InstId {
52    pub fn new(index: usize) -> Self {
53        // We use 0-based index internally, but 1-based NonZeroU32 storage
54        // to allow Option<InstId> to be same size as InstId.
55        // index 0 -> 1
56        InstId(NonZeroU32::new((index + 1) as u32).expect("index overflow"))
57    }
58
59    pub fn index(&self) -> usize {
60        (self.0.get() - 1) as usize
61    }
62}
63
64pub trait InternKey: Copy + Eq + Hash {
65    fn new(index: usize) -> Self;
66    fn index(&self) -> usize;
67}
68
69/// Index of a name (identifier) in the IR.
70#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
71pub struct NameId(pub NonZeroU32);
72
73impl InternKey for NameId {
74    fn new(index: usize) -> Self {
75        NameId(NonZeroU32::new((index + 1) as u32).expect("index overflow"))
76    }
77
78    fn index(&self) -> usize {
79        (self.0.get() - 1) as usize
80    }
81}
82
83/// Index of a string constant in the IR.
84#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
85pub struct StringId(pub NonZeroU32);
86
87impl InternKey for StringId {
88    fn new(index: usize) -> Self {
89        StringId(NonZeroU32::new((index + 1) as u32).expect("index overflow"))
90    }
91
92    fn index(&self) -> usize {
93        (self.0.get() - 1) as usize
94    }
95}
96
97/// A simple interner for strings.
98#[derive(Debug)]
99pub struct Store<K: InternKey> {
100    map: HashMap<String, K>,
101    vec: Vec<String>,
102}
103
104impl<K: InternKey> Default for Store<K> {
105    fn default() -> Self {
106        Self {
107            map: HashMap::default(),
108            vec: Vec::new(),
109        }
110    }
111}
112
113impl<K: InternKey> Store<K> {
114    pub fn new() -> Self {
115        Self::default()
116    }
117
118    pub fn intern(&mut self, name: String) -> K {
119        if let Some(&id) = self.map.get(&name) {
120            return id;
121        }
122        let id = K::new(self.vec.len());
123        self.vec.push(name.clone());
124        self.map.insert(name, id);
125        id
126    }
127
128    pub fn get(&self, id: K) -> &str {
129        &self.vec[id.index()]
130    }
131
132    pub fn lookup(&self, name: &str) -> Option<K> {
133        self.map.get(name).copied()
134    }
135
136    /// Returns all interned strings in ID order (index 0 = ID 1, etc.).
137    pub fn values(&self) -> &[String] {
138        &self.vec
139    }
140}
141
142/// The IR container.
143#[derive(Default, Debug)]
144pub struct Ir {
145    pub insts: Vec<Inst>,
146    pub name_store: Store<NameId>,
147    pub string_store: Store<StringId>,
148    /// Predicates declared or inferred as temporal.
149    /// Temporal predicates have 2 synthetic columns (start_time, end_time)
150    /// appended to their regular arguments.
151    pub temporal_predicates: std::collections::HashSet<NameId>,
152}
153
154impl Ir {
155    pub fn new() -> Self {
156        Self::default()
157    }
158
159    pub fn add_inst(&mut self, inst: Inst) -> InstId {
160        let id = InstId::new(self.insts.len());
161        self.insts.push(inst);
162        id
163    }
164
165    pub fn get(&self, id: InstId) -> &Inst {
166        &self.insts[id.index()]
167    }
168
169    pub fn intern_name(&mut self, name: impl Into<String>) -> NameId {
170        self.name_store.intern(name.into())
171    }
172
173    pub fn resolve_name(&self, id: NameId) -> &str {
174        self.name_store.get(id)
175    }
176
177    pub fn intern_string(&mut self, s: impl Into<String>) -> StringId {
178        self.string_store.intern(s.into())
179    }
180
181    pub fn resolve_string(&self, id: StringId) -> &str {
182        self.string_store.get(id)
183    }
184}
185
186/// Instructions in the Mangle IR.
187#[derive(Clone, Debug, PartialEq)]
188pub enum Inst {
189    // --- Constants ---
190    Bool(bool),
191    Number(i64),
192    Float(f64),
193    /// A string constant (e.g. "foo").
194    String(StringId),
195    Bytes(Vec<u8>),
196    /// A name constant (e.g. /foo).
197    Name(NameId),
198    /// Time as nanoseconds since Unix epoch.
199    Time(i64),
200    /// Duration as nanoseconds.
201    Duration(i64),
202
203    /// A list of values. args point to other Constant-like instructions.
204    List(Vec<InstId>),
205    /// A map of values. keys and values must have same length.
206    Map {
207        keys: Vec<InstId>,
208        values: Vec<InstId>,
209    },
210    /// A struct. fields and values must have same length.
211    Struct {
212        fields: Vec<NameId>,
213        values: Vec<InstId>,
214    },
215
216    // --- Variables ---
217    /// A variable.
218    Var(NameId),
219
220    // --- Expressions (BaseTerm) ---
221    /// Application of a function.
222    ApplyFn {
223        function: NameId,
224        args: Vec<InstId>,
225    },
226
227    // --- Logical Formulas (Term / Atom) ---
228    /// An atom (predicate application).
229    Atom {
230        predicate: NameId,
231        args: Vec<InstId>,
232    },
233    /// Negation of an atom.
234    NegAtom(InstId),
235    /// Equality constraint (left = right).
236    Eq(InstId, InstId),
237    /// Inequality constraint (left != right).
238    Ineq(InstId, InstId),
239
240    // --- Transforms ---
241    /// A transform statement: let var = app.
242    Transform {
243        var: Option<NameId>,
244        app: InstId,
245    },
246
247    // --- Structure (Clauses / Decls) ---
248    /// A Horn clause (Rule).
249    Rule {
250        head: InstId,           // Points to Atom
251        premises: Vec<InstId>,  // Points to Atom, NegAtom, Eq, Ineq
252        transform: Vec<InstId>, // Points to Transform
253    },
254
255    /// A Declaration.
256    Decl {
257        atom: InstId,
258        descr: Vec<InstId>,          // Atoms
259        bounds: Vec<InstId>,         // BoundDecls
260        constraints: Option<InstId>, // Constraints
261    },
262
263    /// Bound Declaration.
264    BoundDecl {
265        base_terms: Vec<InstId>,
266    },
267
268    /// Constraints.
269    Constraints {
270        consequences: Vec<InstId>,      // Atoms
271        alternatives: Vec<Vec<InstId>>, // List of List of Atoms
272    },
273}
274
275#[cfg(test)]
276mod compat_test;
277
278#[cfg(test)]
279mod tests {
280    use super::*;
281
282    #[test]
283    fn basic_ir_construction() {
284        let mut ir = Ir::new();
285
286        // p(X) :- q(X).
287
288        let x_name = ir.intern_name("X");
289        let var_x = ir.add_inst(Inst::Var(x_name));
290
291        let p_name = ir.intern_name("p");
292        let atom_head = ir.add_inst(Inst::Atom {
293            predicate: p_name,
294            args: vec![var_x],
295        });
296
297        let q_name = ir.intern_name("q");
298        let atom_body = ir.add_inst(Inst::Atom {
299            predicate: q_name,
300            args: vec![var_x],
301        });
302
303        let rule = ir.add_inst(Inst::Rule {
304            head: atom_head,
305            premises: vec![atom_body],
306            transform: vec![],
307        });
308
309        assert_eq!(ir.insts.len(), 4);
310
311        if let Inst::Rule { head, .. } = ir.get(rule) {
312            assert_eq!(*head, atom_head);
313        } else {
314            panic!("Expected Rule");
315        }
316    }
317
318    #[test]
319    fn complex_types() {
320        let mut ir = Ir::new();
321
322        // Model: .Pair</string, .Struct<opt /a : /string>>
323
324        // Constants / Symbols
325
326        let n_string = ir.intern_name("/string");
327        let s_string = ir.add_inst(Inst::Name(n_string));
328
329        let n_a = ir.intern_name("/a");
330        let s_a = ir.add_inst(Inst::Name(n_a));
331
332        // Struct field type: opt /a : /string
333
334        // In Mangle Go AST, this is ApplyFn("fn:opt", [/a, /string])
335
336        let fn_opt_name = ir.intern_name("fn:opt");
337        let fn_opt = ir.add_inst(Inst::ApplyFn {
338            function: fn_opt_name,
339            args: vec![s_a, s_string],
340        });
341
342        // Struct type: .Struct<...>
343
344        // ApplyFn("fn:Struct", [fn_opt])
345
346        let fn_struct_name = ir.intern_name("fn:Struct");
347        let fn_struct = ir.add_inst(Inst::ApplyFn {
348            function: fn_struct_name,
349            args: vec![fn_opt],
350        });
351
352        // Pair type: .Pair</string, Struct...>
353
354        let fn_pair_name = ir.intern_name("fn:Pair");
355        let fn_pair = ir.add_inst(Inst::ApplyFn {
356            function: fn_pair_name,
357            args: vec![s_string, fn_struct],
358        });
359
360        // Check structure
361
362        if let Inst::ApplyFn { function, args } = ir.get(fn_pair) {
363            assert_eq!(ir.resolve_name(*function), "fn:Pair");
364
365            assert_eq!(args.len(), 2);
366
367            assert_eq!(args[0], s_string);
368
369            assert_eq!(args[1], fn_struct);
370        } else {
371            panic!("Expected ApplyFn");
372        }
373    }
374}