Skip to main content

lumen_compiler/compiler/
lir.rs

//! LIR (Lumen Intermediate Representation) data types.
//! 32-bit fixed-width instructions, Lua-style register VM.
3
4use serde::{Deserialize, Serialize};
5
6/// Opcodes for the Lumen register VM.
7/// Hex values match SPEC section 40.2.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9#[repr(u8)]
10pub enum OpCode {
11    // Misc
12    Nop = 0x00, // Ax: no operation
13
14    // Register and constant ops
15    LoadK = 0x01,    // A, Bx: load constant Bx into register A
16    LoadNil = 0x02,  // A, B:  set registers A..A+B to nil
17    LoadBool = 0x03, // A, B, C: load bool B into A; if C, skip next
18    LoadInt = 0x04,  // A, sB: R[A] = sB as i64 (small integer)
19    Move = 0x05,     // A, B:  copy register B to A
20
21    // Data construction
22    NewList = 0x06,   // A, B:  create list from B values at A+1..
23    NewMap = 0x07,    // A, B:  create map from B kv pairs at A+1..
24    NewRecord = 0x08, // A, Bx: create record of type Bx
25    NewUnion = 0x09,  // A, B, C: create union tag=B payload=C
26    NewTuple = 0x0A,  // A, B:  create tuple from B values at A+1..
27    NewSet = 0x0B,    // A, B:  create set from B values at A+1..
28
29    // Access
30    GetField = 0x10, // A, B, C: A = B.field[C]
31    SetField = 0x11, // A, B, C: A.field[B] = C
32    GetIndex = 0x12, // A, B, C: A = B[C]
33    SetIndex = 0x13, // A, B, C: A[B] = C
34    GetTuple = 0x14, // A, B, C: A = R[B].elements[C]
35
36    // Arithmetic
37    Add = 0x20,      // A, B, C: A = B + C
38    Sub = 0x21,      // A, B, C: A = B - C
39    Mul = 0x22,      // A, B, C: A = B * C
40    Div = 0x23,      // A, B, C: A = B / C
41    Mod = 0x24,      // A, B, C: A = B % C
42    Pow = 0x25,      // A, B, C: A = B ** C
43    Neg = 0x26,      // A, B:    A = -B
44    Concat = 0x27,   // A, B, C: A = B ++ C
45    FloorDiv = 0x2E, // A, B, C: A = B // C (floor division)
46
47    // Bitwise
48    BitOr = 0x28,  // A, B, C: A = B | C
49    BitAnd = 0x29, // A, B, C: A = B & C
50    BitXor = 0x2A, // A, B, C: A = B ^ C
51    BitNot = 0x2B, // A, B:    A = ~B
52    Shl = 0x2C,    // A, B, C: A = B << C
53    Shr = 0x2D,    // A, B, C: A = B >> C
54
55    // Comparison / logic
56    Eq = 0x30,     // A, B, C: if (B == C) != A then skip next
57    Lt = 0x31,     // A, B, C: if (B < C) != A then skip next
58    Le = 0x32,     // A, B, C: if (B <= C) != A then skip next
59    Not = 0x33,    // A, B:    A = not B
60    And = 0x34,    // A, B, C: A = B and C
61    Or = 0x35,     // A, B, C: A = B or C
62    In = 0x36,     // A, B, C: A = B in C
63    Is = 0x37,     // A, B, C: A = typeof(B) == type(C)
64    NullCo = 0x38, // A, B, C: A = if B != null then B else C
65    Test = 0x39,   // A, C: if (Reg[A] is truthy) != C then skip next
66
67    // Control flow
68    Jmp = 0x40,      // Ax: jump by signed offset
69    Call = 0x41,     // A, B, C: call A with B args, C results
70    TailCall = 0x42, // A, B, C: tail-call A with B args
71    Return = 0x43,   // A, B: return B values starting from A
72    Halt = 0x44,     // A: halt with error message in A
73    Loop = 0x45,     // AsB: decrement counter, jump if > 0
74    ForPrep = 0x46,  // A, sB: prepare for-loop
75    ForLoop = 0x47,  // A, sB: iterate for-loop
76    ForIn = 0x48,    // A, B, C: for-in iterator step
77    Break = 0x49,    // Ax: break from enclosing loop
78    Continue = 0x4A, // Ax: continue to next iteration
79
80    // Intrinsics
81    Intrinsic = 0x50, // A, B, C: A = intrinsic[B](args at C)
82
83    // Closures
84    Closure = 0x51,  // A, Bx: R[A] = closure(proto=Bx, upvalues from regs)
85    GetUpval = 0x52, // A, B:  R[A] = upvalue[B]
86    SetUpval = 0x53, // A, B:  upvalue[B] = R[A]
87
88    // Effects
89    ToolCall = 0x60, // A, Bx: tool_call(tool=Bx, args from subsequent regs)
90    Schema = 0x61,   // A, B: validate A against schema type B
91    Emit = 0x62,     // A: emit output R[A]
92    TraceRef = 0x63, // A: R[A] = current trace reference
93    Await = 0x64,    // A, B: R[A] = await future R[B]
94    Spawn = 0x65,    // A, Bx: R[A] = spawn async(proto=Bx)
95
96    // List ops
97    Append = 0x70, // A, B: append B to list A
98
99    // Type checks
100    IsVariant = 0x71, // A, Bx: if A is variant w/ tag Bx, skip next
101    Unbox = 0x72,     // A, B: A = B.payload (for unions)
102}
103
104/// Intrinsic function IDs
105#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
106#[repr(u8)]
107pub enum IntrinsicId {
108    Length = 0,
109    Count = 1,
110    Matches = 2,
111    Hash = 3,
112    Diff = 4,
113    Patch = 5,
114    Redact = 6,
115    Validate = 7,
116    TraceRef = 8,
117    Print = 9,
118    ToString = 10,
119    ToInt = 11,
120    ToFloat = 12,
121    TypeOf = 13,
122    Keys = 14,
123    Values = 15,
124    Contains = 16,
125    Join = 17,
126    Split = 18,
127    Trim = 19,
128    Upper = 20,
129    Lower = 21,
130    Replace = 22,
131    Slice = 23,
132    Append = 24,
133    Range = 25,
134    Abs = 26,
135    Min = 27,
136    Max = 28,
137    // New stdlib intrinsics
138    Sort = 29,
139    Reverse = 30,
140    Map = 31,
141    Filter = 32,
142    Reduce = 33,
143    FlatMap = 34,
144    Zip = 35,
145    Enumerate = 36,
146    Any = 37,
147    All = 38,
148    Find = 39,
149    Position = 40,
150    GroupBy = 41,
151    Chunk = 42,
152    Window = 43,
153    Flatten = 44,
154    Unique = 45,
155    Take = 46,
156    Drop = 47,
157    First = 48,
158    Last = 49,
159    IsEmpty = 50,
160    Chars = 51,
161    StartsWith = 52,
162    EndsWith = 53,
163    IndexOf = 54,
164    PadLeft = 55,
165    PadRight = 56,
166    Round = 57,
167    Ceil = 58,
168    Floor = 59,
169    Sqrt = 60,
170    Pow = 61,
171    Log = 62,
172    Sin = 63,
173    Cos = 64,
174    Clamp = 65,
175    Clone = 66,
176    Sizeof = 67,
177    Debug = 68,
178    ToSet = 69,
179    // Map/Set operations
180    HasKey = 70,
181    Merge = 71,
182    Size = 72,
183    Add = 73,
184    Remove = 74,
185    Entries = 75,
186}
187
188/// A 32-bit instruction
189#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
190pub struct Instruction {
191    pub op: OpCode,
192    pub a: u8,
193    pub b: u8,
194    pub c: u8,
195}
196
197impl Instruction {
198    pub fn abc(op: OpCode, a: u8, b: u8, c: u8) -> Self {
199        Self { op, a, b, c }
200    }
201    pub fn abx(op: OpCode, a: u8, bx: u16) -> Self {
202        Self {
203            op,
204            a,
205            b: (bx >> 8) as u8,
206            c: (bx & 0xFF) as u8,
207        }
208    }
209    pub fn ax(op: OpCode, ax: u32) -> Self {
210        Self {
211            op,
212            a: ((ax >> 16) & 0xFF) as u8,
213            b: ((ax >> 8) & 0xFF) as u8,
214            c: (ax & 0xFF) as u8,
215        }
216    }
217    /// Signed 24-bit AX constructor for jump offsets (supports negative values)
218    pub fn sax(op: OpCode, offset: i32) -> Self {
219        let bits = (offset as u32) & 0xFFFFFF;
220        Self {
221            op,
222            a: ((bits >> 16) & 0xFF) as u8,
223            b: ((bits >> 8) & 0xFF) as u8,
224            c: (bits & 0xFF) as u8,
225        }
226    }
227    pub fn bx(&self) -> u16 {
228        ((self.b as u16) << 8) | (self.c as u16)
229    }
230    pub fn ax_val(&self) -> u32 {
231        ((self.a as u32) << 16) | ((self.b as u32) << 8) | (self.c as u32)
232    }
233    /// Signed 24-bit AX value with sign extension for jump offsets
234    pub fn sax_val(&self) -> i32 {
235        let raw = self.ax_val();
236        if raw & 0x800000 != 0 {
237            (raw | 0xFF000000) as i32
238        } else {
239            raw as i32
240        }
241    }
242    pub fn sbx(&self) -> i16 {
243        self.bx() as i16
244    }
245}
246
247/// Constant value in the constant pool
248#[derive(Debug, Clone, Serialize, Deserialize)]
249pub enum Constant {
250    Null,
251    Bool(bool),
252    Int(i64),
253    Float(f64),
254    String(String),
255}
256
257/// Type definition in LIR
258#[derive(Debug, Clone, Serialize, Deserialize)]
259pub struct LirType {
260    pub kind: String,
261    pub name: String,
262    pub fields: Vec<LirField>,
263    pub variants: Vec<LirVariant>,
264}
265
266#[derive(Debug, Clone, Serialize, Deserialize)]
267pub struct LirField {
268    pub name: String,
269    #[serde(rename = "type")]
270    pub ty: String,
271    pub constraints: Vec<String>,
272}
273
274#[derive(Debug, Clone, Serialize, Deserialize)]
275pub struct LirVariant {
276    pub name: String,
277    pub payload: Option<String>,
278}
279
280/// A compiled cell in LIR
281#[derive(Debug, Clone, Serialize, Deserialize)]
282pub struct LirCell {
283    pub name: String,
284    pub params: Vec<LirParam>,
285    pub returns: Option<String>,
286    pub registers: u8,
287    pub constants: Vec<Constant>,
288    pub instructions: Vec<Instruction>,
289}
290
291#[derive(Debug, Clone, Serialize, Deserialize)]
292pub struct LirParam {
293    pub name: String,
294    #[serde(rename = "type")]
295    pub ty: String,
296    pub register: u8,
297    #[serde(default)]
298    pub variadic: bool,
299}
300
301/// Tool declaration in LIR
302#[derive(Debug, Clone, Serialize, Deserialize)]
303pub struct LirTool {
304    pub alias: String,
305    pub tool_id: String,
306    pub version: String,
307    pub mcp_url: Option<String>,
308}
309
310/// Policy/grant in LIR
311#[derive(Debug, Clone, Serialize, Deserialize)]
312pub struct LirPolicy {
313    pub tool_alias: String,
314    pub grants: serde_json::Value,
315}
316
317#[derive(Debug, Clone, Serialize, Deserialize)]
318pub struct LirAgent {
319    pub name: String,
320    pub methods: Vec<String>,
321}
322
323#[derive(Debug, Clone, Serialize, Deserialize)]
324pub struct LirAddon {
325    pub kind: String,
326    pub name: Option<String>,
327}
328
329#[derive(Debug, Clone, Serialize, Deserialize)]
330pub struct LirEffect {
331    pub name: String,
332    pub operations: Vec<LirEffectOp>,
333}
334
335#[derive(Debug, Clone, Serialize, Deserialize)]
336pub struct LirEffectOp {
337    pub name: String,
338    pub params: Vec<LirParam>,
339    pub returns: Option<String>,
340    pub effects: Vec<String>,
341}
342
343#[derive(Debug, Clone, Serialize, Deserialize)]
344pub struct LirEffectBind {
345    pub effect_path: String,
346    pub tool_alias: String,
347}
348
349#[derive(Debug, Clone, Serialize, Deserialize)]
350pub struct LirHandler {
351    pub name: String,
352    pub handles: Vec<LirHandle>,
353}
354
355#[derive(Debug, Clone, Serialize, Deserialize)]
356pub struct LirHandle {
357    pub operation: String,
358    pub cell: String,
359}
360
361/// Complete LIR module
362#[derive(Debug, Clone, Serialize, Deserialize)]
363pub struct LirModule {
364    pub version: String,
365    pub doc_hash: String,
366    pub strings: Vec<String>,
367    pub types: Vec<LirType>,
368    pub cells: Vec<LirCell>,
369    pub tools: Vec<LirTool>,
370    pub policies: Vec<LirPolicy>,
371    pub agents: Vec<LirAgent>,
372    pub addons: Vec<LirAddon>,
373    pub effects: Vec<LirEffect>,
374    pub effect_binds: Vec<LirEffectBind>,
375    pub handlers: Vec<LirHandler>,
376}
377
378impl LirModule {
379    pub fn new(doc_hash: String) -> Self {
380        Self {
381            version: "1.0.0".to_string(),
382            doc_hash,
383            strings: Vec::new(),
384            types: Vec::new(),
385            cells: Vec::new(),
386            tools: Vec::new(),
387            policies: Vec::new(),
388            agents: Vec::new(),
389            addons: Vec::new(),
390            effects: Vec::new(),
391            effect_binds: Vec::new(),
392            handlers: Vec::new(),
393        }
394    }
395
396    /// Merge another module's definitions into this module.
397    ///
398    /// This is used during import resolution to link imported modules into the main module.
399    /// String table entries are deduplicated. Other items (cells, types, etc.) are appended,
400    /// assuming no name conflicts (the resolver should have already checked this).
401    pub fn merge(&mut self, other: &LirModule) {
402        use std::collections::HashMap;
403
404        // Build a map from old string indices in `other` to new indices in `self`
405        let mut string_remap: HashMap<usize, usize> = HashMap::new();
406        for (old_idx, s) in other.strings.iter().enumerate() {
407            if let Some(existing_idx) = self.strings.iter().position(|x| x == s) {
408                string_remap.insert(old_idx, existing_idx);
409            } else {
410                string_remap.insert(old_idx, self.strings.len());
411                self.strings.push(s.clone());
412            }
413        }
414
415        // Merge types (no string remapping needed for simple names)
416        for ty in &other.types {
417            if !self.types.iter().any(|t| t.name == ty.name) {
418                self.types.push(ty.clone());
419            }
420        }
421
422        // Merge cells (no string remapping needed for simple names)
423        for cell in &other.cells {
424            if !self.cells.iter().any(|c| c.name == cell.name) {
425                self.cells.push(cell.clone());
426            }
427        }
428
429        // Merge tools
430        for tool in &other.tools {
431            if !self.tools.iter().any(|t| t.alias == tool.alias) {
432                self.tools.push(tool.clone());
433            }
434        }
435
436        // Merge policies
437        for policy in &other.policies {
438            if !self
439                .policies
440                .iter()
441                .any(|p| p.tool_alias == policy.tool_alias)
442            {
443                self.policies.push(policy.clone());
444            }
445        }
446
447        // Merge agents
448        for agent in &other.agents {
449            if !self.agents.iter().any(|a| a.name == agent.name) {
450                self.agents.push(agent.clone());
451            }
452        }
453
454        // Merge addons
455        self.addons.extend_from_slice(&other.addons);
456
457        // Merge effects
458        for effect in &other.effects {
459            if !self.effects.iter().any(|e| e.name == effect.name) {
460                self.effects.push(effect.clone());
461            }
462        }
463
464        // Merge effect bindings
465        for bind in &other.effect_binds {
466            if !self
467                .effect_binds
468                .iter()
469                .any(|b| b.effect_path == bind.effect_path && b.tool_alias == bind.tool_alias)
470            {
471                self.effect_binds.push(bind.clone());
472            }
473        }
474
475        // Merge handlers
476        for handler in &other.handlers {
477            if !self.handlers.iter().any(|h| h.name == handler.name) {
478                self.handlers.push(handler.clone());
479            }
480        }
481    }
482}