unluac 1.1.1

Multi-dialect Lua decompiler written in Rust.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
//! 这个文件集中声明 transformer 层的统一 low-IR 类型。
//!
//! 之所以把这些定义收拢到一个 common 模块,是因为 low-IR 是后续 CFG、
//! Dataflow、HIR 共同依赖的稳定契约;具体某个 dialect 的 lowering 规则可以
//! 分目录演进,但这里的类型应该尽量保持统一、明确、可复用。

use std::fmt;

use crate::parser::{
    ChunkHeader, Origin, ProtoFrameInfo, ProtoLineRange, ProtoSignature, RawConstPool,
    RawDebugInfo, RawProto, RawString, RawUpvalueInfo,
};

/// Root object of the transformer layer; keeps the chunk-level metadata and the main proto.
#[derive(Debug, Clone, PartialEq)]
pub struct LoweredChunk {
    /// Chunk header (type defined in `crate::parser`).
    pub header: ChunkHeader,
    /// The lowered main proto of the chunk.
    pub main: LoweredProto,
    /// Origin record (type defined in `crate::parser`).
    pub origin: Origin,
}

/// A proto that has already gone through dialect-specific lowering.
///
/// Fields whose types come from `crate::parser` (`RawString`, `ProtoLineRange`,
/// `ProtoSignature`, `ProtoFrameInfo`, `RawConstPool`, `RawUpvalueInfo`,
/// `RawDebugInfo`, `Origin`) sit next to the transformer-owned fields
/// `children`, `instrs` and `lowering_map`.
#[derive(Debug, Clone, PartialEq)]
pub struct LoweredProto {
    pub source: Option<RawString>,
    pub line_range: ProtoLineRange,
    pub signature: ProtoSignature,
    pub frame: ProtoFrameInfo,
    pub constants: RawConstPool,
    pub upvalues: RawUpvalueInfo,
    pub debug_info: RawDebugInfo,
    /// Lowered child protos.
    pub children: Vec<LoweredProto>,
    /// Low-IR instruction sequence of this proto.
    pub instrs: Vec<LowInstr>,
    /// Mapping between low-IR instructions, raw instructions and debug data.
    pub lowering_map: LoweringMap,
    pub origin: Origin,
}

/// Works out which upvalues of `raw` stand for `_ENV`, returning one flag per upvalue slot.
///
/// Three sources of evidence are combined:
///
/// 1. Debug upvalue names: any slot whose recorded name is exactly `_ENV` is flagged.
/// 2. When `parent_env_upvalues` is `Some`, every descriptor that is not `in_stack`
///    and whose `index` points at a flagged slot of the parent is flagged as well,
///    so child protos inherit the environment through the upvalue chain.
/// 3. When `parent_env_upvalues` is `None` (no parent flags were supplied) and
///    nothing has been flagged so far, the first slot is flagged, provided there
///    is at least one slot. This covers chunks whose debug info was stripped
///    (for example by `luac -s`).
///
/// The result lets Lua 5.2+ global accesses be lowered back to `AccessBase::Env`
/// instead of leaving later layers to guess whether `_ENV` is an ordinary table upvalue.
pub(crate) fn resolve_env_upvalues(
    raw: &RawProto,
    parent_env_upvalues: Option<&[bool]>,
) -> Vec<bool> {
    let slot_count = usize::from(raw.common.upvalues.common.count);
    let mut flags = vec![false; slot_count];

    // Pairing with `flags` caps the walk at `slot_count`, so surplus names are ignored.
    let names = &raw.common.debug_info.common.upvalue_names;
    for (flag, name) in flags.iter_mut().zip(names.iter()) {
        if raw_string_value(name) == Some("_ENV") {
            *flag = true;
        }
    }

    match parent_env_upvalues {
        Some(parent_flags) => {
            let descriptors = &raw.common.upvalues.common.descriptors;
            for (flag, descriptor) in flags.iter_mut().zip(descriptors.iter()) {
                // Descriptors marked `in_stack` are skipped; only the rest index
                // into the parent's upvalue flags.
                if descriptor.in_stack {
                    continue;
                }
                let inherited = parent_flags
                    .get(usize::from(descriptor.index))
                    .copied()
                    .unwrap_or(false);
                if inherited {
                    *flag = true;
                }
            }
        }
        None => {
            if !flags.contains(&true) {
                // On Lua 5.2+ the root proto gets its first upvalue bound to the
                // current environment at load time.
                if let Some(first) = flags.first_mut() {
                    *first = true;
                }
            }
        }
    }

    flags
}

/// Borrows the `value` of the raw string's `text` as `&str`, or returns `None`
/// when `text` is absent.
fn raw_string_value(raw: &RawString) -> Option<&str> {
    let decoded = raw.text.as_ref()?;
    Some(decoded.value.as_str())
}

/// Unified mapping between low-IR instructions, raw instructions and debug data.
///
/// NOTE(review): the outer `Vec`s are presumably indexed by instruction position
/// (low-IR position for `low_to_raw`, `pc_map`, `line_hints`; raw position for
/// `raw_to_low`) — confirm against the code that fills the map.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct LoweringMap {
    /// Raw instruction references associated with each entry.
    pub low_to_raw: Vec<Vec<RawInstrRef>>,
    /// Low-IR instruction references associated with each entry.
    pub raw_to_low: Vec<Vec<InstrRef>>,
    /// `u32` pc values associated with each entry.
    pub pc_map: Vec<Vec<u32>>,
    /// Optional `u32` line hint for each entry.
    pub line_hints: Vec<Option<u32>>,
}

/// Stable index of a low-IR instruction.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct InstrRef(pub usize);

impl InstrRef {
    /// Returns the wrapped index as a plain `usize`.
    pub const fn index(self) -> usize {
        let Self(position) = self;
        position
    }
}

impl fmt::Display for InstrRef {
    /// Writes the reference as `@` followed by the index, e.g. `@12`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(position) = self;
        write!(f, "@{}", position)
    }
}

/// Stable index of a raw instruction inside a proto's linear instruction array.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct RawInstrRef(pub usize);

impl RawInstrRef {
    /// Returns the wrapped index as a plain `usize`.
    pub const fn index(self) -> usize {
        match self {
            Self(position) => position,
        }
    }
}

/// Reference to a VM register.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct Reg(pub usize);

impl Reg {
    /// Returns the register number as a plain `usize`.
    pub const fn index(self) -> usize {
        let Self(slot) = self;
        slot
    }
}

impl fmt::Display for Reg {
    /// Writes the register as `r` followed by its number, e.g. `r3`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self(slot) = self;
        write!(f, "r{}", slot)
    }
}

/// A contiguous span of registers: `len` registers beginning at `start`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct RegRange {
    /// First register of the span.
    pub start: Reg,
    /// Number of registers in the span.
    pub len: usize,
}

impl RegRange {
    /// Builds a range from its first register and its length.
    pub const fn new(start: Reg, len: usize) -> Self {
        RegRange { start, len }
    }
}

/// Reference to a constant in the current proto's constant pool.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ConstRef(pub usize);

impl ConstRef {
    /// Returns the wrapped index as a plain `usize`.
    pub const fn index(self) -> usize {
        let Self(slot) = self;
        slot
    }
}

/// Numeric literal preserved as its raw 64-bit pattern.
///
/// Storing the bits rather than an `f64` is what allows the type to derive
/// `Eq` and `Hash`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct NumberLiteral(pub u64);

impl NumberLiteral {
    /// Captures the exact bit pattern of `value`.
    pub fn from_f64(value: f64) -> Self {
        NumberLiteral(f64::to_bits(value))
    }

    /// Reinterprets the stored bit pattern as an `f64`.
    pub fn to_f64(self) -> f64 {
        let Self(bits) = self;
        f64::from_bits(bits)
    }
}

/// Reference to an entry in the current proto's upvalue table.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct UpvalueRef(pub usize);

impl UpvalueRef {
    /// Returns the wrapped index as a plain `usize`.
    pub const fn index(self) -> usize {
        match self {
            Self(slot) => slot,
        }
    }
}

/// Reference to an entry in the current proto's child-proto table.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct ProtoRef(pub usize);

impl ProtoRef {
    /// Returns the wrapped index as a plain `usize`.
    pub const fn index(self) -> usize {
        let Self(child) = self;
        child
    }
}

/// Operand for positions where the raw bytecode already allowed an RK operand;
/// low-IR keeps the register-or-constant choice there.
///
/// Besides those two forms, the enum also has an inline `i64` variant.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum ValueOperand {
    /// Value taken from a register.
    Reg(Reg),
    /// Value taken from the constant pool.
    Const(ConstRef),
    /// Inline integer value.
    Integer(i64),
}

/// The unified low-IR instruction enum.
///
/// Every variant wraps a dedicated payload struct named `<Variant>Instr`.
/// The enum derives only `PartialEq` (not `Eq`/`Hash`).
#[derive(Debug, Clone, PartialEq)]
pub enum LowInstr {
    Move(MoveInstr),
    LoadNil(LoadNilInstr),
    LoadBool(LoadBoolInstr),
    LoadConst(LoadConstInstr),
    LoadInteger(LoadIntegerInstr),
    LoadNumber(LoadNumberInstr),
    UnaryOp(UnaryOpInstr),
    BinaryOp(BinaryOpInstr),
    Concat(ConcatInstr),
    GetUpvalue(GetUpvalueInstr),
    SetUpvalue(SetUpvalueInstr),
    GetTable(GetTableInstr),
    SetTable(SetTableInstr),
    ErrNil(ErrNilInstr),
    NewTable(NewTableInstr),
    SetList(SetListInstr),
    Call(CallInstr),
    TailCall(TailCallInstr),
    VarArg(VarArgInstr),
    Return(ReturnInstr),
    Closure(ClosureInstr),
    Close(CloseInstr),
    Tbc(TbcInstr),
    NumericForInit(NumericForInitInstr),
    NumericForLoop(NumericForLoopInstr),
    GenericForCall(GenericForCallInstr),
    GenericForLoop(GenericForLoopInstr),
    Jump(JumpInstr),
    Branch(BranchInstr),
}

/// Kinds of unary operation.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum UnaryOpKind {
    /// Logical negation.
    Not,
    /// Arithmetic negation.
    Neg,
    /// Bitwise complement.
    BitNot,
    /// Length operator.
    Length,
}

/// Kinds of binary operation (arithmetic and bitwise).
///
/// Comparison predicates are not listed here; see `BranchPredicate`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum BinaryOpKind {
    Add,
    Sub,
    Mul,
    Div,
    FloorDiv,
    Mod,
    Pow,
    BitAnd,
    BitOr,
    BitXor,
    Shl,
    Shr,
}

/// Shape of a call; distinguishes an ordinary call from method-call sugar.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum CallKind {
    /// Ordinary call.
    Normal,
    /// Method-call sugar.
    Method,
}

/// Method-name hint that `SELF` carries on low-IR.
///
/// Only the index of the field name in the constant pool is kept here, so the
/// transformer layer does not decode strings too early; HIR / AST recover the
/// name later according to each layer's own string semantics.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct MethodNameHint {
    /// Constant-pool reference of the method (field) name.
    pub const_ref: ConstRef,
}

/// Pack of argument values.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum ValuePack {
    /// A fixed number of values held in a register range.
    Fixed(RegRange),
    /// An open pack identified by a single register.
    /// NOTE(review): presumably "from this register up to the current top" — confirm.
    Open(Reg),
}

/// Pack of result values.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum ResultPack {
    /// A fixed number of results placed in a register range.
    Fixed(RegRange),
    /// An open pack identified by a single register.
    /// NOTE(review): presumably "from this register upward, count unknown" — confirm.
    Open(Reg),
    /// No result registers are recorded.
    Ignore,
}

/// Base of a table access.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum AccessBase {
    /// The table lives in a register.
    Reg(Reg),
    /// The access goes through the environment (`_ENV`).
    Env,
    /// The table is reached through an upvalue.
    Upvalue(UpvalueRef),
}

/// Key of a table access.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum AccessKey {
    /// Key taken from a register.
    Reg(Reg),
    /// Key taken from the constant pool.
    Const(ConstRef),
    /// Inline integer key.
    Integer(i64),
}

/// Where a closure capture comes from.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum CaptureSource {
    /// Captured from a register.
    Reg(Reg),
    /// Captured from an upvalue.
    Upvalue(UpvalueRef),
}

/// Dialect-specific extension slot for a capture.
///
/// `None` is the only variant at present and is also the `Default`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Default)]
pub enum DialectCaptureExtra {
    /// No dialect-specific data attached.
    #[default]
    None,
}

/// A single closure capture entry.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct Capture {
    /// Where the captured value comes from.
    pub source: CaptureSource,
    /// Dialect-specific extra data for this capture.
    pub extra: DialectCaptureExtra,
}

/// Predicate of a conditional jump.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum BranchPredicate {
    /// Truthiness test.
    Truthy,
    /// Equality comparison.
    Eq,
    /// Less-than comparison.
    Lt,
    /// Less-than-or-equal comparison.
    Le,
}

/// Operand of a condition.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum CondOperand {
    /// Value taken from a register.
    Reg(Reg),
    /// Value taken from the constant pool.
    Const(ConstRef),
    /// The `nil` literal.
    Nil,
    /// Inline boolean literal.
    Boolean(bool),
    /// Inline integer literal.
    Integer(i64),
    /// Inline numeric literal, stored as a bit pattern.
    Number(NumberLiteral),
}

/// Operand arity of a condition.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub enum BranchOperands {
    /// A single operand.
    Unary(CondOperand),
    /// Two operands.
    Binary(CondOperand, CondOperand),
}

/// The side-effect-free body of a condition.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct BranchCond {
    /// Which test is performed.
    pub predicate: BranchPredicate,
    /// The operand(s) the test is applied to.
    pub operands: BranchOperands,
    /// NOTE(review): presumably inverts the predicate's outcome when `true` — confirm.
    pub negated: bool,
}

/// Payload of `LowInstr::Move`: a destination register and a source register.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct MoveInstr {
    /// Destination register.
    pub dst: Reg,
    /// Source register.
    pub src: Reg,
}

/// Payload of `LowInstr::LoadNil`: the register range the instruction targets.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct LoadNilInstr {
    /// Destination register range.
    pub dst: RegRange,
}

/// Payload of `LowInstr::LoadBool`: a destination register and an inline boolean.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct LoadBoolInstr {
    /// Destination register.
    pub dst: Reg,
    /// Boolean literal carried by the instruction.
    pub value: bool,
}

/// Payload of `LowInstr::LoadConst`: a destination register and a constant-pool reference.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct LoadConstInstr {
    /// Destination register.
    pub dst: Reg,
    /// Reference to the constant in the constant pool.
    pub value: ConstRef,
}

/// Payload of `LowInstr::LoadInteger`: a destination register and an inline `i64`.
///
/// Derives `Eq` and `Hash` like the other all-integer instruction payloads;
/// both fields (`Reg`, `i64`) support them.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct LoadIntegerInstr {
    /// Destination register.
    pub dst: Reg,
    /// Integer literal carried by the instruction.
    pub value: i64,
}

/// Payload of `LowInstr::LoadNumber`: a destination register and an inline `f64`.
///
/// Only `PartialEq` is derived because `f64` implements neither `Eq` nor `Hash`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct LoadNumberInstr {
    /// Destination register.
    pub dst: Reg,
    /// Floating-point literal carried by the instruction.
    pub value: f64,
}

/// Payload of `LowInstr::UnaryOp`: an operation kind applied to one register operand.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct UnaryOpInstr {
    /// Destination register.
    pub dst: Reg,
    /// Which unary operation this is.
    pub op: UnaryOpKind,
    /// Operand register.
    pub src: Reg,
}

/// Payload of `LowInstr::BinaryOp`: an operation kind applied to two value operands.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct BinaryOpInstr {
    /// Destination register.
    pub dst: Reg,
    /// Which binary operation this is.
    pub op: BinaryOpKind,
    /// Left-hand operand.
    pub lhs: ValueOperand,
    /// Right-hand operand.
    pub rhs: ValueOperand,
}

/// Payload of `LowInstr::Concat`: a destination register and a source register range.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct ConcatInstr {
    /// Destination register.
    pub dst: Reg,
    /// Register range holding the operands.
    pub src: RegRange,
}

/// Payload of `LowInstr::GetUpvalue`: a destination register and the upvalue to read.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct GetUpvalueInstr {
    /// Destination register.
    pub dst: Reg,
    /// Source upvalue.
    pub src: UpvalueRef,
}

/// Payload of `LowInstr::SetUpvalue`: the upvalue to write and the source register.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct SetUpvalueInstr {
    /// Destination upvalue.
    pub dst: UpvalueRef,
    /// Source register.
    pub src: Reg,
}

/// Payload of `LowInstr::GetTable`: a table read described by base and key.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct GetTableInstr {
    /// Destination register.
    pub dst: Reg,
    /// Base of the access (register, environment, or upvalue).
    pub base: AccessBase,
    /// Key of the access.
    pub key: AccessKey,
}

/// Payload of `LowInstr::SetTable`: a table write described by base, key and value.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct SetTableInstr {
    /// Base of the access (register, environment, or upvalue).
    pub base: AccessBase,
    /// Key of the access.
    pub key: AccessKey,
    /// Value operand being stored.
    pub value: ValueOperand,
}

/// Payload of `LowInstr::ErrNil`.
///
/// NOTE(review): presumably models an "error if nil" check on `subject`, with
/// `name` identifying the checked value for the message — confirm against the
/// dialect lowering that emits it.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct ErrNilInstr {
    /// Register the instruction inspects.
    pub subject: Reg,
    /// Optional constant-pool reference associated with the check.
    pub name: Option<ConstRef>,
}

/// Payload of `LowInstr::NewTable`: only the destination register is recorded.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct NewTableInstr {
    /// Destination register.
    pub dst: Reg,
}

/// Payload of `LowInstr::SetList`: a table register, a value pack and a start index.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct SetListInstr {
    /// Register holding the table.
    pub base: Reg,
    /// Values to store (fixed range or open pack).
    pub values: ValuePack,
    /// NOTE(review): presumably the table index assigned to the first value — confirm.
    pub start_index: u32,
}

/// Payload of `LowInstr::Call`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct CallInstr {
    /// Register holding the callee.
    pub callee: Reg,
    /// Argument pack.
    pub args: ValuePack,
    /// Result pack.
    pub results: ResultPack,
    /// Ordinary call or method-call sugar.
    pub kind: CallKind,
    /// Optional method-name hint (see `MethodNameHint`).
    pub method_name: Option<MethodNameHint>,
}

/// Payload of `LowInstr::TailCall`; same fields as `CallInstr` minus `results`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct TailCallInstr {
    /// Register holding the callee.
    pub callee: Reg,
    /// Argument pack.
    pub args: ValuePack,
    /// Ordinary call or method-call sugar.
    pub kind: CallKind,
    /// Optional method-name hint (see `MethodNameHint`).
    pub method_name: Option<MethodNameHint>,
}

/// Payload of `LowInstr::VarArg`: where the results are placed.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct VarArgInstr {
    /// Result pack receiving the values.
    pub results: ResultPack,
}

/// Payload of `LowInstr::Return`: the pack of returned values.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct ReturnInstr {
    /// Values being returned (fixed range or open pack).
    pub values: ValuePack,
}

/// Payload of `LowInstr::Closure`: a destination register, the child proto and
/// the list of captures.
///
/// Not `Copy` because of the `Vec`. Derives `Eq` and `Hash` like the other
/// payloads, since every field (`Reg`, `ProtoRef`, `Vec<Capture>`) supports them.
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct ClosureInstr {
    /// Destination register.
    pub dst: Reg,
    /// Reference into the current proto's child-proto table.
    pub proto: ProtoRef,
    /// Capture entries of the closure.
    pub captures: Vec<Capture>,
}

/// Payload of `LowInstr::Close`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct CloseInstr {
    /// NOTE(review): presumably the first register affected by the close — confirm.
    pub from: Reg,
}

/// Payload of `LowInstr::Tbc`.
///
/// NOTE(review): presumably marks `reg` as a to-be-closed variable — confirm
/// against the dialect lowering that emits it.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct TbcInstr {
    /// Register the instruction refers to.
    pub reg: Reg,
}

/// Payload of `LowInstr::NumericForInit`.
///
/// Has the same field set as `NumericForLoopInstr`: four registers and two
/// instruction targets.
/// NOTE(review): register roles below are read off the field names — confirm
/// against the lowering code.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct NumericForInitInstr {
    /// Loop index register.
    pub index: Reg,
    /// Loop limit register.
    pub limit: Reg,
    /// Loop step register.
    pub step: Reg,
    /// Register of the loop binding.
    pub binding: Reg,
    /// Target taken to enter the loop body.
    pub body_target: InstrRef,
    /// Target taken to leave the loop.
    pub exit_target: InstrRef,
}

/// Payload of `LowInstr::NumericForLoop`.
///
/// Has the same field set as `NumericForInitInstr`: four registers and two
/// instruction targets.
/// NOTE(review): register roles below are read off the field names — confirm
/// against the lowering code.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct NumericForLoopInstr {
    /// Loop index register.
    pub index: Reg,
    /// Loop limit register.
    pub limit: Reg,
    /// Loop step register.
    pub step: Reg,
    /// Register of the loop binding.
    pub binding: Reg,
    /// Target taken to continue with the loop body.
    pub body_target: InstrRef,
    /// Target taken to leave the loop.
    pub exit_target: InstrRef,
}

/// Payload of `LowInstr::GenericForCall`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct GenericForCallInstr {
    /// Register range holding the iteration state.
    /// NOTE(review): exact layout of the state registers is not visible here — confirm.
    pub state: RegRange,
    /// Result pack receiving the call's results.
    pub results: ResultPack,
}

/// Payload of `LowInstr::GenericForLoop`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct GenericForLoopInstr {
    /// Control register of the loop.
    pub control: Reg,
    /// Register range of the loop bindings.
    pub bindings: RegRange,
    /// Target taken to continue with the loop body.
    pub body_target: InstrRef,
    /// Target taken to leave the loop.
    pub exit_target: InstrRef,
}

/// Payload of `LowInstr::Jump`: a single low-IR target.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct JumpInstr {
    /// Low-IR instruction jumped to.
    pub target: InstrRef,
}

/// Payload of `LowInstr::Branch`: a condition plus its two low-IR targets.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub struct BranchInstr {
    /// Condition being tested.
    pub cond: BranchCond,
    /// Target for the "then" outcome.
    pub then_target: InstrRef,
    /// Target for the "else" outcome.
    pub else_target: InstrRef,
}