1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
use crate::records::bytecode_builder::BytecodeBuilder;
use crate::records::debug_local_bytecode_builder::DebugLocal;
use crate::records::jump::Jump;
use crate::records::typed_local_bytecode_builder::TypedLocal;
use luaur_common::enums::luau_opcode::LuauOpcode;
use luaur_common::functions::get_op_length::getOpLength;
use luaur_common::macros::luau_assert::LUAU_ASSERT;
use luaur_common::macros::luau_insn_d::LUAU_INSN_D;
use luaur_common::macros::luau_insn_op::LUAU_INSN_OP;
impl BytecodeBuilder {
pub fn expand_jumps(&mut self) {
if !self.has_long_jumps {
return;
}
// we have some jump instructions that couldn't be patched which means their offset didn't fit into 16 bits
// our strategy for replacing instructions is as follows: instead of
// OP jumpoffset
// we will synthesize a jump trampoline before our instruction (note that jump offsets are relative to next instruction):
// JUMP +1
// JUMPX jumpoffset
// OP -2
// the idea is that during forward execution, we will jump over JUMPX into OP; if OP decides to jump, it will jump to JUMPX
// JUMPX can carry a 24-bit jump offset
// jump trampolines expand the code size, which can increase existing jump distances.
// because of this, we may need to expand jumps that previously fit into 16-bit just fine.
// the worst-case expansion is 3x, so to be conservative we will repatch all jumps that have an offset >= 32767/3
const K_MAX_JUMP_DISTANCE_CONSERVATIVE: i32 = 32767 / 3;
// we will need to process jumps in order
self.jumps.sort_by(|lhs, rhs| lhs.source.cmp(&rhs.source));
// first, let's add jump thunks for every jump with a distance that's too big
// we will create new instruction buffers, with remap table keeping track of the moves: remap[oldpc] = newpc
let mut remap: Vec<u32> = vec![0; self.insns.len()];
let mut newinsns: Vec<u32> = Vec::with_capacity(self.insns.len());
let mut newlines: Vec<i32> = Vec::with_capacity(self.insns.len());
LUAU_ASSERT!(self.insns.len() == self.lines.len());
let mut current_jump: usize = 0;
let mut pending_trampolines: usize = 0;
let mut i: usize = 0;
while i < self.insns.len() {
let op = LUAU_INSN_OP(self.insns[i]) as u8;
LUAU_ASSERT!(op < LuauOpcode::LOP__COUNT as u8);
if current_jump < self.jumps.len() && self.jumps[current_jump].source == i as u32 {
let offset = (self.jumps[current_jump].target as i32)
- (self.jumps[current_jump].source as i32)
- 1;
if offset.abs() > K_MAX_JUMP_DISTANCE_CONSERVATIVE {
// insert jump trampoline as described above; we keep JUMPX offset uninitialized in this pass
newinsns.push(LuauOpcode::LOP_JUMP as u32 | (1 << 16));
newinsns.push(LuauOpcode::LOP_JUMPX as u32);
newlines.push(self.lines[i]);
newlines.push(self.lines[i]);
pending_trampolines += 1;
}
current_jump += 1;
}
let oplen = getOpLength(unsafe { core::mem::transmute::<u8, LuauOpcode>(op) });
// copy instruction and line info to the new stream
for _ in 0..oplen {
remap[i] = newinsns.len() as u32;
newinsns.push(self.insns[i]);
newlines.push(self.lines[i]);
i += 1;
}
}
LUAU_ASSERT!(current_jump == self.jumps.len());
LUAU_ASSERT!(pending_trampolines > 0);
// now we need to recompute offsets for jump instructions - we could not do this in the first pass because the offsets are between *target*
// instructions
for jump in &mut self.jumps {
let offset = (jump.target as i32) - (jump.source as i32) - 1;
let newoffset =
(remap[jump.target as usize] as i32) - (remap[jump.source as usize] as i32) - 1;
if offset.abs() > K_MAX_JUMP_DISTANCE_CONSERVATIVE {
// fix up jump trampoline
let trampoline_pos = remap[jump.source as usize] as usize - 1;
let op_pos = trampoline_pos + 1;
let (left, right) = newinsns.split_at_mut(op_pos);
let insnt = &mut left[trampoline_pos];
let insnj = &mut right[0];
LUAU_ASSERT!(LUAU_INSN_OP(*insnt) == LuauOpcode::LOP_JUMPX as u32);
// patch JUMPX to JUMPX to target location; note that newoffset is the offset of the jump *relative to OP*, so we need to add 1 to make it
// relative to JUMPX
*insnt &= 0xff;
*insnt |= ((newoffset + 1) as u32) << 8;
// patch OP to OP -2
*insnj &= 0xffff;
*insnj |= ((-2i16) as u32) << 16;
pending_trampolines -= 1;
} else {
let insn = &mut newinsns[remap[jump.source as usize] as usize];
// make sure jump instruction had the correct offset before we started
LUAU_ASSERT!(LUAU_INSN_D(*insn) == offset);
// patch instruction with the new offset
LUAU_ASSERT!(i32::from((newoffset as i16)) == newoffset);
*insn &= 0xffff;
*insn |= (newoffset as u32) << 16;
}
}
LUAU_ASSERT!(pending_trampolines == 0);
// this was hard, but we're done.
core::mem::swap(&mut self.insns, &mut newinsns);
core::mem::swap(&mut self.lines, &mut newlines);
for debug_local in &mut self.debug_locals {
// endpc is exclusive, to get the right remapping, we need to remap the location before the end
if debug_local.startpc != debug_local.endpc {
debug_local.endpc = remap[(debug_local.endpc - 1) as usize] + 1;
} else {
debug_local.endpc = remap[debug_local.endpc as usize];
}
debug_local.startpc = remap[debug_local.startpc as usize];
}
for typed_local in &mut self.typed_locals {
// endpc is exclusive, to get the right remapping, we need to remap the location before the end
if typed_local.startpc != typed_local.endpc {
typed_local.endpc = remap[(typed_local.endpc - 1) as usize] + 1;
} else {
typed_local.endpc = remap[typed_local.endpc as usize];
}
typed_local.startpc = remap[typed_local.startpc as usize];
}
}
}