substrate/arch/aarch64.rs
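//! AArch64 inline hooking: the target's first instructions are overwritten
//! with a jump to the replacement, and the displaced instructions are
//! relocated into a trampoline so the original can still be called. The
//! relocation scheme mirrors the approach used by And64InlineHook-style
//! hooks (an observation from the structure, not a confirmed lineage).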

use crate::error::{Result, SubstrateError};
use crate::hook::memory::ProtectedMemory;
use std::ptr;
use std::sync::atomic::{AtomicI32, Ordering};

const A64_MAX_INSTRUCTIONS: usize = 5;
const A64_MAX_REFERENCES: usize = A64_MAX_INSTRUCTIONS * 2;
const A64_NOP: u32 = 0xd503201f;
const A64_MAX_BACKUPS: usize = 256;
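
// The raw instruction words emitted throughout this file, decoded for
// reference:
//   0xd503201f  NOP
//   0x58000051  LDR X17, #8    (load a 64-bit literal placed 8 bytes ahead)
//   0x58000071  LDR X17, #12   (literal placed 12 bytes ahead)
//   0xd61f0220  BR  X17
//   0x1000009e  ADR X30, #16   (set the link register past an emulated BL)
//   0x14000000  B   #imm26     (base opcode for relative branches)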

#[repr(C)]
struct FixInfo {
    bp: *mut u32, // emitted instruction awaiting a forward-reference fix (null = free slot)
    ls: u32,      // left shift of the offset field within that instruction
    ad: u32,      // mask selecting the offset field
}

#[repr(C)]
struct InsnsInfo {
    insp: *mut u32, // address of this instruction's relocated copy in the trampoline
    fmap: [FixInfo; A64_MAX_REFERENCES],
}

struct Context {
    basep: i64, // start of the region being relocated
    endp: i64,  // one past the region's last instruction
    dat: [InsnsInfo; A64_MAX_INSTRUCTIONS],
}

impl Context {
    fn new(inp: *mut u32, count: i32) -> Self {
        let basep = inp as i64;
        let endp = unsafe { inp.add(count as usize) } as i64;

        Self {
            basep,
            endp,
            dat: unsafe { std::mem::zeroed() },
        }
    }

    fn is_in_fixing_range(&self, absolute_addr: i64) -> bool {
        absolute_addr >= self.basep && absolute_addr < self.endp
    }

    fn get_ref_ins_index(&self, absolute_addr: i64) -> isize {
        ((absolute_addr - self.basep) / 4) as isize
    }

    fn get_and_set_current_index(&mut self, inp: *mut u32, outp: *mut u32) -> isize {
        let current_idx = self.get_ref_ins_index(inp as i64);
        self.dat[current_idx as usize].insp = outp;
        current_idx
    }

    fn reset_current_ins(&mut self, idx: isize, outp: *mut u32) {
        self.dat[idx as usize].insp = outp;
    }

    fn insert_fix_map(&mut self, idx: isize, bp: *mut u32, ls: u32, ad: u32) {
        for f in &mut self.dat[idx as usize].fmap {
            if f.bp.is_null() {
                f.bp = bp;
                f.ls = ls;
                f.ad = ad;
                return;
            }
        }
    }

    fn process_fix_map(&mut self, idx: isize) {
        // Hoist the relocated-instruction pointer before the loop: reading it
        // inside would conflict with the loop's mutable borrow of `fmap`.
        let insp = self.dat[idx as usize].insp;
        for f in &mut self.dat[idx as usize].fmap {
            if f.bp.is_null() {
                break;
            }
            unsafe {
                let offset = ((insp as i64 - f.bp as i64) >> 2) as i32;
                *f.bp |= ((offset << f.ls) as u32) & f.ad;
                f.bp = ptr::null_mut();
            }
        }
    }
}

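/// Relocates a B/BL (branch immediate). Near targets get a re-encoded
/// relative branch; far targets get an LDR/BR absolute jump, with
/// ADR X30 emulating BL's link-register write.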
unsafe fn fix_branch_imm(
    inpp: &mut *mut u32,
    outpp: &mut *mut u32,
    ctxp: &mut Context,
) -> bool {
    const MASK: u32 = 0xfc000000;
    const RMASK: u32 = 0x03ffffff;
    const OP_B: u32 = 0x14000000;
    const OP_BL: u32 = 0x94000000;

    let ins = **inpp;
    let opc = ins & MASK;

    if opc == OP_B || opc == OP_BL {
        let current_idx = ctxp.get_and_set_current_index(*inpp, *outpp);
        // Sign-extend imm26 and scale it to a byte offset: (ins << 6) >> 4.
        let absolute_addr = (*inpp as i64) + ((((ins << 6) as i32) >> 4) as i64);
        let mut new_pc_offset = (absolute_addr - *outpp as i64) >> 2;
        let special_fix_type = ctxp.is_in_fixing_range(absolute_addr);

        if !special_fix_type && new_pc_offset.abs() >= (RMASK as i64 >> 1) {
            // Check the alignment of the literal slot's *address*, not the
            // uninitialized value stored there.
            let b_aligned = (((*outpp).add(2) as usize) & 7) == 0;

            if opc == OP_B {
                if !b_aligned {
                    **outpp = A64_NOP;
                    *outpp = outpp.add(1);
                    ctxp.reset_current_ins(current_idx, *outpp);
                }
                **outpp = 0x58000051; // LDR X17, #8
                outpp.add(1).write(0xd61f0220); // BR X17
                ptr::copy_nonoverlapping(
                    &absolute_addr as *const i64 as *const u32,
                    outpp.add(2),
                    2,
                );
                *outpp = outpp.add(4);
            } else {
                if b_aligned {
                    **outpp = A64_NOP;
                    *outpp = outpp.add(1);
                    ctxp.reset_current_ins(current_idx, *outpp);
                }
                **outpp = 0x58000071; // LDR X17, #12
                outpp.add(1).write(0x1000009e); // ADR X30, #16 (emulates BL's link)
                outpp.add(2).write(0xd61f0220); // BR X17
                ptr::copy_nonoverlapping(
                    &absolute_addr as *const i64 as *const u32,
                    outpp.add(3),
                    2,
                );
                *outpp = outpp.add(5);
            }
        } else {
            if special_fix_type {
                let ref_idx = ctxp.get_ref_ins_index(absolute_addr);
                if ref_idx <= current_idx {
                    new_pc_offset = (ctxp.dat[ref_idx as usize].insp as i64 - *outpp as i64) >> 2;
                } else {
                    // Forward reference: emit a zero offset and record a fixup.
                    ctxp.insert_fix_map(ref_idx, *outpp, 0, RMASK);
                    new_pc_offset = 0;
                }
            }

            **outpp = opc | ((new_pc_offset as u32) & !MASK);
            *outpp = outpp.add(1);
        }

        *inpp = inpp.add(1);
        ctxp.process_fix_map(current_idx);
        return true;
    }

    false
}

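/// Relocates B.cond, CBZ/CBNZ, and TBZ/TBNZ. Their short immediates often
/// cannot reach from the trampoline, so far targets are rewritten as a
/// conditional skip over an absolute LDR/BR jump.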
unsafe fn fix_cond_comp_test_branch(
    inpp: &mut *mut u32,
    outpp: &mut *mut u32,
    ctxp: &mut Context,
) -> bool {
    const LSB: u32 = 5;
    const LMASK01: u32 = 0xff00001f;
    const MASK0: u32 = 0xff000010;
    const OP_BC: u32 = 0x54000000;
    const MASK1: u32 = 0x7f000000;
    const OP_CBZ: u32 = 0x34000000;
    const OP_CBNZ: u32 = 0x35000000;
    const LMASK2: u32 = 0xfff8001f;
    const MASK2: u32 = 0x7f000000;
    const OP_TBZ: u32 = 0x36000000;
    const OP_TBNZ: u32 = 0x37000000;

    let ins = **inpp;
    let mut lmask = LMASK01;

    if (ins & MASK0) != OP_BC {
        let mut opc = ins & MASK1;
        if opc != OP_CBZ && opc != OP_CBNZ {
            opc = ins & MASK2;
            if opc != OP_TBZ && opc != OP_TBNZ {
                return false;
            }
            lmask = LMASK2;
        }
    }

    let current_idx = ctxp.get_and_set_current_index(*inpp, *outpp);
    // Sign-extend the immediate field (imm19 for B.cond/CBZ/CBNZ, imm14 for
    // TBZ/TBNZ): shift the field up to bit 31, then arithmetic-shift back
    // down to a scaled byte offset. A plain logical shift would lose the
    // sign of backward branches.
    let ext = (!lmask).leading_zeros();
    let absolute_addr =
        (*inpp as i64) + (((((ins & !lmask) << ext) as i32) >> (ext + LSB - 2)) as i64);
    let mut new_pc_offset = (absolute_addr - *outpp as i64) >> 2;
    let special_fix_type = ctxp.is_in_fixing_range(absolute_addr);

    if !special_fix_type && new_pc_offset.abs() >= (!lmask >> (LSB + 1)) as i64 {
        // Align the literal slot's address (outp + 4) to 8 bytes.
        if (((*outpp).add(4) as usize) & 7) != 0 {
            **outpp = A64_NOP;
            *outpp = outpp.add(1);
            ctxp.reset_current_ins(current_idx, *outpp);
        }
        // Re-emit the original condition with offset +8: a taken branch lands
        // on the absolute-jump stub, the untaken path hits B #20 and skips it.
        **outpp = (((8 >> 2) << LSB) & !lmask) | (ins & lmask);
        outpp.add(1).write(0x14000005); // B #20: skip the stub
        outpp.add(2).write(0x58000051); // LDR X17, #8
        outpp.add(3).write(0xd61f0220); // BR X17
        ptr::copy_nonoverlapping(
            &absolute_addr as *const i64 as *const u32,
            outpp.add(4),
            2,
        );
        *outpp = outpp.add(6);
    } else {
        if special_fix_type {
            let ref_idx = ctxp.get_ref_ins_index(absolute_addr);
            if ref_idx <= current_idx {
                new_pc_offset = (ctxp.dat[ref_idx as usize].insp as i64 - *outpp as i64) >> 2;
            } else {
                ctxp.insert_fix_map(ref_idx, *outpp, LSB, !lmask);
                new_pc_offset = 0;
            }
        }

        **outpp = (((new_pc_offset as u32) << LSB) & !lmask) | (ins & lmask);
        *outpp = outpp.add(1);
    }

    *inpp = inpp.add(1);
    ctxp.process_fix_map(current_idx);
    true
}

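/// Relocates LDR (literal) in its integer, SIMD, and LDRSW forms. If the
/// literal is out of range from the trampoline, its value is copied inline
/// right behind the load. PRFM (literal) is recognized and dropped.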
unsafe fn fix_loadlit(
    inpp: &mut *mut u32,
    outpp: &mut *mut u32,
    ctxp: &mut Context,
) -> bool {
    let ins = **inpp;

    // PRFM (literal) is only a prefetch hint; it is consumed without
    // emitting anything.
    if (ins & 0xff000000) == 0xd8000000 {
        let idx = ctxp.get_and_set_current_index(*inpp, *outpp);
        ctxp.process_fix_map(idx);
        *inpp = inpp.add(1);
        return true;
    }

    const MSB: u32 = 8;
    const LSB: u32 = 5;
    const MASK_30: u32 = 0x40000000;
    const MASK_31: u32 = 0x80000000;
    const LMASK: u32 = 0xff00001f;
    const MASK_LDR: u32 = 0xbf000000;
    const OP_LDR: u32 = 0x18000000;
    const MASK_LDRV: u32 = 0x3f000000;
    const OP_LDRV: u32 = 0x1c000000;
    const MASK_LDRSW: u32 = 0xff000000;
    const OP_LDRSW: u32 = 0x98000000;

    // `faligned` is the access-size alignment mask: 3 for 32-bit loads,
    // 7 for 64-bit, 15 for 128-bit SIMD.
    let mut mask = MASK_LDR;
    let mut faligned: usize = if (ins & MASK_30) != 0 { 7 } else { 3 };

    if (ins & MASK_LDR) != OP_LDR {
        mask = MASK_LDRV;
        if faligned != 7 {
            faligned = if (ins & MASK_31) != 0 { 15 } else { 3 };
        }
        if (ins & MASK_LDRV) != OP_LDRV {
            if (ins & MASK_LDRSW) != OP_LDRSW {
                return false;
            }
            mask = MASK_LDRSW;
            faligned = 7;
        }
    }

    let current_idx = ctxp.get_and_set_current_index(*inpp, *outpp);
    // Sign-extend imm19 and scale to bytes; `& !3` clears the Rt bits that
    // the arithmetic shift drags into the low two bits.
    let absolute_addr =
        (*inpp as i64) + (((((ins << MSB) as i32) >> (MSB + LSB - 2)) & !3) as i64);
    let new_pc_offset = (absolute_addr - *outpp as i64) >> 2;
    let special_fix_type = ctxp.is_in_fixing_range(absolute_addr);

    if special_fix_type
        || (new_pc_offset.abs() + ((faligned + 1 - 4) / 4) as i64) >= (!LMASK >> (LSB + 1)) as i64
    {
        // Pad with NOPs until the inlined literal's *address* is aligned.
        while (((*outpp).add(2) as usize) & faligned) != 0 {
            **outpp = A64_NOP;
            *outpp = outpp.add(1);
        }
        ctxp.reset_current_ins(current_idx, *outpp);

        // Copy the literal's value inline right after the load, then branch
        // over it.
        let ns = (faligned + 1) / 4;
        **outpp = (((8 >> 2) << LSB) & !mask) | (ins & LMASK);
        outpp.add(1).write((0x14000001 + ns) as u32);
        ptr::copy_nonoverlapping(absolute_addr as *const u32, outpp.add(2), ns);
        *outpp = outpp.add(2 + ns);
    } else {
        let mut new_offset = new_pc_offset;
        let faligned_shifted = faligned >> 2;
        while (new_offset & (faligned_shifted as i64)) != 0 {
            **outpp = A64_NOP;
            *outpp = outpp.add(1);
            new_offset = (absolute_addr - *outpp as i64) >> 2;
        }
        ctxp.reset_current_ins(current_idx, *outpp);

        **outpp = (((new_offset as u32) << LSB) & !mask) | (ins & LMASK);
        *outpp = outpp.add(1);
    }

    *inpp = inpp.add(1);
    ctxp.process_fix_map(current_idx);
    true
}

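/// Relocates ADR and ADRP. Out-of-range address computations are replaced
/// with an LDR of the precomputed absolute address.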
unsafe fn fix_pcreladdr(
    inpp: &mut *mut u32,
    outpp: &mut *mut u32,
    ctxp: &mut Context,
) -> bool {
    const MSB: u32 = 8;
    const LSB: u32 = 5;
    const MASK: u32 = 0x9f000000;
    const RMASK: u32 = 0x0000001f;
    const LMASK: u32 = 0xff00001f;
    const FMASK: u32 = 0x00ffffff;
    const MAX_VAL: u32 = 0x001fffff;
    const OP_ADR: u32 = 0x10000000;
    const OP_ADRP: u32 = 0x90000000;

    let ins = **inpp;

    match ins & MASK {
        OP_ADR => {
            let current_idx = ctxp.get_and_set_current_index(*inpp, *outpp);
            // immlo lives in bits [30:29]; immhi is sign-extended from [23:5].
            let lsb_bytes = ((ins << 1) >> 30) as i64;
            let absolute_addr = (*inpp as i64)
                + (((((ins << MSB) as i32) >> (MSB + LSB - 2)) & !3) as i64 | lsb_bytes);
            let mut new_pc_offset = absolute_addr - *outpp as i64;
            let special_fix_type = ctxp.is_in_fixing_range(absolute_addr);

            if !special_fix_type && new_pc_offset.abs() >= (MAX_VAL as i64 >> 1) {
                // Align the literal slot's address (outp + 2) to 8 bytes.
                if (((*outpp).add(2) as usize) & 7) != 0 {
                    **outpp = A64_NOP;
                    *outpp = outpp.add(1);
                    ctxp.reset_current_ins(current_idx, *outpp);
                }

                // Replace ADR with LDR Rd, #8 over an inlined 64-bit literal.
                **outpp = 0x58000000 | (((8 >> 2) << LSB) & !MASK) | (ins & RMASK);
                outpp.add(1).write(0x14000003); // B #12: skip the literal
                ptr::copy_nonoverlapping(
                    &absolute_addr as *const i64 as *const u32,
                    outpp.add(2),
                    2,
                );
                *outpp = outpp.add(4);
            } else {
                if special_fix_type {
                    let ref_idx = ctxp.get_ref_ins_index(absolute_addr & !3);
                    if ref_idx <= current_idx {
                        new_pc_offset = ctxp.dat[ref_idx as usize].insp as i64 - *outpp as i64;
                    } else {
                        ctxp.insert_fix_map(ref_idx, *outpp, LSB, FMASK);
                        new_pc_offset = 0;
                    }
                }

                **outpp = (((new_pc_offset as u32) << (LSB - 2)) & FMASK) | (ins & LMASK);
                *outpp = outpp.add(1);
            }

            *inpp = inpp.add(1);
            ctxp.process_fix_map(current_idx);
            true
        }
        OP_ADRP => {
            let current_idx = ctxp.get_and_set_current_index(*inpp, *outpp);
            let lsb_bytes = ((ins << 1) >> 30) as i32;
            let page_off =
                ((((((ins << MSB) as i32) >> (MSB + LSB - 2)) & !3) | lsb_bytes) as i64) << 12;
            let absolute_addr = ((*inpp as i64) & !0xfff) + page_off;

            if ctxp.is_in_fixing_range(absolute_addr) {
                // Target page lies inside the region being relocated; the
                // ADRP is copied through unchanged, a known limitation of
                // this relocation scheme.
                **outpp = ins;
                *outpp = outpp.add(1);
            } else {
                if (((*outpp).add(2) as usize) & 7) != 0 {
                    **outpp = A64_NOP;
                    *outpp = outpp.add(1);
                    ctxp.reset_current_ins(current_idx, *outpp);
                }

                **outpp = 0x58000000 | (((8 >> 2) << LSB) & !MASK) | (ins & RMASK);
                outpp.add(1).write(0x14000003); // B #12: skip the literal
                ptr::copy_nonoverlapping(
                    &absolute_addr as *const i64 as *const u32,
                    outpp.add(2),
                    2,
                );
                *outpp = outpp.add(4);
            }

            *inpp = inpp.add(1);
            ctxp.process_fix_map(current_idx);
            true
        }
        _ => false,
    }
}

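/// Builds the trampoline: relocates `count` instructions from `inp` into
/// `outp`, appends a jump back to the first untouched instruction of the
/// original function, and flushes the instruction cache.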
unsafe fn fix_instructions(inp: *mut u32, count: i32, outp: *mut u32) {
    let mut ctx = Context::new(inp, count);
    let outp_base = outp;
    let mut inp_cur = inp;
    let mut outp_cur = outp;
    let mut remaining = count;

    while remaining > 0 {
        if fix_branch_imm(&mut inp_cur, &mut outp_cur, &mut ctx) {
            remaining -= 1;
            continue;
        }
        if fix_cond_comp_test_branch(&mut inp_cur, &mut outp_cur, &mut ctx) {
            remaining -= 1;
            continue;
        }
        if fix_loadlit(&mut inp_cur, &mut outp_cur, &mut ctx) {
            remaining -= 1;
            continue;
        }
        if fix_pcreladdr(&mut inp_cur, &mut outp_cur, &mut ctx) {
            remaining -= 1;
            continue;
        }

        // Not PC-relative: copy the instruction verbatim.
        let idx = ctx.get_and_set_current_index(inp_cur, outp_cur);
        ctx.process_fix_map(idx);
        *outp_cur = *inp_cur;
        inp_cur = inp_cur.add(1);
        outp_cur = outp_cur.add(1);
        remaining -= 1;
    }

    // Finish with a jump back into the original function, right after the
    // displaced instructions.
    let callback = inp_cur;
    let pc_offset = (callback as i64 - outp_cur as i64) >> 2;

    if pc_offset.abs() >= (0x03ffffff >> 1) {
        if ((outp_cur.add(2) as usize) & 7) != 0 {
            *outp_cur = A64_NOP;
            outp_cur = outp_cur.add(1);
        }
        *outp_cur = 0x58000051; // LDR X17, #8
        *outp_cur.add(1) = 0xd61f0220; // BR X17
        ptr::copy_nonoverlapping(
            &callback as *const *mut u32 as *const u32,
            outp_cur.add(2),
            2,
        );
        outp_cur = outp_cur.add(4);
    } else {
        *outp_cur = 0x14000000 | ((pc_offset & 0x03ffffff) as u32);
        outp_cur = outp_cur.add(1);
    }

    let total = (outp_cur as usize - outp_base as usize) / 4;
    clear_cache(outp_base as *mut u8, total * 4);
}

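/// Flushes the instruction cache for freshly written code via the
/// compiler-rt/libgcc `__clear_cache` builtin; a no-op on other
/// architectures.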
unsafe fn clear_cache(ptr: *mut u8, size: usize) {
    #[cfg(target_arch = "aarch64")]
    {
        unsafe extern "C" {
            fn __clear_cache(start: *mut u8, end: *mut u8);
        }
        unsafe { __clear_cache(ptr, ptr.add(size)) };
    }
}

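// Fixed-size trampoline pool: slots are handed out monotonically and never
// freed, which keeps allocation lock-free.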
static TRAMPOLINE_INDEX: AtomicI32 = AtomicI32::new(-1);
static mut INSNS_POOL: [[u32; A64_MAX_INSTRUCTIONS * 10]; A64_MAX_BACKUPS] =
    [[0; A64_MAX_INSTRUCTIONS * 10]; A64_MAX_BACKUPS];

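/// Bumps the pool cursor and returns the next trampoline slot, or `None`
/// once all `A64_MAX_BACKUPS` slots are used.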
unsafe fn fast_allocate_trampoline() -> Option<*mut u32> {
    let i = TRAMPOLINE_INDEX.fetch_add(1, Ordering::SeqCst) + 1;
    if i >= 0 && i < A64_MAX_BACKUPS as i32 {
        // Take the slot's address without forming a reference to the mutable
        // static (sidesteps the static_mut_refs footgun).
        Some(core::ptr::addr_of_mut!(INSNS_POOL[i as usize]) as *mut u32)
    } else {
        None
    }
}

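/// Installs an inline hook at `symbol`, redirecting execution to `replace`.
/// If `result` is non-null it receives a trampoline that runs the displaced
/// instructions and then continues in the original function. Returns the
/// number of bytes patched.
///
/// A minimal usage sketch (the names below are illustrative, not part of
/// this crate):
///
/// ```ignore
/// let mut original: *mut u8 = core::ptr::null_mut();
/// unsafe {
///     hook_function_aarch64(
///         target_fn_addr as *mut u8, // address you resolved yourself
///         my_replacement as *mut u8, // your replacement, same ABI
///         &mut original,             // receives the callable trampoline
///     )
///     .expect("hook failed");
/// }
/// ```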
pub unsafe fn hook_function_aarch64(
    symbol: *mut u8,
    replace: *mut u8,
    result: *mut *mut u8,
) -> Result<usize> {
    if symbol.is_null() {
        return Err(SubstrateError::NullPointer);
    }

    // Make the trampoline pool writable/executable once, on first use. The
    // closure does not inherit the surrounding unsafe fn's context, so it
    // needs its own unsafe block.
    static POOL_INIT: std::sync::Once = std::sync::Once::new();
    POOL_INIT.call_once(|| unsafe {
        let pool_ptr = core::ptr::addr_of_mut!(INSNS_POOL) as *mut u8;
        let pool_size = core::mem::size_of::<[[u32; A64_MAX_INSTRUCTIONS * 10]; A64_MAX_BACKUPS]>();
        let _ = ProtectedMemory::new(pool_ptr, pool_size);
    });

    let trampoline = if !result.is_null() {
        match fast_allocate_trampoline() {
            Some(t) => t as *mut u8,
            None => {
                return Err(SubstrateError::HookFailed(
                    "Failed to allocate trampoline".to_string(),
                ))
            }
        }
    } else {
        ptr::null_mut()
    };

    let original = symbol as *mut u32;
    let pc_offset = (replace as i64 - symbol as i64) >> 2;

    if pc_offset.abs() >= (0x03ffffff >> 1) {
        // Out of B range: patch in LDR X17 / BR X17 plus a 64-bit literal,
        // with a leading NOP when the literal would otherwise be misaligned.
        let count = if ((original.add(2) as usize) & 7) != 0 { 5 } else { 4 };

        if !trampoline.is_null() {
            fix_instructions(original, count, trampoline as *mut u32);
        }

        let _code = ProtectedMemory::new(original as *mut u8, 5 * 4)?;

        if count == 5 {
            *original = A64_NOP;
            let target = original.add(1);
            *target = 0x58000051; // LDR X17, #8
            *target.add(1) = 0xd61f0220; // BR X17
            ptr::copy_nonoverlapping(
                &replace as *const *mut u8 as *const u32,
                target.add(2),
                2,
            );
        } else {
            *original = 0x58000051; // LDR X17, #8
            *original.add(1) = 0xd61f0220; // BR X17
            ptr::copy_nonoverlapping(
                &replace as *const *mut u8 as *const u32,
                original.add(2),
                2,
            );
        }

        clear_cache(symbol, 5 * 4);
    } else {
        // Near enough for a single relative branch.
        if !trampoline.is_null() {
            fix_instructions(original, 1, trampoline as *mut u32);
        }

        let _code = ProtectedMemory::new(original as *mut u8, 4)?;
        *original = 0x14000000 | ((pc_offset & 0x03ffffff) as u32);
        clear_cache(symbol, 4);
    }

    if !result.is_null() {
        *result = trampoline;
    }

    // Bytes patched at the hook site.
    Ok(if pc_offset.abs() >= (0x03ffffff >> 1) { 5 * 4 } else { 4 })
}
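
// A smoke-test sketch of the round trip. It assumes `ProtectedMemory` can
// remap a Rust function's code page and that the optimizer does not inline
// the target; treat it as illustrative rather than a guaranteed-green test.
#[cfg(all(test, target_arch = "aarch64"))]
mod tests {
    use super::*;

    #[inline(never)]
    extern "C" fn victim() -> i32 {
        1
    }

    extern "C" fn replacement() -> i32 {
        2
    }

    #[test]
    fn hook_redirects_and_trampoline_preserves_original() {
        unsafe {
            let mut trampoline: *mut u8 = core::ptr::null_mut();
            hook_function_aarch64(victim as *mut u8, replacement as *mut u8, &mut trampoline)
                .expect("hook should install");

            // Call through an opaque pointer so the patched entry is used.
            let hooked: extern "C" fn() -> i32 = std::hint::black_box(victim);
            assert_eq!(hooked(), 2);

            // The trampoline should still reach the original body.
            let orig: extern "C" fn() -> i32 = core::mem::transmute(trampoline);
            assert_eq!(orig(), 1);
        }
    }
}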