1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
// generated source. do not edit.
#![allow(non_upper_case_globals, unused_macros, unused_imports)]
use crate::low::macros::*;
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0
// ----------------------------------------------------------------------------
// Given table: uint64_t[height*width], copy table[idx*width...(idx+1)*width-1]
// into z[0..width-1].
//
// extern void bignum_copy_row_from_table
// (uint64_t *z, const uint64_t *table, uint64_t height, uint64_t width,
// uint64_t idx);
//
// Standard x86-64 ABI: RDI = z, RSI = table, RDX = height, RCX = width,
// R8 = idx
// Microsoft x64 ABI: RCX = z, RDX = table, R8 = height, R9 = width,
// [RSP+40] = idx
// ----------------------------------------------------------------------------
// Register carrying the destination pointer `z` (RDI in the standard x86-64 ABI).
macro_rules! z {
    () => { "rdi" };
}
// Register carrying the source `table` pointer (RSI in the standard x86-64 ABI).
macro_rules! table {
    () => { "rsi" };
}
// Register carrying `height`, the number of rows in the table (RDX in the
// standard x86-64 ABI).
macro_rules! height {
    () => { "rdx" };
}
// Register carrying `width`, the number of 64-bit words per row (RCX in the
// standard x86-64 ABI).
macro_rules! width {
    () => { "rcx" };
}
// Register carrying `idx`, the number of the row to extract (R8 in the
// standard x86-64 ABI).
macro_rules! idx {
    () => { "r8" };
}
// Scratch register used as the loop counter `i`: first a countdown while
// zeroing the output, then the current row number while scanning the table.
macro_rules! i {
    () => { "r9" };
}
// Scratch register used as the word index `j` within a row; also reused to
// hold the row size in bytes when stepping to the next row.
macro_rules! j {
    () => { "r10" };
}
/// Given `table: uint64_t[height*width]`, copy
/// `table[idx*width...(idx+1)*width-1]` into `z[0..width-1]`.
///
/// `table` is treated as `height` consecutive rows of `width` 64-bit words.
/// Every word of every row is loaded, and the selected row is merged into `z`
/// with a conditional move, so the sequence of memory addresses touched does
/// not depend on `index`.
///
/// Behaviour at the edges, as implemented by the assembly below:
///
/// * If `height` or `width` is zero, the routine returns immediately and `z`
///   is left unmodified.
/// * If `index >= height`, no row matches and `z` is left filled with zeros.
///
/// # Preconditions
///
/// These are checked with `debug_assert!` only:
///
/// * `z.len() == width`
/// * `table.len() >= height * width`, and the product does not overflow `u64`
pub(crate) fn bignum_copy_row_from_table(
    z: &mut [u64],
    table: &[u64],
    height: u64,
    width: u64,
    index: u64,
) {
    debug_assert!(z.len() as u64 == width);
    // The assembly reads `height * width` words starting at `table`; make sure
    // the slice really covers them (and that the product is representable).
    debug_assert!(matches!(
        height.checked_mul(width),
        Some(words) if table.len() as u64 >= words
    ));
    // SAFETY: inline assembly. see [crate::low::inline_assembly_safety] for safety info.
    // The code writes exactly `width` words through `z` and reads exactly
    // `height * width` words through `table`; the preconditions asserted above
    // are what keep those accesses inside the two slices.
    unsafe {
        core::arch::asm!(
            Q!(" endbr64 " ),
            // Nothing to do when the table has no rows or rows have no words.
            Q!(" test " height!() ", " height!()),
            Q!(" jz " Label!("bignum_copy_row_from_table_end", 2, After)),
            Q!(" test " width!() ", " width!()),
            Q!(" jz " Label!("bignum_copy_row_from_table_end", 2, After)),
            // Zero z[0..width]: rax walks the output, i counts down from width.
            Q!(" mov " "rax, " z!()),
            Q!(" mov " i!() ", " width!()),
            Q!(Label!("bignum_copy_row_from_table_initzero", 3) ":"),
            Q!(" mov " "QWORD PTR [rax], 0"),
            Q!(" add " "rax, 8"),
            Q!(" dec " i!()),
            Q!(" jnz " Label!("bignum_copy_row_from_table_initzero", 3, Before)),
            // Scan the table: i is the current row number, rax points at that row.
            Q!(" mov " i!() ", 0"),
            Q!(" mov " "rax, " table!()),
            Q!(Label!("bignum_copy_row_from_table_outerloop", 4) ":"),
            Q!(" mov " j!() ", 0"),
            Q!(Label!("bignum_copy_row_from_table_innerloop", 5) ":"),
            // r11 = (i == idx) ? row[j] : 0, then z[j] |= r11.
            Q!(" xor " "r11, r11"),
            Q!(" cmp " i!() ", " idx!()),
            // cmov always read the memory address
            // https://stackoverflow.com/a/54050427
            Q!(" cmove " "r11, [rax + 8 * " j!() "]"),
            Q!(" or " "[" z!() "+ 8 * " j!() "], r11"),
            Q!(" inc " j!()),
            Q!(" cmp " j!() ", " width!()),
            Q!(" jne " Label!("bignum_copy_row_from_table_innerloop", 5, Before)),
            Q!(Label!("bignum_copy_row_from_table_innerloop_done", 6) ":"),
            // Advance rax by one row (width * 8 bytes); j is reused as scratch.
            Q!(" lea " j!() ", [" width!() "* 8]"),
            Q!(" add " "rax, " j!()),
            Q!(" inc " i!()),
            Q!(" cmp " i!() ", " height!()),
            Q!(" jne " Label!("bignum_copy_row_from_table_outerloop", 4, Before)),
            Q!(Label!("bignum_copy_row_from_table_end", 2) ":"),
            // Arguments are pinned to the registers named by the macros above;
            // their final register values are discarded.
            inout("rdi") z.as_mut_ptr() => _,
            inout("rsi") table.as_ptr() => _,
            inout("rdx") height => _,
            inout("rcx") width => _,
            inout("r8") index => _,
            // clobbers
            out("r10") _,
            out("r11") _,
            out("r9") _,
            out("rax") _,
        )
    };
}