#ifndef GPU_INTEGER_DIVIDE_H
#define GPU_INTEGER_DIVIDE_H
template<class type, int size> void normalize_divisor(fixed_integer<type, size>& b, int& shift_limbs, int& shift_bits) {
shift_limbs=0;
for (int x=0;x<size;++x) {
if (b[size-1]==0) {
++shift_limbs;
b.left_shift_limbs(1);
} else {
break;
}
}
shift_bits=clz(b[size-1]);
b<<=shift_bits;
}
uint64 calculate_reciprocal(uint32 high, uint32 low) {
assert((high>>31)!=0);
uint64 both_source=uint64(low) | (uint64(high)<<32);
uint64 both=both_source;
both>>=2*32-53;
both&=~(1ull<<52);
uint64 res;
if (both<=1) {
res=1ull<<63;
} else {
--both;
uint64 bits=both;
bits|=1023ull<<52;
double bits_double=*(double*)&bits;
bits_double=1.0/(bits_double);
bits=*(uint64*)&bits_double;
bits&=(1ull<<52)-1;
res=bits;
++res;
res|=1ull<<52;
res<<=(62-52);
}
return res;
}
uint32 calculate_quotient(uint32 high, uint32 low, uint64 reciprocal, uint32 b) {
uint64 both=uint64(low) | (uint64(high)<<32);
uint64 product_high=(uint128(both)*uint128(reciprocal))>>64;
++product_high;
uint64 res=product_high>>(32-2);
if (res>=1ull<<32) {
res=(1ull<<32)-1;
}
return uint32(res);
}
fixed_integer<uint64, 2> calculate_reciprocal(uint64 high, uint64 low);
uint64 calculate_quotient(uint64 high, uint64 low, fixed_integer<uint64, 2> reciprocal, uint64 b);
template<class type, int size_a, int size_b>
void divide_integers_impl(
fixed_integer<type, size_a> a, fixed_integer<type, size_b> b, int b_shift_limbs,
fixed_integer<type, size_a-1>& q, fixed_integer<type, size_b>& r
) {
const int max_quotient_size=size_a-1;
fixed_integer<type, max_quotient_size> res;
auto reciprocal=calculate_reciprocal(b[size_b-1], (size_b>=2)? b[size_b-2] : 0);
fixed_integer<type, size_a> b_shifted;
b_shifted=b;
b_shifted.left_shift_limbs(size_a-size_b-1);
int quotient_size=size_a-(size_b-b_shift_limbs);
for (int x=0;x<max_quotient_size;++x) {
if (x>=quotient_size) {
break;
}
{
type qj=calculate_quotient(a[size_a-1-x], a[size_a-2-x], reciprocal, b[size_b-1]);
auto a_start=a;
type qj_start=qj;
auto b_shifted_qj=b_shifted;
b_shifted_qj*=qj;
a-=b_shifted_qj;
while (a.is_negative()) {
--qj;
a+=b_shifted;
}
b_shifted.right_shift_limbs(1);
res[max_quotient_size-1-x]=qj;
}
}
for (int x=0;x<max_quotient_size;++x) {
if (quotient_size>=max_quotient_size) {
break;
}
res.right_shift_limbs(1);
++quotient_size;
}
q=res;
r=a;
}
template<class type, int size_a, int size_b>
void divide_integers(
const fixed_integer<type, size_a>& a, const fixed_integer<type, size_b>& b,
fixed_integer<type, size_a>& q, fixed_integer<type, size_b>& r
) {
int shift_limbs;
int shift_bits;
auto b_normalized=b;
b_normalized.set_negative(false);
normalize_divisor(b_normalized, shift_limbs, shift_bits);
fixed_integer<type, size_a+1> a_shifted;
a_shifted=a;
a_shifted.set_negative(false);
a_shifted<<=shift_bits;
fixed_integer<type, size_a> q_unsigned;
divide_integers_impl(a_shifted, b_normalized, shift_limbs, q_unsigned, r);
r>>=shift_bits;
if (a.is_negative()!=b.is_negative()) {
if (r==fixed_integer<type, size_b>(integer(0u))) {
q=q_unsigned;
q=-q;
} else {
q=q_unsigned+fixed_integer<type, size_a>(integer(1u));
q=-q;
auto abs_b=b;
abs_b.set_negative(false);
r=abs_b-r;
}
} else {
q=q_unsigned;
}
r.set_negative(b.is_negative());
{
integer a_int(a);
integer b_int(b);
integer q_expected=a_int/b_int;
integer r_expected=a_int.fdiv_r(b_int);
integer r_expected_2=a_int%b_int;
integer q_actual=q;
integer r_actual=r;
assert(q_expected==q_actual);
assert(r_expected==r_actual);
}
}
template<class type, int size_a, int size_b>
fixed_integer<type, size_a> operator/(
const fixed_integer<type, size_a>& a, const fixed_integer<type, size_b>& b
) {
fixed_integer<type, size_a> q;
fixed_integer<type, size_b> r;
divide_integers(a, b, q, r);
return q;
}
template<class type, int size_a, int size_b>
fixed_integer<type, size_b> operator%(
const fixed_integer<type, size_a>& a, fixed_integer<type, size_b> b
) {
fixed_integer<type, size_a> q;
fixed_integer<type, size_b> r;
b.set_negative(false);
divide_integers(a, b, q, r);
return r;
}
fixed_integer<uint64, 2> calculate_reciprocal(uint64 high, uint64 low) {
assert((high>>63)!=0);
fixed_integer<uint32, 6> a;
a[5]=1u<<31;
fixed_integer<uint32, 3> b;
b[0]=uint32(low>>32);
b[1]=uint32(high);
b[2]=uint32(high>>32);
b-=fixed_integer<uint32, 3>(integer(1));
return fixed_integer<uint64, 2>(to_uint64(a/b + fixed_integer<uint32, 6>(integer(1)))<<31);
}
uint64 calculate_quotient(uint64 high, uint64 low, fixed_integer<uint64, 2> reciprocal, uint64 b) {
fixed_integer<uint64, 2> both;
both[0]=low;
both[1]=high;
integer both_int(both);
integer reciprocal_int(reciprocal);
integer product_both_int(both_int*reciprocal_int);
fixed_integer<uint64, 4> product_both(both*reciprocal);
assert(integer(product_both)==product_both_int);
product_both.right_shift_limbs(2);
product_both_int>>=128;
assert(integer(product_both)==product_both_int);
fixed_integer<uint64, 2> product_high(product_both);
product_high+=fixed_integer<uint64, 2>(integer(1));
product_high>>=64-2;
uint64 res;
if (product_high[1]!=0) {
res=~uint64(0);
} else {
res=product_high[0];
}
return res;
}
#endif