use crate::bid_decimal_data::{BID_ESTIMATE_DECIMAL_DIGITS, BID_POWER10_INDEX_BINEXP_128, BID_POWER10_TABLE_128};
use crate::bid_internal::*;
use crate::bid_sqrt_macros::{bid_long_sqrt128, short_sqrt128};
use crate::d128::{_IDEC_flags, StatusFlags, RoundingMode};
/// Decimal128 square root (port of the Intel BID library's `bid128_sqrt`).
///
/// Special encodings are handled first: NaN payloads propagate (an sNaN
/// additionally raises INVALID), `sqrt(+Inf)` passes through, `sqrt(-Inf)`
/// and `sqrt` of any negative non-zero value raise INVALID and return QNaN,
/// and `sqrt(+/-0)` returns a zero with the exponent halved. Otherwise the
/// coefficient is scaled up to full 34-digit precision, an integer square
/// root is taken, and the result is corrected per `rnd_mode`.
///
/// * `x`        - operand whose square root is computed
/// * `rnd_mode` - IEEE 754 rounding direction for the result
/// * `pfpsf`    - status flags; INVALID / INEXACT may be set here
pub(crate) fn bid128_sqrt(x: &BID_UINT128, rnd_mode: RoundingMode, pfpsf: &mut _IDEC_flags) -> BID_UINT128 {
    let mut M256: BID_UINT256 = Default::default();
    let C256: BID_UINT256;
    let mut C4: BID_UINT256 = Default::default();
    let mut C8: BID_UINT256 = Default::default();
    let mut CX: BID_UINT128 = Default::default();
    let CX1: BID_UINT128;
    let mut CX2: BID_UINT128 = Default::default();
    let mut A10: BID_UINT128;
    let S2: BID_UINT128;
    let T128: &BID_UINT128;
    let TP128: &BID_UINT128;
    let mut CS: BID_UINT128 = Default::default();
    let mut CSM: BID_UINT128 = Default::default();
    let mut res: BID_UINT128 = Default::default();
    let mut sign_x: BID_UINT64 = 0;
    let mut Carry: BID_UINT64;
    let D: BID_SINT64;
    let mut fx: BID_UI32FLOAT = Default::default();
    let mut f64: BID_UI32FLOAT = Default::default();
    let mut exponent_x: i32 = 0;
    let bin_expon_cx: i32;
    let mut digits: i32;
    let mut scale: i32;
    let exponent_q: i32;

    // Unpack the argument; a zero return flags a special operand
    // (NaN / Infinity / zero).
    if unpack_BID128_value(&mut sign_x, &mut exponent_x, &mut CX, x) == 0 {
        res.w[1] = CX.w[1];
        res.w[0] = CX.w[0];
        // NaN: propagate the (quieted) payload; sNaN also raises INVALID.
        if (x.w[1] & 0x7c00000000000000u64) == 0x7c00000000000000u64 {
            if (x.w[1] & 0x7e00000000000000u64) == 0x7e00000000000000u64 {
                // signaling NaN
                __set_status_flags(pfpsf, StatusFlags::BID_INVALID_EXCEPTION);
            }
            res.w[1] = CX.w[1] & QUIET_MASK64;
            return res;
        }
        // Infinity: +Inf passes through; sqrt(-Inf) is invalid -> QNaN.
        if (x.w[1] & 0x7800000000000000u64) == 0x7800000000000000u64 {
            res.w[1] = CX.w[1];
            if sign_x != 0 {
                res.w[1] = 0x7c00000000000000u64;
                __set_status_flags(pfpsf, StatusFlags::BID_INVALID_EXCEPTION);
            }
            return res;
        }
        // sqrt(+/-0) = +/-0 with the (biased) exponent halved.
        res.w[1] = sign_x | ((((exponent_x + DECIMAL_EXPONENT_BIAS_128) as BID_UINT64) >> 1) << 49);
        res.w[0] = 0;
        return res;
    }
    // sqrt of a negative non-zero number: invalid operation -> QNaN.
    if sign_x != 0 {
        res.w[1] = 0x7c00000000000000u64;
        res.w[0] = 0;
        __set_status_flags(pfpsf, StatusFlags::BID_INVALID_EXCEPTION);
        return res;
    }
    // SAFETY: only plain `ui32`/`d` union members are read/written and every
    // bit pattern is a valid value for both views; this reproduces the C
    // implementation's float-based binary-exponent / digit-count estimate.
    unsafe {
        // 0x5f800000 is 2^64 as an f32; folds the high word into one float.
        f64.ui32 = 0x5f800000;
        fx.d = (CX.w[1] as f32) * f64.d + (CX.w[0] as f32);
        bin_expon_cx = (((fx.ui32 >> 23) & 0xff) - 0x7f) as i32;
        digits = BID_ESTIMATE_DECIMAL_DIGITS[bin_expon_cx as usize];
    }

    A10 = CX;
    if (exponent_x & 1) == 1 {
        // Odd exponent: multiply the coefficient by 10 (as 8*CX + 2*CX) so
        // the remaining exponent is even and can be halved exactly.
        A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61);
        A10.w[0] = CX.w[0] << 3;
        CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63);
        CX2.w[0] = CX.w[0] << 1;
        A10 = __add_128_128(&A10, &CX2);
    }

    // Fast path: the (adjusted) coefficient is a perfect square.
    CS.w[0] = short_sqrt128(&A10);
    CS.w[1] = 0;
    // Wrapping multiply: only the low 64 bits of the square are compared
    // here; the full 128-bit product is checked just below.
    if CS.w[0].wrapping_mul(CS.w[0]) == A10.w[0] {
        S2 = __mul_64x64_to_128_fast(CS.w[0], CS.w[0]);
        if S2.w[1] == A10.w[1] {
            res = bid_get_BID128_very_fast(0, (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1, &CS);
            return res;
        }
    }

    // Refine the digit estimate: wrapping_sub yields the signed difference
    // of the high words via two's complement.
    D = CX.w[1].wrapping_sub(BID_POWER10_INDEX_BINEXP_128[bin_expon_cx as usize].w[1]) as BID_SINT64;
    if D > 0 || (D == 0 && CX.w[0] >= BID_POWER10_INDEX_BINEXP_128[bin_expon_cx as usize].w[0]) {
        digits += 1;
    }

    // Scale CX so its square root carries full precision; the result
    // exponent must be even so it can be halved.
    scale = 67 - digits;
    exponent_q = exponent_x - scale;
    scale += exponent_q & 1;
    if scale > 38 {
        // 10^scale does not fit one table entry: split into two factors.
        T128 = &BID_POWER10_TABLE_128[(scale - 37) as usize];
        CX1 = __mul_128x128_low(&CX, T128);
        TP128 = &BID_POWER10_TABLE_128[37];
        C256 = __mul_128x128_to_256(&CX1, TP128);
    } else {
        T128 = &BID_POWER10_TABLE_128[scale as usize];
        C256 = __mul_128x128_to_256(&CX, T128);
    }

    // C4 = 4*C256, used in the round-to-nearest midpoint test below.
    C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62);
    C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62);
    C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62);
    C4.w[0] = C256.w[0] << 2;

    // CS ~= sqrt(C256), possibly off by one unit in the last place.
    bid_long_sqrt128(&mut CS, &C256);

    if ((rnd_mode as u32) & 3) == 0 {
        // Rounding to nearest: compare 4*C256 against (2*CS + 1)^2.
        CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
        CSM.w[0] = (CS.w[0] << 1) | 1;
        __sqr128_to_256(&mut M256, &CSM);
        if C4.w[3] > M256.w[3]
            || (C4.w[3] == M256.w[3]
                && (C4.w[2] > M256.w[2]
                    || (C4.w[2] == M256.w[2]
                        && (C4.w[1] > M256.w[1]
                            || (C4.w[1] == M256.w[1]
                                && C4.w[0] > M256.w[0]))))) {
            // 4*C256 > (2*CS+1)^2: round up; a wrap of w[0] carries into w[1].
            CS.w[0] = CS.w[0].wrapping_add(1);
            if CS.w[0] == 0 {
                CS.w[1] += 1;
            }
        } else {
            // Check whether CS is one too large: is (2*CS-1)^2 > 4*C256 ?
            C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61);
            C8.w[0] = CS.w[0] << 3;
            // M256 = (2*CS+1)^2 - 8*CS = (2*CS-1)^2
            (M256.w[0], Carry) = __sub_borrow_out(M256.w[0], C8.w[0]);
            (M256.w[1], Carry) = __sub_borrow_in_out(M256.w[1], C8.w[1], Carry);
            (M256.w[2], Carry) = __sub_borrow_in_out(M256.w[2], 0, Carry);
            M256.w[3] -= Carry;
            if M256.w[3] > C4.w[3]
                || (M256.w[3] == C4.w[3]
                    && (M256.w[2] > C4.w[2]
                        || (M256.w[2] == C4.w[2]
                            && (M256.w[1] > C4.w[1]
                                || (M256.w[1] == C4.w[1]
                                    && M256.w[0] > C4.w[0]))))) {
                // CS -= 1, with the borrow out of w[0] taken from w[1].
                if CS.w[0] == 0 {
                    CS.w[1] -= 1;
                }
                CS.w[0] = CS.w[0].wrapping_sub(1);
            }
        }
    } else {
        // Directed rounding: make CS the truncated (round-to-zero) root.
        __sqr128_to_256(&mut M256, &CS);
        C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63);
        C8.w[0] = CS.w[0] << 1;
        if M256.w[3] > C256.w[3]
            || (M256.w[3] == C256.w[3]
                && (M256.w[2] > C256.w[2]
                    || (M256.w[2] == C256.w[2]
                        && (M256.w[1] > C256.w[1]
                            || (M256.w[1] == C256.w[1]
                                && M256.w[0] > C256.w[0]))))) {
            // CS^2 > C256: CS is too large. M256 = (CS-1)^2 = CS^2 - 2*CS + 1.
            (M256.w[0], Carry) = __sub_borrow_out(M256.w[0], C8.w[0]);
            (M256.w[1], Carry) = __sub_borrow_in_out(M256.w[1], C8.w[1], Carry);
            (M256.w[2], Carry) = __sub_borrow_in_out(M256.w[2], 0, Carry);
            M256.w[3] -= Carry;
            M256.w[0] = M256.w[0].wrapping_add(1);
            if M256.w[0] == 0 {
                M256.w[1] = M256.w[1].wrapping_add(1);
                if M256.w[1] == 0 {
                    M256.w[2] = M256.w[2].wrapping_add(1);
                    if M256.w[2] == 0 {
                        M256.w[3] += 1;
                    }
                }
            }
            // CS -= 1 (borrow from w[1] when w[0] wraps).
            if CS.w[0] == 0 {
                CS.w[1] -= 1;
            }
            CS.w[0] = CS.w[0].wrapping_sub(1);
            if M256.w[3] > C256.w[3]
                || (M256.w[3] == C256.w[3]
                    && (M256.w[2] > C256.w[2]
                        || (M256.w[2] == C256.w[2]
                            && (M256.w[1] > C256.w[1]
                                || (M256.w[1] == C256.w[1]
                                    && M256.w[0] > C256.w[0]))))) {
                // (CS-1)^2 still exceeds C256: decrement once more.
                if CS.w[0] == 0 {
                    CS.w[1] -= 1;
                }
                CS.w[0] = CS.w[0].wrapping_sub(1);
            }
        } else {
            // CS^2 <= C256: check (CS+1)^2 = CS^2 + 2*CS + 1.
            (M256.w[0], Carry) = __add_carry_out(M256.w[0], C8.w[0]);
            (M256.w[1], Carry) = __add_carry_in_out(M256.w[1], C8.w[1], Carry);
            (M256.w[2], Carry) = __add_carry_in_out(M256.w[2], 0, Carry);
            M256.w[3] += Carry;
            M256.w[0] = M256.w[0].wrapping_add(1);
            if M256.w[0] == 0 {
                M256.w[1] = M256.w[1].wrapping_add(1);
                if M256.w[1] == 0 {
                    M256.w[2] = M256.w[2].wrapping_add(1);
                    if M256.w[2] == 0 {
                        M256.w[3] += 1;
                    }
                }
            }
            if M256.w[3] < C256.w[3]
                || (M256.w[3] == C256.w[3]
                    && (M256.w[2] < C256.w[2]
                        || (M256.w[2] == C256.w[2]
                            && (M256.w[1] < C256.w[1]
                                || (M256.w[1] == C256.w[1]
                                    && M256.w[0] <= C256.w[0]))))) {
                // (CS+1)^2 <= C256: CS was one too small, increment it.
                // FIX: the carry into the high word must fire only when the
                // low word wraps to zero (`== 0`) — the original tested
                // `!= 0`, incrementing w[1] on every non-wrapping increment
                // and skipping it on the one case where it is required.
                CS.w[0] = CS.w[0].wrapping_add(1);
                if CS.w[0] == 0 {
                    CS.w[1] += 1;
                }
            }
        }
        // Round away from zero when rounding upward.
        if rnd_mode == RoundingMode::Upward {
            CS.w[0] = CS.w[0].wrapping_add(1);
            if CS.w[0] == 0 {
                CS.w[1] += 1;
            }
        }
    }
    // The long path never produces an exact result.
    __set_status_flags(pfpsf, StatusFlags::BID_INEXACT_EXCEPTION);
    let mut expon = (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1;
    res = bid_get_BID128_fast(0, &mut expon, &mut CS);
    res
}