graphddb_runtime 0.7.5

//! Format an `f64` byte-identically to CPython's `repr(float)` / `json.dumps(float)`.
//!
//! The Python runtime is the parity SSoT (see `python/graphddb_runtime/cursor.py`):
//! its cursor encoder and JSON output go through `float()` then CPython's float
//! formatter. CPython `float_repr` emits the shortest round-trip decimal digits
//! (David Gay mode 0) then, in `format_float_short('r')`, chooses fixed vs
//! scientific by the decimal-point position and formats the exponent with a sign
//! and >= 2 digits.
//!
//! Rust's `{}` / `{:e}` formatting also emits a shortest round-trip decimal, but
//! it can choose a different final digit than CPython on an exact tie (see
//! `shortest_digits`), and its surface format disagrees with Python on (a) the
//! fixed-vs-scientific threshold, (b) whether an integral-mantissa scientific
//! value keeps a `.0`, and (c) exponent zero-padding. This module therefore
//! derives the digits itself and re-applies CPython's exact
//! `format_float_short('r')` rules, matching Python across the whole double
//! range (validated against `json.dumps` — see the tests, whose vectors are
//! pinned to `python3` output, never to Rust's own).

use crate::errors::GraphDDBError;

/// Format a finite `f64` the way CPython's `repr(float)` / `json.dumps(float)`
/// does.
///
/// The digits come from `shortest_digits`; this function only decides the
/// surface layout (fixed vs scientific, trailing `.0`, exponent sign/padding).
///
/// # Errors
///
/// Returns a `GraphDDBError` for NaN and ±infinity — only finite floats are
/// supported (the runtime never serializes NaN/Inf into a DynamoDB N).
pub fn py_repr(f: f64) -> Result<String, GraphDDBError> {
    if !f.is_finite() {
        return Err(GraphDDBError::new("Cannot format non-finite number"));
    }

    // Both zeros compare equal to 0.0 under IEEE 754, so they are caught here
    // before any digit extraction; the sign bit tells them apart.
    // Python: json.dumps(0.0) == "0.0", json.dumps(-0.0) == "-0.0".
    if f == 0.0 {
        let zero = if f.is_sign_negative() { "-0.0" } else { "0.0" };
        return Ok(zero.to_string());
    }

    let (neg, digits, decpt) = shortest_digits(f);
    let digit_count = digits.len() as i32;

    let body = match decpt {
        // CPython format_float_short 'r': scientific iff decpt <= -4 or decpt > 16.
        d if d <= -4 || d > 16 => {
            let (lead, rest) = digits.split_at(1);
            let exp10 = d - 1;
            let exp_sign = if exp10 < 0 { '-' } else { '+' };
            // The exponent always carries a sign and at least two digits.
            let exp_abs = exp10.abs();
            if rest.is_empty() {
                format!("{lead}e{exp_sign}{exp_abs:02}")
            } else {
                format!("{lead}.{rest}e{exp_sign}{exp_abs:02}")
            }
        }
        // 0.00ddd — the point sits at or before the first digit.
        d if d <= 0 => format!("0.{}{}", "0".repeat((-d) as usize), digits),
        // ddd00.0 — Python keeps a trailing ".0" on an integral-valued float.
        d if d >= digit_count => {
            format!("{}{}.0", digits, "0".repeat((d - digit_count) as usize))
        }
        // dd.ddd — the point falls inside the digit string.
        d => {
            let (whole, frac) = digits.split_at(d as usize);
            format!("{whole}.{frac}")
        }
    };

    Ok(if neg { format!("-{body}") } else { body })
}

/// Shortest round-trip decimal digits + decimal-point position for a nonzero
/// finite float. Returns `(neg, digits, decpt)` where the value is
/// `±0.<digits> × 10^decpt` (the point sits after `decpt` digits, so decpt=1,
/// digits="15" means 1.5). Digits carry no leading/trailing zeros.
///
/// Replicates CPython's David-Gay mode-0 shortest selection INCLUDING its
/// round-half-to-even tie-break: `format!("{:e}")` also yields a shortest
/// round-trip, but it can pick the OTHER digit on an exact tie (e.g.
/// `2204138485456065.25` → Rust `…65.3` vs CPython `…65.2`). To match Python
/// exactly we instead find the smallest precision `p` (0..=17) at which a
/// `p`-fraction-digit decimal round-trips. The first candidate at each
/// precision is the round-half-to-even `{:.p e}` formatting — Rust's `{:.*e}`
/// rounds half-to-even, the same rule CPython uses.
///
/// Powers of two need one extra candidate. Just below a power of two the
/// doubles are spaced half as far apart as just above it, so the set of
/// decimals that read back as `f` reaches twice as far away from zero as
/// toward it. The nearest `p`-digit decimal can therefore miss that set on the
/// narrow side while its neighbour one unit in the last place larger (in
/// magnitude) is inside it. Mode 0 accepts that neighbour, e.g. `2^-24`
/// (exactly `5.9604644775390625e-08`) prints as `5.960464477539063e-08`, not
/// with all 17 digits. For every other input the extra candidate is never
/// tried.
fn shortest_digits(f: f64) -> (bool, String, i32) {
    const MANTISSA_MASK: u64 = 0x000f_ffff_ffff_ffff;
    let bits = f.to_bits();
    let biased_exp = (bits >> 52) & 0x7ff;
    // Zero fraction bits = power of two. Biased exponent 1 (the smallest
    // normal) is excluded because its lower neighbour is a subnormal with the
    // same spacing; 0x7ff (infinity) is excluded as non-finite.
    let asymmetric = bits & MANTISSA_MASK == 0 && biased_exp > 1 && biased_exp < 0x7ff;

    let chosen = (0..=17usize).find_map(|p| {
        let rounded = format!("{:.*e}", p, f);
        if rounded.parse::<f64>() == Ok(f) {
            return Some(rounded);
        }
        if !asymmetric {
            return None;
        }
        bump_last_digit(&rounded).filter(|bumped| bumped.parse::<f64>() == Ok(f))
    });
    // p=16 (17 significant digits) always round-trips a finite f64, so `chosen`
    // is always Some for the inputs this function is documented for.
    let s = chosen.unwrap_or_else(|| format!("{:.17e}", f));

    let (neg, unsigned) = match s.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, s.as_str()),
    };
    let (mantissa, exp_part) = unsigned.split_once('e').unwrap_or((unsigned, "0"));
    let exp: i32 = exp_part
        .parse()
        .expect("f64 {:e} exponent is always an integer");

    let (int_part, frac_part) = mantissa.split_once('.').unwrap_or((mantissa, ""));
    // `{:e}` always emits exactly one integer digit, so digits = int ++ frac and
    // the decimal point sits after `int_part.len() + exp` digits.
    let all_digits = format!("{int_part}{frac_part}");
    let mut decpt = int_part.len() as i32 + exp;

    // Strip leading zeros (each shifts decpt left). Unreachable for nonzero f in
    // scientific form (the leading digit is nonzero), but kept for robustness.
    let without_lead = all_digits.trim_start_matches('0');
    if without_lead.is_empty() {
        return (neg, "0".to_string(), 1);
    }
    decpt -= (all_digits.len() - without_lead.len()) as i32;

    // Strip trailing zeros (do not affect decpt). `without_lead` starts with a
    // nonzero digit, so the result is never empty.
    let digits = without_lead.trim_end_matches('0').to_string();

    (neg, digits, decpt)
}

/// Return `sci` (a `[-]d[.ddd]e<exp>` string as produced by `{:.*e}`) with its
/// mantissa moved one unit in the last place away from zero, keeping the same
/// number of mantissa digits (`9.99e5` becomes `1.00e6`). Returns `None` when
/// `sci` is not in that shape (e.g. `NaN` / `inf`).
fn bump_last_digit(sci: &str) -> Option<String> {
    let (sign, body) = match sci.strip_prefix('-') {
        Some(rest) => ("-", rest),
        None => ("", sci),
    };
    let (mantissa, exp_part) = body.split_once('e')?;
    let mut exp: i32 = exp_part.parse().ok()?;

    let mut digits: Vec<u8> = mantissa.bytes().filter(|b| *b != b'.').collect();
    if digits.is_empty() || !digits.iter().all(u8::is_ascii_digit) {
        return None;
    }

    // Decimal increment with carry, from the least significant digit.
    let mut carry_out = true;
    for d in digits.iter_mut().rev() {
        if *d == b'9' {
            *d = b'0';
        } else {
            *d += 1;
            carry_out = false;
            break;
        }
    }
    if carry_out {
        // 99…9 + 1 = 100…0: same digit count, one decade higher.
        digits[0] = b'1';
        exp += 1;
    }

    let mut out = String::with_capacity(sci.len() + 2);
    out.push_str(sign);
    out.push(digits[0] as char);
    if digits.len() > 1 {
        out.push('.');
        out.extend(digits[1..].iter().map(|&b| b as char));
    }
    out.push('e');
    out.push_str(&exp.to_string());
    Some(out)
}

#[cfg(test)]
mod tests {
    use super::*;

    // Every expected string below is pinned to the ACTUAL output of python3
    // `json.dumps(float)` (see the task's number-parity ground truth), NEVER to
    // Rust's own formatting.
    #[test]
    #[allow(clippy::excessive_precision)] // deliberate: the f64 rounds, matching Python's lossy path
    fn matches_python_json_dumps_float() {
        let cases: &[(f64, &str)] = &[
            (0.0, "0.0"),
            (-0.0, "-0.0"),
            (1e20, "1e+20"),
            (2.6755e-9, "2.6755e-09"),
            (9999999999999999.5, "1e+16"),
            (3.0, "3.0"),
            (-5.0, "-5.0"),
            (1.5, "1.5"),
            (1e-5, "1e-05"),
            (1e-4, "0.0001"),
            (1e16, "1e+16"),
            (1e15, "1000000000000000.0"),
            (123456789012345.678, "123456789012345.67"),
            (1e-9, "1e-09"),
            (-1e-9, "-1e-09"),
            (2.5e-10, "2.5e-10"),
        ];
        for (f, expected) in cases {
            assert_eq!(py_repr(*f).unwrap(), *expected, "py_repr({f})");
        }
    }

    /// Both zeros take the early-return path and keep their sign.
    #[test]
    fn signed_zero_distinguished_without_div() {
        assert_eq!(py_repr(0.0).unwrap(), "0.0");
        assert_eq!(py_repr(-0.0).unwrap(), "-0.0");
    }

    /// NaN and both infinities are reported as errors rather than formatted.
    #[test]
    fn rejects_non_finite() {
        assert!(py_repr(f64::NAN).is_err());
        assert!(py_repr(f64::INFINITY).is_err());
        assert!(py_repr(f64::NEG_INFINITY).is_err());
    }

    /// Fuzz `py_repr` against python3 `json.dumps(float)` across the double range —
    /// signed zeros, subnormals, sci-notation thresholds, large integers, and many
    /// random-magnitude bit patterns. Expectations are regenerated FROM python3 in
    /// this test run (never pinned to Rust's own output). Skipped (not failed) when
    /// python3 is unavailable, so a python-less dev box still builds.
    #[test]
    #[allow(clippy::excessive_precision)] // literal below deliberately over-specifies the double
    fn fuzz_against_python3() {
        use std::io::Write;
        use std::process::{Command, Stdio};

        // A deterministic spread of f64 bit patterns + notable edge values.
        let mut inputs: Vec<f64> = vec![
            0.0,
            -0.0,
            1.0,
            -1.0,
            f64::MIN_POSITIVE,                     // smallest normal
            f64::from_bits(1),                     // smallest subnormal
            f64::from_bits(0x000f_ffff_ffff_ffff), // largest subnormal
            1e-5,
            1e-4,
            9.999e-5,
            1e15,
            1e16,
            9.999_999_999_999_999e15,
            1e20,
            2.6755e-9,
            5e-324,
            1.7976931348623157e308, // MAX
        ];
        // A pseudo-random LCG over exponents/mantissas for broad coverage.
        let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
        for _ in 0..5000 {
            state = state
                .wrapping_mul(6364136223846793005)
                .wrapping_add(1442695040888963407);
            let bits = state ^ (state >> 29);
            let f = f64::from_bits(bits);
            if f.is_finite() {
                inputs.push(f);
            }
        }

        // Feed every input's exact bit pattern to python3 as a hex float so the two
        // sides operate on the IDENTICAL double, then compare json.dumps output.
        let script = r#"
import sys, json
for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    f = float.fromhex(line)
    print(json.dumps(f))
"#;
        let stdin_data: String = inputs
            .iter()
            .map(|f| format!("{}\n", hex_float(*f)))
            .collect();

        let out = Command::new("python3")
            .arg("-c")
            .arg(script)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .spawn()
            .and_then(|mut child| {
                let mut stdin = child
                    .stdin
                    .take()
                    .expect("child stdin was configured as piped");
                // Both directions carry more than a pipe buffer's worth of data.
                // Writing all of stdin before reading any stdout can deadlock
                // (child blocked writing stdout, parent blocked writing stdin),
                // so stdin is fed from a helper thread while this thread drains
                // stdout. The handle is dropped when the thread finishes, which
                // closes the pipe and gives python3 its EOF.
                let writer = std::thread::spawn(move || stdin.write_all(stdin_data.as_bytes()));
                let output = child.wait_with_output();
                let written = writer.join().expect("stdin writer thread panicked");
                written?;
                output
            });
        let out = match out {
            Ok(o) if o.status.success() => o,
            _ => {
                eprintln!("fuzz_against_python3: python3 unavailable — skipping");
                return;
            }
        };
        let expected: Vec<&str> = std::str::from_utf8(&out.stdout).unwrap().lines().collect();
        assert_eq!(
            expected.len(),
            inputs.len(),
            "python3 produced a different count"
        );
        for (f, exp) in inputs.iter().zip(expected) {
            assert_eq!(
                py_repr(*f).unwrap(),
                exp,
                "py_repr mismatch for {:#018x} ({})",
                f.to_bits(),
                hex_float(*f)
            );
        }
    }

    /// Rust `f64` -> C99 hex float string that python3 `float.fromhex` round-trips
    /// exactly (so both sides operate on the identical double).
    fn hex_float(f: f64) -> String {
        if f == 0.0 {
            return if f.is_sign_negative() {
                "-0x0p+0".into()
            } else {
                "0x0p+0".into()
            };
        }
        let bits = f.to_bits();
        let sign = if bits >> 63 == 1 { "-" } else { "" };
        // 11-bit biased exponent and 52-bit fraction field of the IEEE 754 double.
        let exp = ((bits >> 52) & 0x7ff) as i64;
        let mantissa = bits & 0x000f_ffff_ffff_ffff;
        if exp == 0 {
            // Subnormal: 0x0.<mantissa>p-1022
            format!("{sign}0x0.{:013x}p-1022", mantissa)
        } else {
            // Normal: 0x1.<mantissa>p<exp-1023>
            format!("{sign}0x1.{:013x}p{:+}", mantissa, exp - 1023)
        }
    }
}