graphddb_runtime 0.7.5

Rust runtime for GraphDDB — interprets the language-neutral IR (manifest.json + operations.json) and executes the validated access patterns against DynamoDB.
Documentation
//! Renders a DynamoDB number into a cursor token byte-identically to the Python
//! runtime — this module is the single source of truth (SSoT) for that parity.
//!
//! DynamoDB stores every number as a string; boto3's `TypeDeserializer` turns it
//! into a Python `Decimal`, and the Python cursor encoder
//! (`python/graphddb_runtime/cursor.py`, the `_default` number path) renders it as:
//!
//! ```text
//! int(Decimal(X))   if Decimal(X) is integral   (arbitrary precision, exact)
//! float(Decimal(X)) otherwise                    (IEEE-754 double, LOSSY)
//! ```
//!
//! then `json.dumps` formats the result. This module matches that exactly:
//!
//! - **integral** value  -> the exact full-precision integer (e.g. `1E+20` ->
//!   `100000000000000000000`, `1E2` -> `100`, `-0` -> `0`), computed with exact
//!   string arithmetic so a huge integer never overflows to a float;
//! - **fractional** value -> `float(Decimal(X))` then CPython's `json.dumps(float)`
//!   formatting, which is genuinely LOSSY (double round-trip) — a DELIBERATE match
//!   of the Python reference (e.g. `9999999999999999.5` -> `1e+16`,
//!   `123456789012345.678` -> `123456789012345.67`). See [`crate::pyfloat`].
//!
//! The integrality decision uses the EXACT decimal value (string arithmetic), so
//! `9999999999999999.5` is correctly treated as fractional even though its double
//! rounds to an integer.

use crate::errors::GraphDDBError;
use crate::pyfloat::py_repr;

/// Render a DynamoDB "N" attribute string as the cursor token the Python
/// runtime would emit, i.e. the equivalent of
/// `json.dumps(int(Decimal(X)) if integral else float(Decimal(X)))`.
///
/// The integral/fractional decision is made on the exact decimal digits
/// returned by `exact_decimal`, not on a rounded double.
///
/// # Errors
///
/// Propagates the error from `exact_decimal` when `input` is not a valid
/// number, and returns an "invalid DynamoDB number" error if the fractional
/// path cannot parse `input` as an `f64`.
pub fn cursor_token(input: &str) -> Result<String, GraphDDBError> {
    let (neg, int_digits, frac_digits) = exact_decimal(input)?;

    // No fraction digits left -> integral: emit the exact integer digits
    // (int(Decimal(X))), at full precision.
    if frac_digits.is_empty() {
        let magnitude = match int_digits.trim_start_matches('0') {
            "" => "0",
            significant => significant,
        };
        // Zero never carries a sign (Python: int(Decimal('-0')) == 0 -> json "0").
        let sign = if neg && magnitude != "0" { "-" } else { "" };
        return Ok(format!("{sign}{magnitude}"));
    }

    // Fractional -> Python's LOSSY float path: float(Decimal(X)) then json.dumps.
    match input.trim().parse::<f64>() {
        Ok(value) => py_repr(value),
        Err(_) => Err(GraphDDBError::new(format!(
            "invalid DynamoDB number '{input}'"
        ))),
    }
}

/// Parse a DynamoDB number string into an EXACT decimal: `(neg, integer_digits,
/// fraction_digits)` with the decimal point applied and trailing fraction zeros
/// stripped. Uses only string arithmetic, so the integral/fractional decision and
/// the integer digits are exact for any magnitude/precision.
///
/// Accepted grammar (after trimming surrounding whitespace): an optional `+`/`-`
/// sign, ASCII digits with at most one `.` (at least one digit overall), and an
/// optional `e`/`E` exponent that parses as an `i64`.
///
/// Returned parts:
/// - `neg` is `true` iff the input carried a leading `-` (also for zero).
/// - `integer_digits` is never empty; it may carry leading zeros. A zero value
///   is always returned as `"0"` with an empty fraction, whatever its exponent.
/// - `fraction_digits` is empty iff the value is integral.
///
/// # Errors
///
/// Returns an error for an empty string, for anything outside the grammar
/// above, and for a non-zero value whose exponent magnitude exceeds
/// 1,000,000. The exponent bound exists because the exponent is expanded into
/// literal zero digits: without it, an input such as `1e99999999999` would
/// request an enormous allocation (or overflow the position arithmetic) and
/// bring the process down instead of returning an error.
fn exact_decimal(input: &str) -> Result<(bool, String, String), GraphDDBError> {
    // Far beyond any exponent DynamoDB can store, yet small enough that the
    // zero padding below stays a bounded (about 1 MB) allocation.
    const MAX_EXPONENT_MAGNITUDE: u64 = 1_000_000;

    let invalid = || GraphDDBError::new(format!("invalid DynamoDB number '{input}'"));

    let s = input.trim();
    if s.is_empty() {
        return Err(GraphDDBError::new("empty DynamoDB number string"));
    }

    // Optional leading sign.
    let (neg, unsigned) = match s.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, s.strip_prefix('+').unwrap_or(s)),
    };

    // Optional exponent, split at the first 'e'/'E'.
    let (mantissa, exp) = match unsigned.split_once(['e', 'E']) {
        Some((m, e)) => (m, e.parse::<i64>().map_err(|_| invalid())?),
        None => (unsigned, 0),
    };

    // Mantissa: digits with at most one '.', and at least one digit overall.
    let (ip, fp) = mantissa.split_once('.').unwrap_or((mantissa, ""));
    if ip.is_empty() && fp.is_empty() {
        return Err(invalid());
    }
    if !ip.bytes().chain(fp.bytes()).all(|b| b.is_ascii_digit()) {
        return Err(invalid());
    }

    // Zero stays zero under any exponent: skip the scaling entirely so that
    // e.g. `0e999999999` costs nothing and is not rejected by the bound below.
    if ip.bytes().chain(fp.bytes()).all(|b| b == b'0') {
        return Ok((neg, "0".to_string(), String::new()));
    }

    if exp.unsigned_abs() > MAX_EXPONENT_MAGNITUDE {
        return Err(invalid());
    }
    // Lossless: bounded by MAX_EXPONENT_MAGNITUDE just above.
    let shift = exp.unsigned_abs() as usize;

    // Apply the exponent by moving the decimal point within the digit string,
    // padding with zeros on whichever side the point runs off.
    let mut digits = format!("{ip}{fp}");
    let point = if exp < 0 {
        match ip.len().checked_sub(shift) {
            Some(p) => p,
            None => {
                // Point moves left past the first digit: pad in front.
                digits.insert_str(0, &"0".repeat(shift - ip.len()));
                0
            }
        }
    } else {
        let p = ip.len() + shift;
        if p > digits.len() {
            // Point moves right past the last digit: pad behind.
            digits.push_str(&"0".repeat(p - digits.len()));
        }
        p
    };

    let (int_part, frac_part) = digits.split_at(point);
    let int_part = if int_part.is_empty() { "0" } else { int_part };
    Ok((
        neg,
        int_part.to_string(),
        frac_part.trim_end_matches('0').to_string(),
    ))
}

#[cfg(test)]
mod tests {
    use super::*;

    // Every expected token is pinned to the ACTUAL output of python3
    // `json.dumps(int(Decimal(x)) if integral else float(Decimal(x)))`.
    #[test]
    fn matches_python_cursor_tokens() {
        let cases: &[(&str, &str)] = &[
            ("0", "0"),
            ("-0", "0"),
            ("1e20", "100000000000000000000"),
            ("2.6755e-9", "2.6755e-09"),
            ("9999999999999999.5", "1e+16"),
            ("12345678901234567890", "12345678901234567890"),
            ("123456789012345.678", "123456789012345.67"),
            ("100", "100"),
            ("1E+20", "100000000000000000000"),
            ("1E2", "100"),
            ("-0.0", "0"),
            ("3", "3"),
            ("3.0", "3"),
            ("-5", "-5"),
        ];
        for (input, expected) in cases {
            assert_eq!(
                cursor_token(input).unwrap(),
                *expected,
                "cursor_token({input})"
            );
        }
    }

    /// Fuzz `cursor_token` against python3
    /// `json.dumps(int(Decimal(x)) if Decimal(x)==Decimal(x).to_integral_value()
    /// else float(Decimal(x)))` across integral (incl. huge / signed-zero) and
    /// fractional (lossy) inputs. Expectations are regenerated FROM python3; skipped
    /// when python3 is unavailable.
    #[test]
    fn fuzz_cursor_tokens_against_python3() {
        use std::io::Write;
        use std::process::{Command, Stdio};

        // Hand-picked edge cases first, then pseudo-random inputs.
        let mut inputs: Vec<String> = vec![
            "0".into(),
            "-0".into(),
            "-0.0".into(),
            "1e20".into(),
            "1E+20".into(),
            "12345678901234567890".into(),
            "9999999999999999.5".into(),
            "123456789012345.678".into(),
            "2.6755e-9".into(),
            "100".into(),
            "-5".into(),
            "3.0".into(),
        ];
        // Fixed-seed LCG so the generated inputs are identical on every run.
        let mut state: u64 = 0xDEAD_BEEF_CAFE_1234;
        for _ in 0..3000 {
            state = state.wrapping_mul(6364136223846793005).wrapping_add(1);
            let magnitude = (state % 30) as i32 - 15;
            let mantissa = state >> 8;
            // Some integral, some fractional.
            if state & 1 == 0 {
                inputs.push(format!("{}e{}", mantissa, magnitude.max(0)));
            } else {
                inputs.push(format!(
                    "{}.{}",
                    mantissa % 1_000_000,
                    (state >> 20) % 1_000_000
                ));
            }
        }

        let script = r#"
import sys, json
from decimal import Decimal
for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    d = Decimal(line)
    v = int(d) if d == d.to_integral_value() else float(d)
    print(json.dumps(v))
"#;
        let stdin_data: String = inputs.iter().map(|s| format!("{s}\n")).collect();
        let out = Command::new("python3")
            .arg("-c")
            .arg(script)
            .stdin(Stdio::piped())
            .stdout(Stdio::piped())
            .spawn()
            .and_then(|mut child| {
                let mut stdin = child.stdin.take().expect("stdin was configured as piped");
                // Feed stdin from a helper thread while this thread drains stdout.
                // Writing everything up front can deadlock: once the child's stdout
                // pipe fills it stops reading stdin, and our write then blocks too.
                // The handle is dropped when the thread finishes, signalling EOF.
                let writer = std::thread::spawn(move || stdin.write_all(stdin_data.as_bytes()));
                let output = child.wait_with_output();
                let written = writer.join().expect("stdin writer thread panicked");
                written.and(output)
            });
        let out = match out {
            Ok(o) if o.status.success() => o,
            _ => {
                eprintln!("fuzz_cursor_tokens_against_python3: python3 unavailable — skipping");
                return;
            }
        };
        // One output line per input line, in the same order.
        let expected: Vec<&str> = std::str::from_utf8(&out.stdout).unwrap().lines().collect();
        assert_eq!(expected.len(), inputs.len());
        for (input, exp) in inputs.iter().zip(expected) {
            assert_eq!(cursor_token(input).unwrap(), exp, "cursor_token({input})");
        }
    }
}