Skip to main content

prototext_core/serialize/common/
floats.rs

1// SPDX-FileCopyrightText: 2025 - 2026 Frederic Ruget <fred@atlant.is> <fred@s3ns.io> (GitHub: @douzebis)
2// SPDX-FileCopyrightText: 2025 - 2026 Thales Cloud Sécurisé
3//
4// SPDX-License-Identifier: MIT
5
6// ── Float / double formatting ─────────────────────────────────────────────────
7// Mirrors format_double_like_text_format / format_float_like_text_format.
8//
9// The Python functions delegate to protobuf's C++ text_format for exact
10// compatibility.  The Rust versions replicate the same algorithm:
11//
12//   double: Python str(float64) shortest round-trip.
13//   float:  ToShortestFloat — try 6..=9 significant digits until f32 round-trips.
14
15pub fn format_double(v: f64) -> String {
16    if v.is_nan() {
17        let bits = v.to_bits();
18        return if bits == f64::NAN.to_bits() {
19            "nan".to_owned()
20        } else {
21            format!("nan(0x{:016x})", bits)
22        };
23    }
24    if v.is_infinite() {
25        return if v > 0.0 {
26            "inf".to_owned()
27        } else {
28            "-inf".to_owned()
29        };
30    }
31
32    // Match Python's str(float) behavior: use scientific notation when
33    // the exponent is < -4 or >= 16 (matching Python's default float repr).
34    // This ensures compatibility with Python's text_format output.
35    let abs_v = v.abs();
36    let use_scientific = abs_v >= 1e16 || (abs_v != 0.0 && abs_v < 1e-4);
37
38    let s = if use_scientific {
39        // Use scientific notation, then convert to Python style
40        format!("{:e}", v)
41    } else {
42        // Use default formatting (which may already include .0 for integers)
43        format!("{}", v)
44    };
45    python_exponent_style(&s)
46}
47
48pub fn format_float(v: f32) -> String {
49    if v.is_nan() {
50        let bits = v.to_bits();
51        return if bits == f32::NAN.to_bits() {
52            "nan".to_owned()
53        } else {
54            format!("nan(0x{:08x})", bits)
55        };
56    }
57    if v.is_infinite() {
58        return if v > 0.0 {
59            "inf".to_owned()
60        } else {
61            "-inf".to_owned()
62        };
63    }
64    for prec in 6usize..=9 {
65        let s = format!("{:.prec$e}", v, prec = prec - 1);
66        let g_str = rust_sci_to_g_style(&s, prec);
67        if let Ok(reparsed) = g_str.parse::<f32>() {
68            if reparsed == v {
69                return python_exponent_style(&g_str);
70            }
71        }
72    }
73    let s = format!("{:.8e}", v);
74    python_exponent_style(&rust_sci_to_g_style(&s, 9))
75}
76
77/// Protoc-style double: try 15 significant digits (%g), fall back to 17.
78///
79/// Mirrors Python's `_format_floating_point_like_protoc(short_precision=15, long_precision=17)`:
80/// `f'{value:.15g}'`, falling back to `f'{value:.17g}'` if needed for exact round-trip.
81pub fn format_double_protoc(v: f64) -> String {
82    if v.is_nan() {
83        return "nan".to_owned();
84    }
85    if v.is_infinite() {
86        return if v > 0.0 {
87            "inf".to_owned()
88        } else {
89            "-inf".to_owned()
90        };
91    }
92    let s15 = format!("{:.14e}", v);
93    let g15 = rust_sci_to_g_style(&s15, 15);
94    if let Ok(r) = g15.parse::<f64>() {
95        if r == v {
96            return protoc_exponent_style(&g15);
97        }
98    }
99    let s17 = format!("{:.16e}", v);
100    protoc_exponent_style(&rust_sci_to_g_style(&s17, 17))
101}
102
103/// Protoc-style float: try 6 significant digits (%g), fall back to 9.
104///
105/// Mirrors Python's `_format_floating_point_like_protoc(short_precision=6, long_precision=9)`
106/// with the vanilla protoc (Google C++) exact bit-level round-trip check:
107/// re-parse the 6g string as f32 and compare bits, falling back to 9g only when
108/// the bit patterns differ.  This replaces the former approximate `1e-7` tolerance (D3 fix).
109pub fn format_float_protoc(v: f32) -> String {
110    if v.is_nan() {
111        return "nan".to_owned();
112    }
113    if v.is_infinite() {
114        return if v > 0.0 {
115            "inf".to_owned()
116        } else {
117            "-inf".to_owned()
118        };
119    }
120    let s6 = format!("{:.5e}", v);
121    let g6 = rust_sci_to_g_style(&s6, 6);
122    if let Ok(r) = g6.parse::<f32>() {
123        if r.to_bits() == v.to_bits() {
124            return protoc_exponent_style(&g6);
125        } // D3: exact bit check
126    }
127    let s9 = format!("{:.8e}", v);
128    protoc_exponent_style(&rust_sci_to_g_style(&s9, 9))
129}
130
/// Rewrites the exponent of a scientific-notation string in protoc / Python
/// `%g` style: an explicit sign followed by at least two digits
/// (`e5` → `e+05`, `e-4` → `e-04`, `e100` → `e+100`).
///
/// The text before the first `e` is copied unchanged. A string without an `e`
/// is returned as-is; in particular, no `.0` is appended to whole numbers.
fn protoc_exponent_style(s: &str) -> String {
    match s.split_once('e') {
        // Not scientific notation: hand the text back untouched (no ".0").
        None => s.to_owned(),
        Some((mantissa, exponent)) => {
            // A missing sign is treated as '+'.
            let (sign, magnitude) = match exponent.as_bytes().first() {
                Some(b'-') => ('-', &exponent[1..]),
                Some(b'+') => ('+', &exponent[1..]),
                _ => ('+', exponent),
            };
            let significant = magnitude.trim_start_matches('0');

            let mut out = String::with_capacity(s.len() + 3);
            out.push_str(mantissa);
            out.push('e');
            out.push(sign);
            // Left-pad with zeros to two digits; an all-zero exponent yields "00".
            for _ in significant.len()..2 {
                out.push('0');
            }
            out.push_str(significant);
            out
        }
    }
}
164
/// Rewrites a numeric string in the style of Python's float `str()`.
///
/// * With an `e`: the exponent is rewritten as an explicit sign followed by at
///   least two digits (`e16` → `e+16`, `e-5` → `e-05`); the text before the
///   first `e` is copied unchanged.
/// * Without an `e`: `.0` is appended unless the string already contains `.`,
///   or contains `n` or `i` (as in `nan` / `inf`).
fn python_exponent_style(s: &str) -> String {
    if let Some((mantissa, exponent)) = s.split_once('e') {
        // A missing sign is treated as '+'.
        let (sign, magnitude) = match exponent.as_bytes().first() {
            Some(b'-') => ('-', &exponent[1..]),
            Some(b'+') => ('+', &exponent[1..]),
            _ => ('+', exponent),
        };
        let significant = magnitude.trim_start_matches('0');

        let mut out = String::with_capacity(s.len() + 3);
        out.push_str(mantissa);
        out.push('e');
        out.push(sign);
        // Left-pad with zeros to two digits; an all-zero exponent yields "00".
        for _ in significant.len()..2 {
            out.push('0');
        }
        out.push_str(significant);
        return out;
    }

    // Plain notation: add ".0" only to bare whole numbers.
    let leave_alone = s.chars().any(|c| matches!(c, '.' | 'n' | 'i'));
    if leave_alone {
        s.to_owned()
    } else {
        format!("{}.0", s)
    }
}
194
195fn rust_sci_to_g_style(rust_sci: &str, prec: usize) -> String {
196    let (mantissa_str, exp_str) = if let Some(pos) = rust_sci.find('e') {
197        (&rust_sci[..pos], &rust_sci[pos + 1..])
198    } else {
199        return rust_sci.to_owned();
200    };
201    let exp: i32 = exp_str
202        .parse()
203        .expect("rust_sci_to_g_style: malformed exponent from Rust float formatter");
204    let is_neg = mantissa_str.starts_with('-');
205    // digits_str contains only ASCII decimal digits; char count == byte count.
206    let digits_str = mantissa_str.trim_start_matches('-').replace('.', "");
207    let sig_digits = digits_str.as_bytes();
208
209    if exp >= -4 && exp < prec as i32 {
210        // Keep decimal_pos as i32: (exp + 1) is negative when exp < -1, and
211        // casting a negative i32 to usize would silently wrap to near-usize::MAX,
212        // causing a capacity overflow panic in the .repeat() calls below.
213        let decimal_pos = exp + 1; // i32; ≤ 0 when exp ∈ {-4,-3,-2,-1}
214        let mut result = String::new();
215        if is_neg {
216            result.push('-');
217        }
218        if decimal_pos <= 0 {
219            // exp < 0: output is "0.000…XYZ" with (-exp-1) leading zeros.
220            // (-exp-1) as usize is safe here: exp ∈ [-4..-1] → value ∈ [0..3].
221            result.push('0');
222            result.push('.');
223            result.push_str(&"0".repeat((-exp - 1) as usize));
224            for d in sig_digits {
225                result.push(*d as char);
226            }
227        } else if decimal_pos as usize >= sig_digits.len() {
228            // decimal_pos > 0 here, so the cast to usize is safe.
229            for d in sig_digits {
230                result.push(*d as char);
231            }
232            result.push_str(&"0".repeat(decimal_pos as usize - sig_digits.len()));
233            // No trailing ".0" — Python %g strips the decimal point entirely
234            // for whole numbers (e.g. 1.0f32 → "1", not "1.0").
235        } else {
236            for (i, d) in sig_digits.iter().enumerate() {
237                if i == decimal_pos as usize {
238                    result.push('.');
239                }
240                result.push(*d as char);
241            }
242        }
243        trim_trailing_zeros_after_dot(&mut result);
244        result
245    } else {
246        let mut result = String::new();
247        if is_neg {
248            result.push('-');
249        }
250        if sig_digits.is_empty() {
251            result.push('0');
252        } else {
253            result.push(sig_digits[0] as char);
254            if sig_digits.len() > 1 {
255                result.push('.');
256                for d in &sig_digits[1..] {
257                    result.push(*d as char);
258                }
259                trim_trailing_zeros_after_dot(&mut result);
260            }
261        }
262        if exp >= 0 {
263            result.push_str(&format!("e+{:02}", exp));
264        } else {
265            result.push_str(&format!("e-{:02}", -exp));
266        }
267        result
268    }
269}
270
/// Strips trailing `0` characters from `s`, then one trailing `.` if that is
/// what remains at the end. Strings without any `.` are left untouched.
fn trim_trailing_zeros_after_dot(s: &mut String) {
    if !s.contains('.') {
        return;
    }
    let without_zeros = s.trim_end_matches('0');
    // Drop at most one dangling decimal point.
    let kept_len = without_zeros
        .strip_suffix('.')
        .unwrap_or(without_zeros)
        .len();
    s.truncate(kept_len);
}
281
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `format_double_protoc` against a table of `(input, expected)` pairs.
    fn check_double_protoc(cases: &[(f64, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(format_double_protoc(input), expected);
        }
    }

    /// Checks `format_float_protoc` against a table of `(input, expected)` pairs.
    fn check_float_protoc(cases: &[(f32, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(format_float_protoc(input), expected);
        }
    }

    // ── format_double_protoc: small magnitudes rendered in decimal form ──────
    //
    // Regression coverage for a capacity-overflow panic in
    // rust_sci_to_g_style. Magnitudes in [1e-4, 1e-2) have exponents -4, -3
    // and -2. The old code computed `decimal_pos = (exp + 1) as usize`, which
    // wrapped a negative i32 to a value near usize::MAX and made `.repeat()`
    // panic.

    #[test]
    fn double_protoc_exp_minus4() {
        // exp = -4 was the deepest broken case (decimal_pos → usize::MAX - 2).
        check_double_protoc(&[
            (1e-4, "0.0001"),
            (-1e-4, "-0.0001"),
            (5.5e-4, "0.00055"),
        ]);
    }

    #[test]
    fn double_protoc_exp_minus3() {
        check_double_protoc(&[(1e-3, "0.001"), (-1.5e-3, "-0.0015")]);
    }

    #[test]
    fn double_protoc_exp_minus2() {
        check_double_protoc(&[(1e-2, "0.01"), (3.75e-2, "0.0375")]);
    }

    // ── Neighbouring exponents that never panicked (boundary sanity) ─────────

    #[test]
    fn double_protoc_exp_minus1() {
        check_double_protoc(&[(1e-1, "0.1"), (2.5e-1, "0.25")]);
    }

    #[test]
    fn double_protoc_exp_zero_and_positive() {
        check_double_protoc(&[(1.0, "1"), (1.5, "1.5"), (123.456, "123.456")]);
    }

    #[test]
    fn double_protoc_scientific_below_threshold() {
        // exp = -5 is below the -4 threshold, so the scientific form is kept.
        check_double_protoc(&[(1e-5, "1e-05")]);
    }

    #[test]
    fn double_protoc_special_values() {
        check_double_protoc(&[
            (f64::NAN, "nan"),
            (f64::INFINITY, "inf"),
            (f64::NEG_INFINITY, "-inf"),
        ]);
    }

    // ── format_float_protoc: the same exponent range, for f32 ────────────────

    #[test]
    fn float_protoc_exp_minus4() {
        check_float_protoc(&[(1e-4, "0.0001"), (5.5e-4, "0.00055")]);
    }

    #[test]
    fn float_protoc_exp_minus3() {
        check_float_protoc(&[(1.5e-3, "0.0015")]);
    }

    #[test]
    fn float_protoc_exp_minus2() {
        check_float_protoc(&[(1e-2, "0.01")]);
    }
}