prototext_core/serialize/
common.rs

1// SPDX-FileCopyrightText: 2025 - 2026 Frederic Ruget <fred@atlant.is> <fred@s3ns.io> (GitHub: @douzebis)
2// SPDX-FileCopyrightText: 2025 - 2026 Thales Cloud Sécurisé
3//
4// SPDX-License-Identifier: MIT
5
6use crate::decoder::{ProtoTextContent, ProtoTextField};
7
8// ── Byte / string escaping ────────────────────────────────────────────────────
9// Mirrors escape_bytes() / escape_string() in common.py.
10
/// Escapes arbitrary bytes into an ASCII-only `String`.
///
/// Backslash, double quote, single quote, LF, CR and TAB become two-character
/// backslash escapes.  Any other byte in `32..=126` is copied unchanged, and
/// every remaining byte is written as a three-digit octal escape (`\NNN`).
pub fn escape_bytes(b: &[u8]) -> String {
    let mut escaped = String::with_capacity(b.len());
    for &octet in b {
        // Second character of the two-character escape, if this byte has one.
        let short = match octet {
            b'\\' => Some('\\'),
            b'"' => Some('"'),
            b'\'' => Some('\''),
            b'\n' => Some('n'),
            b'\r' => Some('r'),
            b'\t' => Some('t'),
            _ => None,
        };
        match short {
            Some(letter) => {
                escaped.push('\\');
                escaped.push(letter);
            }
            None if (32..=126).contains(&octet) => escaped.push(char::from(octet)),
            None => {
                // Three octal digits, most significant first.
                escaped.push('\\');
                for shift in [6u8, 3, 0] {
                    escaped.push(char::from(b'0' + ((octet >> shift) & 7)));
                }
            }
        }
    }
    escaped
}
30
/// Buffer-appending counterpart of `escape_bytes`.
///
/// Applies the same escaping rules but writes the result to the end of `out`
/// instead of building a new `String`; existing contents of `out` are kept.
#[inline]
pub fn escape_bytes_into(b: &[u8], out: &mut Vec<u8>) {
    for &octet in b {
        // Second byte of the two-byte escape, if this byte has one.
        let short = match octet {
            b'\\' => Some(b'\\'),
            b'"' => Some(b'"'),
            b'\'' => Some(b'\''),
            b'\n' => Some(b'n'),
            b'\r' => Some(b'r'),
            b'\t' => Some(b't'),
            _ => None,
        };
        if let Some(letter) = short {
            out.extend_from_slice(&[b'\\', letter]);
        } else if (32..=126).contains(&octet) {
            out.push(octet);
        } else {
            // Octal escape \NNN, most significant digit first.
            out.extend_from_slice(&[
                b'\\',
                b'0' + (octet >> 6),
                b'0' + ((octet >> 3) & 7),
                b'0' + (octet & 7),
            ]);
        }
    }
}
53
/// Escapes a UTF-8 string for quoting.
///
/// Backslash, double quote, LF, CR and TAB become two-character backslash
/// escapes, and any other character below U+0020 becomes a three-digit octal
/// escape.  Everything else is copied verbatim: unlike `escape_bytes`, the
/// single quote, DEL (U+007F) and non-ASCII characters are not escaped.
pub fn escape_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        // Second character of the two-character escape, if this char has one.
        let short = match ch {
            '\\' => Some('\\'),
            '"' => Some('"'),
            '\n' => Some('n'),
            '\r' => Some('r'),
            '\t' => Some('t'),
            _ => None,
        };
        if let Some(letter) = short {
            escaped.push('\\');
            escaped.push(letter);
        } else if u32::from(ch) < 32 {
            // Remaining control characters: \NNN in octal.
            let code = u32::from(ch);
            escaped.push('\\');
            for shift in [6u32, 3, 0] {
                escaped.push(char::from(b'0' + ((code >> shift) & 7) as u8));
            }
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
71
/// Buffer-appending counterpart of `escape_string`.
///
/// Applies the same escaping rules but appends the UTF-8 result to `out`
/// instead of building a new `String`; existing contents of `out` are kept.
#[inline]
pub fn escape_string_into(s: &str, out: &mut Vec<u8>) {
    // Scratch space for encoding one character as UTF-8.
    let mut utf8 = [0u8; 4];
    for ch in s.chars() {
        // Second byte of the two-byte escape, if this char has one.
        let short = match ch {
            '\\' => Some(b'\\'),
            '"' => Some(b'"'),
            '\n' => Some(b'n'),
            '\r' => Some(b'r'),
            '\t' => Some(b't'),
            _ => None,
        };
        if let Some(letter) = short {
            out.extend_from_slice(&[b'\\', letter]);
        } else if u32::from(ch) < 32 {
            // Octal escape \NNN; the value is below 32, so it fits in a byte.
            let code = ch as u8;
            out.extend_from_slice(&[
                b'\\',
                b'0' + (code >> 6),
                b'0' + ((code >> 3) & 7),
                b'0' + (code & 7),
            ]);
        } else {
            out.extend_from_slice(ch.encode_utf8(&mut utf8).as_bytes());
        }
    }
}
97
98// ── Float / double formatting ─────────────────────────────────────────────────
99// Mirrors format_double_like_text_format / format_float_like_text_format.
100//
101// The Python functions delegate to protobuf's C++ text_format for exact
102// compatibility.  The Rust versions replicate the same algorithm:
103//
104//   double: Python str(float64) shortest round-trip.
105//   float:  ToShortestFloat — try 6..=9 significant digits until f32 round-trips.
106
107pub fn format_double(v: f64) -> String {
108    if v.is_nan() {
109        let bits = v.to_bits();
110        return if bits == f64::NAN.to_bits() {
111            "nan".to_owned()
112        } else {
113            format!("nan(0x{:016x})", bits)
114        };
115    }
116    if v.is_infinite() {
117        return if v > 0.0 {
118            "inf".to_owned()
119        } else {
120            "-inf".to_owned()
121        };
122    }
123
124    // Match Python's str(float) behavior: use scientific notation when
125    // the exponent is < -4 or >= 16 (matching Python's default float repr).
126    // This ensures compatibility with Python's text_format output.
127    let abs_v = v.abs();
128    let use_scientific = abs_v >= 1e16 || (abs_v != 0.0 && abs_v < 1e-4);
129
130    let s = if use_scientific {
131        // Use scientific notation, then convert to Python style
132        format!("{:e}", v)
133    } else {
134        // Use default formatting (which may already include .0 for integers)
135        format!("{}", v)
136    };
137    python_exponent_style(&s)
138}
139
140pub fn format_float(v: f32) -> String {
141    if v.is_nan() {
142        let bits = v.to_bits();
143        return if bits == f32::NAN.to_bits() {
144            "nan".to_owned()
145        } else {
146            format!("nan(0x{:08x})", bits)
147        };
148    }
149    if v.is_infinite() {
150        return if v > 0.0 {
151            "inf".to_owned()
152        } else {
153            "-inf".to_owned()
154        };
155    }
156    for prec in 6usize..=9 {
157        let s = format!("{:.prec$e}", v, prec = prec - 1);
158        let g_str = rust_sci_to_g_style(&s, prec);
159        if let Ok(reparsed) = g_str.parse::<f32>() {
160            if reparsed == v {
161                return python_exponent_style(&g_str);
162            }
163        }
164    }
165    let s = format!("{:.8e}", v);
166    python_exponent_style(&rust_sci_to_g_style(&s, 9))
167}
168
169/// Protoc-style double: try 15 significant digits (%g), fall back to 17.
170///
171/// Mirrors Python's `_format_floating_point_like_protoc(short_precision=15, long_precision=17)`:
172/// `f'{value:.15g}'`, falling back to `f'{value:.17g}'` if needed for exact round-trip.
173pub fn format_double_protoc(v: f64) -> String {
174    if v.is_nan() {
175        let bits = v.to_bits();
176        return if bits == f64::NAN.to_bits() {
177            "nan".to_owned()
178        } else {
179            format!("nan(0x{:016x})", bits)
180        };
181    }
182    if v.is_infinite() {
183        return if v > 0.0 {
184            "inf".to_owned()
185        } else {
186            "-inf".to_owned()
187        };
188    }
189    let s15 = format!("{:.14e}", v);
190    let g15 = rust_sci_to_g_style(&s15, 15);
191    if let Ok(r) = g15.parse::<f64>() {
192        if r == v {
193            return protoc_exponent_style(&g15);
194        }
195    }
196    let s17 = format!("{:.16e}", v);
197    protoc_exponent_style(&rust_sci_to_g_style(&s17, 17))
198}
199
200/// Protoc-style float: try 6 significant digits (%g), fall back to 9.
201///
202/// Mirrors Python's `_format_floating_point_like_protoc(short_precision=6, long_precision=9)`
203/// with the vanilla protoc (Google C++) exact bit-level round-trip check:
204/// re-parse the 6g string as f32 and compare bits, falling back to 9g only when
205/// the bit patterns differ.  This replaces the former approximate `1e-7` tolerance (D3 fix).
206pub fn format_float_protoc(v: f32) -> String {
207    if v.is_nan() {
208        let bits = v.to_bits();
209        return if bits == f32::NAN.to_bits() {
210            "nan".to_owned()
211        } else {
212            format!("nan(0x{:08x})", bits)
213        };
214    }
215    if v.is_infinite() {
216        return if v > 0.0 {
217            "inf".to_owned()
218        } else {
219            "-inf".to_owned()
220        };
221    }
222    let s6 = format!("{:.5e}", v);
223    let g6 = rust_sci_to_g_style(&s6, 6);
224    if let Ok(r) = g6.parse::<f32>() {
225        if r.to_bits() == v.to_bits() {
226            return protoc_exponent_style(&g6);
227        } // D3: exact bit check
228    }
229    let s9 = format!("{:.8e}", v);
230    protoc_exponent_style(&rust_sci_to_g_style(&s9, 9))
231}
232
233// ── Per-type protoc-compatible scalar formatters ───────────────────────────────
234//
235// One function per proto2 type, matching `protoc --decode` output exactly.
236// See ext/prototext_codec/FLOATS_AND_DOUBLES.md §1 for the ground-truth tables.
237//
238// All of these are trivial wrappers today; they exist so that:
239//   (a) render_text.rs and format_protoc_value have a single named call-site per type,
240//   (b) future changes (e.g. adopting ryu for double) are made in one place only.
241
/// Decimal rendering shared by the integer-like formatters below.
#[inline]
fn to_decimal<T: std::fmt::Display>(v: T) -> String {
    v.to_string()
}

/// Renders an `int32` value as signed decimal text.
#[inline]
pub fn format_int32_protoc(v: i32) -> String {
    to_decimal(v)
}
/// Renders an `int64` value as signed decimal text.
#[inline]
pub fn format_int64_protoc(v: i64) -> String {
    to_decimal(v)
}
/// Renders a `uint32` value as unsigned decimal text.
#[inline]
pub fn format_uint32_protoc(v: u32) -> String {
    to_decimal(v)
}
/// Renders a `uint64` value as unsigned decimal text.
#[inline]
pub fn format_uint64_protoc(v: u64) -> String {
    to_decimal(v)
}
/// Renders a `sint32` value as signed decimal text; `v` is expected to be
/// zigzag-decoded by the caller already.
#[inline]
pub fn format_sint32_protoc(v: i32) -> String {
    to_decimal(v)
}
/// Renders a `sint64` value as signed decimal text; `v` is expected to be
/// zigzag-decoded by the caller already.
#[inline]
pub fn format_sint64_protoc(v: i64) -> String {
    to_decimal(v)
}
/// Renders a `fixed32` value as unsigned decimal text (decimal, not hex).
#[inline]
pub fn format_fixed32_protoc(v: u32) -> String {
    to_decimal(v)
}
/// Renders a `fixed64` value as unsigned decimal text (decimal, not hex).
#[inline]
pub fn format_fixed64_protoc(v: u64) -> String {
    to_decimal(v)
}
/// Renders an `sfixed32` value as signed decimal text.
#[inline]
pub fn format_sfixed32_protoc(v: i32) -> String {
    to_decimal(v)
}
/// Renders an `sfixed64` value as signed decimal text.
#[inline]
pub fn format_sfixed64_protoc(v: i64) -> String {
    to_decimal(v)
}
/// Renders a `bool` value as the literal `"true"` or `"false"`.
#[inline]
pub fn format_bool_protoc(v: bool) -> &'static str {
    match v {
        true => "true",
        false => "false",
    }
}
/// Renders an `enum` value by its number, as signed decimal text (the same
/// representation as `int32`).
#[inline]
pub fn format_enum_protoc(v: i32) -> String {
    to_decimal(v)
}
306
307// ── Wire-type fallback formatters ─────────────────────────────────────────────
308//
309// Used for unknown fields (field number absent from schema) and wire-type
310// mismatches (field in schema but wire type differs from declared type).
311//
312// Protoc renders any such field solely by its actual wire type — schema is ignored.
313// `--decode_raw` and `--decode=Msg` produce byte-for-byte identical output.
314// See FLOATS_AND_DOUBLES.md §1.3 and §2.2 D2 for ground truth.
315
/// Fallback for a VARINT (wire type 0) that is unknown or mismatches its
/// declared type: the raw value as unsigned decimal text.
#[inline]
pub fn format_wire_varint_protoc(v: u64) -> String {
    v.to_string()
}
/// Fallback for a FIXED32 (wire type 5) that is unknown or mismatches its
/// declared type: `0x` followed by exactly 8 lowercase, zero-padded hex digits.
#[inline]
pub fn format_wire_fixed32_protoc(v: u32) -> String {
    // `#` adds the `0x` prefix; width 10 = 2 prefix chars + 8 digits.
    format!("{:#010x}", v)
}
/// Fallback for a FIXED64 (wire type 1) that is unknown or mismatches its
/// declared type: `0x` followed by exactly 16 lowercase, zero-padded hex digits.
#[inline]
pub fn format_wire_fixed64_protoc(v: u64) -> String {
    // `#` adds the `0x` prefix; width 18 = 2 prefix chars + 16 digits.
    format!("{:#018x}", v)
}
331
/// Rewrites the exponent of `s` with an explicit sign and at least two digits.
///
/// The text is split at the first `e`.  The exponent's leading zeros are
/// dropped and it is then zero-padded back to a minimum of two digits, so
/// `1e16` becomes `1e+16`, `1.5e-5` becomes `1.5e-05` and `1e-300` stays
/// `1e-300`.  Returns `None` when `s` contains no `e`.
fn normalise_exponent(s: &str) -> Option<String> {
    let (mantissa, exponent) = s.split_once('e')?;
    let (sign, magnitude) = match exponent.strip_prefix('-') {
        Some(rest) => ('-', rest),
        // A missing sign is treated as '+'.
        None => ('+', exponent.strip_prefix('+').unwrap_or(exponent)),
    };
    let magnitude = magnitude.trim_start_matches('0');
    // `{:0>2}` left-pads with '0' to width 2 (an all-zero exponent becomes "00").
    Some(format!("{}e{}{:0>2}", mantissa, sign, magnitude))
}

/// Exponent normalisation for protoc-style (`%g`) output.
///
/// Text containing an exponent gets an explicit sign and at least two exponent
/// digits; text without one is returned unchanged.  Unlike
/// `python_exponent_style`, `.0` is never appended to whole numbers.
fn protoc_exponent_style(s: &str) -> String {
    normalise_exponent(s).unwrap_or_else(|| s.to_owned())
}

/// Exponent normalisation for Python `str(float)`-style output.
///
/// Text containing an exponent gets an explicit sign and at least two exponent
/// digits.  Text without an exponent gets `.0` appended unless it already
/// contains a `.`, an `n` or an `i` (the latter two cover `nan` and `inf`).
fn python_exponent_style(s: &str) -> String {
    match normalise_exponent(s) {
        Some(scientific) => scientific,
        None if s.contains(&['.', 'n', 'i'][..]) => s.to_owned(),
        None => format!("{}.0", s),
    }
}
395
/// Converts Rust scientific notation (`<mantissa>e<exp>`, as produced by
/// `{:.Ne}`) into `%g`-style text for `prec` significant digits.
///
/// * Input without an `e` is returned unchanged.
/// * When `-4 <= exp < prec`, the digits are laid out in plain decimal
///   notation and trailing fractional zeros (and a dangling `.`) are removed.
/// * Otherwise the result is `d[.ddd]e±XX`: trailing mantissa zeros are
///   removed and the exponent is printed with a sign and at least two digits.
///
/// An exponent that fails to parse is treated as 0.
fn rust_sci_to_g_style(rust_sci: &str, prec: usize) -> String {
    let (mantissa, exp_text) = match rust_sci.split_once('e') {
        Some(parts) => parts,
        None => return rust_sci.to_owned(),
    };
    let exp: i32 = exp_text.parse().unwrap_or(0);

    // Significant digits only: sign and decimal point removed.
    let digits: String = mantissa
        .trim_start_matches('-')
        .chars()
        .filter(|&c| c != '.')
        .collect();
    let digit_count = digits.chars().count();

    let mut out = String::new();
    if mantissa.starts_with('-') {
        out.push('-');
    }

    if (-4..(prec as i32)).contains(&exp) {
        // Number of digits before the decimal point.  Kept as i32 because it
        // is <= 0 for exp in -4..=-1; casting a negative value to usize would
        // wrap to a huge count.
        let point = exp + 1;
        if point <= 0 {
            // "0.000…XYZ" with (-exp - 1) zeros after the point; exp is in
            // -4..=-1 here, so the cast yields 0..=3.
            out.push_str("0.");
            out.extend(std::iter::repeat('0').take((-exp - 1) as usize));
            out.push_str(&digits);
        } else {
            let point = point as usize; // positive here, cast is lossless
            if point >= digit_count {
                // Whole number: pad with zeros, no decimal point (like %g).
                out.push_str(&digits);
                out.extend(std::iter::repeat('0').take(point - digit_count));
            } else {
                out.extend(digits.chars().take(point));
                out.push('.');
                out.extend(digits.chars().skip(point));
            }
        }
        trim_trailing_zeros_after_dot(&mut out);
    } else {
        let mut rest = digits.chars();
        match rest.next() {
            None => out.push('0'),
            Some(lead) => {
                out.push(lead);
                if digit_count > 1 {
                    out.push('.');
                    out.extend(rest);
                    trim_trailing_zeros_after_dot(&mut out);
                }
            }
        }
        if exp >= 0 {
            out.push_str(&format!("e+{:02}", exp));
        } else {
            out.push_str(&format!("e-{:02}", -exp));
        }
    }
    out
}

/// Strips trailing `0` characters, then any trailing `.`, from `s` in place.
///
/// Does nothing when `s` contains no `.`, so whole numbers such as `100` are
/// left alone.
fn trim_trailing_zeros_after_dot(s: &mut String) {
    if s.contains('.') {
        let kept = s.trim_end_matches('0').trim_end_matches('.').len();
        s.truncate(kept);
    }
}
475
476// ── format_protoc_value ───────────────────────────────────────────────────────
477//
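// Mirrors `format_protoc_value()` in common.py.
// Returns `None` for fields that have no inline scalar rendering: INVALID_*
// variants, nested content (messages and groups, which are rendered as child
// blocks), unset fields, and, when `include_wire_types` is false, untyped
// wire-level values.  Packed repeated fields are rendered as `[v1, v2, ...]`.
// `include_wire_types`: true for protoc, false for protoc_meticulous.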
482
483pub fn format_protoc_value(field: &ProtoTextField, include_wire_types: bool) -> Option<String> {
484    match &field.content {
485        // Always skip invalid fields
486        ProtoTextContent::InvalidTagType(_)
487        | ProtoTextContent::InvalidVarint(_)
488        | ProtoTextContent::InvalidFixed64(_)
489        | ProtoTextContent::InvalidFixed32(_)
490        | ProtoTextContent::InvalidBytesLength(_)
491        | ProtoTextContent::TruncatedBytes(_)
492        | ProtoTextContent::InvalidPackedRecords(_)
493        | ProtoTextContent::InvalidString(_)
494        | ProtoTextContent::InvalidGroupEnd(_) => None,
495
496        // Generic wire types: only in protoc (include_wire_types=true)
497        ProtoTextContent::WireVarint(v) => {
498            if include_wire_types {
499                Some(format_wire_varint_protoc(*v))
500            } else {
501                None
502            }
503        }
504        ProtoTextContent::WireFixed64(v) => {
505            if include_wire_types {
506                Some(format_wire_fixed64_protoc(*v))
507            } else {
508                None
509            }
510        }
511        ProtoTextContent::WireFixed32(v) => {
512            if include_wire_types {
513                Some(format_wire_fixed32_protoc(*v))
514            } else {
515                None
516            }
517        }
518        ProtoTextContent::WireBytes(b) => {
519            if include_wire_types {
520                Some(format!("\"{}\"", escape_bytes(b)))
521            } else {
522                None
523            }
524        }
525
526        // Typed VARINT
527        ProtoTextContent::Int64(v) => Some(format_int64_protoc(*v)),
528        ProtoTextContent::Uint64(v) => Some(format_uint64_protoc(*v)),
529        ProtoTextContent::Int32(v) => Some(format_int32_protoc(*v)),
530        ProtoTextContent::Uint32(v) => Some(format_uint32_protoc(*v)),
531        ProtoTextContent::Bool(v) => Some(format_bool_protoc(*v).to_owned()),
532        ProtoTextContent::Enum(v) => Some(format_enum_protoc(*v)),
533        ProtoTextContent::Sint32(v) => Some(format_sint32_protoc(*v)),
534        ProtoTextContent::Sint64(v) => Some(format_sint64_protoc(*v)),
535
536        // Typed FIXED64
537        ProtoTextContent::Double(v) => Some(format_double_protoc(*v)),
538        ProtoTextContent::PFixed64(v) => Some(format_fixed64_protoc(*v)),
539        ProtoTextContent::Sfixed64(v) => Some(format_sfixed64_protoc(*v)),
540
541        // Typed FIXED32
542        ProtoTextContent::Float(v) => Some(format_float_protoc(*v)),
543        ProtoTextContent::PFixed32(v) => Some(format_fixed32_protoc(*v)),
544        ProtoTextContent::Sfixed32(v) => Some(format_sfixed32_protoc(*v)),
545
546        // Length-delimited scalars
547        ProtoTextContent::StringVal(s) => Some(format!("\"{}\"", escape_string(s))),
548        ProtoTextContent::BytesVal(b) => Some(format!("\"{}\"", escape_bytes(b))),
549
550        // Nested (handled as child block, not here)
551        ProtoTextContent::MessageVal(_)
552        | ProtoTextContent::Group(_)
553        | ProtoTextContent::WireGroup(_) => None,
554
555        // Packed repeated: render as "[val1, val2, ...]" (matching Python's protoc output)
556        ProtoTextContent::Doubles(vs) => Some(format!(
557            "[{}]",
558            vs.iter()
559                .map(|v| format_double_protoc(*v))
560                .collect::<Vec<_>>()
561                .join(", ")
562        )),
563        ProtoTextContent::Floats(vs) => Some(format!(
564            "[{}]",
565            vs.iter()
566                .map(|v| format_float_protoc(*v))
567                .collect::<Vec<_>>()
568                .join(", ")
569        )),
570        ProtoTextContent::Int64s(vs) => Some(format!(
571            "[{}]",
572            vs.iter()
573                .map(|v| format_int64_protoc(*v))
574                .collect::<Vec<_>>()
575                .join(", ")
576        )),
577        ProtoTextContent::Uint64s(vs) => Some(format!(
578            "[{}]",
579            vs.iter()
580                .map(|v| format_uint64_protoc(*v))
581                .collect::<Vec<_>>()
582                .join(", ")
583        )),
584        ProtoTextContent::Int32s(vs) => Some(format!(
585            "[{}]",
586            vs.iter()
587                .map(|v| format_int32_protoc(*v))
588                .collect::<Vec<_>>()
589                .join(", ")
590        )),
591        ProtoTextContent::Fixed64s(vs) => Some(format!(
592            "[{}]",
593            vs.iter()
594                .map(|v| format_fixed64_protoc(*v))
595                .collect::<Vec<_>>()
596                .join(", ")
597        )),
598        ProtoTextContent::Fixed32s(vs) => Some(format!(
599            "[{}]",
600            vs.iter()
601                .map(|v| format_fixed32_protoc(*v))
602                .collect::<Vec<_>>()
603                .join(", ")
604        )),
605        ProtoTextContent::Bools(vs) => Some(format!(
606            "[{}]",
607            vs.iter()
608                .map(|v| format_bool_protoc(*v))
609                .collect::<Vec<_>>()
610                .join(", ")
611        )),
612        ProtoTextContent::Uint32s(vs) => Some(format!(
613            "[{}]",
614            vs.iter()
615                .map(|v| format_uint32_protoc(*v))
616                .collect::<Vec<_>>()
617                .join(", ")
618        )),
619        ProtoTextContent::Enums(vs) => Some(format!(
620            "[{}]",
621            vs.iter()
622                .map(|v| format_enum_protoc(*v))
623                .collect::<Vec<_>>()
624                .join(", ")
625        )),
626        ProtoTextContent::Sfixed32s(vs) => Some(format!(
627            "[{}]",
628            vs.iter()
629                .map(|v| format_sfixed32_protoc(*v))
630                .collect::<Vec<_>>()
631                .join(", ")
632        )),
633        ProtoTextContent::Sfixed64s(vs) => Some(format!(
634            "[{}]",
635            vs.iter()
636                .map(|v| format_sfixed64_protoc(*v))
637                .collect::<Vec<_>>()
638                .join(", ")
639        )),
640        ProtoTextContent::Sint32s(vs) => Some(format!(
641            "[{}]",
642            vs.iter()
643                .map(|v| format_sint32_protoc(*v))
644                .collect::<Vec<_>>()
645                .join(", ")
646        )),
647        ProtoTextContent::Sint64s(vs) => Some(format!(
648            "[{}]",
649            vs.iter()
650                .map(|v| format_sint64_protoc(*v))
651                .collect::<Vec<_>>()
652                .join(", ")
653        )),
654
655        ProtoTextContent::Unset => None,
656    }
657}
658
659// ── is_nested / is_invalid helpers ───────────────────────────────────────────
660
661#[inline]
662pub fn is_nested(content: &ProtoTextContent) -> bool {
663    matches!(
664        content,
665        ProtoTextContent::MessageVal(_)
666            | ProtoTextContent::Group(_)
667            | ProtoTextContent::WireGroup(_)
668    )
669}
670
671#[inline]
672pub fn is_invalid(content: &ProtoTextContent) -> bool {
673    matches!(
674        content,
675        ProtoTextContent::InvalidTagType(_)
676            | ProtoTextContent::InvalidVarint(_)
677            | ProtoTextContent::InvalidFixed64(_)
678            | ProtoTextContent::InvalidFixed32(_)
679            | ProtoTextContent::InvalidBytesLength(_)
680            | ProtoTextContent::TruncatedBytes(_)
681            | ProtoTextContent::InvalidPackedRecords(_)
682            | ProtoTextContent::InvalidString(_)
683            | ProtoTextContent::InvalidGroupEnd(_)
684    )
685}
686
687// ── Modifier lines ────────────────────────────────────────────────────────────
688//
689// Mirrors `get_modifier_strings(include_type=True)` in common.py.
690
/// One modifier line attached to a field, as produced by `get_modifiers`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Modifier {
    /// The rendered line, e.g. `tag_overhang_count: 2`.
    pub text: String,
}
694
695pub fn get_modifiers(field: &ProtoTextField) -> Vec<Modifier> {
696    let mut out = Vec::new();
697
698    if let Some(v) = field.tag_overhang_count {
699        out.push(Modifier {
700            text: format!("tag_overhang_count: {}", v),
701        });
702    }
703    if field.tag_is_out_of_range {
704        out.push(Modifier {
705            text: "tag_is_out_of_range: true".to_owned(),
706        });
707    }
708    if let Some(v) = field.value_overhang_count {
709        out.push(Modifier {
710            text: format!("value_overhang_count: {}", v),
711        });
712    }
713    if let Some(v) = field.length_overhang_count {
714        out.push(Modifier {
715            text: format!("length_overhang_count: {}", v),
716        });
717    }
718    if let Some(v) = field.missing_bytes_count {
719        out.push(Modifier {
720            text: format!("missing_bytes_count: {}", v),
721        });
722    }
723    if let Some(v) = field.mismatched_group_end {
724        out.push(Modifier {
725            text: format!("mismatched_group_end: {}", v),
726        });
727    }
728    if field.open_ended_group {
729        out.push(Modifier {
730            text: "open_ended_group: true".to_owned(),
731        });
732    }
733    if let Some(v) = field.end_tag_overhang_count {
734        out.push(Modifier {
735            text: format!("end_tag_overhang_count: {}", v),
736        });
737    }
738    if field.end_tag_is_out_of_range {
739        out.push(Modifier {
740            text: "end_tag_is_out_of_range: true".to_owned(),
741        });
742    }
743    if field.proto2_has_type_mismatch {
744        out.push(Modifier {
745            text: "proto2_has_type_mismatch: true".to_owned(),
746        });
747    }
748    if !field.records_overhung_count.is_empty() {
749        let vals: Vec<String> = field
750            .records_overhung_count
751            .iter()
752            .map(|v| v.to_string())
753            .collect();
754        out.push(Modifier {
755            text: format!("records_overhung_count: [{}]", vals.join(", ")),
756        });
757    }
758    out
759}
760
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks every `(input, expected)` pair against `format_double_protoc`.
    fn check_doubles(cases: &[(f64, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(format_double_protoc(input), expected, "input = {:e}", input);
        }
    }

    /// Checks every `(input, expected)` pair against `format_float_protoc`.
    fn check_floats(cases: &[(f32, &str)]) {
        for &(input, expected) in cases {
            assert_eq!(format_float_protoc(input), expected, "input = {:e}", input);
        }
    }

    // ── format_double_protoc: negative-exponent decimal form ──────────────────
    //
    // Regression tests for a capacity-overflow panic in rust_sci_to_g_style.
    // Values with decimal exponents -4, -3 and -2 take the plain-decimal path
    // with a non-positive decimal position.  Before the fix, that position was
    // computed as `(exp + 1) as usize`, which wrapped a negative i32 to a value
    // near usize::MAX and made `.repeat()` panic.

    #[test]
    fn double_protoc_exp_minus4() {
        // exp = -4 was the deepest broken case.
        check_doubles(&[
            (1e-4, "0.0001"),
            (-1e-4, "-0.0001"),
            (5.5e-4, "0.00055"),
        ]);
    }

    #[test]
    fn double_protoc_exp_minus3() {
        check_doubles(&[(1e-3, "0.001"), (-1.5e-3, "-0.0015")]);
    }

    #[test]
    fn double_protoc_exp_minus2() {
        check_doubles(&[(1e-2, "0.01"), (3.75e-2, "0.0375")]);
    }

    // ── Neighbouring exponents that were never broken (boundary sanity) ───────

    #[test]
    fn double_protoc_exp_minus1() {
        check_doubles(&[(1e-1, "0.1"), (2.5e-1, "0.25")]);
    }

    #[test]
    fn double_protoc_exp_zero_and_positive() {
        check_doubles(&[(1.0, "1"), (1.5, "1.5"), (123.456, "123.456")]);
    }

    #[test]
    fn double_protoc_scientific_below_threshold() {
        // exp = -5 is below the -4 threshold, so the output stays scientific.
        check_doubles(&[(1e-5, "1e-05")]);
    }

    #[test]
    fn double_protoc_special_values() {
        check_doubles(&[
            (f64::NAN, "nan"),
            (f64::INFINITY, "inf"),
            (f64::NEG_INFINITY, "-inf"),
        ]);
    }

    // ── format_float_protoc: the same exponent range applies to f32 ───────────

    #[test]
    fn float_protoc_exp_minus4() {
        check_floats(&[(1e-4, "0.0001"), (5.5e-4, "0.00055")]);
    }

    #[test]
    fn float_protoc_exp_minus3() {
        check_floats(&[(1.5e-3, "0.0015")]);
    }

    #[test]
    fn float_protoc_exp_minus2() {
        check_floats(&[(1e-2, "0.01")]);
    }
}
prototext_core/serialize/common.rs

prototext_core/serialize/
common.rs