facet_format/
parser.rs

1use crate::FieldEvidence;
2use facet_reflect::Span;
3
4/// Streaming cursor that yields serialized fields for solver probing.
5pub trait ProbeStream<'de> {
6    /// Parser-specific error type.
7    type Error;
8
9    /// Produce the next field evidence entry. Returning `Ok(None)` indicates
10    /// the parser ran out of evidence or the format does not need additional
11    /// passes.
12    fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error>;
13}
14
15/// Streaming parser for a specific wire format.
16pub trait FormatParser<'de> {
17    /// Parser-specific error type.
18    type Error;
19
20    /// Evidence cursor type produced by [`FormatParser::begin_probe`].
21    type Probe<'a>: ProbeStream<'de, Error = Self::Error>
22    where
23        Self: 'a;
24
25    /// Read the next parse event, or `None` if the input is exhausted.
26    ///
27    /// Returns `Ok(None)` at end-of-input (EOF). For formats like TOML where
28    /// structs can be "reopened" (fields added after the struct was previously
29    /// exited), callers should continue processing until EOF rather than
30    /// stopping at `StructEnd`.
31    fn next_event(&mut self) -> Result<Option<crate::ParseEvent<'de>>, Self::Error>;
32
33    /// Peek at the next event without consuming it, or `None` if at EOF.
34    fn peek_event(&mut self) -> Result<Option<crate::ParseEvent<'de>>, Self::Error>;
35
36    /// Skip the current value (for unknown fields, etc.).
37    fn skip_value(&mut self) -> Result<(), Self::Error>;
38
39    /// Begin evidence collection for untagged-enum resolution.
40    fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error>;
41
42    /// Capture the raw representation of the current value without parsing it.
43    ///
44    /// This is used for types like `RawJson` that want to defer parsing.
45    /// The parser should skip the value and return the raw bytes/string
46    /// from the input.
47    ///
48    /// Returns `Ok(None)` if raw capture is not supported (e.g., streaming mode
49    /// or formats where raw capture doesn't make sense).
50    fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
51        // Default: not supported
52        self.skip_value()?;
53        Ok(None)
54    }
55
56    /// Returns the shape of the format's raw capture type (e.g., `RawJson::SHAPE`).
57    ///
58    /// When the deserializer encounters a shape that matches this, it will use
59    /// `capture_raw` to capture the raw representation and store it in a
60    /// `Cow<str>` (the raw type must be a newtype over `Cow<str>`).
61    ///
62    /// Returns `None` if this format doesn't support raw capture types.
63    fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
64        None
65    }
66
67    /// Returns true if this format is self-describing.
68    ///
69    /// Self-describing formats (like JSON, YAML) include type information in the wire format
70    /// and emit `FieldKey` events for struct fields.
71    ///
72    /// Non-self-describing formats (like postcard, bincode) don't include type markers
73    /// and use `OrderedField` events, relying on the driver to provide schema information
74    /// via `hint_struct_fields`.
75    fn is_self_describing(&self) -> bool {
76        true // Default: most formats are self-describing
77    }
78
79    /// Hint to the parser that a struct with the given number of fields is expected.
80    ///
81    /// For non-self-describing formats, this allows the parser to emit the correct
82    /// number of `OrderedField` events followed by `StructEnd`.
83    ///
84    /// Self-describing formats can ignore this hint.
85    fn hint_struct_fields(&mut self, _num_fields: usize) {
86        // Default: ignore (self-describing formats don't need this)
87    }
88
89    /// Hint to the parser what scalar type is expected next.
90    ///
91    /// For non-self-describing formats, this allows the parser to correctly
92    /// decode the next value and emit an appropriate `Scalar` event.
93    ///
94    /// Self-describing formats can ignore this hint (they determine the type
95    /// from the wire format).
96    fn hint_scalar_type(&mut self, _hint: ScalarTypeHint) {
97        // Default: ignore (self-describing formats don't need this)
98    }
99
100    /// Hint to the parser that a sequence (array/Vec) is expected.
101    ///
102    /// For non-self-describing formats, this triggers reading the length prefix
103    /// and setting up sequence state.
104    ///
105    /// Self-describing formats can ignore this hint.
106    fn hint_sequence(&mut self) {
107        // Default: ignore (self-describing formats don't need this)
108    }
109
110    /// Hint to the parser that a fixed-size array is expected.
111    ///
112    /// For non-self-describing formats, this tells the parser the array length
113    /// is known at compile time (from the type), so no length prefix is read.
114    /// This differs from `hint_sequence` which reads a length prefix for Vec/slices.
115    ///
116    /// Self-describing formats can ignore this hint.
117    fn hint_array(&mut self, _len: usize) {
118        // Default: ignore (self-describing formats don't need this)
119    }
120
121    /// Hint to the parser that an `Option<T>` is expected.
122    ///
123    /// For non-self-describing formats (like postcard), this allows the parser
124    /// to read the discriminant byte and emit either:
125    /// - `Scalar(Null)` for None (discriminant 0x00)
126    /// - Set up state to parse the inner value for Some (discriminant 0x01)
127    ///
128    /// Self-describing formats can ignore this hint (they determine `Option`
129    /// presence from the wire format, e.g., null vs value in JSON).
130    fn hint_option(&mut self) {
131        // Default: ignore (self-describing formats don't need this)
132    }
133
134    /// Hint to the parser that a map is expected.
135    ///
136    /// For non-self-describing formats (like postcard), this allows the parser
137    /// to read the length prefix and set up map state. The parser should then
138    /// emit `SequenceStart` (representing the map entries) followed by pairs of
139    /// key and value events, and finally `SequenceEnd`.
140    ///
141    /// Self-describing formats can ignore this hint (they determine map structure
142    /// from the wire format, e.g., `{...}` in JSON).
143    fn hint_map(&mut self) {
144        // Default: ignore (self-describing formats don't need this)
145    }
146
147    /// Hint to the parser that a dynamic value is expected.
148    ///
149    /// Non-self-describing formats can use this to switch to a self-describing
150    /// encoding for dynamic values (e.g., tagged scalar/array/object).
151    /// Self-describing formats can ignore this hint.
152    fn hint_dynamic_value(&mut self) {
153        // Default: ignore (self-describing formats don't need this)
154    }
155
156    /// Hint to the parser that an enum is expected, providing variant information.
157    ///
158    /// For non-self-describing formats (like postcard), this allows the parser
159    /// to read the variant discriminant (varint) and map it to the variant name,
160    /// and to emit appropriate wrapper events for multi-field variants.
161    ///
162    /// The `variants` slice contains metadata for each variant in declaration order,
163    /// matching the indices used in the wire format.
164    ///
165    /// Self-describing formats can ignore this hint (they include variant names
166    /// in the wire format).
167    fn hint_enum(&mut self, _variants: &[EnumVariantHint]) {
168        // Default: ignore (self-describing formats don't need this)
169    }
170
171    /// Hint to the parser that an opaque scalar type is expected.
172    ///
173    /// For non-self-describing binary formats (like postcard), this allows the parser
174    /// to use format-specific encoding for types like UUID (16 raw bytes), ULID,
175    /// OrderedFloat, etc. that have a more efficient binary representation than
176    /// their string form.
177    ///
178    /// The `type_identifier` is the type's identifier string (e.g., "Uuid", "Ulid",
179    /// "OrderedFloat", `DateTime<Utc>`). The `shape` provides access to inner type
180    /// information (e.g., whether OrderedFloat wraps f32 or f64).
181    ///
182    /// Returns `true` if the parser will handle this type specially (caller should
183    /// expect format-specific `ScalarValue`), or `false` to fall back to standard
184    /// handling (e.g., `hint_scalar_type(String)` for `FromStr` types).
185    ///
186    /// Self-describing formats can ignore this and return `false`.
187    fn hint_opaque_scalar(
188        &mut self,
189        _type_identifier: &'static str,
190        _shape: &'static facet_core::Shape,
191    ) -> bool {
192        // Default: not handled, fall back to standard behavior
193        false
194    }
195
196    /// Returns the source span of the most recently consumed event.
197    ///
198    /// This is used for error reporting - when a deserialization error occurs,
199    /// the span of the last consumed event helps locate the problem in the input.
200    ///
201    /// Parsers that track source positions should override this to return
202    /// meaningful span information. The default implementation returns `None`.
203    fn current_span(&self) -> Option<Span> {
204        None
205    }
206}
207
208/// Metadata about an enum variant for use with `hint_enum`.
209///
210/// Provides the information needed by non-self-describing formats to correctly
211/// parse enum variants, including the variant's structure kind and field count.
212#[derive(Debug, Clone, Copy, PartialEq, Eq)]
213pub struct EnumVariantHint {
214    /// Name of the variant (e.g., "Some", "Pair", "Named")
215    pub name: &'static str,
216    /// The kind of struct this variant represents (Unit, Tuple, TupleStruct, or Struct)
217    pub kind: facet_core::StructKind,
218    /// Number of fields in this variant
219    pub field_count: usize,
220}
221
/// Hint for what scalar type is expected next.
///
/// Used by non-self-describing formats to know how to decode the next value.
/// The parenthesised notes describe how postcard encodes each type.
///
/// The enum is fieldless and cheap to copy; it also implements `Hash` so hints
/// can be used as keys in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScalarTypeHint {
    /// Boolean (postcard: a single byte holding 0 or 1)
    Bool,
    /// Unsigned 8-bit integer (postcard: raw byte)
    U8,
    /// Unsigned 16-bit integer (postcard: varint)
    U16,
    /// Unsigned 32-bit integer (postcard: varint)
    U32,
    /// Unsigned 64-bit integer (postcard: varint)
    U64,
    /// Unsigned 128-bit integer (postcard: varint)
    U128,
    /// Platform-sized unsigned integer (postcard: varint)
    Usize,
    // NOTE(review): the postcard wire format stores `i8` as one
    // two's-complement byte rather than a zigzag varint — confirm against the
    // postcard parser and correct the doc line below if so.
    /// Signed 8-bit integer (postcard: zigzag varint)
    I8,
    /// Signed 16-bit integer (postcard: zigzag varint)
    I16,
    /// Signed 32-bit integer (postcard: zigzag varint)
    I32,
    /// Signed 64-bit integer (postcard: zigzag varint)
    I64,
    /// Signed 128-bit integer (postcard: zigzag varint)
    I128,
    /// Platform-sized signed integer (postcard: zigzag varint)
    Isize,
    /// 32-bit float (postcard: 4 bytes little-endian)
    F32,
    /// 64-bit float (postcard: 8 bytes little-endian)
    F64,
    /// UTF-8 string (postcard: varint length + bytes)
    String,
    /// Raw bytes (postcard: varint length + bytes)
    Bytes,
    /// Character (postcard: UTF-8 encoded)
    Char,
}
264
/// Extension trait for parsers that support format-specific JIT (Tier 2).
///
/// Parsers implement this trait to enable the Tier 2 fast path, which
/// generates Cranelift IR that parses bytes directly instead of going
/// through the event abstraction.
///
/// Only compiled when the `jit` feature is enabled.
///
/// # Requirements
///
/// Tier 2 requires:
/// - The full input slice must be available upfront
/// - The parser must be able to report and update its cursor position
/// - The parser must reset internal state when `jit_set_pos` is called
#[cfg(feature = "jit")]
pub trait FormatJitParser<'de>: FormatParser<'de> {
    /// The format-specific JIT emitter type.
    type FormatJit: crate::jit::JitFormat;

    /// Return the full input slice.
    fn jit_input(&self) -> &'de [u8];

    /// Return the current byte offset (cursor position).
    ///
    /// Returns `None` if there is buffered state (e.g., a peeked event)
    /// that makes the position ambiguous.
    fn jit_pos(&self) -> Option<usize>;

    /// Commit a new cursor position after Tier 2 execution succeeds.
    ///
    /// Must also invalidate/reset any internal scanning/tokenizer state
    /// so that subsequent parsing continues from `pos` consistently.
    fn jit_set_pos(&mut self, pos: usize);

    /// Return a format JIT emitter instance (usually a ZST).
    fn jit_format(&self) -> Self::FormatJit;

    /// Convert a Tier 2 error (code + position) into `Self::Error`.
    ///
    /// `input` is the byte slice that was being parsed, `error_pos` the
    /// position reported with the error, and `error_code` the Tier 2 error
    /// code.
    fn jit_error(&self, input: &'de [u8], error_pos: usize, error_code: i32) -> Self::Error;
}