facet_format/parser.rs
1use crate::FieldEvidence;
2use facet_reflect::Span;
3
4/// Streaming cursor that yields serialized fields for solver probing.
5pub trait ProbeStream<'de> {
6 /// Parser-specific error type.
7 type Error;
8
9 /// Produce the next field evidence entry. Returning `Ok(None)` indicates
10 /// the parser ran out of evidence or the format does not need additional
11 /// passes.
12 fn next(&mut self) -> Result<Option<FieldEvidence<'de>>, Self::Error>;
13}
14
15/// Streaming parser for a specific wire format.
16pub trait FormatParser<'de> {
17 /// Parser-specific error type.
18 type Error;
19
20 /// Evidence cursor type produced by [`FormatParser::begin_probe`].
21 type Probe<'a>: ProbeStream<'de, Error = Self::Error>
22 where
23 Self: 'a;
24
25 /// Read the next parse event, or `None` if the input is exhausted.
26 ///
27 /// Returns `Ok(None)` at end-of-input (EOF). For formats like TOML where
28 /// structs can be "reopened" (fields added after the struct was previously
29 /// exited), callers should continue processing until EOF rather than
30 /// stopping at `StructEnd`.
31 fn next_event(&mut self) -> Result<Option<crate::ParseEvent<'de>>, Self::Error>;
32
33 /// Peek at the next event without consuming it, or `None` if at EOF.
34 fn peek_event(&mut self) -> Result<Option<crate::ParseEvent<'de>>, Self::Error>;
35
36 /// Skip the current value (for unknown fields, etc.).
37 fn skip_value(&mut self) -> Result<(), Self::Error>;
38
39 /// Begin evidence collection for untagged-enum resolution.
40 fn begin_probe(&mut self) -> Result<Self::Probe<'_>, Self::Error>;
41
42 /// Capture the raw representation of the current value without parsing it.
43 ///
44 /// This is used for types like `RawJson` that want to defer parsing.
45 /// The parser should skip the value and return the raw bytes/string
46 /// from the input.
47 ///
48 /// Returns `Ok(None)` if raw capture is not supported (e.g., streaming mode
49 /// or formats where raw capture doesn't make sense).
50 fn capture_raw(&mut self) -> Result<Option<&'de str>, Self::Error> {
51 // Default: not supported
52 self.skip_value()?;
53 Ok(None)
54 }
55
56 /// Returns the shape of the format's raw capture type (e.g., `RawJson::SHAPE`).
57 ///
58 /// When the deserializer encounters a shape that matches this, it will use
59 /// `capture_raw` to capture the raw representation and store it in a
60 /// `Cow<str>` (the raw type must be a newtype over `Cow<str>`).
61 ///
62 /// Returns `None` if this format doesn't support raw capture types.
63 fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
64 None
65 }
66
67 /// Returns true if this format is self-describing.
68 ///
69 /// Self-describing formats (like JSON, YAML) include type information in the wire format
70 /// and emit `FieldKey` events for struct fields.
71 ///
72 /// Non-self-describing formats (like postcard, bincode) don't include type markers
73 /// and use `OrderedField` events, relying on the driver to provide schema information
74 /// via `hint_struct_fields`.
75 fn is_self_describing(&self) -> bool {
76 true // Default: most formats are self-describing
77 }
78
79 /// Hint to the parser that a struct with the given number of fields is expected.
80 ///
81 /// For non-self-describing formats, this allows the parser to emit the correct
82 /// number of `OrderedField` events followed by `StructEnd`.
83 ///
84 /// Self-describing formats can ignore this hint.
85 fn hint_struct_fields(&mut self, _num_fields: usize) {
86 // Default: ignore (self-describing formats don't need this)
87 }
88
89 /// Hint to the parser what scalar type is expected next.
90 ///
91 /// For non-self-describing formats, this allows the parser to correctly
92 /// decode the next value and emit an appropriate `Scalar` event.
93 ///
94 /// Self-describing formats can ignore this hint (they determine the type
95 /// from the wire format).
96 fn hint_scalar_type(&mut self, _hint: ScalarTypeHint) {
97 // Default: ignore (self-describing formats don't need this)
98 }
99
100 /// Hint to the parser that a sequence (array/Vec) is expected.
101 ///
102 /// For non-self-describing formats, this triggers reading the length prefix
103 /// and setting up sequence state.
104 ///
105 /// Self-describing formats can ignore this hint.
106 fn hint_sequence(&mut self) {
107 // Default: ignore (self-describing formats don't need this)
108 }
109
110 /// Hint to the parser that a byte sequence (`Vec<u8>`, `&[u8]`, etc.) is expected.
111 ///
112 /// For binary formats like postcard that store `Vec<u8>` as raw bytes (varint length
113 /// followed by raw data), this allows bulk reading instead of element-by-element
114 /// deserialization.
115 ///
116 /// If the parser handles this hint, it should emit `Scalar(Bytes(...))` directly.
117 /// If it doesn't support this optimization, it should return `false` and the
118 /// deserializer will fall back to element-by-element deserialization via `hint_sequence`.
119 ///
120 /// Returns `true` if the hint is handled (parser will emit `Scalar(Bytes(...))`),
121 /// `false` otherwise.
122 fn hint_byte_sequence(&mut self) -> bool {
123 // Default: not supported, fall back to element-by-element
124 false
125 }
126
127 /// Hint to the parser that a fixed-size array is expected.
128 ///
129 /// For non-self-describing formats, this tells the parser the array length
130 /// is known at compile time (from the type), so no length prefix is read.
131 /// This differs from `hint_sequence` which reads a length prefix for Vec/slices.
132 ///
133 /// Self-describing formats can ignore this hint.
134 fn hint_array(&mut self, _len: usize) {
135 // Default: ignore (self-describing formats don't need this)
136 }
137
138 /// Hint to the parser that an `Option<T>` is expected.
139 ///
140 /// For non-self-describing formats (like postcard), this allows the parser
141 /// to read the discriminant byte and emit either:
142 /// - `Scalar(Null)` for None (discriminant 0x00)
143 /// - Set up state to parse the inner value for Some (discriminant 0x01)
144 ///
145 /// Self-describing formats can ignore this hint (they determine `Option`
146 /// presence from the wire format, e.g., null vs value in JSON).
147 fn hint_option(&mut self) {
148 // Default: ignore (self-describing formats don't need this)
149 }
150
151 /// Hint to the parser that a map is expected.
152 ///
153 /// For non-self-describing formats (like postcard), this allows the parser
154 /// to read the length prefix and set up map state. The parser should then
155 /// emit `SequenceStart` (representing the map entries) followed by pairs of
156 /// key and value events, and finally `SequenceEnd`.
157 ///
158 /// Self-describing formats can ignore this hint (they determine map structure
159 /// from the wire format, e.g., `{...}` in JSON).
160 fn hint_map(&mut self) {
161 // Default: ignore (self-describing formats don't need this)
162 }
163
164 /// Hint to the parser that a dynamic value is expected.
165 ///
166 /// Non-self-describing formats can use this to switch to a self-describing
167 /// encoding for dynamic values (e.g., tagged scalar/array/object).
168 /// Self-describing formats can ignore this hint.
169 fn hint_dynamic_value(&mut self) {
170 // Default: ignore (self-describing formats don't need this)
171 }
172
173 /// Hint to the parser that an enum is expected, providing variant information.
174 ///
175 /// For non-self-describing formats (like postcard), this allows the parser
176 /// to read the variant discriminant (varint) and map it to the variant name,
177 /// and to emit appropriate wrapper events for multi-field variants.
178 ///
179 /// The `variants` slice contains metadata for each variant in declaration order,
180 /// matching the indices used in the wire format.
181 ///
182 /// Self-describing formats can ignore this hint (they include variant names
183 /// in the wire format).
184 fn hint_enum(&mut self, _variants: &[EnumVariantHint]) {
185 // Default: ignore (self-describing formats don't need this)
186 }
187
188 /// Hint to the parser that an opaque scalar type is expected.
189 ///
190 /// For non-self-describing binary formats (like postcard), this allows the parser
191 /// to use format-specific encoding for types like UUID (16 raw bytes), ULID,
192 /// OrderedFloat, etc. that have a more efficient binary representation than
193 /// their string form.
194 ///
195 /// The `type_identifier` is the type's identifier string (e.g., "Uuid", "Ulid",
196 /// "OrderedFloat", `DateTime<Utc>`). The `shape` provides access to inner type
197 /// information (e.g., whether OrderedFloat wraps f32 or f64).
198 ///
199 /// Returns `true` if the parser will handle this type specially (caller should
200 /// expect format-specific `ScalarValue`), or `false` to fall back to standard
201 /// handling (e.g., `hint_scalar_type(String)` for `FromStr` types).
202 ///
203 /// Self-describing formats can ignore this and return `false`.
204 fn hint_opaque_scalar(
205 &mut self,
206 _type_identifier: &'static str,
207 _shape: &'static facet_core::Shape,
208 ) -> bool {
209 // Default: not handled, fall back to standard behavior
210 false
211 }
212
213 /// Returns the source span of the most recently consumed event.
214 ///
215 /// This is used for error reporting - when a deserialization error occurs,
216 /// the span of the last consumed event helps locate the problem in the input.
217 ///
218 /// Parsers that track source positions should override this to return
219 /// meaningful span information. The default implementation returns `None`.
220 fn current_span(&self) -> Option<Span> {
221 None
222 }
223
224 /// Returns the format namespace for format-specific proxy resolution.
225 ///
226 /// When a field or container has format-specific proxies (e.g., `#[facet(xml::proxy = XmlProxy)]`),
227 /// this namespace is used to look up the appropriate proxy. If no namespace is returned,
228 /// only the format-agnostic proxy (`#[facet(proxy = ...)]`) is considered.
229 ///
230 /// Examples:
231 /// - XML parser should return `Some("xml")`
232 /// - JSON parser should return `Some("json")`
233 ///
234 /// Default: returns `None` (only format-agnostic proxies are used).
235 fn format_namespace(&self) -> Option<&'static str> {
236 None
237 }
238}
239
240/// Metadata about an enum variant for use with `hint_enum`.
241///
242/// Provides the information needed by non-self-describing formats to correctly
243/// parse enum variants, including the variant's structure kind and field count.
244#[derive(Debug, Clone, Copy, PartialEq, Eq)]
245pub struct EnumVariantHint {
246 /// Name of the variant (e.g., "Some", "Pair", "Named")
247 pub name: &'static str,
248 /// The kind of struct this variant represents (Unit, Tuple, TupleStruct, or Struct)
249 pub kind: facet_core::StructKind,
250 /// Number of fields in this variant
251 pub field_count: usize,
252}
253
/// Hint for what scalar type is expected next.
///
/// Used by non-self-describing formats to know how to decode the next value.
/// The parenthesised notes on each variant describe the postcard encoding.
///
/// The enum is a plain fieldless tag, so besides being `Copy` and comparable
/// it also implements `Hash`, which lets it serve as a key in hash-based
/// collections (for example a per-type decoder table).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ScalarTypeHint {
    /// Boolean (postcard: 0 or 1 byte)
    Bool,
    /// Unsigned 8-bit integer (postcard: raw byte)
    U8,
    /// Unsigned 16-bit integer (postcard: varint)
    U16,
    /// Unsigned 32-bit integer (postcard: varint)
    U32,
    /// Unsigned 64-bit integer (postcard: varint)
    U64,
    /// Unsigned 128-bit integer (postcard: varint)
    U128,
    /// Platform-sized unsigned integer (postcard: varint)
    Usize,
    /// Signed 8-bit integer (postcard: zigzag varint)
    I8,
    /// Signed 16-bit integer (postcard: zigzag varint)
    I16,
    /// Signed 32-bit integer (postcard: zigzag varint)
    I32,
    /// Signed 64-bit integer (postcard: zigzag varint)
    I64,
    /// Signed 128-bit integer (postcard: zigzag varint)
    I128,
    /// Platform-sized signed integer (postcard: zigzag varint)
    Isize,
    /// 32-bit float (postcard: 4 bytes little-endian)
    F32,
    /// 64-bit float (postcard: 8 bytes little-endian)
    F64,
    /// UTF-8 string (postcard: varint length + bytes)
    String,
    /// Raw bytes (postcard: varint length + bytes)
    Bytes,
    /// Character (postcard: UTF-8 encoded)
    Char,
}
296
/// Opt-in extension for parsers that offer format-specific JIT (Tier 2).
///
/// Implementing this trait enables the Tier 2 fast path, where generated
/// Cranelift IR parses the bytes directly rather than going through the
/// event abstraction.
///
/// # Requirements
///
/// For Tier 2 to work:
/// - the full input slice has to be available upfront;
/// - the parser has to be able to report and update its cursor position;
/// - the parser has to reset its internal state when `jit_set_pos` is called.
#[cfg(feature = "jit")]
pub trait FormatJitParser<'de>: FormatParser<'de> {
    /// JIT emitter type specific to this format.
    type FormatJit: crate::jit::JitFormat;

    /// The complete input slice.
    fn jit_input(&self) -> &'de [u8];

    /// Current byte offset of the cursor.
    ///
    /// `None` is returned when buffered state (e.g., a peeked event) makes
    /// the position ambiguous.
    fn jit_pos(&self) -> Option<usize>;

    /// Store the new cursor position once Tier 2 execution has succeeded.
    ///
    /// Implementations must also invalidate/reset any internal
    /// scanning/tokenizer state so that later parsing resumes from `pos`
    /// consistently.
    fn jit_set_pos(&mut self, pos: usize);

    /// Produce a format JIT emitter instance (usually a ZST).
    fn jit_format(&self) -> Self::FormatJit;

    /// Translate a Tier 2 error (code + position) into `Self::Error`.
    fn jit_error(&self, input: &'de [u8], error_pos: usize, error_code: i32) -> Self::Error;
}