1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
extern crate alloc;
use crate::ParseError;
use alloc::collections::VecDeque;
use facet_reflect::Span;
/// Opaque token returned by [`FormatParser::save`].
///
/// This token can be passed to [`FormatParser::restore`] to replay
/// all events that were consumed since the save point.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SavePoint(pub u64);
impl SavePoint {
/// Create a new save point with the given ID.
pub fn new(id: u64) -> Self {
Self(id)
}
}
/// Streaming parser for a specific wire format.
pub trait FormatParser<'de> {
/// Read the next parse event, or `None` if the input is exhausted.
///
/// Returns `Ok(None)` at end-of-input (EOF). For formats like TOML where
/// structs can be "reopened" (fields added after the struct was previously
/// exited), callers should continue processing until EOF rather than
/// stopping at `StructEnd`.
///
/// If [`restore`](Self::restore) was called, events are first replayed
/// from the internal buffer before reading new events from the input.
fn next_event(&mut self) -> Result<Option<crate::ParseEvent<'de>>, ParseError>;
/// Read multiple parse events into a deque, returning the number of events read.
///
/// This is an optimization for parsers that can produce multiple events efficiently
/// in a single call, amortizing function call overhead and improving cache locality.
///
/// Returns `Ok(0)` at end-of-input (EOF).
///
/// The default implementation calls `next_event` repeatedly up to `limit` times.
/// Parsers can override this for better performance.
fn next_events(
&mut self,
buf: &mut VecDeque<crate::ParseEvent<'de>>,
limit: usize,
) -> Result<usize, ParseError> {
let mut count = 0;
while count < limit {
match self.next_event()? {
Some(event) => {
buf.push_back(event);
count += 1;
}
None => break,
}
}
Ok(count)
}
/// Peek at the next event without consuming it, or `None` if at EOF.
fn peek_event(&mut self) -> Result<Option<crate::ParseEvent<'de>>, ParseError>;
/// Skip the current value (for unknown fields, etc.).
fn skip_value(&mut self) -> Result<(), ParseError>;
/// Save the current parser position and start recording events.
///
/// Returns a [`SavePoint`] token. All events returned by [`next_event`](Self::next_event)
/// after this call are recorded internally. Call [`restore`](Self::restore) with this
/// token to replay all recorded events.
///
/// This is used for untagged enum resolution: save, read ahead to determine
/// the variant, then restore and parse with the correct type.
fn save(&mut self) -> SavePoint;
/// Restore to a previous save point, replaying recorded events.
///
/// After calling this, subsequent calls to [`next_event`](Self::next_event) will
/// first return all events that were recorded since the save point, then
/// continue reading from the input.
///
/// The save point is consumed - to save again, call [`save`](Self::save).
fn restore(&mut self, save_point: SavePoint);
/// Capture the raw representation of the current value without parsing it.
///
/// This is used for types like `RawJson` that want to defer parsing.
/// The parser should skip the value and return the raw bytes/string
/// from the input.
///
/// Returns `Ok(None)` if raw capture is not supported (e.g., streaming mode
/// or formats where raw capture doesn't make sense).
fn capture_raw(&mut self) -> Result<Option<&'de str>, ParseError> {
// Default: not supported
self.skip_value()?;
Ok(None)
}
/// Returns the raw input bytes, if available.
///
/// This is used by the deserializer to implement raw capture when buffering
/// events. The deserializer tracks value boundaries using event spans and
/// slices the input directly.
///
/// Returns `None` for streaming parsers that don't have the full input.
fn input(&self) -> Option<&'de [u8]> {
None
}
/// Returns the shape of the format's raw capture type (e.g., `RawJson::SHAPE`).
///
/// When the deserializer encounters a shape that matches this, it will use
/// `capture_raw` to capture the raw representation and store it in a
/// `Cow<str>` (the raw type must be a newtype over `Cow<str>`).
///
/// Returns `None` if this format doesn't support raw capture types.
fn raw_capture_shape(&self) -> Option<&'static facet_core::Shape> {
None
}
/// Returns true if this format is self-describing.
///
/// Self-describing formats (like JSON, YAML) include type information in the wire format
/// and emit `FieldKey` events for struct fields.
///
/// Non-self-describing formats (like postcard, bincode) don't include type markers
/// and use `OrderedField` events, relying on the driver to provide schema information
/// via `hint_struct_fields`.
fn is_self_describing(&self) -> bool {
true // Default: most formats are self-describing
}
/// Hint to the parser that a struct with the given number of fields is expected.
///
/// For non-self-describing formats, this allows the parser to emit the correct
/// number of `OrderedField` events followed by `StructEnd`.
///
/// Self-describing formats can ignore this hint.
fn hint_struct_fields(&mut self, _num_fields: usize) {
// Default: ignore (self-describing formats don't need this)
}
/// Hint to the parser what scalar type is expected next.
///
/// For non-self-describing formats, this allows the parser to correctly
/// decode the next value and emit an appropriate `Scalar` event.
///
/// Self-describing formats can ignore this hint (they determine the type
/// from the wire format).
fn hint_scalar_type(&mut self, _hint: ScalarTypeHint) {
// Default: ignore (self-describing formats don't need this)
}
/// Hint to the parser that a sequence (array/Vec) is expected.
///
/// For non-self-describing formats, this triggers reading the length prefix
/// and setting up sequence state.
///
/// Self-describing formats can ignore this hint.
fn hint_sequence(&mut self) {
// Default: ignore (self-describing formats don't need this)
}
/// Hint to the parser that a byte sequence (`Vec<u8>`, `&[u8]`, etc.) is expected.
///
/// For binary formats like postcard that store `Vec<u8>` as raw bytes (varint length
/// followed by raw data), this allows bulk reading instead of element-by-element
/// deserialization.
///
/// If the parser handles this hint, it should emit `Scalar(Bytes(...))` directly.
/// If it doesn't support this optimization, it should return `false` and the
/// deserializer will fall back to element-by-element deserialization via `hint_sequence`.
///
/// Returns `true` if the hint is handled (parser will emit `Scalar(Bytes(...))`),
/// `false` otherwise.
fn hint_byte_sequence(&mut self) -> bool {
// Default: not supported, fall back to element-by-element
false
}
/// Hint to the parser that all remaining input bytes should be consumed as a byte slice.
///
/// This is used by formats like postcard for trailing opaque payloads where the
/// field boundary is "until end of input" rather than a length prefix.
///
/// If handled, the parser should emit `Scalar(Bytes(...))` and advance to EOF.
/// Returns `true` if handled, `false` to use normal deserialization behavior.
fn hint_remaining_byte_sequence(&mut self) -> bool {
false
}
/// Hint to the parser that a fixed-size array is expected.
///
/// For non-self-describing formats, this tells the parser the array length
/// is known at compile time (from the type), so no length prefix is read.
/// This differs from `hint_sequence` which reads a length prefix for Vec/slices.
///
/// Self-describing formats can ignore this hint.
fn hint_array(&mut self, _len: usize) {
// Default: ignore (self-describing formats don't need this)
}
/// Hint to the parser that an `Option<T>` is expected.
///
/// For non-self-describing formats (like postcard), this allows the parser
/// to read the discriminant byte and emit either:
/// - `Scalar(Null)` for None (discriminant 0x00)
/// - Set up state to parse the inner value for Some (discriminant 0x01)
///
/// Self-describing formats can ignore this hint (they determine `Option`
/// presence from the wire format, e.g., null vs value in JSON).
fn hint_option(&mut self) {
// Default: ignore (self-describing formats don't need this)
}
/// Hint to the parser that a map is expected.
///
/// For non-self-describing formats (like postcard), this allows the parser
/// to read the length prefix and set up map state. The parser should then
/// emit `SequenceStart` (representing the map entries) followed by pairs of
/// key and value events, and finally `SequenceEnd`.
///
/// Self-describing formats can ignore this hint (they determine map structure
/// from the wire format, e.g., `{...}` in JSON).
fn hint_map(&mut self) {
// Default: ignore (self-describing formats don't need this)
}
/// Hint to the parser that a dynamic value is expected.
///
/// Non-self-describing formats can use this to switch to a self-describing
/// encoding for dynamic values (e.g., tagged scalar/array/object).
/// Self-describing formats can ignore this hint.
fn hint_dynamic_value(&mut self) {
// Default: ignore (self-describing formats don't need this)
}
/// Hint to the parser that an enum is expected, providing variant information.
///
/// For non-self-describing formats (like postcard), this allows the parser
/// to read the variant discriminant (varint) and map it to the variant name,
/// and to emit appropriate wrapper events for multi-field variants.
///
/// The `variants` slice contains metadata for each variant in declaration order,
/// matching the indices used in the wire format.
///
/// Self-describing formats can ignore this hint (they include variant names
/// in the wire format).
fn hint_enum(&mut self, _variants: &[EnumVariantHint]) {
// Default: ignore (self-describing formats don't need this)
}
/// Hint to the parser that an opaque scalar type is expected.
///
/// For non-self-describing binary formats (like postcard), this allows the parser
/// to use format-specific encoding for types like UUID (16 raw bytes), ULID,
/// OrderedFloat, etc. that have a more efficient binary representation than
/// their string form.
///
/// The `type_identifier` is the type's identifier string (e.g., "Uuid", "Ulid",
/// "OrderedFloat", `DateTime<Utc>`). The `shape` provides access to inner type
/// information (e.g., whether OrderedFloat wraps f32 or f64).
///
/// Returns `true` if the parser will handle this type specially (caller should
/// expect format-specific `ScalarValue`), or `false` to fall back to standard
/// handling (e.g., `hint_scalar_type(String)` for `FromStr` types).
///
/// Self-describing formats can ignore this and return `false`.
fn hint_opaque_scalar(
&mut self,
_type_identifier: &'static str,
_shape: &'static facet_core::Shape,
) -> bool {
// Default: not handled, fall back to standard behavior
false
}
/// Returns the source span of the most recently consumed event.
///
/// This is used for error reporting - when a deserialization error occurs,
/// the span of the last consumed event helps locate the problem in the input.
///
/// Parsers that track source positions should override this to return
/// meaningful span information. The default implementation returns `None`.
fn current_span(&self) -> Option<Span> {
None
}
/// Returns the format namespace for format-specific proxy resolution.
///
/// When a field or container has format-specific proxies (e.g., `#[facet(xml::proxy = XmlProxy)]`),
/// this namespace is used to look up the appropriate proxy. If no namespace is returned,
/// only the format-agnostic proxy (`#[facet(proxy = ...)]`) is considered.
///
/// Examples:
/// - XML parser should return `Some("xml")`
/// - JSON parser should return `Some("json")`
///
/// Default: returns `None` (only format-agnostic proxies are used).
fn format_namespace(&self) -> Option<&'static str> {
None
}
}
/// Metadata about an enum variant for use with `hint_enum`.
///
/// Provides the information needed by non-self-describing formats to correctly
/// parse enum variants, including the variant's structure kind and field count.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EnumVariantHint {
/// Name of the variant (e.g., "Some", "Pair", "Named")
pub name: &'static str,
/// The kind of struct this variant represents (Unit, Tuple, TupleStruct, or Struct)
pub kind: facet_core::StructKind,
/// Number of fields in this variant
pub field_count: usize,
}
/// Hint for what scalar type is expected next.
///
/// Used by non-self-describing formats to know how to decode the next value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScalarTypeHint {
/// Boolean (postcard: 0 or 1 byte)
Bool,
/// Unsigned 8-bit integer (postcard: raw byte)
U8,
/// Unsigned 16-bit integer (postcard: varint)
U16,
/// Unsigned 32-bit integer (postcard: varint)
U32,
/// Unsigned 64-bit integer (postcard: varint)
U64,
/// Unsigned 128-bit integer (postcard: varint)
U128,
/// Platform-sized unsigned integer (postcard: varint)
Usize,
/// Signed 8-bit integer (postcard: zigzag varint)
I8,
/// Signed 16-bit integer (postcard: zigzag varint)
I16,
/// Signed 32-bit integer (postcard: zigzag varint)
I32,
/// Signed 64-bit integer (postcard: zigzag varint)
I64,
/// Signed 128-bit integer (postcard: zigzag varint)
I128,
/// Platform-sized signed integer (postcard: zigzag varint)
Isize,
/// 32-bit float (postcard: 4 bytes little-endian)
F32,
/// 64-bit float (postcard: 8 bytes little-endian)
F64,
/// UTF-8 string (postcard: varint length + bytes)
String,
/// Raw bytes (postcard: varint length + bytes)
Bytes,
/// Character (postcard: UTF-8 encoded)
Char,
}