facet_solver/lib.rs
1#![cfg_attr(not(feature = "std"), no_std)]
2//!
3//! [](https://coveralls.io/github/facet-rs/facet?branch=main)
4//! [](https://crates.io/crates/facet-solver)
5//! [](https://docs.rs/facet-solver)
6//! [](./LICENSE)
7//! [](https://discord.gg/JhD7CwCJ8F)
8//!
9//!
10//! Helps facet deserializers implement `#[facet(flatten)]` and `#[facet(untagged)]`
11//! correctly, efficiently, and with useful diagnostics.
12//!
13//! ## The Problem
14//!
15//! When deserializing a type with a flattened enum:
16//!
17//! ```rust
18//! # use facet::Facet;
19//! #[derive(Facet)]
20//! struct TextMessage { content: String }
21//!
22//! #[derive(Facet)]
23//! struct BinaryMessage { data: Vec<u8>, encoding: String }
24//!
25//! #[derive(Facet)]
26//! #[repr(u8)]
27//! enum MessagePayload {
28//! Text(TextMessage),
29//! Binary(BinaryMessage),
30//! }
31//!
32//! #[derive(Facet)]
33//! struct Message {
34//! id: String,
35//! #[facet(flatten)]
36//! payload: MessagePayload,
37//! }
38//! ```
39//!
40//! ...we don't know which variant to use until we've seen the fields:
41//!
42//! ```json
43//! {"id": "msg-1", "content": "hello"} // Text
44//! {"id": "msg-2", "data": [1,2,3], "encoding": "raw"} // Binary
45//! ```
46//!
47//! The solver answers: "which variant has a `content` field?" or "which variant
48//! has both `data` and `encoding`?"
49//!
50//! ## How It Works
51//!
52//! The solver pre-computes all valid field combinations ("configurations") for a type,
53//! then uses an inverted index to quickly find which configuration(s) match the
54//! fields you've seen.
55//!
56//! ```rust
57//! use facet_solver::{KeyResult, Schema, Solver};
58//! # use facet::Facet;
59//! # #[derive(Facet)]
60//! # struct TextMessage { content: String }
61//! # #[derive(Facet)]
62//! # struct BinaryMessage { data: Vec<u8>, encoding: String }
63//! # #[derive(Facet)]
64//! # #[repr(u8)]
65//! # enum MessagePayload { Text(TextMessage), Binary(BinaryMessage) }
66//! # #[derive(Facet)]
67//! # struct Message { id: String, #[facet(flatten)] payload: MessagePayload }
68//!
69//! // Build schema once (can be cached)
70//! let schema = Schema::build(Message::SHAPE).unwrap();
71//!
72//! // Create a solver for this deserialization
73//! let mut solver = Solver::new(&schema);
74//!
75//! // As you see fields, report them:
76//! match solver.see_key("id") {
77//! KeyResult::Unambiguous { .. } => { /* both configs have "id" */ }
78//! _ => {}
79//! }
80//!
81//! match solver.see_key("content") {
82//! KeyResult::Solved(config) => {
83//! // Only Text has "content" - we now know the variant!
84//! assert!(config.resolution().has_key_path(&["content"]));
85//! }
86//! _ => {}
87//! }
88//! ```
89//!
90//! ### Nested Disambiguation
91//!
92//! When top-level keys don't distinguish variants, the solver can look deeper:
93//!
94//! ```rust
95//! # use facet::Facet;
96//! #[derive(Facet)]
97//! struct TextPayload { content: String }
98//!
99//! #[derive(Facet)]
100//! struct BinaryPayload { bytes: Vec<u8> }
101//!
102//! #[derive(Facet)]
103//! #[repr(u8)]
104//! enum Payload {
105//! Text { inner: TextPayload },
106//! Binary { inner: BinaryPayload },
107//! }
108//!
109//! #[derive(Facet)]
110//! struct Wrapper {
111//! #[facet(flatten)]
112//! payload: Payload,
113//! }
114//! ```
115//!
116//! Both variants have an `inner` field. But `inner.content` only exists in `Text`,
117//! and `inner.bytes` only exists in `Binary`. The `ProbingSolver` handles this:
118//!
119//! ```rust
120//! use facet_solver::{ProbingSolver, ProbeResult, Schema};
121//! # use facet::Facet;
122//! # #[derive(Facet)]
123//! # struct TextPayload { content: String }
124//! # #[derive(Facet)]
125//! # struct BinaryPayload { bytes: Vec<u8> }
126//! # #[derive(Facet)]
127//! # #[repr(u8)]
128//! # enum Payload { Text { inner: TextPayload }, Binary { inner: BinaryPayload } }
129//! # #[derive(Facet)]
130//! # struct Wrapper { #[facet(flatten)] payload: Payload }
131//!
132//! let schema = Schema::build(Wrapper::SHAPE).unwrap();
133//! let mut solver = ProbingSolver::new(&schema);
134//!
135//! // Top-level "inner" doesn't disambiguate
136//! assert!(matches!(solver.probe_key(&[], "inner"), ProbeResult::KeepGoing));
137//!
138//! // But "inner.content" does!
139//! match solver.probe_key(&["inner"], "content") {
140//! ProbeResult::Solved(config) => {
141//! assert!(config.has_key_path(&["inner", "content"]));
142//! }
143//! _ => panic!("should have solved"),
144//! }
145//! ```
146//!
147//! ### Lazy Type Disambiguation
148//!
149//! Sometimes variants have **identical keys** but different value types. The solver handles
150//! this without buffering—it lets you probe "can this value fit type X?" lazily:
151//!
152//! ```rust
153//! # use facet::Facet;
154//! #[derive(Facet)]
155//! struct SmallPayload { value: u8 }
156//!
157//! #[derive(Facet)]
158//! struct LargePayload { value: u16 }
159//!
160//! #[derive(Facet)]
161//! #[repr(u8)]
162//! enum Payload {
163//! Small { payload: SmallPayload },
164//! Large { payload: LargePayload },
165//! }
166//!
167//! #[derive(Facet)]
168//! struct Container {
169//! #[facet(flatten)]
170//! inner: Payload,
171//! }
172//! ```
173//!
174//! Both variants have `payload.value`, but one is `u8` (max 255) and one is `u16` (max 65535).
175//! When the deserializer sees value `1000`, it can rule out `Small` without ever parsing into
176//! the wrong type:
177//!
178//! ```rust
179//! use facet_solver::{Solver, KeyResult, Schema};
180//! # use facet::Facet;
181//! # #[derive(Facet)]
182//! # struct SmallPayload { value: u8 }
183//! # #[derive(Facet)]
184//! # struct LargePayload { value: u16 }
185//! # #[derive(Facet)]
186//! # #[repr(u8)]
187//! # enum Payload { Small { payload: SmallPayload }, Large { payload: LargePayload } }
188//! # #[derive(Facet)]
189//! # struct Container { #[facet(flatten)] inner: Payload }
190//!
191//! let schema = Schema::build(Container::SHAPE).unwrap();
192//! let mut solver = Solver::new(&schema);
193//!
194//! // "payload" exists in both - ambiguous by key alone
195//! solver.probe_key(&[], "payload");
196//!
197//! // "value" also exists in both, but with different types!
198//! match solver.probe_key(&["payload"], "value") {
199//! KeyResult::Ambiguous { fields } => {
200//! // fields contains (FieldInfo, score) pairs for u8 and u16
201//! // Lower score = more specific type
202//! assert_eq!(fields.len(), 2);
203//! }
204//! _ => {}
205//! }
206//!
207//! // Deserializer sees value 1000 - ask which types fit
208//! let shapes = solver.get_shapes_at_path(&["payload", "value"]);
209//! let fits: Vec<_> = shapes.iter()
210//! .filter(|s| match s.type_identifier {
211//! "u8" => "1000".parse::<u8>().is_ok(), // false!
212//! "u16" => "1000".parse::<u16>().is_ok(), // true
213//! _ => false,
214//! })
215//! .copied()
216//! .collect();
217//!
218//! // Narrow to types the value actually fits
219//! solver.satisfy_at_path(&["payload", "value"], &fits);
220//! assert_eq!(solver.candidates().len(), 1); // Solved: Large
221//! ```
222//!
223//! This enables true streaming deserialization: you never buffer values, never parse
224//! speculatively, and never lose precision. The solver tells you what types are possible,
225//! you check which ones the raw input satisfies, and disambiguation happens lazily.
226//!
227//! ## Performance
228//!
//! - **O(log n) field lookup**: The inverted index is a `BTreeMap` from field names to bitmasks
230//! - **O(configs/64) narrowing**: Bitwise AND to filter candidates
//! - **Minimal allocation during solving**: Schema is built once; narrowing candidates just manipulates bitmasks
232//! - **Early termination**: Stops as soon as one candidate remains
233//!
234//! Typical disambiguation: ~50ns for 4 configurations, <1µs for 64+ configurations.
235//!
236//! ## Why This Exists
237//!
238//! Serde's `#[serde(flatten)]` and `#[serde(untagged)]` have fundamental limitations
239//! because they buffer values into an intermediate `Content` enum, then re-deserialize.
240//! This loses type information and breaks many use cases.
241//!
242//! Facet takes a different approach: **determine the type first, then deserialize
243//! directly**. No buffering, no loss of fidelity.
244//!
245//! ### Serde Issues This Resolves
246//!
247//! | Issue | Problem | Facet's Solution |
248//! |-------|---------|------------------|
249//! | [serde#2186](https://github.com/serde-rs/serde/issues/2186) | Flatten buffers into `Content`, losing type distinctions (e.g., `1` vs `"1"`) | Scan keys only, deserialize values directly into the resolved type |
250//! | [serde#1600](https://github.com/serde-rs/serde/issues/1600) | `flatten` + `deny_unknown_fields` doesn't work | Schema knows all valid fields per configuration |
251//! | [serde#1626](https://github.com/serde-rs/serde/issues/1626) | `flatten` + `default` on enums | Solver tracks required vs optional per-field |
252//! | [serde#1560](https://github.com/serde-rs/serde/issues/1560) | Empty variant ambiguity with "first match wins" | Explicit configuration enumeration, no guessing |
253//! | [serde_json#721](https://github.com/serde-rs/json/issues/721) | `arbitrary_precision` + `flatten` loses precision | No buffering through `serde_json::Value` |
254//! | [serde_json#1155](https://github.com/serde-rs/json/issues/1155) | `u128` in flattened struct fails | Direct deserialization, no `Value` intermediary |
255//!
256#![doc = include_str!("../readme-footer.md")]
257
258extern crate alloc;
259
260use alloc::borrow::Cow;
261use alloc::collections::BTreeMap;
262use alloc::collections::BTreeSet;
263use alloc::string::{String, ToString};
264use alloc::vec;
265use alloc::vec::Vec;
266use core::fmt;
267
268use facet_core::{Def, Field, Shape, StructType, Type, UserType, Variant};
269
270// Re-export resolution types from facet-reflect
271pub use facet_reflect::{
272 DuplicateFieldError, FieldCategory, FieldInfo, FieldKey, FieldPath, KeyPath, MatchResult,
273 PathSegment, Resolution, VariantSelection,
274};
275
/// Selects how a schema categorizes and indexes the fields of a type.
///
/// Serialization formats disagree on what counts as a "field":
/// - Flat formats (JSON, TOML, YAML) see every field as a plain key-value pair.
/// - DOM formats (XML, HTML) tell attributes, elements, and text content apart.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum Format {
    /// Flat key-value formats (JSON, TOML, YAML, etc.). This is the default.
    ///
    /// Every field is an undifferentiated key; field names are reported to
    /// the solver through `see_key()`.
    #[default]
    Flat,

    /// DOM/tree formats (XML, HTML).
    ///
    /// Fields carry a category (attribute, element, or text content) and are
    /// reported to the solver together with that category through
    /// `see_attribute()`, `see_element()`, and friends.
    Dom,
}
297
298/// Cached schema for a type that may contain flattened fields.
299///
300/// This is computed once per Shape and can be cached forever since
301/// type information is static.
302#[derive(Debug)]
303pub struct Schema {
304 /// The shape this schema is for (kept for future caching key)
305 #[allow(dead_code)]
306 shape: &'static Shape,
307
308 /// The format this schema was built for.
309 format: Format,
310
311 /// All possible resolutions of this type.
312 /// For types with no enums in flatten paths, this has exactly 1 entry.
313 /// For types with enums, this has one entry per valid combination of variants.
314 resolutions: Vec<Resolution>,
315
316 /// Inverted index for Flat format: field_name → bitmask of configuration indices.
317 /// Bit i is set if `resolutions[i]` contains this field.
318 /// Uses a `Vec<u64>` to support arbitrary numbers of resolutions.
319 field_to_resolutions: BTreeMap<&'static str, ResolutionSet>,
320
321 /// Inverted index for Dom format: (category, name) → bitmask of configuration indices.
322 /// Only populated when format is Dom.
323 dom_field_to_resolutions: BTreeMap<(FieldCategory, &'static str), ResolutionSet>,
324}
325
326/// Handle that identifies a specific resolution inside a schema.
327#[derive(Debug, Clone, Copy)]
328pub struct ResolutionHandle<'a> {
329 index: usize,
330 resolution: &'a Resolution,
331}
332
333impl<'a> PartialEq for ResolutionHandle<'a> {
334 fn eq(&self, other: &Self) -> bool {
335 self.index == other.index
336 }
337}
338
339impl<'a> Eq for ResolutionHandle<'a> {}
340
341impl<'a> ResolutionHandle<'a> {
342 /// Internal helper to build a handle for an index within a schema.
343 fn from_schema(schema: &'a Schema, index: usize) -> Self {
344 Self {
345 index,
346 resolution: &schema.resolutions[index],
347 }
348 }
349
350 /// Resolution index within the originating schema.
351 pub const fn index(self) -> usize {
352 self.index
353 }
354
355 /// Access the underlying resolution metadata.
356 pub const fn resolution(self) -> &'a Resolution {
357 self.resolution
358 }
359}
360
/// A set of resolution (configuration) indices, stored as a bitmask.
///
/// Index `i` is a member when bit `i % 64` of word `i / 64` is set.
/// Intersection is a word-wise AND, so its cost grows with the number of
/// 64-bit words (one word covers up to 64 resolutions), not with the number
/// of members.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolutionSet {
    /// Bitmask where bit i indicates `resolutions[i]` is in the set.
    /// For most types, a single u64 suffices (up to 64 configs).
    bits: Vec<u64>,
    /// Number of resolutions in the set (kept in sync with `bits`).
    count: usize,
}

impl ResolutionSet {
    /// Create an empty set with room for `num_resolutions` indices.
    fn empty(num_resolutions: usize) -> Self {
        Self {
            bits: vec![0; num_resolutions.div_ceil(64)],
            count: 0,
        }
    }

    /// Create a set containing every index in `0..num_resolutions`.
    fn full(num_resolutions: usize) -> Self {
        let mut bits = vec![!0u64; num_resolutions.div_ceil(64)];
        // When the last word is only partially used, clear the bits that lie
        // beyond `num_resolutions` so they never show up as members.
        let tail_bits = num_resolutions % 64;
        if tail_bits != 0 {
            if let Some(last) = bits.last_mut() {
                *last = (1u64 << tail_bits) - 1;
            }
        }
        Self {
            bits,
            count: num_resolutions,
        }
    }

    /// Insert a configuration index; inserting an existing member is a no-op.
    ///
    /// # Panics
    ///
    /// Panics if `idx` lies beyond the capacity the set was created with.
    fn insert(&mut self, idx: usize) {
        let word = idx / 64;
        let mask = 1u64 << (idx % 64);
        if self.bits[word] & mask == 0 {
            self.bits[word] |= mask;
            self.count += 1;
        }
    }

    /// Intersect with another set in place.
    ///
    /// Words that `other` does not have are treated as empty, so members of
    /// `self` beyond `other`'s width are removed and `len()` always agrees
    /// with the bits actually set.
    fn intersect_with(&mut self, other: &ResolutionSet) {
        let mut remaining = 0;
        for (word_idx, word) in self.bits.iter_mut().enumerate() {
            *word &= other.bits.get(word_idx).copied().unwrap_or(0);
            remaining += word.count_ones() as usize;
        }
        self.count = remaining;
    }

    /// Check whether the two sets share at least one member.
    /// Does not modify either set.
    fn intersects(&self, other: &ResolutionSet) -> bool {
        self.bits
            .iter()
            .zip(other.bits.iter())
            .any(|(a, b)| (*a & *b) != 0)
    }

    /// Number of resolutions in the set.
    const fn len(&self) -> usize {
        self.count
    }

    /// Whether the set has no members.
    const fn is_empty(&self) -> bool {
        self.count == 0
    }

    /// The first (lowest) configuration index in the set, if any.
    fn first(&self) -> Option<usize> {
        let word_idx = self.bits.iter().position(|&word| word != 0)?;
        Some(word_idx * 64 + self.bits[word_idx].trailing_zeros() as usize)
    }

    /// Iterate over the configuration indices in the set, in ascending order.
    fn iter(&self) -> impl Iterator<Item = usize> + '_ {
        self.bits.iter().enumerate().flat_map(|(word_idx, &word)| {
            (0..64usize)
                .filter(move |&bit| word & (1u64 << bit) != 0)
                .map(move |bit| word_idx * 64 + bit)
        })
    }
}
457
458/// Find fields that could disambiguate between resolutions.
459/// Returns fields that exist in some but not all resolutions.
460fn find_disambiguating_fields(configs: &[&Resolution]) -> Vec<String> {
461 if configs.len() < 2 {
462 return Vec::new();
463 }
464
465 // Collect all field names across all configs
466 let mut all_fields: BTreeSet<&str> = BTreeSet::new();
467 for config in configs {
468 for info in config.fields().values() {
469 all_fields.insert(info.serialized_name);
470 }
471 }
472
473 // Find fields that are in some but not all configs
474 let mut disambiguating = Vec::new();
475 for field in all_fields {
476 let count = configs
477 .iter()
478 .filter(|c| c.field_by_name(field).is_some())
479 .count();
480 if count > 0 && count < configs.len() {
481 disambiguating.push(field.to_string());
482 }
483 }
484
485 disambiguating
486}
487
488/// Information about a missing required field for error reporting.
489#[derive(Debug, Clone)]
490pub struct MissingFieldInfo {
491 /// The serialized field name (as it appears in input)
492 pub name: &'static str,
493 /// Full path to the field (e.g., "backend.connection.port")
494 pub path: String,
495 /// The Rust type that defines this field
496 pub defined_in: String,
497}
498
499impl MissingFieldInfo {
500 /// Create from a FieldInfo
501 fn from_field_info(info: &FieldInfo) -> Self {
502 Self {
503 name: info.serialized_name,
504 path: info.path.to_string(),
505 defined_in: info.value_shape.type_identifier.to_string(),
506 }
507 }
508}
509
510/// Information about why a specific candidate (resolution) failed to match.
511#[derive(Debug, Clone)]
512pub struct CandidateFailure {
513 /// Human-readable description of the variant (e.g., "DatabaseBackend::Postgres")
514 pub variant_name: String,
515 /// Required fields that were not provided in the input
516 pub missing_fields: Vec<MissingFieldInfo>,
517 /// Fields in the input that don't exist in this candidate
518 pub unknown_fields: Vec<String>,
519 /// Number of unknown fields that have "did you mean?" suggestions for this candidate
520 /// Higher = more likely the user intended this variant
521 pub suggestion_matches: usize,
522}
523
/// A "did you mean?" hint for an input field that looks like a misspelling.
#[derive(Debug, Clone)]
pub struct FieldSuggestion {
    /// The unrecognized field name as it appeared in the input.
    pub unknown: String,
    /// The known field name proposed as the correction.
    pub suggestion: &'static str,
    /// Similarity between the two names (0.0 to 1.0; higher means more similar).
    pub similarity: f64,
}
534
535/// Errors that can occur when building a schema.
536#[derive(Debug, Clone)]
537pub enum SchemaError {
538 /// A field name appears from multiple sources (parent struct and flattened struct)
539 DuplicateField(DuplicateFieldError),
540}
541
542impl From<DuplicateFieldError> for SchemaError {
543 fn from(err: DuplicateFieldError) -> Self {
544 SchemaError::DuplicateField(err)
545 }
546}
547
548impl fmt::Display for SchemaError {
549 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
550 match self {
551 SchemaError::DuplicateField(err) => {
552 write!(
553 f,
554 "Duplicate field name '{}' from different sources: {} vs {}. \
555 This usually means a parent struct and a flattened struct both \
556 define a field with the same name.",
557 err.field_name, err.first_path, err.second_path
558 )
559 }
560 }
561 }
562}
563
564#[cfg(feature = "std")]
565impl std::error::Error for SchemaError {}
566
567/// Errors that can occur during flatten resolution.
568#[derive(Debug, Clone)]
569pub enum SolverError {
570 /// No configuration matches the input fields
571 NoMatch {
572 /// The input fields that were provided
573 input_fields: Vec<String>,
574 /// Missing required fields (from the closest matching config) - simple names for backwards compat
575 missing_required: Vec<&'static str>,
576 /// Missing required fields with full path information
577 missing_required_detailed: Vec<MissingFieldInfo>,
578 /// Unknown fields that don't belong to any config
579 unknown_fields: Vec<String>,
580 /// Description of the closest matching configuration
581 closest_resolution: Option<String>,
582 /// Why each candidate failed to match (detailed per-candidate info)
583 candidate_failures: Vec<CandidateFailure>,
584 /// "Did you mean?" suggestions for unknown fields
585 suggestions: Vec<FieldSuggestion>,
586 },
587 /// Multiple resolutions match the input fields
588 Ambiguous {
589 /// Descriptions of the matching resolutions
590 candidates: Vec<String>,
591 /// Fields that could disambiguate (unique to specific configs)
592 disambiguating_fields: Vec<String>,
593 },
594}
595
596impl fmt::Display for SolverError {
597 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
598 match self {
599 SolverError::NoMatch {
600 input_fields,
601 missing_required: _,
602 missing_required_detailed,
603 unknown_fields,
604 closest_resolution,
605 candidate_failures,
606 suggestions,
607 } => {
608 write!(f, "No matching configuration for fields {input_fields:?}")?;
609
610 // Show per-candidate failure reasons if available
611 if !candidate_failures.is_empty() {
612 write!(f, "\n\nNo variant matched:")?;
613 for failure in candidate_failures {
614 write!(f, "\n - {}", failure.variant_name)?;
615 if !failure.missing_fields.is_empty() {
616 let names: Vec<_> =
617 failure.missing_fields.iter().map(|m| m.name).collect();
618 if names.len() == 1 {
619 write!(f, ": missing field '{}'", names[0])?;
620 } else {
621 write!(f, ": missing fields {names:?}")?;
622 }
623 }
624 if !failure.unknown_fields.is_empty() {
625 if failure.missing_fields.is_empty() {
626 write!(f, ":")?;
627 } else {
628 write!(f, ",")?;
629 }
630 write!(f, " unknown fields {:?}", failure.unknown_fields)?;
631 }
632 }
633 } else if let Some(config) = closest_resolution {
634 // Fallback to closest match if no per-candidate info
635 write!(f, " (closest match: {config})")?;
636 if !missing_required_detailed.is_empty() {
637 write!(f, "; missing required fields:")?;
638 for info in missing_required_detailed {
639 write!(f, " {} (at path: {})", info.name, info.path)?;
640 }
641 }
642 }
643
644 // Show unknown fields with suggestions
645 if !unknown_fields.is_empty() {
646 write!(f, "\n\nUnknown fields: {unknown_fields:?}")?;
647 }
648 for suggestion in suggestions {
649 write!(
650 f,
651 "\n Did you mean '{}' instead of '{}'?",
652 suggestion.suggestion, suggestion.unknown
653 )?;
654 }
655
656 Ok(())
657 }
658 SolverError::Ambiguous {
659 candidates,
660 disambiguating_fields,
661 } => {
662 write!(f, "Ambiguous: multiple resolutions match: {candidates:?}")?;
663 if !disambiguating_fields.is_empty() {
664 write!(
665 f,
666 "; try adding one of these fields to disambiguate: {disambiguating_fields:?}"
667 )?;
668 }
669 Ok(())
670 }
671 }
672 }
673}
674
675#[cfg(feature = "std")]
676impl std::error::Error for SolverError {}
677
678/// Compute a specificity score for a shape. Lower score = more specific.
679///
680/// This is used to disambiguate when a value could satisfy multiple types.
681/// For example, the value `42` fits both `u8` and `u16`, but `u8` is more
682/// specific (lower score), so it should be preferred.
683/// Compute a specificity score for a shape.
684///
685/// Lower score = more specific type. Used for type-based disambiguation
686/// where we want to try more specific types first (e.g., u8 before u16).
687pub fn specificity_score(shape: &'static Shape) -> u64 {
688 // Use type_identifier to determine specificity
689 // Smaller integer types are more specific
690 match shape.type_identifier {
691 "u8" | "i8" => 8,
692 "u16" | "i16" => 16,
693 "u32" | "i32" | "f32" => 32,
694 "u64" | "i64" | "f64" => 64,
695 "u128" | "i128" => 128,
696 "usize" | "isize" => 64, // Treat as 64-bit
697 // Other types get a high score (less specific)
698 _ => 1000,
699 }
700}
701
702// ============================================================================
703// Solver (State Machine)
704// ============================================================================
705
706/// Result of reporting a key to the solver.
707#[derive(Debug)]
708pub enum KeyResult<'a> {
709 /// All candidates have the same type for this key.
710 /// The deserializer can parse the value directly.
711 Unambiguous {
712 /// The shape all candidates expect for this field
713 shape: &'static Shape,
714 },
715
716 /// Candidates have different types for this key - need disambiguation.
717 /// Deserializer should parse the value, determine which fields it can
718 /// satisfy, and call `satisfy()` with the viable fields.
719 ///
720 /// **Important**: When multiple fields can be satisfied by the value,
721 /// pick the one with the lowest score (most specific). Scores are assigned
722 /// by specificity, e.g., `u8` has a lower score than `u16`.
723 Ambiguous {
724 /// The unique fields across remaining candidates (deduplicated by shape),
725 /// paired with a specificity score. Lower score = more specific type.
726 /// Deserializer should check which of these the value can satisfy,
727 /// then pick the one with the lowest score.
728 fields: Vec<(&'a FieldInfo, u64)>,
729 },
730
731 /// This key disambiguated to exactly one configuration.
732 Solved(ResolutionHandle<'a>),
733
734 /// This key doesn't exist in any remaining candidate.
735 Unknown,
736}
737
738/// Result of reporting which fields the value can satisfy.
739#[derive(Debug)]
740pub enum SatisfyResult<'a> {
741 /// Continue - still multiple candidates, keep feeding keys.
742 Continue,
743
744 /// Solved to exactly one configuration.
745 Solved(ResolutionHandle<'a>),
746
747 /// No configuration can accept the value (no fields were satisfied).
748 NoMatch,
749}
750
751/// State machine solver for lazy value-based disambiguation.
752///
753/// This solver only requests value inspection when candidates disagree on type.
754/// For keys where all candidates expect the same type, the deserializer can
755/// skip detailed value analysis.
756///
757/// # Example
758///
759/// ```rust
760/// use facet::Facet;
761/// use facet_solver::{Schema, Solver, KeyResult, SatisfyResult};
762///
763/// #[derive(Facet)]
764/// #[repr(u8)]
765/// enum NumericValue {
766/// Small(u8),
767/// Large(u16),
768/// }
769///
770/// #[derive(Facet)]
771/// struct Container {
772/// #[facet(flatten)]
773/// value: NumericValue,
774/// }
775///
776/// let schema = Schema::build(Container::SHAPE).unwrap();
777/// let mut solver = Solver::new(&schema);
778///
779/// // The field "0" has different types (u8 vs u16) - solver needs disambiguation
780/// match solver.see_key("0") {
781/// KeyResult::Ambiguous { fields } => {
782/// // Deserializer sees value "1000", checks which fields can accept it
783/// // u8 can't hold 1000, u16 can - so only report the u16 field
784/// // Fields come with specificity scores - lower = more specific
785/// let satisfied: Vec<_> = fields.iter()
786/// .filter(|(f, _score)| {
787/// // deserializer's logic: can this value parse as this field's type?
788/// f.value_shape.type_identifier == "u16"
789/// })
790/// .map(|(f, _)| *f)
791/// .collect();
792///
793/// match solver.satisfy(&satisfied) {
794/// SatisfyResult::Solved(config) => {
795/// assert!(config.resolution().describe().contains("Large"));
796/// }
797/// _ => panic!("expected solved"),
798/// }
799/// }
800/// _ => panic!("expected Ambiguous"),
801/// }
802/// ```
803#[derive(Debug)]
804pub struct Solver<'a> {
805 /// Reference to the schema for configuration lookup
806 schema: &'a Schema,
807 /// Bitmask of remaining candidate configuration indices
808 candidates: ResolutionSet,
809 /// Set of seen keys for required field checking.
810 /// For Flat format, stores FieldKey::Flat. For Dom format, stores FieldKey::Dom.
811 seen_keys: BTreeSet<FieldKey<'a>>,
812}
813
814impl<'a> Solver<'a> {
815 /// Create a new solver from a schema.
816 pub fn new(schema: &'a Schema) -> Self {
817 Self {
818 schema,
819 candidates: ResolutionSet::full(schema.resolutions.len()),
820 seen_keys: BTreeSet::new(),
821 }
822 }
823
824 /// Report a key. Returns what to do next.
825 ///
826 /// - `Unambiguous`: All candidates agree on the type - parse directly
827 /// - `Ambiguous`: Types differ - check which fields the value can satisfy
828 /// - `Solved`: Disambiguated to one config
829 /// - `Unknown`: Key not found in any candidate
830 ///
831 /// Accepts both borrowed (`&str`) and owned (`String`) keys via `Cow`.
832 /// For DOM format, use `see_attribute()`, `see_element()`, etc. instead.
833 pub fn see_key(&mut self, key: impl Into<FieldKey<'a>>) -> KeyResult<'a> {
834 let key = key.into();
835 self.see_key_internal(key)
836 }
837
838 /// Report an attribute key (DOM format only).
839 pub fn see_attribute(&mut self, name: impl Into<Cow<'a, str>>) -> KeyResult<'a> {
840 self.see_key_internal(FieldKey::attribute(name))
841 }
842
843 /// Report an element key (DOM format only).
844 pub fn see_element(&mut self, name: impl Into<Cow<'a, str>>) -> KeyResult<'a> {
845 self.see_key_internal(FieldKey::element(name))
846 }
847
848 /// Report a text content key (DOM format only).
849 pub fn see_text(&mut self) -> KeyResult<'a> {
850 self.see_key_internal(FieldKey::text())
851 }
852
853 /// Internal implementation of key lookup.
854 fn see_key_internal(&mut self, key: FieldKey<'a>) -> KeyResult<'a> {
855 self.seen_keys.insert(key.clone());
856
857 // Key-based filtering - use appropriate index based on format
858 let resolutions_with_key = match (&key, self.schema.format) {
859 (FieldKey::Flat(name), Format::Flat) => {
860 self.schema.field_to_resolutions.get(name.as_ref())
861 }
862 (FieldKey::Flat(name), Format::Dom) => {
863 // Flat key on DOM schema - try as element (most common)
864 self.schema
865 .dom_field_to_resolutions
866 .get(&(FieldCategory::Element, name.as_ref()))
867 }
868 (FieldKey::Dom(cat, name), Format::Dom) => {
869 // For Text/Tag/Elements categories, the name is often empty
870 // because there's only one such field per struct. Search by category.
871 if matches!(
872 cat,
873 FieldCategory::Text | FieldCategory::Tag | FieldCategory::Elements
874 ) && name.is_empty()
875 {
876 // Find any field with this category
877 self.schema
878 .dom_field_to_resolutions
879 .iter()
880 .find(|((c, _), _)| c == cat)
881 .map(|(_, rs)| rs)
882 } else {
883 self.schema
884 .dom_field_to_resolutions
885 .get(&(*cat, name.as_ref()))
886 }
887 }
888 (FieldKey::Dom(_, name), Format::Flat) => {
889 // DOM key on flat schema - ignore category
890 self.schema.field_to_resolutions.get(name.as_ref())
891 }
892 };
893
894 let resolutions_with_key = match resolutions_with_key {
895 Some(set) => set,
896 None => return KeyResult::Unknown,
897 };
898
899 // Check if this key exists in any current candidate.
900 // If not, treat it as unknown without modifying candidates.
901 // This ensures that extra/unknown fields don't eliminate valid candidates,
902 // which is important for "ignore unknown fields" semantics.
903 if !self.candidates.intersects(resolutions_with_key) {
904 return KeyResult::Unknown;
905 }
906
907 self.candidates.intersect_with(resolutions_with_key);
908
909 // Check if we've disambiguated to exactly one
910 if self.candidates.len() == 1 {
911 let idx = self.candidates.first().unwrap();
912 return KeyResult::Solved(self.handle(idx));
913 }
914
915 // Collect unique fields (by shape pointer) across remaining candidates
916 let mut unique_fields: Vec<&'a FieldInfo> = Vec::new();
917 for idx in self.candidates.iter() {
918 let config = &self.schema.resolutions[idx];
919 if let Some(info) = config.field_by_key(&key) {
920 // Deduplicate by shape pointer
921 if !unique_fields
922 .iter()
923 .any(|f| core::ptr::eq(f.value_shape, info.value_shape))
924 {
925 unique_fields.push(info);
926 }
927 }
928 }
929
930 if unique_fields.len() == 1 {
931 // All candidates have the same type - unambiguous
932 KeyResult::Unambiguous {
933 shape: unique_fields[0].value_shape,
934 }
935 } else if unique_fields.is_empty() {
936 KeyResult::Unknown
937 } else {
938 // Different types - need disambiguation
939 // Attach specificity scores so caller can pick most specific when multiple match
940 let fields_with_scores: Vec<_> = unique_fields
941 .into_iter()
942 .map(|f| (f, specificity_score(f.value_shape)))
943 .collect();
944 KeyResult::Ambiguous {
945 fields: fields_with_scores,
946 }
947 }
948 }
949
950 /// Report which fields the value can satisfy after `Ambiguous` result.
951 ///
952 /// The deserializer should pass the subset of fields (from the `Ambiguous` result)
953 /// that the actual value can be parsed into.
954 pub fn satisfy(&mut self, satisfied_fields: &[&FieldInfo]) -> SatisfyResult<'a> {
955 let satisfied_shapes: Vec<_> = satisfied_fields.iter().map(|f| f.value_shape).collect();
956 self.satisfy_shapes(&satisfied_shapes)
957 }
958
959 /// Report which shapes the value can satisfy after `Ambiguous` result from `probe_key`.
960 ///
961 /// This is the shape-based version of `satisfy`, used when disambiguating
962 /// by nested field types. The deserializer should pass the shapes that
963 /// the actual value can be parsed into.
964 ///
965 /// # Example
966 ///
967 /// ```rust
968 /// use facet::Facet;
969 /// use facet_solver::{Schema, Solver, KeyResult, SatisfyResult};
970 ///
971 /// #[derive(Facet)]
972 /// struct SmallPayload { value: u8 }
973 ///
974 /// #[derive(Facet)]
975 /// struct LargePayload { value: u16 }
976 ///
977 /// #[derive(Facet)]
978 /// #[repr(u8)]
979 /// enum PayloadKind {
980 /// Small { payload: SmallPayload },
981 /// Large { payload: LargePayload },
982 /// }
983 ///
984 /// #[derive(Facet)]
985 /// struct Container {
986 /// #[facet(flatten)]
987 /// inner: PayloadKind,
988 /// }
989 ///
990 /// let schema = Schema::build(Container::SHAPE).unwrap();
991 /// let mut solver = Solver::new(&schema);
992 ///
993 /// // Report nested key
994 /// solver.probe_key(&[], "payload");
995 ///
996 /// // At payload.value, value is 1000 - doesn't fit u8
997 /// // Get shapes at this path
998 /// let shapes = solver.get_shapes_at_path(&["payload", "value"]);
999 /// // Filter to shapes that can hold 1000
1000 /// let works: Vec<_> = shapes.iter()
1001 /// .filter(|s| s.type_identifier == "u16")
1002 /// .copied()
1003 /// .collect();
1004 /// solver.satisfy_shapes(&works);
1005 /// ```
1006 pub fn satisfy_shapes(&mut self, satisfied_shapes: &[&'static Shape]) -> SatisfyResult<'a> {
1007 if satisfied_shapes.is_empty() {
1008 self.candidates = ResolutionSet::empty(self.schema.resolutions.len());
1009 return SatisfyResult::NoMatch;
1010 }
1011
1012 let mut new_candidates = ResolutionSet::empty(self.schema.resolutions.len());
1013 for idx in self.candidates.iter() {
1014 let config = &self.schema.resolutions[idx];
1015 // Check if any of this config's fields match the satisfied shapes
1016 for field in config.fields().values() {
1017 if satisfied_shapes
1018 .iter()
1019 .any(|s| core::ptr::eq(*s, field.value_shape))
1020 {
1021 new_candidates.insert(idx);
1022 break;
1023 }
1024 }
1025 }
1026 self.candidates = new_candidates;
1027
1028 match self.candidates.len() {
1029 0 => SatisfyResult::NoMatch,
1030 1 => {
1031 let idx = self.candidates.first().unwrap();
1032 SatisfyResult::Solved(self.handle(idx))
1033 }
1034 _ => SatisfyResult::Continue,
1035 }
1036 }
1037
1038 /// Get the shapes at a nested path across all remaining candidates.
1039 ///
1040 /// This is useful when you have an `Ambiguous` result from `probe_key`
1041 /// and need to know what types are possible at that path.
1042 pub fn get_shapes_at_path(&self, path: &[&str]) -> Vec<&'static Shape> {
1043 let mut shapes: Vec<&'static Shape> = Vec::new();
1044 for idx in self.candidates.iter() {
1045 let config = &self.schema.resolutions[idx];
1046 if let Some(shape) = self.get_shape_at_path(config, path)
1047 && !shapes.iter().any(|s| core::ptr::eq(*s, shape))
1048 {
1049 shapes.push(shape);
1050 }
1051 }
1052 shapes
1053 }
1054
1055 /// Report which shapes at a nested path the value can satisfy.
1056 ///
1057 /// This is the path-aware version of `satisfy_shapes`, used when disambiguating
1058 /// by nested field types after `probe_key`.
1059 ///
1060 /// - `path`: The full path to the field (e.g., `["payload", "value"]`)
1061 /// - `satisfied_shapes`: The shapes that the value can be parsed into
1062 pub fn satisfy_at_path(
1063 &mut self,
1064 path: &[&str],
1065 satisfied_shapes: &[&'static Shape],
1066 ) -> SatisfyResult<'a> {
1067 if satisfied_shapes.is_empty() {
1068 self.candidates = ResolutionSet::empty(self.schema.resolutions.len());
1069 return SatisfyResult::NoMatch;
1070 }
1071
1072 // Keep only candidates where the shape at this path is in the satisfied set
1073 let mut new_candidates = ResolutionSet::empty(self.schema.resolutions.len());
1074 for idx in self.candidates.iter() {
1075 let config = &self.schema.resolutions[idx];
1076 if let Some(shape) = self.get_shape_at_path(config, path)
1077 && satisfied_shapes.iter().any(|s| core::ptr::eq(*s, shape))
1078 {
1079 new_candidates.insert(idx);
1080 }
1081 }
1082 self.candidates = new_candidates;
1083
1084 match self.candidates.len() {
1085 0 => SatisfyResult::NoMatch,
1086 1 => {
1087 let idx = self.candidates.first().unwrap();
1088 SatisfyResult::Solved(self.handle(idx))
1089 }
1090 _ => SatisfyResult::Continue,
1091 }
1092 }
1093
1094 /// Get the current candidate resolutions.
1095 pub fn candidates(&self) -> Vec<ResolutionHandle<'a>> {
1096 self.candidates.iter().map(|idx| self.handle(idx)).collect()
1097 }
1098
1099 /// Get the seen keys.
1100 /// Get the seen keys.
1101 pub const fn seen_keys(&self) -> &BTreeSet<FieldKey<'a>> {
1102 &self.seen_keys
1103 }
1104
1105 /// Check if a field name was seen (regardless of category for DOM format).
1106 pub fn was_field_seen(&self, field_name: &str) -> bool {
1107 self.seen_keys.iter().any(|k| k.name() == field_name)
1108 }
1109
1110 #[inline]
1111 fn handle(&self, idx: usize) -> ResolutionHandle<'a> {
1112 ResolutionHandle::from_schema(self.schema, idx)
1113 }
1114
1115 /// Hint that a specific enum variant should be selected.
1116 ///
1117 /// This filters the candidates to only those resolutions where at least one
1118 /// variant selection has the given variant name. This is useful for explicit
1119 /// type disambiguation via annotations (e.g., type annotations in various formats).
1120 ///
1121 /// Returns `true` if at least one candidate remains after filtering, `false` if
1122 /// no candidates match the variant name (in which case candidates are unchanged).
1123 ///
1124 /// # Example
1125 ///
1126 /// ```rust
1127 /// use facet::Facet;
1128 /// use facet_solver::{Schema, Solver};
1129 ///
1130 /// #[derive(Facet)]
1131 /// struct HttpSource { url: String }
1132 ///
1133 /// #[derive(Facet)]
1134 /// struct GitSource { url: String, branch: String }
1135 ///
1136 /// #[derive(Facet)]
1137 /// #[repr(u8)]
1138 /// enum SourceKind {
1139 /// Http(HttpSource),
1140 /// Git(GitSource),
1141 /// }
1142 ///
1143 /// #[derive(Facet)]
1144 /// struct Source {
1145 /// #[facet(flatten)]
1146 /// kind: SourceKind,
1147 /// }
1148 ///
1149 /// let schema = Schema::build(Source::SHAPE).unwrap();
1150 /// let mut solver = Solver::new(&schema);
1151 ///
1152 /// // Without hint, both variants are candidates
1153 /// assert_eq!(solver.candidates().len(), 2);
1154 ///
1155 /// // Hint at Http variant
1156 /// assert!(solver.hint_variant("Http"));
1157 /// assert_eq!(solver.candidates().len(), 1);
1158 /// ```
1159 pub fn hint_variant(&mut self, variant_name: &str) -> bool {
1160 // Build a set of configs that have this variant name
1161 let mut matching = ResolutionSet::empty(self.schema.resolutions.len());
1162
1163 for idx in self.candidates.iter() {
1164 let config = &self.schema.resolutions[idx];
1165 // Check if any variant selection matches the given name
1166 if config
1167 .variant_selections()
1168 .iter()
1169 .any(|vs| vs.variant_name == variant_name)
1170 {
1171 matching.insert(idx);
1172 }
1173 }
1174
1175 if matching.is_empty() {
1176 // No matches - keep candidates unchanged
1177 false
1178 } else {
1179 self.candidates = matching;
1180 true
1181 }
1182 }
1183
1184 /// Hint that a variant is selected, but only if the field is actually a tag field
1185 /// for an internally-tagged enum.
1186 ///
1187 /// This is safer than `hint_variant` because it validates that `tag_field_name`
1188 /// is actually the tag field for an internally-tagged enum in at least one
1189 /// candidate resolution before applying the hint.
1190 ///
1191 /// Returns `true` if the hint was applied (field was a valid tag field and
1192 /// at least one candidate matches), `false` otherwise.
1193 pub fn hint_variant_for_tag(&mut self, tag_field_name: &str, variant_name: &str) -> bool {
1194 // First check if any candidate has this field as an internally-tagged enum tag field
1195 let is_tag_field = self.candidates.iter().any(|idx| {
1196 let config = &self.schema.resolutions[idx];
1197 // Look for a field with the given name that is a tag field
1198 config.fields().values().any(|field| {
1199 field.serialized_name == tag_field_name
1200 && field
1201 .value_shape
1202 .get_tag_attr()
1203 .is_some_and(|tag| tag == tag_field_name)
1204 && field.value_shape.get_content_attr().is_none()
1205 })
1206 });
1207
1208 if !is_tag_field {
1209 return false;
1210 }
1211
1212 // Now apply the variant hint
1213 self.hint_variant(variant_name)
1214 }
1215
1216 /// Mark a key as seen without filtering candidates.
1217 ///
1218 /// This is useful when the key is known to be present through means other than
1219 /// parsing (e.g., type annotations). Call this after `hint_variant` to mark
1220 /// the variant name as seen so that `finish()` doesn't report it as missing.
1221 pub fn mark_seen(&mut self, key: impl Into<FieldKey<'a>>) {
1222 self.seen_keys.insert(key.into());
1223 }
1224
1225 /// Report a key at a nested path. Returns what to do next.
1226 ///
1227 /// This is the depth-aware version of `see_key`. Use this when probing
1228 /// nested structures where disambiguation might require looking inside objects.
1229 ///
1230 /// - `path`: The ancestor keys (e.g., `["payload"]` when inside a payload object)
1231 /// - `key`: The key found at this level (e.g., `"value"`)
1232 ///
1233 /// # Example
1234 ///
1235 /// ```rust
1236 /// use facet::Facet;
1237 /// use facet_solver::{Schema, Solver, KeyResult};
1238 ///
1239 /// #[derive(Facet)]
1240 /// struct SmallPayload { value: u8 }
1241 ///
1242 /// #[derive(Facet)]
1243 /// struct LargePayload { value: u16 }
1244 ///
1245 /// #[derive(Facet)]
1246 /// #[repr(u8)]
1247 /// enum PayloadKind {
1248 /// Small { payload: SmallPayload },
1249 /// Large { payload: LargePayload },
1250 /// }
1251 ///
1252 /// #[derive(Facet)]
1253 /// struct Container {
1254 /// #[facet(flatten)]
1255 /// inner: PayloadKind,
1256 /// }
1257 ///
1258 /// let schema = Schema::build(Container::SHAPE).unwrap();
1259 /// let mut solver = Solver::new(&schema);
1260 ///
1261 /// // "payload" exists in both - keep going
1262 /// solver.probe_key(&[], "payload");
1263 ///
1264 /// // "value" inside payload - both have it but different types!
1265 /// match solver.probe_key(&["payload"], "value") {
1266 /// KeyResult::Ambiguous { fields } => {
1267 /// // fields is Vec<(&FieldInfo, u64)> - field + specificity score
1268 /// // Deserializer checks: 1000 fits u16 but not u8
1269 /// // When multiple match, pick the one with lowest score (most specific)
1270 /// }
1271 /// _ => {}
1272 /// }
1273 /// ```
1274 pub fn probe_key(&mut self, path: &[&str], key: &str) -> KeyResult<'a> {
1275 // Build full path
1276 let mut full_path: Vec<&str> = path.to_vec();
1277 full_path.push(key);
1278
1279 // Filter candidates to only those that have this key path
1280 let mut new_candidates = ResolutionSet::empty(self.schema.resolutions.len());
1281 for idx in self.candidates.iter() {
1282 let config = &self.schema.resolutions[idx];
1283 if config.has_key_path(&full_path) {
1284 new_candidates.insert(idx);
1285 }
1286 }
1287 self.candidates = new_candidates;
1288
1289 if self.candidates.is_empty() {
1290 return KeyResult::Unknown;
1291 }
1292
1293 // Check if we've disambiguated to exactly one
1294 if self.candidates.len() == 1 {
1295 let idx = self.candidates.first().unwrap();
1296 return KeyResult::Solved(self.handle(idx));
1297 }
1298
1299 // Get the shape at this path for each remaining candidate
1300 // We need to traverse the type tree to find the actual field type
1301 let mut unique_shapes: Vec<(&'static Shape, usize)> = Vec::new(); // (shape, resolution_idx)
1302
1303 for idx in self.candidates.iter() {
1304 let config = &self.schema.resolutions[idx];
1305 if let Some(shape) = self.get_shape_at_path(config, &full_path) {
1306 // Deduplicate by shape pointer
1307 if !unique_shapes.iter().any(|(s, _)| core::ptr::eq(*s, shape)) {
1308 unique_shapes.push((shape, idx));
1309 }
1310 }
1311 }
1312
1313 match unique_shapes.len() {
1314 0 => KeyResult::Unknown,
1315 1 => {
1316 // All candidates have the same type at this path - unambiguous
1317 KeyResult::Unambiguous {
1318 shape: unique_shapes[0].0,
1319 }
1320 }
1321 _ => {
1322 // Different types at this path - need disambiguation
1323 // Build FieldInfo with scores for each unique shape
1324 let fields: Vec<(&'a FieldInfo, u64)> = unique_shapes
1325 .iter()
1326 .filter_map(|(shape, idx)| {
1327 let config = &self.schema.resolutions[*idx];
1328 // For nested paths, we need the parent field
1329 // e.g., for ["payload", "value"], get the "payload" field
1330 let field = if path.is_empty() {
1331 config.field_by_name(key)
1332 } else {
1333 // Return the top-level field that contains this path
1334 config.field_by_name(path[0])
1335 }?;
1336 Some((field, specificity_score(shape)))
1337 })
1338 .collect();
1339
1340 KeyResult::Ambiguous { fields }
1341 }
1342 }
1343 }
1344
1345 /// Get the shape at a nested path within a configuration.
1346 fn get_shape_at_path(&self, config: &'a Resolution, path: &[&str]) -> Option<&'static Shape> {
1347 if path.is_empty() {
1348 return None;
1349 }
1350
1351 // Start with the top-level field
1352 let top_field = config.field_by_name(path[0])?;
1353 let mut current_shape = top_field.value_shape;
1354
1355 // Navigate through nested structs
1356 for &key in &path[1..] {
1357 current_shape = self.get_field_shape(current_shape, key)?;
1358 }
1359
1360 Some(current_shape)
1361 }
1362
1363 /// Get the shape of a field within a struct shape.
1364 fn get_field_shape(&self, shape: &'static Shape, field_name: &str) -> Option<&'static Shape> {
1365 use facet_core::{StructType, Type, UserType};
1366
1367 match shape.ty {
1368 Type::User(UserType::Struct(StructType { fields, .. })) => {
1369 for field in fields {
1370 if field.effective_name() == field_name {
1371 return Some(field.shape());
1372 }
1373 }
1374 None
1375 }
1376 _ => None,
1377 }
1378 }
1379
1380 /// Finish solving. Call this after all keys have been processed.
1381 ///
1382 /// This method is necessary because key-based filtering alone cannot disambiguate
1383 /// when one variant's required fields are a subset of another's.
1384 ///
1385 /// # Why not just use `see_key()` results?
1386 ///
1387 /// `see_key()` returns `Solved` when a key *excludes* candidates down to one.
1388 /// But when the input is a valid subset of multiple variants, no key excludes
1389 /// anything — you need `finish()` to check which candidates have all their
1390 /// required fields satisfied.
1391 ///
1392 /// # Example
1393 ///
1394 /// ```rust,ignore
1395 /// enum Source {
1396 /// Http { url: String }, // required: url
1397 /// Git { url: String, branch: String }, // required: url, branch
1398 /// }
1399 /// ```
1400 ///
1401 /// | Input | `see_key` behavior | Resolution |
1402 /// |------------------------|-------------------------------------------|-----------------------|
1403 /// | `{ "url", "branch" }` | `branch` excludes `Http` → candidates = 1 | Early `Solved(Git)` |
1404 /// | `{ "url" }` | both have `url` → candidates = 2 | `finish()` → `Http` |
1405 ///
1406 /// In the second case, no key ever excludes a candidate. Only `finish()` can
1407 /// determine that `Git` is missing its required `branch` field, leaving `Http`
1408 /// as the sole viable configuration.
1409 #[allow(clippy::result_large_err)] // SolverError intentionally contains detailed diagnostic info
1410 pub fn finish(self) -> Result<ResolutionHandle<'a>, SolverError> {
1411 let Solver {
1412 schema,
1413 candidates,
1414 seen_keys,
1415 } = self;
1416
1417 // Compute all known fields across all resolutions (for unknown field detection)
1418 let all_known_fields: BTreeSet<&'static str> = schema
1419 .resolutions
1420 .iter()
1421 .flat_map(|r| r.fields().values().map(|f| f.serialized_name))
1422 .collect();
1423
1424 // Find unknown fields (fields in input that don't exist in ANY resolution)
1425 let unknown_fields: Vec<String> = seen_keys
1426 .iter()
1427 .filter(|k| !all_known_fields.contains(k.name()))
1428 .map(|k| k.name().to_string())
1429 .collect();
1430
1431 // Compute suggestions for unknown fields
1432 let suggestions = compute_suggestions(&unknown_fields, &all_known_fields);
1433
1434 if candidates.is_empty() {
1435 // Build per-candidate failure info for all resolutions
1436 let mut candidate_failures: Vec<CandidateFailure> = schema
1437 .resolutions
1438 .iter()
1439 .map(|config| build_candidate_failure(config, &seen_keys))
1440 .collect();
1441
1442 // Sort by closeness (best match first)
1443 sort_candidates_by_closeness(&mut candidate_failures);
1444
1445 return Err(SolverError::NoMatch {
1446 input_fields: seen_keys.iter().map(|k| k.name().to_string()).collect(),
1447 missing_required: Vec::new(),
1448 missing_required_detailed: Vec::new(),
1449 unknown_fields,
1450 closest_resolution: None,
1451 candidate_failures,
1452 suggestions,
1453 });
1454 }
1455
1456 // Filter candidates to only those that have all required fields satisfied
1457 let viable: Vec<usize> = candidates
1458 .iter()
1459 .filter(|idx| {
1460 let config = &schema.resolutions[*idx];
1461 config
1462 .required_field_names()
1463 .iter()
1464 .all(|f| seen_keys.iter().any(|k| k.name() == *f))
1465 })
1466 .collect();
1467
1468 match viable.len() {
1469 0 => {
1470 // No viable candidates - build per-candidate failure info
1471 let mut candidate_failures: Vec<CandidateFailure> = candidates
1472 .iter()
1473 .map(|idx| {
1474 let config = &schema.resolutions[idx];
1475 build_candidate_failure(config, &seen_keys)
1476 })
1477 .collect();
1478
1479 // Sort by closeness (best match first)
1480 sort_candidates_by_closeness(&mut candidate_failures);
1481
1482 // For backwards compatibility, also populate the "closest" fields
1483 // Now use the first (closest) candidate after sorting
1484 let closest_name = candidate_failures.first().map(|f| f.variant_name.clone());
1485 let closest_config = closest_name
1486 .as_ref()
1487 .and_then(|name| schema.resolutions.iter().find(|r| r.describe() == *name));
1488
1489 let (missing, missing_detailed, closest_resolution) =
1490 if let Some(config) = closest_config {
1491 let missing: Vec<_> = config
1492 .required_field_names()
1493 .iter()
1494 .filter(|f| !seen_keys.iter().any(|k| k.name() == **f))
1495 .copied()
1496 .collect();
1497 let missing_detailed: Vec<_> = missing
1498 .iter()
1499 .filter_map(|name| config.field_by_name(name))
1500 .map(MissingFieldInfo::from_field_info)
1501 .collect();
1502 (missing, missing_detailed, Some(config.describe()))
1503 } else {
1504 (Vec::new(), Vec::new(), None)
1505 };
1506
1507 Err(SolverError::NoMatch {
1508 input_fields: seen_keys.iter().map(|s| s.to_string()).collect(),
1509 missing_required: missing,
1510 missing_required_detailed: missing_detailed,
1511 unknown_fields,
1512 closest_resolution,
1513 candidate_failures,
1514 suggestions,
1515 })
1516 }
1517 1 => {
1518 // Exactly one viable candidate - success!
1519 Ok(ResolutionHandle::from_schema(schema, viable[0]))
1520 }
1521 _ => {
1522 // Multiple viable candidates - ambiguous!
1523 let configs: Vec<_> = viable.iter().map(|&idx| &schema.resolutions[idx]).collect();
1524 let candidates: Vec<String> = configs.iter().map(|c| c.describe()).collect();
1525 let disambiguating_fields = find_disambiguating_fields(&configs);
1526
1527 Err(SolverError::Ambiguous {
1528 candidates,
1529 disambiguating_fields,
1530 })
1531 }
1532 }
1533 }
1534}
1535
1536/// Build a CandidateFailure for a resolution given the seen keys.
1537fn build_candidate_failure<'a>(
1538 config: &Resolution,
1539 seen_keys: &BTreeSet<FieldKey<'a>>,
1540) -> CandidateFailure {
1541 let missing_fields: Vec<MissingFieldInfo> = config
1542 .required_field_names()
1543 .iter()
1544 .filter(|f| !seen_keys.iter().any(|k| k.name() == **f))
1545 .filter_map(|f| config.field_by_name(f))
1546 .map(MissingFieldInfo::from_field_info)
1547 .collect();
1548
1549 let unknown_fields: Vec<String> = seen_keys
1550 .iter()
1551 .filter(|k| config.field_by_key(k).is_none())
1552 .map(|k| k.name().to_string())
1553 .collect();
1554
1555 // Compute closeness score for ranking
1556 let suggestion_matches = compute_closeness_score(&unknown_fields, &missing_fields, config);
1557
1558 CandidateFailure {
1559 variant_name: config.describe(),
1560 missing_fields,
1561 unknown_fields,
1562 suggestion_matches,
1563 }
1564}
1565
/// Score how close a resolution is to what the user probably meant.
/// A higher score means a more likely intended variant.
///
/// The score combines:
/// - Typo matches: each unknown field whose best Jaro-Winkler similarity to a
///   field of `config` is at least 0.6 adds `similarity * 100` points
/// - Coverage: the percentage of required fields that would be present if
///   each such typo filled one missing field
/// - A penalty of 10 points per unknown field with no similar known field,
///   subtracted from the coverage part only (saturating at zero)
#[cfg(feature = "suggestions")]
fn compute_closeness_score(
    unknown_fields: &[String],
    missing_fields: &[MissingFieldInfo],
    config: &Resolution,
) -> usize {
    const SIMILARITY_THRESHOLD: f64 = 0.6;

    let mut typo_score: usize = 0;
    let mut fields_that_would_match: usize = 0;

    for unknown in unknown_fields {
        // Highest similarity among the fields that reach the threshold, if any.
        let best = config
            .fields()
            .values()
            .map(|info| strsim::jaro_winkler(unknown, info.serialized_name))
            .filter(|&similarity| similarity >= SIMILARITY_THRESHOLD)
            .fold(None::<f64>, |acc, similarity| match acc {
                Some(previous) if similarity <= previous => acc,
                _ => Some(similarity),
            });

        if let Some(similarity) = best {
            // Weight by similarity: 0.6 -> 60 points, 1.0 -> 100 points
            typo_score += (similarity * 100.0) as usize;
            fields_that_would_match += 1;
        }
    }

    // Required fields still missing if every matched typo filled one of them.
    let required_count = config.required_field_names().len();
    let would_be_missing = missing_fields.len().saturating_sub(fields_that_would_match);

    let coverage_score = match required_count {
        0 => 100, // No required fields = perfect coverage
        total => ((total - would_be_missing) * 100) / total,
    };

    // Unknown fields with no plausible typo target.
    let truly_unknown = unknown_fields.len().saturating_sub(fields_that_would_match);
    let unknown_penalty = truly_unknown * 10;

    typo_score + coverage_score.saturating_sub(unknown_penalty)
}
1626
1627/// Compute closeness score (no-op without suggestions feature).
1628#[cfg(not(feature = "suggestions"))]
1629fn compute_closeness_score(
1630 _unknown_fields: &[String],
1631 _missing_fields: &[MissingFieldInfo],
1632 _config: &Resolution,
1633) -> usize {
1634 0
1635}
1636
1637/// Sort candidate failures by closeness (best match first).
1638fn sort_candidates_by_closeness(failures: &mut [CandidateFailure]) {
1639 failures.sort_by(|a, b| {
1640 // Higher suggestion_matches (closeness score) first
1641 b.suggestion_matches.cmp(&a.suggestion_matches)
1642 });
1643}
1644
/// Build "did you mean?" suggestions for unknown fields.
///
/// For each unknown field, the known field with the highest Jaro-Winkler
/// similarity is suggested, provided that similarity is at least 0.6. On a
/// tie the first known field in set order wins. Unknown fields with no
/// sufficiently similar known field produce no suggestion.
#[cfg(feature = "suggestions")]
fn compute_suggestions(
    unknown_fields: &[String],
    all_known_fields: &BTreeSet<&'static str>,
) -> Vec<FieldSuggestion> {
    const SIMILARITY_THRESHOLD: f64 = 0.6;

    unknown_fields
        .iter()
        .filter_map(|unknown| {
            let mut winner: Option<(&'static str, f64)> = None;

            for known in all_known_fields {
                let similarity = strsim::jaro_winkler(unknown, known);
                // Replace the current winner only on a strictly better score.
                let improves = match winner {
                    None => true,
                    Some((_, previous)) => similarity > previous,
                };
                if similarity >= SIMILARITY_THRESHOLD && improves {
                    winner = Some((*known, similarity));
                }
            }

            winner.map(|(suggestion, similarity)| FieldSuggestion {
                unknown: unknown.clone(),
                suggestion,
                similarity,
            })
        })
        .collect()
}
1678
1679/// Compute "did you mean?" suggestions for unknown fields (no-op without strsim).
1680#[cfg(not(feature = "suggestions"))]
1681fn compute_suggestions(
1682 _unknown_fields: &[String],
1683 _all_known_fields: &BTreeSet<&'static str>,
1684) -> Vec<FieldSuggestion> {
1685 Vec::new()
1686}
1687
1688// ============================================================================
1689// Probing Solver (Depth-Aware)
1690// ============================================================================
1691
1692/// Result of reporting a key to the probing solver.
1693#[derive(Debug)]
1694pub enum ProbeResult<'a> {
1695 /// Keep reporting keys - not yet disambiguated
1696 KeepGoing,
1697 /// Solved! Use this configuration
1698 Solved(&'a Resolution),
1699 /// No configuration matches the observed keys
1700 NoMatch,
1701}
1702
1703/// Depth-aware probing solver for streaming deserialization.
1704///
1705/// Unlike the batch solver, this solver accepts
1706/// key reports at arbitrary depths. It's designed for the "peek" strategy:
1707///
1708/// 1. Deserializer scans keys (without parsing values) and reports them
1709/// 2. Solver filters candidates based on which configs have that key path
1710/// 3. Once one candidate remains, solver returns `Solved`
1711/// 4. Deserializer rewinds and parses into the resolved type
1712///
1713/// # Example
1714///
1715/// ```rust
1716/// use facet::Facet;
1717/// use facet_solver::{Schema, ProbingSolver, ProbeResult};
1718///
1719/// #[derive(Facet)]
1720/// struct TextPayload { content: String }
1721///
1722/// #[derive(Facet)]
1723/// struct BinaryPayload { bytes: Vec<u8> }
1724///
1725/// #[derive(Facet)]
1726/// #[repr(u8)]
1727/// enum MessageKind {
1728/// Text { payload: TextPayload },
1729/// Binary { payload: BinaryPayload },
1730/// }
1731///
1732/// #[derive(Facet)]
1733/// struct Message {
1734/// id: String,
1735/// #[facet(flatten)]
1736/// kind: MessageKind,
1737/// }
1738///
1739/// let schema = Schema::build(Message::SHAPE).unwrap();
1740/// let mut solver = ProbingSolver::new(&schema);
1741///
1742/// // "id" exists in both configs - keep going
1743/// assert!(matches!(solver.probe_key(&[], "id"), ProbeResult::KeepGoing));
1744///
1745/// // "payload" exists in both configs - keep going
1746/// assert!(matches!(solver.probe_key(&[], "payload"), ProbeResult::KeepGoing));
1747///
1748/// // "content" inside payload only exists in Text - solved!
1749/// match solver.probe_key(&["payload"], "content") {
1750/// ProbeResult::Solved(config) => {
1751/// assert!(config.has_key_path(&["payload", "content"]));
1752/// }
1753/// _ => panic!("expected Solved"),
1754/// }
1755/// ```
1756#[derive(Debug)]
1757pub struct ProbingSolver<'a> {
1758 /// Remaining candidate resolutions
1759 candidates: Vec<&'a Resolution>,
1760}
1761
1762impl<'a> ProbingSolver<'a> {
1763 /// Create a new probing solver from a schema.
1764 pub fn new(schema: &'a Schema) -> Self {
1765 Self {
1766 candidates: schema.resolutions.iter().collect(),
1767 }
1768 }
1769
1770 /// Create a new probing solver from resolutions directly.
1771 pub fn from_resolutions(configs: &'a [Resolution]) -> Self {
1772 Self {
1773 candidates: configs.iter().collect(),
1774 }
1775 }
1776
1777 /// Report a key found at a path during probing.
1778 ///
1779 /// - `path`: The ancestor keys (e.g., `["payload"]` when inside the payload object)
1780 /// - `key`: The key found at this level (e.g., `"content"`)
1781 ///
1782 /// Returns what to do next.
1783 pub fn probe_key(&mut self, path: &[&str], key: &str) -> ProbeResult<'a> {
1784 // Build the full key path (runtime strings, compared against static schema)
1785 let mut full_path: Vec<&str> = path.to_vec();
1786 full_path.push(key);
1787
1788 // Filter to candidates that have this key path
1789 self.candidates.retain(|c| c.has_key_path(&full_path));
1790
1791 match self.candidates.len() {
1792 0 => ProbeResult::NoMatch,
1793 1 => ProbeResult::Solved(self.candidates[0]),
1794 _ => ProbeResult::KeepGoing,
1795 }
1796 }
1797
1798 /// Get the current candidate resolutions.
1799 pub fn candidates(&self) -> &[&'a Resolution] {
1800 &self.candidates
1801 }
1802
1803 /// Finish probing - returns Solved if exactly one candidate remains.
1804 pub fn finish(&self) -> ProbeResult<'a> {
1805 match self.candidates.len() {
1806 0 => ProbeResult::NoMatch,
1807 1 => ProbeResult::Solved(self.candidates[0]),
1808 _ => ProbeResult::KeepGoing, // Still ambiguous
1809 }
1810 }
1811}
1812
1813// ============================================================================
1814// Variant Format Classification
1815// ============================================================================
1816
1817/// Classification of an enum variant's expected serialized format.
1818///
1819/// This is used by deserializers to determine how to parse untagged enum variants
1820/// based on the YAML/JSON/etc. value type they encounter.
1821#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1822pub enum VariantFormat {
1823 /// Unit variant: no fields, serializes as the variant name or nothing for untagged
1824 Unit,
1825
1826 /// Newtype variant wrapping a scalar type (String, numbers, bool, etc.)
1827 /// Serializes as just the scalar value for untagged enums.
1828 NewtypeScalar {
1829 /// The shape of the inner scalar type
1830 inner_shape: &'static Shape,
1831 },
1832
1833 /// Newtype variant wrapping a struct
1834 /// Serializes as a mapping for untagged enums.
1835 NewtypeStruct {
1836 /// The shape of the inner struct type
1837 inner_shape: &'static Shape,
1838 },
1839
1840 /// Newtype variant wrapping a tuple struct/tuple
1841 /// Serializes as a sequence for untagged enums.
1842 NewtypeTuple {
1843 /// The shape of the inner tuple type
1844 inner_shape: &'static Shape,
1845 /// Number of elements in the inner tuple
1846 arity: usize,
1847 },
1848
1849 /// Newtype variant wrapping a sequence type (Vec, Array, Slice, Set)
1850 /// Serializes as a sequence for untagged enums.
1851 NewtypeSequence {
1852 /// The shape of the inner sequence type
1853 inner_shape: &'static Shape,
1854 },
1855
1856 /// Newtype variant wrapping another type (enum, map, etc.)
1857 NewtypeOther {
1858 /// The shape of the inner type
1859 inner_shape: &'static Shape,
1860 },
1861
1862 /// Tuple variant with multiple fields
1863 /// Serializes as a sequence for untagged enums.
1864 Tuple {
1865 /// Number of fields in the tuple
1866 arity: usize,
1867 },
1868
1869 /// Struct variant with named fields
1870 /// Serializes as a mapping for untagged enums.
1871 Struct,
1872}
1873
1874impl VariantFormat {
1875 /// Classify a variant's expected serialized format.
1876 pub fn from_variant(variant: &'static Variant) -> Self {
1877 use facet_core::StructKind;
1878
1879 let fields = variant.data.fields;
1880 let kind = variant.data.kind;
1881
1882 match kind {
1883 StructKind::Unit => VariantFormat::Unit,
1884 // TupleStruct and Tuple are both used for tuple-like variants
1885 // depending on how they're defined. Handle them the same way.
1886 StructKind::TupleStruct | StructKind::Tuple => {
1887 if fields.len() == 1 {
1888 // Newtype variant - classify by inner type
1889 let field_shape = fields[0].shape();
1890 // Dereference through pointers to get the actual inner type
1891 let inner_shape = deref_pointer(field_shape);
1892
1893 // Check if this is a metadata container (like Spanned<T>) and unwrap it for classification
1894 // This allows untagged enum variants containing Spanned<String> etc.
1895 // to match scalar values transparently
1896 let classification_shape = if let Some(inner) =
1897 facet_reflect::get_metadata_container_value_shape(field_shape)
1898 {
1899 inner
1900 } else {
1901 field_shape
1902 };
1903
1904 if is_scalar_shape(classification_shape)
1905 || is_unit_enum_shape(classification_shape)
1906 {
1907 // Scalars and unit-only enums both serialize as primitive values
1908 // Store the classification shape (unwrapped from Spanned if needed)
1909 // so that type matching works correctly for multi-variant untagged enums
1910 VariantFormat::NewtypeScalar {
1911 inner_shape: classification_shape,
1912 }
1913 } else if let Some(arity) = tuple_struct_arity(classification_shape) {
1914 VariantFormat::NewtypeTuple { inner_shape, arity }
1915 } else if is_named_struct_shape(classification_shape)
1916 || is_map_shape(classification_shape)
1917 {
1918 VariantFormat::NewtypeStruct { inner_shape }
1919 } else if is_sequence_shape(classification_shape) {
1920 VariantFormat::NewtypeSequence { inner_shape }
1921 } else {
1922 VariantFormat::NewtypeOther { inner_shape }
1923 }
1924 } else {
1925 // Multi-field tuple variant
1926 VariantFormat::Tuple {
1927 arity: fields.len(),
1928 }
1929 }
1930 }
1931 StructKind::Struct => VariantFormat::Struct,
1932 }
1933 }
1934
1935 /// Returns true if this variant expects a scalar value in untagged format.
1936 pub const fn expects_scalar(&self) -> bool {
1937 matches!(self, VariantFormat::NewtypeScalar { .. })
1938 }
1939
1940 /// Returns true if this variant expects a sequence in untagged format.
1941 pub const fn expects_sequence(&self) -> bool {
1942 matches!(
1943 self,
1944 VariantFormat::Tuple { .. }
1945 | VariantFormat::NewtypeTuple { .. }
1946 | VariantFormat::NewtypeSequence { .. }
1947 )
1948 }
1949
1950 /// Returns true if this variant expects a mapping in untagged format.
1951 pub const fn expects_mapping(&self) -> bool {
1952 matches!(
1953 self,
1954 VariantFormat::Struct | VariantFormat::NewtypeStruct { .. }
1955 )
1956 }
1957
1958 /// Returns true if this is a unit variant (no data).
1959 pub const fn is_unit(&self) -> bool {
1960 matches!(self, VariantFormat::Unit)
1961 }
1962}
1963
1964/// Dereference through pointer types (like `Box<T>`) to get the pointee shape.
1965/// Returns the original shape if it's not a pointer.
1966fn deref_pointer(shape: &'static Shape) -> &'static Shape {
1967 use facet_core::Def;
1968
1969 match shape.def {
1970 Def::Pointer(pointer_def) => {
1971 if let Some(pointee) = pointer_def.pointee() {
1972 // Recursively dereference in case of nested pointers
1973 deref_pointer(pointee)
1974 } else {
1975 // Opaque pointer - can't dereference
1976 shape
1977 }
1978 }
1979 _ => shape,
1980 }
1981}
1982
1983/// Check if a shape represents a scalar type.
1984/// Transparently handles pointer types like `Box<i32>`.
1985fn is_scalar_shape(shape: &'static Shape) -> bool {
1986 let shape = deref_pointer(shape);
1987 shape.scalar_type().is_some()
1988}
1989
1990/// Returns the arity of a tuple struct/tuple shape, if applicable.
1991/// Transparently handles pointer types like `Box<(i32, i32)>`.
1992fn tuple_struct_arity(shape: &'static Shape) -> Option<usize> {
1993 use facet_core::{StructKind, Type, UserType};
1994
1995 let shape = deref_pointer(shape);
1996 match shape.ty {
1997 Type::User(UserType::Struct(struct_type)) => match struct_type.kind {
1998 StructKind::Tuple | StructKind::TupleStruct => Some(struct_type.fields.len()),
1999 _ => None,
2000 },
2001 _ => None,
2002 }
2003}
2004
2005/// Returns true if the shape is a named struct (non-tuple).
2006/// Transparently handles pointer types like `Box<MyStruct>`.
2007fn is_named_struct_shape(shape: &'static Shape) -> bool {
2008 use facet_core::{StructKind, Type, UserType};
2009
2010 let shape = deref_pointer(shape);
2011 matches!(
2012 shape.ty,
2013 Type::User(UserType::Struct(struct_type)) if matches!(struct_type.kind, StructKind::Struct)
2014 )
2015}
2016
2017/// Returns true if the shape is a sequence type (List, Array, Slice, Set).
2018/// These types serialize as arrays/sequences in formats like TOML, JSON, YAML.
2019/// Transparently handles pointer types like `Box<Vec<i32>>`.
2020fn is_sequence_shape(shape: &'static Shape) -> bool {
2021 use facet_core::Def;
2022
2023 let shape = deref_pointer(shape);
2024 matches!(
2025 shape.def,
2026 Def::List(_) | Def::Array(_) | Def::Slice(_) | Def::Set(_)
2027 )
2028}
2029
2030/// Check if a shape represents a map type (HashMap, BTreeMap, IndexMap, etc.)
2031fn is_map_shape(shape: &'static Shape) -> bool {
2032 use facet_core::Def;
2033
2034 let shape = deref_pointer(shape);
2035 matches!(shape.def, Def::Map(_))
2036}
2037
2038/// Returns true if the shape is a unit-only enum.
2039/// Unit-only enums serialize as strings in most formats (TOML, JSON, YAML).
2040/// Transparently handles pointer types like `Box<UnitEnum>`.
2041fn is_unit_enum_shape(shape: &'static Shape) -> bool {
2042 use facet_core::{Type, UserType};
2043
2044 let shape = deref_pointer(shape);
2045 match shape.ty {
2046 Type::User(UserType::Enum(enum_type)) => {
2047 // Check if all variants are unit variants
2048 enum_type.variants.iter().all(|v| v.data.fields.is_empty())
2049 }
2050 _ => false,
2051 }
2052}
2053
2054/// Information about variants grouped by their expected format.
2055///
2056/// Used by deserializers to efficiently dispatch untagged enum parsing
2057/// based on the type of value encountered.
2058#[derive(Debug, Default)]
2059pub struct VariantsByFormat {
2060 /// Variants that expect a scalar value (newtype wrapping String, i32, etc.)
2061 ///
2062 /// **Deprecated:** Use the type-specific fields below for better type matching.
2063 /// This field contains all scalar variants regardless of type.
2064 pub scalar_variants: Vec<(&'static Variant, &'static Shape)>,
2065
2066 /// Variants that expect a boolean value (newtype wrapping bool)
2067 pub bool_variants: Vec<(&'static Variant, &'static Shape)>,
2068
2069 /// Variants that expect an integer value (newtype wrapping i8, u8, i32, u64, etc.)
2070 pub int_variants: Vec<(&'static Variant, &'static Shape)>,
2071
2072 /// Variants that expect a float value (newtype wrapping f32, f64)
2073 pub float_variants: Vec<(&'static Variant, &'static Shape)>,
2074
2075 /// Variants that expect a string value (newtype wrapping String, `&str`, `Cow<str>`)
2076 pub string_variants: Vec<(&'static Variant, &'static Shape)>,
2077
2078 /// Variants that expect a sequence (tuple variants)
2079 /// Grouped by arity for efficient matching.
2080 pub tuple_variants: Vec<(&'static Variant, usize)>,
2081
2082 /// Variants that expect a mapping (struct variants, newtype wrapping struct)
2083 pub struct_variants: Vec<&'static Variant>,
2084
2085 /// Unit variants (no data)
2086 pub unit_variants: Vec<&'static Variant>,
2087
2088 /// Other variants that don't fit the above categories
2089 pub other_variants: Vec<&'static Variant>,
2090}
2091
2092impl VariantsByFormat {
2093 /// Build variant classification for an enum shape.
2094 ///
2095 /// Returns None if the shape is not an enum.
2096 pub fn from_shape(shape: &'static Shape) -> Option<Self> {
2097 use facet_core::{Type, UserType};
2098
2099 let enum_type = match shape.ty {
2100 Type::User(UserType::Enum(e)) => e,
2101 _ => return None,
2102 };
2103
2104 let mut result = Self::default();
2105
2106 for variant in enum_type.variants {
2107 match VariantFormat::from_variant(variant) {
2108 VariantFormat::Unit => {
2109 result.unit_variants.push(variant);
2110 }
2111 VariantFormat::NewtypeScalar { inner_shape } => {
2112 // Add to general scalar_variants (for backward compatibility)
2113 result.scalar_variants.push((variant, inner_shape));
2114
2115 // Classify by specific scalar type for better type matching
2116 // Dereference through pointers (Box, &, etc.) to get the actual scalar type
2117 use facet_core::ScalarType;
2118 let scalar_shape = deref_pointer(inner_shape);
2119 match scalar_shape.scalar_type() {
2120 Some(ScalarType::Bool) => {
2121 result.bool_variants.push((variant, inner_shape));
2122 }
2123 Some(
2124 ScalarType::U8
2125 | ScalarType::U16
2126 | ScalarType::U32
2127 | ScalarType::U64
2128 | ScalarType::U128
2129 | ScalarType::USize
2130 | ScalarType::I8
2131 | ScalarType::I16
2132 | ScalarType::I32
2133 | ScalarType::I64
2134 | ScalarType::I128
2135 | ScalarType::ISize,
2136 ) => {
2137 result.int_variants.push((variant, inner_shape));
2138 }
2139 Some(ScalarType::F32 | ScalarType::F64) => {
2140 result.float_variants.push((variant, inner_shape));
2141 }
2142 #[cfg(feature = "alloc")]
2143 Some(ScalarType::String | ScalarType::CowStr) => {
2144 result.string_variants.push((variant, inner_shape));
2145 }
2146 Some(ScalarType::Str | ScalarType::Char) => {
2147 result.string_variants.push((variant, inner_shape));
2148 }
2149 _ => {
2150 // Other scalar types (Unit, SocketAddr, IpAddr, etc.) - leave in general scalar_variants only
2151 }
2152 }
2153 }
2154 VariantFormat::NewtypeStruct { .. } => {
2155 result.struct_variants.push(variant);
2156 }
2157 VariantFormat::NewtypeTuple { arity, .. } => {
2158 result.tuple_variants.push((variant, arity));
2159 }
2160 VariantFormat::NewtypeSequence { .. } => {
2161 // Sequences like Vec<T> are variable-length, so we use arity 0
2162 // to indicate "accepts any array" (not an exact match requirement)
2163 result.tuple_variants.push((variant, 0));
2164 }
2165 VariantFormat::NewtypeOther { .. } => {
2166 result.other_variants.push(variant);
2167 }
2168 VariantFormat::Tuple { arity } => {
2169 result.tuple_variants.push((variant, arity));
2170 }
2171 VariantFormat::Struct => {
2172 result.struct_variants.push(variant);
2173 }
2174 }
2175 }
2176
2177 Some(result)
2178 }
2179
2180 /// Get tuple variants with a specific arity.
2181 pub fn tuple_variants_with_arity(&self, arity: usize) -> Vec<&'static Variant> {
2182 self.tuple_variants
2183 .iter()
2184 .filter(|(_, a)| *a == arity)
2185 .map(|(v, _)| *v)
2186 .collect()
2187 }
2188
2189 /// Check if there are any scalar-expecting variants.
2190 pub const fn has_scalar_variants(&self) -> bool {
2191 !self.scalar_variants.is_empty()
2192 }
2193
2194 /// Check if there are any tuple-expecting variants.
2195 pub const fn has_tuple_variants(&self) -> bool {
2196 !self.tuple_variants.is_empty()
2197 }
2198
2199 /// Check if there are any struct-expecting variants.
2200 pub const fn has_struct_variants(&self) -> bool {
2201 !self.struct_variants.is_empty()
2202 }
2203}
2204
2205// ============================================================================
2206// Schema Builder
2207// ============================================================================
2208
/// The way an enum's variants appear in the serialized format.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum EnumRepr {
    /// The variant's fields sit at the same level as the surrounding fields.
    /// Also used for `#[facet(untagged)]` enums, where there is no tag at all,
    /// and by formats like TOML where all fields appear at one level.
    /// Example: `{"name": "...", "host": "...", "port": 8080}`
    #[default]
    Flattened,

    /// The variant name is a key and the variant's content is nested under it.
    /// This is the default serde representation for enums.
    /// Example: `{"name": "...", "Tcp": {"host": "...", "port": 8080}}`
    ExternallyTagged,

    /// A tag field sits inside the content, next to the variant's fields.
    /// Used with `#[facet(tag = "type")]`.
    /// Example: `{"type": "Tcp", "host": "...", "port": 8080}`
    InternallyTagged {
        /// Name of the tag field (e.g., "type")
        tag: &'static str,
    },

    /// Tag and content are two adjacent fields at the same level.
    /// Used with `#[facet(tag = "t", content = "c")]`.
    /// Example: `{"t": "Tcp", "c": {"host": "...", "port": 8080}}`
    AdjacentlyTagged {
        /// Name of the tag field (e.g., "t")
        tag: &'static str,
        /// Name of the content field (e.g., "c")
        content: &'static str,
    },
}
2242
2243impl EnumRepr {
2244 /// Detect the enum representation from a Shape's attributes.
2245 ///
2246 /// Returns:
2247 /// - `Flattened` if `#[facet(untagged)]`
2248 /// - `InternallyTagged` if `#[facet(tag = "...")]` without content
2249 /// - `AdjacentlyTagged` if both `#[facet(tag = "...", content = "...")]`
2250 /// - `ExternallyTagged` if no attributes (the default enum representation)
2251 pub const fn from_shape(shape: &'static Shape) -> Self {
2252 let tag = shape.get_tag_attr();
2253 let content = shape.get_content_attr();
2254 let untagged = shape.is_untagged();
2255
2256 match (tag, content, untagged) {
2257 // Untagged explicitly requested
2258 (_, _, true) => EnumRepr::Flattened,
2259 // Both tag and content specified → adjacently tagged
2260 (Some(t), Some(c), false) => EnumRepr::AdjacentlyTagged { tag: t, content: c },
2261 // Only tag specified → internally tagged
2262 (Some(t), None, false) => EnumRepr::InternallyTagged { tag: t },
2263 // No attributes → default to externally tagged (variant name as key)
2264 (None, None, false) => EnumRepr::ExternallyTagged,
2265 // Content without tag is invalid, treat as externally tagged
2266 (None, Some(_), false) => EnumRepr::ExternallyTagged,
2267 }
2268 }
2269}
2270
2271impl Schema {
2272 /// Build a schema for the given shape with flattened enum representation.
2273 ///
2274 /// Returns an error if the type definition contains conflicts, such as
2275 /// duplicate field names from parent and flattened structs.
2276 ///
2277 /// Note: This defaults to `Flattened` representation. For auto-detection
2278 /// based on `#[facet(tag = "...")]` attributes, use [`Schema::build_auto`].
2279 pub fn build(shape: &'static Shape) -> Result<Self, SchemaError> {
2280 Self::build_with_repr(shape, EnumRepr::Flattened)
2281 }
2282
2283 /// Build a schema with auto-detected enum representation based on each enum's attributes.
2284 ///
2285 /// This inspects each flattened enum's shape attributes to determine its representation:
2286 /// - `#[facet(untagged)]` → Flattened
2287 /// - `#[facet(tag = "type")]` → InternallyTagged
2288 /// - `#[facet(tag = "t", content = "c")]` → AdjacentlyTagged
2289 /// - No attributes → Flattened (for flatten solver behavior)
2290 ///
2291 /// For externally-tagged enums (variant name as key), use [`Schema::build_externally_tagged`].
2292 pub fn build_auto(shape: &'static Shape) -> Result<Self, SchemaError> {
2293 let builder = SchemaBuilder::new(shape, EnumRepr::Flattened).with_auto_detect();
2294 builder.into_schema()
2295 }
2296
2297 /// Build a schema for externally-tagged enum representation (e.g., JSON).
2298 ///
2299 /// In this representation, the variant name appears as a key and the
2300 /// variant's content is nested under it. The solver will only expect
2301 /// to see the variant name as a top-level key, not the variant's fields.
2302 pub fn build_externally_tagged(shape: &'static Shape) -> Result<Self, SchemaError> {
2303 Self::build_with_repr(shape, EnumRepr::ExternallyTagged)
2304 }
2305
2306 /// Build a schema with the specified enum representation.
2307 pub fn build_with_repr(shape: &'static Shape, repr: EnumRepr) -> Result<Self, SchemaError> {
2308 let builder = SchemaBuilder::new(shape, repr);
2309 builder.into_schema()
2310 }
2311
2312 /// Get the resolutions for this schema.
2313 pub fn resolutions(&self) -> &[Resolution] {
2314 &self.resolutions
2315 }
2316
2317 /// Get the format this schema was built for.
2318 pub const fn format(&self) -> Format {
2319 self.format
2320 }
2321
2322 /// Build a schema for DOM format (XML, HTML) with auto-detected enum representation.
2323 ///
2324 /// In DOM format, fields are categorized as attributes, elements, or text content.
2325 /// The solver uses `see_attribute()`, `see_element()`, etc. to report fields.
2326 pub fn build_dom(shape: &'static Shape) -> Result<Self, SchemaError> {
2327 let builder = SchemaBuilder::new(shape, EnumRepr::Flattened)
2328 .with_auto_detect()
2329 .with_format(Format::Dom);
2330 builder.into_schema()
2331 }
2332
2333 /// Build a schema with a specific format.
2334 pub fn build_with_format(shape: &'static Shape, format: Format) -> Result<Self, SchemaError> {
2335 let builder = SchemaBuilder::new(shape, EnumRepr::Flattened)
2336 .with_auto_detect()
2337 .with_format(format);
2338 builder.into_schema()
2339 }
2340}
2341
2342struct SchemaBuilder {
2343 shape: &'static Shape,
2344 enum_repr: EnumRepr,
2345 /// If true, detect enum representation from each enum's shape attributes.
2346 /// If false, use `enum_repr` for all enums.
2347 auto_detect_enum_repr: bool,
2348 /// The format to build the schema for.
2349 format: Format,
2350}
2351
2352impl SchemaBuilder {
2353 const fn new(shape: &'static Shape, enum_repr: EnumRepr) -> Self {
2354 Self {
2355 shape,
2356 enum_repr,
2357 auto_detect_enum_repr: false,
2358 format: Format::Flat,
2359 }
2360 }
2361
2362 const fn with_auto_detect(mut self) -> Self {
2363 self.auto_detect_enum_repr = true;
2364 self
2365 }
2366
2367 const fn with_format(mut self, format: Format) -> Self {
2368 self.format = format;
2369 self
2370 }
2371
2372 fn analyze(&self) -> Result<Vec<Resolution>, SchemaError> {
2373 self.analyze_shape(self.shape, FieldPath::empty(), Vec::new())
2374 }
2375
2376 /// Analyze a shape and return all possible resolutions.
2377 /// Returns a Vec because enums create multiple resolutions.
2378 ///
2379 /// - `current_path`: The internal field path (for FieldInfo)
2380 /// - `key_prefix`: The serialized key path prefix (for known_paths)
2381 fn analyze_shape(
2382 &self,
2383 shape: &'static Shape,
2384 current_path: FieldPath,
2385 key_prefix: KeyPath,
2386 ) -> Result<Vec<Resolution>, SchemaError> {
2387 match shape.ty {
2388 Type::User(UserType::Struct(struct_type)) => {
2389 self.analyze_struct(struct_type, current_path, key_prefix)
2390 }
2391 Type::User(UserType::Enum(enum_type)) => {
2392 // Enum at root level: create one configuration per variant
2393 self.analyze_enum(shape, enum_type, current_path, key_prefix)
2394 }
2395 _ => {
2396 // For non-struct types at root level, return single empty config
2397 Ok(vec![Resolution::new()])
2398 }
2399 }
2400 }
2401
2402 /// Analyze an enum and return one configuration per variant.
2403 ///
2404 /// - `current_path`: The internal field path (for FieldInfo)
2405 /// - `key_prefix`: The serialized key path prefix (for known_paths)
2406 fn analyze_enum(
2407 &self,
2408 shape: &'static Shape,
2409 enum_type: facet_core::EnumType,
2410 current_path: FieldPath,
2411 key_prefix: KeyPath,
2412 ) -> Result<Vec<Resolution>, SchemaError> {
2413 let enum_name = shape.type_identifier;
2414 let mut result = Vec::new();
2415
2416 for variant in enum_type.variants {
2417 let mut config = Resolution::new();
2418
2419 // Record this variant selection
2420 config.add_variant_selection(current_path.clone(), enum_name, variant.name);
2421
2422 let variant_path = current_path.push_variant("", variant.name);
2423
2424 // Get resolutions from the variant's content
2425 let variant_configs =
2426 self.analyze_variant_content(variant, &variant_path, &key_prefix)?;
2427
2428 // Merge each variant config into the base
2429 for variant_config in variant_configs {
2430 let mut final_config = config.clone();
2431 final_config.merge(&variant_config)?;
2432 result.push(final_config);
2433 }
2434 }
2435
2436 Ok(result)
2437 }
2438
2439 /// Analyze a struct and return all possible resolutions.
2440 ///
2441 /// - `current_path`: The internal field path (for FieldInfo)
2442 /// - `key_prefix`: The serialized key path prefix (for known_paths)
2443 fn analyze_struct(
2444 &self,
2445 struct_type: StructType,
2446 current_path: FieldPath,
2447 key_prefix: KeyPath,
2448 ) -> Result<Vec<Resolution>, SchemaError> {
2449 // Start with one empty configuration
2450 let mut configs = vec![Resolution::new()];
2451
2452 // Process each field, potentially multiplying resolutions
2453 for field in struct_type.fields {
2454 configs =
2455 self.analyze_field_into_configs(field, ¤t_path, &key_prefix, configs)?;
2456 }
2457
2458 Ok(configs)
2459 }
2460
2461 /// Process a field and return updated resolutions.
2462 /// If the field is a flattened enum, this may multiply the number of configs.
2463 ///
2464 /// - `parent_path`: The internal field path to the parent (for FieldInfo)
2465 /// - `key_prefix`: The serialized key path prefix (for known_paths)
2466 fn analyze_field_into_configs(
2467 &self,
2468 field: &'static Field,
2469 parent_path: &FieldPath,
2470 key_prefix: &KeyPath,
2471 mut configs: Vec<Resolution>,
2472 ) -> Result<Vec<Resolution>, SchemaError> {
2473 let is_flatten = field.is_flattened();
2474
2475 if is_flatten {
2476 // Flattened: inner keys bubble up to current level (same key_prefix)
2477 self.analyze_flattened_field_into_configs(field, parent_path, key_prefix, configs)
2478 } else {
2479 // Regular field: add to ALL current configs
2480 let field_path = parent_path.push_field(field.name);
2481 let required = !field.has_default() && !is_option_type(field.shape());
2482
2483 // Build the key path for this field (uses effective_name for wire format)
2484 let mut field_key_path = key_prefix.clone();
2485 field_key_path.push(field.effective_name());
2486
2487 let field_info = FieldInfo {
2488 serialized_name: field.effective_name(),
2489 path: field_path,
2490 required,
2491 value_shape: field.shape(),
2492 field,
2493 category: if self.format == Format::Dom {
2494 FieldCategory::from_field_dom(field).unwrap_or(FieldCategory::Element)
2495 } else {
2496 FieldCategory::Flat
2497 },
2498 };
2499
2500 for config in &mut configs {
2501 config.add_field(field_info.clone())?;
2502 // Add this field's key path
2503 config.add_key_path(field_key_path.clone());
2504 }
2505
2506 // If the field's value is a struct, recurse to collect nested key paths
2507 // (for probing, not for flattening - these are nested in serialized format)
2508 // This may fork resolutions if the nested struct contains flattened enums!
2509 configs =
2510 self.collect_nested_key_paths_for_shape(field.shape(), &field_key_path, configs)?;
2511
2512 Ok(configs)
2513 }
2514 }
2515
2516 /// Collect nested key paths from a shape into resolutions.
2517 /// This handles the case where a non-flattened field contains a struct with flattened enums.
2518 /// Returns updated resolutions (may fork if flattened enums are encountered).
2519 fn collect_nested_key_paths_for_shape(
2520 &self,
2521 shape: &'static Shape,
2522 key_prefix: &KeyPath,
2523 configs: Vec<Resolution>,
2524 ) -> Result<Vec<Resolution>, SchemaError> {
2525 match shape.ty {
2526 Type::User(UserType::Struct(struct_type)) => {
2527 self.collect_nested_key_paths_for_struct(struct_type, key_prefix, configs)
2528 }
2529 _ => Ok(configs),
2530 }
2531 }
2532
2533 /// Collect nested key paths from a struct, potentially forking for flattened enums.
2534 fn collect_nested_key_paths_for_struct(
2535 &self,
2536 struct_type: StructType,
2537 key_prefix: &KeyPath,
2538 mut configs: Vec<Resolution>,
2539 ) -> Result<Vec<Resolution>, SchemaError> {
2540 for field in struct_type.fields {
2541 let is_flatten = field.is_flattened();
2542 let mut field_key_path = key_prefix.clone();
2543
2544 if is_flatten {
2545 // Flattened field: keys bubble up to current level, may fork configs
2546 configs =
2547 self.collect_nested_key_paths_for_flattened(field, key_prefix, configs)?;
2548 } else {
2549 // Regular field: add key path and recurse
2550 field_key_path.push(field.effective_name());
2551
2552 for config in &mut configs {
2553 config.add_key_path(field_key_path.clone());
2554 }
2555
2556 // Recurse into nested structs
2557 configs = self.collect_nested_key_paths_for_shape(
2558 field.shape(),
2559 &field_key_path,
2560 configs,
2561 )?;
2562 }
2563 }
2564 Ok(configs)
2565 }
2566
2567 /// Handle flattened fields when collecting nested key paths.
2568 /// This may fork resolutions for flattened enums.
2569 fn collect_nested_key_paths_for_flattened(
2570 &self,
2571 field: &'static Field,
2572 key_prefix: &KeyPath,
2573 configs: Vec<Resolution>,
2574 ) -> Result<Vec<Resolution>, SchemaError> {
2575 let shape = field.shape();
2576
2577 match shape.ty {
2578 Type::User(UserType::Struct(struct_type)) => {
2579 // Flattened struct: recurse with same key_prefix
2580 self.collect_nested_key_paths_for_struct(struct_type, key_prefix, configs)
2581 }
2582 Type::User(UserType::Enum(enum_type)) => {
2583 // Flattened enum: fork resolutions
2584 // We need to match each config to its corresponding variant
2585 let mut result = Vec::new();
2586
2587 for config in configs {
2588 // Find which variant this config has selected for this field
2589 let selected_variant = config
2590 .variant_selections()
2591 .iter()
2592 .find(|vs| {
2593 // Match by the field name in the path
2594 vs.path.segments().last() == Some(&PathSegment::Field(field.name))
2595 })
2596 .map(|vs| vs.variant_name);
2597
2598 if let Some(variant_name) = selected_variant {
2599 // Find the variant and collect its key paths
2600 if let Some(variant) =
2601 enum_type.variants.iter().find(|v| v.name == variant_name)
2602 {
2603 let mut updated_config = config;
2604 updated_config = self.collect_variant_key_paths(
2605 variant,
2606 key_prefix,
2607 updated_config,
2608 )?;
2609 result.push(updated_config);
2610 } else {
2611 result.push(config);
2612 }
2613 } else {
2614 result.push(config);
2615 }
2616 }
2617 Ok(result)
2618 }
2619 _ => Ok(configs),
2620 }
2621 }
2622
2623 /// Collect key paths from an enum variant's content.
2624 fn collect_variant_key_paths(
2625 &self,
2626 variant: &'static Variant,
2627 key_prefix: &KeyPath,
2628 mut config: Resolution,
2629 ) -> Result<Resolution, SchemaError> {
2630 // Check if this is a newtype variant (single unnamed field)
2631 if variant.data.fields.len() == 1 && variant.data.fields[0].name == "0" {
2632 let inner_field = &variant.data.fields[0];
2633 let inner_shape = inner_field.shape();
2634
2635 // If the inner type is a struct, flatten its fields
2636 if let Type::User(UserType::Struct(inner_struct)) = inner_shape.ty {
2637 let configs = self.collect_nested_key_paths_for_struct(
2638 inner_struct,
2639 key_prefix,
2640 vec![config],
2641 )?;
2642 return Ok(configs.into_iter().next().unwrap_or_else(Resolution::new));
2643 }
2644 }
2645
2646 // Named fields - process each
2647 for variant_field in variant.data.fields {
2648 let is_flatten = variant_field.is_flattened();
2649
2650 if is_flatten {
2651 let configs = self.collect_nested_key_paths_for_flattened(
2652 variant_field,
2653 key_prefix,
2654 vec![config],
2655 )?;
2656 config = configs.into_iter().next().unwrap_or_else(Resolution::new);
2657 } else {
2658 let mut field_key_path = key_prefix.clone();
2659 field_key_path.push(variant_field.effective_name());
2660 config.add_key_path(field_key_path.clone());
2661
2662 let configs = self.collect_nested_key_paths_for_shape(
2663 variant_field.shape(),
2664 &field_key_path,
2665 vec![config],
2666 )?;
2667 config = configs.into_iter().next().unwrap_or_else(Resolution::new);
2668 }
2669 }
2670 Ok(config)
2671 }
2672
2673 /// Collect ONLY key paths from a variant's content (no fields added).
2674 /// Used for externally-tagged enums where variant content is nested and
2675 /// will be parsed separately by the deserializer.
2676 fn collect_variant_key_paths_only(
2677 &self,
2678 variant: &'static Variant,
2679 key_prefix: &KeyPath,
2680 config: &mut Resolution,
2681 ) -> Result<(), SchemaError> {
2682 Self::collect_variant_fields_key_paths_only(variant, key_prefix, config);
2683 Ok(())
2684 }
2685
2686 /// Recursively collect key paths from a struct (no fields added).
2687 fn collect_struct_key_paths_only(
2688 struct_type: StructType,
2689 key_prefix: &KeyPath,
2690 config: &mut Resolution,
2691 ) {
2692 for field in struct_type.fields {
2693 let is_flatten = field.is_flattened();
2694
2695 if is_flatten {
2696 // Flattened field: keys bubble up to current level
2697 Self::collect_shape_key_paths_only(field.shape(), key_prefix, config);
2698 } else {
2699 // Regular field: add its key path
2700 let mut field_key_path = key_prefix.clone();
2701 field_key_path.push(field.effective_name());
2702 config.add_key_path(field_key_path.clone());
2703
2704 // Recurse into nested types
2705 Self::collect_shape_key_paths_only(field.shape(), &field_key_path, config);
2706 }
2707 }
2708 }
2709
2710 /// Recursively collect key paths from a shape (struct or enum).
2711 fn collect_shape_key_paths_only(
2712 shape: &'static Shape,
2713 key_prefix: &KeyPath,
2714 config: &mut Resolution,
2715 ) {
2716 match shape.ty {
2717 Type::User(UserType::Struct(inner_struct)) => {
2718 Self::collect_struct_key_paths_only(inner_struct, key_prefix, config);
2719 }
2720 Type::User(UserType::Enum(enum_type)) => {
2721 // For enums, collect key paths from ALL variants
2722 // (we don't know which variant will be selected)
2723 for variant in enum_type.variants {
2724 Self::collect_variant_fields_key_paths_only(variant, key_prefix, config);
2725 }
2726 }
2727 _ => {}
2728 }
2729 }
2730
2731 /// Collect key paths from a variant's fields (not the variant itself).
2732 fn collect_variant_fields_key_paths_only(
2733 variant: &'static Variant,
2734 key_prefix: &KeyPath,
2735 config: &mut Resolution,
2736 ) {
2737 // Check if this is a newtype variant (single unnamed field)
2738 if variant.data.fields.len() == 1 && variant.data.fields[0].name == "0" {
2739 let inner_field = &variant.data.fields[0];
2740 Self::collect_shape_key_paths_only(inner_field.shape(), key_prefix, config);
2741 return;
2742 }
2743
2744 // Named fields - add key paths for each
2745 for variant_field in variant.data.fields {
2746 let mut field_key_path = key_prefix.clone();
2747 field_key_path.push(variant_field.effective_name());
2748 config.add_key_path(field_key_path.clone());
2749
2750 // Recurse into nested types
2751 Self::collect_shape_key_paths_only(variant_field.shape(), &field_key_path, config);
2752 }
2753 }
2754
2755 /// Process a flattened field, potentially forking resolutions for enums.
2756 ///
2757 /// For flattened fields, the inner keys bubble up to the current level,
2758 /// so we pass the same key_prefix (not key_prefix + field.name).
2759 ///
2760 /// If the field is `Option<T>`, we unwrap to get T and mark all resulting
2761 /// fields as optional (since the entire flattened block can be omitted).
2762 fn analyze_flattened_field_into_configs(
2763 &self,
2764 field: &'static Field,
2765 parent_path: &FieldPath,
2766 key_prefix: &KeyPath,
2767 configs: Vec<Resolution>,
2768 ) -> Result<Vec<Resolution>, SchemaError> {
2769 let field_path = parent_path.push_field(field.name);
2770 let original_shape = field.shape();
2771
2772 // Check if this is Option<T> - if so, unwrap and mark all fields optional
2773 let (shape, is_optional_flatten) = match unwrap_option_type(original_shape) {
2774 Some(inner) => (inner, true),
2775 None => (original_shape, false),
2776 };
2777
2778 match shape.ty {
2779 Type::User(UserType::Struct(struct_type)) => {
2780 // Flatten a struct: get its resolutions and merge into each of ours
2781 // Key prefix stays the same - inner keys bubble up
2782 let mut struct_configs =
2783 self.analyze_struct(struct_type, field_path, key_prefix.clone())?;
2784
2785 // If the flatten field was Option<T>, mark all inner fields as optional
2786 if is_optional_flatten {
2787 for config in &mut struct_configs {
2788 config.mark_all_optional();
2789 }
2790 }
2791
2792 // Each of our configs combines with each struct config
2793 // (usually struct_configs has 1 element unless it contains enums)
2794 let mut result = Vec::new();
2795 for base_config in configs {
2796 for struct_config in &struct_configs {
2797 let mut merged = base_config.clone();
2798 merged.merge(struct_config)?;
2799 result.push(merged);
2800 }
2801 }
2802 Ok(result)
2803 }
2804 Type::User(UserType::Enum(enum_type)) => {
2805 // Fork: each existing config × each variant
2806 let mut result = Vec::new();
2807 let enum_name = shape.type_identifier;
2808
2809 // Determine enum representation:
2810 // - If auto_detect_enum_repr is enabled, detect from the enum's shape attributes
2811 // - Otherwise, use the global enum_repr setting
2812 let enum_repr = if self.auto_detect_enum_repr {
2813 EnumRepr::from_shape(shape)
2814 } else {
2815 self.enum_repr.clone()
2816 };
2817
2818 for base_config in configs {
2819 for variant in enum_type.variants {
2820 let mut forked = base_config.clone();
2821 forked.add_variant_selection(field_path.clone(), enum_name, variant.name);
2822
2823 let variant_path = field_path.push_variant(field.name, variant.name);
2824
2825 match &enum_repr {
2826 EnumRepr::ExternallyTagged => {
2827 // For externally tagged enums, the variant name is a key
2828 // at the current level, and its content is nested underneath.
2829 let mut variant_key_prefix = key_prefix.clone();
2830 variant_key_prefix.push(variant.name);
2831
2832 // Add the variant name itself as a known key path
2833 forked.add_key_path(variant_key_prefix.clone());
2834
2835 // Add the variant name as a field (the key that selects this variant)
2836 let variant_field_info = FieldInfo {
2837 serialized_name: variant.name,
2838 path: variant_path.clone(),
2839 required: !is_optional_flatten,
2840 value_shape: shape, // The enum shape
2841 field, // The original flatten field
2842 category: FieldCategory::Element, // Variant selector is like an element
2843 };
2844 forked.add_field(variant_field_info)?;
2845
2846 // For externally-tagged enums, we do NOT add the variant's
2847 // inner fields to required fields. They're nested and will
2848 // be parsed separately by the deserializer.
2849 // Only add them to known_paths for depth-aware probing.
2850 self.collect_variant_key_paths_only(
2851 variant,
2852 &variant_key_prefix,
2853 &mut forked,
2854 )?;
2855
2856 result.push(forked);
2857 }
2858 EnumRepr::Flattened => {
2859 // For flattened/untagged enums, the variant's fields appear at the
2860 // same level as other fields. The variant name is NOT a key;
2861 // only the variant's inner fields are keys.
2862
2863 // Get resolutions from the variant's content
2864 // Key prefix stays the same - inner keys bubble up
2865 let mut variant_configs = self.analyze_variant_content(
2866 variant,
2867 &variant_path,
2868 key_prefix,
2869 )?;
2870
2871 // If the flatten field was Option<T>, mark all inner fields as optional
2872 if is_optional_flatten {
2873 for config in &mut variant_configs {
2874 config.mark_all_optional();
2875 }
2876 }
2877
2878 // Merge each variant config into the forked base
2879 for variant_config in variant_configs {
2880 let mut final_config = forked.clone();
2881 final_config.merge(&variant_config)?;
2882 result.push(final_config);
2883 }
2884 }
2885 EnumRepr::InternallyTagged { tag } => {
2886 // For internally tagged enums, the tag field appears at the
2887 // same level as the variant's fields.
2888 // Example: {"type": "Tcp", "host": "...", "port": 8080}
2889
2890 // Add the tag field as a known key path
2891 let mut tag_key_path = key_prefix.clone();
2892 tag_key_path.push(tag);
2893 forked.add_key_path(tag_key_path);
2894
2895 // Add the tag field info - the tag discriminates the variant
2896 // We use a synthetic field for the tag
2897 let tag_field_info = FieldInfo {
2898 serialized_name: tag,
2899 path: variant_path.clone(),
2900 required: !is_optional_flatten,
2901 value_shape: shape, // The enum shape
2902 field, // The original flatten field
2903 category: FieldCategory::Element, // Tag is a key field
2904 };
2905 forked.add_field(tag_field_info)?;
2906
2907 // Get resolutions from the variant's content
2908 // Key prefix stays the same - inner keys are at the same level
2909 let mut variant_configs = self.analyze_variant_content(
2910 variant,
2911 &variant_path,
2912 key_prefix,
2913 )?;
2914
2915 // If the flatten field was Option<T>, mark all inner fields as optional
2916 if is_optional_flatten {
2917 for config in &mut variant_configs {
2918 config.mark_all_optional();
2919 }
2920 }
2921
2922 // Merge each variant config into the forked base
2923 for variant_config in variant_configs {
2924 let mut final_config = forked.clone();
2925 final_config.merge(&variant_config)?;
2926 result.push(final_config);
2927 }
2928 }
2929 EnumRepr::AdjacentlyTagged { tag, content } => {
2930 // For adjacently tagged enums, both tag and content fields
2931 // appear at the same level. Content contains the variant's fields.
2932 // Example: {"t": "Tcp", "c": {"host": "...", "port": 8080}}
2933
2934 // Add the tag field as a known key path
2935 let mut tag_key_path = key_prefix.clone();
2936 tag_key_path.push(tag);
2937 forked.add_key_path(tag_key_path);
2938
2939 // Add the tag field info
2940 let tag_field_info = FieldInfo {
2941 serialized_name: tag,
2942 path: variant_path.clone(),
2943 required: !is_optional_flatten,
2944 value_shape: shape, // The enum shape
2945 field, // The original flatten field
2946 category: FieldCategory::Element, // Tag is a key field
2947 };
2948 forked.add_field(tag_field_info)?;
2949
2950 // Add the content field as a known key path
2951 let mut content_key_prefix = key_prefix.clone();
2952 content_key_prefix.push(content);
2953 forked.add_key_path(content_key_prefix.clone());
2954
2955 // The variant's fields are nested under the content key
2956 // Collect key paths for probing
2957 self.collect_variant_key_paths_only(
2958 variant,
2959 &content_key_prefix,
2960 &mut forked,
2961 )?;
2962
2963 result.push(forked);
2964 }
2965 }
2966 }
2967 }
2968 Ok(result)
2969 }
2970 _ => {
2971 // Check if this is a Map type - if so, it becomes a catch-all for unknown fields
2972 if let Def::Map(_) = &shape.def {
2973 // Any map type can serve as a catch-all. Whether the key type can actually
2974 // be deserialized from field name strings is the deserializer's problem,
2975 // not the solver's.
2976 let field_info = FieldInfo {
2977 serialized_name: field.effective_name(),
2978 path: field_path,
2979 required: false, // Catch-all maps are never required
2980 value_shape: shape,
2981 field,
2982 // For DOM format, determine if this catches attributes or elements
2983 // based on the field's attributes
2984 category: if self.format == Format::Dom {
2985 if field.is_attribute() {
2986 FieldCategory::Attribute
2987 } else {
2988 FieldCategory::Element
2989 }
2990 } else {
2991 FieldCategory::Flat
2992 },
2993 };
2994
2995 let mut result = configs;
2996 for config in &mut result {
2997 config.set_catch_all_map(field_info.category, field_info.clone());
2998 }
2999 return Ok(result);
3000 }
3001
3002 // Check if this is a DynamicValue type (like facet_value::Value) - also a catch-all
3003 if matches!(&shape.def, Def::DynamicValue(_)) {
3004 let field_info = FieldInfo {
3005 serialized_name: field.effective_name(),
3006 path: field_path,
3007 required: false, // Catch-all dynamic values are never required
3008 value_shape: shape,
3009 field,
3010 category: if self.format == Format::Dom {
3011 if field.is_attribute() {
3012 FieldCategory::Attribute
3013 } else {
3014 FieldCategory::Element
3015 }
3016 } else {
3017 FieldCategory::Flat
3018 },
3019 };
3020
3021 let mut result = configs;
3022 for config in &mut result {
3023 config.set_catch_all_map(field_info.category, field_info.clone());
3024 }
3025 return Ok(result);
3026 }
3027
3028 // Can't flatten other types - treat as regular field
3029 // For Option<T> flatten, also consider optionality from the wrapper
3030 let required =
3031 !field.has_default() && !is_option_type(shape) && !is_optional_flatten;
3032
3033 // For non-flattenable types, add the field with its key path
3034 let mut field_key_path = key_prefix.clone();
3035 field_key_path.push(field.effective_name());
3036
3037 let field_info = FieldInfo {
3038 serialized_name: field.effective_name(),
3039 path: field_path,
3040 required,
3041 value_shape: shape,
3042 field,
3043 category: if self.format == Format::Dom {
3044 FieldCategory::from_field_dom(field).unwrap_or(FieldCategory::Element)
3045 } else {
3046 FieldCategory::Flat
3047 },
3048 };
3049
3050 let mut result = configs;
3051 for config in &mut result {
3052 config.add_field(field_info.clone())?;
3053 config.add_key_path(field_key_path.clone());
3054 }
3055 Ok(result)
3056 }
3057 }
3058 }
3059
3060 /// Analyze a variant's content and return resolutions.
3061 ///
3062 /// - `variant_path`: The internal field path (for FieldInfo)
3063 /// - `key_prefix`: The serialized key path prefix (for known_paths)
3064 fn analyze_variant_content(
3065 &self,
3066 variant: &'static Variant,
3067 variant_path: &FieldPath,
3068 key_prefix: &KeyPath,
3069 ) -> Result<Vec<Resolution>, SchemaError> {
3070 // Check if this is a newtype variant (single unnamed field like `Foo(Bar)`)
3071 if variant.data.fields.len() == 1 && variant.data.fields[0].name == "0" {
3072 let inner_field = &variant.data.fields[0];
3073 let inner_shape = inner_field.shape();
3074
3075 // If the inner type is a struct, treat the newtype wrapper as transparent.
3076 //
3077 // Previously we pushed a synthetic `"0"` segment onto the path. That made the
3078 // solver think there was an extra field between the variant and the inner
3079 // struct (e.g., `backend.backend::Local.0.cache`). Format-specific flattening does not
3080 // expose that tuple wrapper, so the deserializer would try to open a field
3081 // named `"0"` on the inner struct/enum, causing "no such field" errors when
3082 // navigating paths like `backend::Local.cache`.
3083 //
3084 // Keep the synthetic `"0"` segment so the solver/reflect layer walks through
3085 // the tuple wrapper that Rust generates for newtype variants.
3086
3087 // For untagged enum variant resolution, we need to look at the "effective"
3088 // shape that determines the serialization format. This unwraps:
3089 // 1. Transparent wrappers (shape.inner) - e.g., `Curve64(GCurve<f64, f64>)`
3090 // 2. Proxy types (shape.proxy) - e.g., `GCurve` uses `GCurveProxy` for ser/de
3091 //
3092 // This ensures that `{"x":..., "y":...}` correctly matches `Linear(Curve64)`
3093 // where Curve64 is transparent around GCurve which has a proxy with x,y fields.
3094 let effective_shape = unwrap_to_effective_shape(inner_shape);
3095
3096 if let Type::User(UserType::Struct(inner_struct)) = effective_shape.ty {
3097 let inner_path = variant_path.push_field("0");
3098 return self.analyze_struct(inner_struct, inner_path, key_prefix.clone());
3099 }
3100 }
3101
3102 // Named fields or multiple fields - analyze as a pseudo-struct
3103 let mut configs = vec![Resolution::new()];
3104 for variant_field in variant.data.fields {
3105 configs =
3106 self.analyze_field_into_configs(variant_field, variant_path, key_prefix, configs)?;
3107 }
3108 Ok(configs)
3109 }
3110
3111 fn into_schema(self) -> Result<Schema, SchemaError> {
3112 let resolutions = self.analyze()?;
3113 let num_resolutions = resolutions.len();
3114
3115 // Build inverted index: field_name → bitmask of config indices (for Flat format)
3116 let mut field_to_resolutions: BTreeMap<&'static str, ResolutionSet> = BTreeMap::new();
3117 for (idx, config) in resolutions.iter().enumerate() {
3118 for field_info in config.fields().values() {
3119 field_to_resolutions
3120 .entry(field_info.serialized_name)
3121 .or_insert_with(|| ResolutionSet::empty(num_resolutions))
3122 .insert(idx);
3123 }
3124 }
3125
3126 // Build DOM inverted index: (category, name) → bitmask of config indices
3127 let mut dom_field_to_resolutions: BTreeMap<(FieldCategory, &'static str), ResolutionSet> =
3128 BTreeMap::new();
3129 if self.format == Format::Dom {
3130 for (idx, config) in resolutions.iter().enumerate() {
3131 for field_info in config.fields().values() {
3132 dom_field_to_resolutions
3133 .entry((field_info.category, field_info.serialized_name))
3134 .or_insert_with(|| ResolutionSet::empty(num_resolutions))
3135 .insert(idx);
3136 }
3137 }
3138 }
3139
3140 Ok(Schema {
3141 shape: self.shape,
3142 format: self.format,
3143 resolutions,
3144 field_to_resolutions,
3145 dom_field_to_resolutions,
3146 })
3147 }
3148}
3149
3150/// Check if a shape represents an Option type.
3151const fn is_option_type(shape: &'static Shape) -> bool {
3152 matches!(shape.def, Def::Option(_))
3153}
3154
3155/// If shape is `Option<T>`, returns `Some(T's shape)`. Otherwise returns `None`.
3156const fn unwrap_option_type(shape: &'static Shape) -> Option<&'static Shape> {
3157 match shape.def {
3158 Def::Option(option_def) => Some(option_def.t),
3159 _ => None,
3160 }
3161}
3162
3163/// Unwrap transparent wrappers and proxies to get the effective shape for field matching.
3164///
3165/// When determining which untagged enum variant matches a set of fields, we need to
3166/// look at the "effective" shape that determines the serialization format:
3167///
3168/// 1. Transparent wrappers (shape.inner): e.g., `Curve64` wraps `GCurve<f64, f64>`
3169/// - The wrapper has no serialization presence; it serializes as its inner type
3170///
3171/// 2. Proxy types (shape.proxy): e.g., `GCurve` uses `GCurveProxy` for ser/de
3172/// - The proxy's fields are what appear in the serialized format
3173///
3174/// This function recursively unwraps these layers to find the shape whose fields
3175/// should be used for variant matching. For example:
3176/// - `Curve64` (transparent) → `GCurve<f64, f64>` (has proxy) → `GCurveProxy<f64, f64>`
3177fn unwrap_to_effective_shape(shape: &'static Shape) -> &'static Shape {
3178 // First, unwrap transparent wrappers
3179 let shape = unwrap_transparent(shape);
3180
3181 // Then, if there's a proxy, use its shape instead
3182 if let Some(proxy_def) = shape.proxy {
3183 // Recursively unwrap in case the proxy is also transparent or has its own proxy
3184 unwrap_to_effective_shape(proxy_def.shape)
3185 } else {
3186 shape
3187 }
3188}
3189
3190/// Recursively unwrap transparent wrappers to get to the innermost type.
3191fn unwrap_transparent(shape: &'static Shape) -> &'static Shape {
3192 if let Some(inner) = shape.inner {
3193 unwrap_transparent(inner)
3194 } else {
3195 shape
3196 }
3197}