Skip to main content

hermes_core/dsl/
schema.rs

1//! Schema definitions for documents and fields
2
3use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Field identifier.
///
/// A thin `Copy` wrapper around a `u32`. Within this file the wrapped value is used
/// as the zero-based index of the field's `FieldEntry` in a schema's field list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
/// Types of fields supported by the schema.
///
/// Every variant carries an explicit `#[serde(rename = ...)]`, so the serialized
/// name is the lowercase string given on the variant, not the Rust identifier.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FieldType {
    /// Text field - tokenized and indexed
    #[serde(rename = "text")]
    Text,
    /// Unsigned 64-bit integer
    #[serde(rename = "u64")]
    U64,
    /// Signed 64-bit integer
    #[serde(rename = "i64")]
    I64,
    /// 64-bit floating point
    #[serde(rename = "f64")]
    F64,
    /// Raw bytes (not tokenized)
    #[serde(rename = "bytes")]
    Bytes,
    /// Sparse vector field - indexed as inverted posting lists with quantized weights
    #[serde(rename = "sparse_vector")]
    SparseVector,
    /// Dense vector field for ANN search. The index algorithm is selected by
    /// `DenseVectorConfig::index_type` (RaBitQ binary quantization by default).
    #[serde(rename = "dense_vector")]
    DenseVector,
    /// JSON field - arbitrary JSON data, stored but not indexed
    #[serde(rename = "json")]
    Json,
    /// Binary dense vector field - packed-bit storage with Hamming distance scoring
    #[serde(rename = "binary_dense_vector")]
    BinaryDenseVector,
}
41
/// Definition of a single schema field: its name, type, and per-field options.
///
/// Members marked `#[serde(default)]` may be absent from serialized input and then
/// take their `Default` value; the optional configs are additionally omitted from
/// serialized output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; `SchemaBuilder::build` uses it as the key of the name lookup table.
    pub name: String,
    /// Kind of value the field holds.
    pub field_type: FieldType,
    /// Whether the field is indexed.
    pub indexed: bool,
    /// Whether the field is stored; `Document::filter_stored` keeps only such fields.
    pub stored: bool,
    /// Name of the tokenizer to use for this field (for text fields)
    pub tokenizer: Option<String>,
    /// Whether this field can have multiple values (serialized as array in JSON)
    #[serde(default)]
    pub multi: bool,
    /// Position tracking mode for phrase queries and multi-field element tracking
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub positions: Option<PositionMode>,
    /// Configuration for sparse vector fields (index size, weight quantization)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Configuration for dense vector fields (dimension, quantization)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
    /// Configuration for binary dense vector fields (dimension in bits)
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub binary_dense_vector_config: Option<BinaryDenseVectorConfig>,
    /// Whether this field has columnar fast-field storage for O(1) doc→value access.
    /// Valid for u64, i64, f64, and text fields.
    #[serde(default)]
    pub fast: bool,
    /// Whether this field is a primary key (unique constraint, at most one per schema).
    /// NOTE(review): nothing in this struct enforces the "at most one" rule — confirm
    /// where it is validated.
    #[serde(default)]
    pub primary_key: bool,
    /// Whether build-time document reordering (Recursive Graph Bisection) is enabled.
    /// Valid for sparse_vector fields with BMP format. Clusters similar documents
    /// into the same blocks for better pruning effectiveness.
    #[serde(default)]
    pub reorder: bool,
}
79
/// Position tracking mode for text fields.
///
/// Selects which positional information is recorded for a field. Variant names are
/// serialized in snake_case (`ordinal`, `token_position`, `full`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PositionMode {
    /// Track only element ordinal for multi-valued fields (which array element)
    /// Useful for returning which element matched without full phrase query support
    Ordinal,
    /// Track only token position within text (for phrase queries)
    /// Does not track element ordinal - all positions are relative to concatenated text
    TokenPosition,
    /// Track both element ordinal and token position (full support)
    /// Position format: (element_ordinal << 20) | token_position
    Full,
}
94
95impl PositionMode {
96    /// Whether this mode tracks element ordinals
97    pub fn tracks_ordinal(&self) -> bool {
98        matches!(self, PositionMode::Ordinal | PositionMode::Full)
99    }
100
101    /// Whether this mode tracks token positions
102    pub fn tracks_token_position(&self) -> bool {
103        matches!(self, PositionMode::TokenPosition | PositionMode::Full)
104    }
105}
106
/// Vector index algorithm type.
///
/// `RaBitQ` is the `Default` variant.
///
/// NOTE(review): wire names come from `rename_all = "snake_case"`. For mixed-case
/// identifiers such as `RaBitQ`, `IvfRaBitQ` and `ScaNN`, serde presumably inserts an
/// underscore before every interior capital (`ra_bit_q`, `ivf_ra_bit_q`, `sca_n_n`) —
/// confirm these are the intended serialized names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Flat - brute-force search over raw vectors (accumulating state)
    Flat,
    /// RaBitQ - binary quantization, good for small datasets (<100K)
    #[default]
    RaBitQ,
    /// IVF-RaBitQ - inverted file with RaBitQ, good for medium datasets
    IvfRaBitQ,
    /// ScaNN - product quantization with OPQ and anisotropic loss, best for large datasets
    ScaNN,
}
121
/// Storage quantization for dense vector elements
///
/// Controls the precision of each vector coordinate in `.vectors` files.
/// Lower precision reduces storage and memory bandwidth; scoring uses
/// native-precision SIMD (no dequantization on the hot path).
///
/// `F32` is the `Default` variant.
///
/// NOTE(review): wire names come from `rename_all = "snake_case"`; `UInt8`
/// presumably serializes as `u_int8` rather than `uint8` — confirm against the
/// schema files in use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DenseVectorQuantization {
    /// 32-bit IEEE 754 float (4 bytes/dim) — full precision, baseline
    #[default]
    F32,
    /// 16-bit IEEE 754 half-float (2 bytes/dim) — <0.1% recall loss for normalized embeddings
    F16,
    /// 8-bit unsigned scalar quantization (1 byte/dim) — maps [-1,1] → [0,255]
    UInt8,
    /// Binary packed-bit storage (1 bit per dimension, ceil(dim/8) bytes per vector).
    /// Used internally by BinaryDenseVector fields. Not selectable for DenseVector fields.
    Binary,
}
141
142impl DenseVectorQuantization {
143    /// Bytes per element for non-binary quantization types.
144    /// Panics for Binary — use `dim.div_ceil(8)` for binary vector byte size.
145    pub fn element_size(self) -> usize {
146        match self {
147            Self::F32 => 4,
148            Self::F16 => 2,
149            Self::UInt8 => 1,
150            Self::Binary => panic!("element_size() not valid for Binary; use dim.div_ceil(8)"),
151        }
152    }
153
154    /// Wire format tag (stored in .vectors header)
155    pub fn tag(self) -> u8 {
156        match self {
157            Self::F32 => 0,
158            Self::F16 => 1,
159            Self::UInt8 => 2,
160            Self::Binary => 3,
161        }
162    }
163
164    /// Decode wire format tag
165    pub fn from_tag(tag: u8) -> Option<Self> {
166        match tag {
167            0 => Some(Self::F32),
168            1 => Some(Self::F16),
169            2 => Some(Self::UInt8),
170            3 => Some(Self::Binary),
171            _ => None,
172        }
173    }
174}
175
/// Configuration for dense vector fields using Flat, RaBitQ, IVF-RaBitQ, or ScaNN
///
/// Indexes operate in two states:
/// - **Flat (accumulating)**: Brute-force search over raw vectors. Used when vector count
///   is below `build_threshold` or before `build_index` is called.
/// - **Built (ANN)**: Fast approximate nearest neighbor search using trained structures.
///   Centroids and codebooks are trained from data and stored within the segment.
///
/// Only `dim` is required when deserializing; every other member has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Dimensionality of vectors
    pub dim: usize,
    /// Target vector index algorithm (Flat, RaBitQ, IVF-RaBitQ, or ScaNN)
    /// When in accumulating state, search uses brute-force regardless of this setting.
    /// Defaults to `VectorIndexType::RaBitQ` when omitted.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Storage quantization for vector elements (f32, f16, uint8).
    /// Defaults to `DenseVectorQuantization::F32` when omitted.
    #[serde(default)]
    pub quantization: DenseVectorQuantization,
    /// Number of IVF clusters for IVF-RaBitQ and ScaNN.
    /// If None, `optimal_num_clusters` derives it from the dataset size as
    /// sqrt(n) clamped to the range 16..=4096.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_clusters: Option<usize>,
    /// Number of clusters to probe during search (default: 32)
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// Minimum number of vectors required before building ANN index.
    /// Below this threshold, brute-force (Flat) search is used.
    /// When None, `default_build_threshold` supplies 1000 for RaBitQ,
    /// 10000 for IVF-RaBitQ/ScaNN, and `usize::MAX` (never build) for Flat.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build_threshold: Option<usize>,
    /// Whether stored vectors are pre-normalized to unit L2 norm.
    /// When true, scoring skips per-vector norm computation (cosine = dot / ||q||),
    /// reducing compute by ~40%. Common for embedding models (e.g. OpenAI, Cohere).
    /// Default: true (most embedding models produce L2-normalized vectors).
    #[serde(default = "default_unit_norm")]
    pub unit_norm: bool,
}
213
/// Serde default for `DenseVectorConfig::nprobe`: probe 32 clusters per search.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
217
/// Serde default for `DenseVectorConfig::unit_norm`: vectors are assumed to be
/// pre-normalized unless the config says otherwise.
fn default_unit_norm() -> bool {
    const DEFAULT_UNIT_NORM: bool = true;
    DEFAULT_UNIT_NORM
}
221
222impl DenseVectorConfig {
223    pub fn new(dim: usize) -> Self {
224        Self {
225            dim,
226            index_type: VectorIndexType::RaBitQ,
227            quantization: DenseVectorQuantization::F32,
228            num_clusters: None,
229            nprobe: 32,
230            build_threshold: None,
231            unit_norm: true,
232        }
233    }
234
235    /// Create IVF-RaBitQ configuration
236    pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
237        Self {
238            dim,
239            index_type: VectorIndexType::IvfRaBitQ,
240            quantization: DenseVectorQuantization::F32,
241            num_clusters,
242            nprobe,
243            build_threshold: None,
244            unit_norm: true,
245        }
246    }
247
248    /// Create ScaNN configuration
249    pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
250        Self {
251            dim,
252            index_type: VectorIndexType::ScaNN,
253            quantization: DenseVectorQuantization::F32,
254            num_clusters,
255            nprobe,
256            build_threshold: None,
257            unit_norm: true,
258        }
259    }
260
261    /// Create Flat (brute-force) configuration - no ANN index
262    pub fn flat(dim: usize) -> Self {
263        Self {
264            dim,
265            index_type: VectorIndexType::Flat,
266            quantization: DenseVectorQuantization::F32,
267            num_clusters: None,
268            nprobe: 0,
269            build_threshold: None,
270            unit_norm: true,
271        }
272    }
273
274    /// Set storage quantization
275    pub fn with_quantization(mut self, quantization: DenseVectorQuantization) -> Self {
276        self.quantization = quantization;
277        self
278    }
279
280    /// Set build threshold for auto-building ANN index
281    pub fn with_build_threshold(mut self, threshold: usize) -> Self {
282        self.build_threshold = Some(threshold);
283        self
284    }
285
286    /// Mark vectors as pre-normalized to unit L2 norm
287    pub fn with_unit_norm(mut self) -> Self {
288        self.unit_norm = true;
289        self
290    }
291
292    /// Set number of IVF clusters
293    pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
294        self.num_clusters = Some(num_clusters);
295        self
296    }
297
298    /// Check if this config uses IVF
299    pub fn uses_ivf(&self) -> bool {
300        matches!(
301            self.index_type,
302            VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
303        )
304    }
305
306    /// Check if this config uses ScaNN
307    pub fn uses_scann(&self) -> bool {
308        self.index_type == VectorIndexType::ScaNN
309    }
310
311    /// Check if this config is flat (brute-force)
312    pub fn is_flat(&self) -> bool {
313        self.index_type == VectorIndexType::Flat
314    }
315
316    /// Get the default build threshold for this index type
317    pub fn default_build_threshold(&self) -> usize {
318        self.build_threshold.unwrap_or(match self.index_type {
319            VectorIndexType::Flat => usize::MAX, // Never auto-build
320            VectorIndexType::RaBitQ => 1000,
321            VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
322        })
323    }
324
325    /// Calculate optimal number of clusters for given vector count
326    pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
327        self.num_clusters.unwrap_or_else(|| {
328            // sqrt(n) heuristic, capped at 4096
329            let optimal = (num_vectors as f64).sqrt() as usize;
330            optimal.clamp(16, 4096)
331        })
332    }
333}
334
/// Configuration for binary dense vector fields
///
/// Binary dense vectors store packed bits (1 bit per dimension) and use
/// Hamming distance for scoring. Always uses brute-force flat search
/// (Hamming popcount is ~10ns/vec for 768-bit, ANN indexes don't help).
///
/// NOTE(review): the multiple-of-8 check on `dim` lives in `new`; a value produced
/// by the derived `Deserialize` impl does not appear to pass through it — confirm
/// that deserialized configs are validated elsewhere.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BinaryDenseVectorConfig {
    /// Number of bits (dimensions). Storage is ceil(dim/8) bytes per vector.
    pub dim: usize,
}
345
346impl BinaryDenseVectorConfig {
347    pub fn new(dim: usize) -> Self {
348        assert!(
349            dim.is_multiple_of(8),
350            "BinaryDenseVector dimension must be a multiple of 8, got {dim}"
351        );
352        Self { dim }
353    }
354
355    /// Number of bytes needed to store one vector
356    pub fn byte_len(&self) -> usize {
357        self.dim.div_ceil(8)
358    }
359}
360
361use super::query_field_router::QueryRouterRule;
362
/// Schema defining document structure.
///
/// Holds the ordered field definitions plus a name lookup table. A `Field` handle
/// is the index of its entry in `fields`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field definitions, indexed by `Field.0`.
    fields: Vec<FieldEntry>,
    /// Lookup from field name to handle; populated by `SchemaBuilder::build`.
    name_to_field: HashMap<String, Field>,
    /// Default fields for query parsing (when no field is specified)
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query router rules for routing queries to specific fields based on regex patterns
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
375
376impl Schema {
377    pub fn builder() -> SchemaBuilder {
378        SchemaBuilder::default()
379    }
380
381    pub fn get_field(&self, name: &str) -> Option<Field> {
382        self.name_to_field.get(name).copied()
383    }
384
385    pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
386        self.fields.get(field.0 as usize)
387    }
388
389    pub fn get_field_name(&self, field: Field) -> Option<&str> {
390        self.fields.get(field.0 as usize).map(|e| e.name.as_str())
391    }
392
393    pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
394        self.fields
395            .iter()
396            .enumerate()
397            .map(|(i, e)| (Field(i as u32), e))
398    }
399
400    pub fn num_fields(&self) -> usize {
401        self.fields.len()
402    }
403
404    /// Whether any field has the `reorder` attribute set.
405    /// Used by the background optimizer to determine which indexes need BP reordering.
406    pub fn has_reorder_fields(&self) -> bool {
407        self.fields.iter().any(|e| e.reorder)
408    }
409
410    /// Get the default fields for query parsing
411    pub fn default_fields(&self) -> &[Field] {
412        &self.default_fields
413    }
414
415    /// Set default fields (used by builder)
416    pub fn set_default_fields(&mut self, fields: Vec<Field>) {
417        self.default_fields = fields;
418    }
419
420    /// Get the query router rules
421    pub fn query_routers(&self) -> &[QueryRouterRule] {
422        &self.query_routers
423    }
424
425    /// Set query router rules
426    pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
427        self.query_routers = rules;
428    }
429
430    /// Get the primary key field, if one is defined
431    pub fn primary_field(&self) -> Option<Field> {
432        self.fields
433            .iter()
434            .enumerate()
435            .find(|(_, e)| e.primary_key)
436            .map(|(i, _)| Field(i as u32))
437    }
438}
439
/// Builder for Schema.
///
/// Accumulates field definitions, default-field names and router rules;
/// `build` turns them into a `Schema`.
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Field definitions in insertion order; a field's position becomes its `Field` id.
    fields: Vec<FieldEntry>,
    /// Default query fields, kept by name until `build` resolves them to handles.
    default_fields: Vec<String>,
    /// Query router rules handed to the built schema unchanged.
    query_routers: Vec<QueryRouterRule>,
}
447
448impl SchemaBuilder {
449    pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
450        self.add_field_with_tokenizer(
451            name,
452            FieldType::Text,
453            indexed,
454            stored,
455            Some("simple".to_string()),
456        )
457    }
458
459    pub fn add_text_field_with_tokenizer(
460        &mut self,
461        name: &str,
462        indexed: bool,
463        stored: bool,
464        tokenizer: &str,
465    ) -> Field {
466        self.add_field_with_tokenizer(
467            name,
468            FieldType::Text,
469            indexed,
470            stored,
471            Some(tokenizer.to_string()),
472        )
473    }
474
475    pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
476        self.add_field(name, FieldType::U64, indexed, stored)
477    }
478
479    pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
480        self.add_field(name, FieldType::I64, indexed, stored)
481    }
482
483    pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
484        self.add_field(name, FieldType::F64, indexed, stored)
485    }
486
487    pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
488        self.add_field(name, FieldType::Bytes, false, stored)
489    }
490
491    /// Add a JSON field for storing arbitrary JSON data
492    ///
493    /// JSON fields are never indexed, only stored. They can hold any valid JSON value
494    /// (objects, arrays, strings, numbers, booleans, null).
495    pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
496        self.add_field(name, FieldType::Json, false, stored)
497    }
498
499    /// Add a sparse vector field with default configuration
500    ///
501    /// Sparse vectors are indexed as inverted posting lists where each dimension
502    /// becomes a "term" and documents have quantized weights for each dimension.
503    pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
504        self.add_sparse_vector_field_with_config(
505            name,
506            indexed,
507            stored,
508            crate::structures::SparseVectorConfig::default(),
509        )
510    }
511
512    /// Add a sparse vector field with custom configuration
513    ///
514    /// Use `SparseVectorConfig::splade()` for SPLADE models (u16 indices, uint8 weights).
515    /// Use `SparseVectorConfig::compact()` for maximum compression (u16 indices, uint4 weights).
516    pub fn add_sparse_vector_field_with_config(
517        &mut self,
518        name: &str,
519        indexed: bool,
520        stored: bool,
521        config: crate::structures::SparseVectorConfig,
522    ) -> Field {
523        let field = Field(self.fields.len() as u32);
524        self.fields.push(FieldEntry {
525            name: name.to_string(),
526            field_type: FieldType::SparseVector,
527            indexed,
528            stored,
529            tokenizer: None,
530            multi: false,
531            positions: None,
532            sparse_vector_config: Some(config),
533            dense_vector_config: None,
534            binary_dense_vector_config: None,
535            fast: false,
536            primary_key: false,
537            reorder: false,
538        });
539        field
540    }
541
542    /// Set sparse vector configuration for an existing field
543    pub fn set_sparse_vector_config(
544        &mut self,
545        field: Field,
546        config: crate::structures::SparseVectorConfig,
547    ) {
548        if let Some(entry) = self.fields.get_mut(field.0 as usize) {
549            entry.sparse_vector_config = Some(config);
550        }
551    }
552
553    /// Add a dense vector field with default configuration
554    ///
555    /// Dense vectors are indexed using RaBitQ binary quantization for fast ANN search.
556    /// The dimension must be specified as it determines the quantization structure.
557    pub fn add_dense_vector_field(
558        &mut self,
559        name: &str,
560        dim: usize,
561        indexed: bool,
562        stored: bool,
563    ) -> Field {
564        self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
565    }
566
567    /// Add a dense vector field with custom configuration
568    pub fn add_dense_vector_field_with_config(
569        &mut self,
570        name: &str,
571        indexed: bool,
572        stored: bool,
573        config: DenseVectorConfig,
574    ) -> Field {
575        let field = Field(self.fields.len() as u32);
576        self.fields.push(FieldEntry {
577            name: name.to_string(),
578            field_type: FieldType::DenseVector,
579            indexed,
580            stored,
581            tokenizer: None,
582            multi: false,
583            positions: None,
584            sparse_vector_config: None,
585            dense_vector_config: Some(config),
586            binary_dense_vector_config: None,
587            fast: false,
588            primary_key: false,
589            reorder: false,
590        });
591        field
592    }
593
594    /// Add a binary dense vector field
595    ///
596    /// Binary dense vectors use packed-bit storage (1 bit per dimension)
597    /// and Hamming distance scoring. Always brute-force flat search.
598    pub fn add_binary_dense_vector_field(
599        &mut self,
600        name: &str,
601        dim: usize,
602        indexed: bool,
603        stored: bool,
604    ) -> Field {
605        self.add_binary_dense_vector_field_with_config(
606            name,
607            indexed,
608            stored,
609            BinaryDenseVectorConfig::new(dim),
610        )
611    }
612
613    /// Add a binary dense vector field with custom configuration
614    pub fn add_binary_dense_vector_field_with_config(
615        &mut self,
616        name: &str,
617        indexed: bool,
618        stored: bool,
619        config: BinaryDenseVectorConfig,
620    ) -> Field {
621        let field = Field(self.fields.len() as u32);
622        self.fields.push(FieldEntry {
623            name: name.to_string(),
624            field_type: FieldType::BinaryDenseVector,
625            indexed,
626            stored,
627            tokenizer: None,
628            multi: false,
629            positions: None,
630            sparse_vector_config: None,
631            dense_vector_config: None,
632            binary_dense_vector_config: Some(config),
633            fast: false,
634            primary_key: false,
635            reorder: false,
636        });
637        field
638    }
639
640    fn add_field(
641        &mut self,
642        name: &str,
643        field_type: FieldType,
644        indexed: bool,
645        stored: bool,
646    ) -> Field {
647        self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
648    }
649
650    fn add_field_with_tokenizer(
651        &mut self,
652        name: &str,
653        field_type: FieldType,
654        indexed: bool,
655        stored: bool,
656        tokenizer: Option<String>,
657    ) -> Field {
658        self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
659    }
660
661    fn add_field_full(
662        &mut self,
663        name: &str,
664        field_type: FieldType,
665        indexed: bool,
666        stored: bool,
667        tokenizer: Option<String>,
668        multi: bool,
669    ) -> Field {
670        let field = Field(self.fields.len() as u32);
671        self.fields.push(FieldEntry {
672            name: name.to_string(),
673            field_type,
674            indexed,
675            stored,
676            tokenizer,
677            multi,
678            positions: None,
679            sparse_vector_config: None,
680            dense_vector_config: None,
681            binary_dense_vector_config: None,
682            fast: false,
683            primary_key: false,
684            reorder: false,
685        });
686        field
687    }
688
689    /// Set the multi attribute on the last added field
690    pub fn set_multi(&mut self, field: Field, multi: bool) {
691        if let Some(entry) = self.fields.get_mut(field.0 as usize) {
692            entry.multi = multi;
693        }
694    }
695
696    /// Set fast-field columnar storage for O(1) doc→value access.
697    /// Valid for u64, i64, f64, and text fields.
698    pub fn set_fast(&mut self, field: Field, fast: bool) {
699        if let Some(entry) = self.fields.get_mut(field.0 as usize) {
700            entry.fast = fast;
701        }
702    }
703
704    /// Mark a field as the primary key (unique constraint)
705    pub fn set_primary_key(&mut self, field: Field) {
706        if let Some(entry) = self.fields.get_mut(field.0 as usize) {
707            entry.primary_key = true;
708        }
709    }
710
711    /// Enable build-time document reordering (Recursive Graph Bisection) for BMP fields
712    pub fn set_reorder(&mut self, field: Field, reorder: bool) {
713        if let Some(entry) = self.fields.get_mut(field.0 as usize) {
714            entry.reorder = reorder;
715        }
716    }
717
718    /// Set position tracking mode for phrase queries and multi-field element tracking
719    pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
720        if let Some(entry) = self.fields.get_mut(field.0 as usize) {
721            entry.positions = Some(mode);
722        }
723    }
724
725    /// Set default fields by name
726    pub fn set_default_fields(&mut self, field_names: Vec<String>) {
727        self.default_fields = field_names;
728    }
729
730    /// Set query router rules
731    pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
732        self.query_routers = rules;
733    }
734
735    pub fn build(self) -> Schema {
736        let mut name_to_field = HashMap::new();
737        for (i, entry) in self.fields.iter().enumerate() {
738            name_to_field.insert(entry.name.clone(), Field(i as u32));
739        }
740
741        // Resolve default field names to Field IDs
742        let default_fields: Vec<Field> = self
743            .default_fields
744            .iter()
745            .filter_map(|name| name_to_field.get(name).copied())
746            .collect();
747
748        Schema {
749            fields: self.fields,
750            name_to_field,
751            default_fields,
752            query_routers: self.query_routers,
753        }
754    }
755}
756
/// Value that can be stored in a field.
///
/// One variant per `FieldType`, using the same serialized names.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FieldValue {
    /// UTF-8 text
    #[serde(rename = "text")]
    Text(String),
    /// Unsigned 64-bit integer
    #[serde(rename = "u64")]
    U64(u64),
    /// Signed 64-bit integer
    #[serde(rename = "i64")]
    I64(i64),
    /// 64-bit floating point number
    #[serde(rename = "f64")]
    F64(f64),
    /// Raw bytes
    #[serde(rename = "bytes")]
    Bytes(Vec<u8>),
    /// Sparse vector: list of (dimension_id, weight) pairs
    #[serde(rename = "sparse_vector")]
    SparseVector(Vec<(u32, f32)>),
    /// Dense vector: float32 values
    #[serde(rename = "dense_vector")]
    DenseVector(Vec<f32>),
    /// Arbitrary JSON value
    #[serde(rename = "json")]
    Json(serde_json::Value),
    /// Binary dense vector: packed bits (ceil(dim/8) bytes)
    #[serde(rename = "binary_dense_vector")]
    BinaryDenseVector(Vec<u8>),
}
783
784impl FieldValue {
785    pub fn as_text(&self) -> Option<&str> {
786        match self {
787            FieldValue::Text(s) => Some(s),
788            _ => None,
789        }
790    }
791
792    pub fn as_u64(&self) -> Option<u64> {
793        match self {
794            FieldValue::U64(v) => Some(*v),
795            _ => None,
796        }
797    }
798
799    pub fn as_i64(&self) -> Option<i64> {
800        match self {
801            FieldValue::I64(v) => Some(*v),
802            _ => None,
803        }
804    }
805
806    pub fn as_f64(&self) -> Option<f64> {
807        match self {
808            FieldValue::F64(v) => Some(*v),
809            _ => None,
810        }
811    }
812
813    pub fn as_bytes(&self) -> Option<&[u8]> {
814        match self {
815            FieldValue::Bytes(b) => Some(b),
816            _ => None,
817        }
818    }
819
820    pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
821        match self {
822            FieldValue::SparseVector(entries) => Some(entries),
823            _ => None,
824        }
825    }
826
827    pub fn as_dense_vector(&self) -> Option<&[f32]> {
828        match self {
829            FieldValue::DenseVector(v) => Some(v),
830            _ => None,
831        }
832    }
833
834    pub fn as_json(&self) -> Option<&serde_json::Value> {
835        match self {
836            FieldValue::Json(v) => Some(v),
837            _ => None,
838        }
839    }
840
841    pub fn as_binary_dense_vector(&self) -> Option<&[u8]> {
842        match self {
843            FieldValue::BinaryDenseVector(v) => Some(v),
844            _ => None,
845        }
846    }
847}
848
/// A document to be indexed.
///
/// Stored as a flat list of `(field, value)` pairs in insertion order; the same
/// field may appear more than once.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// Field/value pairs in the order they were added.
    field_values: Vec<(Field, FieldValue)>,
}
854
855impl Document {
856    pub fn new() -> Self {
857        Self::default()
858    }
859
860    pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
861        self.field_values
862            .push((field, FieldValue::Text(value.into())));
863    }
864
865    pub fn add_u64(&mut self, field: Field, value: u64) {
866        self.field_values.push((field, FieldValue::U64(value)));
867    }
868
869    pub fn add_i64(&mut self, field: Field, value: i64) {
870        self.field_values.push((field, FieldValue::I64(value)));
871    }
872
873    pub fn add_f64(&mut self, field: Field, value: f64) {
874        self.field_values.push((field, FieldValue::F64(value)));
875    }
876
877    pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
878        self.field_values.push((field, FieldValue::Bytes(value)));
879    }
880
881    pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
882        self.field_values
883            .push((field, FieldValue::SparseVector(entries)));
884    }
885
886    pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
887        self.field_values
888            .push((field, FieldValue::DenseVector(values)));
889    }
890
891    pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
892        self.field_values.push((field, FieldValue::Json(value)));
893    }
894
895    pub fn add_binary_dense_vector(&mut self, field: Field, values: Vec<u8>) {
896        self.field_values
897            .push((field, FieldValue::BinaryDenseVector(values)));
898    }
899
900    pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
901        self.field_values
902            .iter()
903            .find(|(f, _)| *f == field)
904            .map(|(_, v)| v)
905    }
906
907    pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
908        self.field_values
909            .iter()
910            .filter(move |(f, _)| *f == field)
911            .map(|(_, v)| v)
912    }
913
914    pub fn field_values(&self) -> &[(Field, FieldValue)] {
915        &self.field_values
916    }
917
918    /// Return a new Document containing only fields marked as `stored` in the schema
919    pub fn filter_stored(&self, schema: &Schema) -> Document {
920        Document {
921            field_values: self
922                .field_values
923                .iter()
924                .filter(|(field, _)| {
925                    schema
926                        .get_field_entry(*field)
927                        .is_some_and(|entry| entry.stored)
928                })
929                .cloned()
930                .collect(),
931        }
932    }
933
934    /// Convert document to a JSON object using field names from schema
935    ///
936    /// Fields marked as `multi` in the schema are always returned as JSON arrays.
937    /// Other fields with multiple values are also returned as arrays.
938    /// Fields with a single value (and not marked multi) are returned as scalar values.
939    pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
940        use std::collections::HashMap;
941
942        // Group values by field, keeping track of field entry for multi check
943        let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
944            HashMap::new();
945
946        for (field, value) in &self.field_values {
947            if let Some(entry) = schema.get_field_entry(*field) {
948                let json_value = match value {
949                    FieldValue::Text(s) => serde_json::Value::String(s.clone()),
950                    FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
951                    FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
952                    FieldValue::F64(n) => serde_json::json!(n),
953                    FieldValue::Bytes(b) => {
954                        use base64::Engine;
955                        serde_json::Value::String(
956                            base64::engine::general_purpose::STANDARD.encode(b),
957                        )
958                    }
959                    FieldValue::SparseVector(entries) => {
960                        let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
961                        let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
962                        serde_json::json!({
963                            "indices": indices,
964                            "values": values
965                        })
966                    }
967                    FieldValue::DenseVector(values) => {
968                        serde_json::json!(values)
969                    }
970                    FieldValue::Json(v) => v.clone(),
971                    FieldValue::BinaryDenseVector(b) => {
972                        use base64::Engine;
973                        serde_json::Value::String(
974                            base64::engine::general_purpose::STANDARD.encode(b),
975                        )
976                    }
977                };
978                field_values_map
979                    .entry(*field)
980                    .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
981                    .2
982                    .push(json_value);
983            }
984        }
985
986        // Convert to JSON object, using arrays for multi fields or when multiple values exist
987        let mut map = serde_json::Map::new();
988        for (_field, (name, is_multi, values)) in field_values_map {
989            let json_value = if is_multi || values.len() > 1 {
990                serde_json::Value::Array(values)
991            } else {
992                values.into_iter().next().unwrap()
993            };
994            map.insert(name, json_value);
995        }
996
997        serde_json::Value::Object(map)
998    }
999
1000    /// Create a Document from a JSON object using field names from schema
1001    ///
1002    /// Supports:
1003    /// - String values -> Text fields
1004    /// - Number values -> U64/I64/F64 fields (based on schema type)
1005    /// - Array values -> Multiple values for the same field (multifields)
1006    ///
1007    /// Unknown fields (not in schema) are silently ignored.
1008    pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
1009        let obj = json.as_object()?;
1010        let mut doc = Document::new();
1011
1012        for (key, value) in obj {
1013            if let Some(field) = schema.get_field(key) {
1014                let field_entry = schema.get_field_entry(field)?;
1015                Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
1016            }
1017        }
1018
1019        Some(doc)
1020    }
1021
1022    /// Helper to add a JSON value to a document, handling type conversion
1023    fn add_json_value(
1024        doc: &mut Document,
1025        field: Field,
1026        field_type: &FieldType,
1027        value: &serde_json::Value,
1028    ) {
1029        match value {
1030            serde_json::Value::String(s) => {
1031                if matches!(field_type, FieldType::Text) {
1032                    doc.add_text(field, s.clone());
1033                }
1034            }
1035            serde_json::Value::Number(n) => {
1036                match field_type {
1037                    FieldType::I64 => {
1038                        if let Some(i) = n.as_i64() {
1039                            doc.add_i64(field, i);
1040                        }
1041                    }
1042                    FieldType::U64 => {
1043                        if let Some(u) = n.as_u64() {
1044                            doc.add_u64(field, u);
1045                        } else if let Some(i) = n.as_i64() {
1046                            // Allow positive i64 as u64
1047                            if i >= 0 {
1048                                doc.add_u64(field, i as u64);
1049                            }
1050                        }
1051                    }
1052                    FieldType::F64 => {
1053                        if let Some(f) = n.as_f64() {
1054                            doc.add_f64(field, f);
1055                        }
1056                    }
1057                    _ => {}
1058                }
1059            }
1060            // Handle arrays (multifields) - add each element separately
1061            serde_json::Value::Array(arr) => {
1062                for item in arr {
1063                    Self::add_json_value(doc, field, field_type, item);
1064                }
1065            }
1066            // Handle sparse vector objects
1067            serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
1068                if let (Some(indices_val), Some(values_val)) =
1069                    (obj.get("indices"), obj.get("values"))
1070                {
1071                    let indices: Vec<u32> = indices_val
1072                        .as_array()
1073                        .map(|arr| {
1074                            arr.iter()
1075                                .filter_map(|v| v.as_u64().map(|n| n as u32))
1076                                .collect()
1077                        })
1078                        .unwrap_or_default();
1079                    let values: Vec<f32> = values_val
1080                        .as_array()
1081                        .map(|arr| {
1082                            arr.iter()
1083                                .filter_map(|v| v.as_f64().map(|n| n as f32))
1084                                .collect()
1085                        })
1086                        .unwrap_or_default();
1087                    if indices.len() == values.len() {
1088                        let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
1089                        doc.add_sparse_vector(field, entries);
1090                    }
1091                }
1092            }
1093            // Handle JSON fields - accept any value directly
1094            _ if matches!(field_type, FieldType::Json) => {
1095                doc.add_json(field, value.clone());
1096            }
1097            serde_json::Value::Object(_) => {}
1098            _ => {}
1099        }
1100    }
1101}
1102
1103#[cfg(test)]
1104mod tests {
1105    use super::*;
1106
1107    #[test]
1108    fn test_schema_builder() {
1109        let mut builder = Schema::builder();
1110        let title = builder.add_text_field("title", true, true);
1111        let body = builder.add_text_field("body", true, false);
1112        let count = builder.add_u64_field("count", true, true);
1113        let schema = builder.build();
1114
1115        assert_eq!(schema.get_field("title"), Some(title));
1116        assert_eq!(schema.get_field("body"), Some(body));
1117        assert_eq!(schema.get_field("count"), Some(count));
1118        assert_eq!(schema.get_field("nonexistent"), None);
1119    }
1120
1121    #[test]
1122    fn test_document() {
1123        let mut builder = Schema::builder();
1124        let title = builder.add_text_field("title", true, true);
1125        let count = builder.add_u64_field("count", true, true);
1126        let _schema = builder.build();
1127
1128        let mut doc = Document::new();
1129        doc.add_text(title, "Hello World");
1130        doc.add_u64(count, 42);
1131
1132        assert_eq!(doc.get_first(title).unwrap().as_text(), Some("Hello World"));
1133        assert_eq!(doc.get_first(count).unwrap().as_u64(), Some(42));
1134    }
1135
1136    #[test]
1137    fn test_document_serialization() {
1138        let mut builder = Schema::builder();
1139        let title = builder.add_text_field("title", true, true);
1140        let count = builder.add_u64_field("count", true, true);
1141        let _schema = builder.build();
1142
1143        let mut doc = Document::new();
1144        doc.add_text(title, "Hello World");
1145        doc.add_u64(count, 42);
1146
1147        // Serialize
1148        let json = serde_json::to_string(&doc).unwrap();
1149        println!("Serialized doc: {}", json);
1150
1151        // Deserialize
1152        let doc2: Document = serde_json::from_str(&json).unwrap();
1153        assert_eq!(
1154            doc2.field_values().len(),
1155            2,
1156            "Should have 2 field values after deserialization"
1157        );
1158        assert_eq!(
1159            doc2.get_first(title).unwrap().as_text(),
1160            Some("Hello World")
1161        );
1162        assert_eq!(doc2.get_first(count).unwrap().as_u64(), Some(42));
1163    }
1164
1165    #[test]
1166    fn test_multivalue_field() {
1167        let mut builder = Schema::builder();
1168        let uris = builder.add_text_field("uris", true, true);
1169        let title = builder.add_text_field("title", true, true);
1170        let schema = builder.build();
1171
1172        // Create document with multiple values for the same field
1173        let mut doc = Document::new();
1174        doc.add_text(uris, "one");
1175        doc.add_text(uris, "two");
1176        doc.add_text(title, "Test Document");
1177
1178        // Verify get_first returns the first value
1179        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));
1180
1181        // Verify get_all returns all values
1182        let all_uris: Vec<_> = doc.get_all(uris).collect();
1183        assert_eq!(all_uris.len(), 2);
1184        assert_eq!(all_uris[0].as_text(), Some("one"));
1185        assert_eq!(all_uris[1].as_text(), Some("two"));
1186
1187        // Verify to_json returns array for multi-value field
1188        let json = doc.to_json(&schema);
1189        let uris_json = json.get("uris").unwrap();
1190        assert!(uris_json.is_array(), "Multi-value field should be an array");
1191        let uris_arr = uris_json.as_array().unwrap();
1192        assert_eq!(uris_arr.len(), 2);
1193        assert_eq!(uris_arr[0].as_str(), Some("one"));
1194        assert_eq!(uris_arr[1].as_str(), Some("two"));
1195
1196        // Verify single-value field is NOT an array
1197        let title_json = json.get("title").unwrap();
1198        assert!(
1199            title_json.is_string(),
1200            "Single-value field should be a string"
1201        );
1202        assert_eq!(title_json.as_str(), Some("Test Document"));
1203    }
1204
1205    #[test]
1206    fn test_multivalue_from_json() {
1207        let mut builder = Schema::builder();
1208        let uris = builder.add_text_field("uris", true, true);
1209        let title = builder.add_text_field("title", true, true);
1210        let schema = builder.build();
1211
1212        // Create JSON with array value
1213        let json = serde_json::json!({
1214            "uris": ["one", "two"],
1215            "title": "Test Document"
1216        });
1217
1218        // Parse from JSON
1219        let doc = Document::from_json(&json, &schema).unwrap();
1220
1221        // Verify all values are present
1222        let all_uris: Vec<_> = doc.get_all(uris).collect();
1223        assert_eq!(all_uris.len(), 2);
1224        assert_eq!(all_uris[0].as_text(), Some("one"));
1225        assert_eq!(all_uris[1].as_text(), Some("two"));
1226
1227        // Verify single value
1228        assert_eq!(
1229            doc.get_first(title).unwrap().as_text(),
1230            Some("Test Document")
1231        );
1232
1233        // Verify roundtrip: to_json should produce equivalent JSON
1234        let json_out = doc.to_json(&schema);
1235        let uris_out = json_out.get("uris").unwrap().as_array().unwrap();
1236        assert_eq!(uris_out.len(), 2);
1237        assert_eq!(uris_out[0].as_str(), Some("one"));
1238        assert_eq!(uris_out[1].as_str(), Some("two"));
1239    }
1240
1241    #[test]
1242    fn test_multi_attribute_forces_array() {
1243        // Test that fields marked as 'multi' are always serialized as arrays,
1244        // even when they have only one value
1245        let mut builder = Schema::builder();
1246        let uris = builder.add_text_field("uris", true, true);
1247        builder.set_multi(uris, true); // Mark as multi
1248        let title = builder.add_text_field("title", true, true);
1249        let schema = builder.build();
1250
1251        // Verify the multi attribute is set
1252        assert!(schema.get_field_entry(uris).unwrap().multi);
1253        assert!(!schema.get_field_entry(title).unwrap().multi);
1254
1255        // Create document with single value for multi field
1256        let mut doc = Document::new();
1257        doc.add_text(uris, "only_one");
1258        doc.add_text(title, "Test Document");
1259
1260        // Verify to_json returns array for multi field even with single value
1261        let json = doc.to_json(&schema);
1262
1263        let uris_json = json.get("uris").unwrap();
1264        assert!(
1265            uris_json.is_array(),
1266            "Multi field should be array even with single value"
1267        );
1268        let uris_arr = uris_json.as_array().unwrap();
1269        assert_eq!(uris_arr.len(), 1);
1270        assert_eq!(uris_arr[0].as_str(), Some("only_one"));
1271
1272        // Verify non-multi field with single value is NOT an array
1273        let title_json = json.get("title").unwrap();
1274        assert!(
1275            title_json.is_string(),
1276            "Non-multi single-value field should be a string"
1277        );
1278        assert_eq!(title_json.as_str(), Some("Test Document"));
1279    }
1280
1281    #[test]
1282    fn test_sparse_vector_field() {
1283        let mut builder = Schema::builder();
1284        let embedding = builder.add_sparse_vector_field("embedding", true, true);
1285        let title = builder.add_text_field("title", true, true);
1286        let schema = builder.build();
1287
1288        assert_eq!(schema.get_field("embedding"), Some(embedding));
1289        assert_eq!(
1290            schema.get_field_entry(embedding).unwrap().field_type,
1291            FieldType::SparseVector
1292        );
1293
1294        // Create document with sparse vector
1295        let mut doc = Document::new();
1296        doc.add_sparse_vector(embedding, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
1297        doc.add_text(title, "Test Document");
1298
1299        // Verify accessor
1300        let entries = doc
1301            .get_first(embedding)
1302            .unwrap()
1303            .as_sparse_vector()
1304            .unwrap();
1305        assert_eq!(entries, &[(0, 1.0), (5, 2.5), (10, 0.5)]);
1306
1307        // Verify JSON roundtrip
1308        let json = doc.to_json(&schema);
1309        let embedding_json = json.get("embedding").unwrap();
1310        assert!(embedding_json.is_object());
1311        assert_eq!(
1312            embedding_json
1313                .get("indices")
1314                .unwrap()
1315                .as_array()
1316                .unwrap()
1317                .len(),
1318            3
1319        );
1320
1321        // Parse back from JSON
1322        let doc2 = Document::from_json(&json, &schema).unwrap();
1323        let entries2 = doc2
1324            .get_first(embedding)
1325            .unwrap()
1326            .as_sparse_vector()
1327            .unwrap();
1328        assert_eq!(entries2[0].0, 0);
1329        assert!((entries2[0].1 - 1.0).abs() < 1e-6);
1330        assert_eq!(entries2[1].0, 5);
1331        assert!((entries2[1].1 - 2.5).abs() < 1e-6);
1332        assert_eq!(entries2[2].0, 10);
1333        assert!((entries2[2].1 - 0.5).abs() < 1e-6);
1334    }
1335
1336    #[test]
1337    fn test_json_field() {
1338        let mut builder = Schema::builder();
1339        let metadata = builder.add_json_field("metadata", true);
1340        let title = builder.add_text_field("title", true, true);
1341        let schema = builder.build();
1342
1343        assert_eq!(schema.get_field("metadata"), Some(metadata));
1344        assert_eq!(
1345            schema.get_field_entry(metadata).unwrap().field_type,
1346            FieldType::Json
1347        );
1348        // JSON fields are never indexed
1349        assert!(!schema.get_field_entry(metadata).unwrap().indexed);
1350        assert!(schema.get_field_entry(metadata).unwrap().stored);
1351
1352        // Create document with JSON value (object)
1353        let json_value = serde_json::json!({
1354            "author": "John Doe",
1355            "tags": ["rust", "search"],
1356            "nested": {"key": "value"}
1357        });
1358        let mut doc = Document::new();
1359        doc.add_json(metadata, json_value.clone());
1360        doc.add_text(title, "Test Document");
1361
1362        // Verify accessor
1363        let stored_json = doc.get_first(metadata).unwrap().as_json().unwrap();
1364        assert_eq!(stored_json, &json_value);
1365        assert_eq!(
1366            stored_json.get("author").unwrap().as_str(),
1367            Some("John Doe")
1368        );
1369
1370        // Verify JSON roundtrip via to_json/from_json
1371        let doc_json = doc.to_json(&schema);
1372        let metadata_out = doc_json.get("metadata").unwrap();
1373        assert_eq!(metadata_out, &json_value);
1374
1375        // Parse back from JSON
1376        let doc2 = Document::from_json(&doc_json, &schema).unwrap();
1377        let stored_json2 = doc2.get_first(metadata).unwrap().as_json().unwrap();
1378        assert_eq!(stored_json2, &json_value);
1379    }
1380
1381    #[test]
1382    fn test_json_field_various_types() {
1383        let mut builder = Schema::builder();
1384        let data = builder.add_json_field("data", true);
1385        let _schema = builder.build();
1386
1387        // Test with array
1388        let arr_value = serde_json::json!([1, 2, 3, "four", null]);
1389        let mut doc = Document::new();
1390        doc.add_json(data, arr_value.clone());
1391        assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), &arr_value);
1392
1393        // Test with string
1394        let str_value = serde_json::json!("just a string");
1395        let mut doc2 = Document::new();
1396        doc2.add_json(data, str_value.clone());
1397        assert_eq!(doc2.get_first(data).unwrap().as_json().unwrap(), &str_value);
1398
1399        // Test with number
1400        let num_value = serde_json::json!(42.5);
1401        let mut doc3 = Document::new();
1402        doc3.add_json(data, num_value.clone());
1403        assert_eq!(doc3.get_first(data).unwrap().as_json().unwrap(), &num_value);
1404
1405        // Test with null
1406        let null_value = serde_json::Value::Null;
1407        let mut doc4 = Document::new();
1408        doc4.add_json(data, null_value.clone());
1409        assert_eq!(
1410            doc4.get_first(data).unwrap().as_json().unwrap(),
1411            &null_value
1412        );
1413
1414        // Test with boolean
1415        let bool_value = serde_json::json!(true);
1416        let mut doc5 = Document::new();
1417        doc5.add_json(data, bool_value.clone());
1418        assert_eq!(
1419            doc5.get_first(data).unwrap().as_json().unwrap(),
1420            &bool_value
1421        );
1422    }
1423}