1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
8pub struct Field(pub u32);
9
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub enum FieldType {
13 #[serde(rename = "text")]
15 Text,
16 #[serde(rename = "u64")]
18 U64,
19 #[serde(rename = "i64")]
21 I64,
22 #[serde(rename = "f64")]
24 F64,
25 #[serde(rename = "bytes")]
27 Bytes,
28 #[serde(rename = "sparse_vector")]
30 SparseVector,
31 #[serde(rename = "dense_vector")]
33 DenseVector,
34 #[serde(rename = "json")]
36 Json,
37}
38
39#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct FieldEntry {
42 pub name: String,
43 pub field_type: FieldType,
44 pub indexed: bool,
45 pub stored: bool,
46 pub tokenizer: Option<String>,
48 #[serde(default)]
50 pub multi: bool,
51 #[serde(default, skip_serializing_if = "Option::is_none")]
53 pub positions: Option<PositionMode>,
54 #[serde(default, skip_serializing_if = "Option::is_none")]
56 pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
57 #[serde(default, skip_serializing_if = "Option::is_none")]
59 pub dense_vector_config: Option<DenseVectorConfig>,
60 #[serde(default)]
63 pub fast: bool,
64 #[serde(default)]
66 pub primary_key: bool,
67 #[serde(default)]
69 pub simhash: bool,
70}
71
72#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
74#[serde(rename_all = "snake_case")]
75pub enum PositionMode {
76 Ordinal,
79 TokenPosition,
82 Full,
85}
86
87impl PositionMode {
88 pub fn tracks_ordinal(&self) -> bool {
90 matches!(self, PositionMode::Ordinal | PositionMode::Full)
91 }
92
93 pub fn tracks_token_position(&self) -> bool {
95 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
96 }
97}
98
99#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
101#[serde(rename_all = "snake_case")]
102pub enum VectorIndexType {
103 Flat,
105 #[default]
107 RaBitQ,
108 IvfRaBitQ,
110 ScaNN,
112}
113
114#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
120#[serde(rename_all = "snake_case")]
121pub enum DenseVectorQuantization {
122 #[default]
124 F32,
125 F16,
127 UInt8,
129}
130
131impl DenseVectorQuantization {
132 pub fn element_size(self) -> usize {
134 match self {
135 Self::F32 => 4,
136 Self::F16 => 2,
137 Self::UInt8 => 1,
138 }
139 }
140
141 pub fn tag(self) -> u8 {
143 match self {
144 Self::F32 => 0,
145 Self::F16 => 1,
146 Self::UInt8 => 2,
147 }
148 }
149
150 pub fn from_tag(tag: u8) -> Option<Self> {
152 match tag {
153 0 => Some(Self::F32),
154 1 => Some(Self::F16),
155 2 => Some(Self::UInt8),
156 _ => None,
157 }
158 }
159}
160
161#[derive(Debug, Clone, Serialize, Deserialize)]
169pub struct DenseVectorConfig {
170 pub dim: usize,
172 #[serde(default)]
175 pub index_type: VectorIndexType,
176 #[serde(default)]
178 pub quantization: DenseVectorQuantization,
179 #[serde(default, skip_serializing_if = "Option::is_none")]
182 pub num_clusters: Option<usize>,
183 #[serde(default = "default_nprobe")]
185 pub nprobe: usize,
186 #[serde(default, skip_serializing_if = "Option::is_none")]
190 pub build_threshold: Option<usize>,
191 #[serde(default = "default_unit_norm")]
196 pub unit_norm: bool,
197}
198
/// Serde fallback for `DenseVectorConfig::nprobe` when the key is absent.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
202
/// Serde fallback for `DenseVectorConfig::unit_norm` when the key is absent.
fn default_unit_norm() -> bool {
    const DEFAULT_UNIT_NORM: bool = true;
    DEFAULT_UNIT_NORM
}
206
207impl DenseVectorConfig {
208 pub fn new(dim: usize) -> Self {
209 Self {
210 dim,
211 index_type: VectorIndexType::RaBitQ,
212 quantization: DenseVectorQuantization::F32,
213 num_clusters: None,
214 nprobe: 32,
215 build_threshold: None,
216 unit_norm: true,
217 }
218 }
219
220 pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
222 Self {
223 dim,
224 index_type: VectorIndexType::IvfRaBitQ,
225 quantization: DenseVectorQuantization::F32,
226 num_clusters,
227 nprobe,
228 build_threshold: None,
229 unit_norm: true,
230 }
231 }
232
233 pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
235 Self {
236 dim,
237 index_type: VectorIndexType::ScaNN,
238 quantization: DenseVectorQuantization::F32,
239 num_clusters,
240 nprobe,
241 build_threshold: None,
242 unit_norm: true,
243 }
244 }
245
246 pub fn flat(dim: usize) -> Self {
248 Self {
249 dim,
250 index_type: VectorIndexType::Flat,
251 quantization: DenseVectorQuantization::F32,
252 num_clusters: None,
253 nprobe: 0,
254 build_threshold: None,
255 unit_norm: true,
256 }
257 }
258
259 pub fn with_quantization(mut self, quantization: DenseVectorQuantization) -> Self {
261 self.quantization = quantization;
262 self
263 }
264
265 pub fn with_build_threshold(mut self, threshold: usize) -> Self {
267 self.build_threshold = Some(threshold);
268 self
269 }
270
271 pub fn with_unit_norm(mut self) -> Self {
273 self.unit_norm = true;
274 self
275 }
276
277 pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
279 self.num_clusters = Some(num_clusters);
280 self
281 }
282
283 pub fn uses_ivf(&self) -> bool {
285 matches!(
286 self.index_type,
287 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
288 )
289 }
290
291 pub fn uses_scann(&self) -> bool {
293 self.index_type == VectorIndexType::ScaNN
294 }
295
296 pub fn is_flat(&self) -> bool {
298 self.index_type == VectorIndexType::Flat
299 }
300
301 pub fn default_build_threshold(&self) -> usize {
303 self.build_threshold.unwrap_or(match self.index_type {
304 VectorIndexType::Flat => usize::MAX, VectorIndexType::RaBitQ => 1000,
306 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
307 })
308 }
309
310 pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
312 self.num_clusters.unwrap_or_else(|| {
313 let optimal = (num_vectors as f64).sqrt() as usize;
315 optimal.clamp(16, 4096)
316 })
317 }
318}
319
320use super::query_field_router::QueryRouterRule;
321
322#[derive(Debug, Clone, Default, Serialize, Deserialize)]
324pub struct Schema {
325 fields: Vec<FieldEntry>,
326 name_to_field: HashMap<String, Field>,
327 #[serde(default)]
329 default_fields: Vec<Field>,
330 #[serde(default)]
332 query_routers: Vec<QueryRouterRule>,
333}
334
335impl Schema {
336 pub fn builder() -> SchemaBuilder {
337 SchemaBuilder::default()
338 }
339
340 pub fn get_field(&self, name: &str) -> Option<Field> {
341 self.name_to_field.get(name).copied()
342 }
343
344 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
345 self.fields.get(field.0 as usize)
346 }
347
348 pub fn get_field_name(&self, field: Field) -> Option<&str> {
349 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
350 }
351
352 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
353 self.fields
354 .iter()
355 .enumerate()
356 .map(|(i, e)| (Field(i as u32), e))
357 }
358
359 pub fn num_fields(&self) -> usize {
360 self.fields.len()
361 }
362
363 pub fn default_fields(&self) -> &[Field] {
365 &self.default_fields
366 }
367
368 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
370 self.default_fields = fields;
371 }
372
373 pub fn query_routers(&self) -> &[QueryRouterRule] {
375 &self.query_routers
376 }
377
378 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
380 self.query_routers = rules;
381 }
382
383 pub fn primary_field(&self) -> Option<Field> {
385 self.fields
386 .iter()
387 .enumerate()
388 .find(|(_, e)| e.primary_key)
389 .map(|(i, _)| Field(i as u32))
390 }
391}
392
393#[derive(Debug, Default)]
395pub struct SchemaBuilder {
396 fields: Vec<FieldEntry>,
397 default_fields: Vec<String>,
398 query_routers: Vec<QueryRouterRule>,
399}
400
401impl SchemaBuilder {
402 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
403 self.add_field_with_tokenizer(
404 name,
405 FieldType::Text,
406 indexed,
407 stored,
408 Some("simple".to_string()),
409 )
410 }
411
412 pub fn add_text_field_with_tokenizer(
413 &mut self,
414 name: &str,
415 indexed: bool,
416 stored: bool,
417 tokenizer: &str,
418 ) -> Field {
419 self.add_field_with_tokenizer(
420 name,
421 FieldType::Text,
422 indexed,
423 stored,
424 Some(tokenizer.to_string()),
425 )
426 }
427
428 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
429 self.add_field(name, FieldType::U64, indexed, stored)
430 }
431
432 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
433 self.add_field(name, FieldType::I64, indexed, stored)
434 }
435
436 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
437 self.add_field(name, FieldType::F64, indexed, stored)
438 }
439
440 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
441 self.add_field(name, FieldType::Bytes, false, stored)
442 }
443
444 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
449 self.add_field(name, FieldType::Json, false, stored)
450 }
451
452 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
457 self.add_sparse_vector_field_with_config(
458 name,
459 indexed,
460 stored,
461 crate::structures::SparseVectorConfig::default(),
462 )
463 }
464
465 pub fn add_sparse_vector_field_with_config(
470 &mut self,
471 name: &str,
472 indexed: bool,
473 stored: bool,
474 config: crate::structures::SparseVectorConfig,
475 ) -> Field {
476 let field = Field(self.fields.len() as u32);
477 self.fields.push(FieldEntry {
478 name: name.to_string(),
479 field_type: FieldType::SparseVector,
480 indexed,
481 stored,
482 tokenizer: None,
483 multi: false,
484 positions: None,
485 sparse_vector_config: Some(config),
486 dense_vector_config: None,
487 fast: false,
488 primary_key: false,
489 simhash: false,
490 });
491 field
492 }
493
494 pub fn set_sparse_vector_config(
496 &mut self,
497 field: Field,
498 config: crate::structures::SparseVectorConfig,
499 ) {
500 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
501 entry.sparse_vector_config = Some(config);
502 }
503 }
504
505 pub fn add_dense_vector_field(
510 &mut self,
511 name: &str,
512 dim: usize,
513 indexed: bool,
514 stored: bool,
515 ) -> Field {
516 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
517 }
518
519 pub fn add_dense_vector_field_with_config(
521 &mut self,
522 name: &str,
523 indexed: bool,
524 stored: bool,
525 config: DenseVectorConfig,
526 ) -> Field {
527 let field = Field(self.fields.len() as u32);
528 self.fields.push(FieldEntry {
529 name: name.to_string(),
530 field_type: FieldType::DenseVector,
531 indexed,
532 stored,
533 tokenizer: None,
534 multi: false,
535 positions: None,
536 sparse_vector_config: None,
537 dense_vector_config: Some(config),
538 fast: false,
539 primary_key: false,
540 simhash: false,
541 });
542 field
543 }
544
545 fn add_field(
546 &mut self,
547 name: &str,
548 field_type: FieldType,
549 indexed: bool,
550 stored: bool,
551 ) -> Field {
552 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
553 }
554
555 fn add_field_with_tokenizer(
556 &mut self,
557 name: &str,
558 field_type: FieldType,
559 indexed: bool,
560 stored: bool,
561 tokenizer: Option<String>,
562 ) -> Field {
563 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
564 }
565
566 fn add_field_full(
567 &mut self,
568 name: &str,
569 field_type: FieldType,
570 indexed: bool,
571 stored: bool,
572 tokenizer: Option<String>,
573 multi: bool,
574 ) -> Field {
575 let field = Field(self.fields.len() as u32);
576 self.fields.push(FieldEntry {
577 name: name.to_string(),
578 field_type,
579 indexed,
580 stored,
581 tokenizer,
582 multi,
583 positions: None,
584 sparse_vector_config: None,
585 dense_vector_config: None,
586 fast: false,
587 primary_key: false,
588 simhash: false,
589 });
590 field
591 }
592
593 pub fn set_multi(&mut self, field: Field, multi: bool) {
595 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
596 entry.multi = multi;
597 }
598 }
599
600 pub fn set_fast(&mut self, field: Field, fast: bool) {
603 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
604 entry.fast = fast;
605 }
606 }
607
608 pub fn set_primary_key(&mut self, field: Field) {
610 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
611 entry.primary_key = true;
612 }
613 }
614
615 pub fn set_simhash(&mut self, field: Field, simhash: bool) {
617 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
618 entry.simhash = simhash;
619 }
620 }
621
622 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
624 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
625 entry.positions = Some(mode);
626 }
627 }
628
629 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
631 self.default_fields = field_names;
632 }
633
634 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
636 self.query_routers = rules;
637 }
638
639 pub fn build(self) -> Schema {
640 let mut name_to_field = HashMap::new();
641 for (i, entry) in self.fields.iter().enumerate() {
642 name_to_field.insert(entry.name.clone(), Field(i as u32));
643 }
644
645 let default_fields: Vec<Field> = self
647 .default_fields
648 .iter()
649 .filter_map(|name| name_to_field.get(name).copied())
650 .collect();
651
652 Schema {
653 fields: self.fields,
654 name_to_field,
655 default_fields,
656 query_routers: self.query_routers,
657 }
658 }
659}
660
661#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
663pub enum FieldValue {
664 #[serde(rename = "text")]
665 Text(String),
666 #[serde(rename = "u64")]
667 U64(u64),
668 #[serde(rename = "i64")]
669 I64(i64),
670 #[serde(rename = "f64")]
671 F64(f64),
672 #[serde(rename = "bytes")]
673 Bytes(Vec<u8>),
674 #[serde(rename = "sparse_vector")]
676 SparseVector(Vec<(u32, f32)>),
677 #[serde(rename = "dense_vector")]
679 DenseVector(Vec<f32>),
680 #[serde(rename = "json")]
682 Json(serde_json::Value),
683}
684
685impl FieldValue {
686 pub fn as_text(&self) -> Option<&str> {
687 match self {
688 FieldValue::Text(s) => Some(s),
689 _ => None,
690 }
691 }
692
693 pub fn as_u64(&self) -> Option<u64> {
694 match self {
695 FieldValue::U64(v) => Some(*v),
696 _ => None,
697 }
698 }
699
700 pub fn as_i64(&self) -> Option<i64> {
701 match self {
702 FieldValue::I64(v) => Some(*v),
703 _ => None,
704 }
705 }
706
707 pub fn as_f64(&self) -> Option<f64> {
708 match self {
709 FieldValue::F64(v) => Some(*v),
710 _ => None,
711 }
712 }
713
714 pub fn as_bytes(&self) -> Option<&[u8]> {
715 match self {
716 FieldValue::Bytes(b) => Some(b),
717 _ => None,
718 }
719 }
720
721 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
722 match self {
723 FieldValue::SparseVector(entries) => Some(entries),
724 _ => None,
725 }
726 }
727
728 pub fn as_dense_vector(&self) -> Option<&[f32]> {
729 match self {
730 FieldValue::DenseVector(v) => Some(v),
731 _ => None,
732 }
733 }
734
735 pub fn as_json(&self) -> Option<&serde_json::Value> {
736 match self {
737 FieldValue::Json(v) => Some(v),
738 _ => None,
739 }
740 }
741}
742
743#[derive(Debug, Clone, Default, Serialize, Deserialize)]
745pub struct Document {
746 field_values: Vec<(Field, FieldValue)>,
747}
748
749impl Document {
750 pub fn new() -> Self {
751 Self::default()
752 }
753
754 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
755 self.field_values
756 .push((field, FieldValue::Text(value.into())));
757 }
758
759 pub fn add_u64(&mut self, field: Field, value: u64) {
760 self.field_values.push((field, FieldValue::U64(value)));
761 }
762
763 pub fn add_i64(&mut self, field: Field, value: i64) {
764 self.field_values.push((field, FieldValue::I64(value)));
765 }
766
767 pub fn add_f64(&mut self, field: Field, value: f64) {
768 self.field_values.push((field, FieldValue::F64(value)));
769 }
770
771 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
772 self.field_values.push((field, FieldValue::Bytes(value)));
773 }
774
775 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
776 self.field_values
777 .push((field, FieldValue::SparseVector(entries)));
778 }
779
780 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
781 self.field_values
782 .push((field, FieldValue::DenseVector(values)));
783 }
784
785 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
786 self.field_values.push((field, FieldValue::Json(value)));
787 }
788
789 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
790 self.field_values
791 .iter()
792 .find(|(f, _)| *f == field)
793 .map(|(_, v)| v)
794 }
795
796 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
797 self.field_values
798 .iter()
799 .filter(move |(f, _)| *f == field)
800 .map(|(_, v)| v)
801 }
802
803 pub fn field_values(&self) -> &[(Field, FieldValue)] {
804 &self.field_values
805 }
806
807 pub fn filter_stored(&self, schema: &Schema) -> Document {
809 Document {
810 field_values: self
811 .field_values
812 .iter()
813 .filter(|(field, _)| {
814 schema
815 .get_field_entry(*field)
816 .is_some_and(|entry| entry.stored)
817 })
818 .cloned()
819 .collect(),
820 }
821 }
822
823 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
829 use std::collections::HashMap;
830
831 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
833 HashMap::new();
834
835 for (field, value) in &self.field_values {
836 if let Some(entry) = schema.get_field_entry(*field) {
837 let json_value = match value {
838 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
839 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
840 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
841 FieldValue::F64(n) => serde_json::json!(n),
842 FieldValue::Bytes(b) => {
843 use base64::Engine;
844 serde_json::Value::String(
845 base64::engine::general_purpose::STANDARD.encode(b),
846 )
847 }
848 FieldValue::SparseVector(entries) => {
849 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
850 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
851 serde_json::json!({
852 "indices": indices,
853 "values": values
854 })
855 }
856 FieldValue::DenseVector(values) => {
857 serde_json::json!(values)
858 }
859 FieldValue::Json(v) => v.clone(),
860 };
861 field_values_map
862 .entry(*field)
863 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
864 .2
865 .push(json_value);
866 }
867 }
868
869 let mut map = serde_json::Map::new();
871 for (_field, (name, is_multi, values)) in field_values_map {
872 let json_value = if is_multi || values.len() > 1 {
873 serde_json::Value::Array(values)
874 } else {
875 values.into_iter().next().unwrap()
876 };
877 map.insert(name, json_value);
878 }
879
880 serde_json::Value::Object(map)
881 }
882
883 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
892 let obj = json.as_object()?;
893 let mut doc = Document::new();
894
895 for (key, value) in obj {
896 if let Some(field) = schema.get_field(key) {
897 let field_entry = schema.get_field_entry(field)?;
898 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
899 }
900 }
901
902 Some(doc)
903 }
904
905 fn add_json_value(
907 doc: &mut Document,
908 field: Field,
909 field_type: &FieldType,
910 value: &serde_json::Value,
911 ) {
912 match value {
913 serde_json::Value::String(s) => {
914 if matches!(field_type, FieldType::Text) {
915 doc.add_text(field, s.clone());
916 }
917 }
918 serde_json::Value::Number(n) => {
919 match field_type {
920 FieldType::I64 => {
921 if let Some(i) = n.as_i64() {
922 doc.add_i64(field, i);
923 }
924 }
925 FieldType::U64 => {
926 if let Some(u) = n.as_u64() {
927 doc.add_u64(field, u);
928 } else if let Some(i) = n.as_i64() {
929 if i >= 0 {
931 doc.add_u64(field, i as u64);
932 }
933 }
934 }
935 FieldType::F64 => {
936 if let Some(f) = n.as_f64() {
937 doc.add_f64(field, f);
938 }
939 }
940 _ => {}
941 }
942 }
943 serde_json::Value::Array(arr) => {
945 for item in arr {
946 Self::add_json_value(doc, field, field_type, item);
947 }
948 }
949 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
951 if let (Some(indices_val), Some(values_val)) =
952 (obj.get("indices"), obj.get("values"))
953 {
954 let indices: Vec<u32> = indices_val
955 .as_array()
956 .map(|arr| {
957 arr.iter()
958 .filter_map(|v| v.as_u64().map(|n| n as u32))
959 .collect()
960 })
961 .unwrap_or_default();
962 let values: Vec<f32> = values_val
963 .as_array()
964 .map(|arr| {
965 arr.iter()
966 .filter_map(|v| v.as_f64().map(|n| n as f32))
967 .collect()
968 })
969 .unwrap_or_default();
970 if indices.len() == values.len() {
971 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
972 doc.add_sparse_vector(field, entries);
973 }
974 }
975 }
976 _ if matches!(field_type, FieldType::Json) => {
978 doc.add_json(field, value.clone());
979 }
980 serde_json::Value::Object(_) => {}
981 _ => {}
982 }
983 }
984}
985
#[cfg(test)]
mod tests {
    use super::*;

    /// Name lookups return the handles the builder handed out.
    #[test]
    fn test_schema_builder() {
        let mut builder = Schema::builder();
        let registered = [
            ("title", builder.add_text_field("title", true, true)),
            ("body", builder.add_text_field("body", true, false)),
            ("count", builder.add_u64_field("count", true, true)),
        ];
        let schema = builder.build();

        for (name, handle) in registered.iter() {
            assert_eq!(schema.get_field(name), Some(*handle));
        }
        assert_eq!(schema.get_field("nonexistent"), None);
    }

    /// Values added to a document can be read back through `get_first`.
    #[test]
    fn test_document() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        let stored_title = doc.get_first(title).unwrap();
        let stored_count = doc.get_first(count).unwrap();
        assert_eq!(stored_title.as_text(), Some("Hello World"));
        assert_eq!(stored_count.as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip.
    #[test]
    fn test_document_serialization() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut original = Document::new();
        original.add_text(title, "Hello World");
        original.add_u64(count, 42);

        let json = serde_json::to_string(&original).unwrap();
        println!("Serialized doc: {}", json);

        let restored: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(
            restored.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        let restored_title = restored.get_first(title).unwrap();
        assert_eq!(restored_title.as_text(), Some("Hello World"));
        let restored_count = restored.get_first(count).unwrap();
        assert_eq!(restored_count.as_u64(), Some(42));
    }

    /// Repeated values for one field are kept in order and rendered as an array.
    #[test]
    fn test_multivalue_field() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let mut doc = Document::new();
        for uri in ["one", "two"].iter() {
            doc.add_text(uris, *uri);
        }
        doc.add_text(title, "Test Document");

        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));

        let stored: Vec<Option<&str>> = doc.get_all(uris).map(|v| v.as_text()).collect();
        assert_eq!(stored, vec![Some("one"), Some("two")]);

        let rendered = doc.to_json(&schema);
        let uris_json = rendered.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        let rendered_uris: Vec<Option<&str>> = uris_json
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_str())
            .collect();
        assert_eq!(rendered_uris, vec![Some("one"), Some("two")]);

        let title_json = rendered.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// A JSON array for a text field becomes multiple values and round-trips.
    #[test]
    fn test_multivalue_from_json() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let input = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let doc = Document::from_json(&input, &schema).unwrap();

        let stored: Vec<Option<&str>> = doc.get_all(uris).map(|v| v.as_text()).collect();
        assert_eq!(stored, vec![Some("one"), Some("two")]);

        let stored_title = doc.get_first(title).unwrap();
        assert_eq!(stored_title.as_text(), Some("Test Document"));

        let output = doc.to_json(&schema);
        let rendered_uris: Vec<Option<&str>> = output
            .get("uris")
            .unwrap()
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_str())
            .collect();
        assert_eq!(rendered_uris, vec![Some("one"), Some("two")]);
    }

    /// The `multi` flag forces array output even for a single value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        builder.set_multi(uris, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let uris_entry = schema.get_field_entry(uris).unwrap();
        let title_entry = schema.get_field_entry(title).unwrap();
        assert!(uris_entry.multi);
        assert!(!title_entry.multi);

        let mut doc = Document::new();
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");

        let rendered = doc.to_json(&schema);

        let uris_json = rendered.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        let rendered_uris: Vec<Option<&str>> = uris_json
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_str())
            .collect();
        assert_eq!(rendered_uris, vec![Some("only_one")]);

        let title_json = rendered.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// Sparse vectors are stored as given and survive a JSON round trip.
    #[test]
    fn test_sparse_vector_field() {
        let mut builder = Schema::builder();
        let embedding = builder.add_sparse_vector_field("embedding", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        let embedding_entry = schema.get_field_entry(embedding).unwrap();
        assert_eq!(embedding_entry.field_type, FieldType::SparseVector);

        let expected: [(u32, f32); 3] = [(0, 1.0), (5, 2.5), (10, 0.5)];

        let mut doc = Document::new();
        doc.add_sparse_vector(embedding, expected.to_vec());
        doc.add_text(title, "Test Document");

        let stored = doc
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(stored, &expected);

        let rendered = doc.to_json(&schema);
        let embedding_json = rendered.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        let rendered_indices = embedding_json.get("indices").unwrap().as_array().unwrap();
        assert_eq!(rendered_indices.len(), 3);

        let reloaded = Document::from_json(&rendered, &schema).unwrap();
        let reloaded_entries = reloaded
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        for (position, (index, weight)) in expected.iter().enumerate() {
            let (actual_index, actual_weight) = reloaded_entries[position];
            assert_eq!(actual_index, *index);
            assert!((actual_weight - *weight).abs() < 1e-6);
        }
    }

    /// JSON fields are stored-only and keep object values intact.
    #[test]
    fn test_json_field() {
        let mut builder = Schema::builder();
        let metadata = builder.add_json_field("metadata", true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        let metadata_entry = schema.get_field_entry(metadata).unwrap();
        assert_eq!(metadata_entry.field_type, FieldType::Json);
        assert!(!metadata_entry.indexed);
        assert!(metadata_entry.stored);

        let payload = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, payload.clone());
        doc.add_text(title, "Test Document");

        let stored = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored, &payload);
        let author = stored.get("author").unwrap();
        assert_eq!(author.as_str(), Some("John Doe"));

        let rendered = doc.to_json(&schema);
        let rendered_metadata = rendered.get("metadata").unwrap();
        assert_eq!(rendered_metadata, &payload);

        let reloaded = Document::from_json(&rendered, &schema).unwrap();
        let reloaded_metadata = reloaded.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(reloaded_metadata, &payload);
    }

    /// `add_json` accepts every JSON value kind, not only objects.
    #[test]
    fn test_json_field_various_types() {
        let mut builder = Schema::builder();
        let data = builder.add_json_field("data", true);
        let _schema = builder.build();

        let samples = vec![
            serde_json::json!([1, 2, 3, "four", null]),
            serde_json::json!("just a string"),
            serde_json::json!(42.5),
            serde_json::Value::Null,
            serde_json::json!(true),
        ];

        for sample in samples {
            let mut doc = Document::new();
            doc.add_json(data, sample.clone());
            let stored = doc.get_first(data).unwrap().as_json().unwrap();
            assert_eq!(stored, &sample);
        }
    }
}