1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Lightweight handle identifying a field of a [`Schema`].
///
/// The wrapped `u32` is the position of the field in the schema's field list;
/// `Schema::get_field_entry` uses it directly as an index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
/// The kind of value a schema field holds.
///
/// Each variant is (de)serialized under the lowercase name given in its
/// `#[serde(rename = ...)]` attribute.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FieldType {
    /// String field; serialized as `"text"`.
    #[serde(rename = "text")]
    Text,
    /// Unsigned 64-bit integer field; serialized as `"u64"`.
    #[serde(rename = "u64")]
    U64,
    /// Signed 64-bit integer field; serialized as `"i64"`.
    #[serde(rename = "i64")]
    I64,
    /// 64-bit floating point field; serialized as `"f64"`.
    #[serde(rename = "f64")]
    F64,
    /// Raw byte field; serialized as `"bytes"`.
    #[serde(rename = "bytes")]
    Bytes,
    /// Sparse vector field (index/weight pairs); serialized as `"sparse_vector"`.
    #[serde(rename = "sparse_vector")]
    SparseVector,
    /// Dense vector field (list of `f32`); serialized as `"dense_vector"`.
    #[serde(rename = "dense_vector")]
    DenseVector,
    /// Arbitrary JSON value field; serialized as `"json"`.
    #[serde(rename = "json")]
    Json,
}
38
/// Per-field schema metadata.
///
/// All flags marked `#[serde(default)]` deserialize to `false` / `None` when
/// absent from the input.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; this is the key used by `Schema::get_field` and in JSON documents.
    pub name: String,
    /// Kind of value stored in this field.
    pub field_type: FieldType,
    /// Whether the field is indexed (presumably: searchable — confirm with the indexer).
    pub indexed: bool,
    /// Whether the field's value is kept by `Document::filter_stored`.
    pub stored: bool,
    /// Tokenizer name; the builder sets `"simple"` for plain text fields and
    /// `None` for non-text fields.
    pub tokenizer: Option<String>,
    /// When `true`, `Document::to_json` always emits this field as an array,
    /// even if it holds a single value.
    #[serde(default)]
    pub multi: bool,
    /// Optional position tracking mode; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub positions: Option<PositionMode>,
    /// Configuration for sparse vector fields; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Configuration for dense vector fields; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
    /// "Fast" flag set through `SchemaBuilder::set_fast`.
    // NOTE(review): presumably enables a fast-field/columnar store — confirm with the consumer.
    #[serde(default)]
    pub fast: bool,
    /// Marks the field as primary key; `Schema::primary_field` returns the
    /// first field with this flag set.
    #[serde(default)]
    pub primary_key: bool,
    /// "Reorder" flag set through `SchemaBuilder::set_reorder`.
    // NOTE(review): semantics are not visible in this file — confirm with the consumer.
    #[serde(default)]
    pub reorder: bool,
}
73
/// Which position information is tracked for a field.
///
/// Variants are (de)serialized in `snake_case`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PositionMode {
    /// Track ordinals only (`tracks_ordinal()` is `true`).
    Ordinal,
    /// Track token positions only (`tracks_token_position()` is `true`).
    TokenPosition,
    /// Track both ordinals and token positions.
    Full,
}
88
89impl PositionMode {
90 pub fn tracks_ordinal(&self) -> bool {
92 matches!(self, PositionMode::Ordinal | PositionMode::Full)
93 }
94
95 pub fn tracks_token_position(&self) -> bool {
97 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
98 }
99}
100
/// Index structure used for a dense vector field.
///
/// Variants are (de)serialized in `snake_case`; the default is [`VectorIndexType::RaBitQ`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Flat index; `DenseVectorConfig::default_build_threshold` returns
    /// `usize::MAX` for it.
    Flat,
    /// RaBitQ index (the default).
    #[default]
    RaBitQ,
    /// IVF-based RaBitQ index; counted as IVF by `DenseVectorConfig::uses_ivf`.
    IvfRaBitQ,
    /// ScaNN index; also counted as IVF by `DenseVectorConfig::uses_ivf`.
    ScaNN,
}
115
/// Element representation used for dense vectors.
///
/// Variants are (de)serialized in `snake_case`; the default is
/// [`DenseVectorQuantization::F32`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DenseVectorQuantization {
    /// 4 bytes per element (tag `0`); the default.
    #[default]
    F32,
    /// 2 bytes per element (tag `1`).
    F16,
    /// 1 byte per element (tag `2`).
    UInt8,
}
132
133impl DenseVectorQuantization {
134 pub fn element_size(self) -> usize {
136 match self {
137 Self::F32 => 4,
138 Self::F16 => 2,
139 Self::UInt8 => 1,
140 }
141 }
142
143 pub fn tag(self) -> u8 {
145 match self {
146 Self::F32 => 0,
147 Self::F16 => 1,
148 Self::UInt8 => 2,
149 }
150 }
151
152 pub fn from_tag(tag: u8) -> Option<Self> {
154 match tag {
155 0 => Some(Self::F32),
156 1 => Some(Self::F16),
157 2 => Some(Self::UInt8),
158 _ => None,
159 }
160 }
161}
162
/// Configuration of a dense vector field.
///
/// Only `dim` is required when deserializing; every other field has a serde default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Vector dimensionality.
    pub dim: usize,
    /// Index structure; defaults to `VectorIndexType::RaBitQ` when absent.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Element quantization; defaults to `DenseVectorQuantization::F32` when absent.
    #[serde(default)]
    pub quantization: DenseVectorQuantization,
    /// Explicit cluster count. When `None`, `optimal_num_clusters` derives one
    /// from the number of vectors. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_clusters: Option<usize>,
    /// Probe count; defaults to `default_nprobe()` (32) when absent.
    // NOTE(review): presumably the number of clusters probed at query time — confirm.
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// Explicit build threshold. When `None`, `default_build_threshold` picks a
    /// value based on `index_type`. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build_threshold: Option<usize>,
    /// Unit-norm flag; defaults to `default_unit_norm()` (`true`) when absent.
    // NOTE(review): presumably means vectors are (or are treated as) L2-normalized — confirm.
    #[serde(default = "default_unit_norm")]
    pub unit_norm: bool,
}
200
/// Serde default for `DenseVectorConfig::nprobe`.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
204
/// Serde default for `DenseVectorConfig::unit_norm`.
fn default_unit_norm() -> bool {
    const DEFAULT_UNIT_NORM: bool = true;
    DEFAULT_UNIT_NORM
}
208
209impl DenseVectorConfig {
210 pub fn new(dim: usize) -> Self {
211 Self {
212 dim,
213 index_type: VectorIndexType::RaBitQ,
214 quantization: DenseVectorQuantization::F32,
215 num_clusters: None,
216 nprobe: 32,
217 build_threshold: None,
218 unit_norm: true,
219 }
220 }
221
222 pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
224 Self {
225 dim,
226 index_type: VectorIndexType::IvfRaBitQ,
227 quantization: DenseVectorQuantization::F32,
228 num_clusters,
229 nprobe,
230 build_threshold: None,
231 unit_norm: true,
232 }
233 }
234
235 pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
237 Self {
238 dim,
239 index_type: VectorIndexType::ScaNN,
240 quantization: DenseVectorQuantization::F32,
241 num_clusters,
242 nprobe,
243 build_threshold: None,
244 unit_norm: true,
245 }
246 }
247
248 pub fn flat(dim: usize) -> Self {
250 Self {
251 dim,
252 index_type: VectorIndexType::Flat,
253 quantization: DenseVectorQuantization::F32,
254 num_clusters: None,
255 nprobe: 0,
256 build_threshold: None,
257 unit_norm: true,
258 }
259 }
260
261 pub fn with_quantization(mut self, quantization: DenseVectorQuantization) -> Self {
263 self.quantization = quantization;
264 self
265 }
266
267 pub fn with_build_threshold(mut self, threshold: usize) -> Self {
269 self.build_threshold = Some(threshold);
270 self
271 }
272
273 pub fn with_unit_norm(mut self) -> Self {
275 self.unit_norm = true;
276 self
277 }
278
279 pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
281 self.num_clusters = Some(num_clusters);
282 self
283 }
284
285 pub fn uses_ivf(&self) -> bool {
287 matches!(
288 self.index_type,
289 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
290 )
291 }
292
293 pub fn uses_scann(&self) -> bool {
295 self.index_type == VectorIndexType::ScaNN
296 }
297
298 pub fn is_flat(&self) -> bool {
300 self.index_type == VectorIndexType::Flat
301 }
302
303 pub fn default_build_threshold(&self) -> usize {
305 self.build_threshold.unwrap_or(match self.index_type {
306 VectorIndexType::Flat => usize::MAX, VectorIndexType::RaBitQ => 1000,
308 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
309 })
310 }
311
312 pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
314 self.num_clusters.unwrap_or_else(|| {
315 let optimal = (num_vectors as f64).sqrt() as usize;
317 optimal.clamp(16, 4096)
318 })
319 }
320}
321
322use super::query_field_router::QueryRouterRule;
323
/// Immutable description of the fields of an index.
///
/// Built through [`SchemaBuilder`]; a [`Field`] handle is the index of its
/// entry in `fields`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field entries, indexed by `Field.0`.
    fields: Vec<FieldEntry>,
    /// Lookup from field name to its handle.
    name_to_field: HashMap<String, Field>,
    /// Fields returned by `default_fields()`; empty when absent from serialized input.
    // NOTE(review): presumably the fields searched when a query names none — confirm.
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query routing rules; empty when absent from serialized input.
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
336
337impl Schema {
338 pub fn builder() -> SchemaBuilder {
339 SchemaBuilder::default()
340 }
341
342 pub fn get_field(&self, name: &str) -> Option<Field> {
343 self.name_to_field.get(name).copied()
344 }
345
346 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
347 self.fields.get(field.0 as usize)
348 }
349
350 pub fn get_field_name(&self, field: Field) -> Option<&str> {
351 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
352 }
353
354 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
355 self.fields
356 .iter()
357 .enumerate()
358 .map(|(i, e)| (Field(i as u32), e))
359 }
360
361 pub fn num_fields(&self) -> usize {
362 self.fields.len()
363 }
364
365 pub fn default_fields(&self) -> &[Field] {
367 &self.default_fields
368 }
369
370 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
372 self.default_fields = fields;
373 }
374
375 pub fn query_routers(&self) -> &[QueryRouterRule] {
377 &self.query_routers
378 }
379
380 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
382 self.query_routers = rules;
383 }
384
385 pub fn primary_field(&self) -> Option<Field> {
387 self.fields
388 .iter()
389 .enumerate()
390 .find(|(_, e)| e.primary_key)
391 .map(|(i, _)| Field(i as u32))
392 }
393}
394
/// Incrementally assembles a [`Schema`].
///
/// Fields receive consecutive [`Field`] handles in the order they are added.
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Field entries added so far, in handle order.
    fields: Vec<FieldEntry>,
    /// Default field names; resolved to handles in `build()`.
    default_fields: Vec<String>,
    /// Query routing rules passed through to the built schema.
    query_routers: Vec<QueryRouterRule>,
}
402
403impl SchemaBuilder {
404 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
405 self.add_field_with_tokenizer(
406 name,
407 FieldType::Text,
408 indexed,
409 stored,
410 Some("simple".to_string()),
411 )
412 }
413
414 pub fn add_text_field_with_tokenizer(
415 &mut self,
416 name: &str,
417 indexed: bool,
418 stored: bool,
419 tokenizer: &str,
420 ) -> Field {
421 self.add_field_with_tokenizer(
422 name,
423 FieldType::Text,
424 indexed,
425 stored,
426 Some(tokenizer.to_string()),
427 )
428 }
429
430 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
431 self.add_field(name, FieldType::U64, indexed, stored)
432 }
433
434 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
435 self.add_field(name, FieldType::I64, indexed, stored)
436 }
437
438 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
439 self.add_field(name, FieldType::F64, indexed, stored)
440 }
441
442 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
443 self.add_field(name, FieldType::Bytes, false, stored)
444 }
445
446 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
451 self.add_field(name, FieldType::Json, false, stored)
452 }
453
454 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
459 self.add_sparse_vector_field_with_config(
460 name,
461 indexed,
462 stored,
463 crate::structures::SparseVectorConfig::default(),
464 )
465 }
466
467 pub fn add_sparse_vector_field_with_config(
472 &mut self,
473 name: &str,
474 indexed: bool,
475 stored: bool,
476 config: crate::structures::SparseVectorConfig,
477 ) -> Field {
478 let field = Field(self.fields.len() as u32);
479 self.fields.push(FieldEntry {
480 name: name.to_string(),
481 field_type: FieldType::SparseVector,
482 indexed,
483 stored,
484 tokenizer: None,
485 multi: false,
486 positions: None,
487 sparse_vector_config: Some(config),
488 dense_vector_config: None,
489 fast: false,
490 primary_key: false,
491 reorder: false,
492 });
493 field
494 }
495
496 pub fn set_sparse_vector_config(
498 &mut self,
499 field: Field,
500 config: crate::structures::SparseVectorConfig,
501 ) {
502 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
503 entry.sparse_vector_config = Some(config);
504 }
505 }
506
507 pub fn add_dense_vector_field(
512 &mut self,
513 name: &str,
514 dim: usize,
515 indexed: bool,
516 stored: bool,
517 ) -> Field {
518 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
519 }
520
521 pub fn add_dense_vector_field_with_config(
523 &mut self,
524 name: &str,
525 indexed: bool,
526 stored: bool,
527 config: DenseVectorConfig,
528 ) -> Field {
529 let field = Field(self.fields.len() as u32);
530 self.fields.push(FieldEntry {
531 name: name.to_string(),
532 field_type: FieldType::DenseVector,
533 indexed,
534 stored,
535 tokenizer: None,
536 multi: false,
537 positions: None,
538 sparse_vector_config: None,
539 dense_vector_config: Some(config),
540 fast: false,
541 primary_key: false,
542 reorder: false,
543 });
544 field
545 }
546
547 fn add_field(
548 &mut self,
549 name: &str,
550 field_type: FieldType,
551 indexed: bool,
552 stored: bool,
553 ) -> Field {
554 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
555 }
556
557 fn add_field_with_tokenizer(
558 &mut self,
559 name: &str,
560 field_type: FieldType,
561 indexed: bool,
562 stored: bool,
563 tokenizer: Option<String>,
564 ) -> Field {
565 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
566 }
567
568 fn add_field_full(
569 &mut self,
570 name: &str,
571 field_type: FieldType,
572 indexed: bool,
573 stored: bool,
574 tokenizer: Option<String>,
575 multi: bool,
576 ) -> Field {
577 let field = Field(self.fields.len() as u32);
578 self.fields.push(FieldEntry {
579 name: name.to_string(),
580 field_type,
581 indexed,
582 stored,
583 tokenizer,
584 multi,
585 positions: None,
586 sparse_vector_config: None,
587 dense_vector_config: None,
588 fast: false,
589 primary_key: false,
590 reorder: false,
591 });
592 field
593 }
594
595 pub fn set_multi(&mut self, field: Field, multi: bool) {
597 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
598 entry.multi = multi;
599 }
600 }
601
602 pub fn set_fast(&mut self, field: Field, fast: bool) {
605 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
606 entry.fast = fast;
607 }
608 }
609
610 pub fn set_primary_key(&mut self, field: Field) {
612 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
613 entry.primary_key = true;
614 }
615 }
616
617 pub fn set_reorder(&mut self, field: Field, reorder: bool) {
619 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
620 entry.reorder = reorder;
621 }
622 }
623
624 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
626 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
627 entry.positions = Some(mode);
628 }
629 }
630
631 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
633 self.default_fields = field_names;
634 }
635
636 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
638 self.query_routers = rules;
639 }
640
641 pub fn build(self) -> Schema {
642 let mut name_to_field = HashMap::new();
643 for (i, entry) in self.fields.iter().enumerate() {
644 name_to_field.insert(entry.name.clone(), Field(i as u32));
645 }
646
647 let default_fields: Vec<Field> = self
649 .default_fields
650 .iter()
651 .filter_map(|name| name_to_field.get(name).copied())
652 .collect();
653
654 Schema {
655 fields: self.fields,
656 name_to_field,
657 default_fields,
658 query_routers: self.query_routers,
659 }
660 }
661}
662
/// A single value stored in a [`Document`] field.
///
/// Each variant is (de)serialized under the lowercase name given in its
/// `#[serde(rename = ...)]` attribute.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FieldValue {
    /// String value.
    #[serde(rename = "text")]
    Text(String),
    /// Unsigned 64-bit integer value.
    #[serde(rename = "u64")]
    U64(u64),
    /// Signed 64-bit integer value.
    #[serde(rename = "i64")]
    I64(i64),
    /// 64-bit floating point value.
    #[serde(rename = "f64")]
    F64(f64),
    /// Raw bytes.
    #[serde(rename = "bytes")]
    Bytes(Vec<u8>),
    /// Sparse vector as `(index, weight)` pairs.
    #[serde(rename = "sparse_vector")]
    SparseVector(Vec<(u32, f32)>),
    /// Dense vector of `f32` components.
    #[serde(rename = "dense_vector")]
    DenseVector(Vec<f32>),
    /// Arbitrary JSON value.
    #[serde(rename = "json")]
    Json(serde_json::Value),
}
686
687impl FieldValue {
688 pub fn as_text(&self) -> Option<&str> {
689 match self {
690 FieldValue::Text(s) => Some(s),
691 _ => None,
692 }
693 }
694
695 pub fn as_u64(&self) -> Option<u64> {
696 match self {
697 FieldValue::U64(v) => Some(*v),
698 _ => None,
699 }
700 }
701
702 pub fn as_i64(&self) -> Option<i64> {
703 match self {
704 FieldValue::I64(v) => Some(*v),
705 _ => None,
706 }
707 }
708
709 pub fn as_f64(&self) -> Option<f64> {
710 match self {
711 FieldValue::F64(v) => Some(*v),
712 _ => None,
713 }
714 }
715
716 pub fn as_bytes(&self) -> Option<&[u8]> {
717 match self {
718 FieldValue::Bytes(b) => Some(b),
719 _ => None,
720 }
721 }
722
723 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
724 match self {
725 FieldValue::SparseVector(entries) => Some(entries),
726 _ => None,
727 }
728 }
729
730 pub fn as_dense_vector(&self) -> Option<&[f32]> {
731 match self {
732 FieldValue::DenseVector(v) => Some(v),
733 _ => None,
734 }
735 }
736
737 pub fn as_json(&self) -> Option<&serde_json::Value> {
738 match self {
739 FieldValue::Json(v) => Some(v),
740 _ => None,
741 }
742 }
743}
744
/// A document: an ordered list of `(field, value)` pairs.
///
/// The same field may appear more than once (multi-valued fields); insertion
/// order is preserved.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// Field values in insertion order.
    field_values: Vec<(Field, FieldValue)>,
}
750
751impl Document {
752 pub fn new() -> Self {
753 Self::default()
754 }
755
756 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
757 self.field_values
758 .push((field, FieldValue::Text(value.into())));
759 }
760
761 pub fn add_u64(&mut self, field: Field, value: u64) {
762 self.field_values.push((field, FieldValue::U64(value)));
763 }
764
765 pub fn add_i64(&mut self, field: Field, value: i64) {
766 self.field_values.push((field, FieldValue::I64(value)));
767 }
768
769 pub fn add_f64(&mut self, field: Field, value: f64) {
770 self.field_values.push((field, FieldValue::F64(value)));
771 }
772
773 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
774 self.field_values.push((field, FieldValue::Bytes(value)));
775 }
776
777 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
778 self.field_values
779 .push((field, FieldValue::SparseVector(entries)));
780 }
781
782 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
783 self.field_values
784 .push((field, FieldValue::DenseVector(values)));
785 }
786
787 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
788 self.field_values.push((field, FieldValue::Json(value)));
789 }
790
791 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
792 self.field_values
793 .iter()
794 .find(|(f, _)| *f == field)
795 .map(|(_, v)| v)
796 }
797
798 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
799 self.field_values
800 .iter()
801 .filter(move |(f, _)| *f == field)
802 .map(|(_, v)| v)
803 }
804
805 pub fn field_values(&self) -> &[(Field, FieldValue)] {
806 &self.field_values
807 }
808
809 pub fn filter_stored(&self, schema: &Schema) -> Document {
811 Document {
812 field_values: self
813 .field_values
814 .iter()
815 .filter(|(field, _)| {
816 schema
817 .get_field_entry(*field)
818 .is_some_and(|entry| entry.stored)
819 })
820 .cloned()
821 .collect(),
822 }
823 }
824
825 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
831 use std::collections::HashMap;
832
833 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
835 HashMap::new();
836
837 for (field, value) in &self.field_values {
838 if let Some(entry) = schema.get_field_entry(*field) {
839 let json_value = match value {
840 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
841 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
842 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
843 FieldValue::F64(n) => serde_json::json!(n),
844 FieldValue::Bytes(b) => {
845 use base64::Engine;
846 serde_json::Value::String(
847 base64::engine::general_purpose::STANDARD.encode(b),
848 )
849 }
850 FieldValue::SparseVector(entries) => {
851 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
852 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
853 serde_json::json!({
854 "indices": indices,
855 "values": values
856 })
857 }
858 FieldValue::DenseVector(values) => {
859 serde_json::json!(values)
860 }
861 FieldValue::Json(v) => v.clone(),
862 };
863 field_values_map
864 .entry(*field)
865 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
866 .2
867 .push(json_value);
868 }
869 }
870
871 let mut map = serde_json::Map::new();
873 for (_field, (name, is_multi, values)) in field_values_map {
874 let json_value = if is_multi || values.len() > 1 {
875 serde_json::Value::Array(values)
876 } else {
877 values.into_iter().next().unwrap()
878 };
879 map.insert(name, json_value);
880 }
881
882 serde_json::Value::Object(map)
883 }
884
885 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
894 let obj = json.as_object()?;
895 let mut doc = Document::new();
896
897 for (key, value) in obj {
898 if let Some(field) = schema.get_field(key) {
899 let field_entry = schema.get_field_entry(field)?;
900 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
901 }
902 }
903
904 Some(doc)
905 }
906
907 fn add_json_value(
909 doc: &mut Document,
910 field: Field,
911 field_type: &FieldType,
912 value: &serde_json::Value,
913 ) {
914 match value {
915 serde_json::Value::String(s) => {
916 if matches!(field_type, FieldType::Text) {
917 doc.add_text(field, s.clone());
918 }
919 }
920 serde_json::Value::Number(n) => {
921 match field_type {
922 FieldType::I64 => {
923 if let Some(i) = n.as_i64() {
924 doc.add_i64(field, i);
925 }
926 }
927 FieldType::U64 => {
928 if let Some(u) = n.as_u64() {
929 doc.add_u64(field, u);
930 } else if let Some(i) = n.as_i64() {
931 if i >= 0 {
933 doc.add_u64(field, i as u64);
934 }
935 }
936 }
937 FieldType::F64 => {
938 if let Some(f) = n.as_f64() {
939 doc.add_f64(field, f);
940 }
941 }
942 _ => {}
943 }
944 }
945 serde_json::Value::Array(arr) => {
947 for item in arr {
948 Self::add_json_value(doc, field, field_type, item);
949 }
950 }
951 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
953 if let (Some(indices_val), Some(values_val)) =
954 (obj.get("indices"), obj.get("values"))
955 {
956 let indices: Vec<u32> = indices_val
957 .as_array()
958 .map(|arr| {
959 arr.iter()
960 .filter_map(|v| v.as_u64().map(|n| n as u32))
961 .collect()
962 })
963 .unwrap_or_default();
964 let values: Vec<f32> = values_val
965 .as_array()
966 .map(|arr| {
967 arr.iter()
968 .filter_map(|v| v.as_f64().map(|n| n as f32))
969 .collect()
970 })
971 .unwrap_or_default();
972 if indices.len() == values.len() {
973 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
974 doc.add_sparse_vector(field, entries);
975 }
976 }
977 }
978 _ if matches!(field_type, FieldType::Json) => {
980 doc.add_json(field, value.clone());
981 }
982 serde_json::Value::Object(_) => {}
983 _ => {}
984 }
985 }
986}
987
/// Unit tests for schema building, document accessors and JSON conversion.
#[cfg(test)]
mod tests {
    use super::*;

    /// Field handles returned by the builder resolve by name in the built schema.
    #[test]
    fn test_schema_builder() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, false);
        let count = builder.add_u64_field("count", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("title"), Some(title));
        assert_eq!(schema.get_field("body"), Some(body));
        assert_eq!(schema.get_field("count"), Some(count));
        assert_eq!(schema.get_field("nonexistent"), None);
    }

    /// Values added to a document can be read back through the typed accessors.
    #[test]
    fn test_document() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        assert_eq!(doc.get_first(title).unwrap().as_text(), Some("Hello World"));
        assert_eq!(doc.get_first(count).unwrap().as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip (derive-based, not `to_json`).
    #[test]
    fn test_document_serialization() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        // Serialize with the derived `Serialize` impl.
        let json = serde_json::to_string(&doc).unwrap();
        println!("Serialized doc: {}", json);

        // Deserialize and compare against the original values.
        let doc2: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(
            doc2.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        assert_eq!(
            doc2.get_first(title).unwrap().as_text(),
            Some("Hello World")
        );
        assert_eq!(doc2.get_first(count).unwrap().as_u64(), Some(42));
    }

    /// A field holding several values is emitted as a JSON array by `to_json`,
    /// while a single-valued, non-multi field is emitted as a plain value.
    #[test]
    fn test_multivalue_field() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let mut doc = Document::new();
        // Two values for the same field.
        doc.add_text(uris, "one");
        doc.add_text(uris, "two");
        doc.add_text(title, "Test Document");

        // `get_first` returns the value added first.
        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));

        // `get_all` returns every value in insertion order.
        let all_uris: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(all_uris.len(), 2);
        assert_eq!(all_uris[0].as_text(), Some("one"));
        assert_eq!(all_uris[1].as_text(), Some("two"));

        let json = doc.to_json(&schema);
        // More than one value: array.
        let uris_json = json.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        let uris_arr = uris_json.as_array().unwrap();
        assert_eq!(uris_arr.len(), 2);
        assert_eq!(uris_arr[0].as_str(), Some("one"));
        assert_eq!(uris_arr[1].as_str(), Some("two"));

        // Exactly one value and not flagged multi: plain string.
        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// `from_json` expands a JSON array into multiple values of the field,
    /// and `to_json` turns them back into an array.
    #[test]
    fn test_multivalue_from_json() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        // Array for "uris", scalar for "title".
        let json = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let doc = Document::from_json(&json, &schema).unwrap();

        // Both array elements became separate values.
        let all_uris: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(all_uris.len(), 2);
        assert_eq!(all_uris[0].as_text(), Some("one"));
        assert_eq!(all_uris[1].as_text(), Some("two"));

        // The scalar became a single value.
        assert_eq!(
            doc.get_first(title).unwrap().as_text(),
            Some("Test Document")
        );

        // Round trip back to JSON keeps the array.
        let json_out = doc.to_json(&schema);
        let uris_out = json_out.get("uris").unwrap().as_array().unwrap();
        assert_eq!(uris_out.len(), 2);
        assert_eq!(uris_out[0].as_str(), Some("one"));
        assert_eq!(uris_out[1].as_str(), Some("two"));
    }

    /// A field flagged `multi` is emitted as an array even with a single value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut builder = Schema::builder();
        // "uris" is flagged multi right after creation; "title" is left single-valued.
        let uris = builder.add_text_field("uris", true, true);
        builder.set_multi(uris, true); let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        // The flag is visible on the built schema entries.
        assert!(schema.get_field_entry(uris).unwrap().multi);
        assert!(!schema.get_field_entry(title).unwrap().multi);

        let mut doc = Document::new();
        // Only one value for the multi field.
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        let uris_arr = uris_json.as_array().unwrap();
        assert_eq!(uris_arr.len(), 1);
        assert_eq!(uris_arr[0].as_str(), Some("only_one"));

        // The non-multi field with one value stays a plain string.
        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// Sparse vectors round-trip through `to_json` / `from_json` as an object
    /// with parallel `indices` and `values` arrays.
    #[test]
    fn test_sparse_vector_field() {
        let mut builder = Schema::builder();
        let embedding = builder.add_sparse_vector_field("embedding", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        assert_eq!(
            schema.get_field_entry(embedding).unwrap().field_type,
            FieldType::SparseVector
        );

        let mut doc = Document::new();
        // Three (index, weight) pairs.
        doc.add_sparse_vector(embedding, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        doc.add_text(title, "Test Document");

        // Direct accessor returns the pairs unchanged.
        let entries = doc
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        let json = doc.to_json(&schema);
        // JSON form is an object carrying an "indices" array of matching length.
        let embedding_json = json.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        assert_eq!(
            embedding_json
                .get("indices")
                .unwrap()
                .as_array()
                .unwrap()
                .len(),
            3
        );

        let doc2 = Document::from_json(&json, &schema).unwrap();
        // Weights pass through f64 in JSON, so compare with a tolerance.
        let entries2 = doc2
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries2[0].0, 0);
        assert!((entries2[0].1 - 1.0).abs() < 1e-6);
        assert_eq!(entries2[1].0, 5);
        assert!((entries2[1].1 - 2.5).abs() < 1e-6);
        assert_eq!(entries2[2].0, 10);
        assert!((entries2[2].1 - 0.5).abs() < 1e-6);
    }

    /// JSON fields store an object verbatim and round-trip through
    /// `to_json` / `from_json`.
    #[test]
    fn test_json_field() {
        let mut builder = Schema::builder();
        let metadata = builder.add_json_field("metadata", true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        assert_eq!(
            schema.get_field_entry(metadata).unwrap().field_type,
            FieldType::Json
        );
        // `add_json_field` never marks the field as indexed.
        assert!(!schema.get_field_entry(metadata).unwrap().indexed);
        assert!(schema.get_field_entry(metadata).unwrap().stored);

        // A nested object with a string, an array and a sub-object.
        let json_value = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, json_value.clone());
        doc.add_text(title, "Test Document");

        // The stored value equals the input.
        let stored_json = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json, &json_value);
        assert_eq!(
            stored_json.get("author").unwrap().as_str(),
            Some("John Doe")
        );

        // `to_json` emits the value unchanged under the field name.
        let doc_json = doc.to_json(&schema);
        let metadata_out = doc_json.get("metadata").unwrap();
        assert_eq!(metadata_out, &json_value);

        // `from_json` restores the same value.
        let doc2 = Document::from_json(&doc_json, &schema).unwrap();
        let stored_json2 = doc2.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json2, &json_value);
    }

    /// `add_json` accepts every JSON value kind (array, string, number, null,
    /// bool) and `as_json` returns it unchanged. This exercises the in-memory
    /// API only, not `from_json`.
    #[test]
    fn test_json_field_various_types() {
        let mut builder = Schema::builder();
        let data = builder.add_json_field("data", true);
        let _schema = builder.build();

        // Array value.
        let arr_value = serde_json::json!([1, 2, 3, "four", null]);
        let mut doc = Document::new();
        doc.add_json(data, arr_value.clone());
        assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), &arr_value);

        // String value.
        let str_value = serde_json::json!("just a string");
        let mut doc2 = Document::new();
        doc2.add_json(data, str_value.clone());
        assert_eq!(doc2.get_first(data).unwrap().as_json().unwrap(), &str_value);

        // Number value.
        let num_value = serde_json::json!(42.5);
        let mut doc3 = Document::new();
        doc3.add_json(data, num_value.clone());
        assert_eq!(doc3.get_first(data).unwrap().as_json().unwrap(), &num_value);

        // Null value.
        let null_value = serde_json::Value::Null;
        let mut doc4 = Document::new();
        doc4.add_json(data, null_value.clone());
        assert_eq!(
            doc4.get_first(data).unwrap().as_json().unwrap(),
            &null_value
        );

        // Boolean value.
        let bool_value = serde_json::json!(true);
        let mut doc5 = Document::new();
        doc5.add_json(data, bool_value.clone());
        assert_eq!(
            doc5.get_first(data).unwrap().as_json().unwrap(),
            &bool_value
        );
    }
}