1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Lightweight handle identifying one field of a [`Schema`].
///
/// The wrapped value is the field's zero-based position in the schema's field
/// list: [`SchemaBuilder`] hands handles out in insertion order and [`Schema`]
/// resolves them by indexing with `field.0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
/// Kind of value a schema field holds.
///
/// Each variant is (de)serialized under the lowercase / snake_case name given
/// in its `serde(rename)` attribute.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FieldType {
    /// Text field; the builder attaches a tokenizer name to these.
    #[serde(rename = "text")]
    Text,
    /// Unsigned 64-bit integer field.
    #[serde(rename = "u64")]
    U64,
    /// Signed 64-bit integer field.
    #[serde(rename = "i64")]
    I64,
    /// 64-bit floating point field.
    #[serde(rename = "f64")]
    F64,
    /// Raw byte field; the builder always creates these as non-indexed.
    #[serde(rename = "bytes")]
    Bytes,
    /// Sparse vector field, configured through `sparse_vector_config`.
    #[serde(rename = "sparse_vector")]
    SparseVector,
    /// Dense vector field, configured through `dense_vector_config`.
    #[serde(rename = "dense_vector")]
    DenseVector,
    /// Arbitrary JSON field; the builder always creates these as non-indexed.
    #[serde(rename = "json")]
    Json,
}
38
/// Declaration of a single schema field: its name, type and per-field options.
///
/// Options marked `serde(default)` may be absent from serialized schemas and
/// then fall back to `false` / `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; used for name lookup and as the key in `Document::to_json`.
    pub name: String,
    /// Kind of value stored in this field.
    pub field_type: FieldType,
    /// Indexing flag. NOTE(review): not consumed in this file — presumably
    /// read by the indexer; confirm there.
    pub indexed: bool,
    /// Whether values are kept by `Document::filter_stored`.
    pub stored: bool,
    /// Tokenizer name, if any. The builder sets `"simple"` for plain text
    /// fields and `None` for non-text fields.
    pub tokenizer: Option<String>,
    /// When set, `Document::to_json` always emits this field as an array,
    /// even if the document holds a single value for it.
    #[serde(default)]
    pub multi: bool,
    /// Optional position tracking mode; omitted from the serialized form when
    /// `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub positions: Option<PositionMode>,
    /// Sparse vector settings; the builder populates this for sparse vector
    /// fields. Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Dense vector settings; the builder populates this for dense vector
    /// fields. Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
    /// "Fast" flag. NOTE(review): semantics are not visible in this file —
    /// presumably enables a fast/columnar access path; confirm with the
    /// consumer.
    #[serde(default)]
    pub fast: bool,
    /// Marks the field as primary key; `Schema::primary_field` returns the
    /// first field with this flag set.
    #[serde(default)]
    pub primary_key: bool,
    /// "Reorder" flag, surfaced through `Schema::has_reorder_fields`.
    /// NOTE(review): what gets reordered is decided outside this file.
    #[serde(default)]
    pub reorder: bool,
}
73
/// Which positional information is tracked for a field.
///
/// Serialized in snake_case (`ordinal`, `token_position`, `full`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PositionMode {
    /// Ordinals only (`tracks_ordinal` is true, `tracks_token_position` is
    /// false).
    Ordinal,
    /// Token positions only (`tracks_token_position` is true,
    /// `tracks_ordinal` is false).
    TokenPosition,
    /// Both ordinals and token positions.
    Full,
}
88
89impl PositionMode {
90 pub fn tracks_ordinal(&self) -> bool {
92 matches!(self, PositionMode::Ordinal | PositionMode::Full)
93 }
94
95 pub fn tracks_token_position(&self) -> bool {
97 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
98 }
99}
100
/// Index structure used for a dense vector field.
///
/// NOTE(review): with `rename_all = "snake_case"` serde appears to split on
/// every uppercase letter, so `RaBitQ` / `IvfRaBitQ` / `ScaNN` would serialize
/// as `ra_bit_q` / `ivf_ra_bit_q` / `sca_n_n` — confirm this matches the
/// names used in stored schemas.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Flat index; `DenseVectorConfig::default_build_threshold` is
    /// `usize::MAX` for it unless overridden.
    Flat,
    /// Default index type; build threshold defaults to 1000.
    #[default]
    RaBitQ,
    /// IVF-based variant (reported by `DenseVectorConfig::uses_ivf`); build
    /// threshold defaults to 10000.
    IvfRaBitQ,
    /// ScaNN variant, also reported by `DenseVectorConfig::uses_ivf`; build
    /// threshold defaults to 10000.
    ScaNN,
}
115
/// Element representation used for dense vector components.
///
/// Serialized in snake_case. NOTE(review): serde's snake_case conversion
/// presumably turns `UInt8` into `u_int8` — confirm against stored schemas.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DenseVectorQuantization {
    /// 4 bytes per element (tag 0); the default.
    #[default]
    F32,
    /// 2 bytes per element (tag 1).
    F16,
    /// 1 byte per element (tag 2).
    UInt8,
}
132
133impl DenseVectorQuantization {
134 pub fn element_size(self) -> usize {
136 match self {
137 Self::F32 => 4,
138 Self::F16 => 2,
139 Self::UInt8 => 1,
140 }
141 }
142
143 pub fn tag(self) -> u8 {
145 match self {
146 Self::F32 => 0,
147 Self::F16 => 1,
148 Self::UInt8 => 2,
149 }
150 }
151
152 pub fn from_tag(tag: u8) -> Option<Self> {
154 match tag {
155 0 => Some(Self::F32),
156 1 => Some(Self::F16),
157 2 => Some(Self::UInt8),
158 _ => None,
159 }
160 }
161}
162
/// Settings for a dense vector field.
///
/// Every option except `dim` is optional in the serialized form and falls
/// back to the default noted on the field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Vector dimensionality. NOTE(review): not validated anywhere in this
    /// file — presumably enforced when vectors are indexed.
    pub dim: usize,
    /// Index structure; defaults to `VectorIndexType::RaBitQ`.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Element quantization; defaults to `DenseVectorQuantization::F32`.
    #[serde(default)]
    pub quantization: DenseVectorQuantization,
    /// Explicit cluster count. When `None`, `optimal_num_clusters` derives
    /// one from the number of vectors.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_clusters: Option<usize>,
    /// Probe count; defaults to 32 when absent from the serialized form.
    /// NOTE(review): presumably the number of clusters probed per query —
    /// the consumer is outside this file.
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// Explicit build threshold. When `None`, `default_build_threshold`
    /// picks a value based on `index_type`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build_threshold: Option<usize>,
    /// Unit-norm flag; defaults to `true` when absent from the serialized
    /// form. NOTE(review): presumably declares that vectors are
    /// L2-normalized — confirm with the consumer.
    #[serde(default = "default_unit_norm")]
    pub unit_norm: bool,
}
200
/// Serde default for `DenseVectorConfig::nprobe`.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
204
/// Serde default for `DenseVectorConfig::unit_norm`.
fn default_unit_norm() -> bool {
    const DEFAULT_UNIT_NORM: bool = true;
    DEFAULT_UNIT_NORM
}
208
209impl DenseVectorConfig {
210 pub fn new(dim: usize) -> Self {
211 Self {
212 dim,
213 index_type: VectorIndexType::RaBitQ,
214 quantization: DenseVectorQuantization::F32,
215 num_clusters: None,
216 nprobe: 32,
217 build_threshold: None,
218 unit_norm: true,
219 }
220 }
221
222 pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
224 Self {
225 dim,
226 index_type: VectorIndexType::IvfRaBitQ,
227 quantization: DenseVectorQuantization::F32,
228 num_clusters,
229 nprobe,
230 build_threshold: None,
231 unit_norm: true,
232 }
233 }
234
235 pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
237 Self {
238 dim,
239 index_type: VectorIndexType::ScaNN,
240 quantization: DenseVectorQuantization::F32,
241 num_clusters,
242 nprobe,
243 build_threshold: None,
244 unit_norm: true,
245 }
246 }
247
248 pub fn flat(dim: usize) -> Self {
250 Self {
251 dim,
252 index_type: VectorIndexType::Flat,
253 quantization: DenseVectorQuantization::F32,
254 num_clusters: None,
255 nprobe: 0,
256 build_threshold: None,
257 unit_norm: true,
258 }
259 }
260
261 pub fn with_quantization(mut self, quantization: DenseVectorQuantization) -> Self {
263 self.quantization = quantization;
264 self
265 }
266
267 pub fn with_build_threshold(mut self, threshold: usize) -> Self {
269 self.build_threshold = Some(threshold);
270 self
271 }
272
273 pub fn with_unit_norm(mut self) -> Self {
275 self.unit_norm = true;
276 self
277 }
278
279 pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
281 self.num_clusters = Some(num_clusters);
282 self
283 }
284
285 pub fn uses_ivf(&self) -> bool {
287 matches!(
288 self.index_type,
289 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
290 )
291 }
292
293 pub fn uses_scann(&self) -> bool {
295 self.index_type == VectorIndexType::ScaNN
296 }
297
298 pub fn is_flat(&self) -> bool {
300 self.index_type == VectorIndexType::Flat
301 }
302
303 pub fn default_build_threshold(&self) -> usize {
305 self.build_threshold.unwrap_or(match self.index_type {
306 VectorIndexType::Flat => usize::MAX, VectorIndexType::RaBitQ => 1000,
308 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
309 })
310 }
311
312 pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
314 self.num_clusters.unwrap_or_else(|| {
315 let optimal = (num_vectors as f64).sqrt() as usize;
317 optimal.clamp(16, 4096)
318 })
319 }
320}
321
322use super::query_field_router::QueryRouterRule;
323
/// Immutable description of an index: the ordered field list plus lookup
/// tables and query settings.
///
/// Instances are normally produced by `SchemaBuilder::build`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field declarations; a `Field` handle is an index into this list.
    fields: Vec<FieldEntry>,
    /// Maps each field name to its handle.
    name_to_field: HashMap<String, Field>,
    /// Default fields; empty when absent from the serialized form.
    /// NOTE(review): presumably the fields searched when a query names
    /// none — the consumer is outside this file.
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query routing rules; empty when absent from the serialized form.
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
336
337impl Schema {
338 pub fn builder() -> SchemaBuilder {
339 SchemaBuilder::default()
340 }
341
342 pub fn get_field(&self, name: &str) -> Option<Field> {
343 self.name_to_field.get(name).copied()
344 }
345
346 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
347 self.fields.get(field.0 as usize)
348 }
349
350 pub fn get_field_name(&self, field: Field) -> Option<&str> {
351 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
352 }
353
354 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
355 self.fields
356 .iter()
357 .enumerate()
358 .map(|(i, e)| (Field(i as u32), e))
359 }
360
361 pub fn num_fields(&self) -> usize {
362 self.fields.len()
363 }
364
365 pub fn has_reorder_fields(&self) -> bool {
368 self.fields.iter().any(|e| e.reorder)
369 }
370
371 pub fn default_fields(&self) -> &[Field] {
373 &self.default_fields
374 }
375
376 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
378 self.default_fields = fields;
379 }
380
381 pub fn query_routers(&self) -> &[QueryRouterRule] {
383 &self.query_routers
384 }
385
386 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
388 self.query_routers = rules;
389 }
390
391 pub fn primary_field(&self) -> Option<Field> {
393 self.fields
394 .iter()
395 .enumerate()
396 .find(|(_, e)| e.primary_key)
397 .map(|(i, _)| Field(i as u32))
398 }
399}
400
/// Incrementally assembles a [`Schema`].
///
/// Fields receive handles in the order they are added; `build` resolves the
/// default field names into handles.
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Field declarations added so far, in handle order.
    fields: Vec<FieldEntry>,
    /// Names of the default fields, resolved to handles by `build`.
    default_fields: Vec<String>,
    /// Query routing rules passed through to the schema unchanged.
    query_routers: Vec<QueryRouterRule>,
}
408
409impl SchemaBuilder {
410 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
411 self.add_field_with_tokenizer(
412 name,
413 FieldType::Text,
414 indexed,
415 stored,
416 Some("simple".to_string()),
417 )
418 }
419
420 pub fn add_text_field_with_tokenizer(
421 &mut self,
422 name: &str,
423 indexed: bool,
424 stored: bool,
425 tokenizer: &str,
426 ) -> Field {
427 self.add_field_with_tokenizer(
428 name,
429 FieldType::Text,
430 indexed,
431 stored,
432 Some(tokenizer.to_string()),
433 )
434 }
435
436 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
437 self.add_field(name, FieldType::U64, indexed, stored)
438 }
439
440 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
441 self.add_field(name, FieldType::I64, indexed, stored)
442 }
443
444 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
445 self.add_field(name, FieldType::F64, indexed, stored)
446 }
447
448 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
449 self.add_field(name, FieldType::Bytes, false, stored)
450 }
451
452 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
457 self.add_field(name, FieldType::Json, false, stored)
458 }
459
460 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
465 self.add_sparse_vector_field_with_config(
466 name,
467 indexed,
468 stored,
469 crate::structures::SparseVectorConfig::default(),
470 )
471 }
472
473 pub fn add_sparse_vector_field_with_config(
478 &mut self,
479 name: &str,
480 indexed: bool,
481 stored: bool,
482 config: crate::structures::SparseVectorConfig,
483 ) -> Field {
484 let field = Field(self.fields.len() as u32);
485 self.fields.push(FieldEntry {
486 name: name.to_string(),
487 field_type: FieldType::SparseVector,
488 indexed,
489 stored,
490 tokenizer: None,
491 multi: false,
492 positions: None,
493 sparse_vector_config: Some(config),
494 dense_vector_config: None,
495 fast: false,
496 primary_key: false,
497 reorder: false,
498 });
499 field
500 }
501
502 pub fn set_sparse_vector_config(
504 &mut self,
505 field: Field,
506 config: crate::structures::SparseVectorConfig,
507 ) {
508 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
509 entry.sparse_vector_config = Some(config);
510 }
511 }
512
513 pub fn add_dense_vector_field(
518 &mut self,
519 name: &str,
520 dim: usize,
521 indexed: bool,
522 stored: bool,
523 ) -> Field {
524 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
525 }
526
527 pub fn add_dense_vector_field_with_config(
529 &mut self,
530 name: &str,
531 indexed: bool,
532 stored: bool,
533 config: DenseVectorConfig,
534 ) -> Field {
535 let field = Field(self.fields.len() as u32);
536 self.fields.push(FieldEntry {
537 name: name.to_string(),
538 field_type: FieldType::DenseVector,
539 indexed,
540 stored,
541 tokenizer: None,
542 multi: false,
543 positions: None,
544 sparse_vector_config: None,
545 dense_vector_config: Some(config),
546 fast: false,
547 primary_key: false,
548 reorder: false,
549 });
550 field
551 }
552
553 fn add_field(
554 &mut self,
555 name: &str,
556 field_type: FieldType,
557 indexed: bool,
558 stored: bool,
559 ) -> Field {
560 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
561 }
562
563 fn add_field_with_tokenizer(
564 &mut self,
565 name: &str,
566 field_type: FieldType,
567 indexed: bool,
568 stored: bool,
569 tokenizer: Option<String>,
570 ) -> Field {
571 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
572 }
573
574 fn add_field_full(
575 &mut self,
576 name: &str,
577 field_type: FieldType,
578 indexed: bool,
579 stored: bool,
580 tokenizer: Option<String>,
581 multi: bool,
582 ) -> Field {
583 let field = Field(self.fields.len() as u32);
584 self.fields.push(FieldEntry {
585 name: name.to_string(),
586 field_type,
587 indexed,
588 stored,
589 tokenizer,
590 multi,
591 positions: None,
592 sparse_vector_config: None,
593 dense_vector_config: None,
594 fast: false,
595 primary_key: false,
596 reorder: false,
597 });
598 field
599 }
600
601 pub fn set_multi(&mut self, field: Field, multi: bool) {
603 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
604 entry.multi = multi;
605 }
606 }
607
608 pub fn set_fast(&mut self, field: Field, fast: bool) {
611 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
612 entry.fast = fast;
613 }
614 }
615
616 pub fn set_primary_key(&mut self, field: Field) {
618 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
619 entry.primary_key = true;
620 }
621 }
622
623 pub fn set_reorder(&mut self, field: Field, reorder: bool) {
625 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
626 entry.reorder = reorder;
627 }
628 }
629
630 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
632 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
633 entry.positions = Some(mode);
634 }
635 }
636
637 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
639 self.default_fields = field_names;
640 }
641
642 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
644 self.query_routers = rules;
645 }
646
647 pub fn build(self) -> Schema {
648 let mut name_to_field = HashMap::new();
649 for (i, entry) in self.fields.iter().enumerate() {
650 name_to_field.insert(entry.name.clone(), Field(i as u32));
651 }
652
653 let default_fields: Vec<Field> = self
655 .default_fields
656 .iter()
657 .filter_map(|name| name_to_field.get(name).copied())
658 .collect();
659
660 Schema {
661 fields: self.fields,
662 name_to_field,
663 default_fields,
664 query_routers: self.query_routers,
665 }
666 }
667}
668
/// A single value stored in a [`Document`] for some field.
///
/// Variants are (de)serialized under the lowercase / snake_case names given
/// in their `serde(rename)` attributes.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FieldValue {
    /// Text value.
    #[serde(rename = "text")]
    Text(String),
    /// Unsigned 64-bit integer value.
    #[serde(rename = "u64")]
    U64(u64),
    /// Signed 64-bit integer value.
    #[serde(rename = "i64")]
    I64(i64),
    /// 64-bit floating point value.
    #[serde(rename = "f64")]
    F64(f64),
    /// Raw bytes.
    #[serde(rename = "bytes")]
    Bytes(Vec<u8>),
    /// Sparse vector as `(index, value)` pairs.
    #[serde(rename = "sparse_vector")]
    SparseVector(Vec<(u32, f32)>),
    /// Dense vector of `f32` components.
    #[serde(rename = "dense_vector")]
    DenseVector(Vec<f32>),
    /// Arbitrary JSON value.
    #[serde(rename = "json")]
    Json(serde_json::Value),
}
692
693impl FieldValue {
694 pub fn as_text(&self) -> Option<&str> {
695 match self {
696 FieldValue::Text(s) => Some(s),
697 _ => None,
698 }
699 }
700
701 pub fn as_u64(&self) -> Option<u64> {
702 match self {
703 FieldValue::U64(v) => Some(*v),
704 _ => None,
705 }
706 }
707
708 pub fn as_i64(&self) -> Option<i64> {
709 match self {
710 FieldValue::I64(v) => Some(*v),
711 _ => None,
712 }
713 }
714
715 pub fn as_f64(&self) -> Option<f64> {
716 match self {
717 FieldValue::F64(v) => Some(*v),
718 _ => None,
719 }
720 }
721
722 pub fn as_bytes(&self) -> Option<&[u8]> {
723 match self {
724 FieldValue::Bytes(b) => Some(b),
725 _ => None,
726 }
727 }
728
729 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
730 match self {
731 FieldValue::SparseVector(entries) => Some(entries),
732 _ => None,
733 }
734 }
735
736 pub fn as_dense_vector(&self) -> Option<&[f32]> {
737 match self {
738 FieldValue::DenseVector(v) => Some(v),
739 _ => None,
740 }
741 }
742
743 pub fn as_json(&self) -> Option<&serde_json::Value> {
744 match self {
745 FieldValue::Json(v) => Some(v),
746 _ => None,
747 }
748 }
749}
750
/// A document: an ordered list of `(field, value)` pairs.
///
/// The same field may appear several times; values keep the order in which
/// they were added.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// All values of the document, in insertion order.
    field_values: Vec<(Field, FieldValue)>,
}
756
757impl Document {
758 pub fn new() -> Self {
759 Self::default()
760 }
761
762 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
763 self.field_values
764 .push((field, FieldValue::Text(value.into())));
765 }
766
767 pub fn add_u64(&mut self, field: Field, value: u64) {
768 self.field_values.push((field, FieldValue::U64(value)));
769 }
770
771 pub fn add_i64(&mut self, field: Field, value: i64) {
772 self.field_values.push((field, FieldValue::I64(value)));
773 }
774
775 pub fn add_f64(&mut self, field: Field, value: f64) {
776 self.field_values.push((field, FieldValue::F64(value)));
777 }
778
779 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
780 self.field_values.push((field, FieldValue::Bytes(value)));
781 }
782
783 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
784 self.field_values
785 .push((field, FieldValue::SparseVector(entries)));
786 }
787
788 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
789 self.field_values
790 .push((field, FieldValue::DenseVector(values)));
791 }
792
793 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
794 self.field_values.push((field, FieldValue::Json(value)));
795 }
796
797 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
798 self.field_values
799 .iter()
800 .find(|(f, _)| *f == field)
801 .map(|(_, v)| v)
802 }
803
804 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
805 self.field_values
806 .iter()
807 .filter(move |(f, _)| *f == field)
808 .map(|(_, v)| v)
809 }
810
811 pub fn field_values(&self) -> &[(Field, FieldValue)] {
812 &self.field_values
813 }
814
815 pub fn filter_stored(&self, schema: &Schema) -> Document {
817 Document {
818 field_values: self
819 .field_values
820 .iter()
821 .filter(|(field, _)| {
822 schema
823 .get_field_entry(*field)
824 .is_some_and(|entry| entry.stored)
825 })
826 .cloned()
827 .collect(),
828 }
829 }
830
831 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
837 use std::collections::HashMap;
838
839 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
841 HashMap::new();
842
843 for (field, value) in &self.field_values {
844 if let Some(entry) = schema.get_field_entry(*field) {
845 let json_value = match value {
846 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
847 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
848 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
849 FieldValue::F64(n) => serde_json::json!(n),
850 FieldValue::Bytes(b) => {
851 use base64::Engine;
852 serde_json::Value::String(
853 base64::engine::general_purpose::STANDARD.encode(b),
854 )
855 }
856 FieldValue::SparseVector(entries) => {
857 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
858 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
859 serde_json::json!({
860 "indices": indices,
861 "values": values
862 })
863 }
864 FieldValue::DenseVector(values) => {
865 serde_json::json!(values)
866 }
867 FieldValue::Json(v) => v.clone(),
868 };
869 field_values_map
870 .entry(*field)
871 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
872 .2
873 .push(json_value);
874 }
875 }
876
877 let mut map = serde_json::Map::new();
879 for (_field, (name, is_multi, values)) in field_values_map {
880 let json_value = if is_multi || values.len() > 1 {
881 serde_json::Value::Array(values)
882 } else {
883 values.into_iter().next().unwrap()
884 };
885 map.insert(name, json_value);
886 }
887
888 serde_json::Value::Object(map)
889 }
890
891 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
900 let obj = json.as_object()?;
901 let mut doc = Document::new();
902
903 for (key, value) in obj {
904 if let Some(field) = schema.get_field(key) {
905 let field_entry = schema.get_field_entry(field)?;
906 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
907 }
908 }
909
910 Some(doc)
911 }
912
913 fn add_json_value(
915 doc: &mut Document,
916 field: Field,
917 field_type: &FieldType,
918 value: &serde_json::Value,
919 ) {
920 match value {
921 serde_json::Value::String(s) => {
922 if matches!(field_type, FieldType::Text) {
923 doc.add_text(field, s.clone());
924 }
925 }
926 serde_json::Value::Number(n) => {
927 match field_type {
928 FieldType::I64 => {
929 if let Some(i) = n.as_i64() {
930 doc.add_i64(field, i);
931 }
932 }
933 FieldType::U64 => {
934 if let Some(u) = n.as_u64() {
935 doc.add_u64(field, u);
936 } else if let Some(i) = n.as_i64() {
937 if i >= 0 {
939 doc.add_u64(field, i as u64);
940 }
941 }
942 }
943 FieldType::F64 => {
944 if let Some(f) = n.as_f64() {
945 doc.add_f64(field, f);
946 }
947 }
948 _ => {}
949 }
950 }
951 serde_json::Value::Array(arr) => {
953 for item in arr {
954 Self::add_json_value(doc, field, field_type, item);
955 }
956 }
957 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
959 if let (Some(indices_val), Some(values_val)) =
960 (obj.get("indices"), obj.get("values"))
961 {
962 let indices: Vec<u32> = indices_val
963 .as_array()
964 .map(|arr| {
965 arr.iter()
966 .filter_map(|v| v.as_u64().map(|n| n as u32))
967 .collect()
968 })
969 .unwrap_or_default();
970 let values: Vec<f32> = values_val
971 .as_array()
972 .map(|arr| {
973 arr.iter()
974 .filter_map(|v| v.as_f64().map(|n| n as f32))
975 .collect()
976 })
977 .unwrap_or_default();
978 if indices.len() == values.len() {
979 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
980 doc.add_sparse_vector(field, entries);
981 }
982 }
983 }
984 _ if matches!(field_type, FieldType::Json) => {
986 doc.add_json(field, value.clone());
987 }
988 serde_json::Value::Object(_) => {}
989 _ => {}
990 }
991 }
992}
993
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects every value of `field` as text, in insertion order.
    fn texts_of(doc: &Document, field: Field) -> Vec<Option<&str>> {
        doc.get_all(field).map(|value| value.as_text()).collect()
    }

    /// Field names resolve to the handles returned by the builder.
    #[test]
    fn test_schema_builder() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, false);
        let count = builder.add_u64_field("count", true, true);
        let schema = builder.build();

        for (name, handle) in [("title", title), ("body", body), ("count", count)] {
            assert_eq!(schema.get_field(name), Some(handle));
        }
        assert_eq!(schema.get_field("nonexistent"), None);
    }

    /// Values added to a document can be read back through typed accessors.
    #[test]
    fn test_document() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        let first_title = doc.get_first(title).unwrap();
        assert_eq!(first_title.as_text(), Some("Hello World"));
        let first_count = doc.get_first(count).unwrap();
        assert_eq!(first_count.as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip.
    #[test]
    fn test_document_serialization() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut original = Document::new();
        original.add_text(title, "Hello World");
        original.add_u64(count, 42);

        let json = serde_json::to_string(&original).unwrap();
        println!("Serialized doc: {}", json);

        let restored: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(
            restored.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        assert_eq!(
            restored.get_first(title).unwrap().as_text(),
            Some("Hello World")
        );
        assert_eq!(restored.get_first(count).unwrap().as_u64(), Some(42));
    }

    /// Several values for one field are kept in order and emitted as an array.
    #[test]
    fn test_multivalue_field() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let mut doc = Document::new();
        for uri in ["one", "two"] {
            doc.add_text(uris, uri);
        }
        doc.add_text(title, "Test Document");

        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));
        assert_eq!(texts_of(&doc, uris), [Some("one"), Some("two")]);

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        let emitted: Vec<Option<&str>> = uris_json
            .as_array()
            .unwrap()
            .iter()
            .map(|item| item.as_str())
            .collect();
        assert_eq!(emitted, [Some("one"), Some("two")]);

        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// JSON arrays are read as multiple values and written back as arrays.
    #[test]
    fn test_multivalue_from_json() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let input = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let doc = Document::from_json(&input, &schema).unwrap();

        assert_eq!(texts_of(&doc, uris), [Some("one"), Some("two")]);
        assert_eq!(
            doc.get_first(title).unwrap().as_text(),
            Some("Test Document")
        );

        let output = doc.to_json(&schema);
        let emitted: Vec<Option<&str>> = output
            .get("uris")
            .unwrap()
            .as_array()
            .unwrap()
            .iter()
            .map(|item| item.as_str())
            .collect();
        assert_eq!(emitted, [Some("one"), Some("two")]);
    }

    /// A field flagged `multi` is emitted as an array even with one value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        builder.set_multi(uris, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert!(schema.get_field_entry(uris).unwrap().multi);
        assert!(!schema.get_field_entry(title).unwrap().multi);

        let mut doc = Document::new();
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        let emitted: Vec<Option<&str>> = uris_json
            .as_array()
            .unwrap()
            .iter()
            .map(|item| item.as_str())
            .collect();
        assert_eq!(emitted, [Some("only_one")]);

        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// Sparse vectors are stored, emitted as an object and read back.
    #[test]
    fn test_sparse_vector_field() {
        let mut builder = Schema::builder();
        let embedding = builder.add_sparse_vector_field("embedding", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        assert_eq!(
            schema.get_field_entry(embedding).unwrap().field_type,
            FieldType::SparseVector
        );

        let expected: [(u32, f32); 3] = [(0, 1.0), (5, 2.5), (10, 0.5)];

        let mut doc = Document::new();
        doc.add_sparse_vector(embedding, expected.to_vec());
        doc.add_text(title, "Test Document");

        let stored = doc
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(stored, &expected);

        let json = doc.to_json(&schema);
        let embedding_json = json.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        let indices_json = embedding_json.get("indices").unwrap().as_array().unwrap();
        assert_eq!(indices_json.len(), 3);

        let restored_doc = Document::from_json(&json, &schema).unwrap();
        let restored = restored_doc
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        for (position, (want_index, want_weight)) in expected.iter().enumerate() {
            let (got_index, got_weight) = restored[position];
            assert_eq!(got_index, *want_index);
            assert!((got_weight - want_weight).abs() < 1e-6);
        }
    }

    /// JSON object values are stored, emitted and read back unchanged.
    #[test]
    fn test_json_field() {
        let mut builder = Schema::builder();
        let metadata = builder.add_json_field("metadata", true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        let metadata_entry = schema.get_field_entry(metadata).unwrap();
        assert_eq!(metadata_entry.field_type, FieldType::Json);
        assert!(!metadata_entry.indexed);
        assert!(metadata_entry.stored);

        let payload = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, payload.clone());
        doc.add_text(title, "Test Document");

        let stored = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored, &payload);
        assert_eq!(stored.get("author").unwrap().as_str(), Some("John Doe"));

        let doc_json = doc.to_json(&schema);
        assert_eq!(doc_json.get("metadata").unwrap(), &payload);

        let restored_doc = Document::from_json(&doc_json, &schema).unwrap();
        let restored = restored_doc
            .get_first(metadata)
            .unwrap()
            .as_json()
            .unwrap();
        assert_eq!(restored, &payload);
    }

    /// Any kind of JSON value can be stored in a JSON field via `add_json`.
    #[test]
    fn test_json_field_various_types() {
        let mut builder = Schema::builder();
        let data = builder.add_json_field("data", true);
        let _schema = builder.build();

        let samples = [
            serde_json::json!([1, 2, 3, "four", null]),
            serde_json::json!("just a string"),
            serde_json::json!(42.5),
            serde_json::Value::Null,
            serde_json::json!(true),
        ];

        for sample in &samples {
            let mut doc = Document::new();
            doc.add_json(data, sample.clone());
            assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), sample);
        }
    }
}