1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
8pub struct Field(pub u32);
9
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub enum FieldType {
13 #[serde(rename = "text")]
15 Text,
16 #[serde(rename = "u64")]
18 U64,
19 #[serde(rename = "i64")]
21 I64,
22 #[serde(rename = "f64")]
24 F64,
25 #[serde(rename = "bytes")]
27 Bytes,
28 #[serde(rename = "sparse_vector")]
30 SparseVector,
31 #[serde(rename = "dense_vector")]
33 DenseVector,
34 #[serde(rename = "json")]
36 Json,
37}
38
39#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct FieldEntry {
42 pub name: String,
43 pub field_type: FieldType,
44 pub indexed: bool,
45 pub stored: bool,
46 pub tokenizer: Option<String>,
48 #[serde(default)]
50 pub multi: bool,
51 #[serde(default, skip_serializing_if = "Option::is_none")]
53 pub positions: Option<PositionMode>,
54 #[serde(default, skip_serializing_if = "Option::is_none")]
56 pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
57 #[serde(default, skip_serializing_if = "Option::is_none")]
59 pub dense_vector_config: Option<DenseVectorConfig>,
60 #[serde(default)]
63 pub fast: bool,
64 #[serde(default)]
66 pub primary_key: bool,
67}
68
69#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
71#[serde(rename_all = "snake_case")]
72pub enum PositionMode {
73 Ordinal,
76 TokenPosition,
79 Full,
82}
83
84impl PositionMode {
85 pub fn tracks_ordinal(&self) -> bool {
87 matches!(self, PositionMode::Ordinal | PositionMode::Full)
88 }
89
90 pub fn tracks_token_position(&self) -> bool {
92 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
93 }
94}
95
96#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
98#[serde(rename_all = "snake_case")]
99pub enum VectorIndexType {
100 Flat,
102 #[default]
104 RaBitQ,
105 IvfRaBitQ,
107 ScaNN,
109}
110
111#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
117#[serde(rename_all = "snake_case")]
118pub enum DenseVectorQuantization {
119 #[default]
121 F32,
122 F16,
124 UInt8,
126}
127
128impl DenseVectorQuantization {
129 pub fn element_size(self) -> usize {
131 match self {
132 Self::F32 => 4,
133 Self::F16 => 2,
134 Self::UInt8 => 1,
135 }
136 }
137
138 pub fn tag(self) -> u8 {
140 match self {
141 Self::F32 => 0,
142 Self::F16 => 1,
143 Self::UInt8 => 2,
144 }
145 }
146
147 pub fn from_tag(tag: u8) -> Option<Self> {
149 match tag {
150 0 => Some(Self::F32),
151 1 => Some(Self::F16),
152 2 => Some(Self::UInt8),
153 _ => None,
154 }
155 }
156}
157
158#[derive(Debug, Clone, Serialize, Deserialize)]
166pub struct DenseVectorConfig {
167 pub dim: usize,
169 #[serde(default)]
172 pub index_type: VectorIndexType,
173 #[serde(default)]
175 pub quantization: DenseVectorQuantization,
176 #[serde(default, skip_serializing_if = "Option::is_none")]
179 pub num_clusters: Option<usize>,
180 #[serde(default = "default_nprobe")]
182 pub nprobe: usize,
183 #[serde(default, skip_serializing_if = "Option::is_none")]
187 pub build_threshold: Option<usize>,
188 #[serde(default = "default_unit_norm")]
193 pub unit_norm: bool,
194}
195
/// Serde fallback for `DenseVectorConfig::nprobe` when the key is missing.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
199
/// Serde fallback for `DenseVectorConfig::unit_norm` when the key is missing.
fn default_unit_norm() -> bool {
    const DEFAULT_UNIT_NORM: bool = true;
    DEFAULT_UNIT_NORM
}
203
204impl DenseVectorConfig {
205 pub fn new(dim: usize) -> Self {
206 Self {
207 dim,
208 index_type: VectorIndexType::RaBitQ,
209 quantization: DenseVectorQuantization::F32,
210 num_clusters: None,
211 nprobe: 32,
212 build_threshold: None,
213 unit_norm: true,
214 }
215 }
216
217 pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
219 Self {
220 dim,
221 index_type: VectorIndexType::IvfRaBitQ,
222 quantization: DenseVectorQuantization::F32,
223 num_clusters,
224 nprobe,
225 build_threshold: None,
226 unit_norm: true,
227 }
228 }
229
230 pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
232 Self {
233 dim,
234 index_type: VectorIndexType::ScaNN,
235 quantization: DenseVectorQuantization::F32,
236 num_clusters,
237 nprobe,
238 build_threshold: None,
239 unit_norm: true,
240 }
241 }
242
243 pub fn flat(dim: usize) -> Self {
245 Self {
246 dim,
247 index_type: VectorIndexType::Flat,
248 quantization: DenseVectorQuantization::F32,
249 num_clusters: None,
250 nprobe: 0,
251 build_threshold: None,
252 unit_norm: true,
253 }
254 }
255
256 pub fn with_quantization(mut self, quantization: DenseVectorQuantization) -> Self {
258 self.quantization = quantization;
259 self
260 }
261
262 pub fn with_build_threshold(mut self, threshold: usize) -> Self {
264 self.build_threshold = Some(threshold);
265 self
266 }
267
268 pub fn with_unit_norm(mut self) -> Self {
270 self.unit_norm = true;
271 self
272 }
273
274 pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
276 self.num_clusters = Some(num_clusters);
277 self
278 }
279
280 pub fn uses_ivf(&self) -> bool {
282 matches!(
283 self.index_type,
284 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
285 )
286 }
287
288 pub fn uses_scann(&self) -> bool {
290 self.index_type == VectorIndexType::ScaNN
291 }
292
293 pub fn is_flat(&self) -> bool {
295 self.index_type == VectorIndexType::Flat
296 }
297
298 pub fn default_build_threshold(&self) -> usize {
300 self.build_threshold.unwrap_or(match self.index_type {
301 VectorIndexType::Flat => usize::MAX, VectorIndexType::RaBitQ => 1000,
303 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
304 })
305 }
306
307 pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
309 self.num_clusters.unwrap_or_else(|| {
310 let optimal = (num_vectors as f64).sqrt() as usize;
312 optimal.clamp(16, 4096)
313 })
314 }
315}
316
317use super::query_field_router::QueryRouterRule;
318
319#[derive(Debug, Clone, Default, Serialize, Deserialize)]
321pub struct Schema {
322 fields: Vec<FieldEntry>,
323 name_to_field: HashMap<String, Field>,
324 #[serde(default)]
326 default_fields: Vec<Field>,
327 #[serde(default)]
329 query_routers: Vec<QueryRouterRule>,
330}
331
332impl Schema {
333 pub fn builder() -> SchemaBuilder {
334 SchemaBuilder::default()
335 }
336
337 pub fn get_field(&self, name: &str) -> Option<Field> {
338 self.name_to_field.get(name).copied()
339 }
340
341 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
342 self.fields.get(field.0 as usize)
343 }
344
345 pub fn get_field_name(&self, field: Field) -> Option<&str> {
346 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
347 }
348
349 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
350 self.fields
351 .iter()
352 .enumerate()
353 .map(|(i, e)| (Field(i as u32), e))
354 }
355
356 pub fn num_fields(&self) -> usize {
357 self.fields.len()
358 }
359
360 pub fn default_fields(&self) -> &[Field] {
362 &self.default_fields
363 }
364
365 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
367 self.default_fields = fields;
368 }
369
370 pub fn query_routers(&self) -> &[QueryRouterRule] {
372 &self.query_routers
373 }
374
375 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
377 self.query_routers = rules;
378 }
379
380 pub fn primary_field(&self) -> Option<Field> {
382 self.fields
383 .iter()
384 .enumerate()
385 .find(|(_, e)| e.primary_key)
386 .map(|(i, _)| Field(i as u32))
387 }
388}
389
390#[derive(Debug, Default)]
392pub struct SchemaBuilder {
393 fields: Vec<FieldEntry>,
394 default_fields: Vec<String>,
395 query_routers: Vec<QueryRouterRule>,
396}
397
398impl SchemaBuilder {
399 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
400 self.add_field_with_tokenizer(
401 name,
402 FieldType::Text,
403 indexed,
404 stored,
405 Some("simple".to_string()),
406 )
407 }
408
409 pub fn add_text_field_with_tokenizer(
410 &mut self,
411 name: &str,
412 indexed: bool,
413 stored: bool,
414 tokenizer: &str,
415 ) -> Field {
416 self.add_field_with_tokenizer(
417 name,
418 FieldType::Text,
419 indexed,
420 stored,
421 Some(tokenizer.to_string()),
422 )
423 }
424
425 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
426 self.add_field(name, FieldType::U64, indexed, stored)
427 }
428
429 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
430 self.add_field(name, FieldType::I64, indexed, stored)
431 }
432
433 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
434 self.add_field(name, FieldType::F64, indexed, stored)
435 }
436
437 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
438 self.add_field(name, FieldType::Bytes, false, stored)
439 }
440
441 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
446 self.add_field(name, FieldType::Json, false, stored)
447 }
448
449 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
454 self.add_sparse_vector_field_with_config(
455 name,
456 indexed,
457 stored,
458 crate::structures::SparseVectorConfig::default(),
459 )
460 }
461
462 pub fn add_sparse_vector_field_with_config(
467 &mut self,
468 name: &str,
469 indexed: bool,
470 stored: bool,
471 config: crate::structures::SparseVectorConfig,
472 ) -> Field {
473 let field = Field(self.fields.len() as u32);
474 self.fields.push(FieldEntry {
475 name: name.to_string(),
476 field_type: FieldType::SparseVector,
477 indexed,
478 stored,
479 tokenizer: None,
480 multi: false,
481 positions: None,
482 sparse_vector_config: Some(config),
483 dense_vector_config: None,
484 fast: false,
485 primary_key: false,
486 });
487 field
488 }
489
490 pub fn set_sparse_vector_config(
492 &mut self,
493 field: Field,
494 config: crate::structures::SparseVectorConfig,
495 ) {
496 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
497 entry.sparse_vector_config = Some(config);
498 }
499 }
500
501 pub fn add_dense_vector_field(
506 &mut self,
507 name: &str,
508 dim: usize,
509 indexed: bool,
510 stored: bool,
511 ) -> Field {
512 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
513 }
514
515 pub fn add_dense_vector_field_with_config(
517 &mut self,
518 name: &str,
519 indexed: bool,
520 stored: bool,
521 config: DenseVectorConfig,
522 ) -> Field {
523 let field = Field(self.fields.len() as u32);
524 self.fields.push(FieldEntry {
525 name: name.to_string(),
526 field_type: FieldType::DenseVector,
527 indexed,
528 stored,
529 tokenizer: None,
530 multi: false,
531 positions: None,
532 sparse_vector_config: None,
533 dense_vector_config: Some(config),
534 fast: false,
535 primary_key: false,
536 });
537 field
538 }
539
540 fn add_field(
541 &mut self,
542 name: &str,
543 field_type: FieldType,
544 indexed: bool,
545 stored: bool,
546 ) -> Field {
547 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
548 }
549
550 fn add_field_with_tokenizer(
551 &mut self,
552 name: &str,
553 field_type: FieldType,
554 indexed: bool,
555 stored: bool,
556 tokenizer: Option<String>,
557 ) -> Field {
558 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
559 }
560
561 fn add_field_full(
562 &mut self,
563 name: &str,
564 field_type: FieldType,
565 indexed: bool,
566 stored: bool,
567 tokenizer: Option<String>,
568 multi: bool,
569 ) -> Field {
570 let field = Field(self.fields.len() as u32);
571 self.fields.push(FieldEntry {
572 name: name.to_string(),
573 field_type,
574 indexed,
575 stored,
576 tokenizer,
577 multi,
578 positions: None,
579 sparse_vector_config: None,
580 dense_vector_config: None,
581 fast: false,
582 primary_key: false,
583 });
584 field
585 }
586
587 pub fn set_multi(&mut self, field: Field, multi: bool) {
589 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
590 entry.multi = multi;
591 }
592 }
593
594 pub fn set_fast(&mut self, field: Field, fast: bool) {
597 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
598 entry.fast = fast;
599 }
600 }
601
602 pub fn set_primary_key(&mut self, field: Field) {
604 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
605 entry.primary_key = true;
606 }
607 }
608
609 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
611 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
612 entry.positions = Some(mode);
613 }
614 }
615
616 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
618 self.default_fields = field_names;
619 }
620
621 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
623 self.query_routers = rules;
624 }
625
626 pub fn build(self) -> Schema {
627 let mut name_to_field = HashMap::new();
628 for (i, entry) in self.fields.iter().enumerate() {
629 name_to_field.insert(entry.name.clone(), Field(i as u32));
630 }
631
632 let default_fields: Vec<Field> = self
634 .default_fields
635 .iter()
636 .filter_map(|name| name_to_field.get(name).copied())
637 .collect();
638
639 Schema {
640 fields: self.fields,
641 name_to_field,
642 default_fields,
643 query_routers: self.query_routers,
644 }
645 }
646}
647
648#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
650pub enum FieldValue {
651 #[serde(rename = "text")]
652 Text(String),
653 #[serde(rename = "u64")]
654 U64(u64),
655 #[serde(rename = "i64")]
656 I64(i64),
657 #[serde(rename = "f64")]
658 F64(f64),
659 #[serde(rename = "bytes")]
660 Bytes(Vec<u8>),
661 #[serde(rename = "sparse_vector")]
663 SparseVector(Vec<(u32, f32)>),
664 #[serde(rename = "dense_vector")]
666 DenseVector(Vec<f32>),
667 #[serde(rename = "json")]
669 Json(serde_json::Value),
670}
671
672impl FieldValue {
673 pub fn as_text(&self) -> Option<&str> {
674 match self {
675 FieldValue::Text(s) => Some(s),
676 _ => None,
677 }
678 }
679
680 pub fn as_u64(&self) -> Option<u64> {
681 match self {
682 FieldValue::U64(v) => Some(*v),
683 _ => None,
684 }
685 }
686
687 pub fn as_i64(&self) -> Option<i64> {
688 match self {
689 FieldValue::I64(v) => Some(*v),
690 _ => None,
691 }
692 }
693
694 pub fn as_f64(&self) -> Option<f64> {
695 match self {
696 FieldValue::F64(v) => Some(*v),
697 _ => None,
698 }
699 }
700
701 pub fn as_bytes(&self) -> Option<&[u8]> {
702 match self {
703 FieldValue::Bytes(b) => Some(b),
704 _ => None,
705 }
706 }
707
708 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
709 match self {
710 FieldValue::SparseVector(entries) => Some(entries),
711 _ => None,
712 }
713 }
714
715 pub fn as_dense_vector(&self) -> Option<&[f32]> {
716 match self {
717 FieldValue::DenseVector(v) => Some(v),
718 _ => None,
719 }
720 }
721
722 pub fn as_json(&self) -> Option<&serde_json::Value> {
723 match self {
724 FieldValue::Json(v) => Some(v),
725 _ => None,
726 }
727 }
728}
729
730#[derive(Debug, Clone, Default, Serialize, Deserialize)]
732pub struct Document {
733 field_values: Vec<(Field, FieldValue)>,
734}
735
736impl Document {
737 pub fn new() -> Self {
738 Self::default()
739 }
740
741 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
742 self.field_values
743 .push((field, FieldValue::Text(value.into())));
744 }
745
746 pub fn add_u64(&mut self, field: Field, value: u64) {
747 self.field_values.push((field, FieldValue::U64(value)));
748 }
749
750 pub fn add_i64(&mut self, field: Field, value: i64) {
751 self.field_values.push((field, FieldValue::I64(value)));
752 }
753
754 pub fn add_f64(&mut self, field: Field, value: f64) {
755 self.field_values.push((field, FieldValue::F64(value)));
756 }
757
758 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
759 self.field_values.push((field, FieldValue::Bytes(value)));
760 }
761
762 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
763 self.field_values
764 .push((field, FieldValue::SparseVector(entries)));
765 }
766
767 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
768 self.field_values
769 .push((field, FieldValue::DenseVector(values)));
770 }
771
772 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
773 self.field_values.push((field, FieldValue::Json(value)));
774 }
775
776 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
777 self.field_values
778 .iter()
779 .find(|(f, _)| *f == field)
780 .map(|(_, v)| v)
781 }
782
783 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
784 self.field_values
785 .iter()
786 .filter(move |(f, _)| *f == field)
787 .map(|(_, v)| v)
788 }
789
790 pub fn field_values(&self) -> &[(Field, FieldValue)] {
791 &self.field_values
792 }
793
794 pub fn filter_stored(&self, schema: &Schema) -> Document {
796 Document {
797 field_values: self
798 .field_values
799 .iter()
800 .filter(|(field, _)| {
801 schema
802 .get_field_entry(*field)
803 .is_some_and(|entry| entry.stored)
804 })
805 .cloned()
806 .collect(),
807 }
808 }
809
810 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
816 use std::collections::HashMap;
817
818 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
820 HashMap::new();
821
822 for (field, value) in &self.field_values {
823 if let Some(entry) = schema.get_field_entry(*field) {
824 let json_value = match value {
825 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
826 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
827 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
828 FieldValue::F64(n) => serde_json::json!(n),
829 FieldValue::Bytes(b) => {
830 use base64::Engine;
831 serde_json::Value::String(
832 base64::engine::general_purpose::STANDARD.encode(b),
833 )
834 }
835 FieldValue::SparseVector(entries) => {
836 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
837 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
838 serde_json::json!({
839 "indices": indices,
840 "values": values
841 })
842 }
843 FieldValue::DenseVector(values) => {
844 serde_json::json!(values)
845 }
846 FieldValue::Json(v) => v.clone(),
847 };
848 field_values_map
849 .entry(*field)
850 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
851 .2
852 .push(json_value);
853 }
854 }
855
856 let mut map = serde_json::Map::new();
858 for (_field, (name, is_multi, values)) in field_values_map {
859 let json_value = if is_multi || values.len() > 1 {
860 serde_json::Value::Array(values)
861 } else {
862 values.into_iter().next().unwrap()
863 };
864 map.insert(name, json_value);
865 }
866
867 serde_json::Value::Object(map)
868 }
869
870 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
879 let obj = json.as_object()?;
880 let mut doc = Document::new();
881
882 for (key, value) in obj {
883 if let Some(field) = schema.get_field(key) {
884 let field_entry = schema.get_field_entry(field)?;
885 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
886 }
887 }
888
889 Some(doc)
890 }
891
892 fn add_json_value(
894 doc: &mut Document,
895 field: Field,
896 field_type: &FieldType,
897 value: &serde_json::Value,
898 ) {
899 match value {
900 serde_json::Value::String(s) => {
901 if matches!(field_type, FieldType::Text) {
902 doc.add_text(field, s.clone());
903 }
904 }
905 serde_json::Value::Number(n) => {
906 match field_type {
907 FieldType::I64 => {
908 if let Some(i) = n.as_i64() {
909 doc.add_i64(field, i);
910 }
911 }
912 FieldType::U64 => {
913 if let Some(u) = n.as_u64() {
914 doc.add_u64(field, u);
915 } else if let Some(i) = n.as_i64() {
916 if i >= 0 {
918 doc.add_u64(field, i as u64);
919 }
920 }
921 }
922 FieldType::F64 => {
923 if let Some(f) = n.as_f64() {
924 doc.add_f64(field, f);
925 }
926 }
927 _ => {}
928 }
929 }
930 serde_json::Value::Array(arr) => {
932 for item in arr {
933 Self::add_json_value(doc, field, field_type, item);
934 }
935 }
936 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
938 if let (Some(indices_val), Some(values_val)) =
939 (obj.get("indices"), obj.get("values"))
940 {
941 let indices: Vec<u32> = indices_val
942 .as_array()
943 .map(|arr| {
944 arr.iter()
945 .filter_map(|v| v.as_u64().map(|n| n as u32))
946 .collect()
947 })
948 .unwrap_or_default();
949 let values: Vec<f32> = values_val
950 .as_array()
951 .map(|arr| {
952 arr.iter()
953 .filter_map(|v| v.as_f64().map(|n| n as f32))
954 .collect()
955 })
956 .unwrap_or_default();
957 if indices.len() == values.len() {
958 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
959 doc.add_sparse_vector(field, entries);
960 }
961 }
962 }
963 _ if matches!(field_type, FieldType::Json) => {
965 doc.add_json(field, value.clone());
966 }
967 serde_json::Value::Object(_) => {}
968 _ => {}
969 }
970 }
971}
972
#[cfg(test)]
mod tests {
    use super::*;

    /// Handles returned by the builder resolve by name after `build`.
    #[test]
    fn test_schema_builder() {
        let mut sb = Schema::builder();
        let title_f = sb.add_text_field("title", true, true);
        let body_f = sb.add_text_field("body", true, false);
        let count_f = sb.add_u64_field("count", true, true);
        let schema = sb.build();

        let expectations = [
            ("title", Some(title_f)),
            ("body", Some(body_f)),
            ("count", Some(count_f)),
            ("nonexistent", None),
        ];
        for (name, expected) in expectations.iter() {
            assert_eq!(schema.get_field(name), *expected);
        }
    }

    /// Values added to a document can be read back with the typed accessors.
    #[test]
    fn test_document() {
        let mut sb = Schema::builder();
        let title_f = sb.add_text_field("title", true, true);
        let count_f = sb.add_u64_field("count", true, true);
        let _schema = sb.build();

        let mut doc = Document::new();
        doc.add_text(title_f, "Hello World");
        doc.add_u64(count_f, 42);

        let title_value = doc.get_first(title_f).unwrap();
        let count_value = doc.get_first(count_f).unwrap();
        assert_eq!(title_value.as_text(), Some("Hello World"));
        assert_eq!(count_value.as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip.
    #[test]
    fn test_document_serialization() {
        let mut sb = Schema::builder();
        let title_f = sb.add_text_field("title", true, true);
        let count_f = sb.add_u64_field("count", true, true);
        let _schema = sb.build();

        let mut original = Document::new();
        original.add_text(title_f, "Hello World");
        original.add_u64(count_f, 42);

        let json = serde_json::to_string(&original).unwrap();
        println!("Serialized doc: {}", json);

        let restored: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(
            restored.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        assert_eq!(
            restored.get_first(title_f).unwrap().as_text(),
            Some("Hello World")
        );
        assert_eq!(restored.get_first(count_f).unwrap().as_u64(), Some(42));
    }

    /// Several values for one field are kept in order and emitted as an array.
    #[test]
    fn test_multivalue_field() {
        let mut sb = Schema::builder();
        let uris_f = sb.add_text_field("uris", true, true);
        let title_f = sb.add_text_field("title", true, true);
        let schema = sb.build();

        let mut doc = Document::new();
        for uri in ["one", "two"].iter() {
            doc.add_text(uris_f, *uri);
        }
        doc.add_text(title_f, "Test Document");

        assert_eq!(doc.get_first(uris_f).unwrap().as_text(), Some("one"));

        let collected: Vec<_> = doc.get_all(uris_f).collect();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].as_text(), Some("one"));
        assert_eq!(collected[1].as_text(), Some("two"));

        let rendered = doc.to_json(&schema);
        let uris_node = rendered.get("uris").unwrap();
        assert!(uris_node.is_array(), "Multi-value field should be an array");
        let uris_items = uris_node.as_array().unwrap();
        assert_eq!(uris_items.len(), 2);
        assert_eq!(uris_items[0].as_str(), Some("one"));
        assert_eq!(uris_items[1].as_str(), Some("two"));

        let title_node = rendered.get("title").unwrap();
        assert!(
            title_node.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_node.as_str(), Some("Test Document"));
    }

    /// A JSON array is read back as several values of the same field.
    #[test]
    fn test_multivalue_from_json() {
        let mut sb = Schema::builder();
        let uris_f = sb.add_text_field("uris", true, true);
        let title_f = sb.add_text_field("title", true, true);
        let schema = sb.build();

        let input = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let doc = Document::from_json(&input, &schema).unwrap();

        let collected: Vec<_> = doc.get_all(uris_f).collect();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].as_text(), Some("one"));
        assert_eq!(collected[1].as_text(), Some("two"));

        assert_eq!(
            doc.get_first(title_f).unwrap().as_text(),
            Some("Test Document")
        );

        let rendered = doc.to_json(&schema);
        let uris_items = rendered.get("uris").unwrap().as_array().unwrap();
        assert_eq!(uris_items.len(), 2);
        assert_eq!(uris_items[0].as_str(), Some("one"));
        assert_eq!(uris_items[1].as_str(), Some("two"));
    }

    /// The `multi` flag forces an array even for a single value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut sb = Schema::builder();
        let uris_f = sb.add_text_field("uris", true, true);
        sb.set_multi(uris_f, true);
        let title_f = sb.add_text_field("title", true, true);
        let schema = sb.build();

        assert!(schema.get_field_entry(uris_f).unwrap().multi);
        assert!(!schema.get_field_entry(title_f).unwrap().multi);

        let mut doc = Document::new();
        doc.add_text(uris_f, "only_one");
        doc.add_text(title_f, "Test Document");

        let rendered = doc.to_json(&schema);

        let uris_node = rendered.get("uris").unwrap();
        assert!(
            uris_node.is_array(),
            "Multi field should be array even with single value"
        );
        let uris_items = uris_node.as_array().unwrap();
        assert_eq!(uris_items.len(), 1);
        assert_eq!(uris_items[0].as_str(), Some("only_one"));

        let title_node = rendered.get("title").unwrap();
        assert!(
            title_node.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_node.as_str(), Some("Test Document"));
    }

    /// Sparse vectors are stored as given and survive a JSON round trip.
    #[test]
    fn test_sparse_vector_field() {
        let mut sb = Schema::builder();
        let embedding_f = sb.add_sparse_vector_field("embedding", true, true);
        let title_f = sb.add_text_field("title", true, true);
        let schema = sb.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding_f));
        assert_eq!(
            schema.get_field_entry(embedding_f).unwrap().field_type,
            FieldType::SparseVector
        );

        let mut doc = Document::new();
        doc.add_sparse_vector(embedding_f, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        doc.add_text(title_f, "Test Document");

        let stored = doc
            .get_first(embedding_f)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(stored, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        let rendered = doc.to_json(&schema);
        let embedding_node = rendered.get("embedding").unwrap();
        assert!(embedding_node.is_object());
        let index_count = embedding_node
            .get("indices")
            .unwrap()
            .as_array()
            .unwrap()
            .len();
        assert_eq!(index_count, 3);

        let reparsed = Document::from_json(&rendered, &schema).unwrap();
        let restored = reparsed
            .get_first(embedding_f)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        let expected: [(u32, f32); 3] = [(0, 1.0), (5, 2.5), (10, 0.5)];
        for (slot, (index, weight)) in expected.iter().enumerate() {
            assert_eq!(restored[slot].0, *index);
            assert!((restored[slot].1 - *weight).abs() < 1e-6);
        }
    }

    /// JSON fields are stored, never indexed, and round-trip through document JSON.
    #[test]
    fn test_json_field() {
        let mut sb = Schema::builder();
        let metadata_f = sb.add_json_field("metadata", true);
        let title_f = sb.add_text_field("title", true, true);
        let schema = sb.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata_f));
        let metadata_entry = schema.get_field_entry(metadata_f).unwrap();
        assert_eq!(metadata_entry.field_type, FieldType::Json);
        assert!(!metadata_entry.indexed);
        assert!(metadata_entry.stored);

        let payload = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata_f, payload.clone());
        doc.add_text(title_f, "Test Document");

        let held = doc.get_first(metadata_f).unwrap().as_json().unwrap();
        assert_eq!(held, &payload);
        assert_eq!(held.get("author").unwrap().as_str(), Some("John Doe"));

        let rendered = doc.to_json(&schema);
        let metadata_node = rendered.get("metadata").unwrap();
        assert_eq!(metadata_node, &payload);

        let reparsed = Document::from_json(&rendered, &schema).unwrap();
        let held_again = reparsed.get_first(metadata_f).unwrap().as_json().unwrap();
        assert_eq!(held_again, &payload);
    }

    /// `add_json` accepts every kind of JSON value.
    #[test]
    fn test_json_field_various_types() {
        let mut sb = Schema::builder();
        let data_f = sb.add_json_field("data", true);
        let _schema = sb.build();

        let samples = vec![
            serde_json::json!([1, 2, 3, "four", null]),
            serde_json::json!("just a string"),
            serde_json::json!(42.5),
            serde_json::Value::Null,
            serde_json::json!(true),
        ];

        for sample in samples {
            let mut doc = Document::new();
            doc.add_json(data_f, sample.clone());
            assert_eq!(doc.get_first(data_f).unwrap().as_json().unwrap(), &sample);
        }
    }
}