1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Handle identifying one field of a [`Schema`].
///
/// The wrapped `u32` is used as an index into the schema's field list
/// (see `Schema::get_field_entry`); `SchemaBuilder` hands out values in the
/// order fields are added.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
/// The kind of value a schema field holds.
///
/// Every variant carries an explicit serde rename, so the serialized form is
/// the lowercase string given in the attribute above each variant.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FieldType {
    /// String values; the `SchemaBuilder` text helpers attach a tokenizer name.
    #[serde(rename = "text")]
    Text,
    /// Unsigned 64-bit integers.
    #[serde(rename = "u64")]
    U64,
    /// Signed 64-bit integers.
    #[serde(rename = "i64")]
    I64,
    /// 64-bit floating point numbers.
    #[serde(rename = "f64")]
    F64,
    /// Raw byte strings.
    #[serde(rename = "bytes")]
    Bytes,
    /// Sparse vectors, represented in documents as `(u32, f32)` pairs.
    #[serde(rename = "sparse_vector")]
    SparseVector,
    /// Dense vectors, represented in documents as `Vec<f32>`.
    #[serde(rename = "dense_vector")]
    DenseVector,
    /// Arbitrary JSON values.
    #[serde(rename = "json")]
    Json,
}
38
/// Description of a single field in a [`Schema`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; the key used for name lookups in the schema and for JSON
    /// objects produced by `Document::to_json`.
    pub name: String,
    /// Kind of value stored in this field.
    pub field_type: FieldType,
    /// Whether the field is indexed. The flag is only recorded in this file;
    /// presumably the indexing code consumes it — verify against callers.
    pub indexed: bool,
    /// Whether the field is stored; `Document::filter_stored` keeps only
    /// values whose field has this flag set.
    pub stored: bool,
    /// Tokenizer name. The builder sets it for text fields and leaves it
    /// `None` for every other field kind.
    pub tokenizer: Option<String>,
    /// When `true`, `Document::to_json` always emits this field as an array,
    /// even for a single value. Defaults to `false` when absent on input.
    #[serde(default)]
    pub multi: bool,
    /// Optional position-tracking mode; omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub positions: Option<PositionMode>,
    /// Sparse-vector settings; set by the sparse-vector builder methods and
    /// omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Dense-vector settings; set by the dense-vector builder methods and
    /// omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
}
61
/// Which kind of position information is tracked for a field.
///
/// Serialized with serde's `snake_case` renaming.
/// NOTE(review): the precise meaning of "ordinal" versus "token position" is
/// defined by the indexing code, which is not visible here — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PositionMode {
    /// Track ordinals only (`tracks_ordinal()` is true).
    Ordinal,
    /// Track token positions only (`tracks_token_position()` is true).
    TokenPosition,
    /// Track both ordinals and token positions.
    Full,
}
76
77impl PositionMode {
78 pub fn tracks_ordinal(&self) -> bool {
80 matches!(self, PositionMode::Ordinal | PositionMode::Full)
81 }
82
83 pub fn tracks_token_position(&self) -> bool {
85 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
86 }
87}
88
/// Index structure used for a dense-vector field.
///
/// The default is `RaBitQ`.
/// NOTE(review): the serialized names come from serde's `snake_case` rule,
/// which presumably splits at every interior capital (e.g. `ScaNN` ->
/// `sca_n_n`) — confirm the wire names before relying on them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Flat index; `DenseVectorConfig::default_build_threshold` reports
    /// `usize::MAX` for it unless overridden.
    Flat,
    /// RaBitQ index (the default); build threshold defaults to 1000.
    #[default]
    RaBitQ,
    /// IVF variant of RaBitQ; counted as IVF by `DenseVectorConfig::uses_ivf`,
    /// build threshold defaults to 10000.
    IvfRaBitQ,
    /// ScaNN index; also counted as IVF by `DenseVectorConfig::uses_ivf`,
    /// build threshold defaults to 10000.
    ScaNN,
}
103
/// Element encoding for dense vectors.
///
/// The default is `F32`. Serialized with serde's `snake_case` renaming.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DenseVectorQuantization {
    /// 4 bytes per element, tag 0 (the default).
    #[default]
    F32,
    /// 2 bytes per element, tag 1.
    F16,
    /// 1 byte per element, tag 2.
    UInt8,
}
120
121impl DenseVectorQuantization {
122 pub fn element_size(self) -> usize {
124 match self {
125 Self::F32 => 4,
126 Self::F16 => 2,
127 Self::UInt8 => 1,
128 }
129 }
130
131 pub fn tag(self) -> u8 {
133 match self {
134 Self::F32 => 0,
135 Self::F16 => 1,
136 Self::UInt8 => 2,
137 }
138 }
139
140 pub fn from_tag(tag: u8) -> Option<Self> {
142 match tag {
143 0 => Some(Self::F32),
144 1 => Some(Self::F16),
145 2 => Some(Self::UInt8),
146 _ => None,
147 }
148 }
149}
150
/// Settings for a dense-vector field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Vector dimensionality. Not validated anywhere in this file.
    pub dim: usize,
    /// Index structure; falls back to `VectorIndexType::default()` when absent.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Element encoding; falls back to `DenseVectorQuantization::default()` when absent.
    #[serde(default)]
    pub quantization: DenseVectorQuantization,
    /// Explicit cluster count. When `None`, `optimal_num_clusters` derives one
    /// from the number of vectors. Omitted from the serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_clusters: Option<usize>,
    /// Defaults to 32 when absent. Presumably the number of clusters probed
    /// per query — confirm against the search code.
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// Explicit override for `default_build_threshold`. Omitted from the
    /// serialized form when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build_threshold: Option<usize>,
    /// Defaults to `true` when absent. Presumably marks vectors as
    /// unit-normalized — confirm against the code that reads it.
    #[serde(default = "default_unit_norm")]
    pub unit_norm: bool,
}
188
/// Serde default for `DenseVectorConfig::nprobe`.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
192
/// Serde default for `DenseVectorConfig::unit_norm`.
fn default_unit_norm() -> bool {
    const DEFAULT_UNIT_NORM: bool = true;
    DEFAULT_UNIT_NORM
}
196
197impl DenseVectorConfig {
198 pub fn new(dim: usize) -> Self {
199 Self {
200 dim,
201 index_type: VectorIndexType::RaBitQ,
202 quantization: DenseVectorQuantization::F32,
203 num_clusters: None,
204 nprobe: 32,
205 build_threshold: None,
206 unit_norm: true,
207 }
208 }
209
210 pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
212 Self {
213 dim,
214 index_type: VectorIndexType::IvfRaBitQ,
215 quantization: DenseVectorQuantization::F32,
216 num_clusters,
217 nprobe,
218 build_threshold: None,
219 unit_norm: true,
220 }
221 }
222
223 pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
225 Self {
226 dim,
227 index_type: VectorIndexType::ScaNN,
228 quantization: DenseVectorQuantization::F32,
229 num_clusters,
230 nprobe,
231 build_threshold: None,
232 unit_norm: true,
233 }
234 }
235
236 pub fn flat(dim: usize) -> Self {
238 Self {
239 dim,
240 index_type: VectorIndexType::Flat,
241 quantization: DenseVectorQuantization::F32,
242 num_clusters: None,
243 nprobe: 0,
244 build_threshold: None,
245 unit_norm: true,
246 }
247 }
248
249 pub fn with_quantization(mut self, quantization: DenseVectorQuantization) -> Self {
251 self.quantization = quantization;
252 self
253 }
254
255 pub fn with_build_threshold(mut self, threshold: usize) -> Self {
257 self.build_threshold = Some(threshold);
258 self
259 }
260
261 pub fn with_unit_norm(mut self) -> Self {
263 self.unit_norm = true;
264 self
265 }
266
267 pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
269 self.num_clusters = Some(num_clusters);
270 self
271 }
272
273 pub fn uses_ivf(&self) -> bool {
275 matches!(
276 self.index_type,
277 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
278 )
279 }
280
281 pub fn uses_scann(&self) -> bool {
283 self.index_type == VectorIndexType::ScaNN
284 }
285
286 pub fn is_flat(&self) -> bool {
288 self.index_type == VectorIndexType::Flat
289 }
290
291 pub fn default_build_threshold(&self) -> usize {
293 self.build_threshold.unwrap_or(match self.index_type {
294 VectorIndexType::Flat => usize::MAX, VectorIndexType::RaBitQ => 1000,
296 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
297 })
298 }
299
300 pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
302 self.num_clusters.unwrap_or_else(|| {
303 let optimal = (num_vectors as f64).sqrt() as usize;
305 optimal.clamp(16, 4096)
306 })
307 }
308}
309
310use super::query_field_router::QueryRouterRule;
311
/// An ordered set of field definitions plus lookup and query metadata.
///
/// Built via `Schema::builder()` / `SchemaBuilder::build`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field definitions; a `Field(n)` handle refers to `fields[n]`.
    fields: Vec<FieldEntry>,
    /// Name -> handle lookup table, populated by `SchemaBuilder::build`.
    name_to_field: HashMap<String, Field>,
    /// Default fields; empty when absent on input. Presumably the fields
    /// searched when a query names none — confirm against the query code.
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query routing rules; empty when absent on input.
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
324
325impl Schema {
326 pub fn builder() -> SchemaBuilder {
327 SchemaBuilder::default()
328 }
329
330 pub fn get_field(&self, name: &str) -> Option<Field> {
331 self.name_to_field.get(name).copied()
332 }
333
334 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
335 self.fields.get(field.0 as usize)
336 }
337
338 pub fn get_field_name(&self, field: Field) -> Option<&str> {
339 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
340 }
341
342 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
343 self.fields
344 .iter()
345 .enumerate()
346 .map(|(i, e)| (Field(i as u32), e))
347 }
348
349 pub fn num_fields(&self) -> usize {
350 self.fields.len()
351 }
352
353 pub fn default_fields(&self) -> &[Field] {
355 &self.default_fields
356 }
357
358 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
360 self.default_fields = fields;
361 }
362
363 pub fn query_routers(&self) -> &[QueryRouterRule] {
365 &self.query_routers
366 }
367
368 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
370 self.query_routers = rules;
371 }
372}
373
/// Incrementally assembles a [`Schema`].
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Fields added so far, in handle order.
    fields: Vec<FieldEntry>,
    /// Names of the default fields; resolved to handles in `build`.
    default_fields: Vec<String>,
    /// Query routing rules, passed through to the schema unchanged.
    query_routers: Vec<QueryRouterRule>,
}
381
382impl SchemaBuilder {
383 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
384 self.add_field_with_tokenizer(
385 name,
386 FieldType::Text,
387 indexed,
388 stored,
389 Some("default".to_string()),
390 )
391 }
392
393 pub fn add_text_field_with_tokenizer(
394 &mut self,
395 name: &str,
396 indexed: bool,
397 stored: bool,
398 tokenizer: &str,
399 ) -> Field {
400 self.add_field_with_tokenizer(
401 name,
402 FieldType::Text,
403 indexed,
404 stored,
405 Some(tokenizer.to_string()),
406 )
407 }
408
409 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
410 self.add_field(name, FieldType::U64, indexed, stored)
411 }
412
413 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
414 self.add_field(name, FieldType::I64, indexed, stored)
415 }
416
417 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
418 self.add_field(name, FieldType::F64, indexed, stored)
419 }
420
421 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
422 self.add_field(name, FieldType::Bytes, false, stored)
423 }
424
425 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
430 self.add_field(name, FieldType::Json, false, stored)
431 }
432
433 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
438 self.add_sparse_vector_field_with_config(
439 name,
440 indexed,
441 stored,
442 crate::structures::SparseVectorConfig::default(),
443 )
444 }
445
446 pub fn add_sparse_vector_field_with_config(
451 &mut self,
452 name: &str,
453 indexed: bool,
454 stored: bool,
455 config: crate::structures::SparseVectorConfig,
456 ) -> Field {
457 let field = Field(self.fields.len() as u32);
458 self.fields.push(FieldEntry {
459 name: name.to_string(),
460 field_type: FieldType::SparseVector,
461 indexed,
462 stored,
463 tokenizer: None,
464 multi: false,
465 positions: None,
466 sparse_vector_config: Some(config),
467 dense_vector_config: None,
468 });
469 field
470 }
471
472 pub fn set_sparse_vector_config(
474 &mut self,
475 field: Field,
476 config: crate::structures::SparseVectorConfig,
477 ) {
478 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
479 entry.sparse_vector_config = Some(config);
480 }
481 }
482
483 pub fn add_dense_vector_field(
488 &mut self,
489 name: &str,
490 dim: usize,
491 indexed: bool,
492 stored: bool,
493 ) -> Field {
494 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
495 }
496
497 pub fn add_dense_vector_field_with_config(
499 &mut self,
500 name: &str,
501 indexed: bool,
502 stored: bool,
503 config: DenseVectorConfig,
504 ) -> Field {
505 let field = Field(self.fields.len() as u32);
506 self.fields.push(FieldEntry {
507 name: name.to_string(),
508 field_type: FieldType::DenseVector,
509 indexed,
510 stored,
511 tokenizer: None,
512 multi: false,
513 positions: None,
514 sparse_vector_config: None,
515 dense_vector_config: Some(config),
516 });
517 field
518 }
519
520 fn add_field(
521 &mut self,
522 name: &str,
523 field_type: FieldType,
524 indexed: bool,
525 stored: bool,
526 ) -> Field {
527 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
528 }
529
530 fn add_field_with_tokenizer(
531 &mut self,
532 name: &str,
533 field_type: FieldType,
534 indexed: bool,
535 stored: bool,
536 tokenizer: Option<String>,
537 ) -> Field {
538 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
539 }
540
541 fn add_field_full(
542 &mut self,
543 name: &str,
544 field_type: FieldType,
545 indexed: bool,
546 stored: bool,
547 tokenizer: Option<String>,
548 multi: bool,
549 ) -> Field {
550 let field = Field(self.fields.len() as u32);
551 self.fields.push(FieldEntry {
552 name: name.to_string(),
553 field_type,
554 indexed,
555 stored,
556 tokenizer,
557 multi,
558 positions: None,
559 sparse_vector_config: None,
560 dense_vector_config: None,
561 });
562 field
563 }
564
565 pub fn set_multi(&mut self, field: Field, multi: bool) {
567 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
568 entry.multi = multi;
569 }
570 }
571
572 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
574 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
575 entry.positions = Some(mode);
576 }
577 }
578
579 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
581 self.default_fields = field_names;
582 }
583
584 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
586 self.query_routers = rules;
587 }
588
589 pub fn build(self) -> Schema {
590 let mut name_to_field = HashMap::new();
591 for (i, entry) in self.fields.iter().enumerate() {
592 name_to_field.insert(entry.name.clone(), Field(i as u32));
593 }
594
595 let default_fields: Vec<Field> = self
597 .default_fields
598 .iter()
599 .filter_map(|name| name_to_field.get(name).copied())
600 .collect();
601
602 Schema {
603 fields: self.fields,
604 name_to_field,
605 default_fields,
606 query_routers: self.query_routers,
607 }
608 }
609}
610
/// A single value stored in a [`Document`].
///
/// The variants mirror [`FieldType`] and use the same lowercase serde names.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FieldValue {
    /// A string value.
    #[serde(rename = "text")]
    Text(String),
    /// An unsigned 64-bit integer.
    #[serde(rename = "u64")]
    U64(u64),
    /// A signed 64-bit integer.
    #[serde(rename = "i64")]
    I64(i64),
    /// A 64-bit floating point number.
    #[serde(rename = "f64")]
    F64(f64),
    /// Raw bytes.
    #[serde(rename = "bytes")]
    Bytes(Vec<u8>),
    /// Sparse vector as `(index, value)` pairs.
    #[serde(rename = "sparse_vector")]
    SparseVector(Vec<(u32, f32)>),
    /// Dense vector of `f32` components.
    #[serde(rename = "dense_vector")]
    DenseVector(Vec<f32>),
    /// Arbitrary JSON value.
    #[serde(rename = "json")]
    Json(serde_json::Value),
}
634
635impl FieldValue {
636 pub fn as_text(&self) -> Option<&str> {
637 match self {
638 FieldValue::Text(s) => Some(s),
639 _ => None,
640 }
641 }
642
643 pub fn as_u64(&self) -> Option<u64> {
644 match self {
645 FieldValue::U64(v) => Some(*v),
646 _ => None,
647 }
648 }
649
650 pub fn as_i64(&self) -> Option<i64> {
651 match self {
652 FieldValue::I64(v) => Some(*v),
653 _ => None,
654 }
655 }
656
657 pub fn as_f64(&self) -> Option<f64> {
658 match self {
659 FieldValue::F64(v) => Some(*v),
660 _ => None,
661 }
662 }
663
664 pub fn as_bytes(&self) -> Option<&[u8]> {
665 match self {
666 FieldValue::Bytes(b) => Some(b),
667 _ => None,
668 }
669 }
670
671 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
672 match self {
673 FieldValue::SparseVector(entries) => Some(entries),
674 _ => None,
675 }
676 }
677
678 pub fn as_dense_vector(&self) -> Option<&[f32]> {
679 match self {
680 FieldValue::DenseVector(v) => Some(v),
681 _ => None,
682 }
683 }
684
685 pub fn as_json(&self) -> Option<&serde_json::Value> {
686 match self {
687 FieldValue::Json(v) => Some(v),
688 _ => None,
689 }
690 }
691}
692
/// A document: an ordered list of `(field, value)` pairs.
///
/// The same field may appear more than once (multi-valued fields); insertion
/// order is preserved.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// Values in the order they were added.
    field_values: Vec<(Field, FieldValue)>,
}
698
699impl Document {
700 pub fn new() -> Self {
701 Self::default()
702 }
703
704 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
705 self.field_values
706 .push((field, FieldValue::Text(value.into())));
707 }
708
709 pub fn add_u64(&mut self, field: Field, value: u64) {
710 self.field_values.push((field, FieldValue::U64(value)));
711 }
712
713 pub fn add_i64(&mut self, field: Field, value: i64) {
714 self.field_values.push((field, FieldValue::I64(value)));
715 }
716
717 pub fn add_f64(&mut self, field: Field, value: f64) {
718 self.field_values.push((field, FieldValue::F64(value)));
719 }
720
721 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
722 self.field_values.push((field, FieldValue::Bytes(value)));
723 }
724
725 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
726 self.field_values
727 .push((field, FieldValue::SparseVector(entries)));
728 }
729
730 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
731 self.field_values
732 .push((field, FieldValue::DenseVector(values)));
733 }
734
735 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
736 self.field_values.push((field, FieldValue::Json(value)));
737 }
738
739 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
740 self.field_values
741 .iter()
742 .find(|(f, _)| *f == field)
743 .map(|(_, v)| v)
744 }
745
746 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
747 self.field_values
748 .iter()
749 .filter(move |(f, _)| *f == field)
750 .map(|(_, v)| v)
751 }
752
753 pub fn field_values(&self) -> &[(Field, FieldValue)] {
754 &self.field_values
755 }
756
757 pub fn filter_stored(&self, schema: &Schema) -> Document {
759 Document {
760 field_values: self
761 .field_values
762 .iter()
763 .filter(|(field, _)| {
764 schema
765 .get_field_entry(*field)
766 .is_some_and(|entry| entry.stored)
767 })
768 .cloned()
769 .collect(),
770 }
771 }
772
773 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
779 use std::collections::HashMap;
780
781 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
783 HashMap::new();
784
785 for (field, value) in &self.field_values {
786 if let Some(entry) = schema.get_field_entry(*field) {
787 let json_value = match value {
788 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
789 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
790 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
791 FieldValue::F64(n) => serde_json::json!(n),
792 FieldValue::Bytes(b) => {
793 use base64::Engine;
794 serde_json::Value::String(
795 base64::engine::general_purpose::STANDARD.encode(b),
796 )
797 }
798 FieldValue::SparseVector(entries) => {
799 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
800 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
801 serde_json::json!({
802 "indices": indices,
803 "values": values
804 })
805 }
806 FieldValue::DenseVector(values) => {
807 serde_json::json!(values)
808 }
809 FieldValue::Json(v) => v.clone(),
810 };
811 field_values_map
812 .entry(*field)
813 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
814 .2
815 .push(json_value);
816 }
817 }
818
819 let mut map = serde_json::Map::new();
821 for (_field, (name, is_multi, values)) in field_values_map {
822 let json_value = if is_multi || values.len() > 1 {
823 serde_json::Value::Array(values)
824 } else {
825 values.into_iter().next().unwrap()
826 };
827 map.insert(name, json_value);
828 }
829
830 serde_json::Value::Object(map)
831 }
832
833 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
842 let obj = json.as_object()?;
843 let mut doc = Document::new();
844
845 for (key, value) in obj {
846 if let Some(field) = schema.get_field(key) {
847 let field_entry = schema.get_field_entry(field)?;
848 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
849 }
850 }
851
852 Some(doc)
853 }
854
855 fn add_json_value(
857 doc: &mut Document,
858 field: Field,
859 field_type: &FieldType,
860 value: &serde_json::Value,
861 ) {
862 match value {
863 serde_json::Value::String(s) => {
864 if matches!(field_type, FieldType::Text) {
865 doc.add_text(field, s.clone());
866 }
867 }
868 serde_json::Value::Number(n) => {
869 match field_type {
870 FieldType::I64 => {
871 if let Some(i) = n.as_i64() {
872 doc.add_i64(field, i);
873 }
874 }
875 FieldType::U64 => {
876 if let Some(u) = n.as_u64() {
877 doc.add_u64(field, u);
878 } else if let Some(i) = n.as_i64() {
879 if i >= 0 {
881 doc.add_u64(field, i as u64);
882 }
883 }
884 }
885 FieldType::F64 => {
886 if let Some(f) = n.as_f64() {
887 doc.add_f64(field, f);
888 }
889 }
890 _ => {}
891 }
892 }
893 serde_json::Value::Array(arr) => {
895 for item in arr {
896 Self::add_json_value(doc, field, field_type, item);
897 }
898 }
899 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
901 if let (Some(indices_val), Some(values_val)) =
902 (obj.get("indices"), obj.get("values"))
903 {
904 let indices: Vec<u32> = indices_val
905 .as_array()
906 .map(|arr| {
907 arr.iter()
908 .filter_map(|v| v.as_u64().map(|n| n as u32))
909 .collect()
910 })
911 .unwrap_or_default();
912 let values: Vec<f32> = values_val
913 .as_array()
914 .map(|arr| {
915 arr.iter()
916 .filter_map(|v| v.as_f64().map(|n| n as f32))
917 .collect()
918 })
919 .unwrap_or_default();
920 if indices.len() == values.len() {
921 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
922 doc.add_sparse_vector(field, entries);
923 }
924 }
925 }
926 _ if matches!(field_type, FieldType::Json) => {
928 doc.add_json(field, value.clone());
929 }
930 serde_json::Value::Object(_) => {}
931 _ => {}
932 }
933 }
934}
935
// Unit tests for schema building, document accessors and JSON conversion.
#[cfg(test)]
mod tests {
    use super::*;

    // Fields added through the builder can be looked up by name afterwards.
    #[test]
    fn test_schema_builder() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, false);
        let count = builder.add_u64_field("count", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("title"), Some(title));
        assert_eq!(schema.get_field("body"), Some(body));
        assert_eq!(schema.get_field("count"), Some(count));
        assert_eq!(schema.get_field("nonexistent"), None);
    }

    // Values added to a document are returned by the typed accessors.
    #[test]
    fn test_document() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        assert_eq!(doc.get_first(title).unwrap().as_text(), Some("Hello World"));
        assert_eq!(doc.get_first(count).unwrap().as_u64(), Some(42));
    }

    // A document survives a serde round trip through a JSON string.
    #[test]
    fn test_document_serialization() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        // Serialize with serde directly (not `Document::to_json`).
        let json = serde_json::to_string(&doc).unwrap();
        println!("Serialized doc: {}", json);

        // Deserialize and check that both values came back.
        let doc2: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(
            doc2.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        assert_eq!(
            doc2.get_first(title).unwrap().as_text(),
            Some("Hello World")
        );
        assert_eq!(doc2.get_first(count).unwrap().as_u64(), Some(42));
    }

    // Adding several values for one field yields an array in `to_json`.
    #[test]
    fn test_multivalue_field() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let mut doc = Document::new();
        // Two values for the same field.
        doc.add_text(uris, "one");
        doc.add_text(uris, "two");
        doc.add_text(title, "Test Document");

        // `get_first` returns the value added first.
        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));

        // `get_all` returns every value in insertion order.
        let all_uris: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(all_uris.len(), 2);
        assert_eq!(all_uris[0].as_text(), Some("one"));
        assert_eq!(all_uris[1].as_text(), Some("two"));

        // More than one value -> JSON array.
        let json = doc.to_json(&schema);
        let uris_json = json.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        let uris_arr = uris_json.as_array().unwrap();
        assert_eq!(uris_arr.len(), 2);
        assert_eq!(uris_arr[0].as_str(), Some("one"));
        assert_eq!(uris_arr[1].as_str(), Some("two"));

        // Exactly one value on a non-multi field -> plain string.
        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    // A JSON array on input becomes several values of the same field.
    #[test]
    fn test_multivalue_from_json() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        // Input with an array for `uris` and a scalar for `title`.
        let json = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let doc = Document::from_json(&json, &schema).unwrap();

        // Each array element became its own value.
        let all_uris: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(all_uris.len(), 2);
        assert_eq!(all_uris[0].as_text(), Some("one"));
        assert_eq!(all_uris[1].as_text(), Some("two"));

        // The scalar became a single value.
        assert_eq!(
            doc.get_first(title).unwrap().as_text(),
            Some("Test Document")
        );

        // Converting back yields the array again.
        let json_out = doc.to_json(&schema);
        let uris_out = json_out.get("uris").unwrap().as_array().unwrap();
        assert_eq!(uris_out.len(), 2);
        assert_eq!(uris_out[0].as_str(), Some("one"));
        assert_eq!(uris_out[1].as_str(), Some("two"));
    }

    // A field flagged `multi` is emitted as an array even with one value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        // Flag `uris` as multi-valued; `title` keeps the default (not multi).
        builder.set_multi(uris, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        // The flag is recorded in the schema.
        assert!(schema.get_field_entry(uris).unwrap().multi);
        assert!(!schema.get_field_entry(title).unwrap().multi);

        let mut doc = Document::new();
        // Only one value for the multi field.
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        let uris_arr = uris_json.as_array().unwrap();
        assert_eq!(uris_arr.len(), 1);
        assert_eq!(uris_arr[0].as_str(), Some("only_one"));

        // The non-multi field with one value stays a plain string.
        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    // Sparse vectors round-trip through `to_json` / `from_json`.
    #[test]
    fn test_sparse_vector_field() {
        let mut builder = Schema::builder();
        let embedding = builder.add_sparse_vector_field("embedding", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        assert_eq!(
            schema.get_field_entry(embedding).unwrap().field_type,
            FieldType::SparseVector
        );

        let mut doc = Document::new();
        // Three (index, value) entries.
        doc.add_sparse_vector(embedding, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        doc.add_text(title, "Test Document");

        // The typed accessor returns the entries unchanged.
        let entries = doc
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        // JSON form is an object with parallel `indices` / `values` arrays.
        let json = doc.to_json(&schema);
        let embedding_json = json.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        assert_eq!(
            embedding_json
                .get("indices")
                .unwrap()
                .as_array()
                .unwrap()
                .len(),
            3
        );

        // Parse it back; values are compared with a tolerance because they
        // pass through f64 JSON numbers.
        let doc2 = Document::from_json(&json, &schema).unwrap();
        let entries2 = doc2
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries2[0].0, 0);
        assert!((entries2[0].1 - 1.0).abs() < 1e-6);
        assert_eq!(entries2[1].0, 5);
        assert!((entries2[1].1 - 2.5).abs() < 1e-6);
        assert_eq!(entries2[2].0, 10);
        assert!((entries2[2].1 - 0.5).abs() < 1e-6);
    }

    // JSON object values round-trip through a `Json` field.
    #[test]
    fn test_json_field() {
        let mut builder = Schema::builder();
        let metadata = builder.add_json_field("metadata", true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        assert_eq!(
            schema.get_field_entry(metadata).unwrap().field_type,
            FieldType::Json
        );
        // JSON fields are registered as not indexed; `stored` follows the argument.
        assert!(!schema.get_field_entry(metadata).unwrap().indexed);
        assert!(schema.get_field_entry(metadata).unwrap().stored);

        // A nested object with a string, an array and another object.
        let json_value = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, json_value.clone());
        doc.add_text(title, "Test Document");

        // The accessor returns the stored value unchanged.
        let stored_json = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json, &json_value);
        assert_eq!(
            stored_json.get("author").unwrap().as_str(),
            Some("John Doe")
        );

        // `to_json` embeds the value as is.
        let doc_json = doc.to_json(&schema);
        let metadata_out = doc_json.get("metadata").unwrap();
        assert_eq!(metadata_out, &json_value);

        // `from_json` restores the same value.
        let doc2 = Document::from_json(&doc_json, &schema).unwrap();
        let stored_json2 = doc2.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json2, &json_value);
    }

    // `add_json` accepts every JSON value kind, not only objects.
    #[test]
    fn test_json_field_various_types() {
        let mut builder = Schema::builder();
        let data = builder.add_json_field("data", true);
        let _schema = builder.build();

        // Array.
        let arr_value = serde_json::json!([1, 2, 3, "four", null]);
        let mut doc = Document::new();
        doc.add_json(data, arr_value.clone());
        assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), &arr_value);

        // String.
        let str_value = serde_json::json!("just a string");
        let mut doc2 = Document::new();
        doc2.add_json(data, str_value.clone());
        assert_eq!(doc2.get_first(data).unwrap().as_json().unwrap(), &str_value);

        // Number.
        let num_value = serde_json::json!(42.5);
        let mut doc3 = Document::new();
        doc3.add_json(data, num_value.clone());
        assert_eq!(doc3.get_first(data).unwrap().as_json().unwrap(), &num_value);

        // Null.
        let null_value = serde_json::Value::Null;
        let mut doc4 = Document::new();
        doc4.add_json(data, null_value.clone());
        assert_eq!(
            doc4.get_first(data).unwrap().as_json().unwrap(),
            &null_value
        );

        // Boolean.
        let bool_value = serde_json::json!(true);
        let mut doc5 = Document::new();
        doc5.add_json(data, bool_value.clone());
        assert_eq!(
            doc5.get_first(data).unwrap().as_json().unwrap(),
            &bool_value
        );
    }
}