1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Identifier of a field within a [`Schema`].
///
/// The wrapped `u32` is the field's position in the schema's field list:
/// `SchemaBuilder` hands out ids in insertion order, and `Schema` uses the id
/// as an index when resolving the corresponding [`FieldEntry`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
/// Kind of value a schema field holds.
///
/// Every variant has an explicit serde name (the string in its `rename`
/// attribute), so the serialized form does not depend on the Rust identifier.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FieldType {
    /// String values; serialized as `"text"`.
    #[serde(rename = "text")]
    Text,
    /// Unsigned 64-bit integers; serialized as `"u64"`.
    #[serde(rename = "u64")]
    U64,
    /// Signed 64-bit integers; serialized as `"i64"`.
    #[serde(rename = "i64")]
    I64,
    /// 64-bit floats; serialized as `"f64"`.
    #[serde(rename = "f64")]
    F64,
    /// Raw byte strings; serialized as `"bytes"`.
    #[serde(rename = "bytes")]
    Bytes,
    /// Sparse vectors (index/weight pairs); serialized as `"sparse_vector"`.
    #[serde(rename = "sparse_vector")]
    SparseVector,
    /// Dense `f32` vectors; serialized as `"dense_vector"`.
    #[serde(rename = "dense_vector")]
    DenseVector,
    /// Arbitrary JSON values; serialized as `"json"`.
    #[serde(rename = "json")]
    Json,
}
38
/// Schema description of a single field.
///
/// The optional members are skipped on serialization when `None` and fall
/// back to their defaults when absent on deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; `Schema` maps this name back to the field's id.
    pub name: String,
    /// Kind of value stored in the field.
    pub field_type: FieldType,
    /// Indexing flag for the field.
    pub indexed: bool,
    /// Storage flag; `Document::filter_stored` keeps only values of fields
    /// that have it set.
    pub stored: bool,
    /// Tokenizer name, if any. The builder sets it for text fields and leaves
    /// it `None` for every other field type.
    pub tokenizer: Option<String>,
    /// When set, `Document::to_json` always emits this field as an array,
    /// even for a single value. Defaults to `false` when deserializing.
    #[serde(default)]
    pub multi: bool,
    /// Optional position-tracking mode.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub positions: Option<PositionMode>,
    /// Configuration attached by the builder to sparse-vector fields.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Configuration attached by the builder to dense-vector fields.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
}
61
/// Position-tracking mode of a field.
///
/// Variants are serialized in `snake_case` (`ordinal`, `token_position`,
/// `full`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PositionMode {
    /// Ordinals only: `tracks_ordinal()` is true, `tracks_token_position()` is false.
    Ordinal,
    /// Token positions only: `tracks_token_position()` is true, `tracks_ordinal()` is false.
    TokenPosition,
    /// Both ordinals and token positions.
    Full,
}
76
77impl PositionMode {
78 pub fn tracks_ordinal(&self) -> bool {
80 matches!(self, PositionMode::Ordinal | PositionMode::Full)
81 }
82
83 pub fn tracks_token_position(&self) -> bool {
85 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
86 }
87}
88
/// Index structure selected for a dense-vector field.
///
/// `RaBitQ` is the `Default` variant and is therefore what
/// `DenseVectorConfig::index_type` becomes when the key is missing from
/// serialized input.
///
/// NOTE(review): with `rename_all = "snake_case"` serde inserts an underscore
/// before every interior capital, so the wire names are presumably `flat`,
/// `ra_bit_q`, `ivf_ra_bit_q` and `sca_n_n` — confirm these are the intended
/// spellings before relying on them in configuration files.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Flat index; `DenseVectorConfig::is_flat` reports this variant.
    Flat,
    /// Default index type.
    #[default]
    RaBitQ,
    /// Cluster-based variant; counted by `DenseVectorConfig::uses_ivf`.
    IvfRaBitQ,
    /// Counted by both `DenseVectorConfig::uses_ivf` and `uses_scann`.
    ScaNN,
}
103
/// Per-field configuration for dense-vector fields.
///
/// Only `dim` is required when deserializing; every other member has a serde
/// default.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Dimensionality of the field's vectors; also what `index_dim` returns
    /// when `mrl_dim` is unset.
    pub dim: usize,
    /// Index structure to use; defaults to `VectorIndexType::RaBitQ`.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Defaults to `true`.
    /// Presumably controls whether the raw vectors are kept alongside the
    /// index — TODO confirm against the indexing code.
    #[serde(default = "default_store_raw")]
    pub store_raw: bool,
    /// Explicit cluster count. When `None`, `optimal_num_clusters` derives one
    /// from the number of vectors.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_clusters: Option<usize>,
    /// Defaults to `32`.
    /// Presumably the number of clusters probed per query — TODO confirm.
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// When set, `index_dim` returns this value instead of `dim`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mrl_dim: Option<usize>,
    /// Explicit override for the value returned by `default_build_threshold`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build_threshold: Option<usize>,
}
140
/// Serde default for `DenseVectorConfig::store_raw`, used when the key is
/// absent from the serialized configuration.
fn default_store_raw() -> bool {
    const STORE_RAW_WHEN_UNSPECIFIED: bool = true;
    STORE_RAW_WHEN_UNSPECIFIED
}
144
/// Serde default for `DenseVectorConfig::nprobe`, used when the key is absent
/// from the serialized configuration.
fn default_nprobe() -> usize {
    const NPROBE_WHEN_UNSPECIFIED: usize = 32;
    NPROBE_WHEN_UNSPECIFIED
}
148
149impl DenseVectorConfig {
150 pub fn new(dim: usize) -> Self {
151 Self {
152 dim,
153 index_type: VectorIndexType::RaBitQ,
154 store_raw: true,
155 num_clusters: None,
156 nprobe: 32,
157 mrl_dim: None,
158 build_threshold: None,
159 }
160 }
161
162 pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
164 Self {
165 dim,
166 index_type: VectorIndexType::IvfRaBitQ,
167 store_raw: true,
168 num_clusters,
169 nprobe,
170 mrl_dim: None,
171 build_threshold: None,
172 }
173 }
174
175 pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
177 Self {
178 dim,
179 index_type: VectorIndexType::ScaNN,
180 store_raw: true,
181 num_clusters,
182 nprobe,
183 mrl_dim: None,
184 build_threshold: None,
185 }
186 }
187
188 pub fn flat(dim: usize) -> Self {
190 Self {
191 dim,
192 index_type: VectorIndexType::Flat,
193 store_raw: true,
194 num_clusters: None,
195 nprobe: 0,
196 mrl_dim: None,
197 build_threshold: None,
198 }
199 }
200
201 pub fn without_raw(dim: usize) -> Self {
202 Self {
203 dim,
204 index_type: VectorIndexType::RaBitQ,
205 store_raw: false,
206 num_clusters: None,
207 nprobe: 32,
208 mrl_dim: None,
209 build_threshold: None,
210 }
211 }
212
213 pub fn with_mrl_dim(mut self, mrl_dim: usize) -> Self {
215 self.mrl_dim = Some(mrl_dim);
216 self
217 }
218
219 pub fn with_build_threshold(mut self, threshold: usize) -> Self {
221 self.build_threshold = Some(threshold);
222 self
223 }
224
225 pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
227 self.num_clusters = Some(num_clusters);
228 self
229 }
230
231 pub fn index_dim(&self) -> usize {
233 self.mrl_dim.unwrap_or(self.dim)
234 }
235
236 pub fn uses_ivf(&self) -> bool {
238 matches!(
239 self.index_type,
240 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
241 )
242 }
243
244 pub fn uses_scann(&self) -> bool {
246 self.index_type == VectorIndexType::ScaNN
247 }
248
249 pub fn is_flat(&self) -> bool {
251 self.index_type == VectorIndexType::Flat
252 }
253
254 pub fn default_build_threshold(&self) -> usize {
256 self.build_threshold.unwrap_or(match self.index_type {
257 VectorIndexType::Flat => usize::MAX, VectorIndexType::RaBitQ => 1000,
259 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
260 })
261 }
262
263 pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
265 self.num_clusters.unwrap_or_else(|| {
266 let optimal = (num_vectors as f64).sqrt() as usize;
268 optimal.clamp(16, 4096)
269 })
270 }
271}
272
273use super::query_field_router::QueryRouterRule;
274
/// Immutable description of the fields of an index.
///
/// Built through [`SchemaBuilder`]; a field's [`Field`] id is its index in
/// `fields`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field entries, indexed by `Field.0`.
    fields: Vec<FieldEntry>,
    /// Lookup from field name to field id.
    name_to_field: HashMap<String, Field>,
    /// Default fields; empty when absent from serialized input.
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query routing rules; empty when absent from serialized input.
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
287
288impl Schema {
289 pub fn builder() -> SchemaBuilder {
290 SchemaBuilder::default()
291 }
292
293 pub fn get_field(&self, name: &str) -> Option<Field> {
294 self.name_to_field.get(name).copied()
295 }
296
297 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
298 self.fields.get(field.0 as usize)
299 }
300
301 pub fn get_field_name(&self, field: Field) -> Option<&str> {
302 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
303 }
304
305 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
306 self.fields
307 .iter()
308 .enumerate()
309 .map(|(i, e)| (Field(i as u32), e))
310 }
311
312 pub fn num_fields(&self) -> usize {
313 self.fields.len()
314 }
315
316 pub fn default_fields(&self) -> &[Field] {
318 &self.default_fields
319 }
320
321 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
323 self.default_fields = fields;
324 }
325
326 pub fn query_routers(&self) -> &[QueryRouterRule] {
328 &self.query_routers
329 }
330
331 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
333 self.query_routers = rules;
334 }
335}
336
/// Mutable builder that accumulates field definitions and produces a
/// [`Schema`] via `build`.
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Field entries in insertion order; the index becomes the field id.
    fields: Vec<FieldEntry>,
    /// Names of the default fields; resolved to ids in `build`.
    default_fields: Vec<String>,
    /// Query routing rules passed through to the schema unchanged.
    query_routers: Vec<QueryRouterRule>,
}
344
345impl SchemaBuilder {
346 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
347 self.add_field_with_tokenizer(
348 name,
349 FieldType::Text,
350 indexed,
351 stored,
352 Some("default".to_string()),
353 )
354 }
355
356 pub fn add_text_field_with_tokenizer(
357 &mut self,
358 name: &str,
359 indexed: bool,
360 stored: bool,
361 tokenizer: &str,
362 ) -> Field {
363 self.add_field_with_tokenizer(
364 name,
365 FieldType::Text,
366 indexed,
367 stored,
368 Some(tokenizer.to_string()),
369 )
370 }
371
372 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
373 self.add_field(name, FieldType::U64, indexed, stored)
374 }
375
376 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
377 self.add_field(name, FieldType::I64, indexed, stored)
378 }
379
380 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
381 self.add_field(name, FieldType::F64, indexed, stored)
382 }
383
384 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
385 self.add_field(name, FieldType::Bytes, false, stored)
386 }
387
388 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
393 self.add_field(name, FieldType::Json, false, stored)
394 }
395
396 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
401 self.add_sparse_vector_field_with_config(
402 name,
403 indexed,
404 stored,
405 crate::structures::SparseVectorConfig::default(),
406 )
407 }
408
409 pub fn add_sparse_vector_field_with_config(
414 &mut self,
415 name: &str,
416 indexed: bool,
417 stored: bool,
418 config: crate::structures::SparseVectorConfig,
419 ) -> Field {
420 let field = Field(self.fields.len() as u32);
421 self.fields.push(FieldEntry {
422 name: name.to_string(),
423 field_type: FieldType::SparseVector,
424 indexed,
425 stored,
426 tokenizer: None,
427 multi: false,
428 positions: None,
429 sparse_vector_config: Some(config),
430 dense_vector_config: None,
431 });
432 field
433 }
434
435 pub fn set_sparse_vector_config(
437 &mut self,
438 field: Field,
439 config: crate::structures::SparseVectorConfig,
440 ) {
441 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
442 entry.sparse_vector_config = Some(config);
443 }
444 }
445
446 pub fn add_dense_vector_field(
451 &mut self,
452 name: &str,
453 dim: usize,
454 indexed: bool,
455 stored: bool,
456 ) -> Field {
457 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
458 }
459
460 pub fn add_dense_vector_field_with_config(
462 &mut self,
463 name: &str,
464 indexed: bool,
465 stored: bool,
466 config: DenseVectorConfig,
467 ) -> Field {
468 let field = Field(self.fields.len() as u32);
469 self.fields.push(FieldEntry {
470 name: name.to_string(),
471 field_type: FieldType::DenseVector,
472 indexed,
473 stored,
474 tokenizer: None,
475 multi: false,
476 positions: None,
477 sparse_vector_config: None,
478 dense_vector_config: Some(config),
479 });
480 field
481 }
482
483 fn add_field(
484 &mut self,
485 name: &str,
486 field_type: FieldType,
487 indexed: bool,
488 stored: bool,
489 ) -> Field {
490 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
491 }
492
493 fn add_field_with_tokenizer(
494 &mut self,
495 name: &str,
496 field_type: FieldType,
497 indexed: bool,
498 stored: bool,
499 tokenizer: Option<String>,
500 ) -> Field {
501 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
502 }
503
504 fn add_field_full(
505 &mut self,
506 name: &str,
507 field_type: FieldType,
508 indexed: bool,
509 stored: bool,
510 tokenizer: Option<String>,
511 multi: bool,
512 ) -> Field {
513 let field = Field(self.fields.len() as u32);
514 self.fields.push(FieldEntry {
515 name: name.to_string(),
516 field_type,
517 indexed,
518 stored,
519 tokenizer,
520 multi,
521 positions: None,
522 sparse_vector_config: None,
523 dense_vector_config: None,
524 });
525 field
526 }
527
528 pub fn set_multi(&mut self, field: Field, multi: bool) {
530 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
531 entry.multi = multi;
532 }
533 }
534
535 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
537 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
538 entry.positions = Some(mode);
539 }
540 }
541
542 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
544 self.default_fields = field_names;
545 }
546
547 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
549 self.query_routers = rules;
550 }
551
552 pub fn build(self) -> Schema {
553 let mut name_to_field = HashMap::new();
554 for (i, entry) in self.fields.iter().enumerate() {
555 name_to_field.insert(entry.name.clone(), Field(i as u32));
556 }
557
558 let default_fields: Vec<Field> = self
560 .default_fields
561 .iter()
562 .filter_map(|name| name_to_field.get(name).copied())
563 .collect();
564
565 Schema {
566 fields: self.fields,
567 name_to_field,
568 default_fields,
569 query_routers: self.query_routers,
570 }
571 }
572}
573
/// A single value stored in a [`Document`].
///
/// The variants mirror [`FieldType`], and each has an explicit serde name
/// (the string in its `rename` attribute).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FieldValue {
    /// Text value.
    #[serde(rename = "text")]
    Text(String),
    /// Unsigned 64-bit integer.
    #[serde(rename = "u64")]
    U64(u64),
    /// Signed 64-bit integer.
    #[serde(rename = "i64")]
    I64(i64),
    /// 64-bit float.
    #[serde(rename = "f64")]
    F64(f64),
    /// Raw bytes.
    #[serde(rename = "bytes")]
    Bytes(Vec<u8>),
    /// Sparse vector as `(index, weight)` pairs.
    #[serde(rename = "sparse_vector")]
    SparseVector(Vec<(u32, f32)>),
    /// Dense vector of `f32` components.
    #[serde(rename = "dense_vector")]
    DenseVector(Vec<f32>),
    /// Arbitrary JSON value.
    #[serde(rename = "json")]
    Json(serde_json::Value),
}
597
598impl FieldValue {
599 pub fn as_text(&self) -> Option<&str> {
600 match self {
601 FieldValue::Text(s) => Some(s),
602 _ => None,
603 }
604 }
605
606 pub fn as_u64(&self) -> Option<u64> {
607 match self {
608 FieldValue::U64(v) => Some(*v),
609 _ => None,
610 }
611 }
612
613 pub fn as_i64(&self) -> Option<i64> {
614 match self {
615 FieldValue::I64(v) => Some(*v),
616 _ => None,
617 }
618 }
619
620 pub fn as_f64(&self) -> Option<f64> {
621 match self {
622 FieldValue::F64(v) => Some(*v),
623 _ => None,
624 }
625 }
626
627 pub fn as_bytes(&self) -> Option<&[u8]> {
628 match self {
629 FieldValue::Bytes(b) => Some(b),
630 _ => None,
631 }
632 }
633
634 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
635 match self {
636 FieldValue::SparseVector(entries) => Some(entries),
637 _ => None,
638 }
639 }
640
641 pub fn as_dense_vector(&self) -> Option<&[f32]> {
642 match self {
643 FieldValue::DenseVector(v) => Some(v),
644 _ => None,
645 }
646 }
647
648 pub fn as_json(&self) -> Option<&serde_json::Value> {
649 match self {
650 FieldValue::Json(v) => Some(v),
651 _ => None,
652 }
653 }
654}
655
/// An ordered list of `(field, value)` pairs.
///
/// The same field may occur more than once; insertion order is preserved.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// Values in the order they were added.
    field_values: Vec<(Field, FieldValue)>,
}
661
662impl Document {
663 pub fn new() -> Self {
664 Self::default()
665 }
666
667 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
668 self.field_values
669 .push((field, FieldValue::Text(value.into())));
670 }
671
672 pub fn add_u64(&mut self, field: Field, value: u64) {
673 self.field_values.push((field, FieldValue::U64(value)));
674 }
675
676 pub fn add_i64(&mut self, field: Field, value: i64) {
677 self.field_values.push((field, FieldValue::I64(value)));
678 }
679
680 pub fn add_f64(&mut self, field: Field, value: f64) {
681 self.field_values.push((field, FieldValue::F64(value)));
682 }
683
684 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
685 self.field_values.push((field, FieldValue::Bytes(value)));
686 }
687
688 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
689 self.field_values
690 .push((field, FieldValue::SparseVector(entries)));
691 }
692
693 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
694 self.field_values
695 .push((field, FieldValue::DenseVector(values)));
696 }
697
698 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
699 self.field_values.push((field, FieldValue::Json(value)));
700 }
701
702 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
703 self.field_values
704 .iter()
705 .find(|(f, _)| *f == field)
706 .map(|(_, v)| v)
707 }
708
709 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
710 self.field_values
711 .iter()
712 .filter(move |(f, _)| *f == field)
713 .map(|(_, v)| v)
714 }
715
716 pub fn field_values(&self) -> &[(Field, FieldValue)] {
717 &self.field_values
718 }
719
720 pub fn filter_stored(&self, schema: &Schema) -> Document {
722 Document {
723 field_values: self
724 .field_values
725 .iter()
726 .filter(|(field, _)| {
727 schema
728 .get_field_entry(*field)
729 .is_some_and(|entry| entry.stored)
730 })
731 .cloned()
732 .collect(),
733 }
734 }
735
736 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
742 use std::collections::HashMap;
743
744 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
746 HashMap::new();
747
748 for (field, value) in &self.field_values {
749 if let Some(entry) = schema.get_field_entry(*field) {
750 let json_value = match value {
751 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
752 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
753 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
754 FieldValue::F64(n) => serde_json::json!(n),
755 FieldValue::Bytes(b) => {
756 use base64::Engine;
757 serde_json::Value::String(
758 base64::engine::general_purpose::STANDARD.encode(b),
759 )
760 }
761 FieldValue::SparseVector(entries) => {
762 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
763 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
764 serde_json::json!({
765 "indices": indices,
766 "values": values
767 })
768 }
769 FieldValue::DenseVector(values) => {
770 serde_json::json!(values)
771 }
772 FieldValue::Json(v) => v.clone(),
773 };
774 field_values_map
775 .entry(*field)
776 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
777 .2
778 .push(json_value);
779 }
780 }
781
782 let mut map = serde_json::Map::new();
784 for (_field, (name, is_multi, values)) in field_values_map {
785 let json_value = if is_multi || values.len() > 1 {
786 serde_json::Value::Array(values)
787 } else {
788 values.into_iter().next().unwrap()
789 };
790 map.insert(name, json_value);
791 }
792
793 serde_json::Value::Object(map)
794 }
795
796 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
805 let obj = json.as_object()?;
806 let mut doc = Document::new();
807
808 for (key, value) in obj {
809 if let Some(field) = schema.get_field(key) {
810 let field_entry = schema.get_field_entry(field)?;
811 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
812 }
813 }
814
815 Some(doc)
816 }
817
818 fn add_json_value(
820 doc: &mut Document,
821 field: Field,
822 field_type: &FieldType,
823 value: &serde_json::Value,
824 ) {
825 match value {
826 serde_json::Value::String(s) => {
827 if matches!(field_type, FieldType::Text) {
828 doc.add_text(field, s.clone());
829 }
830 }
831 serde_json::Value::Number(n) => {
832 match field_type {
833 FieldType::I64 => {
834 if let Some(i) = n.as_i64() {
835 doc.add_i64(field, i);
836 }
837 }
838 FieldType::U64 => {
839 if let Some(u) = n.as_u64() {
840 doc.add_u64(field, u);
841 } else if let Some(i) = n.as_i64() {
842 if i >= 0 {
844 doc.add_u64(field, i as u64);
845 }
846 }
847 }
848 FieldType::F64 => {
849 if let Some(f) = n.as_f64() {
850 doc.add_f64(field, f);
851 }
852 }
853 _ => {}
854 }
855 }
856 serde_json::Value::Array(arr) => {
858 for item in arr {
859 Self::add_json_value(doc, field, field_type, item);
860 }
861 }
862 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
864 if let (Some(indices_val), Some(values_val)) =
865 (obj.get("indices"), obj.get("values"))
866 {
867 let indices: Vec<u32> = indices_val
868 .as_array()
869 .map(|arr| {
870 arr.iter()
871 .filter_map(|v| v.as_u64().map(|n| n as u32))
872 .collect()
873 })
874 .unwrap_or_default();
875 let values: Vec<f32> = values_val
876 .as_array()
877 .map(|arr| {
878 arr.iter()
879 .filter_map(|v| v.as_f64().map(|n| n as f32))
880 .collect()
881 })
882 .unwrap_or_default();
883 if indices.len() == values.len() {
884 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
885 doc.add_sparse_vector(field, entries);
886 }
887 }
888 }
889 _ if matches!(field_type, FieldType::Json) => {
891 doc.add_json(field, value.clone());
892 }
893 serde_json::Value::Object(_) => {}
894 _ => {}
895 }
896 }
897}
898
/// Unit tests for the schema builder, the document accessors, and the
/// document <-> JSON conversions.
#[cfg(test)]
mod tests {
    use super::*;

    /// Field ids returned by the builder resolve by name in the built schema.
    #[test]
    fn test_schema_builder() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, false);
        let count = builder.add_u64_field("count", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("title"), Some(title));
        assert_eq!(schema.get_field("body"), Some(body));
        assert_eq!(schema.get_field("count"), Some(count));
        assert_eq!(schema.get_field("nonexistent"), None);
    }

    /// Values added to a document can be read back through the typed accessors.
    #[test]
    fn test_document() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        assert_eq!(doc.get_first(title).unwrap().as_text(), Some("Hello World"));
        assert_eq!(doc.get_first(count).unwrap().as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip (derive-based encoding,
    /// not `to_json` / `from_json`).
    #[test]
    fn test_document_serialization() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        // Serialize with the derived serde implementation.
        let json = serde_json::to_string(&doc).unwrap();
        println!("Serialized doc: {}", json);

        // Deserialize and check that both values are still present.
        let doc2: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(
            doc2.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        assert_eq!(
            doc2.get_first(title).unwrap().as_text(),
            Some("Hello World")
        );
        assert_eq!(doc2.get_first(count).unwrap().as_u64(), Some(42));
    }

    /// Several values for one field are kept in order and become a JSON array.
    #[test]
    fn test_multivalue_field() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let mut doc = Document::new();
        // Two values for the same field.
        doc.add_text(uris, "one");
        doc.add_text(uris, "two");
        doc.add_text(title, "Test Document");

        // `get_first` returns the first value added.
        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));

        // `get_all` returns every value in insertion order.
        let all_uris: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(all_uris.len(), 2);
        assert_eq!(all_uris[0].as_text(), Some("one"));
        assert_eq!(all_uris[1].as_text(), Some("two"));

        // More than one value => array in the JSON output.
        let json = doc.to_json(&schema);
        let uris_json = json.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        let uris_arr = uris_json.as_array().unwrap();
        assert_eq!(uris_arr.len(), 2);
        assert_eq!(uris_arr[0].as_str(), Some("one"));
        assert_eq!(uris_arr[1].as_str(), Some("two"));

        // Exactly one value on a non-multi field => bare value.
        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// A JSON array is parsed into one value per element and round-trips.
    #[test]
    fn test_multivalue_from_json() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        // Input with an array for `uris`.
        let json = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let doc = Document::from_json(&json, &schema).unwrap();

        // Every array element became a separate value.
        let all_uris: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(all_uris.len(), 2);
        assert_eq!(all_uris[0].as_text(), Some("one"));
        assert_eq!(all_uris[1].as_text(), Some("two"));

        // The scalar field is parsed as a single value.
        assert_eq!(
            doc.get_first(title).unwrap().as_text(),
            Some("Test Document")
        );

        // Converting back yields the same array.
        let json_out = doc.to_json(&schema);
        let uris_out = json_out.get("uris").unwrap().as_array().unwrap();
        assert_eq!(uris_out.len(), 2);
        assert_eq!(uris_out[0].as_str(), Some("one"));
        assert_eq!(uris_out[1].as_str(), Some("two"));
    }

    /// A field flagged `multi` is emitted as an array even with one value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        // Flag `uris` as multi; `title` keeps the default.
        builder.set_multi(uris, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert!(schema.get_field_entry(uris).unwrap().multi);
        assert!(!schema.get_field_entry(title).unwrap().multi);

        let mut doc = Document::new();
        // Only one value for the multi field.
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        let uris_arr = uris_json.as_array().unwrap();
        assert_eq!(uris_arr.len(), 1);
        assert_eq!(uris_arr[0].as_str(), Some("only_one"));

        // The non-multi field with one value stays a bare string.
        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// Sparse vectors are stored as pairs and round-trip through JSON.
    #[test]
    fn test_sparse_vector_field() {
        let mut builder = Schema::builder();
        let embedding = builder.add_sparse_vector_field("embedding", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        assert_eq!(
            schema.get_field_entry(embedding).unwrap().field_type,
            FieldType::SparseVector
        );

        let mut doc = Document::new();
        // Sparse vector given as (index, weight) pairs.
        doc.add_sparse_vector(embedding, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        doc.add_text(title, "Test Document");

        let entries = doc
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        // JSON form is an object with parallel arrays.
        let json = doc.to_json(&schema);
        let embedding_json = json.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        assert_eq!(
            embedding_json
                .get("indices")
                .unwrap()
                .as_array()
                .unwrap()
                .len(),
            3
        );

        // Parse the JSON back; weights are compared with a tolerance because
        // they pass through f64.
        let doc2 = Document::from_json(&json, &schema).unwrap();
        let entries2 = doc2
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries2[0].0, 0);
        assert!((entries2[0].1 - 1.0).abs() < 1e-6);
        assert_eq!(entries2[1].0, 5);
        assert!((entries2[1].1 - 2.5).abs() < 1e-6);
        assert_eq!(entries2[2].0, 10);
        assert!((entries2[2].1 - 0.5).abs() < 1e-6);
    }

    /// JSON fields are never indexed and keep object values verbatim.
    #[test]
    fn test_json_field() {
        let mut builder = Schema::builder();
        let metadata = builder.add_json_field("metadata", true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        assert_eq!(
            schema.get_field_entry(metadata).unwrap().field_type,
            FieldType::Json
        );
        // `add_json_field` always registers the field as not indexed.
        assert!(!schema.get_field_entry(metadata).unwrap().indexed);
        assert!(schema.get_field_entry(metadata).unwrap().stored);

        // Nested object value.
        let json_value = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, json_value.clone());
        doc.add_text(title, "Test Document");

        // The stored value is the original JSON.
        let stored_json = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json, &json_value);
        assert_eq!(
            stored_json.get("author").unwrap().as_str(),
            Some("John Doe")
        );

        // `to_json` emits it unchanged.
        let doc_json = doc.to_json(&schema);
        let metadata_out = doc_json.get("metadata").unwrap();
        assert_eq!(metadata_out, &json_value);

        // And `from_json` reads it back unchanged.
        let doc2 = Document::from_json(&doc_json, &schema).unwrap();
        let stored_json2 = doc2.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json2, &json_value);
    }

    /// `add_json` accepts every JSON value kind.
    #[test]
    fn test_json_field_various_types() {
        let mut builder = Schema::builder();
        let data = builder.add_json_field("data", true);
        let _schema = builder.build();

        // Array value.
        let arr_value = serde_json::json!([1, 2, 3, "four", null]);
        let mut doc = Document::new();
        doc.add_json(data, arr_value.clone());
        assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), &arr_value);

        // String value.
        let str_value = serde_json::json!("just a string");
        let mut doc2 = Document::new();
        doc2.add_json(data, str_value.clone());
        assert_eq!(doc2.get_first(data).unwrap().as_json().unwrap(), &str_value);

        // Number value.
        let num_value = serde_json::json!(42.5);
        let mut doc3 = Document::new();
        doc3.add_json(data, num_value.clone());
        assert_eq!(doc3.get_first(data).unwrap().as_json().unwrap(), &num_value);

        // Null value.
        let null_value = serde_json::Value::Null;
        let mut doc4 = Document::new();
        doc4.add_json(data, null_value.clone());
        assert_eq!(
            doc4.get_first(data).unwrap().as_json().unwrap(),
            &null_value
        );

        // Boolean value.
        let bool_value = serde_json::json!(true);
        let mut doc5 = Document::new();
        doc5.add_json(data, bool_value.clone());
        assert_eq!(
            doc5.get_first(data).unwrap().as_json().unwrap(),
            &bool_value
        );
    }
}