1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Handle identifying one field of a [`Schema`].
///
/// The wrapped `u32` is the field's position in the schema's field list:
/// [`SchemaBuilder`] hands out consecutive values as fields are added, and
/// [`Schema::get_field_entry`] uses the value as an index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub enum FieldType {
13 #[serde(rename = "text")]
15 Text,
16 #[serde(rename = "u64")]
18 U64,
19 #[serde(rename = "i64")]
21 I64,
22 #[serde(rename = "f64")]
24 F64,
25 #[serde(rename = "bytes")]
27 Bytes,
28 #[serde(rename = "sparse_vector")]
30 SparseVector,
31 #[serde(rename = "dense_vector")]
33 DenseVector,
34 #[serde(rename = "json")]
36 Json,
37}
38
/// Definition of a single schema field: its name, type and per-field options.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; [`Schema::get_field`] looks fields up by this string.
    pub name: String,
    /// Kind of value the field holds.
    pub field_type: FieldType,
    /// Indexing flag supplied when the field was added.
    /// NOTE(review): presumably decides whether the field is searchable — the
    /// consumer of this flag is not in this file; confirm.
    pub indexed: bool,
    /// Whether values of this field survive [`Document::filter_stored`].
    pub stored: bool,
    /// Tokenizer name. The builder sets `"default"` for plain text fields and
    /// `None` for every non-text field.
    pub tokenizer: Option<String>,
    /// When `true`, [`Document::to_json`] always emits this field as an array,
    /// even if the document holds a single value. Missing in serialized input
    /// means `false`.
    #[serde(default)]
    pub multi: bool,
    /// Optional position tracking mode; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub positions: Option<PositionMode>,
    /// Sparse vector settings; the builder fills this only for sparse vector
    /// fields. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Dense vector settings; the builder fills this only for dense vector
    /// fields. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
}
61
/// Which kinds of position information are tracked for a field.
///
/// Serialized with `snake_case` variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PositionMode {
    /// Track ordinals only.
    /// NOTE(review): "ordinal" presumably means the index of the value within a
    /// multi-valued field — confirm against the indexer.
    Ordinal,
    /// Track token positions only.
    TokenPosition,
    /// Track both ordinals and token positions.
    Full,
}
76
77impl PositionMode {
78 pub fn tracks_ordinal(&self) -> bool {
80 matches!(self, PositionMode::Ordinal | PositionMode::Full)
81 }
82
83 pub fn tracks_token_position(&self) -> bool {
85 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
86 }
87}
88
/// Index structure used for a dense vector field.
///
/// The default is [`VectorIndexType::RaBitQ`].
///
/// NOTE(review): variants are renamed with serde's `snake_case` rule, which
/// presumably yields wire names such as `"ra_bit_q"`, `"ivf_ra_bit_q"` and
/// `"sca_n_n"` — confirm before relying on (or changing) the serialized names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Flat index. NOTE(review): presumably exhaustive search with no trained
    /// structure — confirm.
    Flat,
    /// RaBitQ index; the default choice.
    #[default]
    RaBitQ,
    /// IVF-based RaBitQ index; reported as IVF by `DenseVectorConfig::uses_ivf`.
    IvfRaBitQ,
    /// ScaNN index; also reported as IVF by `DenseVectorConfig::uses_ivf`.
    ScaNN,
}
103
/// Configuration of a dense vector field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Dimensionality of the vectors.
    pub dim: usize,
    /// Index structure to use; falls back to the enum default when missing
    /// from serialized input.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Explicit cluster count. When `None`, `optimal_num_clusters` derives one
    /// from the number of vectors. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_clusters: Option<usize>,
    /// Probe count; `default_nprobe` supplies the value when it is missing
    /// from serialized input.
    /// NOTE(review): presumably the number of clusters searched per query — confirm.
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// Optional reduced dimension; when set, `index_dim` returns it instead of
    /// `dim`. Omitted from serialized output when `None`.
    /// NOTE(review): "mrl" presumably stands for Matryoshka representation
    /// learning — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mrl_dim: Option<usize>,
    /// Optional override for the per-index-type default returned by
    /// `default_build_threshold`. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build_threshold: Option<usize>,
}
137
/// Serde default for `DenseVectorConfig::nprobe`.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
141
142impl DenseVectorConfig {
143 pub fn new(dim: usize) -> Self {
144 Self {
145 dim,
146 index_type: VectorIndexType::RaBitQ,
147 num_clusters: None,
148 nprobe: 32,
149 mrl_dim: None,
150 build_threshold: None,
151 }
152 }
153
154 pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
156 Self {
157 dim,
158 index_type: VectorIndexType::IvfRaBitQ,
159 num_clusters,
160 nprobe,
161 mrl_dim: None,
162 build_threshold: None,
163 }
164 }
165
166 pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
168 Self {
169 dim,
170 index_type: VectorIndexType::ScaNN,
171 num_clusters,
172 nprobe,
173 mrl_dim: None,
174 build_threshold: None,
175 }
176 }
177
178 pub fn flat(dim: usize) -> Self {
180 Self {
181 dim,
182 index_type: VectorIndexType::Flat,
183 num_clusters: None,
184 nprobe: 0,
185 mrl_dim: None,
186 build_threshold: None,
187 }
188 }
189
190 pub fn with_mrl_dim(mut self, mrl_dim: usize) -> Self {
192 self.mrl_dim = Some(mrl_dim);
193 self
194 }
195
196 pub fn with_build_threshold(mut self, threshold: usize) -> Self {
198 self.build_threshold = Some(threshold);
199 self
200 }
201
202 pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
204 self.num_clusters = Some(num_clusters);
205 self
206 }
207
208 pub fn index_dim(&self) -> usize {
210 self.mrl_dim.unwrap_or(self.dim)
211 }
212
213 pub fn uses_ivf(&self) -> bool {
215 matches!(
216 self.index_type,
217 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
218 )
219 }
220
221 pub fn uses_scann(&self) -> bool {
223 self.index_type == VectorIndexType::ScaNN
224 }
225
226 pub fn is_flat(&self) -> bool {
228 self.index_type == VectorIndexType::Flat
229 }
230
231 pub fn default_build_threshold(&self) -> usize {
233 self.build_threshold.unwrap_or(match self.index_type {
234 VectorIndexType::Flat => usize::MAX, VectorIndexType::RaBitQ => 1000,
236 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
237 })
238 }
239
240 pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
242 self.num_clusters.unwrap_or_else(|| {
243 let optimal = (num_vectors as f64).sqrt() as usize;
245 optimal.clamp(16, 4096)
246 })
247 }
248}
249
250use super::query_field_router::QueryRouterRule;
251
/// Immutable description of the fields of an index, plus query defaults.
///
/// Built through [`SchemaBuilder`]; a [`Field`] is an index into `fields`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field definitions, in the order they were added.
    fields: Vec<FieldEntry>,
    /// Lookup from field name to its handle.
    name_to_field: HashMap<String, Field>,
    /// Default fields; empty when missing from serialized input.
    /// NOTE(review): presumably the fields searched when a query names none — confirm.
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query routing rules; empty when missing from serialized input.
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
264
265impl Schema {
266 pub fn builder() -> SchemaBuilder {
267 SchemaBuilder::default()
268 }
269
270 pub fn get_field(&self, name: &str) -> Option<Field> {
271 self.name_to_field.get(name).copied()
272 }
273
274 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
275 self.fields.get(field.0 as usize)
276 }
277
278 pub fn get_field_name(&self, field: Field) -> Option<&str> {
279 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
280 }
281
282 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
283 self.fields
284 .iter()
285 .enumerate()
286 .map(|(i, e)| (Field(i as u32), e))
287 }
288
289 pub fn num_fields(&self) -> usize {
290 self.fields.len()
291 }
292
293 pub fn default_fields(&self) -> &[Field] {
295 &self.default_fields
296 }
297
298 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
300 self.default_fields = fields;
301 }
302
303 pub fn query_routers(&self) -> &[QueryRouterRule] {
305 &self.query_routers
306 }
307
308 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
310 self.query_routers = rules;
311 }
312}
313
/// Incrementally collects field definitions and produces a [`Schema`].
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Field definitions added so far; a field's handle is its index here.
    fields: Vec<FieldEntry>,
    /// Names of the default fields; resolved to handles when the schema is built.
    default_fields: Vec<String>,
    /// Query routing rules passed through to the built schema.
    query_routers: Vec<QueryRouterRule>,
}
321
322impl SchemaBuilder {
323 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
324 self.add_field_with_tokenizer(
325 name,
326 FieldType::Text,
327 indexed,
328 stored,
329 Some("default".to_string()),
330 )
331 }
332
333 pub fn add_text_field_with_tokenizer(
334 &mut self,
335 name: &str,
336 indexed: bool,
337 stored: bool,
338 tokenizer: &str,
339 ) -> Field {
340 self.add_field_with_tokenizer(
341 name,
342 FieldType::Text,
343 indexed,
344 stored,
345 Some(tokenizer.to_string()),
346 )
347 }
348
349 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
350 self.add_field(name, FieldType::U64, indexed, stored)
351 }
352
353 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
354 self.add_field(name, FieldType::I64, indexed, stored)
355 }
356
357 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
358 self.add_field(name, FieldType::F64, indexed, stored)
359 }
360
361 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
362 self.add_field(name, FieldType::Bytes, false, stored)
363 }
364
365 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
370 self.add_field(name, FieldType::Json, false, stored)
371 }
372
373 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
378 self.add_sparse_vector_field_with_config(
379 name,
380 indexed,
381 stored,
382 crate::structures::SparseVectorConfig::default(),
383 )
384 }
385
386 pub fn add_sparse_vector_field_with_config(
391 &mut self,
392 name: &str,
393 indexed: bool,
394 stored: bool,
395 config: crate::structures::SparseVectorConfig,
396 ) -> Field {
397 let field = Field(self.fields.len() as u32);
398 self.fields.push(FieldEntry {
399 name: name.to_string(),
400 field_type: FieldType::SparseVector,
401 indexed,
402 stored,
403 tokenizer: None,
404 multi: false,
405 positions: None,
406 sparse_vector_config: Some(config),
407 dense_vector_config: None,
408 });
409 field
410 }
411
412 pub fn set_sparse_vector_config(
414 &mut self,
415 field: Field,
416 config: crate::structures::SparseVectorConfig,
417 ) {
418 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
419 entry.sparse_vector_config = Some(config);
420 }
421 }
422
423 pub fn add_dense_vector_field(
428 &mut self,
429 name: &str,
430 dim: usize,
431 indexed: bool,
432 stored: bool,
433 ) -> Field {
434 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
435 }
436
437 pub fn add_dense_vector_field_with_config(
439 &mut self,
440 name: &str,
441 indexed: bool,
442 stored: bool,
443 config: DenseVectorConfig,
444 ) -> Field {
445 let field = Field(self.fields.len() as u32);
446 self.fields.push(FieldEntry {
447 name: name.to_string(),
448 field_type: FieldType::DenseVector,
449 indexed,
450 stored,
451 tokenizer: None,
452 multi: false,
453 positions: None,
454 sparse_vector_config: None,
455 dense_vector_config: Some(config),
456 });
457 field
458 }
459
460 fn add_field(
461 &mut self,
462 name: &str,
463 field_type: FieldType,
464 indexed: bool,
465 stored: bool,
466 ) -> Field {
467 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
468 }
469
470 fn add_field_with_tokenizer(
471 &mut self,
472 name: &str,
473 field_type: FieldType,
474 indexed: bool,
475 stored: bool,
476 tokenizer: Option<String>,
477 ) -> Field {
478 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
479 }
480
481 fn add_field_full(
482 &mut self,
483 name: &str,
484 field_type: FieldType,
485 indexed: bool,
486 stored: bool,
487 tokenizer: Option<String>,
488 multi: bool,
489 ) -> Field {
490 let field = Field(self.fields.len() as u32);
491 self.fields.push(FieldEntry {
492 name: name.to_string(),
493 field_type,
494 indexed,
495 stored,
496 tokenizer,
497 multi,
498 positions: None,
499 sparse_vector_config: None,
500 dense_vector_config: None,
501 });
502 field
503 }
504
505 pub fn set_multi(&mut self, field: Field, multi: bool) {
507 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
508 entry.multi = multi;
509 }
510 }
511
512 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
514 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
515 entry.positions = Some(mode);
516 }
517 }
518
519 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
521 self.default_fields = field_names;
522 }
523
524 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
526 self.query_routers = rules;
527 }
528
529 pub fn build(self) -> Schema {
530 let mut name_to_field = HashMap::new();
531 for (i, entry) in self.fields.iter().enumerate() {
532 name_to_field.insert(entry.name.clone(), Field(i as u32));
533 }
534
535 let default_fields: Vec<Field> = self
537 .default_fields
538 .iter()
539 .filter_map(|name| name_to_field.get(name).copied())
540 .collect();
541
542 Schema {
543 fields: self.fields,
544 name_to_field,
545 default_fields,
546 query_routers: self.query_routers,
547 }
548 }
549}
550
551#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
553pub enum FieldValue {
554 #[serde(rename = "text")]
555 Text(String),
556 #[serde(rename = "u64")]
557 U64(u64),
558 #[serde(rename = "i64")]
559 I64(i64),
560 #[serde(rename = "f64")]
561 F64(f64),
562 #[serde(rename = "bytes")]
563 Bytes(Vec<u8>),
564 #[serde(rename = "sparse_vector")]
566 SparseVector(Vec<(u32, f32)>),
567 #[serde(rename = "dense_vector")]
569 DenseVector(Vec<f32>),
570 #[serde(rename = "json")]
572 Json(serde_json::Value),
573}
574
575impl FieldValue {
576 pub fn as_text(&self) -> Option<&str> {
577 match self {
578 FieldValue::Text(s) => Some(s),
579 _ => None,
580 }
581 }
582
583 pub fn as_u64(&self) -> Option<u64> {
584 match self {
585 FieldValue::U64(v) => Some(*v),
586 _ => None,
587 }
588 }
589
590 pub fn as_i64(&self) -> Option<i64> {
591 match self {
592 FieldValue::I64(v) => Some(*v),
593 _ => None,
594 }
595 }
596
597 pub fn as_f64(&self) -> Option<f64> {
598 match self {
599 FieldValue::F64(v) => Some(*v),
600 _ => None,
601 }
602 }
603
604 pub fn as_bytes(&self) -> Option<&[u8]> {
605 match self {
606 FieldValue::Bytes(b) => Some(b),
607 _ => None,
608 }
609 }
610
611 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
612 match self {
613 FieldValue::SparseVector(entries) => Some(entries),
614 _ => None,
615 }
616 }
617
618 pub fn as_dense_vector(&self) -> Option<&[f32]> {
619 match self {
620 FieldValue::DenseVector(v) => Some(v),
621 _ => None,
622 }
623 }
624
625 pub fn as_json(&self) -> Option<&serde_json::Value> {
626 match self {
627 FieldValue::Json(v) => Some(v),
628 _ => None,
629 }
630 }
631}
632
/// A document: an ordered list of `(field, value)` pairs.
///
/// The same field may appear several times; the `add_*` methods append and
/// never replace.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// Values in insertion order.
    field_values: Vec<(Field, FieldValue)>,
}
638
639impl Document {
640 pub fn new() -> Self {
641 Self::default()
642 }
643
644 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
645 self.field_values
646 .push((field, FieldValue::Text(value.into())));
647 }
648
649 pub fn add_u64(&mut self, field: Field, value: u64) {
650 self.field_values.push((field, FieldValue::U64(value)));
651 }
652
653 pub fn add_i64(&mut self, field: Field, value: i64) {
654 self.field_values.push((field, FieldValue::I64(value)));
655 }
656
657 pub fn add_f64(&mut self, field: Field, value: f64) {
658 self.field_values.push((field, FieldValue::F64(value)));
659 }
660
661 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
662 self.field_values.push((field, FieldValue::Bytes(value)));
663 }
664
665 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
666 self.field_values
667 .push((field, FieldValue::SparseVector(entries)));
668 }
669
670 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
671 self.field_values
672 .push((field, FieldValue::DenseVector(values)));
673 }
674
675 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
676 self.field_values.push((field, FieldValue::Json(value)));
677 }
678
679 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
680 self.field_values
681 .iter()
682 .find(|(f, _)| *f == field)
683 .map(|(_, v)| v)
684 }
685
686 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
687 self.field_values
688 .iter()
689 .filter(move |(f, _)| *f == field)
690 .map(|(_, v)| v)
691 }
692
693 pub fn field_values(&self) -> &[(Field, FieldValue)] {
694 &self.field_values
695 }
696
697 pub fn filter_stored(&self, schema: &Schema) -> Document {
699 Document {
700 field_values: self
701 .field_values
702 .iter()
703 .filter(|(field, _)| {
704 schema
705 .get_field_entry(*field)
706 .is_some_and(|entry| entry.stored)
707 })
708 .cloned()
709 .collect(),
710 }
711 }
712
713 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
719 use std::collections::HashMap;
720
721 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
723 HashMap::new();
724
725 for (field, value) in &self.field_values {
726 if let Some(entry) = schema.get_field_entry(*field) {
727 let json_value = match value {
728 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
729 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
730 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
731 FieldValue::F64(n) => serde_json::json!(n),
732 FieldValue::Bytes(b) => {
733 use base64::Engine;
734 serde_json::Value::String(
735 base64::engine::general_purpose::STANDARD.encode(b),
736 )
737 }
738 FieldValue::SparseVector(entries) => {
739 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
740 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
741 serde_json::json!({
742 "indices": indices,
743 "values": values
744 })
745 }
746 FieldValue::DenseVector(values) => {
747 serde_json::json!(values)
748 }
749 FieldValue::Json(v) => v.clone(),
750 };
751 field_values_map
752 .entry(*field)
753 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
754 .2
755 .push(json_value);
756 }
757 }
758
759 let mut map = serde_json::Map::new();
761 for (_field, (name, is_multi, values)) in field_values_map {
762 let json_value = if is_multi || values.len() > 1 {
763 serde_json::Value::Array(values)
764 } else {
765 values.into_iter().next().unwrap()
766 };
767 map.insert(name, json_value);
768 }
769
770 serde_json::Value::Object(map)
771 }
772
773 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
782 let obj = json.as_object()?;
783 let mut doc = Document::new();
784
785 for (key, value) in obj {
786 if let Some(field) = schema.get_field(key) {
787 let field_entry = schema.get_field_entry(field)?;
788 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
789 }
790 }
791
792 Some(doc)
793 }
794
795 fn add_json_value(
797 doc: &mut Document,
798 field: Field,
799 field_type: &FieldType,
800 value: &serde_json::Value,
801 ) {
802 match value {
803 serde_json::Value::String(s) => {
804 if matches!(field_type, FieldType::Text) {
805 doc.add_text(field, s.clone());
806 }
807 }
808 serde_json::Value::Number(n) => {
809 match field_type {
810 FieldType::I64 => {
811 if let Some(i) = n.as_i64() {
812 doc.add_i64(field, i);
813 }
814 }
815 FieldType::U64 => {
816 if let Some(u) = n.as_u64() {
817 doc.add_u64(field, u);
818 } else if let Some(i) = n.as_i64() {
819 if i >= 0 {
821 doc.add_u64(field, i as u64);
822 }
823 }
824 }
825 FieldType::F64 => {
826 if let Some(f) = n.as_f64() {
827 doc.add_f64(field, f);
828 }
829 }
830 _ => {}
831 }
832 }
833 serde_json::Value::Array(arr) => {
835 for item in arr {
836 Self::add_json_value(doc, field, field_type, item);
837 }
838 }
839 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
841 if let (Some(indices_val), Some(values_val)) =
842 (obj.get("indices"), obj.get("values"))
843 {
844 let indices: Vec<u32> = indices_val
845 .as_array()
846 .map(|arr| {
847 arr.iter()
848 .filter_map(|v| v.as_u64().map(|n| n as u32))
849 .collect()
850 })
851 .unwrap_or_default();
852 let values: Vec<f32> = values_val
853 .as_array()
854 .map(|arr| {
855 arr.iter()
856 .filter_map(|v| v.as_f64().map(|n| n as f32))
857 .collect()
858 })
859 .unwrap_or_default();
860 if indices.len() == values.len() {
861 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
862 doc.add_sparse_vector(field, entries);
863 }
864 }
865 }
866 _ if matches!(field_type, FieldType::Json) => {
868 doc.add_json(field, value.clone());
869 }
870 serde_json::Value::Object(_) => {}
871 _ => {}
872 }
873 }
874}
875
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects the text of every value stored for `field`, in insertion order.
    fn texts_of(doc: &Document, field: Field) -> Vec<Option<&str>> {
        doc.get_all(field).map(|value| value.as_text()).collect()
    }

    /// Collects the strings of a JSON array; panics when `json` is not an array.
    fn strings_of(json: &serde_json::Value) -> Vec<Option<&str>> {
        json.as_array()
            .unwrap()
            .iter()
            .map(|item| item.as_str())
            .collect()
    }

    #[test]
    fn test_schema_builder() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, false);
        let count = builder.add_u64_field("count", true, true);
        let schema = builder.build();

        for (name, expected) in [("title", title), ("body", body), ("count", count)] {
            assert_eq!(schema.get_field(name), Some(expected));
        }
        assert_eq!(schema.get_field("nonexistent"), None);
    }

    #[test]
    fn test_document() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        let first_title = doc.get_first(title).unwrap();
        assert_eq!(first_title.as_text(), Some("Hello World"));
        let first_count = doc.get_first(count).unwrap();
        assert_eq!(first_count.as_u64(), Some(42));
    }

    #[test]
    fn test_document_serialization() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut original = Document::new();
        original.add_text(title, "Hello World");
        original.add_u64(count, 42);

        let serialized = serde_json::to_string(&original).unwrap();
        println!("Serialized doc: {}", serialized);

        let restored: Document = serde_json::from_str(&serialized).unwrap();
        assert_eq!(
            restored.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        let restored_title = restored.get_first(title).unwrap();
        assert_eq!(restored_title.as_text(), Some("Hello World"));
        let restored_count = restored.get_first(count).unwrap();
        assert_eq!(restored_count.as_u64(), Some(42));
    }

    #[test]
    fn test_multivalue_field() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(uris, "one");
        doc.add_text(uris, "two");
        doc.add_text(title, "Test Document");

        // The first value wins for `get_first`; `get_all` keeps insertion order.
        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));
        assert_eq!(texts_of(&doc, uris), vec![Some("one"), Some("two")]);

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        assert_eq!(strings_of(uris_json), vec![Some("one"), Some("two")]);

        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    #[test]
    fn test_multivalue_from_json() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let input = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let doc = Document::from_json(&input, &schema).unwrap();

        assert_eq!(texts_of(&doc, uris), vec![Some("one"), Some("two")]);
        assert_eq!(
            doc.get_first(title).unwrap().as_text(),
            Some("Test Document")
        );

        // Converting back yields the same array for the multi-valued key.
        let output = doc.to_json(&schema);
        let uris_out = output.get("uris").unwrap();
        assert_eq!(strings_of(uris_out), vec![Some("one"), Some("two")]);
    }

    #[test]
    fn test_multi_attribute_forces_array() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        builder.set_multi(uris, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert!(schema.get_field_entry(uris).unwrap().multi);
        assert!(!schema.get_field_entry(title).unwrap().multi);

        let mut doc = Document::new();
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        assert_eq!(strings_of(uris_json), vec![Some("only_one")]);

        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    #[test]
    fn test_sparse_vector_field() {
        let mut builder = Schema::builder();
        let embedding = builder.add_sparse_vector_field("embedding", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        let embedding_entry = schema.get_field_entry(embedding).unwrap();
        assert_eq!(embedding_entry.field_type, FieldType::SparseVector);

        let mut doc = Document::new();
        doc.add_sparse_vector(embedding, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        doc.add_text(title, "Test Document");

        let stored = doc
            .get_first(embedding)
            .and_then(|value| value.as_sparse_vector())
            .unwrap();
        assert_eq!(stored, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        let json = doc.to_json(&schema);
        let embedding_json = json.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        let indices_json = embedding_json
            .get("indices")
            .and_then(|indices| indices.as_array())
            .unwrap();
        assert_eq!(indices_json.len(), 3);

        // Round trip through JSON; weights are compared with a tolerance.
        let restored_doc = Document::from_json(&json, &schema).unwrap();
        let restored = restored_doc
            .get_first(embedding)
            .and_then(|value| value.as_sparse_vector())
            .unwrap();
        let expected: [(u32, f32); 3] = [(0, 1.0), (5, 2.5), (10, 0.5)];
        for (slot, (index, weight)) in expected.iter().enumerate() {
            assert_eq!(restored[slot].0, *index);
            assert!((restored[slot].1 - *weight).abs() < 1e-6);
        }
    }

    #[test]
    fn test_json_field() {
        let mut builder = Schema::builder();
        let metadata = builder.add_json_field("metadata", true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        let metadata_entry = schema.get_field_entry(metadata).unwrap();
        assert_eq!(metadata_entry.field_type, FieldType::Json);
        assert!(!metadata_entry.indexed);
        assert!(metadata_entry.stored);

        let payload = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, payload.clone());
        doc.add_text(title, "Test Document");

        let stored = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored, &payload);
        assert_eq!(stored.get("author").unwrap().as_str(), Some("John Doe"));

        let doc_json = doc.to_json(&schema);
        assert_eq!(doc_json.get("metadata").unwrap(), &payload);

        let restored_doc = Document::from_json(&doc_json, &schema).unwrap();
        let restored = restored_doc
            .get_first(metadata)
            .unwrap()
            .as_json()
            .unwrap();
        assert_eq!(restored, &payload);
    }

    #[test]
    fn test_json_field_various_types() {
        let mut builder = Schema::builder();
        let data = builder.add_json_field("data", true);
        let _schema = builder.build();

        // Array, string, number, null and boolean payloads are all stored verbatim.
        let samples = vec![
            serde_json::json!([1, 2, 3, "four", null]),
            serde_json::json!("just a string"),
            serde_json::json!(42.5),
            serde_json::Value::Null,
            serde_json::json!(true),
        ];

        for sample in samples {
            let mut doc = Document::new();
            doc.add_json(data, sample.clone());
            assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), &sample);
        }
    }
}