1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Handle identifying a field inside a [`Schema`].
///
/// The wrapped `u32` is the field's position in the schema's field list:
/// [`SchemaBuilder`] assigns it from the number of fields registered so far,
/// and [`Schema`] uses it as an index when looking entries up.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
/// Kind of value a schema field holds.
///
/// Each variant is serialized under the lowercase name given in its
/// `#[serde(rename = ...)]` attribute.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FieldType {
    /// String values; serialized as `"text"`.
    #[serde(rename = "text")]
    Text,
    /// Unsigned 64-bit integers; serialized as `"u64"`.
    #[serde(rename = "u64")]
    U64,
    /// Signed 64-bit integers; serialized as `"i64"`.
    #[serde(rename = "i64")]
    I64,
    /// 64-bit floats; serialized as `"f64"`.
    #[serde(rename = "f64")]
    F64,
    /// Raw byte strings; serialized as `"bytes"`.
    #[serde(rename = "bytes")]
    Bytes,
    /// Sparse vectors of `(index, value)` pairs; serialized as `"sparse_vector"`.
    #[serde(rename = "sparse_vector")]
    SparseVector,
    /// Dense `f32` vectors; serialized as `"dense_vector"`.
    #[serde(rename = "dense_vector")]
    DenseVector,
    /// Arbitrary JSON values; serialized as `"json"`.
    #[serde(rename = "json")]
    Json,
}
38
/// Definition of a single schema field: its name, type and per-field options.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; this is the key used by [`Schema::get_field`] and in document JSON.
    pub name: String,
    /// Kind of value the field holds.
    pub field_type: FieldType,
    /// Whether the field is indexed. The builder always registers bytes and
    /// JSON fields with `indexed = false`.
    pub indexed: bool,
    /// Whether the field is stored; [`Document::filter_stored`] keeps only
    /// values whose field has this flag set.
    pub stored: bool,
    /// Tokenizer name. The builder sets `"default"` (or a caller-supplied
    /// name) for text fields and `None` for every other field type.
    pub tokenizer: Option<String>,
    /// Marks the field as multi-valued: [`Document::to_json`] then always
    /// emits an array, even for a single value. Defaults to `false` when
    /// absent from serialized data.
    #[serde(default)]
    pub multi: bool,
    /// Optional position tracking mode; omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub positions: Option<PositionMode>,
    /// Sparse vector settings; populated by the builder for sparse vector
    /// fields and omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Dense vector settings; populated by the builder for dense vector
    /// fields and omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
}
61
/// Which kind of position information is tracked for a field.
///
/// Variant names are serialized in snake_case (`rename_all = "snake_case"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PositionMode {
    /// Track ordinals only.
    /// NOTE(review): presumably the ordinal of a value within a multi-valued
    /// field — confirm against the indexer.
    Ordinal,
    /// Track token positions only.
    TokenPosition,
    /// Track both ordinals and token positions.
    Full,
}
76
77impl PositionMode {
78 pub fn tracks_ordinal(&self) -> bool {
80 matches!(self, PositionMode::Ordinal | PositionMode::Full)
81 }
82
83 pub fn tracks_token_position(&self) -> bool {
85 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
86 }
87}
88
/// Index structure used for a dense vector field.
///
/// NOTE(review): variant names go through serde's `snake_case` rule, which
/// splits on every uppercase letter, so the wire names are likely
/// `ra_bit_q`, `ivf_ra_bit_q` and `sca_n_n` rather than `rabitq` / `scann` —
/// confirm before documenting the format for users.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Flat index; `DenseVectorConfig::default_build_threshold` uses
    /// `usize::MAX` for it unless a threshold is set explicitly.
    Flat,
    /// RaBitQ index. This is the default variant.
    #[default]
    RaBitQ,
    /// IVF combined with RaBitQ; counted as IVF-based by `DenseVectorConfig::uses_ivf`.
    IvfRaBitQ,
    /// ScaNN index; also counted as IVF-based by `DenseVectorConfig::uses_ivf`.
    ScaNN,
}
103
/// Element representation used for dense vector values.
///
/// NOTE(review): with serde's `snake_case` rule `UInt8` likely serializes as
/// `u_int8` — confirm before relying on the wire name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DenseVectorQuantization {
    /// 4 bytes per element. This is the default variant.
    #[default]
    F32,
    /// 2 bytes per element.
    F16,
    /// 1 byte per element.
    UInt8,
}
120
121impl DenseVectorQuantization {
122 pub fn element_size(self) -> usize {
124 match self {
125 Self::F32 => 4,
126 Self::F16 => 2,
127 Self::UInt8 => 1,
128 }
129 }
130
131 pub fn tag(self) -> u8 {
133 match self {
134 Self::F32 => 0,
135 Self::F16 => 1,
136 Self::UInt8 => 2,
137 }
138 }
139
140 pub fn from_tag(tag: u8) -> Option<Self> {
142 match tag {
143 0 => Some(Self::F32),
144 1 => Some(Self::F16),
145 2 => Some(Self::UInt8),
146 _ => None,
147 }
148 }
149}
150
/// Configuration of a dense vector field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Vector dimensionality.
    pub dim: usize,
    /// Index structure; falls back to the enum default (`RaBitQ`) when absent.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Element quantization; falls back to the enum default (`F32`) when absent.
    #[serde(default)]
    pub quantization: DenseVectorQuantization,
    /// Explicit cluster count. When `None`, `optimal_num_clusters` derives
    /// one from the number of vectors. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_clusters: Option<usize>,
    /// Number of clusters to probe; defaults to `default_nprobe()` (32) when absent.
    /// NOTE(review): presumably only meaningful for IVF-based index types — confirm.
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// Explicit build threshold. When `None`, `default_build_threshold`
    /// picks a value per index type. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build_threshold: Option<usize>,
}
182
/// Serde fallback for `DenseVectorConfig::nprobe` when the key is missing.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
186
187impl DenseVectorConfig {
188 pub fn new(dim: usize) -> Self {
189 Self {
190 dim,
191 index_type: VectorIndexType::RaBitQ,
192 quantization: DenseVectorQuantization::F32,
193 num_clusters: None,
194 nprobe: 32,
195 build_threshold: None,
196 }
197 }
198
199 pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
201 Self {
202 dim,
203 index_type: VectorIndexType::IvfRaBitQ,
204 quantization: DenseVectorQuantization::F32,
205 num_clusters,
206 nprobe,
207 build_threshold: None,
208 }
209 }
210
211 pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
213 Self {
214 dim,
215 index_type: VectorIndexType::ScaNN,
216 quantization: DenseVectorQuantization::F32,
217 num_clusters,
218 nprobe,
219 build_threshold: None,
220 }
221 }
222
223 pub fn flat(dim: usize) -> Self {
225 Self {
226 dim,
227 index_type: VectorIndexType::Flat,
228 quantization: DenseVectorQuantization::F32,
229 num_clusters: None,
230 nprobe: 0,
231 build_threshold: None,
232 }
233 }
234
235 pub fn with_quantization(mut self, quantization: DenseVectorQuantization) -> Self {
237 self.quantization = quantization;
238 self
239 }
240
241 pub fn with_build_threshold(mut self, threshold: usize) -> Self {
243 self.build_threshold = Some(threshold);
244 self
245 }
246
247 pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
249 self.num_clusters = Some(num_clusters);
250 self
251 }
252
253 pub fn uses_ivf(&self) -> bool {
255 matches!(
256 self.index_type,
257 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
258 )
259 }
260
261 pub fn uses_scann(&self) -> bool {
263 self.index_type == VectorIndexType::ScaNN
264 }
265
266 pub fn is_flat(&self) -> bool {
268 self.index_type == VectorIndexType::Flat
269 }
270
271 pub fn default_build_threshold(&self) -> usize {
273 self.build_threshold.unwrap_or(match self.index_type {
274 VectorIndexType::Flat => usize::MAX, VectorIndexType::RaBitQ => 1000,
276 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
277 })
278 }
279
280 pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
282 self.num_clusters.unwrap_or_else(|| {
283 let optimal = (num_vectors as f64).sqrt() as usize;
285 optimal.clamp(16, 4096)
286 })
287 }
288}
289
290use super::query_field_router::QueryRouterRule;
291
/// Immutable description of an index's fields, normally produced by
/// [`SchemaBuilder::build`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field definitions; a [`Field`] handle is an index into this list.
    fields: Vec<FieldEntry>,
    /// Lookup from field name to its handle.
    name_to_field: HashMap<String, Field>,
    /// Default fields, exposed through `default_fields()`; empty when absent
    /// from serialized data.
    /// NOTE(review): presumably the fields searched when a query names none — confirm.
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query routing rules, exposed through `query_routers()`; empty when
    /// absent from serialized data.
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
304
305impl Schema {
306 pub fn builder() -> SchemaBuilder {
307 SchemaBuilder::default()
308 }
309
310 pub fn get_field(&self, name: &str) -> Option<Field> {
311 self.name_to_field.get(name).copied()
312 }
313
314 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
315 self.fields.get(field.0 as usize)
316 }
317
318 pub fn get_field_name(&self, field: Field) -> Option<&str> {
319 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
320 }
321
322 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
323 self.fields
324 .iter()
325 .enumerate()
326 .map(|(i, e)| (Field(i as u32), e))
327 }
328
329 pub fn num_fields(&self) -> usize {
330 self.fields.len()
331 }
332
333 pub fn default_fields(&self) -> &[Field] {
335 &self.default_fields
336 }
337
338 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
340 self.default_fields = fields;
341 }
342
343 pub fn query_routers(&self) -> &[QueryRouterRule] {
345 &self.query_routers
346 }
347
348 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
350 self.query_routers = rules;
351 }
352}
353
/// Incrementally collects field definitions and settings, then produces a
/// [`Schema`] via `build()`.
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Field definitions in registration order.
    fields: Vec<FieldEntry>,
    /// Default field *names*; they are resolved to [`Field`] handles in `build()`.
    default_fields: Vec<String>,
    /// Query routing rules passed through to the schema unchanged.
    query_routers: Vec<QueryRouterRule>,
}
361
362impl SchemaBuilder {
363 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
364 self.add_field_with_tokenizer(
365 name,
366 FieldType::Text,
367 indexed,
368 stored,
369 Some("default".to_string()),
370 )
371 }
372
373 pub fn add_text_field_with_tokenizer(
374 &mut self,
375 name: &str,
376 indexed: bool,
377 stored: bool,
378 tokenizer: &str,
379 ) -> Field {
380 self.add_field_with_tokenizer(
381 name,
382 FieldType::Text,
383 indexed,
384 stored,
385 Some(tokenizer.to_string()),
386 )
387 }
388
389 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
390 self.add_field(name, FieldType::U64, indexed, stored)
391 }
392
393 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
394 self.add_field(name, FieldType::I64, indexed, stored)
395 }
396
397 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
398 self.add_field(name, FieldType::F64, indexed, stored)
399 }
400
401 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
402 self.add_field(name, FieldType::Bytes, false, stored)
403 }
404
405 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
410 self.add_field(name, FieldType::Json, false, stored)
411 }
412
413 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
418 self.add_sparse_vector_field_with_config(
419 name,
420 indexed,
421 stored,
422 crate::structures::SparseVectorConfig::default(),
423 )
424 }
425
426 pub fn add_sparse_vector_field_with_config(
431 &mut self,
432 name: &str,
433 indexed: bool,
434 stored: bool,
435 config: crate::structures::SparseVectorConfig,
436 ) -> Field {
437 let field = Field(self.fields.len() as u32);
438 self.fields.push(FieldEntry {
439 name: name.to_string(),
440 field_type: FieldType::SparseVector,
441 indexed,
442 stored,
443 tokenizer: None,
444 multi: false,
445 positions: None,
446 sparse_vector_config: Some(config),
447 dense_vector_config: None,
448 });
449 field
450 }
451
452 pub fn set_sparse_vector_config(
454 &mut self,
455 field: Field,
456 config: crate::structures::SparseVectorConfig,
457 ) {
458 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
459 entry.sparse_vector_config = Some(config);
460 }
461 }
462
463 pub fn add_dense_vector_field(
468 &mut self,
469 name: &str,
470 dim: usize,
471 indexed: bool,
472 stored: bool,
473 ) -> Field {
474 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
475 }
476
477 pub fn add_dense_vector_field_with_config(
479 &mut self,
480 name: &str,
481 indexed: bool,
482 stored: bool,
483 config: DenseVectorConfig,
484 ) -> Field {
485 let field = Field(self.fields.len() as u32);
486 self.fields.push(FieldEntry {
487 name: name.to_string(),
488 field_type: FieldType::DenseVector,
489 indexed,
490 stored,
491 tokenizer: None,
492 multi: false,
493 positions: None,
494 sparse_vector_config: None,
495 dense_vector_config: Some(config),
496 });
497 field
498 }
499
500 fn add_field(
501 &mut self,
502 name: &str,
503 field_type: FieldType,
504 indexed: bool,
505 stored: bool,
506 ) -> Field {
507 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
508 }
509
510 fn add_field_with_tokenizer(
511 &mut self,
512 name: &str,
513 field_type: FieldType,
514 indexed: bool,
515 stored: bool,
516 tokenizer: Option<String>,
517 ) -> Field {
518 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
519 }
520
521 fn add_field_full(
522 &mut self,
523 name: &str,
524 field_type: FieldType,
525 indexed: bool,
526 stored: bool,
527 tokenizer: Option<String>,
528 multi: bool,
529 ) -> Field {
530 let field = Field(self.fields.len() as u32);
531 self.fields.push(FieldEntry {
532 name: name.to_string(),
533 field_type,
534 indexed,
535 stored,
536 tokenizer,
537 multi,
538 positions: None,
539 sparse_vector_config: None,
540 dense_vector_config: None,
541 });
542 field
543 }
544
545 pub fn set_multi(&mut self, field: Field, multi: bool) {
547 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
548 entry.multi = multi;
549 }
550 }
551
552 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
554 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
555 entry.positions = Some(mode);
556 }
557 }
558
559 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
561 self.default_fields = field_names;
562 }
563
564 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
566 self.query_routers = rules;
567 }
568
569 pub fn build(self) -> Schema {
570 let mut name_to_field = HashMap::new();
571 for (i, entry) in self.fields.iter().enumerate() {
572 name_to_field.insert(entry.name.clone(), Field(i as u32));
573 }
574
575 let default_fields: Vec<Field> = self
577 .default_fields
578 .iter()
579 .filter_map(|name| name_to_field.get(name).copied())
580 .collect();
581
582 Schema {
583 fields: self.fields,
584 name_to_field,
585 default_fields,
586 query_routers: self.query_routers,
587 }
588 }
589}
590
/// A single typed value stored in a [`Document`].
///
/// Each variant is serialized under the lowercase name given in its
/// `#[serde(rename = ...)]` attribute.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FieldValue {
    /// A string value.
    #[serde(rename = "text")]
    Text(String),
    /// An unsigned 64-bit integer.
    #[serde(rename = "u64")]
    U64(u64),
    /// A signed 64-bit integer.
    #[serde(rename = "i64")]
    I64(i64),
    /// A 64-bit float.
    #[serde(rename = "f64")]
    F64(f64),
    /// Raw bytes; `Document::to_json` renders them as a base64 string.
    #[serde(rename = "bytes")]
    Bytes(Vec<u8>),
    /// Sparse vector as `(index, value)` pairs; `Document::to_json` renders
    /// them as parallel `indices` / `values` arrays.
    #[serde(rename = "sparse_vector")]
    SparseVector(Vec<(u32, f32)>),
    /// Dense vector of `f32` components.
    #[serde(rename = "dense_vector")]
    DenseVector(Vec<f32>),
    /// An arbitrary JSON value.
    #[serde(rename = "json")]
    Json(serde_json::Value),
}
614
615impl FieldValue {
616 pub fn as_text(&self) -> Option<&str> {
617 match self {
618 FieldValue::Text(s) => Some(s),
619 _ => None,
620 }
621 }
622
623 pub fn as_u64(&self) -> Option<u64> {
624 match self {
625 FieldValue::U64(v) => Some(*v),
626 _ => None,
627 }
628 }
629
630 pub fn as_i64(&self) -> Option<i64> {
631 match self {
632 FieldValue::I64(v) => Some(*v),
633 _ => None,
634 }
635 }
636
637 pub fn as_f64(&self) -> Option<f64> {
638 match self {
639 FieldValue::F64(v) => Some(*v),
640 _ => None,
641 }
642 }
643
644 pub fn as_bytes(&self) -> Option<&[u8]> {
645 match self {
646 FieldValue::Bytes(b) => Some(b),
647 _ => None,
648 }
649 }
650
651 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
652 match self {
653 FieldValue::SparseVector(entries) => Some(entries),
654 _ => None,
655 }
656 }
657
658 pub fn as_dense_vector(&self) -> Option<&[f32]> {
659 match self {
660 FieldValue::DenseVector(v) => Some(v),
661 _ => None,
662 }
663 }
664
665 pub fn as_json(&self) -> Option<&serde_json::Value> {
666 match self {
667 FieldValue::Json(v) => Some(v),
668 _ => None,
669 }
670 }
671}
672
/// A document: an ordered list of `(field, value)` pairs.
///
/// The same field may appear several times; `get_all` returns every value
/// for a field and `get_first` the earliest one added.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// Values in insertion order.
    field_values: Vec<(Field, FieldValue)>,
}
678
679impl Document {
680 pub fn new() -> Self {
681 Self::default()
682 }
683
684 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
685 self.field_values
686 .push((field, FieldValue::Text(value.into())));
687 }
688
689 pub fn add_u64(&mut self, field: Field, value: u64) {
690 self.field_values.push((field, FieldValue::U64(value)));
691 }
692
693 pub fn add_i64(&mut self, field: Field, value: i64) {
694 self.field_values.push((field, FieldValue::I64(value)));
695 }
696
697 pub fn add_f64(&mut self, field: Field, value: f64) {
698 self.field_values.push((field, FieldValue::F64(value)));
699 }
700
701 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
702 self.field_values.push((field, FieldValue::Bytes(value)));
703 }
704
705 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
706 self.field_values
707 .push((field, FieldValue::SparseVector(entries)));
708 }
709
710 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
711 self.field_values
712 .push((field, FieldValue::DenseVector(values)));
713 }
714
715 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
716 self.field_values.push((field, FieldValue::Json(value)));
717 }
718
719 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
720 self.field_values
721 .iter()
722 .find(|(f, _)| *f == field)
723 .map(|(_, v)| v)
724 }
725
726 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
727 self.field_values
728 .iter()
729 .filter(move |(f, _)| *f == field)
730 .map(|(_, v)| v)
731 }
732
733 pub fn field_values(&self) -> &[(Field, FieldValue)] {
734 &self.field_values
735 }
736
737 pub fn filter_stored(&self, schema: &Schema) -> Document {
739 Document {
740 field_values: self
741 .field_values
742 .iter()
743 .filter(|(field, _)| {
744 schema
745 .get_field_entry(*field)
746 .is_some_and(|entry| entry.stored)
747 })
748 .cloned()
749 .collect(),
750 }
751 }
752
753 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
759 use std::collections::HashMap;
760
761 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
763 HashMap::new();
764
765 for (field, value) in &self.field_values {
766 if let Some(entry) = schema.get_field_entry(*field) {
767 let json_value = match value {
768 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
769 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
770 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
771 FieldValue::F64(n) => serde_json::json!(n),
772 FieldValue::Bytes(b) => {
773 use base64::Engine;
774 serde_json::Value::String(
775 base64::engine::general_purpose::STANDARD.encode(b),
776 )
777 }
778 FieldValue::SparseVector(entries) => {
779 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
780 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
781 serde_json::json!({
782 "indices": indices,
783 "values": values
784 })
785 }
786 FieldValue::DenseVector(values) => {
787 serde_json::json!(values)
788 }
789 FieldValue::Json(v) => v.clone(),
790 };
791 field_values_map
792 .entry(*field)
793 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
794 .2
795 .push(json_value);
796 }
797 }
798
799 let mut map = serde_json::Map::new();
801 for (_field, (name, is_multi, values)) in field_values_map {
802 let json_value = if is_multi || values.len() > 1 {
803 serde_json::Value::Array(values)
804 } else {
805 values.into_iter().next().unwrap()
806 };
807 map.insert(name, json_value);
808 }
809
810 serde_json::Value::Object(map)
811 }
812
813 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
822 let obj = json.as_object()?;
823 let mut doc = Document::new();
824
825 for (key, value) in obj {
826 if let Some(field) = schema.get_field(key) {
827 let field_entry = schema.get_field_entry(field)?;
828 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
829 }
830 }
831
832 Some(doc)
833 }
834
835 fn add_json_value(
837 doc: &mut Document,
838 field: Field,
839 field_type: &FieldType,
840 value: &serde_json::Value,
841 ) {
842 match value {
843 serde_json::Value::String(s) => {
844 if matches!(field_type, FieldType::Text) {
845 doc.add_text(field, s.clone());
846 }
847 }
848 serde_json::Value::Number(n) => {
849 match field_type {
850 FieldType::I64 => {
851 if let Some(i) = n.as_i64() {
852 doc.add_i64(field, i);
853 }
854 }
855 FieldType::U64 => {
856 if let Some(u) = n.as_u64() {
857 doc.add_u64(field, u);
858 } else if let Some(i) = n.as_i64() {
859 if i >= 0 {
861 doc.add_u64(field, i as u64);
862 }
863 }
864 }
865 FieldType::F64 => {
866 if let Some(f) = n.as_f64() {
867 doc.add_f64(field, f);
868 }
869 }
870 _ => {}
871 }
872 }
873 serde_json::Value::Array(arr) => {
875 for item in arr {
876 Self::add_json_value(doc, field, field_type, item);
877 }
878 }
879 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
881 if let (Some(indices_val), Some(values_val)) =
882 (obj.get("indices"), obj.get("values"))
883 {
884 let indices: Vec<u32> = indices_val
885 .as_array()
886 .map(|arr| {
887 arr.iter()
888 .filter_map(|v| v.as_u64().map(|n| n as u32))
889 .collect()
890 })
891 .unwrap_or_default();
892 let values: Vec<f32> = values_val
893 .as_array()
894 .map(|arr| {
895 arr.iter()
896 .filter_map(|v| v.as_f64().map(|n| n as f32))
897 .collect()
898 })
899 .unwrap_or_default();
900 if indices.len() == values.len() {
901 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
902 doc.add_sparse_vector(field, entries);
903 }
904 }
905 }
906 _ if matches!(field_type, FieldType::Json) => {
908 doc.add_json(field, value.clone());
909 }
910 serde_json::Value::Object(_) => {}
911 _ => {}
912 }
913 }
914}
915
#[cfg(test)]
mod tests {
    use super::*;

    /// Field handles returned by the builder are retrievable by name from the
    /// built schema, and unknown names resolve to `None`.
    #[test]
    fn test_schema_builder() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, false);
        let count = builder.add_u64_field("count", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("title"), Some(title));
        assert_eq!(schema.get_field("body"), Some(body));
        assert_eq!(schema.get_field("count"), Some(count));
        assert_eq!(schema.get_field("nonexistent"), None);
    }

    /// Values added to a document can be read back through the typed accessors.
    #[test]
    fn test_document() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        assert_eq!(doc.get_first(title).unwrap().as_text(), Some("Hello World"));
        assert_eq!(doc.get_first(count).unwrap().as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip with all values intact.
    #[test]
    fn test_document_serialization() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        // Serialize with serde (not `Document::to_json`).
        let json = serde_json::to_string(&doc).unwrap();
        println!("Serialized doc: {}", json);

        // Deserialize and compare against the original values.
        let doc2: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(
            doc2.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        assert_eq!(
            doc2.get_first(title).unwrap().as_text(),
            Some("Hello World")
        );
        assert_eq!(doc2.get_first(count).unwrap().as_u64(), Some(42));
    }

    /// A field holding several values is exposed through `get_all` and is
    /// rendered as an array by `to_json`, while single values stay scalar.
    #[test]
    fn test_multivalue_field() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let mut doc = Document::new();
        // Two values for the same field.
        doc.add_text(uris, "one");
        doc.add_text(uris, "two");
        doc.add_text(title, "Test Document");

        // `get_first` returns the earliest value added.
        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));

        // `get_all` returns every value in insertion order.
        let all_uris: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(all_uris.len(), 2);
        assert_eq!(all_uris[0].as_text(), Some("one"));
        assert_eq!(all_uris[1].as_text(), Some("two"));

        // More than one value: `to_json` emits an array.
        let json = doc.to_json(&schema);
        let uris_json = json.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        let uris_arr = uris_json.as_array().unwrap();
        assert_eq!(uris_arr.len(), 2);
        assert_eq!(uris_arr[0].as_str(), Some("one"));
        assert_eq!(uris_arr[1].as_str(), Some("two"));

        // Exactly one value on a non-multi field: `to_json` emits a scalar.
        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// `from_json` expands a JSON array into several values for one field,
    /// and `to_json` turns them back into an array.
    #[test]
    fn test_multivalue_from_json() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        // Input with an array value for `uris`.
        let json = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let doc = Document::from_json(&json, &schema).unwrap();

        // Each array element became its own value.
        let all_uris: Vec<_> = doc.get_all(uris).collect();
        assert_eq!(all_uris.len(), 2);
        assert_eq!(all_uris[0].as_text(), Some("one"));
        assert_eq!(all_uris[1].as_text(), Some("two"));

        // The scalar field is unaffected.
        assert_eq!(
            doc.get_first(title).unwrap().as_text(),
            Some("Test Document")
        );

        // Round trip back to JSON.
        let json_out = doc.to_json(&schema);
        let uris_out = json_out.get("uris").unwrap().as_array().unwrap();
        assert_eq!(uris_out.len(), 2);
        assert_eq!(uris_out[0].as_str(), Some("one"));
        assert_eq!(uris_out[1].as_str(), Some("two"));
    }

    /// A field flagged `multi` is rendered as an array even when it holds a
    /// single value; fields without the flag stay scalar.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        // Mark `uris` as multi-valued; `title` keeps the default.
        builder.set_multi(uris, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        // The flag is visible on the built schema.
        assert!(schema.get_field_entry(uris).unwrap().multi);
        assert!(!schema.get_field_entry(title).unwrap().multi);

        let mut doc = Document::new();
        // Only one value for the multi field.
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        let uris_arr = uris_json.as_array().unwrap();
        assert_eq!(uris_arr.len(), 1);
        assert_eq!(uris_arr[0].as_str(), Some("only_one"));

        // The non-multi field with one value is still a plain string.
        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// Sparse vector fields are registered with the right type and survive a
    /// `to_json` / `from_json` round trip.
    #[test]
    fn test_sparse_vector_field() {
        let mut builder = Schema::builder();
        let embedding = builder.add_sparse_vector_field("embedding", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        assert_eq!(
            schema.get_field_entry(embedding).unwrap().field_type,
            FieldType::SparseVector
        );

        let mut doc = Document::new();
        // Sparse vector given as (index, value) pairs.
        doc.add_sparse_vector(embedding, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        doc.add_text(title, "Test Document");

        // Direct accessor returns the pairs unchanged.
        let entries = doc
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        // JSON form is an object with an `indices` array.
        let json = doc.to_json(&schema);
        let embedding_json = json.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        assert_eq!(
            embedding_json
                .get("indices")
                .unwrap()
                .as_array()
                .unwrap()
                .len(),
            3
        );

        // Round trip through `from_json`; values are compared with a tolerance.
        let doc2 = Document::from_json(&json, &schema).unwrap();
        let entries2 = doc2
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries2[0].0, 0);
        assert!((entries2[0].1 - 1.0).abs() < 1e-6);
        assert_eq!(entries2[1].0, 5);
        assert!((entries2[1].1 - 2.5).abs() < 1e-6);
        assert_eq!(entries2[2].0, 10);
        assert!((entries2[2].1 - 0.5).abs() < 1e-6);
    }

    /// JSON fields are registered as stored-but-not-indexed and keep an
    /// object value intact through `to_json` / `from_json`.
    #[test]
    fn test_json_field() {
        let mut builder = Schema::builder();
        let metadata = builder.add_json_field("metadata", true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        assert_eq!(
            schema.get_field_entry(metadata).unwrap().field_type,
            FieldType::Json
        );
        // JSON fields are never indexed.
        assert!(!schema.get_field_entry(metadata).unwrap().indexed);
        assert!(schema.get_field_entry(metadata).unwrap().stored);

        // A nested object value.
        let json_value = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, json_value.clone());
        doc.add_text(title, "Test Document");

        // Direct accessor returns the value unchanged.
        let stored_json = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json, &json_value);
        assert_eq!(
            stored_json.get("author").unwrap().as_str(),
            Some("John Doe")
        );

        // `to_json` emits the value as-is.
        let doc_json = doc.to_json(&schema);
        let metadata_out = doc_json.get("metadata").unwrap();
        assert_eq!(metadata_out, &json_value);

        // Round trip through `from_json`.
        let doc2 = Document::from_json(&doc_json, &schema).unwrap();
        let stored_json2 = doc2.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json2, &json_value);
    }

    /// `add_json` accepts every JSON value kind, not only objects.
    #[test]
    fn test_json_field_various_types() {
        let mut builder = Schema::builder();
        let data = builder.add_json_field("data", true);
        let _schema = builder.build();

        // Array value.
        let arr_value = serde_json::json!([1, 2, 3, "four", null]);
        let mut doc = Document::new();
        doc.add_json(data, arr_value.clone());
        assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), &arr_value);

        // String value.
        let str_value = serde_json::json!("just a string");
        let mut doc2 = Document::new();
        doc2.add_json(data, str_value.clone());
        assert_eq!(doc2.get_first(data).unwrap().as_json().unwrap(), &str_value);

        // Number value.
        let num_value = serde_json::json!(42.5);
        let mut doc3 = Document::new();
        doc3.add_json(data, num_value.clone());
        assert_eq!(doc3.get_first(data).unwrap().as_json().unwrap(), &num_value);

        // Null value.
        let null_value = serde_json::Value::Null;
        let mut doc4 = Document::new();
        doc4.add_json(data, null_value.clone());
        assert_eq!(
            doc4.get_first(data).unwrap().as_json().unwrap(),
            &null_value
        );

        // Boolean value.
        let bool_value = serde_json::json!(true);
        let mut doc5 = Document::new();
        doc5.add_json(data, bool_value.clone());
        assert_eq!(
            doc5.get_first(data).unwrap().as_json().unwrap(),
            &bool_value
        );
    }
}