1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Lightweight handle that identifies a field of a [`Schema`].
///
/// The wrapped `u32` is the field's position in the schema's field list;
/// `Schema::get_field_entry` uses it directly as an index.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub enum FieldType {
13 #[serde(rename = "text")]
15 Text,
16 #[serde(rename = "u64")]
18 U64,
19 #[serde(rename = "i64")]
21 I64,
22 #[serde(rename = "f64")]
24 F64,
25 #[serde(rename = "bytes")]
27 Bytes,
28 #[serde(rename = "sparse_vector")]
30 SparseVector,
31 #[serde(rename = "dense_vector")]
33 DenseVector,
34 #[serde(rename = "json")]
36 Json,
37}
38
/// Per-field settings stored in a [`Schema`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; `SchemaBuilder::build` keys the name lookup table on it.
    pub name: String,
    /// Kind of value the field holds.
    pub field_type: FieldType,
    /// Whether the field is indexed.
    /// NOTE(review): the indexing code that consumes this flag is not in this file.
    pub indexed: bool,
    /// Whether the field is stored; `Document::filter_stored` keeps only
    /// values whose field has this flag set.
    pub stored: bool,
    /// Tokenizer name, if any. The builder uses `"default"` for plain text
    /// fields and `None` for every non-text field.
    pub tokenizer: Option<String>,
    /// When set, `Document::to_json` always emits the field as a JSON array,
    /// even for a single value. Defaults to `false` when absent on input.
    #[serde(default)]
    pub multi: bool,
    /// Optional position-tracking mode; omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub positions: Option<PositionMode>,
    /// Sparse-vector settings; the builder sets this for sparse vector fields.
    /// Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Dense-vector settings; the builder sets this for dense vector fields.
    /// Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
    /// Defaults to `false` when absent on input.
    /// NOTE(review): presumably enables a fast-field/columnar representation —
    /// confirm against the code that reads this flag.
    #[serde(default)]
    pub fast: bool,
}
65
/// Selects which position information is tracked for a field.
///
/// Serialized in snake_case: `"ordinal"`, `"token_position"`, `"full"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PositionMode {
    /// Ordinal tracking only (`tracks_ordinal()` is true, `tracks_token_position()` is false).
    /// NOTE(review): "ordinal" presumably means the value's index within a
    /// multi-valued field — confirm with the indexing code.
    Ordinal,
    /// Token-position tracking only (`tracks_token_position()` is true, `tracks_ordinal()` is false).
    TokenPosition,
    /// Both ordinal and token-position tracking.
    Full,
}
80
81impl PositionMode {
82 pub fn tracks_ordinal(&self) -> bool {
84 matches!(self, PositionMode::Ordinal | PositionMode::Full)
85 }
86
87 pub fn tracks_token_position(&self) -> bool {
89 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
90 }
91}
92
/// Index structure used for a dense vector field.
///
/// Variant names are serialized with serde's `snake_case` renaming; the
/// default is [`VectorIndexType::RaBitQ`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Flat index; `DenseVectorConfig::default_build_threshold` reports
    /// `usize::MAX` for it.
    /// NOTE(review): presumably brute-force search with no trained structure — confirm.
    Flat,
    /// Default index type.
    #[default]
    RaBitQ,
    /// Cluster-based (IVF) variant; `DenseVectorConfig::uses_ivf` is true for it.
    IvfRaBitQ,
    /// Cluster-based variant reported by both `uses_ivf` and `uses_scann`.
    ScaNN,
}
107
/// Element representation of dense vectors.
///
/// Variant names are serialized with serde's `snake_case` renaming; the
/// default is [`DenseVectorQuantization::F32`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum DenseVectorQuantization {
    /// 4 bytes per element (see `element_size`); tag `0`.
    #[default]
    F32,
    /// 2 bytes per element; tag `1`.
    F16,
    /// 1 byte per element; tag `2`.
    UInt8,
}
124
125impl DenseVectorQuantization {
126 pub fn element_size(self) -> usize {
128 match self {
129 Self::F32 => 4,
130 Self::F16 => 2,
131 Self::UInt8 => 1,
132 }
133 }
134
135 pub fn tag(self) -> u8 {
137 match self {
138 Self::F32 => 0,
139 Self::F16 => 1,
140 Self::UInt8 => 2,
141 }
142 }
143
144 pub fn from_tag(tag: u8) -> Option<Self> {
146 match tag {
147 0 => Some(Self::F32),
148 1 => Some(Self::F16),
149 2 => Some(Self::UInt8),
150 _ => None,
151 }
152 }
153}
154
/// Settings for a dense vector field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Vector dimensionality. This is the only field without a serde default,
    /// so it must be present on input.
    pub dim: usize,
    /// Index structure; falls back to `VectorIndexType::default()` when absent.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Element representation; falls back to
    /// `DenseVectorQuantization::default()` when absent.
    #[serde(default)]
    pub quantization: DenseVectorQuantization,
    /// Explicit cluster count. When `None`, `optimal_num_clusters` derives one
    /// from the number of vectors. Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_clusters: Option<usize>,
    /// Falls back to `default_nprobe()` (32) when absent.
    /// NOTE(review): presumably the number of clusters probed per query — confirm.
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// Explicit build threshold. When `None`, `default_build_threshold`
    /// supplies a per-index-type value. Omitted from output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub build_threshold: Option<usize>,
    /// Falls back to `default_unit_norm()` (`true`) when absent.
    /// NOTE(review): presumably declares that vectors are unit-normalized — confirm.
    #[serde(default = "default_unit_norm")]
    pub unit_norm: bool,
}
192
/// Serde default for `DenseVectorConfig::nprobe`.
fn default_nprobe() -> usize {
    const DEFAULT_NPROBE: usize = 32;
    DEFAULT_NPROBE
}
196
/// Serde default for `DenseVectorConfig::unit_norm`.
fn default_unit_norm() -> bool {
    const DEFAULT_UNIT_NORM: bool = true;
    DEFAULT_UNIT_NORM
}
200
201impl DenseVectorConfig {
202 pub fn new(dim: usize) -> Self {
203 Self {
204 dim,
205 index_type: VectorIndexType::RaBitQ,
206 quantization: DenseVectorQuantization::F32,
207 num_clusters: None,
208 nprobe: 32,
209 build_threshold: None,
210 unit_norm: true,
211 }
212 }
213
214 pub fn with_ivf(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
216 Self {
217 dim,
218 index_type: VectorIndexType::IvfRaBitQ,
219 quantization: DenseVectorQuantization::F32,
220 num_clusters,
221 nprobe,
222 build_threshold: None,
223 unit_norm: true,
224 }
225 }
226
227 pub fn with_scann(dim: usize, num_clusters: Option<usize>, nprobe: usize) -> Self {
229 Self {
230 dim,
231 index_type: VectorIndexType::ScaNN,
232 quantization: DenseVectorQuantization::F32,
233 num_clusters,
234 nprobe,
235 build_threshold: None,
236 unit_norm: true,
237 }
238 }
239
240 pub fn flat(dim: usize) -> Self {
242 Self {
243 dim,
244 index_type: VectorIndexType::Flat,
245 quantization: DenseVectorQuantization::F32,
246 num_clusters: None,
247 nprobe: 0,
248 build_threshold: None,
249 unit_norm: true,
250 }
251 }
252
253 pub fn with_quantization(mut self, quantization: DenseVectorQuantization) -> Self {
255 self.quantization = quantization;
256 self
257 }
258
259 pub fn with_build_threshold(mut self, threshold: usize) -> Self {
261 self.build_threshold = Some(threshold);
262 self
263 }
264
265 pub fn with_unit_norm(mut self) -> Self {
267 self.unit_norm = true;
268 self
269 }
270
271 pub fn with_num_clusters(mut self, num_clusters: usize) -> Self {
273 self.num_clusters = Some(num_clusters);
274 self
275 }
276
277 pub fn uses_ivf(&self) -> bool {
279 matches!(
280 self.index_type,
281 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN
282 )
283 }
284
285 pub fn uses_scann(&self) -> bool {
287 self.index_type == VectorIndexType::ScaNN
288 }
289
290 pub fn is_flat(&self) -> bool {
292 self.index_type == VectorIndexType::Flat
293 }
294
295 pub fn default_build_threshold(&self) -> usize {
297 self.build_threshold.unwrap_or(match self.index_type {
298 VectorIndexType::Flat => usize::MAX, VectorIndexType::RaBitQ => 1000,
300 VectorIndexType::IvfRaBitQ | VectorIndexType::ScaNN => 10000,
301 })
302 }
303
304 pub fn optimal_num_clusters(&self, num_vectors: usize) -> usize {
306 self.num_clusters.unwrap_or_else(|| {
307 let optimal = (num_vectors as f64).sqrt() as usize;
309 optimal.clamp(16, 4096)
310 })
311 }
312}
313
314use super::query_field_router::QueryRouterRule;
315
/// Ordered set of field definitions with a name lookup table and query defaults.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field definitions; a `Field` handle is an index into this list.
    fields: Vec<FieldEntry>,
    /// Field name to handle lookup, filled in by `SchemaBuilder::build`.
    name_to_field: HashMap<String, Field>,
    /// Default field handles; empty when absent from serialized input.
    /// NOTE(review): presumably the fields searched when a query names none —
    /// confirm with the query code.
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query routing rules; empty when absent from serialized input.
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
328
329impl Schema {
330 pub fn builder() -> SchemaBuilder {
331 SchemaBuilder::default()
332 }
333
334 pub fn get_field(&self, name: &str) -> Option<Field> {
335 self.name_to_field.get(name).copied()
336 }
337
338 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
339 self.fields.get(field.0 as usize)
340 }
341
342 pub fn get_field_name(&self, field: Field) -> Option<&str> {
343 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
344 }
345
346 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
347 self.fields
348 .iter()
349 .enumerate()
350 .map(|(i, e)| (Field(i as u32), e))
351 }
352
353 pub fn num_fields(&self) -> usize {
354 self.fields.len()
355 }
356
357 pub fn default_fields(&self) -> &[Field] {
359 &self.default_fields
360 }
361
362 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
364 self.default_fields = fields;
365 }
366
367 pub fn query_routers(&self) -> &[QueryRouterRule] {
369 &self.query_routers
370 }
371
372 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
374 self.query_routers = rules;
375 }
376}
377
/// Incrementally assembles a [`Schema`]; fields receive handles in the order
/// they are added.
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Field definitions added so far, in insertion order.
    fields: Vec<FieldEntry>,
    /// Default fields given by name; `build` resolves them to handles and
    /// drops names that match no field.
    default_fields: Vec<String>,
    /// Query routing rules handed to the built schema unchanged.
    query_routers: Vec<QueryRouterRule>,
}
385
386impl SchemaBuilder {
387 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
388 self.add_field_with_tokenizer(
389 name,
390 FieldType::Text,
391 indexed,
392 stored,
393 Some("default".to_string()),
394 )
395 }
396
397 pub fn add_text_field_with_tokenizer(
398 &mut self,
399 name: &str,
400 indexed: bool,
401 stored: bool,
402 tokenizer: &str,
403 ) -> Field {
404 self.add_field_with_tokenizer(
405 name,
406 FieldType::Text,
407 indexed,
408 stored,
409 Some(tokenizer.to_string()),
410 )
411 }
412
413 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
414 self.add_field(name, FieldType::U64, indexed, stored)
415 }
416
417 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
418 self.add_field(name, FieldType::I64, indexed, stored)
419 }
420
421 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
422 self.add_field(name, FieldType::F64, indexed, stored)
423 }
424
425 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
426 self.add_field(name, FieldType::Bytes, false, stored)
427 }
428
429 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
434 self.add_field(name, FieldType::Json, false, stored)
435 }
436
437 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
442 self.add_sparse_vector_field_with_config(
443 name,
444 indexed,
445 stored,
446 crate::structures::SparseVectorConfig::default(),
447 )
448 }
449
450 pub fn add_sparse_vector_field_with_config(
455 &mut self,
456 name: &str,
457 indexed: bool,
458 stored: bool,
459 config: crate::structures::SparseVectorConfig,
460 ) -> Field {
461 let field = Field(self.fields.len() as u32);
462 self.fields.push(FieldEntry {
463 name: name.to_string(),
464 field_type: FieldType::SparseVector,
465 indexed,
466 stored,
467 tokenizer: None,
468 multi: false,
469 positions: None,
470 sparse_vector_config: Some(config),
471 dense_vector_config: None,
472 fast: false,
473 });
474 field
475 }
476
477 pub fn set_sparse_vector_config(
479 &mut self,
480 field: Field,
481 config: crate::structures::SparseVectorConfig,
482 ) {
483 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
484 entry.sparse_vector_config = Some(config);
485 }
486 }
487
488 pub fn add_dense_vector_field(
493 &mut self,
494 name: &str,
495 dim: usize,
496 indexed: bool,
497 stored: bool,
498 ) -> Field {
499 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
500 }
501
502 pub fn add_dense_vector_field_with_config(
504 &mut self,
505 name: &str,
506 indexed: bool,
507 stored: bool,
508 config: DenseVectorConfig,
509 ) -> Field {
510 let field = Field(self.fields.len() as u32);
511 self.fields.push(FieldEntry {
512 name: name.to_string(),
513 field_type: FieldType::DenseVector,
514 indexed,
515 stored,
516 tokenizer: None,
517 multi: false,
518 positions: None,
519 sparse_vector_config: None,
520 dense_vector_config: Some(config),
521 fast: false,
522 });
523 field
524 }
525
526 fn add_field(
527 &mut self,
528 name: &str,
529 field_type: FieldType,
530 indexed: bool,
531 stored: bool,
532 ) -> Field {
533 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
534 }
535
536 fn add_field_with_tokenizer(
537 &mut self,
538 name: &str,
539 field_type: FieldType,
540 indexed: bool,
541 stored: bool,
542 tokenizer: Option<String>,
543 ) -> Field {
544 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
545 }
546
547 fn add_field_full(
548 &mut self,
549 name: &str,
550 field_type: FieldType,
551 indexed: bool,
552 stored: bool,
553 tokenizer: Option<String>,
554 multi: bool,
555 ) -> Field {
556 let field = Field(self.fields.len() as u32);
557 self.fields.push(FieldEntry {
558 name: name.to_string(),
559 field_type,
560 indexed,
561 stored,
562 tokenizer,
563 multi,
564 positions: None,
565 sparse_vector_config: None,
566 dense_vector_config: None,
567 fast: false,
568 });
569 field
570 }
571
572 pub fn set_multi(&mut self, field: Field, multi: bool) {
574 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
575 entry.multi = multi;
576 }
577 }
578
579 pub fn set_fast(&mut self, field: Field, fast: bool) {
582 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
583 entry.fast = fast;
584 }
585 }
586
587 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
589 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
590 entry.positions = Some(mode);
591 }
592 }
593
594 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
596 self.default_fields = field_names;
597 }
598
599 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
601 self.query_routers = rules;
602 }
603
604 pub fn build(self) -> Schema {
605 let mut name_to_field = HashMap::new();
606 for (i, entry) in self.fields.iter().enumerate() {
607 name_to_field.insert(entry.name.clone(), Field(i as u32));
608 }
609
610 let default_fields: Vec<Field> = self
612 .default_fields
613 .iter()
614 .filter_map(|name| name_to_field.get(name).copied())
615 .collect();
616
617 Schema {
618 fields: self.fields,
619 name_to_field,
620 default_fields,
621 query_routers: self.query_routers,
622 }
623 }
624}
625
626#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
628pub enum FieldValue {
629 #[serde(rename = "text")]
630 Text(String),
631 #[serde(rename = "u64")]
632 U64(u64),
633 #[serde(rename = "i64")]
634 I64(i64),
635 #[serde(rename = "f64")]
636 F64(f64),
637 #[serde(rename = "bytes")]
638 Bytes(Vec<u8>),
639 #[serde(rename = "sparse_vector")]
641 SparseVector(Vec<(u32, f32)>),
642 #[serde(rename = "dense_vector")]
644 DenseVector(Vec<f32>),
645 #[serde(rename = "json")]
647 Json(serde_json::Value),
648}
649
650impl FieldValue {
651 pub fn as_text(&self) -> Option<&str> {
652 match self {
653 FieldValue::Text(s) => Some(s),
654 _ => None,
655 }
656 }
657
658 pub fn as_u64(&self) -> Option<u64> {
659 match self {
660 FieldValue::U64(v) => Some(*v),
661 _ => None,
662 }
663 }
664
665 pub fn as_i64(&self) -> Option<i64> {
666 match self {
667 FieldValue::I64(v) => Some(*v),
668 _ => None,
669 }
670 }
671
672 pub fn as_f64(&self) -> Option<f64> {
673 match self {
674 FieldValue::F64(v) => Some(*v),
675 _ => None,
676 }
677 }
678
679 pub fn as_bytes(&self) -> Option<&[u8]> {
680 match self {
681 FieldValue::Bytes(b) => Some(b),
682 _ => None,
683 }
684 }
685
686 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
687 match self {
688 FieldValue::SparseVector(entries) => Some(entries),
689 _ => None,
690 }
691 }
692
693 pub fn as_dense_vector(&self) -> Option<&[f32]> {
694 match self {
695 FieldValue::DenseVector(v) => Some(v),
696 _ => None,
697 }
698 }
699
700 pub fn as_json(&self) -> Option<&serde_json::Value> {
701 match self {
702 FieldValue::Json(v) => Some(v),
703 _ => None,
704 }
705 }
706}
707
/// A document: an ordered list of `(field, value)` pairs.
///
/// The same field may appear more than once; `get_all` returns every value
/// for a field and `get_first` the earliest one.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// Values in insertion order, each tagged with the field it belongs to.
    field_values: Vec<(Field, FieldValue)>,
}
713
714impl Document {
715 pub fn new() -> Self {
716 Self::default()
717 }
718
719 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
720 self.field_values
721 .push((field, FieldValue::Text(value.into())));
722 }
723
724 pub fn add_u64(&mut self, field: Field, value: u64) {
725 self.field_values.push((field, FieldValue::U64(value)));
726 }
727
728 pub fn add_i64(&mut self, field: Field, value: i64) {
729 self.field_values.push((field, FieldValue::I64(value)));
730 }
731
732 pub fn add_f64(&mut self, field: Field, value: f64) {
733 self.field_values.push((field, FieldValue::F64(value)));
734 }
735
736 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
737 self.field_values.push((field, FieldValue::Bytes(value)));
738 }
739
740 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
741 self.field_values
742 .push((field, FieldValue::SparseVector(entries)));
743 }
744
745 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
746 self.field_values
747 .push((field, FieldValue::DenseVector(values)));
748 }
749
750 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
751 self.field_values.push((field, FieldValue::Json(value)));
752 }
753
754 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
755 self.field_values
756 .iter()
757 .find(|(f, _)| *f == field)
758 .map(|(_, v)| v)
759 }
760
761 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
762 self.field_values
763 .iter()
764 .filter(move |(f, _)| *f == field)
765 .map(|(_, v)| v)
766 }
767
768 pub fn field_values(&self) -> &[(Field, FieldValue)] {
769 &self.field_values
770 }
771
772 pub fn filter_stored(&self, schema: &Schema) -> Document {
774 Document {
775 field_values: self
776 .field_values
777 .iter()
778 .filter(|(field, _)| {
779 schema
780 .get_field_entry(*field)
781 .is_some_and(|entry| entry.stored)
782 })
783 .cloned()
784 .collect(),
785 }
786 }
787
788 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
794 use std::collections::HashMap;
795
796 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
798 HashMap::new();
799
800 for (field, value) in &self.field_values {
801 if let Some(entry) = schema.get_field_entry(*field) {
802 let json_value = match value {
803 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
804 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
805 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
806 FieldValue::F64(n) => serde_json::json!(n),
807 FieldValue::Bytes(b) => {
808 use base64::Engine;
809 serde_json::Value::String(
810 base64::engine::general_purpose::STANDARD.encode(b),
811 )
812 }
813 FieldValue::SparseVector(entries) => {
814 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
815 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
816 serde_json::json!({
817 "indices": indices,
818 "values": values
819 })
820 }
821 FieldValue::DenseVector(values) => {
822 serde_json::json!(values)
823 }
824 FieldValue::Json(v) => v.clone(),
825 };
826 field_values_map
827 .entry(*field)
828 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
829 .2
830 .push(json_value);
831 }
832 }
833
834 let mut map = serde_json::Map::new();
836 for (_field, (name, is_multi, values)) in field_values_map {
837 let json_value = if is_multi || values.len() > 1 {
838 serde_json::Value::Array(values)
839 } else {
840 values.into_iter().next().unwrap()
841 };
842 map.insert(name, json_value);
843 }
844
845 serde_json::Value::Object(map)
846 }
847
848 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
857 let obj = json.as_object()?;
858 let mut doc = Document::new();
859
860 for (key, value) in obj {
861 if let Some(field) = schema.get_field(key) {
862 let field_entry = schema.get_field_entry(field)?;
863 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
864 }
865 }
866
867 Some(doc)
868 }
869
870 fn add_json_value(
872 doc: &mut Document,
873 field: Field,
874 field_type: &FieldType,
875 value: &serde_json::Value,
876 ) {
877 match value {
878 serde_json::Value::String(s) => {
879 if matches!(field_type, FieldType::Text) {
880 doc.add_text(field, s.clone());
881 }
882 }
883 serde_json::Value::Number(n) => {
884 match field_type {
885 FieldType::I64 => {
886 if let Some(i) = n.as_i64() {
887 doc.add_i64(field, i);
888 }
889 }
890 FieldType::U64 => {
891 if let Some(u) = n.as_u64() {
892 doc.add_u64(field, u);
893 } else if let Some(i) = n.as_i64() {
894 if i >= 0 {
896 doc.add_u64(field, i as u64);
897 }
898 }
899 }
900 FieldType::F64 => {
901 if let Some(f) = n.as_f64() {
902 doc.add_f64(field, f);
903 }
904 }
905 _ => {}
906 }
907 }
908 serde_json::Value::Array(arr) => {
910 for item in arr {
911 Self::add_json_value(doc, field, field_type, item);
912 }
913 }
914 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
916 if let (Some(indices_val), Some(values_val)) =
917 (obj.get("indices"), obj.get("values"))
918 {
919 let indices: Vec<u32> = indices_val
920 .as_array()
921 .map(|arr| {
922 arr.iter()
923 .filter_map(|v| v.as_u64().map(|n| n as u32))
924 .collect()
925 })
926 .unwrap_or_default();
927 let values: Vec<f32> = values_val
928 .as_array()
929 .map(|arr| {
930 arr.iter()
931 .filter_map(|v| v.as_f64().map(|n| n as f32))
932 .collect()
933 })
934 .unwrap_or_default();
935 if indices.len() == values.len() {
936 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
937 doc.add_sparse_vector(field, entries);
938 }
939 }
940 }
941 _ if matches!(field_type, FieldType::Json) => {
943 doc.add_json(field, value.clone());
944 }
945 serde_json::Value::Object(_) => {}
946 _ => {}
947 }
948 }
949}
950
#[cfg(test)]
mod tests {
    use super::*;

    /// Handles returned by the builder resolve by name on the built schema.
    #[test]
    fn test_schema_builder() {
        let mut schema_builder = Schema::builder();
        let title_field = schema_builder.add_text_field("title", true, true);
        let body_field = schema_builder.add_text_field("body", true, false);
        let count_field = schema_builder.add_u64_field("count", true, true);
        let schema = schema_builder.build();

        let expectations = [
            ("title", Some(title_field)),
            ("body", Some(body_field)),
            ("count", Some(count_field)),
            ("nonexistent", None),
        ];
        for (name, expected) in expectations {
            assert_eq!(schema.get_field(name), expected);
        }
    }

    /// Values added to a document can be read back through typed accessors.
    #[test]
    fn test_document() {
        let mut schema_builder = Schema::builder();
        let title_field = schema_builder.add_text_field("title", true, true);
        let count_field = schema_builder.add_u64_field("count", true, true);
        let _schema = schema_builder.build();

        let mut document = Document::new();
        document.add_text(title_field, "Hello World");
        document.add_u64(count_field, 42);

        let stored_title = document.get_first(title_field).unwrap();
        assert_eq!(stored_title.as_text(), Some("Hello World"));
        let stored_count = document.get_first(count_field).unwrap();
        assert_eq!(stored_count.as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip.
    #[test]
    fn test_document_serialization() {
        let mut schema_builder = Schema::builder();
        let title_field = schema_builder.add_text_field("title", true, true);
        let count_field = schema_builder.add_u64_field("count", true, true);
        let _schema = schema_builder.build();

        let mut document = Document::new();
        document.add_text(title_field, "Hello World");
        document.add_u64(count_field, 42);

        let json = serde_json::to_string(&document).unwrap();
        println!("Serialized doc: {}", json);

        let restored: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(
            restored.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        let restored_title = restored.get_first(title_field).unwrap();
        assert_eq!(restored_title.as_text(), Some("Hello World"));
        let restored_count = restored.get_first(count_field).unwrap();
        assert_eq!(restored_count.as_u64(), Some(42));
    }

    /// Several values for one field are kept in order and emitted as an array.
    #[test]
    fn test_multivalue_field() {
        let mut schema_builder = Schema::builder();
        let uris_field = schema_builder.add_text_field("uris", true, true);
        let title_field = schema_builder.add_text_field("title", true, true);
        let schema = schema_builder.build();

        let mut document = Document::new();
        document.add_text(uris_field, "one");
        document.add_text(uris_field, "two");
        document.add_text(title_field, "Test Document");

        let first_uri = document.get_first(uris_field).unwrap();
        assert_eq!(first_uri.as_text(), Some("one"));

        let collected: Vec<_> = document.get_all(uris_field).collect();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].as_text(), Some("one"));
        assert_eq!(collected[1].as_text(), Some("two"));

        let rendered = document.to_json(&schema);

        let rendered_uris = rendered.get("uris").unwrap();
        assert!(
            rendered_uris.is_array(),
            "Multi-value field should be an array"
        );
        let uri_items = rendered_uris.as_array().unwrap();
        assert_eq!(uri_items.len(), 2);
        assert_eq!(uri_items[0].as_str(), Some("one"));
        assert_eq!(uri_items[1].as_str(), Some("two"));

        let rendered_title = rendered.get("title").unwrap();
        assert!(
            rendered_title.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(rendered_title.as_str(), Some("Test Document"));
    }

    /// A JSON array is read as several values of the same field.
    #[test]
    fn test_multivalue_from_json() {
        let mut schema_builder = Schema::builder();
        let uris_field = schema_builder.add_text_field("uris", true, true);
        let title_field = schema_builder.add_text_field("title", true, true);
        let schema = schema_builder.build();

        let input = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let document = Document::from_json(&input, &schema).unwrap();

        let collected: Vec<_> = document.get_all(uris_field).collect();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].as_text(), Some("one"));
        assert_eq!(collected[1].as_text(), Some("two"));

        let parsed_title = document.get_first(title_field).unwrap();
        assert_eq!(parsed_title.as_text(), Some("Test Document"));

        let rendered = document.to_json(&schema);
        let uri_items = rendered.get("uris").unwrap().as_array().unwrap();
        assert_eq!(uri_items.len(), 2);
        assert_eq!(uri_items[0].as_str(), Some("one"));
        assert_eq!(uri_items[1].as_str(), Some("two"));
    }

    /// The `multi` flag forces array output even for a single value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut schema_builder = Schema::builder();
        let uris_field = schema_builder.add_text_field("uris", true, true);
        schema_builder.set_multi(uris_field, true);
        let title_field = schema_builder.add_text_field("title", true, true);
        let schema = schema_builder.build();

        let uris_entry = schema.get_field_entry(uris_field).unwrap();
        assert!(uris_entry.multi);
        let title_entry = schema.get_field_entry(title_field).unwrap();
        assert!(!title_entry.multi);

        let mut document = Document::new();
        document.add_text(uris_field, "only_one");
        document.add_text(title_field, "Test Document");

        let rendered = document.to_json(&schema);

        let rendered_uris = rendered.get("uris").unwrap();
        assert!(
            rendered_uris.is_array(),
            "Multi field should be array even with single value"
        );
        let uri_items = rendered_uris.as_array().unwrap();
        assert_eq!(uri_items.len(), 1);
        assert_eq!(uri_items[0].as_str(), Some("only_one"));

        let rendered_title = rendered.get("title").unwrap();
        assert!(
            rendered_title.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(rendered_title.as_str(), Some("Test Document"));
    }

    /// Sparse vectors are stored as pairs and survive a JSON round trip.
    #[test]
    fn test_sparse_vector_field() {
        let mut schema_builder = Schema::builder();
        let embedding_field = schema_builder.add_sparse_vector_field("embedding", true, true);
        let title_field = schema_builder.add_text_field("title", true, true);
        let schema = schema_builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding_field));
        let embedding_entry = schema.get_field_entry(embedding_field).unwrap();
        assert_eq!(embedding_entry.field_type, FieldType::SparseVector);

        let mut document = Document::new();
        document.add_sparse_vector(embedding_field, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        document.add_text(title_field, "Test Document");

        let entries = document
            .get_first(embedding_field)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(entries, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        let rendered = document.to_json(&schema);
        let rendered_embedding = rendered.get("embedding").unwrap();
        assert!(rendered_embedding.is_object());
        let rendered_indices = rendered_embedding
            .get("indices")
            .unwrap()
            .as_array()
            .unwrap();
        assert_eq!(rendered_indices.len(), 3);

        let restored = Document::from_json(&rendered, &schema).unwrap();
        let restored_entries = restored
            .get_first(embedding_field)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        let expected_entries = [(0u32, 1.0f32), (5, 2.5), (10, 0.5)];
        for (position, expected) in expected_entries.iter().enumerate() {
            let actual = restored_entries[position];
            assert_eq!(actual.0, expected.0);
            assert!((actual.1 - expected.1).abs() < 1e-6);
        }
    }

    /// JSON fields keep nested objects intact through storage and round trips.
    #[test]
    fn test_json_field() {
        let mut schema_builder = Schema::builder();
        let metadata_field = schema_builder.add_json_field("metadata", true);
        let title_field = schema_builder.add_text_field("title", true, true);
        let schema = schema_builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata_field));
        let metadata_entry = schema.get_field_entry(metadata_field).unwrap();
        assert_eq!(metadata_entry.field_type, FieldType::Json);
        assert!(!metadata_entry.indexed);
        assert!(metadata_entry.stored);

        let payload = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut document = Document::new();
        document.add_json(metadata_field, payload.clone());
        document.add_text(title_field, "Test Document");

        let stored_payload = document
            .get_first(metadata_field)
            .unwrap()
            .as_json()
            .unwrap();
        assert_eq!(stored_payload, &payload);
        let author = stored_payload.get("author").unwrap();
        assert_eq!(author.as_str(), Some("John Doe"));

        let rendered = document.to_json(&schema);
        let rendered_metadata = rendered.get("metadata").unwrap();
        assert_eq!(rendered_metadata, &payload);

        let restored = Document::from_json(&rendered, &schema).unwrap();
        let restored_payload = restored
            .get_first(metadata_field)
            .unwrap()
            .as_json()
            .unwrap();
        assert_eq!(restored_payload, &payload);
    }

    /// `add_json` stores arrays, strings, numbers, null and booleans verbatim.
    #[test]
    fn test_json_field_various_types() {
        let mut schema_builder = Schema::builder();
        let data_field = schema_builder.add_json_field("data", true);
        let _schema = schema_builder.build();

        let samples = [
            serde_json::json!([1, 2, 3, "four", null]),
            serde_json::json!("just a string"),
            serde_json::json!(42.5),
            serde_json::Value::Null,
            serde_json::json!(true),
        ];

        for sample in samples {
            let mut document = Document::new();
            document.add_json(data_field, sample.clone());
            let stored = document.get_first(data_field).unwrap().as_json().unwrap();
            assert_eq!(stored, &sample);
        }
    }
}