1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Numeric handle identifying a field of a schema.
///
/// `Schema` and `SchemaBuilder` use the wrapped `u32` as the zero-based
/// position of the field's `FieldEntry` in their field lists.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
/// Value type a schema field is declared with.
///
/// Every variant is serialized under the name given in its `serde(rename)`
/// attribute and has a same-named counterpart in `FieldValue`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FieldType {
    /// Text field; serialized as `"text"`.
    #[serde(rename = "text")]
    Text,
    /// Unsigned 64-bit integer field; serialized as `"u64"`.
    #[serde(rename = "u64")]
    U64,
    /// Signed 64-bit integer field; serialized as `"i64"`.
    #[serde(rename = "i64")]
    I64,
    /// 64-bit floating point field; serialized as `"f64"`.
    #[serde(rename = "f64")]
    F64,
    /// Raw byte field; serialized as `"bytes"`.
    #[serde(rename = "bytes")]
    Bytes,
    /// Sparse vector field (index/value pairs); serialized as `"sparse_vector"`.
    #[serde(rename = "sparse_vector")]
    SparseVector,
    /// Dense vector field; serialized as `"dense_vector"`.
    #[serde(rename = "dense_vector")]
    DenseVector,
    /// Arbitrary JSON field; serialized as `"json"`.
    #[serde(rename = "json")]
    Json,
}
38
/// Schema definition of a single field: its name, type and per-field options.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; `SchemaBuilder::build` uses it as the key for name lookups.
    pub name: String,
    /// Declared value type of the field.
    pub field_type: FieldType,
    /// Indexing flag supplied when the field was added.
    /// NOTE(review): how this flag is consumed is not visible in this file.
    pub indexed: bool,
    /// Storage flag supplied when the field was added.
    /// NOTE(review): how this flag is consumed is not visible in this file.
    pub stored: bool,
    /// Tokenizer name. The builder sets it for text fields (`"default"`
    /// unless one is given) and leaves it `None` for all other field types.
    pub tokenizer: Option<String>,
    /// When `true`, `Document::to_json` always emits this field as an array,
    /// even if the document holds a single value for it. Defaults to `false`
    /// when absent from deserialized input.
    #[serde(default)]
    pub multi: bool,
    /// Sparse vector settings; the builder fills this for sparse vector
    /// fields. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Dense vector settings; the builder fills this for dense vector
    /// fields. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
}
58
/// Index structure selected for a dense vector field.
///
/// NOTE(review): `rename_all = "snake_case"` inserts an underscore before
/// every uppercase letter, so the serialized names are presumably
/// `ra_bit_q`, `ivf_ra_bit_q` and `sca_n_n` — confirm before relying on them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Default index type; used by `DenseVectorConfig::new` and
    /// `DenseVectorConfig::without_raw`.
    #[default]
    RaBitQ,
    /// Index type selected by `DenseVectorConfig::with_ivf`, which also
    /// records a coarse centroids path.
    IvfRaBitQ,
    /// Index type selected by `DenseVectorConfig::with_scann`, which records
    /// both a coarse centroids path and a codebook path.
    ScaNN,
}
71
/// Per-field configuration for dense vector fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Vector dimension; `index_dim` returns it when `mrl_dim` is unset.
    pub dim: usize,
    /// Index structure to use; falls back to `VectorIndexType::default()`
    /// when absent from deserialized input.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Raw-storage flag; `true` when absent from deserialized input.
    /// NOTE(review): presumably controls whether the original vectors are
    /// kept next to the index — confirm against the indexing code.
    #[serde(default = "default_store_raw")]
    pub store_raw: bool,
    /// Path to the coarse centroids; set by `with_ivf` and `with_scann`.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coarse_centroids_path: Option<String>,
    /// Path to the codebook; set by `with_scann`.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pq_codebook_path: Option<String>,
    /// Probe count; 32 when absent from deserialized input.
    /// NOTE(review): presumably the number of clusters probed per query — confirm.
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// Optional dimension that `index_dim` returns instead of `dim`.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mrl_dim: Option<usize>,
}
99
/// Serde default for `DenseVectorConfig::store_raw`: raw storage is on
/// unless the input says otherwise.
fn default_store_raw() -> bool {
    const STORE_RAW_BY_DEFAULT: bool = true;
    STORE_RAW_BY_DEFAULT
}
103
/// Serde default for `DenseVectorConfig::nprobe`.
fn default_nprobe() -> usize {
    const NPROBE_BY_DEFAULT: usize = 32;
    NPROBE_BY_DEFAULT
}
107
108impl DenseVectorConfig {
109 pub fn new(dim: usize) -> Self {
110 Self {
111 dim,
112 index_type: VectorIndexType::RaBitQ,
113 store_raw: true,
114 coarse_centroids_path: None,
115 pq_codebook_path: None,
116 nprobe: 32,
117 mrl_dim: None,
118 }
119 }
120
121 pub fn with_ivf(dim: usize, centroids_path: String, nprobe: usize) -> Self {
122 Self {
123 dim,
124 index_type: VectorIndexType::IvfRaBitQ,
125 store_raw: true,
126 coarse_centroids_path: Some(centroids_path),
127 pq_codebook_path: None,
128 nprobe,
129 mrl_dim: None,
130 }
131 }
132
133 pub fn with_scann(
135 dim: usize,
136 centroids_path: String,
137 codebook_path: String,
138 nprobe: usize,
139 ) -> Self {
140 Self {
141 dim,
142 index_type: VectorIndexType::ScaNN,
143 store_raw: true,
144 coarse_centroids_path: Some(centroids_path),
145 pq_codebook_path: Some(codebook_path),
146 nprobe,
147 mrl_dim: None,
148 }
149 }
150
151 pub fn without_raw(dim: usize) -> Self {
152 Self {
153 dim,
154 index_type: VectorIndexType::RaBitQ,
155 store_raw: false,
156 coarse_centroids_path: None,
157 pq_codebook_path: None,
158 nprobe: 32,
159 mrl_dim: None,
160 }
161 }
162
163 pub fn with_mrl_dim(mut self, mrl_dim: usize) -> Self {
165 self.mrl_dim = Some(mrl_dim);
166 self
167 }
168
169 pub fn index_dim(&self) -> usize {
171 self.mrl_dim.unwrap_or(self.dim)
172 }
173
174 pub fn uses_ivf(&self) -> bool {
176 self.coarse_centroids_path.is_some()
177 }
178
179 pub fn uses_scann(&self) -> bool {
181 self.index_type == VectorIndexType::ScaNN
182 }
183}
184
185use super::query_field_router::QueryRouterRule;
186
/// Set of field definitions together with a name lookup table, the default
/// fields and the query routing rules.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field definitions; a `Field`'s inner value is an index into this list.
    fields: Vec<FieldEntry>,
    /// Lookup from field name to handle, filled by `SchemaBuilder::build`.
    name_to_field: HashMap<String, Field>,
    /// Default field handles; empty when absent from deserialized input.
    /// NOTE(review): presumably the fields searched when a query names
    /// none — confirm against the query code.
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query routing rules; empty when absent from deserialized input.
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
199
200impl Schema {
201 pub fn builder() -> SchemaBuilder {
202 SchemaBuilder::default()
203 }
204
205 pub fn get_field(&self, name: &str) -> Option<Field> {
206 self.name_to_field.get(name).copied()
207 }
208
209 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
210 self.fields.get(field.0 as usize)
211 }
212
213 pub fn get_field_name(&self, field: Field) -> Option<&str> {
214 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
215 }
216
217 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
218 self.fields
219 .iter()
220 .enumerate()
221 .map(|(i, e)| (Field(i as u32), e))
222 }
223
224 pub fn num_fields(&self) -> usize {
225 self.fields.len()
226 }
227
228 pub fn default_fields(&self) -> &[Field] {
230 &self.default_fields
231 }
232
233 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
235 self.default_fields = fields;
236 }
237
238 pub fn query_routers(&self) -> &[QueryRouterRule] {
240 &self.query_routers
241 }
242
243 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
245 self.query_routers = rules;
246 }
247}
248
/// Mutable accumulator used to assemble a `Schema`.
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Field definitions in the order they were added; the position of an
    /// entry becomes the value of its `Field` handle.
    fields: Vec<FieldEntry>,
    /// Names of the default fields; resolved to handles in `build`.
    default_fields: Vec<String>,
    /// Query routing rules handed to the built schema unchanged.
    query_routers: Vec<QueryRouterRule>,
}
256
257impl SchemaBuilder {
258 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
259 self.add_field_with_tokenizer(
260 name,
261 FieldType::Text,
262 indexed,
263 stored,
264 Some("default".to_string()),
265 )
266 }
267
268 pub fn add_text_field_with_tokenizer(
269 &mut self,
270 name: &str,
271 indexed: bool,
272 stored: bool,
273 tokenizer: &str,
274 ) -> Field {
275 self.add_field_with_tokenizer(
276 name,
277 FieldType::Text,
278 indexed,
279 stored,
280 Some(tokenizer.to_string()),
281 )
282 }
283
284 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
285 self.add_field(name, FieldType::U64, indexed, stored)
286 }
287
288 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
289 self.add_field(name, FieldType::I64, indexed, stored)
290 }
291
292 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
293 self.add_field(name, FieldType::F64, indexed, stored)
294 }
295
296 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
297 self.add_field(name, FieldType::Bytes, false, stored)
298 }
299
300 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
305 self.add_field(name, FieldType::Json, false, stored)
306 }
307
308 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
313 self.add_sparse_vector_field_with_config(
314 name,
315 indexed,
316 stored,
317 crate::structures::SparseVectorConfig::default(),
318 )
319 }
320
321 pub fn add_sparse_vector_field_with_config(
326 &mut self,
327 name: &str,
328 indexed: bool,
329 stored: bool,
330 config: crate::structures::SparseVectorConfig,
331 ) -> Field {
332 let field = Field(self.fields.len() as u32);
333 self.fields.push(FieldEntry {
334 name: name.to_string(),
335 field_type: FieldType::SparseVector,
336 indexed,
337 stored,
338 tokenizer: None,
339 multi: false,
340 sparse_vector_config: Some(config),
341 dense_vector_config: None,
342 });
343 field
344 }
345
346 pub fn set_sparse_vector_config(
348 &mut self,
349 field: Field,
350 config: crate::structures::SparseVectorConfig,
351 ) {
352 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
353 entry.sparse_vector_config = Some(config);
354 }
355 }
356
357 pub fn add_dense_vector_field(
362 &mut self,
363 name: &str,
364 dim: usize,
365 indexed: bool,
366 stored: bool,
367 ) -> Field {
368 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
369 }
370
371 pub fn add_dense_vector_field_with_config(
373 &mut self,
374 name: &str,
375 indexed: bool,
376 stored: bool,
377 config: DenseVectorConfig,
378 ) -> Field {
379 let field = Field(self.fields.len() as u32);
380 self.fields.push(FieldEntry {
381 name: name.to_string(),
382 field_type: FieldType::DenseVector,
383 indexed,
384 stored,
385 tokenizer: None,
386 multi: false,
387 sparse_vector_config: None,
388 dense_vector_config: Some(config),
389 });
390 field
391 }
392
393 fn add_field(
394 &mut self,
395 name: &str,
396 field_type: FieldType,
397 indexed: bool,
398 stored: bool,
399 ) -> Field {
400 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
401 }
402
403 fn add_field_with_tokenizer(
404 &mut self,
405 name: &str,
406 field_type: FieldType,
407 indexed: bool,
408 stored: bool,
409 tokenizer: Option<String>,
410 ) -> Field {
411 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
412 }
413
414 fn add_field_full(
415 &mut self,
416 name: &str,
417 field_type: FieldType,
418 indexed: bool,
419 stored: bool,
420 tokenizer: Option<String>,
421 multi: bool,
422 ) -> Field {
423 let field = Field(self.fields.len() as u32);
424 self.fields.push(FieldEntry {
425 name: name.to_string(),
426 field_type,
427 indexed,
428 stored,
429 tokenizer,
430 multi,
431 sparse_vector_config: None,
432 dense_vector_config: None,
433 });
434 field
435 }
436
437 pub fn set_multi(&mut self, field: Field, multi: bool) {
439 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
440 entry.multi = multi;
441 }
442 }
443
444 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
446 self.default_fields = field_names;
447 }
448
449 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
451 self.query_routers = rules;
452 }
453
454 pub fn build(self) -> Schema {
455 let mut name_to_field = HashMap::new();
456 for (i, entry) in self.fields.iter().enumerate() {
457 name_to_field.insert(entry.name.clone(), Field(i as u32));
458 }
459
460 let default_fields: Vec<Field> = self
462 .default_fields
463 .iter()
464 .filter_map(|name| name_to_field.get(name).copied())
465 .collect();
466
467 Schema {
468 fields: self.fields,
469 name_to_field,
470 default_fields,
471 query_routers: self.query_routers,
472 }
473 }
474}
475
/// A single value stored in a `Document`, tagged with its type.
///
/// The variants mirror `FieldType` and are serialized under the same names.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FieldValue {
    /// Text value.
    #[serde(rename = "text")]
    Text(String),
    /// Unsigned 64-bit integer value.
    #[serde(rename = "u64")]
    U64(u64),
    /// Signed 64-bit integer value.
    #[serde(rename = "i64")]
    I64(i64),
    /// 64-bit floating point value.
    #[serde(rename = "f64")]
    F64(f64),
    /// Raw bytes; `Document::to_json` renders them as a base64 string.
    #[serde(rename = "bytes")]
    Bytes(Vec<u8>),
    /// Sparse vector given as two parallel lists; `Document::add_sparse_vector`
    /// debug-asserts that `indices` and `values` have the same length.
    #[serde(rename = "sparse_vector")]
    SparseVector { indices: Vec<u32>, values: Vec<f32> },
    /// Dense vector of `f32` components.
    #[serde(rename = "dense_vector")]
    DenseVector(Vec<f32>),
    /// Arbitrary JSON value.
    #[serde(rename = "json")]
    Json(serde_json::Value),
}
499
500impl FieldValue {
501 pub fn as_text(&self) -> Option<&str> {
502 match self {
503 FieldValue::Text(s) => Some(s),
504 _ => None,
505 }
506 }
507
508 pub fn as_u64(&self) -> Option<u64> {
509 match self {
510 FieldValue::U64(v) => Some(*v),
511 _ => None,
512 }
513 }
514
515 pub fn as_i64(&self) -> Option<i64> {
516 match self {
517 FieldValue::I64(v) => Some(*v),
518 _ => None,
519 }
520 }
521
522 pub fn as_f64(&self) -> Option<f64> {
523 match self {
524 FieldValue::F64(v) => Some(*v),
525 _ => None,
526 }
527 }
528
529 pub fn as_bytes(&self) -> Option<&[u8]> {
530 match self {
531 FieldValue::Bytes(b) => Some(b),
532 _ => None,
533 }
534 }
535
536 pub fn as_sparse_vector(&self) -> Option<(&[u32], &[f32])> {
537 match self {
538 FieldValue::SparseVector { indices, values } => Some((indices, values)),
539 _ => None,
540 }
541 }
542
543 pub fn as_dense_vector(&self) -> Option<&[f32]> {
544 match self {
545 FieldValue::DenseVector(v) => Some(v),
546 _ => None,
547 }
548 }
549
550 pub fn as_json(&self) -> Option<&serde_json::Value> {
551 match self {
552 FieldValue::Json(v) => Some(v),
553 _ => None,
554 }
555 }
556}
557
/// A document: an ordered list of `(field, value)` pairs.
///
/// The same field may appear more than once; `get_first` returns the
/// earliest pair for a field and `get_all` returns all of them in
/// insertion order.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// Values in the order they were added.
    field_values: Vec<(Field, FieldValue)>,
}
563
564impl Document {
565 pub fn new() -> Self {
566 Self::default()
567 }
568
569 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
570 self.field_values
571 .push((field, FieldValue::Text(value.into())));
572 }
573
574 pub fn add_u64(&mut self, field: Field, value: u64) {
575 self.field_values.push((field, FieldValue::U64(value)));
576 }
577
578 pub fn add_i64(&mut self, field: Field, value: i64) {
579 self.field_values.push((field, FieldValue::I64(value)));
580 }
581
582 pub fn add_f64(&mut self, field: Field, value: f64) {
583 self.field_values.push((field, FieldValue::F64(value)));
584 }
585
586 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
587 self.field_values.push((field, FieldValue::Bytes(value)));
588 }
589
590 pub fn add_sparse_vector(&mut self, field: Field, indices: Vec<u32>, values: Vec<f32>) {
591 debug_assert_eq!(
592 indices.len(),
593 values.len(),
594 "Sparse vector indices and values must have same length"
595 );
596 self.field_values
597 .push((field, FieldValue::SparseVector { indices, values }));
598 }
599
600 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
601 self.field_values
602 .push((field, FieldValue::DenseVector(values)));
603 }
604
605 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
606 self.field_values.push((field, FieldValue::Json(value)));
607 }
608
609 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
610 self.field_values
611 .iter()
612 .find(|(f, _)| *f == field)
613 .map(|(_, v)| v)
614 }
615
616 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
617 self.field_values
618 .iter()
619 .filter(move |(f, _)| *f == field)
620 .map(|(_, v)| v)
621 }
622
623 pub fn field_values(&self) -> &[(Field, FieldValue)] {
624 &self.field_values
625 }
626
627 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
633 use std::collections::HashMap;
634
635 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
637 HashMap::new();
638
639 for (field, value) in &self.field_values {
640 if let Some(entry) = schema.get_field_entry(*field) {
641 let json_value = match value {
642 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
643 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
644 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
645 FieldValue::F64(n) => serde_json::json!(n),
646 FieldValue::Bytes(b) => {
647 use base64::Engine;
648 serde_json::Value::String(
649 base64::engine::general_purpose::STANDARD.encode(b),
650 )
651 }
652 FieldValue::SparseVector { indices, values } => {
653 serde_json::json!({
654 "indices": indices,
655 "values": values
656 })
657 }
658 FieldValue::DenseVector(values) => {
659 serde_json::json!(values)
660 }
661 FieldValue::Json(v) => v.clone(),
662 };
663 field_values_map
664 .entry(*field)
665 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
666 .2
667 .push(json_value);
668 }
669 }
670
671 let mut map = serde_json::Map::new();
673 for (_field, (name, is_multi, values)) in field_values_map {
674 let json_value = if is_multi || values.len() > 1 {
675 serde_json::Value::Array(values)
676 } else {
677 values.into_iter().next().unwrap()
678 };
679 map.insert(name, json_value);
680 }
681
682 serde_json::Value::Object(map)
683 }
684
685 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
694 let obj = json.as_object()?;
695 let mut doc = Document::new();
696
697 for (key, value) in obj {
698 if let Some(field) = schema.get_field(key) {
699 let field_entry = schema.get_field_entry(field)?;
700 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
701 }
702 }
703
704 Some(doc)
705 }
706
707 fn add_json_value(
709 doc: &mut Document,
710 field: Field,
711 field_type: &FieldType,
712 value: &serde_json::Value,
713 ) {
714 match value {
715 serde_json::Value::String(s) => {
716 if matches!(field_type, FieldType::Text) {
717 doc.add_text(field, s.clone());
718 }
719 }
720 serde_json::Value::Number(n) => {
721 match field_type {
722 FieldType::I64 => {
723 if let Some(i) = n.as_i64() {
724 doc.add_i64(field, i);
725 }
726 }
727 FieldType::U64 => {
728 if let Some(u) = n.as_u64() {
729 doc.add_u64(field, u);
730 } else if let Some(i) = n.as_i64() {
731 if i >= 0 {
733 doc.add_u64(field, i as u64);
734 }
735 }
736 }
737 FieldType::F64 => {
738 if let Some(f) = n.as_f64() {
739 doc.add_f64(field, f);
740 }
741 }
742 _ => {}
743 }
744 }
745 serde_json::Value::Array(arr) => {
747 for item in arr {
748 Self::add_json_value(doc, field, field_type, item);
749 }
750 }
751 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
753 if let (Some(indices_val), Some(values_val)) =
754 (obj.get("indices"), obj.get("values"))
755 {
756 let indices: Vec<u32> = indices_val
757 .as_array()
758 .map(|arr| {
759 arr.iter()
760 .filter_map(|v| v.as_u64().map(|n| n as u32))
761 .collect()
762 })
763 .unwrap_or_default();
764 let values: Vec<f32> = values_val
765 .as_array()
766 .map(|arr| {
767 arr.iter()
768 .filter_map(|v| v.as_f64().map(|n| n as f32))
769 .collect()
770 })
771 .unwrap_or_default();
772 if indices.len() == values.len() {
773 doc.add_sparse_vector(field, indices, values);
774 }
775 }
776 }
777 _ if matches!(field_type, FieldType::Json) => {
779 doc.add_json(field, value.clone());
780 }
781 serde_json::Value::Object(_) => {}
782 _ => {}
783 }
784 }
785}
786
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a schema with the text fields `uris` and `title`, in that order.
    fn uris_title_schema() -> (Schema, Field, Field) {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        (builder.build(), uris, title)
    }

    /// Collects the text of every value stored for `field`, in insertion order.
    fn texts_of(doc: &Document, field: Field) -> Vec<Option<&str>> {
        doc.get_all(field).map(|value| value.as_text()).collect()
    }

    /// Collects the string form of every element of a JSON array.
    fn strings_of(array: &[serde_json::Value]) -> Vec<Option<&str>> {
        array.iter().map(|item| item.as_str()).collect()
    }

    #[test]
    fn test_schema_builder() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, false);
        let count = builder.add_u64_field("count", true, true);
        let schema = builder.build();

        let expectations = [
            ("title", Some(title)),
            ("body", Some(body)),
            ("count", Some(count)),
            ("nonexistent", None),
        ];
        for (name, expected) in expectations.iter() {
            assert_eq!(schema.get_field(name), *expected);
        }
    }

    #[test]
    fn test_document() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        let stored_title = doc.get_first(title).unwrap();
        let stored_count = doc.get_first(count).unwrap();
        assert_eq!(stored_title.as_text(), Some("Hello World"));
        assert_eq!(stored_count.as_u64(), Some(42));
    }

    #[test]
    fn test_document_serialization() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut original = Document::new();
        original.add_text(title, "Hello World");
        original.add_u64(count, 42);

        // Round-trip through the serde representation of the document itself.
        let json = serde_json::to_string(&original).unwrap();
        println!("Serialized doc: {}", json);
        let restored: Document = serde_json::from_str(&json).unwrap();

        assert_eq!(
            restored.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        assert_eq!(
            restored.get_first(title).unwrap().as_text(),
            Some("Hello World")
        );
        assert_eq!(restored.get_first(count).unwrap().as_u64(), Some(42));
    }

    #[test]
    fn test_multivalue_field() {
        let (schema, uris, title) = uris_title_schema();

        let mut doc = Document::new();
        for uri in ["one", "two"].iter() {
            doc.add_text(uris, *uri);
        }
        doc.add_text(title, "Test Document");

        // `get_first` sees only the earliest value, `get_all` sees both.
        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));
        assert_eq!(texts_of(&doc, uris), vec![Some("one"), Some("two")]);

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        assert_eq!(
            strings_of(uris_json.as_array().unwrap()),
            vec![Some("one"), Some("two")]
        );

        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    #[test]
    fn test_multivalue_from_json() {
        let (schema, uris, title) = uris_title_schema();

        let input = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });
        let doc = Document::from_json(&input, &schema).unwrap();

        // The array is expanded into one value per element.
        assert_eq!(texts_of(&doc, uris), vec![Some("one"), Some("two")]);
        assert_eq!(
            doc.get_first(title).unwrap().as_text(),
            Some("Test Document")
        );

        // Converting back yields the array again.
        let output = doc.to_json(&schema);
        let uris_out = output.get("uris").unwrap().as_array().unwrap();
        assert_eq!(strings_of(uris_out), vec![Some("one"), Some("two")]);
    }

    #[test]
    fn test_multi_attribute_forces_array() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        builder.set_multi(uris, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let uris_is_multi = schema.get_field_entry(uris).unwrap().multi;
        let title_is_multi = schema.get_field_entry(title).unwrap().multi;
        assert!(uris_is_multi);
        assert!(!title_is_multi);

        let mut doc = Document::new();
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        assert_eq!(
            strings_of(uris_json.as_array().unwrap()),
            vec![Some("only_one")]
        );

        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    #[test]
    fn test_sparse_vector_field() {
        let mut builder = Schema::builder();
        let embedding = builder.add_sparse_vector_field("embedding", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        let embedding_entry = schema.get_field_entry(embedding).unwrap();
        assert_eq!(embedding_entry.field_type, FieldType::SparseVector);

        let mut doc = Document::new();
        doc.add_sparse_vector(embedding, vec![0, 5, 10], vec![1.0, 2.5, 0.5]);
        doc.add_text(title, "Test Document");

        let stored = doc.get_first(embedding).unwrap();
        let (indices, values) = stored.as_sparse_vector().unwrap();
        assert_eq!(indices, &[0, 5, 10]);
        assert_eq!(values, &[1.0, 2.5, 0.5]);

        let json = doc.to_json(&schema);
        let embedding_json = json.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        let indices_json = embedding_json.get("indices").unwrap().as_array().unwrap();
        assert_eq!(indices_json.len(), 3);

        // Round-trip through JSON; floats are compared with a tolerance.
        let restored = Document::from_json(&json, &schema).unwrap();
        let (indices2, values2) = restored
            .get_first(embedding)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(indices2, &[0, 5, 10]);
        let expected_values = [1.0f32, 2.5, 0.5];
        assert!(values2.len() >= expected_values.len());
        for (position, expected) in expected_values.iter().enumerate() {
            assert!((values2[position] - expected).abs() < 1e-6);
        }
    }

    #[test]
    fn test_json_field() {
        let mut builder = Schema::builder();
        let metadata = builder.add_json_field("metadata", true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        let metadata_entry = schema.get_field_entry(metadata).unwrap();
        assert_eq!(metadata_entry.field_type, FieldType::Json);
        assert!(!metadata_entry.indexed);
        assert!(metadata_entry.stored);

        let payload = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, payload.clone());
        doc.add_text(title, "Test Document");

        let stored_json = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json, &payload);
        assert_eq!(
            stored_json.get("author").unwrap().as_str(),
            Some("John Doe")
        );

        let doc_json = doc.to_json(&schema);
        assert_eq!(doc_json.get("metadata").unwrap(), &payload);

        let restored = Document::from_json(&doc_json, &schema).unwrap();
        let restored_json = restored.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(restored_json, &payload);
    }

    #[test]
    fn test_json_field_various_types() {
        let mut builder = Schema::builder();
        let data = builder.add_json_field("data", true);
        let _schema = builder.build();

        // Array, string, number, null and boolean payloads, each stored in
        // a fresh document and read back unchanged.
        let samples = vec![
            serde_json::json!([1, 2, 3, "four", null]),
            serde_json::json!("just a string"),
            serde_json::json!(42.5),
            serde_json::Value::Null,
            serde_json::json!(true),
        ];

        for sample in samples {
            let mut doc = Document::new();
            doc.add_json(data, sample.clone());
            assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), &sample);
        }
    }
}