1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
/// Identifier of a field inside a [`Schema`].
///
/// The wrapped value is the zero-based position of the field in the schema's
/// field list: the builder hands out `Field(len)` as fields are added and the
/// schema accessors index their field vector with it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct Field(pub u32);
9
/// Kind of value a schema field holds.
///
/// Each variant is serialized under the lowercase name given in its
/// `#[serde(rename = ...)]` attribute.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum FieldType {
    /// UTF-8 text; serialized as `"text"`.
    #[serde(rename = "text")]
    Text,
    /// Unsigned 64-bit integer; serialized as `"u64"`.
    #[serde(rename = "u64")]
    U64,
    /// Signed 64-bit integer; serialized as `"i64"`.
    #[serde(rename = "i64")]
    I64,
    /// 64-bit float; serialized as `"f64"`.
    #[serde(rename = "f64")]
    F64,
    /// Raw bytes; serialized as `"bytes"`.
    #[serde(rename = "bytes")]
    Bytes,
    /// Sparse vector of `(index, weight)` pairs; serialized as `"sparse_vector"`.
    #[serde(rename = "sparse_vector")]
    SparseVector,
    /// Dense vector of `f32` components; serialized as `"dense_vector"`.
    #[serde(rename = "dense_vector")]
    DenseVector,
    /// Arbitrary JSON value; serialized as `"json"`.
    #[serde(rename = "json")]
    Json,
}
38
/// Definition of a single field in a [`Schema`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldEntry {
    /// Field name; the schema maps this name back to the field's [`Field`] id.
    pub name: String,
    /// Kind of value stored in this field.
    pub field_type: FieldType,
    /// Indexing flag as passed to the builder.
    // NOTE(review): the exact effect of `indexed` is decided by code outside this file.
    pub indexed: bool,
    /// Storage flag as passed to the builder.
    // NOTE(review): the exact effect of `stored` is decided by code outside this file.
    pub stored: bool,
    /// Tokenizer name. The builder sets this for text fields (`"default"`
    /// unless a name is supplied) and leaves it `None` for every other type.
    pub tokenizer: Option<String>,
    /// When `true`, `Document::to_json` always emits this field as an array,
    /// even for a single value. Defaults to `false` when absent from the
    /// serialized form.
    #[serde(default)]
    pub multi: bool,
    /// Sparse-vector settings; populated by the builder for sparse-vector
    /// fields and omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
    /// Dense-vector settings; populated by the builder for dense-vector
    /// fields and omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub dense_vector_config: Option<DenseVectorConfig>,
}
58
/// Index structure selected for a dense-vector field.
///
/// Serialized with serde's `snake_case` rule, which inserts an underscore
/// before every uppercase letter after the first. The wire names are therefore
/// `ra_bit_q`, `ivf_ra_bit_q` and `sca_n_n` — keep that in mind before renaming
/// a variant, since it changes the persisted format.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum VectorIndexType {
    /// Default index type; used by `DenseVectorConfig::new` and `without_raw`.
    #[default]
    RaBitQ,
    /// Selected by `DenseVectorConfig::with_ivf`, which also requires a
    /// coarse-centroids path.
    IvfRaBitQ,
    /// Selected by `DenseVectorConfig::with_scann`, which requires both a
    /// coarse-centroids path and a codebook path.
    ScaNN,
}
71
/// Settings for a dense-vector field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DenseVectorConfig {
    /// Dimensionality of the vectors in this field.
    pub dim: usize,
    /// Index structure to use; defaults to [`VectorIndexType::RaBitQ`] when
    /// absent from the serialized form.
    #[serde(default)]
    pub index_type: VectorIndexType,
    /// Raw-storage flag; defaults to `true` when absent from the serialized form.
    // NOTE(review): presumably controls whether the unquantized vectors are kept — confirm.
    #[serde(default = "default_store_raw")]
    pub store_raw: bool,
    /// Path to the coarse centroids. `uses_ivf` reports `true` whenever this
    /// is set. Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub coarse_centroids_path: Option<String>,
    /// Path to the PQ codebook; set by `with_scann`. Omitted from serialized
    /// output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub pq_codebook_path: Option<String>,
    /// Probe count; defaults to 32 when absent from the serialized form.
    // NOTE(review): presumably the number of clusters probed per query — confirm.
    #[serde(default = "default_nprobe")]
    pub nprobe: usize,
    /// Optional dimension override: when set, `index_dim` returns this value
    /// instead of `dim`. Omitted from serialized output when `None`.
    // NOTE(review): presumably a Matryoshka (MRL) truncation dimension — confirm.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub mrl_dim: Option<usize>,
}
99
/// Serde default for `DenseVectorConfig::store_raw`.
fn default_store_raw() -> bool {
    const STORE_RAW_BY_DEFAULT: bool = true;
    STORE_RAW_BY_DEFAULT
}
103
/// Serde default for `DenseVectorConfig::nprobe`.
fn default_nprobe() -> usize {
    const NPROBE_BY_DEFAULT: usize = 32;
    NPROBE_BY_DEFAULT
}
107
108impl DenseVectorConfig {
109 pub fn new(dim: usize) -> Self {
110 Self {
111 dim,
112 index_type: VectorIndexType::RaBitQ,
113 store_raw: true,
114 coarse_centroids_path: None,
115 pq_codebook_path: None,
116 nprobe: 32,
117 mrl_dim: None,
118 }
119 }
120
121 pub fn with_ivf(dim: usize, centroids_path: String, nprobe: usize) -> Self {
122 Self {
123 dim,
124 index_type: VectorIndexType::IvfRaBitQ,
125 store_raw: true,
126 coarse_centroids_path: Some(centroids_path),
127 pq_codebook_path: None,
128 nprobe,
129 mrl_dim: None,
130 }
131 }
132
133 pub fn with_scann(
135 dim: usize,
136 centroids_path: String,
137 codebook_path: String,
138 nprobe: usize,
139 ) -> Self {
140 Self {
141 dim,
142 index_type: VectorIndexType::ScaNN,
143 store_raw: true,
144 coarse_centroids_path: Some(centroids_path),
145 pq_codebook_path: Some(codebook_path),
146 nprobe,
147 mrl_dim: None,
148 }
149 }
150
151 pub fn without_raw(dim: usize) -> Self {
152 Self {
153 dim,
154 index_type: VectorIndexType::RaBitQ,
155 store_raw: false,
156 coarse_centroids_path: None,
157 pq_codebook_path: None,
158 nprobe: 32,
159 mrl_dim: None,
160 }
161 }
162
163 pub fn with_mrl_dim(mut self, mrl_dim: usize) -> Self {
165 self.mrl_dim = Some(mrl_dim);
166 self
167 }
168
169 pub fn index_dim(&self) -> usize {
171 self.mrl_dim.unwrap_or(self.dim)
172 }
173
174 pub fn uses_ivf(&self) -> bool {
176 self.coarse_centroids_path.is_some()
177 }
178
179 pub fn uses_scann(&self) -> bool {
181 self.index_type == VectorIndexType::ScaNN
182 }
183}
184
185use super::query_field_router::QueryRouterRule;
186
/// Collection of field definitions plus lookup and query-routing metadata.
///
/// Construct one through `Schema::builder()`; a [`Field`] id is an index into
/// `fields`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Schema {
    /// Field definitions, indexed by `Field.0`.
    fields: Vec<FieldEntry>,
    /// Reverse lookup from field name to field id.
    name_to_field: HashMap<String, Field>,
    /// Fields reported by `default_fields()`; empty when absent from the
    /// serialized form.
    // NOTE(review): presumably the fields searched when a query names none — confirm.
    #[serde(default)]
    default_fields: Vec<Field>,
    /// Query routing rules; empty when absent from the serialized form.
    #[serde(default)]
    query_routers: Vec<QueryRouterRule>,
}
199
200impl Schema {
201 pub fn builder() -> SchemaBuilder {
202 SchemaBuilder::default()
203 }
204
205 pub fn get_field(&self, name: &str) -> Option<Field> {
206 self.name_to_field.get(name).copied()
207 }
208
209 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
210 self.fields.get(field.0 as usize)
211 }
212
213 pub fn get_field_name(&self, field: Field) -> Option<&str> {
214 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
215 }
216
217 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
218 self.fields
219 .iter()
220 .enumerate()
221 .map(|(i, e)| (Field(i as u32), e))
222 }
223
224 pub fn num_fields(&self) -> usize {
225 self.fields.len()
226 }
227
228 pub fn default_fields(&self) -> &[Field] {
230 &self.default_fields
231 }
232
233 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
235 self.default_fields = fields;
236 }
237
238 pub fn query_routers(&self) -> &[QueryRouterRule] {
240 &self.query_routers
241 }
242
243 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
245 self.query_routers = rules;
246 }
247}
248
/// Incremental builder for a [`Schema`].
///
/// Fields receive ids in the order they are added; call `build` to produce
/// the finished schema.
#[derive(Debug, Default)]
pub struct SchemaBuilder {
    /// Field definitions added so far, in id order.
    fields: Vec<FieldEntry>,
    /// Default field names; resolved to field ids by `build`.
    default_fields: Vec<String>,
    /// Query routing rules copied into the built schema.
    query_routers: Vec<QueryRouterRule>,
}
256
257impl SchemaBuilder {
258 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
259 self.add_field_with_tokenizer(
260 name,
261 FieldType::Text,
262 indexed,
263 stored,
264 Some("default".to_string()),
265 )
266 }
267
268 pub fn add_text_field_with_tokenizer(
269 &mut self,
270 name: &str,
271 indexed: bool,
272 stored: bool,
273 tokenizer: &str,
274 ) -> Field {
275 self.add_field_with_tokenizer(
276 name,
277 FieldType::Text,
278 indexed,
279 stored,
280 Some(tokenizer.to_string()),
281 )
282 }
283
284 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
285 self.add_field(name, FieldType::U64, indexed, stored)
286 }
287
288 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
289 self.add_field(name, FieldType::I64, indexed, stored)
290 }
291
292 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
293 self.add_field(name, FieldType::F64, indexed, stored)
294 }
295
296 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
297 self.add_field(name, FieldType::Bytes, false, stored)
298 }
299
300 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
305 self.add_field(name, FieldType::Json, false, stored)
306 }
307
308 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
313 self.add_sparse_vector_field_with_config(
314 name,
315 indexed,
316 stored,
317 crate::structures::SparseVectorConfig::default(),
318 )
319 }
320
321 pub fn add_sparse_vector_field_with_config(
326 &mut self,
327 name: &str,
328 indexed: bool,
329 stored: bool,
330 config: crate::structures::SparseVectorConfig,
331 ) -> Field {
332 let field = Field(self.fields.len() as u32);
333 self.fields.push(FieldEntry {
334 name: name.to_string(),
335 field_type: FieldType::SparseVector,
336 indexed,
337 stored,
338 tokenizer: None,
339 multi: false,
340 sparse_vector_config: Some(config),
341 dense_vector_config: None,
342 });
343 field
344 }
345
346 pub fn set_sparse_vector_config(
348 &mut self,
349 field: Field,
350 config: crate::structures::SparseVectorConfig,
351 ) {
352 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
353 entry.sparse_vector_config = Some(config);
354 }
355 }
356
357 pub fn add_dense_vector_field(
362 &mut self,
363 name: &str,
364 dim: usize,
365 indexed: bool,
366 stored: bool,
367 ) -> Field {
368 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
369 }
370
371 pub fn add_dense_vector_field_with_config(
373 &mut self,
374 name: &str,
375 indexed: bool,
376 stored: bool,
377 config: DenseVectorConfig,
378 ) -> Field {
379 let field = Field(self.fields.len() as u32);
380 self.fields.push(FieldEntry {
381 name: name.to_string(),
382 field_type: FieldType::DenseVector,
383 indexed,
384 stored,
385 tokenizer: None,
386 multi: false,
387 sparse_vector_config: None,
388 dense_vector_config: Some(config),
389 });
390 field
391 }
392
393 fn add_field(
394 &mut self,
395 name: &str,
396 field_type: FieldType,
397 indexed: bool,
398 stored: bool,
399 ) -> Field {
400 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
401 }
402
403 fn add_field_with_tokenizer(
404 &mut self,
405 name: &str,
406 field_type: FieldType,
407 indexed: bool,
408 stored: bool,
409 tokenizer: Option<String>,
410 ) -> Field {
411 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
412 }
413
414 fn add_field_full(
415 &mut self,
416 name: &str,
417 field_type: FieldType,
418 indexed: bool,
419 stored: bool,
420 tokenizer: Option<String>,
421 multi: bool,
422 ) -> Field {
423 let field = Field(self.fields.len() as u32);
424 self.fields.push(FieldEntry {
425 name: name.to_string(),
426 field_type,
427 indexed,
428 stored,
429 tokenizer,
430 multi,
431 sparse_vector_config: None,
432 dense_vector_config: None,
433 });
434 field
435 }
436
437 pub fn set_multi(&mut self, field: Field, multi: bool) {
439 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
440 entry.multi = multi;
441 }
442 }
443
444 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
446 self.default_fields = field_names;
447 }
448
449 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
451 self.query_routers = rules;
452 }
453
454 pub fn build(self) -> Schema {
455 let mut name_to_field = HashMap::new();
456 for (i, entry) in self.fields.iter().enumerate() {
457 name_to_field.insert(entry.name.clone(), Field(i as u32));
458 }
459
460 let default_fields: Vec<Field> = self
462 .default_fields
463 .iter()
464 .filter_map(|name| name_to_field.get(name).copied())
465 .collect();
466
467 Schema {
468 fields: self.fields,
469 name_to_field,
470 default_fields,
471 query_routers: self.query_routers,
472 }
473 }
474}
475
/// A single value stored in a [`Document`], tagged by its kind.
///
/// Variants are serialized under the same lowercase names that
/// [`FieldType`] uses.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum FieldValue {
    /// UTF-8 text.
    #[serde(rename = "text")]
    Text(String),
    /// Unsigned 64-bit integer.
    #[serde(rename = "u64")]
    U64(u64),
    /// Signed 64-bit integer.
    #[serde(rename = "i64")]
    I64(i64),
    /// 64-bit float.
    #[serde(rename = "f64")]
    F64(f64),
    /// Raw bytes.
    #[serde(rename = "bytes")]
    Bytes(Vec<u8>),
    /// Sparse vector as `(index, weight)` pairs.
    #[serde(rename = "sparse_vector")]
    SparseVector(Vec<(u32, f32)>),
    /// Dense vector of `f32` components.
    #[serde(rename = "dense_vector")]
    DenseVector(Vec<f32>),
    /// Arbitrary JSON value.
    #[serde(rename = "json")]
    Json(serde_json::Value),
}
499
500impl FieldValue {
501 pub fn as_text(&self) -> Option<&str> {
502 match self {
503 FieldValue::Text(s) => Some(s),
504 _ => None,
505 }
506 }
507
508 pub fn as_u64(&self) -> Option<u64> {
509 match self {
510 FieldValue::U64(v) => Some(*v),
511 _ => None,
512 }
513 }
514
515 pub fn as_i64(&self) -> Option<i64> {
516 match self {
517 FieldValue::I64(v) => Some(*v),
518 _ => None,
519 }
520 }
521
522 pub fn as_f64(&self) -> Option<f64> {
523 match self {
524 FieldValue::F64(v) => Some(*v),
525 _ => None,
526 }
527 }
528
529 pub fn as_bytes(&self) -> Option<&[u8]> {
530 match self {
531 FieldValue::Bytes(b) => Some(b),
532 _ => None,
533 }
534 }
535
536 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
537 match self {
538 FieldValue::SparseVector(entries) => Some(entries),
539 _ => None,
540 }
541 }
542
543 pub fn as_dense_vector(&self) -> Option<&[f32]> {
544 match self {
545 FieldValue::DenseVector(v) => Some(v),
546 _ => None,
547 }
548 }
549
550 pub fn as_json(&self) -> Option<&serde_json::Value> {
551 match self {
552 FieldValue::Json(v) => Some(v),
553 _ => None,
554 }
555 }
556}
557
/// An ordered list of `(field, value)` pairs.
///
/// The same field may appear more than once; values keep the order in which
/// they were added.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct Document {
    /// Values in insertion order, each paired with the field it belongs to.
    field_values: Vec<(Field, FieldValue)>,
}
563
564impl Document {
565 pub fn new() -> Self {
566 Self::default()
567 }
568
569 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
570 self.field_values
571 .push((field, FieldValue::Text(value.into())));
572 }
573
574 pub fn add_u64(&mut self, field: Field, value: u64) {
575 self.field_values.push((field, FieldValue::U64(value)));
576 }
577
578 pub fn add_i64(&mut self, field: Field, value: i64) {
579 self.field_values.push((field, FieldValue::I64(value)));
580 }
581
582 pub fn add_f64(&mut self, field: Field, value: f64) {
583 self.field_values.push((field, FieldValue::F64(value)));
584 }
585
586 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
587 self.field_values.push((field, FieldValue::Bytes(value)));
588 }
589
590 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
591 self.field_values
592 .push((field, FieldValue::SparseVector(entries)));
593 }
594
595 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
596 self.field_values
597 .push((field, FieldValue::DenseVector(values)));
598 }
599
600 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
601 self.field_values.push((field, FieldValue::Json(value)));
602 }
603
604 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
605 self.field_values
606 .iter()
607 .find(|(f, _)| *f == field)
608 .map(|(_, v)| v)
609 }
610
611 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
612 self.field_values
613 .iter()
614 .filter(move |(f, _)| *f == field)
615 .map(|(_, v)| v)
616 }
617
618 pub fn field_values(&self) -> &[(Field, FieldValue)] {
619 &self.field_values
620 }
621
622 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
628 use std::collections::HashMap;
629
630 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
632 HashMap::new();
633
634 for (field, value) in &self.field_values {
635 if let Some(entry) = schema.get_field_entry(*field) {
636 let json_value = match value {
637 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
638 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
639 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
640 FieldValue::F64(n) => serde_json::json!(n),
641 FieldValue::Bytes(b) => {
642 use base64::Engine;
643 serde_json::Value::String(
644 base64::engine::general_purpose::STANDARD.encode(b),
645 )
646 }
647 FieldValue::SparseVector(entries) => {
648 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
649 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
650 serde_json::json!({
651 "indices": indices,
652 "values": values
653 })
654 }
655 FieldValue::DenseVector(values) => {
656 serde_json::json!(values)
657 }
658 FieldValue::Json(v) => v.clone(),
659 };
660 field_values_map
661 .entry(*field)
662 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
663 .2
664 .push(json_value);
665 }
666 }
667
668 let mut map = serde_json::Map::new();
670 for (_field, (name, is_multi, values)) in field_values_map {
671 let json_value = if is_multi || values.len() > 1 {
672 serde_json::Value::Array(values)
673 } else {
674 values.into_iter().next().unwrap()
675 };
676 map.insert(name, json_value);
677 }
678
679 serde_json::Value::Object(map)
680 }
681
682 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
691 let obj = json.as_object()?;
692 let mut doc = Document::new();
693
694 for (key, value) in obj {
695 if let Some(field) = schema.get_field(key) {
696 let field_entry = schema.get_field_entry(field)?;
697 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
698 }
699 }
700
701 Some(doc)
702 }
703
704 fn add_json_value(
706 doc: &mut Document,
707 field: Field,
708 field_type: &FieldType,
709 value: &serde_json::Value,
710 ) {
711 match value {
712 serde_json::Value::String(s) => {
713 if matches!(field_type, FieldType::Text) {
714 doc.add_text(field, s.clone());
715 }
716 }
717 serde_json::Value::Number(n) => {
718 match field_type {
719 FieldType::I64 => {
720 if let Some(i) = n.as_i64() {
721 doc.add_i64(field, i);
722 }
723 }
724 FieldType::U64 => {
725 if let Some(u) = n.as_u64() {
726 doc.add_u64(field, u);
727 } else if let Some(i) = n.as_i64() {
728 if i >= 0 {
730 doc.add_u64(field, i as u64);
731 }
732 }
733 }
734 FieldType::F64 => {
735 if let Some(f) = n.as_f64() {
736 doc.add_f64(field, f);
737 }
738 }
739 _ => {}
740 }
741 }
742 serde_json::Value::Array(arr) => {
744 for item in arr {
745 Self::add_json_value(doc, field, field_type, item);
746 }
747 }
748 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
750 if let (Some(indices_val), Some(values_val)) =
751 (obj.get("indices"), obj.get("values"))
752 {
753 let indices: Vec<u32> = indices_val
754 .as_array()
755 .map(|arr| {
756 arr.iter()
757 .filter_map(|v| v.as_u64().map(|n| n as u32))
758 .collect()
759 })
760 .unwrap_or_default();
761 let values: Vec<f32> = values_val
762 .as_array()
763 .map(|arr| {
764 arr.iter()
765 .filter_map(|v| v.as_f64().map(|n| n as f32))
766 .collect()
767 })
768 .unwrap_or_default();
769 if indices.len() == values.len() {
770 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
771 doc.add_sparse_vector(field, entries);
772 }
773 }
774 }
775 _ if matches!(field_type, FieldType::Json) => {
777 doc.add_json(field, value.clone());
778 }
779 serde_json::Value::Object(_) => {}
780 _ => {}
781 }
782 }
783}
784
#[cfg(test)]
mod tests {
    use super::*;

    /// Fields added through the builder can be looked up by name.
    #[test]
    fn test_schema_builder() {
        let mut sb = Schema::builder();
        let title_id = sb.add_text_field("title", true, true);
        let body_id = sb.add_text_field("body", true, false);
        let count_id = sb.add_u64_field("count", true, true);
        let built = sb.build();

        assert_eq!(built.get_field("title"), Some(title_id));
        assert_eq!(built.get_field("body"), Some(body_id));
        assert_eq!(built.get_field("count"), Some(count_id));
        assert_eq!(built.get_field("nonexistent"), None);
    }

    /// Values added to a document can be read back with typed accessors.
    #[test]
    fn test_document() {
        let mut sb = Schema::builder();
        let title_id = sb.add_text_field("title", true, true);
        let count_id = sb.add_u64_field("count", true, true);
        let _schema = sb.build();

        let mut record = Document::new();
        record.add_text(title_id, "Hello World");
        record.add_u64(count_id, 42);

        let first_title = record.get_first(title_id).unwrap();
        assert_eq!(first_title.as_text(), Some("Hello World"));
        let first_count = record.get_first(count_id).unwrap();
        assert_eq!(first_count.as_u64(), Some(42));
    }

    /// A document survives a serde round trip through a JSON string.
    #[test]
    fn test_document_serialization() {
        let mut sb = Schema::builder();
        let title_id = sb.add_text_field("title", true, true);
        let count_id = sb.add_u64_field("count", true, true);
        let _schema = sb.build();

        let mut record = Document::new();
        record.add_text(title_id, "Hello World");
        record.add_u64(count_id, 42);

        let encoded = serde_json::to_string(&record).unwrap();
        println!("Serialized doc: {}", encoded);

        let decoded: Document = serde_json::from_str(&encoded).unwrap();
        assert_eq!(
            decoded.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        assert_eq!(
            decoded.get_first(title_id).unwrap().as_text(),
            Some("Hello World")
        );
        assert_eq!(decoded.get_first(count_id).unwrap().as_u64(), Some(42));
    }

    /// Several values on one field come back in order and render as an array.
    #[test]
    fn test_multivalue_field() {
        let mut sb = Schema::builder();
        let uris_id = sb.add_text_field("uris", true, true);
        let title_id = sb.add_text_field("title", true, true);
        let built = sb.build();

        let mut record = Document::new();
        record.add_text(uris_id, "one");
        record.add_text(uris_id, "two");
        record.add_text(title_id, "Test Document");

        assert_eq!(record.get_first(uris_id).unwrap().as_text(), Some("one"));

        let collected: Vec<_> = record.get_all(uris_id).collect();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].as_text(), Some("one"));
        assert_eq!(collected[1].as_text(), Some("two"));

        let rendered = record.to_json(&built);

        let uris_node = rendered.get("uris").unwrap();
        assert!(uris_node.is_array(), "Multi-value field should be an array");
        let uris_items = uris_node.as_array().unwrap();
        assert_eq!(uris_items.len(), 2);
        assert_eq!(uris_items[0].as_str(), Some("one"));
        assert_eq!(uris_items[1].as_str(), Some("two"));

        let title_node = rendered.get("title").unwrap();
        assert!(
            title_node.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_node.as_str(), Some("Test Document"));
    }

    /// A JSON array on input becomes several values of the same field.
    #[test]
    fn test_multivalue_from_json() {
        let mut sb = Schema::builder();
        let uris_id = sb.add_text_field("uris", true, true);
        let title_id = sb.add_text_field("title", true, true);
        let built = sb.build();

        let input = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let record = Document::from_json(&input, &built).unwrap();

        let collected: Vec<_> = record.get_all(uris_id).collect();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].as_text(), Some("one"));
        assert_eq!(collected[1].as_text(), Some("two"));

        assert_eq!(
            record.get_first(title_id).unwrap().as_text(),
            Some("Test Document")
        );

        let rendered = record.to_json(&built);
        let uris_items = rendered.get("uris").unwrap().as_array().unwrap();
        assert_eq!(uris_items.len(), 2);
        assert_eq!(uris_items[0].as_str(), Some("one"));
        assert_eq!(uris_items[1].as_str(), Some("two"));
    }

    /// The `multi` flag forces array output even for a single value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut sb = Schema::builder();
        let uris_id = sb.add_text_field("uris", true, true);
        sb.set_multi(uris_id, true);
        let title_id = sb.add_text_field("title", true, true);
        let built = sb.build();

        assert!(built.get_field_entry(uris_id).unwrap().multi);
        assert!(!built.get_field_entry(title_id).unwrap().multi);

        let mut record = Document::new();
        record.add_text(uris_id, "only_one");
        record.add_text(title_id, "Test Document");

        let rendered = record.to_json(&built);

        let uris_node = rendered.get("uris").unwrap();
        assert!(
            uris_node.is_array(),
            "Multi field should be array even with single value"
        );
        let uris_items = uris_node.as_array().unwrap();
        assert_eq!(uris_items.len(), 1);
        assert_eq!(uris_items[0].as_str(), Some("only_one"));

        let title_node = rendered.get("title").unwrap();
        assert!(
            title_node.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_node.as_str(), Some("Test Document"));
    }

    /// Sparse vectors round-trip through `to_json` / `from_json`.
    #[test]
    fn test_sparse_vector_field() {
        let mut sb = Schema::builder();
        let embedding_id = sb.add_sparse_vector_field("embedding", true, true);
        let title_id = sb.add_text_field("title", true, true);
        let built = sb.build();

        assert_eq!(built.get_field("embedding"), Some(embedding_id));
        assert_eq!(
            built.get_field_entry(embedding_id).unwrap().field_type,
            FieldType::SparseVector
        );

        let mut record = Document::new();
        record.add_sparse_vector(embedding_id, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        record.add_text(title_id, "Test Document");

        let stored = record
            .get_first(embedding_id)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(stored, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        let rendered = record.to_json(&built);
        let embedding_node = rendered.get("embedding").unwrap();
        assert!(embedding_node.is_object());
        let index_count = embedding_node
            .get("indices")
            .unwrap()
            .as_array()
            .unwrap()
            .len();
        assert_eq!(index_count, 3);

        let reparsed = Document::from_json(&rendered, &built).unwrap();
        let restored = reparsed
            .get_first(embedding_id)
            .unwrap()
            .as_sparse_vector()
            .unwrap();
        assert_eq!(restored[0].0, 0);
        assert!((restored[0].1 - 1.0).abs() < 1e-6);
        assert_eq!(restored[1].0, 5);
        assert!((restored[1].1 - 2.5).abs() < 1e-6);
        assert_eq!(restored[2].0, 10);
        assert!((restored[2].1 - 0.5).abs() < 1e-6);
    }

    /// A JSON object stored in a JSON field round-trips unchanged.
    #[test]
    fn test_json_field() {
        let mut sb = Schema::builder();
        let metadata_id = sb.add_json_field("metadata", true);
        let title_id = sb.add_text_field("title", true, true);
        let built = sb.build();

        assert_eq!(built.get_field("metadata"), Some(metadata_id));
        let metadata_entry = built.get_field_entry(metadata_id).unwrap();
        assert_eq!(metadata_entry.field_type, FieldType::Json);
        assert!(!metadata_entry.indexed);
        assert!(metadata_entry.stored);

        let payload = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut record = Document::new();
        record.add_json(metadata_id, payload.clone());
        record.add_text(title_id, "Test Document");

        let held = record.get_first(metadata_id).unwrap().as_json().unwrap();
        assert_eq!(held, &payload);
        assert_eq!(held.get("author").unwrap().as_str(), Some("John Doe"));

        let rendered = record.to_json(&built);
        let metadata_node = rendered.get("metadata").unwrap();
        assert_eq!(metadata_node, &payload);

        let reparsed = Document::from_json(&rendered, &built).unwrap();
        let held_again = reparsed.get_first(metadata_id).unwrap().as_json().unwrap();
        assert_eq!(held_again, &payload);
    }

    /// `add_json` accepts every kind of JSON value and returns it unchanged.
    #[test]
    fn test_json_field_various_types() {
        let mut sb = Schema::builder();
        let data_id = sb.add_json_field("data", true);
        let _schema = sb.build();

        let array_payload = serde_json::json!([1, 2, 3, "four", null]);
        let mut array_doc = Document::new();
        array_doc.add_json(data_id, array_payload.clone());
        assert_eq!(
            array_doc.get_first(data_id).unwrap().as_json().unwrap(),
            &array_payload
        );

        let string_payload = serde_json::json!("just a string");
        let mut string_doc = Document::new();
        string_doc.add_json(data_id, string_payload.clone());
        assert_eq!(
            string_doc.get_first(data_id).unwrap().as_json().unwrap(),
            &string_payload
        );

        let number_payload = serde_json::json!(42.5);
        let mut number_doc = Document::new();
        number_doc.add_json(data_id, number_payload.clone());
        assert_eq!(
            number_doc.get_first(data_id).unwrap().as_json().unwrap(),
            &number_payload
        );

        let null_payload = serde_json::Value::Null;
        let mut null_doc = Document::new();
        null_doc.add_json(data_id, null_payload.clone());
        assert_eq!(
            null_doc.get_first(data_id).unwrap().as_json().unwrap(),
            &null_payload
        );

        let bool_payload = serde_json::json!(true);
        let mut bool_doc = Document::new();
        bool_doc.add_json(data_id, bool_payload.clone());
        assert_eq!(
            bool_doc.get_first(data_id).unwrap().as_json().unwrap(),
            &bool_payload
        );
    }
}