1use serde::{Deserialize, Serialize};
4use std::collections::HashMap;
5
6#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
8pub struct Field(pub u32);
9
10#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
12pub enum FieldType {
13 #[serde(rename = "text")]
15 Text,
16 #[serde(rename = "u64")]
18 U64,
19 #[serde(rename = "i64")]
21 I64,
22 #[serde(rename = "f64")]
24 F64,
25 #[serde(rename = "bytes")]
27 Bytes,
28 #[serde(rename = "sparse_vector")]
30 SparseVector,
31 #[serde(rename = "dense_vector")]
33 DenseVector,
34 #[serde(rename = "json")]
36 Json,
37}
38
39#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct FieldEntry {
42 pub name: String,
43 pub field_type: FieldType,
44 pub indexed: bool,
45 pub stored: bool,
46 pub tokenizer: Option<String>,
48 #[serde(default)]
50 pub multi: bool,
51 #[serde(default, skip_serializing_if = "Option::is_none")]
53 pub positions: Option<PositionMode>,
54 #[serde(default, skip_serializing_if = "Option::is_none")]
56 pub sparse_vector_config: Option<crate::structures::SparseVectorConfig>,
57 #[serde(default, skip_serializing_if = "Option::is_none")]
59 pub dense_vector_config: Option<DenseVectorConfig>,
60}
61
62#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
64#[serde(rename_all = "snake_case")]
65pub enum PositionMode {
66 Ordinal,
69 TokenPosition,
72 Full,
75}
76
77impl PositionMode {
78 pub fn tracks_ordinal(&self) -> bool {
80 matches!(self, PositionMode::Ordinal | PositionMode::Full)
81 }
82
83 pub fn tracks_token_position(&self) -> bool {
85 matches!(self, PositionMode::TokenPosition | PositionMode::Full)
86 }
87}
88
89#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
91#[serde(rename_all = "snake_case")]
92pub enum VectorIndexType {
93 #[default]
95 RaBitQ,
96 IvfRaBitQ,
98 ScaNN,
100}
101
102#[derive(Debug, Clone, Serialize, Deserialize)]
104pub struct DenseVectorConfig {
105 pub dim: usize,
107 #[serde(default)]
109 pub index_type: VectorIndexType,
110 #[serde(default = "default_store_raw")]
112 pub store_raw: bool,
113 #[serde(default, skip_serializing_if = "Option::is_none")]
116 pub coarse_centroids_path: Option<String>,
117 #[serde(default, skip_serializing_if = "Option::is_none")]
119 pub pq_codebook_path: Option<String>,
120 #[serde(default = "default_nprobe")]
122 pub nprobe: usize,
123 #[serde(default, skip_serializing_if = "Option::is_none")]
127 pub mrl_dim: Option<usize>,
128}
129
/// Serde default for `DenseVectorConfig::store_raw`: always `true`.
fn default_store_raw() -> bool {
    true
}
133
/// Serde default for `DenseVectorConfig::nprobe`: always `32`.
fn default_nprobe() -> usize {
    32
}
137
138impl DenseVectorConfig {
139 pub fn new(dim: usize) -> Self {
140 Self {
141 dim,
142 index_type: VectorIndexType::RaBitQ,
143 store_raw: true,
144 coarse_centroids_path: None,
145 pq_codebook_path: None,
146 nprobe: 32,
147 mrl_dim: None,
148 }
149 }
150
151 pub fn with_ivf(dim: usize, centroids_path: String, nprobe: usize) -> Self {
152 Self {
153 dim,
154 index_type: VectorIndexType::IvfRaBitQ,
155 store_raw: true,
156 coarse_centroids_path: Some(centroids_path),
157 pq_codebook_path: None,
158 nprobe,
159 mrl_dim: None,
160 }
161 }
162
163 pub fn with_scann(
165 dim: usize,
166 centroids_path: String,
167 codebook_path: String,
168 nprobe: usize,
169 ) -> Self {
170 Self {
171 dim,
172 index_type: VectorIndexType::ScaNN,
173 store_raw: true,
174 coarse_centroids_path: Some(centroids_path),
175 pq_codebook_path: Some(codebook_path),
176 nprobe,
177 mrl_dim: None,
178 }
179 }
180
181 pub fn without_raw(dim: usize) -> Self {
182 Self {
183 dim,
184 index_type: VectorIndexType::RaBitQ,
185 store_raw: false,
186 coarse_centroids_path: None,
187 pq_codebook_path: None,
188 nprobe: 32,
189 mrl_dim: None,
190 }
191 }
192
193 pub fn with_mrl_dim(mut self, mrl_dim: usize) -> Self {
195 self.mrl_dim = Some(mrl_dim);
196 self
197 }
198
199 pub fn index_dim(&self) -> usize {
201 self.mrl_dim.unwrap_or(self.dim)
202 }
203
204 pub fn uses_ivf(&self) -> bool {
206 self.coarse_centroids_path.is_some()
207 }
208
209 pub fn uses_scann(&self) -> bool {
211 self.index_type == VectorIndexType::ScaNN
212 }
213}
214
215use super::query_field_router::QueryRouterRule;
216
217#[derive(Debug, Clone, Default, Serialize, Deserialize)]
219pub struct Schema {
220 fields: Vec<FieldEntry>,
221 name_to_field: HashMap<String, Field>,
222 #[serde(default)]
224 default_fields: Vec<Field>,
225 #[serde(default)]
227 query_routers: Vec<QueryRouterRule>,
228}
229
230impl Schema {
231 pub fn builder() -> SchemaBuilder {
232 SchemaBuilder::default()
233 }
234
235 pub fn get_field(&self, name: &str) -> Option<Field> {
236 self.name_to_field.get(name).copied()
237 }
238
239 pub fn get_field_entry(&self, field: Field) -> Option<&FieldEntry> {
240 self.fields.get(field.0 as usize)
241 }
242
243 pub fn get_field_name(&self, field: Field) -> Option<&str> {
244 self.fields.get(field.0 as usize).map(|e| e.name.as_str())
245 }
246
247 pub fn fields(&self) -> impl Iterator<Item = (Field, &FieldEntry)> {
248 self.fields
249 .iter()
250 .enumerate()
251 .map(|(i, e)| (Field(i as u32), e))
252 }
253
254 pub fn num_fields(&self) -> usize {
255 self.fields.len()
256 }
257
258 pub fn default_fields(&self) -> &[Field] {
260 &self.default_fields
261 }
262
263 pub fn set_default_fields(&mut self, fields: Vec<Field>) {
265 self.default_fields = fields;
266 }
267
268 pub fn query_routers(&self) -> &[QueryRouterRule] {
270 &self.query_routers
271 }
272
273 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
275 self.query_routers = rules;
276 }
277}
278
279#[derive(Debug, Default)]
281pub struct SchemaBuilder {
282 fields: Vec<FieldEntry>,
283 default_fields: Vec<String>,
284 query_routers: Vec<QueryRouterRule>,
285}
286
287impl SchemaBuilder {
288 pub fn add_text_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
289 self.add_field_with_tokenizer(
290 name,
291 FieldType::Text,
292 indexed,
293 stored,
294 Some("default".to_string()),
295 )
296 }
297
298 pub fn add_text_field_with_tokenizer(
299 &mut self,
300 name: &str,
301 indexed: bool,
302 stored: bool,
303 tokenizer: &str,
304 ) -> Field {
305 self.add_field_with_tokenizer(
306 name,
307 FieldType::Text,
308 indexed,
309 stored,
310 Some(tokenizer.to_string()),
311 )
312 }
313
314 pub fn add_u64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
315 self.add_field(name, FieldType::U64, indexed, stored)
316 }
317
318 pub fn add_i64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
319 self.add_field(name, FieldType::I64, indexed, stored)
320 }
321
322 pub fn add_f64_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
323 self.add_field(name, FieldType::F64, indexed, stored)
324 }
325
326 pub fn add_bytes_field(&mut self, name: &str, stored: bool) -> Field {
327 self.add_field(name, FieldType::Bytes, false, stored)
328 }
329
330 pub fn add_json_field(&mut self, name: &str, stored: bool) -> Field {
335 self.add_field(name, FieldType::Json, false, stored)
336 }
337
338 pub fn add_sparse_vector_field(&mut self, name: &str, indexed: bool, stored: bool) -> Field {
343 self.add_sparse_vector_field_with_config(
344 name,
345 indexed,
346 stored,
347 crate::structures::SparseVectorConfig::default(),
348 )
349 }
350
351 pub fn add_sparse_vector_field_with_config(
356 &mut self,
357 name: &str,
358 indexed: bool,
359 stored: bool,
360 config: crate::structures::SparseVectorConfig,
361 ) -> Field {
362 let field = Field(self.fields.len() as u32);
363 self.fields.push(FieldEntry {
364 name: name.to_string(),
365 field_type: FieldType::SparseVector,
366 indexed,
367 stored,
368 tokenizer: None,
369 multi: false,
370 positions: None,
371 sparse_vector_config: Some(config),
372 dense_vector_config: None,
373 });
374 field
375 }
376
377 pub fn set_sparse_vector_config(
379 &mut self,
380 field: Field,
381 config: crate::structures::SparseVectorConfig,
382 ) {
383 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
384 entry.sparse_vector_config = Some(config);
385 }
386 }
387
388 pub fn add_dense_vector_field(
393 &mut self,
394 name: &str,
395 dim: usize,
396 indexed: bool,
397 stored: bool,
398 ) -> Field {
399 self.add_dense_vector_field_with_config(name, indexed, stored, DenseVectorConfig::new(dim))
400 }
401
402 pub fn add_dense_vector_field_with_config(
404 &mut self,
405 name: &str,
406 indexed: bool,
407 stored: bool,
408 config: DenseVectorConfig,
409 ) -> Field {
410 let field = Field(self.fields.len() as u32);
411 self.fields.push(FieldEntry {
412 name: name.to_string(),
413 field_type: FieldType::DenseVector,
414 indexed,
415 stored,
416 tokenizer: None,
417 multi: false,
418 positions: None,
419 sparse_vector_config: None,
420 dense_vector_config: Some(config),
421 });
422 field
423 }
424
425 fn add_field(
426 &mut self,
427 name: &str,
428 field_type: FieldType,
429 indexed: bool,
430 stored: bool,
431 ) -> Field {
432 self.add_field_with_tokenizer(name, field_type, indexed, stored, None)
433 }
434
435 fn add_field_with_tokenizer(
436 &mut self,
437 name: &str,
438 field_type: FieldType,
439 indexed: bool,
440 stored: bool,
441 tokenizer: Option<String>,
442 ) -> Field {
443 self.add_field_full(name, field_type, indexed, stored, tokenizer, false)
444 }
445
446 fn add_field_full(
447 &mut self,
448 name: &str,
449 field_type: FieldType,
450 indexed: bool,
451 stored: bool,
452 tokenizer: Option<String>,
453 multi: bool,
454 ) -> Field {
455 let field = Field(self.fields.len() as u32);
456 self.fields.push(FieldEntry {
457 name: name.to_string(),
458 field_type,
459 indexed,
460 stored,
461 tokenizer,
462 multi,
463 positions: None,
464 sparse_vector_config: None,
465 dense_vector_config: None,
466 });
467 field
468 }
469
470 pub fn set_multi(&mut self, field: Field, multi: bool) {
472 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
473 entry.multi = multi;
474 }
475 }
476
477 pub fn set_positions(&mut self, field: Field, mode: PositionMode) {
479 if let Some(entry) = self.fields.get_mut(field.0 as usize) {
480 entry.positions = Some(mode);
481 }
482 }
483
484 pub fn set_default_fields(&mut self, field_names: Vec<String>) {
486 self.default_fields = field_names;
487 }
488
489 pub fn set_query_routers(&mut self, rules: Vec<QueryRouterRule>) {
491 self.query_routers = rules;
492 }
493
494 pub fn build(self) -> Schema {
495 let mut name_to_field = HashMap::new();
496 for (i, entry) in self.fields.iter().enumerate() {
497 name_to_field.insert(entry.name.clone(), Field(i as u32));
498 }
499
500 let default_fields: Vec<Field> = self
502 .default_fields
503 .iter()
504 .filter_map(|name| name_to_field.get(name).copied())
505 .collect();
506
507 Schema {
508 fields: self.fields,
509 name_to_field,
510 default_fields,
511 query_routers: self.query_routers,
512 }
513 }
514}
515
516#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
518pub enum FieldValue {
519 #[serde(rename = "text")]
520 Text(String),
521 #[serde(rename = "u64")]
522 U64(u64),
523 #[serde(rename = "i64")]
524 I64(i64),
525 #[serde(rename = "f64")]
526 F64(f64),
527 #[serde(rename = "bytes")]
528 Bytes(Vec<u8>),
529 #[serde(rename = "sparse_vector")]
531 SparseVector(Vec<(u32, f32)>),
532 #[serde(rename = "dense_vector")]
534 DenseVector(Vec<f32>),
535 #[serde(rename = "json")]
537 Json(serde_json::Value),
538}
539
540impl FieldValue {
541 pub fn as_text(&self) -> Option<&str> {
542 match self {
543 FieldValue::Text(s) => Some(s),
544 _ => None,
545 }
546 }
547
548 pub fn as_u64(&self) -> Option<u64> {
549 match self {
550 FieldValue::U64(v) => Some(*v),
551 _ => None,
552 }
553 }
554
555 pub fn as_i64(&self) -> Option<i64> {
556 match self {
557 FieldValue::I64(v) => Some(*v),
558 _ => None,
559 }
560 }
561
562 pub fn as_f64(&self) -> Option<f64> {
563 match self {
564 FieldValue::F64(v) => Some(*v),
565 _ => None,
566 }
567 }
568
569 pub fn as_bytes(&self) -> Option<&[u8]> {
570 match self {
571 FieldValue::Bytes(b) => Some(b),
572 _ => None,
573 }
574 }
575
576 pub fn as_sparse_vector(&self) -> Option<&[(u32, f32)]> {
577 match self {
578 FieldValue::SparseVector(entries) => Some(entries),
579 _ => None,
580 }
581 }
582
583 pub fn as_dense_vector(&self) -> Option<&[f32]> {
584 match self {
585 FieldValue::DenseVector(v) => Some(v),
586 _ => None,
587 }
588 }
589
590 pub fn as_json(&self) -> Option<&serde_json::Value> {
591 match self {
592 FieldValue::Json(v) => Some(v),
593 _ => None,
594 }
595 }
596}
597
598#[derive(Debug, Clone, Default, Serialize, Deserialize)]
600pub struct Document {
601 field_values: Vec<(Field, FieldValue)>,
602}
603
604impl Document {
605 pub fn new() -> Self {
606 Self::default()
607 }
608
609 pub fn add_text(&mut self, field: Field, value: impl Into<String>) {
610 self.field_values
611 .push((field, FieldValue::Text(value.into())));
612 }
613
614 pub fn add_u64(&mut self, field: Field, value: u64) {
615 self.field_values.push((field, FieldValue::U64(value)));
616 }
617
618 pub fn add_i64(&mut self, field: Field, value: i64) {
619 self.field_values.push((field, FieldValue::I64(value)));
620 }
621
622 pub fn add_f64(&mut self, field: Field, value: f64) {
623 self.field_values.push((field, FieldValue::F64(value)));
624 }
625
626 pub fn add_bytes(&mut self, field: Field, value: Vec<u8>) {
627 self.field_values.push((field, FieldValue::Bytes(value)));
628 }
629
630 pub fn add_sparse_vector(&mut self, field: Field, entries: Vec<(u32, f32)>) {
631 self.field_values
632 .push((field, FieldValue::SparseVector(entries)));
633 }
634
635 pub fn add_dense_vector(&mut self, field: Field, values: Vec<f32>) {
636 self.field_values
637 .push((field, FieldValue::DenseVector(values)));
638 }
639
640 pub fn add_json(&mut self, field: Field, value: serde_json::Value) {
641 self.field_values.push((field, FieldValue::Json(value)));
642 }
643
644 pub fn get_first(&self, field: Field) -> Option<&FieldValue> {
645 self.field_values
646 .iter()
647 .find(|(f, _)| *f == field)
648 .map(|(_, v)| v)
649 }
650
651 pub fn get_all(&self, field: Field) -> impl Iterator<Item = &FieldValue> {
652 self.field_values
653 .iter()
654 .filter(move |(f, _)| *f == field)
655 .map(|(_, v)| v)
656 }
657
658 pub fn field_values(&self) -> &[(Field, FieldValue)] {
659 &self.field_values
660 }
661
662 pub fn to_json(&self, schema: &Schema) -> serde_json::Value {
668 use std::collections::HashMap;
669
670 let mut field_values_map: HashMap<Field, (String, bool, Vec<serde_json::Value>)> =
672 HashMap::new();
673
674 for (field, value) in &self.field_values {
675 if let Some(entry) = schema.get_field_entry(*field) {
676 let json_value = match value {
677 FieldValue::Text(s) => serde_json::Value::String(s.clone()),
678 FieldValue::U64(n) => serde_json::Value::Number((*n).into()),
679 FieldValue::I64(n) => serde_json::Value::Number((*n).into()),
680 FieldValue::F64(n) => serde_json::json!(n),
681 FieldValue::Bytes(b) => {
682 use base64::Engine;
683 serde_json::Value::String(
684 base64::engine::general_purpose::STANDARD.encode(b),
685 )
686 }
687 FieldValue::SparseVector(entries) => {
688 let indices: Vec<u32> = entries.iter().map(|(i, _)| *i).collect();
689 let values: Vec<f32> = entries.iter().map(|(_, v)| *v).collect();
690 serde_json::json!({
691 "indices": indices,
692 "values": values
693 })
694 }
695 FieldValue::DenseVector(values) => {
696 serde_json::json!(values)
697 }
698 FieldValue::Json(v) => v.clone(),
699 };
700 field_values_map
701 .entry(*field)
702 .or_insert_with(|| (entry.name.clone(), entry.multi, Vec::new()))
703 .2
704 .push(json_value);
705 }
706 }
707
708 let mut map = serde_json::Map::new();
710 for (_field, (name, is_multi, values)) in field_values_map {
711 let json_value = if is_multi || values.len() > 1 {
712 serde_json::Value::Array(values)
713 } else {
714 values.into_iter().next().unwrap()
715 };
716 map.insert(name, json_value);
717 }
718
719 serde_json::Value::Object(map)
720 }
721
722 pub fn from_json(json: &serde_json::Value, schema: &Schema) -> Option<Self> {
731 let obj = json.as_object()?;
732 let mut doc = Document::new();
733
734 for (key, value) in obj {
735 if let Some(field) = schema.get_field(key) {
736 let field_entry = schema.get_field_entry(field)?;
737 Self::add_json_value(&mut doc, field, &field_entry.field_type, value);
738 }
739 }
740
741 Some(doc)
742 }
743
744 fn add_json_value(
746 doc: &mut Document,
747 field: Field,
748 field_type: &FieldType,
749 value: &serde_json::Value,
750 ) {
751 match value {
752 serde_json::Value::String(s) => {
753 if matches!(field_type, FieldType::Text) {
754 doc.add_text(field, s.clone());
755 }
756 }
757 serde_json::Value::Number(n) => {
758 match field_type {
759 FieldType::I64 => {
760 if let Some(i) = n.as_i64() {
761 doc.add_i64(field, i);
762 }
763 }
764 FieldType::U64 => {
765 if let Some(u) = n.as_u64() {
766 doc.add_u64(field, u);
767 } else if let Some(i) = n.as_i64() {
768 if i >= 0 {
770 doc.add_u64(field, i as u64);
771 }
772 }
773 }
774 FieldType::F64 => {
775 if let Some(f) = n.as_f64() {
776 doc.add_f64(field, f);
777 }
778 }
779 _ => {}
780 }
781 }
782 serde_json::Value::Array(arr) => {
784 for item in arr {
785 Self::add_json_value(doc, field, field_type, item);
786 }
787 }
788 serde_json::Value::Object(obj) if matches!(field_type, FieldType::SparseVector) => {
790 if let (Some(indices_val), Some(values_val)) =
791 (obj.get("indices"), obj.get("values"))
792 {
793 let indices: Vec<u32> = indices_val
794 .as_array()
795 .map(|arr| {
796 arr.iter()
797 .filter_map(|v| v.as_u64().map(|n| n as u32))
798 .collect()
799 })
800 .unwrap_or_default();
801 let values: Vec<f32> = values_val
802 .as_array()
803 .map(|arr| {
804 arr.iter()
805 .filter_map(|v| v.as_f64().map(|n| n as f32))
806 .collect()
807 })
808 .unwrap_or_default();
809 if indices.len() == values.len() {
810 let entries: Vec<(u32, f32)> = indices.into_iter().zip(values).collect();
811 doc.add_sparse_vector(field, entries);
812 }
813 }
814 }
815 _ if matches!(field_type, FieldType::Json) => {
817 doc.add_json(field, value.clone());
818 }
819 serde_json::Value::Object(_) => {}
820 _ => {}
821 }
822 }
823}
824
#[cfg(test)]
mod tests {
    use super::*;

    /// Field names resolve to the handles returned by the builder.
    #[test]
    fn test_schema_builder() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let body = builder.add_text_field("body", true, false);
        let count = builder.add_u64_field("count", true, true);
        let schema = builder.build();

        let expectations = [
            ("title", Some(title)),
            ("body", Some(body)),
            ("count", Some(count)),
            ("nonexistent", None),
        ];
        for (name, expected) in expectations.iter() {
            assert_eq!(schema.get_field(name), *expected);
        }
    }

    /// Values added to a document can be read back by field.
    #[test]
    fn test_document() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        let first_title = doc.get_first(title).unwrap();
        assert_eq!(first_title.as_text(), Some("Hello World"));
        let first_count = doc.get_first(count).unwrap();
        assert_eq!(first_count.as_u64(), Some(42));
    }

    /// A document survives a serde JSON round trip.
    #[test]
    fn test_document_serialization() {
        let mut builder = Schema::builder();
        let title = builder.add_text_field("title", true, true);
        let count = builder.add_u64_field("count", true, true);
        let _schema = builder.build();

        let mut doc = Document::new();
        doc.add_text(title, "Hello World");
        doc.add_u64(count, 42);

        let json = serde_json::to_string(&doc).unwrap();
        println!("Serialized doc: {}", json);

        let doc2: Document = serde_json::from_str(&json).unwrap();
        assert_eq!(
            doc2.field_values().len(),
            2,
            "Should have 2 field values after deserialization"
        );
        let restored_title = doc2.get_first(title).unwrap();
        assert_eq!(restored_title.as_text(), Some("Hello World"));
        let restored_count = doc2.get_first(count).unwrap();
        assert_eq!(restored_count.as_u64(), Some(42));
    }

    /// Several values of one field are kept in order and emitted as an array.
    #[test]
    fn test_multivalue_field() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let mut doc = Document::new();
        for uri in ["one", "two"].iter() {
            doc.add_text(uris, *uri);
        }
        doc.add_text(title, "Test Document");

        assert_eq!(doc.get_first(uris).unwrap().as_text(), Some("one"));

        let all_uris: Vec<Option<&str>> = doc.get_all(uris).map(|v| v.as_text()).collect();
        assert_eq!(all_uris, vec![Some("one"), Some("two")]);

        let json = doc.to_json(&schema);
        let uris_json = json.get("uris").unwrap();
        assert!(uris_json.is_array(), "Multi-value field should be an array");
        let uris_arr: Vec<Option<&str>> = uris_json
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_str())
            .collect();
        assert_eq!(uris_arr, vec![Some("one"), Some("two")]);

        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// A JSON array is read as several values of the same field.
    #[test]
    fn test_multivalue_from_json() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let json = serde_json::json!({
            "uris": ["one", "two"],
            "title": "Test Document"
        });

        let doc = Document::from_json(&json, &schema).unwrap();

        let all_uris: Vec<Option<&str>> = doc.get_all(uris).map(|v| v.as_text()).collect();
        assert_eq!(all_uris, vec![Some("one"), Some("two")]);

        let parsed_title = doc.get_first(title).unwrap();
        assert_eq!(parsed_title.as_text(), Some("Test Document"));

        let json_out = doc.to_json(&schema);
        let uris_out: Vec<Option<&str>> = json_out
            .get("uris")
            .unwrap()
            .as_array()
            .unwrap()
            .iter()
            .map(|v| v.as_str())
            .collect();
        assert_eq!(uris_out, vec![Some("one"), Some("two")]);
    }

    /// A field flagged `multi` is emitted as an array even with one value.
    #[test]
    fn test_multi_attribute_forces_array() {
        let mut builder = Schema::builder();
        let uris = builder.add_text_field("uris", true, true);
        builder.set_multi(uris, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        let uris_entry = schema.get_field_entry(uris).unwrap();
        assert!(uris_entry.multi);
        let title_entry = schema.get_field_entry(title).unwrap();
        assert!(!title_entry.multi);

        let mut doc = Document::new();
        doc.add_text(uris, "only_one");
        doc.add_text(title, "Test Document");

        let json = doc.to_json(&schema);

        let uris_json = json.get("uris").unwrap();
        assert!(
            uris_json.is_array(),
            "Multi field should be array even with single value"
        );
        let uris_arr = uris_json.as_array().unwrap();
        assert_eq!(uris_arr.len(), 1);
        assert_eq!(uris_arr[0].as_str(), Some("only_one"));

        let title_json = json.get("title").unwrap();
        assert!(
            title_json.is_string(),
            "Non-multi single-value field should be a string"
        );
        assert_eq!(title_json.as_str(), Some("Test Document"));
    }

    /// Sparse vectors are stored as given and survive a JSON round trip.
    #[test]
    fn test_sparse_vector_field() {
        let mut builder = Schema::builder();
        let embedding = builder.add_sparse_vector_field("embedding", true, true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("embedding"), Some(embedding));
        let embedding_entry = schema.get_field_entry(embedding).unwrap();
        assert_eq!(embedding_entry.field_type, FieldType::SparseVector);

        let mut doc = Document::new();
        doc.add_sparse_vector(embedding, vec![(0, 1.0), (5, 2.5), (10, 0.5)]);
        doc.add_text(title, "Test Document");

        let entries = doc
            .get_first(embedding)
            .and_then(|value| value.as_sparse_vector())
            .unwrap();
        assert_eq!(entries, &[(0, 1.0), (5, 2.5), (10, 0.5)]);

        let json = doc.to_json(&schema);
        let embedding_json = json.get("embedding").unwrap();
        assert!(embedding_json.is_object());
        let indices_json = embedding_json.get("indices").unwrap().as_array().unwrap();
        assert_eq!(indices_json.len(), 3);

        let doc2 = Document::from_json(&json, &schema).unwrap();
        let entries2 = doc2
            .get_first(embedding)
            .and_then(|value| value.as_sparse_vector())
            .unwrap();
        let expected: [(u32, f32); 3] = [(0, 1.0), (5, 2.5), (10, 0.5)];
        for (slot, (index, weight)) in expected.iter().enumerate() {
            assert_eq!(entries2[slot].0, *index);
            assert!((entries2[slot].1 - *weight).abs() < 1e-6);
        }
    }

    /// JSON fields are never indexed and keep their value verbatim.
    #[test]
    fn test_json_field() {
        let mut builder = Schema::builder();
        let metadata = builder.add_json_field("metadata", true);
        let title = builder.add_text_field("title", true, true);
        let schema = builder.build();

        assert_eq!(schema.get_field("metadata"), Some(metadata));
        let metadata_entry = schema.get_field_entry(metadata).unwrap();
        assert_eq!(metadata_entry.field_type, FieldType::Json);
        assert!(!metadata_entry.indexed);
        assert!(metadata_entry.stored);

        let json_value = serde_json::json!({
            "author": "John Doe",
            "tags": ["rust", "search"],
            "nested": {"key": "value"}
        });
        let mut doc = Document::new();
        doc.add_json(metadata, json_value.clone());
        doc.add_text(title, "Test Document");

        let stored_json = doc.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json, &json_value);
        let author = stored_json.get("author").unwrap();
        assert_eq!(author.as_str(), Some("John Doe"));

        let doc_json = doc.to_json(&schema);
        let metadata_out = doc_json.get("metadata").unwrap();
        assert_eq!(metadata_out, &json_value);

        let doc2 = Document::from_json(&doc_json, &schema).unwrap();
        let stored_json2 = doc2.get_first(metadata).unwrap().as_json().unwrap();
        assert_eq!(stored_json2, &json_value);
    }

    /// `add_json` accepts every JSON value kind unchanged.
    #[test]
    fn test_json_field_various_types() {
        let mut builder = Schema::builder();
        let data = builder.add_json_field("data", true);
        let _schema = builder.build();

        let samples = vec![
            serde_json::json!([1, 2, 3, "four", null]),
            serde_json::json!("just a string"),
            serde_json::json!(42.5),
            serde_json::Value::Null,
            serde_json::json!(true),
        ];

        for sample in samples {
            let mut doc = Document::new();
            doc.add_json(data, sample.clone());
            assert_eq!(doc.get_first(data).unwrap().as_json().unwrap(), &sample);
        }
    }
}