1use pest::Parser;
49use pest_derive::Parser;
50
51use super::query_field_router::{QueryRouterRule, RoutingMode};
52use super::schema::{FieldType, Schema, SchemaBuilder};
53use crate::Result;
54use crate::error::Error;
55
/// Pest-derived parser for the schema definition language (SDL).
///
/// The grammar is loaded from `dsl/sdl/sdl.pest`; the derive macro also
/// generates the `Rule` enum that the parsing helpers in this file match on.
#[derive(Parser)]
#[grammar = "dsl/sdl/sdl.pest"]
pub struct SdlParser;
59
60use super::schema::DenseVectorConfig;
61use crate::structures::{IndexSize, SparseVectorConfig, WeightQuantization};
62
/// A single field declaration parsed from an SDL `field` statement.
#[derive(Debug, Clone)]
pub struct FieldDef {
    /// Field name as written in the SDL source.
    pub name: String,
    /// Resolved field type (type aliases are already normalized).
    pub field_type: FieldType,
    /// Whether the field carries the `indexed` attribute.
    pub indexed: bool,
    /// Whether the field carries the `stored` attribute.
    pub stored: bool,
    /// Tokenizer named in the type spec (e.g. `text<en_stem>`), if any.
    pub tokenizer: Option<String>,
    /// Whether the field carries the `multi` attribute.
    pub multi: bool,
    /// Sparse-vector settings, when the field declared or implied any.
    pub sparse_vector_config: Option<SparseVectorConfig>,
    /// Dense-vector settings (dimension plus optional index options).
    pub dense_vector_config: Option<DenseVectorConfig>,
}
79
/// One `index <name> { ... }` definition parsed from SDL.
#[derive(Debug, Clone)]
pub struct IndexDef {
    /// Index name as written after the `index` keyword.
    pub name: String,
    /// Field declarations, in source order.
    pub fields: Vec<FieldDef>,
    /// Names listed in the default-fields declaration (empty when absent).
    pub default_fields: Vec<String>,
    /// Rules collected from every `query_router { ... }` block, in source order.
    pub query_routers: Vec<QueryRouterRule>,
}
89
90impl IndexDef {
91 pub fn to_schema(&self) -> Schema {
93 let mut builder = SchemaBuilder::default();
94
95 for field in &self.fields {
96 let f = match field.field_type {
97 FieldType::Text => {
98 let tokenizer = field.tokenizer.as_deref().unwrap_or("default");
99 builder.add_text_field_with_tokenizer(
100 &field.name,
101 field.indexed,
102 field.stored,
103 tokenizer,
104 )
105 }
106 FieldType::U64 => builder.add_u64_field(&field.name, field.indexed, field.stored),
107 FieldType::I64 => builder.add_i64_field(&field.name, field.indexed, field.stored),
108 FieldType::F64 => builder.add_f64_field(&field.name, field.indexed, field.stored),
109 FieldType::Bytes => builder.add_bytes_field(&field.name, field.stored),
110 FieldType::Json => builder.add_json_field(&field.name, field.stored),
111 FieldType::SparseVector => {
112 if let Some(config) = &field.sparse_vector_config {
113 builder.add_sparse_vector_field_with_config(
114 &field.name,
115 field.indexed,
116 field.stored,
117 *config,
118 )
119 } else {
120 builder.add_sparse_vector_field(&field.name, field.indexed, field.stored)
121 }
122 }
123 FieldType::DenseVector => {
124 let config = field
126 .dense_vector_config
127 .as_ref()
128 .expect("DenseVector field requires dimension to be specified");
129 builder.add_dense_vector_field_with_config(
130 &field.name,
131 field.indexed,
132 field.stored,
133 config.clone(),
134 )
135 }
136 };
137 if field.multi {
138 builder.set_multi(f, true);
139 }
140 }
141
142 if !self.default_fields.is_empty() {
144 builder.set_default_fields(self.default_fields.clone());
145 }
146
147 if !self.query_routers.is_empty() {
149 builder.set_query_routers(self.query_routers.clone());
150 }
151
152 builder.build()
153 }
154
155 pub fn to_query_router(&self) -> Result<Option<super::query_field_router::QueryFieldRouter>> {
160 if self.query_routers.is_empty() {
161 return Ok(None);
162 }
163
164 super::query_field_router::QueryFieldRouter::from_rules(&self.query_routers)
165 .map(Some)
166 .map_err(Error::Schema)
167 }
168}
169
170fn parse_field_type(type_str: &str) -> Result<FieldType> {
172 match type_str {
173 "text" | "string" | "str" => Ok(FieldType::Text),
174 "u64" | "uint" | "unsigned" => Ok(FieldType::U64),
175 "i64" | "int" | "integer" => Ok(FieldType::I64),
176 "f64" | "float" | "double" => Ok(FieldType::F64),
177 "bytes" | "binary" | "blob" => Ok(FieldType::Bytes),
178 "json" => Ok(FieldType::Json),
179 "sparse_vector" => Ok(FieldType::SparseVector),
180 "dense_vector" | "vector" => Ok(FieldType::DenseVector),
181 _ => Err(Error::Schema(format!("Unknown field type: {}", type_str))),
182 }
183}
184
/// Options collected from an `indexed<...>` attribute before they are applied
/// to a dense- or sparse-vector field configuration.
///
/// Every member is `None` until the corresponding parameter is seen.
#[derive(Debug, Clone, Default)]
struct IndexConfig {
    /// Vector index algorithm, from a bare type name or the index-type kwarg.
    index_type: Option<super::schema::VectorIndexType>,
    /// Value of the `centroids` kwarg (applied as the coarse centroids path).
    centroids_path: Option<String>,
    /// Value of the `codebook` kwarg (applied as the PQ codebook path for ScaNN).
    codebook_path: Option<String>,
    /// Value of the `nprobe` kwarg.
    nprobe: Option<usize>,
    /// Value of the `mrl_dim` kwarg.
    mrl_dim: Option<usize>,
    /// Value of the `quantization` kwarg (applied to sparse-vector fields).
    quantization: Option<WeightQuantization>,
    /// Value of the `weight_threshold` kwarg (applied to sparse-vector fields).
    weight_threshold: Option<f32>,
}
197
198fn parse_attributes(pair: pest::iterators::Pair<Rule>) -> (bool, bool, bool, Option<IndexConfig>) {
201 let mut indexed = false;
202 let mut stored = false;
203 let mut multi = false;
204 let mut index_config = None;
205
206 for attr in pair.into_inner() {
207 if attr.as_rule() == Rule::attribute {
208 let mut found_indexed_with_config = false;
211 for inner in attr.clone().into_inner() {
212 if inner.as_rule() == Rule::indexed_with_config {
213 indexed = true;
214 index_config = Some(parse_index_config(inner));
215 found_indexed_with_config = true;
216 break;
217 }
218 }
219 if !found_indexed_with_config {
220 match attr.as_str() {
222 "indexed" => indexed = true,
223 "stored" => stored = true,
224 "multi" => multi = true,
225 _ => {}
226 }
227 }
228 }
229 }
230
231 (indexed, stored, multi, index_config)
232}
233
234fn parse_index_config(pair: pest::iterators::Pair<Rule>) -> IndexConfig {
236 let mut config = IndexConfig::default();
237
238 for inner in pair.into_inner() {
243 if inner.as_rule() == Rule::index_config_params {
244 for param in inner.into_inner() {
245 if param.as_rule() == Rule::index_config_param {
246 for p in param.into_inner() {
247 parse_single_index_config_param(&mut config, p);
248 }
249 }
250 }
251 }
252 }
253
254 config
255}
256
257fn parse_single_index_config_param(config: &mut IndexConfig, p: pest::iterators::Pair<Rule>) {
259 use super::schema::VectorIndexType;
260
261 match p.as_rule() {
262 Rule::index_type_spec => {
263 config.index_type = Some(match p.as_str() {
264 "scann" => VectorIndexType::ScaNN,
265 "rabitq" => VectorIndexType::IvfRaBitQ,
266 _ => VectorIndexType::IvfRaBitQ,
267 });
268 }
269 Rule::index_type_kwarg => {
270 if let Some(t) = p.into_inner().next() {
272 config.index_type = Some(match t.as_str() {
273 "scann" => VectorIndexType::ScaNN,
274 "rabitq" => VectorIndexType::IvfRaBitQ,
275 _ => VectorIndexType::IvfRaBitQ,
276 });
277 }
278 }
279 Rule::centroids_kwarg => {
280 if let Some(path) = p.into_inner().next()
283 && let Some(inner_path) = path.into_inner().next()
284 {
285 config.centroids_path = Some(inner_path.as_str().to_string());
286 }
287 }
288 Rule::codebook_kwarg => {
289 if let Some(path) = p.into_inner().next()
291 && let Some(inner_path) = path.into_inner().next()
292 {
293 config.codebook_path = Some(inner_path.as_str().to_string());
294 }
295 }
296 Rule::nprobe_kwarg => {
297 if let Some(n) = p.into_inner().next() {
299 config.nprobe = Some(n.as_str().parse().unwrap_or(32));
300 }
301 }
302 Rule::mrl_dim_kwarg => {
303 if let Some(n) = p.into_inner().next() {
305 config.mrl_dim = Some(n.as_str().parse().unwrap_or(0));
306 }
307 }
308 Rule::quantization_kwarg => {
309 if let Some(q) = p.into_inner().next() {
311 config.quantization = Some(match q.as_str() {
312 "float32" | "f32" => WeightQuantization::Float32,
313 "float16" | "f16" => WeightQuantization::Float16,
314 "uint8" | "u8" => WeightQuantization::UInt8,
315 "uint4" | "u4" => WeightQuantization::UInt4,
316 _ => WeightQuantization::default(),
317 });
318 }
319 }
320 Rule::weight_threshold_kwarg => {
321 if let Some(t) = p.into_inner().next() {
323 config.weight_threshold = Some(t.as_str().parse().unwrap_or(0.0));
324 }
325 }
326 _ => {}
327 }
328}
329
330fn parse_field_def(pair: pest::iterators::Pair<Rule>) -> Result<FieldDef> {
332 let mut inner = pair.into_inner();
333
334 let name = inner
335 .next()
336 .ok_or_else(|| Error::Schema("Missing field name".to_string()))?
337 .as_str()
338 .to_string();
339
340 let field_type_str = inner
341 .next()
342 .ok_or_else(|| Error::Schema("Missing field type".to_string()))?
343 .as_str();
344
345 let field_type = parse_field_type(field_type_str)?;
346
347 let mut tokenizer = None;
349 let mut sparse_vector_config = None;
350 let mut dense_vector_config = None;
351 let mut indexed = true;
352 let mut stored = true;
353 let mut multi = false;
354 let mut index_config: Option<IndexConfig> = None;
355
356 for item in inner {
357 match item.as_rule() {
358 Rule::tokenizer_spec => {
359 if let Some(tok_name) = item.into_inner().next() {
361 tokenizer = Some(tok_name.as_str().to_string());
362 }
363 }
364 Rule::sparse_vector_config => {
365 sparse_vector_config = Some(parse_sparse_vector_config(item));
367 }
368 Rule::dense_vector_config => {
369 dense_vector_config = Some(parse_dense_vector_config(item));
371 }
372 Rule::attributes => {
373 let (idx, sto, mul, idx_cfg) = parse_attributes(item);
374 indexed = idx;
375 stored = sto;
376 multi = mul;
377 index_config = idx_cfg;
378 }
379 _ => {}
380 }
381 }
382
383 if let Some(idx_cfg) = index_config {
385 if let Some(ref mut dv_config) = dense_vector_config {
386 apply_index_config_to_dense_vector(dv_config, idx_cfg);
387 } else if field_type == FieldType::SparseVector {
388 let sv_config = sparse_vector_config.get_or_insert(SparseVectorConfig::default());
390 apply_index_config_to_sparse_vector(sv_config, idx_cfg);
391 }
392 }
393
394 Ok(FieldDef {
395 name,
396 field_type,
397 indexed,
398 stored,
399 tokenizer,
400 multi,
401 sparse_vector_config,
402 dense_vector_config,
403 })
404}
405
406fn apply_index_config_to_dense_vector(config: &mut DenseVectorConfig, idx_cfg: IndexConfig) {
408 use super::schema::VectorIndexType;
409
410 let nprobe = idx_cfg.nprobe.unwrap_or(32);
411
412 match idx_cfg.index_type {
413 Some(VectorIndexType::ScaNN) => {
414 config.index_type = VectorIndexType::ScaNN;
415 config.coarse_centroids_path = idx_cfg.centroids_path;
416 config.pq_codebook_path = idx_cfg.codebook_path;
417 config.nprobe = nprobe;
418 }
419 Some(VectorIndexType::IvfRaBitQ) => {
420 config.index_type = VectorIndexType::IvfRaBitQ;
421 config.coarse_centroids_path = idx_cfg.centroids_path;
422 config.nprobe = nprobe;
423 }
424 Some(VectorIndexType::RaBitQ) | None => {
425 if idx_cfg.centroids_path.is_some() {
427 config.index_type = VectorIndexType::IvfRaBitQ;
428 config.coarse_centroids_path = idx_cfg.centroids_path;
429 config.nprobe = nprobe;
430 }
431 }
433 }
434
435 if idx_cfg.mrl_dim.is_some() {
437 config.mrl_dim = idx_cfg.mrl_dim;
438 }
439}
440
441fn parse_sparse_vector_config(pair: pest::iterators::Pair<Rule>) -> SparseVectorConfig {
444 let mut index_size = IndexSize::default();
445
446 for inner in pair.into_inner() {
448 if inner.as_rule() == Rule::index_size_spec {
449 index_size = match inner.as_str() {
450 "u16" => IndexSize::U16,
451 "u32" => IndexSize::U32,
452 _ => IndexSize::default(),
453 };
454 }
455 }
456
457 SparseVectorConfig {
458 index_size,
459 weight_quantization: WeightQuantization::default(),
460 weight_threshold: 0.0,
461 }
462}
463
464fn apply_index_config_to_sparse_vector(config: &mut SparseVectorConfig, idx_cfg: IndexConfig) {
466 if let Some(q) = idx_cfg.quantization {
467 config.weight_quantization = q;
468 }
469 if let Some(t) = idx_cfg.weight_threshold {
470 config.weight_threshold = t;
471 }
472}
473
474fn parse_dense_vector_config(pair: pest::iterators::Pair<Rule>) -> DenseVectorConfig {
477 let mut dim: usize = 0;
478
479 for params in pair.into_inner() {
481 if params.as_rule() == Rule::dense_vector_params {
482 for inner in params.into_inner() {
483 match inner.as_rule() {
484 Rule::dense_vector_keyword_params => {
485 for kwarg in inner.into_inner() {
487 if kwarg.as_rule() == Rule::dims_kwarg
488 && let Some(d) = kwarg.into_inner().next()
489 {
490 dim = d.as_str().parse().unwrap_or(0);
491 }
492 }
493 }
494 Rule::dense_vector_positional_params => {
495 if let Some(dim_pair) = inner.into_inner().next() {
497 dim = dim_pair.as_str().parse().unwrap_or(0);
498 }
499 }
500 _ => {}
501 }
502 }
503 }
504 }
505
506 DenseVectorConfig::new(dim)
507}
508
509fn parse_default_fields_def(pair: pest::iterators::Pair<Rule>) -> Vec<String> {
511 pair.into_inner().map(|p| p.as_str().to_string()).collect()
512}
513
514fn parse_query_router_def(pair: pest::iterators::Pair<Rule>) -> Result<QueryRouterRule> {
516 let mut pattern = String::new();
517 let mut substitution = String::new();
518 let mut target_field = String::new();
519 let mut mode = RoutingMode::Additional;
520
521 for prop in pair.into_inner() {
522 if prop.as_rule() != Rule::query_router_prop {
523 continue;
524 }
525
526 for inner in prop.into_inner() {
527 match inner.as_rule() {
528 Rule::query_router_pattern => {
529 if let Some(regex_str) = inner.into_inner().next() {
530 pattern = parse_string_value(regex_str);
531 }
532 }
533 Rule::query_router_substitution => {
534 if let Some(quoted) = inner.into_inner().next() {
535 substitution = parse_string_value(quoted);
536 }
537 }
538 Rule::query_router_target => {
539 if let Some(ident) = inner.into_inner().next() {
540 target_field = ident.as_str().to_string();
541 }
542 }
543 Rule::query_router_mode => {
544 if let Some(mode_val) = inner.into_inner().next() {
545 mode = match mode_val.as_str() {
546 "exclusive" => RoutingMode::Exclusive,
547 "additional" => RoutingMode::Additional,
548 _ => RoutingMode::Additional,
549 };
550 }
551 }
552 _ => {}
553 }
554 }
555 }
556
557 if pattern.is_empty() {
558 return Err(Error::Schema("query_router missing 'pattern'".to_string()));
559 }
560 if substitution.is_empty() {
561 return Err(Error::Schema(
562 "query_router missing 'substitution'".to_string(),
563 ));
564 }
565 if target_field.is_empty() {
566 return Err(Error::Schema(
567 "query_router missing 'target_field'".to_string(),
568 ));
569 }
570
571 Ok(QueryRouterRule {
572 pattern,
573 substitution,
574 target_field,
575 mode,
576 })
577}
578
579fn parse_string_value(pair: pest::iterators::Pair<Rule>) -> String {
581 let s = pair.as_str();
582 match pair.as_rule() {
583 Rule::regex_string => {
584 if let Some(inner) = pair.into_inner().next() {
586 parse_string_value(inner)
587 } else {
588 s.to_string()
589 }
590 }
591 Rule::raw_string => {
592 s[2..s.len() - 1].to_string()
594 }
595 Rule::quoted_string => {
596 let inner = &s[1..s.len() - 1];
598 inner
600 .replace("\\n", "\n")
601 .replace("\\t", "\t")
602 .replace("\\\"", "\"")
603 .replace("\\\\", "\\")
604 }
605 _ => s.to_string(),
606 }
607}
608
609fn parse_index_def(pair: pest::iterators::Pair<Rule>) -> Result<IndexDef> {
611 let mut inner = pair.into_inner();
612
613 let name = inner
614 .next()
615 .ok_or_else(|| Error::Schema("Missing index name".to_string()))?
616 .as_str()
617 .to_string();
618
619 let mut fields = Vec::new();
620 let mut default_fields = Vec::new();
621 let mut query_routers = Vec::new();
622
623 for item in inner {
624 match item.as_rule() {
625 Rule::field_def => {
626 fields.push(parse_field_def(item)?);
627 }
628 Rule::default_fields_def => {
629 default_fields = parse_default_fields_def(item);
630 }
631 Rule::query_router_def => {
632 query_routers.push(parse_query_router_def(item)?);
633 }
634 _ => {}
635 }
636 }
637
638 Ok(IndexDef {
639 name,
640 fields,
641 default_fields,
642 query_routers,
643 })
644}
645
646pub fn parse_sdl(input: &str) -> Result<Vec<IndexDef>> {
648 let pairs = SdlParser::parse(Rule::file, input)
649 .map_err(|e| Error::Schema(format!("Parse error: {}", e)))?;
650
651 let mut indexes = Vec::new();
652
653 for pair in pairs {
654 if pair.as_rule() == Rule::file {
655 for inner in pair.into_inner() {
656 if inner.as_rule() == Rule::index_def {
657 indexes.push(parse_index_def(inner)?);
658 }
659 }
660 }
661 }
662
663 Ok(indexes)
664}
665
666pub fn parse_single_index(input: &str) -> Result<IndexDef> {
668 let indexes = parse_sdl(input)?;
669
670 if indexes.is_empty() {
671 return Err(Error::Schema("No index definition found".to_string()));
672 }
673
674 if indexes.len() > 1 {
675 return Err(Error::Schema(
676 "Multiple index definitions found, expected one".to_string(),
677 ));
678 }
679
680 Ok(indexes.into_iter().next().unwrap())
681}
682
/// Unit tests for the SDL parser and the `IndexDef` -> `Schema` conversion.
#[cfg(test)]
mod tests {
    use super::*;

    /// Two text fields: names, types and the indexed/stored flags are parsed.
    #[test]
    fn test_parse_simple_schema() {
        let sdl = r#"
            index articles {
                field title: text [indexed, stored]
                field body: text [indexed]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let index = &indexes[0];
        assert_eq!(index.name, "articles");
        assert_eq!(index.fields.len(), 2);

        assert_eq!(index.fields[0].name, "title");
        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(index.fields[0].indexed);
        assert!(index.fields[0].stored);

        assert_eq!(index.fields[1].name, "body");
        assert!(matches!(index.fields[1].field_type, FieldType::Text));
        assert!(index.fields[1].indexed);
        assert!(!index.fields[1].stored);
    }

    /// Canonical type keywords resolve to the matching `FieldType`.
    #[test]
    fn test_parse_all_field_types() {
        let sdl = r#"
            index test {
                field text_field: text [indexed, stored]
                field u64_field: u64 [indexed, stored]
                field i64_field: i64 [indexed, stored]
                field f64_field: f64 [indexed, stored]
                field bytes_field: bytes [stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(matches!(index.fields[1].field_type, FieldType::U64));
        assert!(matches!(index.fields[2].field_type, FieldType::I64));
        assert!(matches!(index.fields[3].field_type, FieldType::F64));
        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
    }

    /// `#` comments (own-line and trailing) do not affect the parsed fields.
    #[test]
    fn test_parse_with_comments() {
        let sdl = r#"
            # This is a comment
            index articles {
                # Title field
                field title: text [indexed, stored]
                field body: text [indexed] # inline comment not supported yet
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].fields.len(), 2);
    }

    /// Type aliases (`string`, `int`, `uint`, `float`, `binary`) are accepted.
    #[test]
    fn test_parse_type_aliases() {
        let sdl = r#"
            index test {
                field a: string [indexed]
                field b: int [indexed]
                field c: uint [indexed]
                field d: float [indexed]
                field e: binary [stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(matches!(index.fields[1].field_type, FieldType::I64));
        assert!(matches!(index.fields[2].field_type, FieldType::U64));
        assert!(matches!(index.fields[3].field_type, FieldType::F64));
        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
    }

    /// `to_schema` registers every declared field and nothing else.
    #[test]
    fn test_to_schema() {
        let sdl = r#"
            index articles {
                field title: text [indexed, stored]
                field views: u64 [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let schema = indexes[0].to_schema();

        assert!(schema.get_field("title").is_some());
        assert!(schema.get_field("views").is_some());
        assert!(schema.get_field("nonexistent").is_none());
    }

    /// A field without an attribute list is both indexed and stored.
    #[test]
    fn test_default_attributes() {
        let sdl = r#"
            index test {
                field title: text
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let field = &indexes[0].fields[0];

        assert!(field.indexed);
        assert!(field.stored);
    }

    /// Several `index` blocks in one input are returned in source order.
    #[test]
    fn test_multiple_indexes() {
        let sdl = r#"
            index articles {
                field title: text [indexed, stored]
            }

            index users {
                field name: text [indexed, stored]
                field email: text [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 2);
        assert_eq!(indexes[0].name, "articles");
        assert_eq!(indexes[1].name, "users");
    }

    /// `text<name>` records the tokenizer; plain `text` leaves it unset.
    #[test]
    fn test_tokenizer_spec() {
        let sdl = r#"
            index articles {
                field title: text<en_stem> [indexed, stored]
                field body: text<default> [indexed]
                field author: text [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert_eq!(index.fields[0].name, "title");
        assert_eq!(index.fields[0].tokenizer, Some("en_stem".to_string()));

        assert_eq!(index.fields[1].name, "body");
        assert_eq!(index.fields[1].tokenizer, Some("default".to_string()));

        assert_eq!(index.fields[2].name, "author");
        assert_eq!(index.fields[2].tokenizer, None);
    }

    /// Tokenizer names survive the conversion into schema field entries.
    #[test]
    fn test_tokenizer_in_schema() {
        let sdl = r#"
            index articles {
                field title: text<german> [indexed, stored]
                field body: text<en_stem> [indexed]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let schema = indexes[0].to_schema();

        let title_field = schema.get_field("title").unwrap();
        let title_entry = schema.get_field_entry(title_field).unwrap();
        assert_eq!(title_entry.tokenizer, Some("german".to_string()));

        let body_field = schema.get_field("body").unwrap();
        let body_entry = schema.get_field_entry(body_field).unwrap();
        assert_eq!(body_entry.tokenizer, Some("en_stem".to_string()));
    }

    /// A quoted pattern has its `\\` escapes decoded; all four properties parse.
    #[test]
    fn test_query_router_basic() {
        let sdl = r#"
            index documents {
                field title: text [indexed, stored]
                field uri: text [indexed, stored]

                query_router {
                    pattern: "10\\.\\d{4,}/[^\\s]+"
                    substitution: "doi://{0}"
                    target_field: uris
                    mode: exclusive
                }
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert_eq!(index.query_routers.len(), 1);
        let router = &index.query_routers[0];
        assert_eq!(router.pattern, r"10\.\d{4,}/[^\s]+");
        assert_eq!(router.substitution, "doi://{0}");
        assert_eq!(router.target_field, "uris");
        assert_eq!(router.mode, RoutingMode::Exclusive);
    }

    /// A raw-string pattern (`r"..."`) is taken verbatim.
    #[test]
    fn test_query_router_raw_string() {
        let sdl = r#"
            index documents {
                field uris: text [indexed, stored]

                query_router {
                    pattern: r"^pmid:(\d+)$"
                    substitution: "pubmed://{1}"
                    target_field: uris
                    mode: additional
                }
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let router = &indexes[0].query_routers[0];

        assert_eq!(router.pattern, r"^pmid:(\d+)$");
        assert_eq!(router.substitution, "pubmed://{1}");
        assert_eq!(router.mode, RoutingMode::Additional);
    }

    /// Every `query_router` block of an index is collected.
    #[test]
    fn test_multiple_query_routers() {
        let sdl = r#"
            index documents {
                field uris: text [indexed, stored]

                query_router {
                    pattern: r"^doi:(10\.\d{4,}/[^\s]+)$"
                    substitution: "doi://{1}"
                    target_field: uris
                    mode: exclusive
                }

                query_router {
                    pattern: r"^pmid:(\d+)$"
                    substitution: "pubmed://{1}"
                    target_field: uris
                    mode: exclusive
                }

                query_router {
                    pattern: r"^arxiv:(\d+\.\d+)$"
                    substitution: "arxiv://{1}"
                    target_field: uris
                    mode: additional
                }
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].query_routers.len(), 3);
    }

    /// Omitting `mode` yields `RoutingMode::Additional`.
    #[test]
    fn test_query_router_default_mode() {
        let sdl = r#"
            index documents {
                field uris: text [indexed, stored]

                query_router {
                    pattern: r"test"
                    substitution: "{0}"
                    target_field: uris
                }
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].query_routers[0].mode, RoutingMode::Additional);
    }

    /// The `multi` attribute is parsed and carried into the schema entry.
    #[test]
    fn test_multi_attribute() {
        let sdl = r#"
            index documents {
                field uris: text [indexed, stored, multi]
                field title: text [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let fields = &indexes[0].fields;
        assert_eq!(fields.len(), 2);

        assert_eq!(fields[0].name, "uris");
        assert!(fields[0].multi, "uris field should have multi=true");

        assert_eq!(fields[1].name, "title");
        assert!(!fields[1].multi, "title field should have multi=false");

        let schema = indexes[0].to_schema();
        let uris_field = schema.get_field("uris").unwrap();
        let title_field = schema.get_field("title").unwrap();

        assert!(schema.get_field_entry(uris_field).unwrap().multi);
        assert!(!schema.get_field_entry(title_field).unwrap().multi);
    }

    /// A bare `sparse_vector` field has no sparse-vector config attached.
    #[test]
    fn test_sparse_vector_field() {
        let sdl = r#"
            index documents {
                field embedding: sparse_vector [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);
        assert_eq!(indexes[0].fields.len(), 1);
        assert_eq!(indexes[0].fields[0].name, "embedding");
        assert_eq!(indexes[0].fields[0].field_type, FieldType::SparseVector);
        assert!(indexes[0].fields[0].sparse_vector_config.is_none());
    }

    /// Index size and quantization are combined into one sparse config.
    #[test]
    fn test_sparse_vector_with_config() {
        let sdl = r#"
            index documents {
                field embedding: sparse_vector<u16> [indexed<quantization: uint8>, stored]
                field dense: sparse_vector<u32> [indexed<quantization: float32>]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].fields.len(), 2);

        let f1 = &indexes[0].fields[0];
        assert_eq!(f1.name, "embedding");
        let config1 = f1.sparse_vector_config.as_ref().unwrap();
        assert_eq!(config1.index_size, IndexSize::U16);
        assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);

        let f2 = &indexes[0].fields[1];
        assert_eq!(f2.name, "dense");
        let config2 = f2.sparse_vector_config.as_ref().unwrap();
        assert_eq!(config2.index_size, IndexSize::U32);
        assert_eq!(config2.weight_quantization, WeightQuantization::Float32);
    }

    /// `weight_threshold` is parsed alongside quantization.
    #[test]
    fn test_sparse_vector_with_weight_threshold() {
        let sdl = r#"
            index documents {
                field embedding: sparse_vector<u16> [indexed<quantization: uint8, weight_threshold: 0.1>, stored]
                field embedding2: sparse_vector<u32> [indexed<quantization: float16, weight_threshold: 0.05>]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].fields.len(), 2);

        let f1 = &indexes[0].fields[0];
        assert_eq!(f1.name, "embedding");
        let config1 = f1.sparse_vector_config.as_ref().unwrap();
        assert_eq!(config1.index_size, IndexSize::U16);
        assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);
        assert!((config1.weight_threshold - 0.1).abs() < 0.001);

        let f2 = &indexes[0].fields[1];
        assert_eq!(f2.name, "embedding2");
        let config2 = f2.sparse_vector_config.as_ref().unwrap();
        assert_eq!(config2.index_size, IndexSize::U32);
        assert_eq!(config2.weight_quantization, WeightQuantization::Float16);
        assert!((config2.weight_threshold - 0.05).abs() < 0.001);
    }

    /// Positional dimension syntax: `dense_vector<768>`.
    #[test]
    fn test_dense_vector_field() {
        let sdl = r#"
            index documents {
                field embedding: dense_vector<768> [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);
        assert_eq!(indexes[0].fields.len(), 1);

        let f = &indexes[0].fields[0];
        assert_eq!(f.name, "embedding");
        assert_eq!(f.field_type, FieldType::DenseVector);

        let config = f.dense_vector_config.as_ref().unwrap();
        assert_eq!(config.dim, 768);
    }

    /// `vector` is an alias for `dense_vector`.
    #[test]
    fn test_dense_vector_alias() {
        let sdl = r#"
            index documents {
                field embedding: vector<1536> [indexed]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].fields[0].field_type, FieldType::DenseVector);
        assert_eq!(
            indexes[0].fields[0]
                .dense_vector_config
                .as_ref()
                .unwrap()
                .dim,
            1536
        );
    }

    /// A centroids path is recorded and `nprobe` falls back to 32.
    #[test]
    fn test_dense_vector_with_centroids() {
        let sdl = r#"
            index documents {
                field embedding: dense_vector<768> [indexed<centroids: "centroids.bin">, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let f = &indexes[0].fields[0];
        assert_eq!(f.name, "embedding");
        assert_eq!(f.field_type, FieldType::DenseVector);

        let config = f.dense_vector_config.as_ref().unwrap();
        assert_eq!(config.dim, 768);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("centroids.bin")
        );
        assert_eq!(config.nprobe, 32);
    }

    /// An explicit `nprobe` overrides the fallback.
    #[test]
    fn test_dense_vector_with_centroids_and_nprobe() {
        let sdl = r#"
            index documents {
                field embedding: dense_vector<1536> [indexed<centroids: "/path/to/centroids.bin", nprobe: 64>]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 1536);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("/path/to/centroids.bin")
        );
        assert_eq!(config.nprobe, 64);
    }

    /// Keyword dimension syntax: `dense_vector<dims: 1536>`.
    #[test]
    fn test_dense_vector_keyword_syntax() {
        let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 1536> [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 1536);
        assert!(config.coarse_centroids_path.is_none());
    }

    /// Keyword dimension combined with centroids and `nprobe`.
    #[test]
    fn test_dense_vector_keyword_syntax_full() {
        let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 1536> [indexed<centroids: "/path/to/centroids.bin", nprobe: 64>]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 1536);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("/path/to/centroids.bin")
        );
        assert_eq!(config.nprobe, 64);
    }

    /// Keyword dimension with centroids only; `nprobe` falls back to 32.
    #[test]
    fn test_dense_vector_keyword_syntax_partial() {
        let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 768> [indexed<centroids: "centroids.bin">]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 768);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("centroids.bin")
        );
        assert_eq!(config.nprobe, 32);
    }

    /// `indexed<scann, ...>` selects ScaNN with centroids, codebook and nprobe.
    #[test]
    fn test_dense_vector_scann_index() {
        use crate::dsl::schema::VectorIndexType;

        let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 768> [indexed<scann, centroids: "centroids.bin", codebook: "pq_codebook.bin", nprobe: 64>]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 768);
        assert_eq!(config.index_type, VectorIndexType::ScaNN);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("centroids.bin")
        );
        assert_eq!(config.pq_codebook_path.as_deref(), Some("pq_codebook.bin"));
        assert_eq!(config.nprobe, 64);
    }

    /// `indexed<rabitq, centroids: ...>` selects IvfRaBitQ without a codebook.
    #[test]
    fn test_dense_vector_rabitq_index() {
        use crate::dsl::schema::VectorIndexType;

        let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 1536> [indexed<rabitq, centroids: "centroids.bin">]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 1536);
        assert_eq!(config.index_type, VectorIndexType::IvfRaBitQ);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("centroids.bin")
        );
        assert!(config.pq_codebook_path.is_none());
    }

    /// `indexed<rabitq>` alone still selects IvfRaBitQ, with no centroids path.
    #[test]
    fn test_dense_vector_rabitq_no_centroids() {
        use crate::dsl::schema::VectorIndexType;

        let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 768> [indexed<rabitq>]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 768);
        assert_eq!(config.index_type, VectorIndexType::IvfRaBitQ);
        assert!(config.coarse_centroids_path.is_none());
    }

    /// Plain `indexed` leaves the index type at `RaBitQ`.
    #[test]
    fn test_dense_vector_default_index_type() {
        use crate::dsl::schema::VectorIndexType;

        let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 768> [indexed]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 768);
        assert_eq!(config.index_type, VectorIndexType::RaBitQ);
    }

    /// `mrl_dim` is stored and becomes the reported index dimension.
    #[test]
    fn test_dense_vector_mrl_dim() {
        let sdl = r#"
            index documents {
                field embedding: dense_vector<1536> [indexed<mrl_dim: 256>]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 1536);
        assert_eq!(config.mrl_dim, Some(256));
        assert_eq!(config.index_dim(), 256);
    }

    /// `mrl_dim` combines with centroids and `nprobe`.
    #[test]
    fn test_dense_vector_mrl_dim_with_centroids() {
        let sdl = r#"
            index documents {
                field embedding: dense_vector<768> [indexed<centroids: "centroids.bin", nprobe: 64, mrl_dim: 128>]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 768);
        assert_eq!(config.mrl_dim, Some(128));
        assert_eq!(config.index_dim(), 128);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("centroids.bin")
        );
        assert_eq!(config.nprobe, 64);
    }

    /// Without `mrl_dim` the index dimension equals the full dimension.
    #[test]
    fn test_dense_vector_no_mrl_dim() {
        let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 768> [indexed]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 768);
        assert_eq!(config.mrl_dim, None);
        assert_eq!(config.index_dim(), 768);
    }

    /// JSON fields parse, and the schema entry is stored but not indexed.
    #[test]
    fn test_json_field_type() {
        let sdl = r#"
            index documents {
                field title: text [indexed, stored]
                field metadata: json [stored]
                field extra: json
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert_eq!(index.fields.len(), 3);

        assert_eq!(index.fields[1].name, "metadata");
        assert!(matches!(index.fields[1].field_type, FieldType::Json));
        assert!(index.fields[1].stored);
        assert_eq!(index.fields[2].name, "extra");
        assert!(matches!(index.fields[2].field_type, FieldType::Json));

        let schema = index.to_schema();
        let metadata_field = schema.get_field("metadata").unwrap();
        let entry = schema.get_field_entry(metadata_field).unwrap();
        assert_eq!(entry.field_type, FieldType::Json);
        assert!(!entry.indexed);
        assert!(entry.stored);
    }
}