1use pest::Parser;
46use pest_derive::Parser;
47
48use super::query_field_router::{QueryRouterRule, RoutingMode};
49use super::schema::{DenseVectorQuantization, FieldType, Schema, SchemaBuilder};
50use crate::Result;
51use crate::error::Error;
52
/// Parser for the schema definition language (SDL).
///
/// The implementation is generated by `pest_derive` from the grammar file
/// `dsl/sdl/sdl.pest`; the same derive produces the `Rule` enum that the
/// parsing helpers in this module match on.
#[derive(Parser)]
#[grammar = "dsl/sdl/sdl.pest"]
pub struct SdlParser;
56
57use super::schema::DenseVectorConfig;
58use crate::structures::{
59 IndexSize, QueryWeighting, SparseQueryConfig, SparseVectorConfig, WeightQuantization,
60};
61
/// A single field declaration parsed from an SDL index definition.
#[derive(Debug, Clone)]
pub struct FieldDef {
    /// Field name exactly as written in the SDL source.
    pub name: String,
    /// Field type resolved from the SDL type keyword (or one of its aliases).
    pub field_type: FieldType,
    /// Whether the field is indexed. `true` when the declaration has no
    /// attribute list; otherwise only `true` if `indexed` is listed.
    pub indexed: bool,
    /// Whether the field is stored. `true` when the declaration has no
    /// attribute list; otherwise only `true` if `stored` is listed.
    pub stored: bool,
    /// Tokenizer name taken from the field's tokenizer spec, if any.
    /// `to_schema` falls back to `"default"` for text fields when unset.
    pub tokenizer: Option<String>,
    /// Multi-value flag; set when the `stored` attribute is written with a
    /// configuration (e.g. `stored<multi>`).
    pub multi: bool,
    /// Position tracking mode taken from the `indexed<...>` configuration.
    /// When unset, `to_schema` uses `Ordinal` for multi-valued vector fields.
    pub positions: Option<super::schema::PositionMode>,
    /// Sparse vector settings, present when the field declares a sparse
    /// vector config or a sparse vector field has an `indexed<...>` config.
    pub sparse_vector_config: Option<SparseVectorConfig>,
    /// Dense vector settings; `to_schema` requires this for `DenseVector` fields.
    pub dense_vector_config: Option<DenseVectorConfig>,
    /// Whether the field carries the `fast` attribute.
    pub fast: bool,
}
82
/// A complete index definition parsed from SDL.
#[derive(Debug, Clone)]
pub struct IndexDef {
    /// Index name as written in the SDL source.
    pub name: String,
    /// Field declarations, in the order they appear in the SDL source.
    pub fields: Vec<FieldDef>,
    /// Names listed in the index's default-fields declaration; empty when
    /// the declaration is absent.
    pub default_fields: Vec<String>,
    /// Query router rules, one per `query_router` block, in source order.
    pub query_routers: Vec<QueryRouterRule>,
}
92
93impl IndexDef {
94 pub fn to_schema(&self) -> Schema {
96 let mut builder = SchemaBuilder::default();
97
98 for field in &self.fields {
99 let f = match field.field_type {
100 FieldType::Text => {
101 let tokenizer = field.tokenizer.as_deref().unwrap_or("default");
102 builder.add_text_field_with_tokenizer(
103 &field.name,
104 field.indexed,
105 field.stored,
106 tokenizer,
107 )
108 }
109 FieldType::U64 => builder.add_u64_field(&field.name, field.indexed, field.stored),
110 FieldType::I64 => builder.add_i64_field(&field.name, field.indexed, field.stored),
111 FieldType::F64 => builder.add_f64_field(&field.name, field.indexed, field.stored),
112 FieldType::Bytes => builder.add_bytes_field(&field.name, field.stored),
113 FieldType::Json => builder.add_json_field(&field.name, field.stored),
114 FieldType::SparseVector => {
115 if let Some(config) = &field.sparse_vector_config {
116 builder.add_sparse_vector_field_with_config(
117 &field.name,
118 field.indexed,
119 field.stored,
120 config.clone(),
121 )
122 } else {
123 builder.add_sparse_vector_field(&field.name, field.indexed, field.stored)
124 }
125 }
126 FieldType::DenseVector => {
127 let config = field
129 .dense_vector_config
130 .as_ref()
131 .expect("DenseVector field requires dimension to be specified");
132 builder.add_dense_vector_field_with_config(
133 &field.name,
134 field.indexed,
135 field.stored,
136 config.clone(),
137 )
138 }
139 };
140 if field.multi {
141 builder.set_multi(f, true);
142 }
143 if field.fast {
144 builder.set_fast(f, true);
145 }
146 let positions = field.positions.or({
148 if field.multi
150 && matches!(
151 field.field_type,
152 FieldType::SparseVector | FieldType::DenseVector
153 )
154 {
155 Some(super::schema::PositionMode::Ordinal)
156 } else {
157 None
158 }
159 });
160 if let Some(mode) = positions {
161 builder.set_positions(f, mode);
162 }
163 }
164
165 if !self.default_fields.is_empty() {
167 builder.set_default_fields(self.default_fields.clone());
168 }
169
170 if !self.query_routers.is_empty() {
172 builder.set_query_routers(self.query_routers.clone());
173 }
174
175 builder.build()
176 }
177
178 pub fn to_query_router(&self) -> Result<Option<super::query_field_router::QueryFieldRouter>> {
183 if self.query_routers.is_empty() {
184 return Ok(None);
185 }
186
187 super::query_field_router::QueryFieldRouter::from_rules(&self.query_routers)
188 .map(Some)
189 .map_err(Error::Schema)
190 }
191}
192
193fn parse_field_type(type_str: &str) -> Result<FieldType> {
195 match type_str {
196 "text" | "string" | "str" => Ok(FieldType::Text),
197 "u64" | "uint" | "unsigned" => Ok(FieldType::U64),
198 "i64" | "int" | "integer" => Ok(FieldType::I64),
199 "f64" | "float" | "double" => Ok(FieldType::F64),
200 "bytes" | "binary" | "blob" => Ok(FieldType::Bytes),
201 "json" => Ok(FieldType::Json),
202 "sparse_vector" => Ok(FieldType::SparseVector),
203 "dense_vector" | "vector" => Ok(FieldType::DenseVector),
204 _ => Err(Error::Schema(format!("Unknown field type: {}", type_str))),
205 }
206}
207
/// Options collected from an `indexed<...>` attribute configuration.
///
/// Every option is `None` unless it appears in the SDL source. The values
/// are later merged into a dense or sparse vector config by
/// `apply_index_config_to_dense_vector` / `apply_index_config_to_sparse_vector`.
#[derive(Debug, Clone, Default)]
struct IndexConfig {
    // --- Options applied to dense vector fields ---
    /// Vector index type (`flat`, `rabitq`, `ivf_rabitq`, `scann`).
    index_type: Option<super::schema::VectorIndexType>,
    /// Value of the `num_clusters` option.
    num_clusters: Option<usize>,
    /// Value of the `nprobe` option.
    nprobe: Option<usize>,
    /// Value of the `build_threshold` option.
    build_threshold: Option<usize>,
    // --- Options applied to sparse vector fields ---
    /// Weight quantization for stored sparse weights.
    quantization: Option<WeightQuantization>,
    /// Value of the `weight_threshold` option.
    weight_threshold: Option<f32>,
    /// Block size; rounded up to a power of two when applied.
    block_size: Option<usize>,
    /// Pruning value; clamped to `[0.0, 1.0]` when applied.
    pruning: Option<f32>,
    // --- Options from the nested query config block (sparse vectors) ---
    /// Tokenizer used for queries.
    query_tokenizer: Option<String>,
    /// Query weighting strategy (`one`, `idf`, `idf_file`).
    query_weighting: Option<QueryWeighting>,
    /// Weight threshold for queries.
    query_weight_threshold: Option<f32>,
    /// Maximum number of query dimensions.
    query_max_dims: Option<usize>,
    /// Pruning value for queries.
    query_pruning: Option<f32>,
    /// Position tracking mode; copied onto `FieldDef::positions`.
    positions: Option<super::schema::PositionMode>,
}
229
230fn parse_attributes(
235 pair: pest::iterators::Pair<Rule>,
236) -> (bool, bool, bool, bool, Option<IndexConfig>) {
237 let mut indexed = false;
238 let mut stored = false;
239 let mut multi = false;
240 let mut fast = false;
241 let mut index_config = None;
242
243 for attr in pair.into_inner() {
244 if attr.as_rule() == Rule::attribute {
245 let mut found_config = false;
247 for inner in attr.clone().into_inner() {
248 match inner.as_rule() {
249 Rule::indexed_with_config => {
250 indexed = true;
251 index_config = Some(parse_index_config(inner));
252 found_config = true;
253 break;
254 }
255 Rule::stored_with_config => {
256 stored = true;
257 multi = true; found_config = true;
259 break;
260 }
261 _ => {}
262 }
263 }
264 if !found_config {
265 match attr.as_str() {
267 "indexed" => indexed = true,
268 "stored" => stored = true,
269 "fast" => fast = true,
270 _ => {}
271 }
272 }
273 }
274 }
275
276 (indexed, stored, multi, fast, index_config)
277}
278
279fn parse_index_config(pair: pest::iterators::Pair<Rule>) -> IndexConfig {
281 let mut config = IndexConfig::default();
282
283 for inner in pair.into_inner() {
288 if inner.as_rule() == Rule::index_config_params {
289 for param in inner.into_inner() {
290 if param.as_rule() == Rule::index_config_param {
291 for p in param.into_inner() {
292 parse_single_index_config_param(&mut config, p);
293 }
294 }
295 }
296 }
297 }
298
299 config
300}
301
302fn parse_single_index_config_param(config: &mut IndexConfig, p: pest::iterators::Pair<Rule>) {
304 use super::schema::VectorIndexType;
305
306 match p.as_rule() {
307 Rule::index_type_spec => {
308 config.index_type = Some(match p.as_str() {
309 "flat" => VectorIndexType::Flat,
310 "rabitq" => VectorIndexType::RaBitQ,
311 "ivf_rabitq" => VectorIndexType::IvfRaBitQ,
312 "scann" => VectorIndexType::ScaNN,
313 _ => VectorIndexType::RaBitQ,
314 });
315 }
316 Rule::index_type_kwarg => {
317 if let Some(t) = p.into_inner().next() {
319 config.index_type = Some(match t.as_str() {
320 "flat" => VectorIndexType::Flat,
321 "rabitq" => VectorIndexType::RaBitQ,
322 "ivf_rabitq" => VectorIndexType::IvfRaBitQ,
323 "scann" => VectorIndexType::ScaNN,
324 _ => VectorIndexType::RaBitQ,
325 });
326 }
327 }
328 Rule::num_clusters_kwarg => {
329 if let Some(n) = p.into_inner().next() {
331 config.num_clusters = Some(n.as_str().parse().unwrap_or(256));
332 }
333 }
334 Rule::build_threshold_kwarg => {
335 if let Some(n) = p.into_inner().next() {
337 config.build_threshold = Some(n.as_str().parse().unwrap_or(10000));
338 }
339 }
340 Rule::nprobe_kwarg => {
341 if let Some(n) = p.into_inner().next() {
343 config.nprobe = Some(n.as_str().parse().unwrap_or(32));
344 }
345 }
346 Rule::quantization_kwarg => {
347 if let Some(q) = p.into_inner().next() {
349 config.quantization = Some(match q.as_str() {
350 "float32" | "f32" => WeightQuantization::Float32,
351 "float16" | "f16" => WeightQuantization::Float16,
352 "uint8" | "u8" => WeightQuantization::UInt8,
353 "uint4" | "u4" => WeightQuantization::UInt4,
354 _ => WeightQuantization::default(),
355 });
356 }
357 }
358 Rule::weight_threshold_kwarg => {
359 if let Some(t) = p.into_inner().next() {
361 config.weight_threshold = Some(t.as_str().parse().unwrap_or(0.0));
362 }
363 }
364 Rule::block_size_kwarg => {
365 if let Some(n) = p.into_inner().next() {
367 config.block_size = Some(n.as_str().parse().unwrap_or(128));
368 }
369 }
370 Rule::pruning_kwarg => {
371 if let Some(f) = p.into_inner().next() {
373 config.pruning = Some(f.as_str().parse().unwrap_or(1.0));
374 }
375 }
376 Rule::query_config_block => {
377 parse_query_config_block(config, p);
379 }
380 Rule::positions_kwarg => {
381 use super::schema::PositionMode;
383 config.positions = Some(match p.as_str() {
384 "ordinal" => PositionMode::Ordinal,
385 "token_position" => PositionMode::TokenPosition,
386 _ => PositionMode::Full, });
388 }
389 _ => {}
390 }
391}
392
393fn parse_query_config_block(config: &mut IndexConfig, pair: pest::iterators::Pair<Rule>) {
395 for inner in pair.into_inner() {
396 if inner.as_rule() == Rule::query_config_params {
397 for param in inner.into_inner() {
398 if param.as_rule() == Rule::query_config_param {
399 for p in param.into_inner() {
400 match p.as_rule() {
401 Rule::query_tokenizer_kwarg => {
402 if let Some(path) = p.into_inner().next()
404 && let Some(inner_path) = path.into_inner().next()
405 {
406 config.query_tokenizer = Some(inner_path.as_str().to_string());
407 }
408 }
409 Rule::query_weighting_kwarg => {
410 if let Some(w) = p.into_inner().next() {
412 config.query_weighting = Some(match w.as_str() {
413 "one" => QueryWeighting::One,
414 "idf" => QueryWeighting::Idf,
415 "idf_file" => QueryWeighting::IdfFile,
416 _ => QueryWeighting::One,
417 });
418 }
419 }
420 Rule::query_weight_threshold_kwarg => {
421 if let Some(t) = p.into_inner().next() {
422 config.query_weight_threshold =
423 Some(t.as_str().parse().unwrap_or(0.0));
424 }
425 }
426 Rule::query_max_dims_kwarg => {
427 if let Some(t) = p.into_inner().next() {
428 config.query_max_dims = Some(t.as_str().parse().unwrap_or(0));
429 }
430 }
431 Rule::query_pruning_kwarg => {
432 if let Some(t) = p.into_inner().next() {
433 config.query_pruning = Some(t.as_str().parse().unwrap_or(1.0));
434 }
435 }
436 _ => {}
437 }
438 }
439 }
440 }
441 }
442 }
443}
444
445fn parse_field_def(pair: pest::iterators::Pair<Rule>) -> Result<FieldDef> {
447 let mut inner = pair.into_inner();
448
449 let name = inner
450 .next()
451 .ok_or_else(|| Error::Schema("Missing field name".to_string()))?
452 .as_str()
453 .to_string();
454
455 let field_type_str = inner
456 .next()
457 .ok_or_else(|| Error::Schema("Missing field type".to_string()))?
458 .as_str();
459
460 let field_type = parse_field_type(field_type_str)?;
461
462 let mut tokenizer = None;
464 let mut sparse_vector_config = None;
465 let mut dense_vector_config = None;
466 let mut indexed = true;
467 let mut stored = true;
468 let mut multi = false;
469 let mut fast = false;
470 let mut index_config: Option<IndexConfig> = None;
471
472 for item in inner {
473 match item.as_rule() {
474 Rule::tokenizer_spec => {
475 if let Some(tok_name) = item.into_inner().next() {
477 tokenizer = Some(tok_name.as_str().to_string());
478 }
479 }
480 Rule::sparse_vector_config => {
481 sparse_vector_config = Some(parse_sparse_vector_config(item));
483 }
484 Rule::dense_vector_config => {
485 dense_vector_config = Some(parse_dense_vector_config(item));
487 }
488 Rule::attributes => {
489 let (idx, sto, mul, fst, idx_cfg) = parse_attributes(item);
490 indexed = idx;
491 stored = sto;
492 multi = mul;
493 fast = fst;
494 index_config = idx_cfg;
495 }
496 _ => {}
497 }
498 }
499
500 let mut positions = None;
502 if let Some(idx_cfg) = index_config {
503 positions = idx_cfg.positions;
504 if let Some(ref mut dv_config) = dense_vector_config {
505 apply_index_config_to_dense_vector(dv_config, idx_cfg);
506 } else if field_type == FieldType::SparseVector {
507 let sv_config = sparse_vector_config.get_or_insert(SparseVectorConfig::default());
509 apply_index_config_to_sparse_vector(sv_config, idx_cfg);
510 }
511 }
512
513 Ok(FieldDef {
514 name,
515 field_type,
516 indexed,
517 stored,
518 tokenizer,
519 multi,
520 positions,
521 sparse_vector_config,
522 dense_vector_config,
523 fast,
524 })
525}
526
527fn apply_index_config_to_dense_vector(config: &mut DenseVectorConfig, idx_cfg: IndexConfig) {
529 if let Some(index_type) = idx_cfg.index_type {
531 config.index_type = index_type;
532 }
533
534 if idx_cfg.num_clusters.is_some() {
536 config.num_clusters = idx_cfg.num_clusters;
537 }
538
539 if let Some(nprobe) = idx_cfg.nprobe {
541 config.nprobe = nprobe;
542 }
543
544 if idx_cfg.build_threshold.is_some() {
546 config.build_threshold = idx_cfg.build_threshold;
547 }
548}
549
550fn parse_sparse_vector_config(pair: pest::iterators::Pair<Rule>) -> SparseVectorConfig {
553 let mut index_size = IndexSize::default();
554
555 for inner in pair.into_inner() {
557 if inner.as_rule() == Rule::index_size_spec {
558 index_size = match inner.as_str() {
559 "u16" => IndexSize::U16,
560 "u32" => IndexSize::U32,
561 _ => IndexSize::default(),
562 };
563 }
564 }
565
566 SparseVectorConfig {
567 index_size,
568 weight_quantization: WeightQuantization::default(),
569 weight_threshold: 0.0,
570 block_size: 128,
571 pruning: None,
572 query_config: None,
573 }
574}
575
576fn apply_index_config_to_sparse_vector(config: &mut SparseVectorConfig, idx_cfg: IndexConfig) {
578 if let Some(q) = idx_cfg.quantization {
579 config.weight_quantization = q;
580 }
581 if let Some(t) = idx_cfg.weight_threshold {
582 config.weight_threshold = t;
583 }
584 if let Some(bs) = idx_cfg.block_size {
585 let adjusted = bs.next_power_of_two();
586 if adjusted != bs {
587 log::warn!(
588 "block_size {} adjusted to next power of two: {}",
589 bs,
590 adjusted
591 );
592 }
593 config.block_size = adjusted;
594 }
595 if let Some(p) = idx_cfg.pruning {
596 let clamped = p.clamp(0.0, 1.0);
597 if (clamped - p).abs() > f32::EPSILON {
598 log::warn!(
599 "pruning {} clamped to valid range [0.0, 1.0]: {}",
600 p,
601 clamped
602 );
603 }
604 config.pruning = Some(clamped);
605 }
606 if idx_cfg.query_tokenizer.is_some()
608 || idx_cfg.query_weighting.is_some()
609 || idx_cfg.query_weight_threshold.is_some()
610 || idx_cfg.query_max_dims.is_some()
611 || idx_cfg.query_pruning.is_some()
612 {
613 let query_config = config
614 .query_config
615 .get_or_insert(SparseQueryConfig::default());
616 if let Some(tokenizer) = idx_cfg.query_tokenizer {
617 query_config.tokenizer = Some(tokenizer);
618 }
619 if let Some(weighting) = idx_cfg.query_weighting {
620 query_config.weighting = weighting;
621 }
622 if let Some(t) = idx_cfg.query_weight_threshold {
623 query_config.weight_threshold = t;
624 }
625 if let Some(d) = idx_cfg.query_max_dims {
626 query_config.max_query_dims = Some(d);
627 }
628 if let Some(p) = idx_cfg.query_pruning {
629 query_config.pruning = Some(p);
630 }
631 }
632}
633
634fn parse_dense_vector_config(pair: pest::iterators::Pair<Rule>) -> DenseVectorConfig {
637 let mut dim: usize = 0;
638 let mut quantization = DenseVectorQuantization::F32;
639
640 for params in pair.into_inner() {
642 if params.as_rule() == Rule::dense_vector_params {
643 for inner in params.into_inner() {
644 match inner.as_rule() {
645 Rule::dense_vector_keyword_params => {
646 for kwarg in inner.into_inner() {
647 match kwarg.as_rule() {
648 Rule::dims_kwarg => {
649 if let Some(d) = kwarg.into_inner().next() {
650 dim = d.as_str().parse().unwrap_or(0);
651 }
652 }
653 Rule::quant_type_spec => {
654 quantization = parse_quant_type(kwarg.as_str());
655 }
656 _ => {}
657 }
658 }
659 }
660 Rule::dense_vector_positional_params => {
661 for item in inner.into_inner() {
662 match item.as_rule() {
663 Rule::dimension_spec => {
664 dim = item.as_str().parse().unwrap_or(0);
665 }
666 Rule::quant_type_spec => {
667 quantization = parse_quant_type(item.as_str());
668 }
669 _ => {}
670 }
671 }
672 }
673 _ => {}
674 }
675 }
676 }
677 }
678
679 DenseVectorConfig::new(dim).with_quantization(quantization)
680}
681
682fn parse_quant_type(s: &str) -> DenseVectorQuantization {
683 match s.trim() {
684 "f16" => DenseVectorQuantization::F16,
685 "uint8" | "u8" => DenseVectorQuantization::UInt8,
686 _ => DenseVectorQuantization::F32,
687 }
688}
689
690fn parse_default_fields_def(pair: pest::iterators::Pair<Rule>) -> Vec<String> {
692 pair.into_inner().map(|p| p.as_str().to_string()).collect()
693}
694
695fn parse_query_router_def(pair: pest::iterators::Pair<Rule>) -> Result<QueryRouterRule> {
697 let mut pattern = String::new();
698 let mut substitution = String::new();
699 let mut target_field = String::new();
700 let mut mode = RoutingMode::Additional;
701
702 for prop in pair.into_inner() {
703 if prop.as_rule() != Rule::query_router_prop {
704 continue;
705 }
706
707 for inner in prop.into_inner() {
708 match inner.as_rule() {
709 Rule::query_router_pattern => {
710 if let Some(regex_str) = inner.into_inner().next() {
711 pattern = parse_string_value(regex_str);
712 }
713 }
714 Rule::query_router_substitution => {
715 if let Some(quoted) = inner.into_inner().next() {
716 substitution = parse_string_value(quoted);
717 }
718 }
719 Rule::query_router_target => {
720 if let Some(ident) = inner.into_inner().next() {
721 target_field = ident.as_str().to_string();
722 }
723 }
724 Rule::query_router_mode => {
725 if let Some(mode_val) = inner.into_inner().next() {
726 mode = match mode_val.as_str() {
727 "exclusive" => RoutingMode::Exclusive,
728 "additional" => RoutingMode::Additional,
729 _ => RoutingMode::Additional,
730 };
731 }
732 }
733 _ => {}
734 }
735 }
736 }
737
738 if pattern.is_empty() {
739 return Err(Error::Schema("query_router missing 'pattern'".to_string()));
740 }
741 if substitution.is_empty() {
742 return Err(Error::Schema(
743 "query_router missing 'substitution'".to_string(),
744 ));
745 }
746 if target_field.is_empty() {
747 return Err(Error::Schema(
748 "query_router missing 'target_field'".to_string(),
749 ));
750 }
751
752 Ok(QueryRouterRule {
753 pattern,
754 substitution,
755 target_field,
756 mode,
757 })
758}
759
760fn parse_string_value(pair: pest::iterators::Pair<Rule>) -> String {
762 let s = pair.as_str();
763 match pair.as_rule() {
764 Rule::regex_string => {
765 if let Some(inner) = pair.into_inner().next() {
767 parse_string_value(inner)
768 } else {
769 s.to_string()
770 }
771 }
772 Rule::raw_string => {
773 s[2..s.len() - 1].to_string()
775 }
776 Rule::quoted_string => {
777 let inner = &s[1..s.len() - 1];
779 inner
781 .replace("\\n", "\n")
782 .replace("\\t", "\t")
783 .replace("\\\"", "\"")
784 .replace("\\\\", "\\")
785 }
786 _ => s.to_string(),
787 }
788}
789
790fn parse_index_def(pair: pest::iterators::Pair<Rule>) -> Result<IndexDef> {
792 let mut inner = pair.into_inner();
793
794 let name = inner
795 .next()
796 .ok_or_else(|| Error::Schema("Missing index name".to_string()))?
797 .as_str()
798 .to_string();
799
800 let mut fields = Vec::new();
801 let mut default_fields = Vec::new();
802 let mut query_routers = Vec::new();
803
804 for item in inner {
805 match item.as_rule() {
806 Rule::field_def => {
807 fields.push(parse_field_def(item)?);
808 }
809 Rule::default_fields_def => {
810 default_fields = parse_default_fields_def(item);
811 }
812 Rule::query_router_def => {
813 query_routers.push(parse_query_router_def(item)?);
814 }
815 _ => {}
816 }
817 }
818
819 Ok(IndexDef {
820 name,
821 fields,
822 default_fields,
823 query_routers,
824 })
825}
826
827pub fn parse_sdl(input: &str) -> Result<Vec<IndexDef>> {
829 let pairs = SdlParser::parse(Rule::file, input)
830 .map_err(|e| Error::Schema(format!("Parse error: {}", e)))?;
831
832 let mut indexes = Vec::new();
833
834 for pair in pairs {
835 if pair.as_rule() == Rule::file {
836 for inner in pair.into_inner() {
837 if inner.as_rule() == Rule::index_def {
838 indexes.push(parse_index_def(inner)?);
839 }
840 }
841 }
842 }
843
844 Ok(indexes)
845}
846
847pub fn parse_single_index(input: &str) -> Result<IndexDef> {
849 let indexes = parse_sdl(input)?;
850
851 if indexes.is_empty() {
852 return Err(Error::Schema("No index definition found".to_string()));
853 }
854
855 if indexes.len() > 1 {
856 return Err(Error::Schema(
857 "Multiple index definitions found, expected one".to_string(),
858 ));
859 }
860
861 Ok(indexes.into_iter().next().unwrap())
862}
863
864#[cfg(test)]
865mod tests {
866 use super::*;
867
868 #[test]
869 fn test_parse_simple_schema() {
870 let sdl = r#"
871 index articles {
872 field title: text [indexed, stored]
873 field body: text [indexed]
874 }
875 "#;
876
877 let indexes = parse_sdl(sdl).unwrap();
878 assert_eq!(indexes.len(), 1);
879
880 let index = &indexes[0];
881 assert_eq!(index.name, "articles");
882 assert_eq!(index.fields.len(), 2);
883
884 assert_eq!(index.fields[0].name, "title");
885 assert!(matches!(index.fields[0].field_type, FieldType::Text));
886 assert!(index.fields[0].indexed);
887 assert!(index.fields[0].stored);
888
889 assert_eq!(index.fields[1].name, "body");
890 assert!(matches!(index.fields[1].field_type, FieldType::Text));
891 assert!(index.fields[1].indexed);
892 assert!(!index.fields[1].stored);
893 }
894
895 #[test]
896 fn test_parse_all_field_types() {
897 let sdl = r#"
898 index test {
899 field text_field: text [indexed, stored]
900 field u64_field: u64 [indexed, stored]
901 field i64_field: i64 [indexed, stored]
902 field f64_field: f64 [indexed, stored]
903 field bytes_field: bytes [stored]
904 }
905 "#;
906
907 let indexes = parse_sdl(sdl).unwrap();
908 let index = &indexes[0];
909
910 assert!(matches!(index.fields[0].field_type, FieldType::Text));
911 assert!(matches!(index.fields[1].field_type, FieldType::U64));
912 assert!(matches!(index.fields[2].field_type, FieldType::I64));
913 assert!(matches!(index.fields[3].field_type, FieldType::F64));
914 assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
915 }
916
917 #[test]
918 fn test_parse_with_comments() {
919 let sdl = r#"
920 # This is a comment
921 index articles {
922 # Title field
923 field title: text [indexed, stored]
924 field body: text [indexed] # inline comment not supported yet
925 }
926 "#;
927
928 let indexes = parse_sdl(sdl).unwrap();
929 assert_eq!(indexes[0].fields.len(), 2);
930 }
931
932 #[test]
933 fn test_parse_type_aliases() {
934 let sdl = r#"
935 index test {
936 field a: string [indexed]
937 field b: int [indexed]
938 field c: uint [indexed]
939 field d: float [indexed]
940 field e: binary [stored]
941 }
942 "#;
943
944 let indexes = parse_sdl(sdl).unwrap();
945 let index = &indexes[0];
946
947 assert!(matches!(index.fields[0].field_type, FieldType::Text));
948 assert!(matches!(index.fields[1].field_type, FieldType::I64));
949 assert!(matches!(index.fields[2].field_type, FieldType::U64));
950 assert!(matches!(index.fields[3].field_type, FieldType::F64));
951 assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
952 }
953
954 #[test]
955 fn test_to_schema() {
956 let sdl = r#"
957 index articles {
958 field title: text [indexed, stored]
959 field views: u64 [indexed, stored]
960 }
961 "#;
962
963 let indexes = parse_sdl(sdl).unwrap();
964 let schema = indexes[0].to_schema();
965
966 assert!(schema.get_field("title").is_some());
967 assert!(schema.get_field("views").is_some());
968 assert!(schema.get_field("nonexistent").is_none());
969 }
970
971 #[test]
972 fn test_default_attributes() {
973 let sdl = r#"
974 index test {
975 field title: text
976 }
977 "#;
978
979 let indexes = parse_sdl(sdl).unwrap();
980 let field = &indexes[0].fields[0];
981
982 assert!(field.indexed);
984 assert!(field.stored);
985 }
986
987 #[test]
988 fn test_multiple_indexes() {
989 let sdl = r#"
990 index articles {
991 field title: text [indexed, stored]
992 }
993
994 index users {
995 field name: text [indexed, stored]
996 field email: text [indexed, stored]
997 }
998 "#;
999
1000 let indexes = parse_sdl(sdl).unwrap();
1001 assert_eq!(indexes.len(), 2);
1002 assert_eq!(indexes[0].name, "articles");
1003 assert_eq!(indexes[1].name, "users");
1004 }
1005
1006 #[test]
1007 fn test_tokenizer_spec() {
1008 let sdl = r#"
1009 index articles {
1010 field title: text<en_stem> [indexed, stored]
1011 field body: text<default> [indexed]
1012 field author: text [indexed, stored]
1013 }
1014 "#;
1015
1016 let indexes = parse_sdl(sdl).unwrap();
1017 let index = &indexes[0];
1018
1019 assert_eq!(index.fields[0].name, "title");
1020 assert_eq!(index.fields[0].tokenizer, Some("en_stem".to_string()));
1021
1022 assert_eq!(index.fields[1].name, "body");
1023 assert_eq!(index.fields[1].tokenizer, Some("default".to_string()));
1024
1025 assert_eq!(index.fields[2].name, "author");
1026 assert_eq!(index.fields[2].tokenizer, None); }
1028
1029 #[test]
1030 fn test_tokenizer_in_schema() {
1031 let sdl = r#"
1032 index articles {
1033 field title: text<german> [indexed, stored]
1034 field body: text<en_stem> [indexed]
1035 }
1036 "#;
1037
1038 let indexes = parse_sdl(sdl).unwrap();
1039 let schema = indexes[0].to_schema();
1040
1041 let title_field = schema.get_field("title").unwrap();
1042 let title_entry = schema.get_field_entry(title_field).unwrap();
1043 assert_eq!(title_entry.tokenizer, Some("german".to_string()));
1044
1045 let body_field = schema.get_field("body").unwrap();
1046 let body_entry = schema.get_field_entry(body_field).unwrap();
1047 assert_eq!(body_entry.tokenizer, Some("en_stem".to_string()));
1048 }
1049
1050 #[test]
1051 fn test_query_router_basic() {
1052 let sdl = r#"
1053 index documents {
1054 field title: text [indexed, stored]
1055 field uri: text [indexed, stored]
1056
1057 query_router {
1058 pattern: "10\\.\\d{4,}/[^\\s]+"
1059 substitution: "doi://{0}"
1060 target_field: uris
1061 mode: exclusive
1062 }
1063 }
1064 "#;
1065
1066 let indexes = parse_sdl(sdl).unwrap();
1067 let index = &indexes[0];
1068
1069 assert_eq!(index.query_routers.len(), 1);
1070 let router = &index.query_routers[0];
1071 assert_eq!(router.pattern, r"10\.\d{4,}/[^\s]+");
1072 assert_eq!(router.substitution, "doi://{0}");
1073 assert_eq!(router.target_field, "uris");
1074 assert_eq!(router.mode, RoutingMode::Exclusive);
1075 }
1076
1077 #[test]
1078 fn test_query_router_raw_string() {
1079 let sdl = r#"
1080 index documents {
1081 field uris: text [indexed, stored]
1082
1083 query_router {
1084 pattern: r"^pmid:(\d+)$"
1085 substitution: "pubmed://{1}"
1086 target_field: uris
1087 mode: additional
1088 }
1089 }
1090 "#;
1091
1092 let indexes = parse_sdl(sdl).unwrap();
1093 let router = &indexes[0].query_routers[0];
1094
1095 assert_eq!(router.pattern, r"^pmid:(\d+)$");
1096 assert_eq!(router.substitution, "pubmed://{1}");
1097 assert_eq!(router.mode, RoutingMode::Additional);
1098 }
1099
1100 #[test]
1101 fn test_multiple_query_routers() {
1102 let sdl = r#"
1103 index documents {
1104 field uris: text [indexed, stored]
1105
1106 query_router {
1107 pattern: r"^doi:(10\.\d{4,}/[^\s]+)$"
1108 substitution: "doi://{1}"
1109 target_field: uris
1110 mode: exclusive
1111 }
1112
1113 query_router {
1114 pattern: r"^pmid:(\d+)$"
1115 substitution: "pubmed://{1}"
1116 target_field: uris
1117 mode: exclusive
1118 }
1119
1120 query_router {
1121 pattern: r"^arxiv:(\d+\.\d+)$"
1122 substitution: "arxiv://{1}"
1123 target_field: uris
1124 mode: additional
1125 }
1126 }
1127 "#;
1128
1129 let indexes = parse_sdl(sdl).unwrap();
1130 assert_eq!(indexes[0].query_routers.len(), 3);
1131 }
1132
1133 #[test]
1134 fn test_query_router_default_mode() {
1135 let sdl = r#"
1136 index documents {
1137 field uris: text [indexed, stored]
1138
1139 query_router {
1140 pattern: r"test"
1141 substitution: "{0}"
1142 target_field: uris
1143 }
1144 }
1145 "#;
1146
1147 let indexes = parse_sdl(sdl).unwrap();
1148 assert_eq!(indexes[0].query_routers[0].mode, RoutingMode::Additional);
1150 }
1151
1152 #[test]
1153 fn test_multi_attribute() {
1154 let sdl = r#"
1155 index documents {
1156 field uris: text [indexed, stored<multi>]
1157 field title: text [indexed, stored]
1158 }
1159 "#;
1160
1161 let indexes = parse_sdl(sdl).unwrap();
1162 assert_eq!(indexes.len(), 1);
1163
1164 let fields = &indexes[0].fields;
1165 assert_eq!(fields.len(), 2);
1166
1167 assert_eq!(fields[0].name, "uris");
1169 assert!(fields[0].multi, "uris field should have multi=true");
1170
1171 assert_eq!(fields[1].name, "title");
1173 assert!(!fields[1].multi, "title field should have multi=false");
1174
1175 let schema = indexes[0].to_schema();
1177 let uris_field = schema.get_field("uris").unwrap();
1178 let title_field = schema.get_field("title").unwrap();
1179
1180 assert!(schema.get_field_entry(uris_field).unwrap().multi);
1181 assert!(!schema.get_field_entry(title_field).unwrap().multi);
1182 }
1183
1184 #[test]
1185 fn test_sparse_vector_field() {
1186 let sdl = r#"
1187 index documents {
1188 field embedding: sparse_vector [indexed, stored]
1189 }
1190 "#;
1191
1192 let indexes = parse_sdl(sdl).unwrap();
1193 assert_eq!(indexes.len(), 1);
1194 assert_eq!(indexes[0].fields.len(), 1);
1195 assert_eq!(indexes[0].fields[0].name, "embedding");
1196 assert_eq!(indexes[0].fields[0].field_type, FieldType::SparseVector);
1197 assert!(indexes[0].fields[0].sparse_vector_config.is_none());
1198 }
1199
1200 #[test]
1201 fn test_sparse_vector_with_config() {
1202 let sdl = r#"
1203 index documents {
1204 field embedding: sparse_vector<u16> [indexed<quantization: uint8>, stored]
1205 field dense: sparse_vector<u32> [indexed<quantization: float32>]
1206 }
1207 "#;
1208
1209 let indexes = parse_sdl(sdl).unwrap();
1210 assert_eq!(indexes[0].fields.len(), 2);
1211
1212 let f1 = &indexes[0].fields[0];
1214 assert_eq!(f1.name, "embedding");
1215 let config1 = f1.sparse_vector_config.as_ref().unwrap();
1216 assert_eq!(config1.index_size, IndexSize::U16);
1217 assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);
1218
1219 let f2 = &indexes[0].fields[1];
1221 assert_eq!(f2.name, "dense");
1222 let config2 = f2.sparse_vector_config.as_ref().unwrap();
1223 assert_eq!(config2.index_size, IndexSize::U32);
1224 assert_eq!(config2.weight_quantization, WeightQuantization::Float32);
1225 }
1226
1227 #[test]
1228 fn test_sparse_vector_with_weight_threshold() {
1229 let sdl = r#"
1230 index documents {
1231 field embedding: sparse_vector<u16> [indexed<quantization: uint8, weight_threshold: 0.1>, stored]
1232 field embedding2: sparse_vector<u32> [indexed<quantization: float16, weight_threshold: 0.05>]
1233 }
1234 "#;
1235
1236 let indexes = parse_sdl(sdl).unwrap();
1237 assert_eq!(indexes[0].fields.len(), 2);
1238
1239 let f1 = &indexes[0].fields[0];
1241 assert_eq!(f1.name, "embedding");
1242 let config1 = f1.sparse_vector_config.as_ref().unwrap();
1243 assert_eq!(config1.index_size, IndexSize::U16);
1244 assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);
1245 assert!((config1.weight_threshold - 0.1).abs() < 0.001);
1246
1247 let f2 = &indexes[0].fields[1];
1249 assert_eq!(f2.name, "embedding2");
1250 let config2 = f2.sparse_vector_config.as_ref().unwrap();
1251 assert_eq!(config2.index_size, IndexSize::U32);
1252 assert_eq!(config2.weight_quantization, WeightQuantization::Float16);
1253 assert!((config2.weight_threshold - 0.05).abs() < 0.001);
1254 }
1255
1256 #[test]
1257 fn test_sparse_vector_with_pruning() {
1258 let sdl = r#"
1259 index documents {
1260 field embedding: sparse_vector [indexed<quantization: uint8, pruning: 0.1>, stored]
1261 }
1262 "#;
1263
1264 let indexes = parse_sdl(sdl).unwrap();
1265 let f = &indexes[0].fields[0];
1266 assert_eq!(f.name, "embedding");
1267 let config = f.sparse_vector_config.as_ref().unwrap();
1268 assert_eq!(config.weight_quantization, WeightQuantization::UInt8);
1269 assert_eq!(config.pruning, Some(0.1));
1270 }
1271
/// Parses a single `dense_vector<768>` field and checks the field type and
/// the dimension stored in its dense vector config.
#[test]
fn test_dense_vector_field() {
    let source = r#"
        index documents {
            field embedding: dense_vector<768> [indexed, stored]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    assert_eq!(parsed.len(), 1);

    let fields = &parsed[0].fields;
    assert_eq!(fields.len(), 1);

    let field = &fields[0];
    assert_eq!(field.name, "embedding");
    assert_eq!(field.field_type, FieldType::DenseVector);

    let dense = field.dense_vector_config.as_ref().unwrap();
    assert_eq!(dense.dim, 768);
}
1291
/// A field declared as `vector<1536>` is expected to parse as a dense vector
/// field whose config carries dimension 1536.
#[test]
fn test_dense_vector_alias() {
    let source = r#"
        index documents {
            field embedding: vector<1536> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    assert_eq!(field.field_type, FieldType::DenseVector);

    let dense = field.dense_vector_config.as_ref().unwrap();
    assert_eq!(dense.dim, 1536);
}
1311
/// `indexed<ivf_rabitq, num_clusters: 256>` must populate `num_clusters`;
/// `nprobe` is not written in the SDL and the test expects it to be 32.
#[test]
fn test_dense_vector_with_num_clusters() {
    let source = r#"
        index documents {
            field embedding: dense_vector<768> [indexed<ivf_rabitq, num_clusters: 256>, stored]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    assert_eq!(parsed.len(), 1);

    let field = &parsed[0].fields[0];
    assert_eq!(field.name, "embedding");
    assert_eq!(field.field_type, FieldType::DenseVector);

    let dense = field.dense_vector_config.as_ref().unwrap();
    assert_eq!(dense.dim, 768);
    assert_eq!(dense.num_clusters, Some(256));
    // nprobe was omitted from the SDL above.
    assert_eq!(dense.nprobe, 32);
}
1332
/// Both `num_clusters` and `nprobe` given explicitly inside `indexed<...>`
/// must be carried into the dense vector config.
#[test]
fn test_dense_vector_with_num_clusters_and_nprobe() {
    let source = r#"
        index documents {
            field embedding: dense_vector<1536> [indexed<ivf_rabitq, num_clusters: 512, nprobe: 64>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);
    assert_eq!(dense.num_clusters, Some(512));
    assert_eq!(dense.nprobe, 64);
}
1348
/// The keyword form `dense_vector<dims: 1536>` must yield dimension 1536 and,
/// with a plain `indexed` attribute, no `num_clusters`.
#[test]
fn test_dense_vector_keyword_syntax() {
    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 1536> [indexed, stored]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);
    assert!(dense.num_clusters.is_none());
}
1363
/// Keyword dimension syntax combined with a fully specified
/// `indexed<ivf_rabitq, num_clusters, nprobe>` attribute.
#[test]
fn test_dense_vector_keyword_syntax_full() {
    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 1536> [indexed<ivf_rabitq, num_clusters: 256, nprobe: 64>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);
    assert_eq!(dense.num_clusters, Some(256));
    assert_eq!(dense.nprobe, 64);
}
1379
/// Keyword dimension syntax with only `num_clusters` given; `nprobe` is not
/// written in the SDL and the test expects it to be 32.
#[test]
fn test_dense_vector_keyword_syntax_partial() {
    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed<ivf_rabitq, num_clusters: 128>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.num_clusters, Some(128));
    // nprobe was omitted from the SDL above.
    assert_eq!(dense.nprobe, 32);
}
1395
/// `indexed<scann, ...>` must select `VectorIndexType::ScaNN` and keep the
/// explicit `num_clusters` / `nprobe` values.
#[test]
fn test_dense_vector_scann_index() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed<scann, num_clusters: 256, nprobe: 64>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.index_type, VectorIndexType::ScaNN);
    assert_eq!(dense.num_clusters, Some(256));
    assert_eq!(dense.nprobe, 64);
}
1414
/// `indexed<ivf_rabitq, num_clusters: 512>` must select
/// `VectorIndexType::IvfRaBitQ` and record the cluster count.
#[test]
fn test_dense_vector_ivf_rabitq_index() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 1536> [indexed<ivf_rabitq, num_clusters: 512>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);
    assert_eq!(dense.index_type, VectorIndexType::IvfRaBitQ);
    assert_eq!(dense.num_clusters, Some(512));
}
1432
/// `indexed<rabitq>` must select `VectorIndexType::RaBitQ` and leave
/// `num_clusters` unset.
#[test]
fn test_dense_vector_rabitq_no_clusters() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed<rabitq>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.index_type, VectorIndexType::RaBitQ);
    assert!(dense.num_clusters.is_none());
}
1450
/// `indexed<flat>` must select `VectorIndexType::Flat`.
#[test]
fn test_dense_vector_flat_index() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed<flat>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.index_type, VectorIndexType::Flat);
}
1467
/// With a bare `indexed` attribute (no index type named), the test expects
/// the dense vector config to report `VectorIndexType::RaBitQ`.
#[test]
fn test_dense_vector_default_index_type() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.index_type, VectorIndexType::RaBitQ);
}
1485
/// `dense_vector<768, f16>` must record F16 quantization; the index type is
/// not named in the SDL and is expected to be `RaBitQ`.
#[test]
fn test_dense_vector_f16_quantization() {
    use crate::dsl::schema::{DenseVectorQuantization, VectorIndexType};

    let source = r#"
        index documents {
            field embedding: dense_vector<768, f16> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.quantization, DenseVectorQuantization::F16);
    assert_eq!(dense.index_type, VectorIndexType::RaBitQ);
}
1503
/// `dense_vector<1024, uint8>` must record UInt8 quantization.
#[test]
fn test_dense_vector_uint8_quantization() {
    use crate::dsl::schema::DenseVectorQuantization;

    let source = r#"
        index documents {
            field embedding: dense_vector<1024, uint8> [indexed<ivf_rabitq>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1024);
    assert_eq!(dense.quantization, DenseVectorQuantization::UInt8);
}
1520
/// The spelling `u8` in `dense_vector<512, u8>` is expected to map to the
/// same `DenseVectorQuantization::UInt8` value as `uint8`.
#[test]
fn test_dense_vector_u8_alias() {
    use crate::dsl::schema::DenseVectorQuantization;

    let source = r#"
        index documents {
            field embedding: dense_vector<512, u8> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 512);
    assert_eq!(dense.quantization, DenseVectorQuantization::UInt8);
}
1537
/// When no quantization is written in `dense_vector<768>`, the test expects
/// the config to report `DenseVectorQuantization::F32`.
#[test]
fn test_dense_vector_default_f32_quantization() {
    use crate::dsl::schema::DenseVectorQuantization;

    let source = r#"
        index documents {
            field embedding: dense_vector<768> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.quantization, DenseVectorQuantization::F32);
}
1555
/// Keyword dimension syntax followed by a quantization:
/// `dense_vector<dims: 768, f16>` must give dimension 768 with F16.
#[test]
fn test_dense_vector_keyword_with_quantization() {
    use crate::dsl::schema::DenseVectorQuantization;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768, f16> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let dense = field.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.quantization, DenseVectorQuantization::F16);
}
1572
/// `json` fields, with and without an attribute list, must parse as
/// `FieldType::Json`; after conversion to a schema the `metadata` entry is
/// expected to be stored but not indexed.
#[test]
fn test_json_field_type() {
    let source = r#"
        index documents {
            field title: text [indexed, stored]
            field metadata: json [stored]
            field extra: json
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let index_def = &parsed[0];

    assert_eq!(index_def.fields.len(), 3);

    // Second field: json with an explicit `[stored]` attribute.
    let metadata = &index_def.fields[1];
    assert_eq!(metadata.name, "metadata");
    assert!(matches!(metadata.field_type, FieldType::Json));
    assert!(metadata.stored);

    // Third field: json with no attribute list at all.
    let extra = &index_def.fields[2];
    assert_eq!(extra.name, "extra");
    assert!(matches!(extra.field_type, FieldType::Json));

    // The same properties must hold on the built schema entry.
    let schema = index_def.to_schema();
    let metadata_handle = schema.get_field("metadata").unwrap();
    let metadata_entry = schema.get_field_entry(metadata_handle).unwrap();
    assert_eq!(metadata_entry.field_type, FieldType::Json);
    assert!(!metadata_entry.indexed);
    assert!(metadata_entry.stored);
}
1606
/// A nested `query<tokenizer: ..., weighting: idf>` block inside
/// `indexed<...>` must be parsed into the sparse vector's query config and
/// must still be present on the schema built from the index definition.
#[test]
fn test_sparse_vector_query_config() {
    use crate::structures::QueryWeighting;

    let source = r#"
        index documents {
            field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<tokenizer: "Alibaba-NLP/gte-Qwen2-1.5B-instruct", weighting: idf>>]
        }
    "#;
    let expected_tokenizer = Some("Alibaba-NLP/gte-Qwen2-1.5B-instruct");

    let parsed = parse_sdl(source).unwrap();
    let index_def = &parsed[0];

    assert_eq!(index_def.fields.len(), 1);
    let field = &index_def.fields[0];
    assert_eq!(field.name, "embedding");
    assert!(matches!(field.field_type, FieldType::SparseVector));

    let sparse = field.sparse_vector_config.as_ref().unwrap();
    assert_eq!(sparse.index_size, IndexSize::U16);
    assert_eq!(sparse.weight_quantization, WeightQuantization::UInt8);

    // Query-time settings as seen on the parsed field definition.
    let parsed_query = sparse.query_config.as_ref().unwrap();
    assert_eq!(parsed_query.tokenizer.as_deref(), expected_tokenizer);
    assert_eq!(parsed_query.weighting, QueryWeighting::Idf);

    // The same settings as seen on the schema entry.
    let schema = index_def.to_schema();
    let handle = schema.get_field("embedding").unwrap();
    let schema_entry = schema.get_field_entry(handle).unwrap();
    let schema_sparse = schema_entry.sparse_vector_config.as_ref().unwrap();
    let schema_query = schema_sparse.query_config.as_ref().unwrap();
    assert_eq!(schema_query.tokenizer.as_deref(), expected_tokenizer);
    assert_eq!(schema_query.weighting, QueryWeighting::Idf);
}
1651
/// `query<weighting: one>` without a tokenizer must give
/// `QueryWeighting::One` and leave the tokenizer unset.
#[test]
fn test_sparse_vector_query_config_weighting_one() {
    use crate::structures::QueryWeighting;

    let source = r#"
        index documents {
            field embedding: sparse_vector [indexed<query<weighting: one>>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let field = &parsed[0].fields[0];
    let sparse = field.sparse_vector_config.as_ref().unwrap();

    let query = sparse.query_config.as_ref().unwrap();
    assert!(query.tokenizer.is_none());
    assert_eq!(query.weighting, QueryWeighting::One);
}
1669
/// `weighting: idf_file` must parse to `QueryWeighting::IdfFile`, together
/// with the given tokenizer name, and the weighting must still be present
/// on the schema built from the index definition.
#[test]
fn test_sparse_vector_query_config_weighting_idf_file() {
    use crate::structures::QueryWeighting;

    let source = r#"
        index documents {
            field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<tokenizer: "opensearch-neural-sparse-encoding-v1", weighting: idf_file>>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let index_def = &parsed[0];
    let sparse = index_def.fields[0].sparse_vector_config.as_ref().unwrap();

    let parsed_query = sparse.query_config.as_ref().unwrap();
    assert_eq!(
        parsed_query.tokenizer.as_deref(),
        Some("opensearch-neural-sparse-encoding-v1")
    );
    assert_eq!(parsed_query.weighting, QueryWeighting::IdfFile);

    // Check the weighting again on the schema entry.
    let schema = index_def.to_schema();
    let handle = schema.get_field("embedding").unwrap();
    let schema_entry = schema.get_field_entry(handle).unwrap();
    let schema_sparse = schema_entry.sparse_vector_config.as_ref().unwrap();
    let schema_query = schema_sparse.query_config.as_ref().unwrap();
    assert_eq!(schema_query.weighting, QueryWeighting::IdfFile);
}
1698
/// `weight_threshold`, `max_dims` and `pruning` inside a `query<...>` block
/// must land in the query config, both on the parsed field and on the schema
/// entry. Float values are compared with a 0.001 tolerance.
#[test]
fn test_sparse_vector_query_config_pruning_params() {
    let source = r#"
        index documents {
            field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<weighting: idf, weight_threshold: 0.03, max_dims: 25, pruning: 0.2>>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let index_def = &parsed[0];
    let sparse = index_def.fields[0].sparse_vector_config.as_ref().unwrap();

    // Values as seen on the parsed field definition.
    let parsed_query = sparse.query_config.as_ref().unwrap();
    assert_eq!(parsed_query.weighting, QueryWeighting::Idf);
    assert!((parsed_query.weight_threshold - 0.03).abs() < 0.001);
    assert_eq!(parsed_query.max_query_dims, Some(25));
    assert!((parsed_query.pruning.unwrap() - 0.2).abs() < 0.001);

    // Values as seen on the schema entry.
    let schema = index_def.to_schema();
    let handle = schema.get_field("embedding").unwrap();
    let schema_entry = schema.get_field_entry(handle).unwrap();
    let schema_sparse = schema_entry.sparse_vector_config.as_ref().unwrap();
    let schema_query = schema_sparse.query_config.as_ref().unwrap();
    assert!((schema_query.weight_threshold - 0.03).abs() < 0.001);
    assert_eq!(schema_query.max_query_dims, Some(25));
    assert!((schema_query.pruning.unwrap() - 0.2).abs() < 0.001);
}
1726
/// The `fast` attribute must be recorded on every field that lists it,
/// whatever the field type, and must be absent on a field that does not.
/// The flag is then re-checked on the schema built from the definition.
#[test]
fn test_fast_attribute() {
    let source = r#"
        index products {
            field name: text [indexed, stored]
            field price: f64 [indexed, fast]
            field category: text [indexed, stored, fast]
            field count: u64 [fast]
            field score: i64 [indexed, stored, fast]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    assert_eq!(parsed.len(), 1);
    let index_def = &parsed[0];
    assert_eq!(index_def.fields.len(), 5);

    // `name` is the only field declared without `fast`.
    assert!(!index_def.fields[0].fast);

    // Remaining fields, in declaration order: position and expected type.
    let fast_fields = [
        (1, FieldType::F64),
        (2, FieldType::Text),
        (3, FieldType::U64),
        (4, FieldType::I64),
    ];
    for (position, expected_type) in fast_fields.iter() {
        let field = &index_def.fields[*position];
        assert!(field.fast);
        assert!(field.field_type == *expected_type);
    }

    // The flag as seen on the schema entries.
    let schema = index_def.to_schema();
    let schema_expectations = [("price", true), ("category", true), ("name", false)];
    for (field_name, expect_fast) in schema_expectations.iter() {
        let handle = schema.get_field(field_name).unwrap();
        let is_fast = schema.get_field_entry(handle).unwrap().fast;
        assert!(is_fast == *expect_fast);
    }
}
1770}