1use pest::Parser;
46use pest_derive::Parser;
47
48use super::query_field_router::{QueryRouterRule, RoutingMode};
49use super::schema::{DenseVectorQuantization, FieldType, Schema, SchemaBuilder};
50use crate::Result;
51use crate::error::Error;
52
/// Pest parser for the schema definition language (SDL).
///
/// The grammar is read from `dsl/sdl/sdl.pest`. The `Rule` enum matched on
/// throughout this file is produced by the `Parser` derive for that grammar.
#[derive(Parser)]
#[grammar = "dsl/sdl/sdl.pest"]
pub struct SdlParser;
56
57use super::schema::DenseVectorConfig;
58use crate::structures::{
59 IndexSize, QueryWeighting, SparseQueryConfig, SparseVectorConfig, WeightQuantization,
60};
61
/// One `field` declaration as parsed from SDL, before it is turned into a schema entry.
#[derive(Debug, Clone)]
pub struct FieldDef {
    /// Field name as written in the SDL source.
    pub name: String,
    /// Resolved field type (aliases such as `string` or `int` are already mapped).
    pub field_type: FieldType,
    /// Whether the field carries the `indexed` attribute.
    pub indexed: bool,
    /// Whether the field carries the `stored` attribute.
    pub stored: bool,
    /// Tokenizer name from the type's `<...>` spec; `to_schema` falls back to
    /// `"default"` for text fields when this is `None`.
    pub tokenizer: Option<String>,
    /// Set when the field was declared with a configured `stored<...>` attribute.
    pub multi: bool,
    /// Position mode taken from the `indexed<...>` configuration, if any.
    pub positions: Option<super::schema::PositionMode>,
    /// Sparse-vector settings; present when the type spec or the index
    /// configuration supplied any for a sparse vector field.
    pub sparse_vector_config: Option<SparseVectorConfig>,
    /// Dense-vector settings (dimension, quantization, index parameters).
    /// `to_schema` requires this for `DenseVector` fields.
    pub dense_vector_config: Option<DenseVectorConfig>,
}
80
/// One `index { ... }` block as parsed from SDL.
#[derive(Debug, Clone)]
pub struct IndexDef {
    /// Index name as written after the `index` keyword.
    pub name: String,
    /// Field declarations in source order.
    pub fields: Vec<FieldDef>,
    /// Names listed in the index's default-fields declaration (empty if absent).
    pub default_fields: Vec<String>,
    /// Rules collected from every `query_router { ... }` block, in source order.
    pub query_routers: Vec<QueryRouterRule>,
}
90
91impl IndexDef {
92 pub fn to_schema(&self) -> Schema {
94 let mut builder = SchemaBuilder::default();
95
96 for field in &self.fields {
97 let f = match field.field_type {
98 FieldType::Text => {
99 let tokenizer = field.tokenizer.as_deref().unwrap_or("default");
100 builder.add_text_field_with_tokenizer(
101 &field.name,
102 field.indexed,
103 field.stored,
104 tokenizer,
105 )
106 }
107 FieldType::U64 => builder.add_u64_field(&field.name, field.indexed, field.stored),
108 FieldType::I64 => builder.add_i64_field(&field.name, field.indexed, field.stored),
109 FieldType::F64 => builder.add_f64_field(&field.name, field.indexed, field.stored),
110 FieldType::Bytes => builder.add_bytes_field(&field.name, field.stored),
111 FieldType::Json => builder.add_json_field(&field.name, field.stored),
112 FieldType::SparseVector => {
113 if let Some(config) = &field.sparse_vector_config {
114 builder.add_sparse_vector_field_with_config(
115 &field.name,
116 field.indexed,
117 field.stored,
118 config.clone(),
119 )
120 } else {
121 builder.add_sparse_vector_field(&field.name, field.indexed, field.stored)
122 }
123 }
124 FieldType::DenseVector => {
125 let config = field
127 .dense_vector_config
128 .as_ref()
129 .expect("DenseVector field requires dimension to be specified");
130 builder.add_dense_vector_field_with_config(
131 &field.name,
132 field.indexed,
133 field.stored,
134 config.clone(),
135 )
136 }
137 };
138 if field.multi {
139 builder.set_multi(f, true);
140 }
141 let positions = field.positions.or({
143 if field.multi
145 && matches!(
146 field.field_type,
147 FieldType::SparseVector | FieldType::DenseVector
148 )
149 {
150 Some(super::schema::PositionMode::Ordinal)
151 } else {
152 None
153 }
154 });
155 if let Some(mode) = positions {
156 builder.set_positions(f, mode);
157 }
158 }
159
160 if !self.default_fields.is_empty() {
162 builder.set_default_fields(self.default_fields.clone());
163 }
164
165 if !self.query_routers.is_empty() {
167 builder.set_query_routers(self.query_routers.clone());
168 }
169
170 builder.build()
171 }
172
173 pub fn to_query_router(&self) -> Result<Option<super::query_field_router::QueryFieldRouter>> {
178 if self.query_routers.is_empty() {
179 return Ok(None);
180 }
181
182 super::query_field_router::QueryFieldRouter::from_rules(&self.query_routers)
183 .map(Some)
184 .map_err(Error::Schema)
185 }
186}
187
188fn parse_field_type(type_str: &str) -> Result<FieldType> {
190 match type_str {
191 "text" | "string" | "str" => Ok(FieldType::Text),
192 "u64" | "uint" | "unsigned" => Ok(FieldType::U64),
193 "i64" | "int" | "integer" => Ok(FieldType::I64),
194 "f64" | "float" | "double" => Ok(FieldType::F64),
195 "bytes" | "binary" | "blob" => Ok(FieldType::Bytes),
196 "json" => Ok(FieldType::Json),
197 "sparse_vector" => Ok(FieldType::SparseVector),
198 "dense_vector" | "vector" => Ok(FieldType::DenseVector),
199 _ => Err(Error::Schema(format!("Unknown field type: {}", type_str))),
200 }
201}
202
/// Options collected from an `indexed<...>` attribute.
///
/// Every field is optional. Only values that were present in the SDL are later
/// copied onto the field's dense or sparse vector configuration.
#[derive(Debug, Clone, Default)]
struct IndexConfig {
    /// Vector index algorithm (applied to dense vector fields).
    index_type: Option<super::schema::VectorIndexType>,
    /// `num_clusters` value (applied to dense vector fields).
    num_clusters: Option<usize>,
    /// `nprobe` value (applied to dense vector fields).
    nprobe: Option<usize>,
    /// `build_threshold` value (applied to dense vector fields).
    build_threshold: Option<usize>,
    /// Weight quantization (applied to sparse vector fields).
    quantization: Option<WeightQuantization>,
    /// Weight threshold (applied to sparse vector fields).
    weight_threshold: Option<f32>,
    /// Block size; rounded up to a power of two when applied to sparse vector fields.
    block_size: Option<usize>,
    /// Posting-list pruning value; clamped to `[0.0, 1.0]` when applied.
    posting_list_pruning: Option<f32>,
    /// Query-time tokenizer from the nested query configuration block.
    query_tokenizer: Option<String>,
    /// Query-time weighting from the nested query configuration block.
    query_weighting: Option<QueryWeighting>,
    /// Position mode; copied to `FieldDef::positions` by `parse_field_def`.
    positions: Option<super::schema::PositionMode>,
}
221
222fn parse_attributes(pair: pest::iterators::Pair<Rule>) -> (bool, bool, bool, Option<IndexConfig>) {
227 let mut indexed = false;
228 let mut stored = false;
229 let mut multi = false;
230 let mut index_config = None;
231
232 for attr in pair.into_inner() {
233 if attr.as_rule() == Rule::attribute {
234 let mut found_config = false;
236 for inner in attr.clone().into_inner() {
237 match inner.as_rule() {
238 Rule::indexed_with_config => {
239 indexed = true;
240 index_config = Some(parse_index_config(inner));
241 found_config = true;
242 break;
243 }
244 Rule::stored_with_config => {
245 stored = true;
246 multi = true; found_config = true;
248 break;
249 }
250 _ => {}
251 }
252 }
253 if !found_config {
254 match attr.as_str() {
256 "indexed" => indexed = true,
257 "stored" => stored = true,
258 _ => {}
259 }
260 }
261 }
262 }
263
264 (indexed, stored, multi, index_config)
265}
266
267fn parse_index_config(pair: pest::iterators::Pair<Rule>) -> IndexConfig {
269 let mut config = IndexConfig::default();
270
271 for inner in pair.into_inner() {
276 if inner.as_rule() == Rule::index_config_params {
277 for param in inner.into_inner() {
278 if param.as_rule() == Rule::index_config_param {
279 for p in param.into_inner() {
280 parse_single_index_config_param(&mut config, p);
281 }
282 }
283 }
284 }
285 }
286
287 config
288}
289
290fn parse_single_index_config_param(config: &mut IndexConfig, p: pest::iterators::Pair<Rule>) {
292 use super::schema::VectorIndexType;
293
294 match p.as_rule() {
295 Rule::index_type_spec => {
296 config.index_type = Some(match p.as_str() {
297 "flat" => VectorIndexType::Flat,
298 "rabitq" => VectorIndexType::RaBitQ,
299 "ivf_rabitq" => VectorIndexType::IvfRaBitQ,
300 "scann" => VectorIndexType::ScaNN,
301 _ => VectorIndexType::RaBitQ,
302 });
303 }
304 Rule::index_type_kwarg => {
305 if let Some(t) = p.into_inner().next() {
307 config.index_type = Some(match t.as_str() {
308 "flat" => VectorIndexType::Flat,
309 "rabitq" => VectorIndexType::RaBitQ,
310 "ivf_rabitq" => VectorIndexType::IvfRaBitQ,
311 "scann" => VectorIndexType::ScaNN,
312 _ => VectorIndexType::RaBitQ,
313 });
314 }
315 }
316 Rule::num_clusters_kwarg => {
317 if let Some(n) = p.into_inner().next() {
319 config.num_clusters = Some(n.as_str().parse().unwrap_or(256));
320 }
321 }
322 Rule::build_threshold_kwarg => {
323 if let Some(n) = p.into_inner().next() {
325 config.build_threshold = Some(n.as_str().parse().unwrap_or(10000));
326 }
327 }
328 Rule::nprobe_kwarg => {
329 if let Some(n) = p.into_inner().next() {
331 config.nprobe = Some(n.as_str().parse().unwrap_or(32));
332 }
333 }
334 Rule::quantization_kwarg => {
335 if let Some(q) = p.into_inner().next() {
337 config.quantization = Some(match q.as_str() {
338 "float32" | "f32" => WeightQuantization::Float32,
339 "float16" | "f16" => WeightQuantization::Float16,
340 "uint8" | "u8" => WeightQuantization::UInt8,
341 "uint4" | "u4" => WeightQuantization::UInt4,
342 _ => WeightQuantization::default(),
343 });
344 }
345 }
346 Rule::weight_threshold_kwarg => {
347 if let Some(t) = p.into_inner().next() {
349 config.weight_threshold = Some(t.as_str().parse().unwrap_or(0.0));
350 }
351 }
352 Rule::block_size_kwarg => {
353 if let Some(n) = p.into_inner().next() {
355 config.block_size = Some(n.as_str().parse().unwrap_or(128));
356 }
357 }
358 Rule::pruning_kwarg => {
359 if let Some(f) = p.into_inner().next() {
361 config.posting_list_pruning = Some(f.as_str().parse().unwrap_or(1.0));
362 }
363 }
364 Rule::query_config_block => {
365 parse_query_config_block(config, p);
367 }
368 Rule::positions_kwarg => {
369 use super::schema::PositionMode;
371 config.positions = Some(match p.as_str() {
372 "ordinal" => PositionMode::Ordinal,
373 "token_position" => PositionMode::TokenPosition,
374 _ => PositionMode::Full, });
376 }
377 _ => {}
378 }
379}
380
381fn parse_query_config_block(config: &mut IndexConfig, pair: pest::iterators::Pair<Rule>) {
383 for inner in pair.into_inner() {
384 if inner.as_rule() == Rule::query_config_params {
385 for param in inner.into_inner() {
386 if param.as_rule() == Rule::query_config_param {
387 for p in param.into_inner() {
388 match p.as_rule() {
389 Rule::query_tokenizer_kwarg => {
390 if let Some(path) = p.into_inner().next()
392 && let Some(inner_path) = path.into_inner().next()
393 {
394 config.query_tokenizer = Some(inner_path.as_str().to_string());
395 }
396 }
397 Rule::query_weighting_kwarg => {
398 if let Some(w) = p.into_inner().next() {
400 config.query_weighting = Some(match w.as_str() {
401 "one" => QueryWeighting::One,
402 "idf" => QueryWeighting::Idf,
403 "idf_file" => QueryWeighting::IdfFile,
404 _ => QueryWeighting::One,
405 });
406 }
407 }
408 _ => {}
409 }
410 }
411 }
412 }
413 }
414 }
415}
416
417fn parse_field_def(pair: pest::iterators::Pair<Rule>) -> Result<FieldDef> {
419 let mut inner = pair.into_inner();
420
421 let name = inner
422 .next()
423 .ok_or_else(|| Error::Schema("Missing field name".to_string()))?
424 .as_str()
425 .to_string();
426
427 let field_type_str = inner
428 .next()
429 .ok_or_else(|| Error::Schema("Missing field type".to_string()))?
430 .as_str();
431
432 let field_type = parse_field_type(field_type_str)?;
433
434 let mut tokenizer = None;
436 let mut sparse_vector_config = None;
437 let mut dense_vector_config = None;
438 let mut indexed = true;
439 let mut stored = true;
440 let mut multi = false;
441 let mut index_config: Option<IndexConfig> = None;
442
443 for item in inner {
444 match item.as_rule() {
445 Rule::tokenizer_spec => {
446 if let Some(tok_name) = item.into_inner().next() {
448 tokenizer = Some(tok_name.as_str().to_string());
449 }
450 }
451 Rule::sparse_vector_config => {
452 sparse_vector_config = Some(parse_sparse_vector_config(item));
454 }
455 Rule::dense_vector_config => {
456 dense_vector_config = Some(parse_dense_vector_config(item));
458 }
459 Rule::attributes => {
460 let (idx, sto, mul, idx_cfg) = parse_attributes(item);
461 indexed = idx;
462 stored = sto;
463 multi = mul;
464 index_config = idx_cfg;
465 }
466 _ => {}
467 }
468 }
469
470 let mut positions = None;
472 if let Some(idx_cfg) = index_config {
473 positions = idx_cfg.positions;
474 if let Some(ref mut dv_config) = dense_vector_config {
475 apply_index_config_to_dense_vector(dv_config, idx_cfg);
476 } else if field_type == FieldType::SparseVector {
477 let sv_config = sparse_vector_config.get_or_insert(SparseVectorConfig::default());
479 apply_index_config_to_sparse_vector(sv_config, idx_cfg);
480 }
481 }
482
483 Ok(FieldDef {
484 name,
485 field_type,
486 indexed,
487 stored,
488 tokenizer,
489 multi,
490 positions,
491 sparse_vector_config,
492 dense_vector_config,
493 })
494}
495
496fn apply_index_config_to_dense_vector(config: &mut DenseVectorConfig, idx_cfg: IndexConfig) {
498 if let Some(index_type) = idx_cfg.index_type {
500 config.index_type = index_type;
501 }
502
503 if idx_cfg.num_clusters.is_some() {
505 config.num_clusters = idx_cfg.num_clusters;
506 }
507
508 if let Some(nprobe) = idx_cfg.nprobe {
510 config.nprobe = nprobe;
511 }
512
513 if idx_cfg.build_threshold.is_some() {
515 config.build_threshold = idx_cfg.build_threshold;
516 }
517}
518
519fn parse_sparse_vector_config(pair: pest::iterators::Pair<Rule>) -> SparseVectorConfig {
522 let mut index_size = IndexSize::default();
523
524 for inner in pair.into_inner() {
526 if inner.as_rule() == Rule::index_size_spec {
527 index_size = match inner.as_str() {
528 "u16" => IndexSize::U16,
529 "u32" => IndexSize::U32,
530 _ => IndexSize::default(),
531 };
532 }
533 }
534
535 SparseVectorConfig {
536 index_size,
537 weight_quantization: WeightQuantization::default(),
538 weight_threshold: 0.0,
539 block_size: 128,
540 posting_list_pruning: None,
541 query_config: None,
542 }
543}
544
545fn apply_index_config_to_sparse_vector(config: &mut SparseVectorConfig, idx_cfg: IndexConfig) {
547 if let Some(q) = idx_cfg.quantization {
548 config.weight_quantization = q;
549 }
550 if let Some(t) = idx_cfg.weight_threshold {
551 config.weight_threshold = t;
552 }
553 if let Some(bs) = idx_cfg.block_size {
554 let adjusted = bs.next_power_of_two();
555 if adjusted != bs {
556 log::warn!(
557 "block_size {} adjusted to next power of two: {}",
558 bs,
559 adjusted
560 );
561 }
562 config.block_size = adjusted;
563 }
564 if let Some(p) = idx_cfg.posting_list_pruning {
565 let clamped = p.clamp(0.0, 1.0);
566 if (clamped - p).abs() > f32::EPSILON {
567 log::warn!(
568 "pruning {} clamped to valid range [0.0, 1.0]: {}",
569 p,
570 clamped
571 );
572 }
573 config.posting_list_pruning = Some(clamped);
574 }
575 if idx_cfg.query_tokenizer.is_some() || idx_cfg.query_weighting.is_some() {
577 let query_config = config
578 .query_config
579 .get_or_insert(SparseQueryConfig::default());
580 if let Some(tokenizer) = idx_cfg.query_tokenizer {
581 query_config.tokenizer = Some(tokenizer);
582 }
583 if let Some(weighting) = idx_cfg.query_weighting {
584 query_config.weighting = weighting;
585 }
586 }
587}
588
589fn parse_dense_vector_config(pair: pest::iterators::Pair<Rule>) -> DenseVectorConfig {
592 let mut dim: usize = 0;
593 let mut quantization = DenseVectorQuantization::F32;
594
595 for params in pair.into_inner() {
597 if params.as_rule() == Rule::dense_vector_params {
598 for inner in params.into_inner() {
599 match inner.as_rule() {
600 Rule::dense_vector_keyword_params => {
601 for kwarg in inner.into_inner() {
602 match kwarg.as_rule() {
603 Rule::dims_kwarg => {
604 if let Some(d) = kwarg.into_inner().next() {
605 dim = d.as_str().parse().unwrap_or(0);
606 }
607 }
608 Rule::quant_type_spec => {
609 quantization = parse_quant_type(kwarg.as_str());
610 }
611 _ => {}
612 }
613 }
614 }
615 Rule::dense_vector_positional_params => {
616 for item in inner.into_inner() {
617 match item.as_rule() {
618 Rule::dimension_spec => {
619 dim = item.as_str().parse().unwrap_or(0);
620 }
621 Rule::quant_type_spec => {
622 quantization = parse_quant_type(item.as_str());
623 }
624 _ => {}
625 }
626 }
627 }
628 _ => {}
629 }
630 }
631 }
632 }
633
634 DenseVectorConfig::new(dim).with_quantization(quantization)
635}
636
637fn parse_quant_type(s: &str) -> DenseVectorQuantization {
638 match s.trim() {
639 "f16" => DenseVectorQuantization::F16,
640 "uint8" | "u8" => DenseVectorQuantization::UInt8,
641 _ => DenseVectorQuantization::F32,
642 }
643}
644
645fn parse_default_fields_def(pair: pest::iterators::Pair<Rule>) -> Vec<String> {
647 pair.into_inner().map(|p| p.as_str().to_string()).collect()
648}
649
650fn parse_query_router_def(pair: pest::iterators::Pair<Rule>) -> Result<QueryRouterRule> {
652 let mut pattern = String::new();
653 let mut substitution = String::new();
654 let mut target_field = String::new();
655 let mut mode = RoutingMode::Additional;
656
657 for prop in pair.into_inner() {
658 if prop.as_rule() != Rule::query_router_prop {
659 continue;
660 }
661
662 for inner in prop.into_inner() {
663 match inner.as_rule() {
664 Rule::query_router_pattern => {
665 if let Some(regex_str) = inner.into_inner().next() {
666 pattern = parse_string_value(regex_str);
667 }
668 }
669 Rule::query_router_substitution => {
670 if let Some(quoted) = inner.into_inner().next() {
671 substitution = parse_string_value(quoted);
672 }
673 }
674 Rule::query_router_target => {
675 if let Some(ident) = inner.into_inner().next() {
676 target_field = ident.as_str().to_string();
677 }
678 }
679 Rule::query_router_mode => {
680 if let Some(mode_val) = inner.into_inner().next() {
681 mode = match mode_val.as_str() {
682 "exclusive" => RoutingMode::Exclusive,
683 "additional" => RoutingMode::Additional,
684 _ => RoutingMode::Additional,
685 };
686 }
687 }
688 _ => {}
689 }
690 }
691 }
692
693 if pattern.is_empty() {
694 return Err(Error::Schema("query_router missing 'pattern'".to_string()));
695 }
696 if substitution.is_empty() {
697 return Err(Error::Schema(
698 "query_router missing 'substitution'".to_string(),
699 ));
700 }
701 if target_field.is_empty() {
702 return Err(Error::Schema(
703 "query_router missing 'target_field'".to_string(),
704 ));
705 }
706
707 Ok(QueryRouterRule {
708 pattern,
709 substitution,
710 target_field,
711 mode,
712 })
713}
714
715fn parse_string_value(pair: pest::iterators::Pair<Rule>) -> String {
717 let s = pair.as_str();
718 match pair.as_rule() {
719 Rule::regex_string => {
720 if let Some(inner) = pair.into_inner().next() {
722 parse_string_value(inner)
723 } else {
724 s.to_string()
725 }
726 }
727 Rule::raw_string => {
728 s[2..s.len() - 1].to_string()
730 }
731 Rule::quoted_string => {
732 let inner = &s[1..s.len() - 1];
734 inner
736 .replace("\\n", "\n")
737 .replace("\\t", "\t")
738 .replace("\\\"", "\"")
739 .replace("\\\\", "\\")
740 }
741 _ => s.to_string(),
742 }
743}
744
745fn parse_index_def(pair: pest::iterators::Pair<Rule>) -> Result<IndexDef> {
747 let mut inner = pair.into_inner();
748
749 let name = inner
750 .next()
751 .ok_or_else(|| Error::Schema("Missing index name".to_string()))?
752 .as_str()
753 .to_string();
754
755 let mut fields = Vec::new();
756 let mut default_fields = Vec::new();
757 let mut query_routers = Vec::new();
758
759 for item in inner {
760 match item.as_rule() {
761 Rule::field_def => {
762 fields.push(parse_field_def(item)?);
763 }
764 Rule::default_fields_def => {
765 default_fields = parse_default_fields_def(item);
766 }
767 Rule::query_router_def => {
768 query_routers.push(parse_query_router_def(item)?);
769 }
770 _ => {}
771 }
772 }
773
774 Ok(IndexDef {
775 name,
776 fields,
777 default_fields,
778 query_routers,
779 })
780}
781
782pub fn parse_sdl(input: &str) -> Result<Vec<IndexDef>> {
784 let pairs = SdlParser::parse(Rule::file, input)
785 .map_err(|e| Error::Schema(format!("Parse error: {}", e)))?;
786
787 let mut indexes = Vec::new();
788
789 for pair in pairs {
790 if pair.as_rule() == Rule::file {
791 for inner in pair.into_inner() {
792 if inner.as_rule() == Rule::index_def {
793 indexes.push(parse_index_def(inner)?);
794 }
795 }
796 }
797 }
798
799 Ok(indexes)
800}
801
802pub fn parse_single_index(input: &str) -> Result<IndexDef> {
804 let indexes = parse_sdl(input)?;
805
806 if indexes.is_empty() {
807 return Err(Error::Schema("No index definition found".to_string()));
808 }
809
810 if indexes.len() > 1 {
811 return Err(Error::Schema(
812 "Multiple index definitions found, expected one".to_string(),
813 ));
814 }
815
816 Ok(indexes.into_iter().next().unwrap())
817}
818
819#[cfg(test)]
820mod tests {
821 use super::*;
822
823 #[test]
824 fn test_parse_simple_schema() {
825 let sdl = r#"
826 index articles {
827 field title: text [indexed, stored]
828 field body: text [indexed]
829 }
830 "#;
831
832 let indexes = parse_sdl(sdl).unwrap();
833 assert_eq!(indexes.len(), 1);
834
835 let index = &indexes[0];
836 assert_eq!(index.name, "articles");
837 assert_eq!(index.fields.len(), 2);
838
839 assert_eq!(index.fields[0].name, "title");
840 assert!(matches!(index.fields[0].field_type, FieldType::Text));
841 assert!(index.fields[0].indexed);
842 assert!(index.fields[0].stored);
843
844 assert_eq!(index.fields[1].name, "body");
845 assert!(matches!(index.fields[1].field_type, FieldType::Text));
846 assert!(index.fields[1].indexed);
847 assert!(!index.fields[1].stored);
848 }
849
850 #[test]
851 fn test_parse_all_field_types() {
852 let sdl = r#"
853 index test {
854 field text_field: text [indexed, stored]
855 field u64_field: u64 [indexed, stored]
856 field i64_field: i64 [indexed, stored]
857 field f64_field: f64 [indexed, stored]
858 field bytes_field: bytes [stored]
859 }
860 "#;
861
862 let indexes = parse_sdl(sdl).unwrap();
863 let index = &indexes[0];
864
865 assert!(matches!(index.fields[0].field_type, FieldType::Text));
866 assert!(matches!(index.fields[1].field_type, FieldType::U64));
867 assert!(matches!(index.fields[2].field_type, FieldType::I64));
868 assert!(matches!(index.fields[3].field_type, FieldType::F64));
869 assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
870 }
871
872 #[test]
873 fn test_parse_with_comments() {
874 let sdl = r#"
875 # This is a comment
876 index articles {
877 # Title field
878 field title: text [indexed, stored]
879 field body: text [indexed] # inline comment not supported yet
880 }
881 "#;
882
883 let indexes = parse_sdl(sdl).unwrap();
884 assert_eq!(indexes[0].fields.len(), 2);
885 }
886
887 #[test]
888 fn test_parse_type_aliases() {
889 let sdl = r#"
890 index test {
891 field a: string [indexed]
892 field b: int [indexed]
893 field c: uint [indexed]
894 field d: float [indexed]
895 field e: binary [stored]
896 }
897 "#;
898
899 let indexes = parse_sdl(sdl).unwrap();
900 let index = &indexes[0];
901
902 assert!(matches!(index.fields[0].field_type, FieldType::Text));
903 assert!(matches!(index.fields[1].field_type, FieldType::I64));
904 assert!(matches!(index.fields[2].field_type, FieldType::U64));
905 assert!(matches!(index.fields[3].field_type, FieldType::F64));
906 assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
907 }
908
909 #[test]
910 fn test_to_schema() {
911 let sdl = r#"
912 index articles {
913 field title: text [indexed, stored]
914 field views: u64 [indexed, stored]
915 }
916 "#;
917
918 let indexes = parse_sdl(sdl).unwrap();
919 let schema = indexes[0].to_schema();
920
921 assert!(schema.get_field("title").is_some());
922 assert!(schema.get_field("views").is_some());
923 assert!(schema.get_field("nonexistent").is_none());
924 }
925
926 #[test]
927 fn test_default_attributes() {
928 let sdl = r#"
929 index test {
930 field title: text
931 }
932 "#;
933
934 let indexes = parse_sdl(sdl).unwrap();
935 let field = &indexes[0].fields[0];
936
937 assert!(field.indexed);
939 assert!(field.stored);
940 }
941
942 #[test]
943 fn test_multiple_indexes() {
944 let sdl = r#"
945 index articles {
946 field title: text [indexed, stored]
947 }
948
949 index users {
950 field name: text [indexed, stored]
951 field email: text [indexed, stored]
952 }
953 "#;
954
955 let indexes = parse_sdl(sdl).unwrap();
956 assert_eq!(indexes.len(), 2);
957 assert_eq!(indexes[0].name, "articles");
958 assert_eq!(indexes[1].name, "users");
959 }
960
961 #[test]
962 fn test_tokenizer_spec() {
963 let sdl = r#"
964 index articles {
965 field title: text<en_stem> [indexed, stored]
966 field body: text<default> [indexed]
967 field author: text [indexed, stored]
968 }
969 "#;
970
971 let indexes = parse_sdl(sdl).unwrap();
972 let index = &indexes[0];
973
974 assert_eq!(index.fields[0].name, "title");
975 assert_eq!(index.fields[0].tokenizer, Some("en_stem".to_string()));
976
977 assert_eq!(index.fields[1].name, "body");
978 assert_eq!(index.fields[1].tokenizer, Some("default".to_string()));
979
980 assert_eq!(index.fields[2].name, "author");
981 assert_eq!(index.fields[2].tokenizer, None); }
983
984 #[test]
985 fn test_tokenizer_in_schema() {
986 let sdl = r#"
987 index articles {
988 field title: text<german> [indexed, stored]
989 field body: text<en_stem> [indexed]
990 }
991 "#;
992
993 let indexes = parse_sdl(sdl).unwrap();
994 let schema = indexes[0].to_schema();
995
996 let title_field = schema.get_field("title").unwrap();
997 let title_entry = schema.get_field_entry(title_field).unwrap();
998 assert_eq!(title_entry.tokenizer, Some("german".to_string()));
999
1000 let body_field = schema.get_field("body").unwrap();
1001 let body_entry = schema.get_field_entry(body_field).unwrap();
1002 assert_eq!(body_entry.tokenizer, Some("en_stem".to_string()));
1003 }
1004
1005 #[test]
1006 fn test_query_router_basic() {
1007 let sdl = r#"
1008 index documents {
1009 field title: text [indexed, stored]
1010 field uri: text [indexed, stored]
1011
1012 query_router {
1013 pattern: "10\\.\\d{4,}/[^\\s]+"
1014 substitution: "doi://{0}"
1015 target_field: uris
1016 mode: exclusive
1017 }
1018 }
1019 "#;
1020
1021 let indexes = parse_sdl(sdl).unwrap();
1022 let index = &indexes[0];
1023
1024 assert_eq!(index.query_routers.len(), 1);
1025 let router = &index.query_routers[0];
1026 assert_eq!(router.pattern, r"10\.\d{4,}/[^\s]+");
1027 assert_eq!(router.substitution, "doi://{0}");
1028 assert_eq!(router.target_field, "uris");
1029 assert_eq!(router.mode, RoutingMode::Exclusive);
1030 }
1031
1032 #[test]
1033 fn test_query_router_raw_string() {
1034 let sdl = r#"
1035 index documents {
1036 field uris: text [indexed, stored]
1037
1038 query_router {
1039 pattern: r"^pmid:(\d+)$"
1040 substitution: "pubmed://{1}"
1041 target_field: uris
1042 mode: additional
1043 }
1044 }
1045 "#;
1046
1047 let indexes = parse_sdl(sdl).unwrap();
1048 let router = &indexes[0].query_routers[0];
1049
1050 assert_eq!(router.pattern, r"^pmid:(\d+)$");
1051 assert_eq!(router.substitution, "pubmed://{1}");
1052 assert_eq!(router.mode, RoutingMode::Additional);
1053 }
1054
1055 #[test]
1056 fn test_multiple_query_routers() {
1057 let sdl = r#"
1058 index documents {
1059 field uris: text [indexed, stored]
1060
1061 query_router {
1062 pattern: r"^doi:(10\.\d{4,}/[^\s]+)$"
1063 substitution: "doi://{1}"
1064 target_field: uris
1065 mode: exclusive
1066 }
1067
1068 query_router {
1069 pattern: r"^pmid:(\d+)$"
1070 substitution: "pubmed://{1}"
1071 target_field: uris
1072 mode: exclusive
1073 }
1074
1075 query_router {
1076 pattern: r"^arxiv:(\d+\.\d+)$"
1077 substitution: "arxiv://{1}"
1078 target_field: uris
1079 mode: additional
1080 }
1081 }
1082 "#;
1083
1084 let indexes = parse_sdl(sdl).unwrap();
1085 assert_eq!(indexes[0].query_routers.len(), 3);
1086 }
1087
1088 #[test]
1089 fn test_query_router_default_mode() {
1090 let sdl = r#"
1091 index documents {
1092 field uris: text [indexed, stored]
1093
1094 query_router {
1095 pattern: r"test"
1096 substitution: "{0}"
1097 target_field: uris
1098 }
1099 }
1100 "#;
1101
1102 let indexes = parse_sdl(sdl).unwrap();
1103 assert_eq!(indexes[0].query_routers[0].mode, RoutingMode::Additional);
1105 }
1106
1107 #[test]
1108 fn test_multi_attribute() {
1109 let sdl = r#"
1110 index documents {
1111 field uris: text [indexed, stored<multi>]
1112 field title: text [indexed, stored]
1113 }
1114 "#;
1115
1116 let indexes = parse_sdl(sdl).unwrap();
1117 assert_eq!(indexes.len(), 1);
1118
1119 let fields = &indexes[0].fields;
1120 assert_eq!(fields.len(), 2);
1121
1122 assert_eq!(fields[0].name, "uris");
1124 assert!(fields[0].multi, "uris field should have multi=true");
1125
1126 assert_eq!(fields[1].name, "title");
1128 assert!(!fields[1].multi, "title field should have multi=false");
1129
1130 let schema = indexes[0].to_schema();
1132 let uris_field = schema.get_field("uris").unwrap();
1133 let title_field = schema.get_field("title").unwrap();
1134
1135 assert!(schema.get_field_entry(uris_field).unwrap().multi);
1136 assert!(!schema.get_field_entry(title_field).unwrap().multi);
1137 }
1138
1139 #[test]
1140 fn test_sparse_vector_field() {
1141 let sdl = r#"
1142 index documents {
1143 field embedding: sparse_vector [indexed, stored]
1144 }
1145 "#;
1146
1147 let indexes = parse_sdl(sdl).unwrap();
1148 assert_eq!(indexes.len(), 1);
1149 assert_eq!(indexes[0].fields.len(), 1);
1150 assert_eq!(indexes[0].fields[0].name, "embedding");
1151 assert_eq!(indexes[0].fields[0].field_type, FieldType::SparseVector);
1152 assert!(indexes[0].fields[0].sparse_vector_config.is_none());
1153 }
1154
1155 #[test]
1156 fn test_sparse_vector_with_config() {
1157 let sdl = r#"
1158 index documents {
1159 field embedding: sparse_vector<u16> [indexed<quantization: uint8>, stored]
1160 field dense: sparse_vector<u32> [indexed<quantization: float32>]
1161 }
1162 "#;
1163
1164 let indexes = parse_sdl(sdl).unwrap();
1165 assert_eq!(indexes[0].fields.len(), 2);
1166
1167 let f1 = &indexes[0].fields[0];
1169 assert_eq!(f1.name, "embedding");
1170 let config1 = f1.sparse_vector_config.as_ref().unwrap();
1171 assert_eq!(config1.index_size, IndexSize::U16);
1172 assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);
1173
1174 let f2 = &indexes[0].fields[1];
1176 assert_eq!(f2.name, "dense");
1177 let config2 = f2.sparse_vector_config.as_ref().unwrap();
1178 assert_eq!(config2.index_size, IndexSize::U32);
1179 assert_eq!(config2.weight_quantization, WeightQuantization::Float32);
1180 }
1181
1182 #[test]
1183 fn test_sparse_vector_with_weight_threshold() {
1184 let sdl = r#"
1185 index documents {
1186 field embedding: sparse_vector<u16> [indexed<quantization: uint8, weight_threshold: 0.1>, stored]
1187 field embedding2: sparse_vector<u32> [indexed<quantization: float16, weight_threshold: 0.05>]
1188 }
1189 "#;
1190
1191 let indexes = parse_sdl(sdl).unwrap();
1192 assert_eq!(indexes[0].fields.len(), 2);
1193
1194 let f1 = &indexes[0].fields[0];
1196 assert_eq!(f1.name, "embedding");
1197 let config1 = f1.sparse_vector_config.as_ref().unwrap();
1198 assert_eq!(config1.index_size, IndexSize::U16);
1199 assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);
1200 assert!((config1.weight_threshold - 0.1).abs() < 0.001);
1201
1202 let f2 = &indexes[0].fields[1];
1204 assert_eq!(f2.name, "embedding2");
1205 let config2 = f2.sparse_vector_config.as_ref().unwrap();
1206 assert_eq!(config2.index_size, IndexSize::U32);
1207 assert_eq!(config2.weight_quantization, WeightQuantization::Float16);
1208 assert!((config2.weight_threshold - 0.05).abs() < 0.001);
1209 }
1210
1211 #[test]
1212 fn test_sparse_vector_with_pruning() {
1213 let sdl = r#"
1214 index documents {
1215 field embedding: sparse_vector [indexed<quantization: uint8, pruning: 0.1>, stored]
1216 }
1217 "#;
1218
1219 let indexes = parse_sdl(sdl).unwrap();
1220 let f = &indexes[0].fields[0];
1221 assert_eq!(f.name, "embedding");
1222 let config = f.sparse_vector_config.as_ref().unwrap();
1223 assert_eq!(config.weight_quantization, WeightQuantization::UInt8);
1224 assert_eq!(config.posting_list_pruning, Some(0.1));
1225 }
1226
1227 #[test]
1228 fn test_dense_vector_field() {
1229 let sdl = r#"
1230 index documents {
1231 field embedding: dense_vector<768> [indexed, stored]
1232 }
1233 "#;
1234
1235 let indexes = parse_sdl(sdl).unwrap();
1236 assert_eq!(indexes.len(), 1);
1237 assert_eq!(indexes[0].fields.len(), 1);
1238
1239 let f = &indexes[0].fields[0];
1240 assert_eq!(f.name, "embedding");
1241 assert_eq!(f.field_type, FieldType::DenseVector);
1242
1243 let config = f.dense_vector_config.as_ref().unwrap();
1244 assert_eq!(config.dim, 768);
1245 }
1246
1247 #[test]
1248 fn test_dense_vector_alias() {
1249 let sdl = r#"
1250 index documents {
1251 field embedding: vector<1536> [indexed]
1252 }
1253 "#;
1254
1255 let indexes = parse_sdl(sdl).unwrap();
1256 assert_eq!(indexes[0].fields[0].field_type, FieldType::DenseVector);
1257 assert_eq!(
1258 indexes[0].fields[0]
1259 .dense_vector_config
1260 .as_ref()
1261 .unwrap()
1262 .dim,
1263 1536
1264 );
1265 }
1266
1267 #[test]
1268 fn test_dense_vector_with_num_clusters() {
1269 let sdl = r#"
1270 index documents {
1271 field embedding: dense_vector<768> [indexed<ivf_rabitq, num_clusters: 256>, stored]
1272 }
1273 "#;
1274
1275 let indexes = parse_sdl(sdl).unwrap();
1276 assert_eq!(indexes.len(), 1);
1277
1278 let f = &indexes[0].fields[0];
1279 assert_eq!(f.name, "embedding");
1280 assert_eq!(f.field_type, FieldType::DenseVector);
1281
1282 let config = f.dense_vector_config.as_ref().unwrap();
1283 assert_eq!(config.dim, 768);
1284 assert_eq!(config.num_clusters, Some(256));
1285 assert_eq!(config.nprobe, 32); }
1287
/// Both `num_clusters` and `nprobe` given inside `indexed<...>` should be
/// carried through to the dense vector config verbatim.
#[test]
fn test_dense_vector_with_num_clusters_and_nprobe() {
    let source = r#"
        index documents {
            field embedding: dense_vector<1536> [indexed<ivf_rabitq, num_clusters: 512, nprobe: 64>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);
    assert_eq!(dense.num_clusters, Some(512));
    assert_eq!(dense.nprobe, 64);
}
1303
/// The keyword form `dense_vector<dims: N>` should set the dimension and
/// leave `num_clusters` unset when no index parameters are given.
#[test]
fn test_dense_vector_keyword_syntax() {
    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 1536> [indexed, stored]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);
    assert!(dense.num_clusters.is_none());
}
1318
/// Keyword-style dimension combined with a fully specified
/// `indexed<ivf_rabitq, num_clusters, nprobe>` attribute.
#[test]
fn test_dense_vector_keyword_syntax_full() {
    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 1536> [indexed<ivf_rabitq, num_clusters: 256, nprobe: 64>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);
    assert_eq!(dense.num_clusters, Some(256));
    assert_eq!(dense.nprobe, 64);
}
1334
/// Keyword-style dimension with only `num_clusters` supplied; the test
/// expects `nprobe` to come back as 32 when it is left out.
#[test]
fn test_dense_vector_keyword_syntax_partial() {
    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed<ivf_rabitq, num_clusters: 128>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.num_clusters, Some(128));
    // `nprobe` was omitted in the SDL above.
    assert_eq!(dense.nprobe, 32);
}
1350
/// `indexed<scann, ...>` should select the `ScaNN` index type and keep the
/// accompanying `num_clusters` / `nprobe` values.
#[test]
fn test_dense_vector_scann_index() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed<scann, num_clusters: 256, nprobe: 64>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.index_type, VectorIndexType::ScaNN);
    assert_eq!(dense.num_clusters, Some(256));
    assert_eq!(dense.nprobe, 64);
}
1369
/// `indexed<ivf_rabitq, num_clusters: 512>` should select the `IvfRaBitQ`
/// index type and record the cluster count.
#[test]
fn test_dense_vector_ivf_rabitq_index() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 1536> [indexed<ivf_rabitq, num_clusters: 512>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);
    assert_eq!(dense.index_type, VectorIndexType::IvfRaBitQ);
    assert_eq!(dense.num_clusters, Some(512));
}
1387
/// A bare `indexed<rabitq>` should select the `RaBitQ` index type and leave
/// `num_clusters` unset.
#[test]
fn test_dense_vector_rabitq_no_clusters() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed<rabitq>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.index_type, VectorIndexType::RaBitQ);
    assert!(dense.num_clusters.is_none());
}
1405
/// `indexed<flat>` should select the `Flat` index type.
#[test]
fn test_dense_vector_flat_index() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed<flat>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.index_type, VectorIndexType::Flat);
}
1422
/// A plain `[indexed]` attribute names no index type; the test expects the
/// resulting config to report `RaBitQ`.
#[test]
fn test_dense_vector_default_index_type() {
    use crate::dsl::schema::VectorIndexType;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.index_type, VectorIndexType::RaBitQ);
}
1440
/// `dense_vector<768, f16>` should select `F16` quantization; with a plain
/// `[indexed]` the test expects the index type to be `RaBitQ`.
#[test]
fn test_dense_vector_f16_quantization() {
    use crate::dsl::schema::{DenseVectorQuantization, VectorIndexType};

    let source = r#"
        index documents {
            field embedding: dense_vector<768, f16> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.quantization, DenseVectorQuantization::F16);
    assert_eq!(dense.index_type, VectorIndexType::RaBitQ);
}
1458
/// `dense_vector<1024, uint8>` should select `UInt8` quantization.
#[test]
fn test_dense_vector_uint8_quantization() {
    use crate::dsl::schema::DenseVectorQuantization;

    let source = r#"
        index documents {
            field embedding: dense_vector<1024, uint8> [indexed<ivf_rabitq>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1024);
    assert_eq!(dense.quantization, DenseVectorQuantization::UInt8);
}
1475
/// The short `u8` spelling should map to the same `UInt8` quantization as
/// `uint8`.
#[test]
fn test_dense_vector_u8_alias() {
    use crate::dsl::schema::DenseVectorQuantization;

    let source = r#"
        index documents {
            field embedding: dense_vector<512, u8> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 512);
    assert_eq!(dense.quantization, DenseVectorQuantization::UInt8);
}
1492
/// With no quantization named in `dense_vector<768>`, the test expects the
/// config to report `F32`.
#[test]
fn test_dense_vector_default_f32_quantization() {
    use crate::dsl::schema::DenseVectorQuantization;

    let source = r#"
        index documents {
            field embedding: dense_vector<768> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.quantization, DenseVectorQuantization::F32);
}
1510
/// Keyword-style dimension followed by a quantization name
/// (`dense_vector<dims: 768, f16>`) should set both values.
#[test]
fn test_dense_vector_keyword_with_quantization() {
    use crate::dsl::schema::DenseVectorQuantization;

    let source = r#"
        index documents {
            field embedding: dense_vector<dims: 768, f16> [indexed]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);
    assert_eq!(dense.quantization, DenseVectorQuantization::F16);
}
1527
/// `json` fields should parse both with and without an attribute list, and
/// the schema built from the definition should expose the `metadata` field
/// as a stored, non-indexed JSON entry.
#[test]
fn test_json_field_type() {
    let source = r#"
        index documents {
            field title: text [indexed, stored]
            field metadata: json [stored]
            field extra: json
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let documents = &parsed[0];

    assert_eq!(documents.fields.len(), 3);

    // JSON field declared with an explicit `[stored]` attribute.
    let metadata = &documents.fields[1];
    assert_eq!(metadata.name, "metadata");
    assert!(matches!(metadata.field_type, FieldType::Json));
    assert!(metadata.stored);

    // JSON field declared with no attribute list at all.
    let extra = &documents.fields[2];
    assert_eq!(extra.name, "extra");
    assert!(matches!(extra.field_type, FieldType::Json));

    // Round-trip through the schema and re-check the `metadata` entry.
    let schema = documents.to_schema();
    let handle = schema.get_field("metadata").unwrap();
    let entry = schema.get_field_entry(handle).unwrap();
    assert_eq!(entry.field_type, FieldType::Json);
    assert!(!entry.indexed);
    assert!(entry.stored);
}
1561
/// A sparse vector field with a nested `query<tokenizer, weighting>` block
/// should expose the tokenizer name and `Idf` weighting both on the parsed
/// field definition and on the schema entry built from it.
#[test]
fn test_sparse_vector_query_config() {
    use crate::structures::QueryWeighting;

    // Tokenizer name as written inside the SDL below.
    const TOKENIZER: &str = "Alibaba-NLP/gte-Qwen2-1.5B-instruct";

    let source = r#"
        index documents {
            field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<tokenizer: "Alibaba-NLP/gte-Qwen2-1.5B-instruct", weighting: idf>>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let documents = &parsed[0];

    assert_eq!(documents.fields.len(), 1);
    let embedding = &documents.fields[0];
    assert_eq!(embedding.name, "embedding");
    assert!(matches!(embedding.field_type, FieldType::SparseVector));

    let sparse = embedding.sparse_vector_config.as_ref().unwrap();
    assert_eq!(sparse.index_size, IndexSize::U16);
    assert_eq!(sparse.weight_quantization, WeightQuantization::UInt8);

    // Query-time settings as seen on the parsed definition.
    let parsed_query = sparse.query_config.as_ref().unwrap();
    assert_eq!(parsed_query.tokenizer.as_deref(), Some(TOKENIZER));
    assert_eq!(parsed_query.weighting, QueryWeighting::Idf);

    // The same settings must be present on the schema entry.
    let schema = documents.to_schema();
    let handle = schema.get_field("embedding").unwrap();
    let entry = schema.get_field_entry(handle).unwrap();
    let schema_sparse = entry.sparse_vector_config.as_ref().unwrap();
    let schema_query = schema_sparse.query_config.as_ref().unwrap();
    assert_eq!(schema_query.tokenizer.as_deref(), Some(TOKENIZER));
    assert_eq!(schema_query.weighting, QueryWeighting::Idf);
}
1606
/// `query<weighting: one>` with no tokenizer should yield a query config
/// whose tokenizer is unset and whose weighting is `One`.
#[test]
fn test_sparse_vector_query_config_weighting_one() {
    use crate::structures::QueryWeighting;

    let source = r#"
        index documents {
            field embedding: sparse_vector [indexed<query<weighting: one>>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let sparse = embedding.sparse_vector_config.as_ref().unwrap();

    let query = sparse.query_config.as_ref().unwrap();
    assert!(query.tokenizer.is_none());
    assert_eq!(query.weighting, QueryWeighting::One);
}
1624
/// `weighting: idf_file` should map to `QueryWeighting::IdfFile` on the
/// parsed definition and still be present on the schema entry built from it.
#[test]
fn test_sparse_vector_query_config_weighting_idf_file() {
    use crate::structures::QueryWeighting;

    let source = r#"
        index documents {
            field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<tokenizer: "opensearch-neural-sparse-encoding-v1", weighting: idf_file>>]
        }
    "#;

    let parsed = parse_sdl(source).unwrap();
    let documents = &parsed[0];
    let sparse = documents.fields[0].sparse_vector_config.as_ref().unwrap();

    // Query-time settings as seen on the parsed definition.
    let parsed_query = sparse.query_config.as_ref().unwrap();
    assert_eq!(
        parsed_query.tokenizer.as_deref(),
        Some("opensearch-neural-sparse-encoding-v1")
    );
    assert_eq!(parsed_query.weighting, QueryWeighting::IdfFile);

    // The weighting must still be there after building the schema.
    let schema = documents.to_schema();
    let handle = schema.get_field("embedding").unwrap();
    let entry = schema.get_field_entry(handle).unwrap();
    let schema_sparse = entry.sparse_vector_config.as_ref().unwrap();
    let schema_query = schema_sparse.query_config.as_ref().unwrap();
    assert_eq!(schema_query.weighting, QueryWeighting::IdfFile);
}
1653}