1use pest::Parser;
46use pest_derive::Parser;
47
48use super::query_field_router::{QueryRouterRule, RoutingMode};
49use super::schema::{DenseVectorQuantization, FieldType, Schema, SchemaBuilder};
50use crate::Result;
51use crate::error::Error;
52
/// Parser for the SDL (schema definition language).
///
/// The implementation is generated by the `pest_derive::Parser` derive from
/// the grammar file named in the `#[grammar]` attribute below.
#[derive(Parser)]
#[grammar = "dsl/sdl/sdl.pest"]
pub struct SdlParser;
56
57use super::schema::DenseVectorConfig;
58use crate::structures::{
59 IndexSize, QueryWeighting, SparseQueryConfig, SparseVectorConfig, WeightQuantization,
60};
61
/// A single field declaration parsed from an SDL index definition.
#[derive(Debug, Clone)]
pub struct FieldDef {
    /// Field name as written in the SDL.
    pub name: String,
    /// Data type of the field.
    pub field_type: FieldType,
    /// Whether the field is indexed.
    pub indexed: bool,
    /// Whether the field is stored.
    pub stored: bool,
    /// Tokenizer name from the field's tokenizer spec, if one was given.
    /// `IndexDef::to_schema` falls back to `"simple"` for text fields.
    pub tokenizer: Option<String>,
    /// Whether the field is multi-valued.
    pub multi: bool,
    /// Explicit position mode taken from the index configuration, if any.
    pub positions: Option<super::schema::PositionMode>,
    /// Sparse vector settings, when configured.
    pub sparse_vector_config: Option<SparseVectorConfig>,
    /// Dense vector settings; `IndexDef::to_schema` requires this for
    /// `FieldType::DenseVector` fields.
    pub dense_vector_config: Option<DenseVectorConfig>,
    /// Whether the `fast` attribute applies to this field.
    pub fast: bool,
    /// Whether this field is the index's primary key.
    pub primary: bool,
}
84
/// A parsed `index <name> { ... }` definition.
#[derive(Debug, Clone)]
pub struct IndexDef {
    /// Index name as written in the SDL.
    pub name: String,
    /// Field declarations, in source order.
    pub fields: Vec<FieldDef>,
    /// Names listed in the index's default-fields declaration (empty if absent).
    pub default_fields: Vec<String>,
    /// Query router rules, in source order.
    pub query_routers: Vec<QueryRouterRule>,
}
94
95impl IndexDef {
96 pub fn to_schema(&self) -> Schema {
98 let mut builder = SchemaBuilder::default();
99
100 for field in &self.fields {
101 let f = match field.field_type {
102 FieldType::Text => {
103 let tokenizer = field.tokenizer.as_deref().unwrap_or("simple");
104 builder.add_text_field_with_tokenizer(
105 &field.name,
106 field.indexed,
107 field.stored,
108 tokenizer,
109 )
110 }
111 FieldType::U64 => builder.add_u64_field(&field.name, field.indexed, field.stored),
112 FieldType::I64 => builder.add_i64_field(&field.name, field.indexed, field.stored),
113 FieldType::F64 => builder.add_f64_field(&field.name, field.indexed, field.stored),
114 FieldType::Bytes => builder.add_bytes_field(&field.name, field.stored),
115 FieldType::Json => builder.add_json_field(&field.name, field.stored),
116 FieldType::SparseVector => {
117 if let Some(config) = &field.sparse_vector_config {
118 builder.add_sparse_vector_field_with_config(
119 &field.name,
120 field.indexed,
121 field.stored,
122 config.clone(),
123 )
124 } else {
125 builder.add_sparse_vector_field(&field.name, field.indexed, field.stored)
126 }
127 }
128 FieldType::DenseVector => {
129 let config = field
131 .dense_vector_config
132 .as_ref()
133 .expect("DenseVector field requires dimension to be specified");
134 builder.add_dense_vector_field_with_config(
135 &field.name,
136 field.indexed,
137 field.stored,
138 config.clone(),
139 )
140 }
141 };
142 if field.multi {
143 builder.set_multi(f, true);
144 }
145 if field.fast {
146 builder.set_fast(f, true);
147 }
148 if field.primary {
149 builder.set_primary_key(f);
150 }
151 let positions = field.positions.or({
153 if field.multi
155 && matches!(
156 field.field_type,
157 FieldType::SparseVector | FieldType::DenseVector
158 )
159 {
160 Some(super::schema::PositionMode::Ordinal)
161 } else {
162 None
163 }
164 });
165 if let Some(mode) = positions {
166 builder.set_positions(f, mode);
167 }
168 }
169
170 if !self.default_fields.is_empty() {
172 builder.set_default_fields(self.default_fields.clone());
173 }
174
175 if !self.query_routers.is_empty() {
177 builder.set_query_routers(self.query_routers.clone());
178 }
179
180 builder.build()
181 }
182
183 pub fn to_query_router(&self) -> Result<Option<super::query_field_router::QueryFieldRouter>> {
188 if self.query_routers.is_empty() {
189 return Ok(None);
190 }
191
192 super::query_field_router::QueryFieldRouter::from_rules(&self.query_routers)
193 .map(Some)
194 .map_err(Error::Schema)
195 }
196}
197
198fn parse_field_type(type_str: &str) -> Result<FieldType> {
200 match type_str {
201 "text" | "string" | "str" => Ok(FieldType::Text),
202 "u64" | "uint" | "unsigned" => Ok(FieldType::U64),
203 "i64" | "int" | "integer" => Ok(FieldType::I64),
204 "f64" | "float" | "double" => Ok(FieldType::F64),
205 "bytes" | "binary" | "blob" => Ok(FieldType::Bytes),
206 "json" => Ok(FieldType::Json),
207 "sparse_vector" => Ok(FieldType::SparseVector),
208 "dense_vector" | "vector" => Ok(FieldType::DenseVector),
209 _ => Err(Error::Schema(format!("Unknown field type: {}", type_str))),
210 }
211}
212
/// Options collected from an `indexed<...>` attribute.
///
/// Every field is `None` unless the corresponding parameter appeared in the
/// SDL; the values are later applied to a dense or sparse vector config.
#[derive(Debug, Clone, Default)]
struct IndexConfig {
    // --- applied by `apply_index_config_to_dense_vector` ---
    index_type: Option<super::schema::VectorIndexType>,
    num_clusters: Option<usize>,
    nprobe: Option<usize>,
    build_threshold: Option<usize>,
    // --- applied by `apply_index_config_to_sparse_vector` ---
    quantization: Option<WeightQuantization>,
    weight_threshold: Option<f32>,
    block_size: Option<usize>,
    pruning: Option<f32>,
    // --- sparse query-time settings (from the query config block) ---
    query_tokenizer: Option<String>,
    query_weighting: Option<QueryWeighting>,
    query_weight_threshold: Option<f32>,
    query_max_dims: Option<usize>,
    query_pruning: Option<f32>,
    // Position mode; copied onto the field definition by `parse_field_def`.
    positions: Option<super::schema::PositionMode>,
}
234
235fn parse_attributes(
240 pair: pest::iterators::Pair<Rule>,
241) -> (bool, bool, bool, bool, bool, Option<IndexConfig>) {
242 let mut indexed = false;
243 let mut stored = false;
244 let mut multi = false;
245 let mut fast = false;
246 let mut primary = false;
247 let mut index_config = None;
248
249 for attr in pair.into_inner() {
250 if attr.as_rule() == Rule::attribute {
251 let mut found_config = false;
253 for inner in attr.clone().into_inner() {
254 match inner.as_rule() {
255 Rule::indexed_with_config => {
256 indexed = true;
257 index_config = Some(parse_index_config(inner));
258 found_config = true;
259 break;
260 }
261 Rule::stored_with_config => {
262 stored = true;
263 multi = true; found_config = true;
265 break;
266 }
267 _ => {}
268 }
269 }
270 if !found_config {
271 match attr.as_str() {
273 "indexed" => indexed = true,
274 "stored" => stored = true,
275 "fast" => fast = true,
276 "primary" => primary = true,
277 _ => {}
278 }
279 }
280 }
281 }
282
283 (indexed, stored, multi, fast, primary, index_config)
284}
285
286fn parse_index_config(pair: pest::iterators::Pair<Rule>) -> IndexConfig {
288 let mut config = IndexConfig::default();
289
290 for inner in pair.into_inner() {
295 if inner.as_rule() == Rule::index_config_params {
296 for param in inner.into_inner() {
297 if param.as_rule() == Rule::index_config_param {
298 for p in param.into_inner() {
299 parse_single_index_config_param(&mut config, p);
300 }
301 }
302 }
303 }
304 }
305
306 config
307}
308
309fn parse_single_index_config_param(config: &mut IndexConfig, p: pest::iterators::Pair<Rule>) {
311 use super::schema::VectorIndexType;
312
313 match p.as_rule() {
314 Rule::index_type_spec => {
315 config.index_type = Some(match p.as_str() {
316 "flat" => VectorIndexType::Flat,
317 "rabitq" => VectorIndexType::RaBitQ,
318 "ivf_rabitq" => VectorIndexType::IvfRaBitQ,
319 "scann" => VectorIndexType::ScaNN,
320 _ => VectorIndexType::RaBitQ,
321 });
322 }
323 Rule::index_type_kwarg => {
324 if let Some(t) = p.into_inner().next() {
326 config.index_type = Some(match t.as_str() {
327 "flat" => VectorIndexType::Flat,
328 "rabitq" => VectorIndexType::RaBitQ,
329 "ivf_rabitq" => VectorIndexType::IvfRaBitQ,
330 "scann" => VectorIndexType::ScaNN,
331 _ => VectorIndexType::RaBitQ,
332 });
333 }
334 }
335 Rule::num_clusters_kwarg => {
336 if let Some(n) = p.into_inner().next() {
338 config.num_clusters = Some(n.as_str().parse().unwrap_or(256));
339 }
340 }
341 Rule::build_threshold_kwarg => {
342 if let Some(n) = p.into_inner().next() {
344 config.build_threshold = Some(n.as_str().parse().unwrap_or(10000));
345 }
346 }
347 Rule::nprobe_kwarg => {
348 if let Some(n) = p.into_inner().next() {
350 config.nprobe = Some(n.as_str().parse().unwrap_or(32));
351 }
352 }
353 Rule::quantization_kwarg => {
354 if let Some(q) = p.into_inner().next() {
356 config.quantization = Some(match q.as_str() {
357 "float32" | "f32" => WeightQuantization::Float32,
358 "float16" | "f16" => WeightQuantization::Float16,
359 "uint8" | "u8" => WeightQuantization::UInt8,
360 "uint4" | "u4" => WeightQuantization::UInt4,
361 _ => WeightQuantization::default(),
362 });
363 }
364 }
365 Rule::weight_threshold_kwarg => {
366 if let Some(t) = p.into_inner().next() {
368 config.weight_threshold = Some(t.as_str().parse().unwrap_or(0.0));
369 }
370 }
371 Rule::block_size_kwarg => {
372 if let Some(n) = p.into_inner().next() {
374 config.block_size = Some(n.as_str().parse().unwrap_or(128));
375 }
376 }
377 Rule::pruning_kwarg => {
378 if let Some(f) = p.into_inner().next() {
380 config.pruning = Some(f.as_str().parse().unwrap_or(1.0));
381 }
382 }
383 Rule::query_config_block => {
384 parse_query_config_block(config, p);
386 }
387 Rule::positions_kwarg => {
388 use super::schema::PositionMode;
390 config.positions = Some(match p.as_str() {
391 "ordinal" => PositionMode::Ordinal,
392 "token_position" => PositionMode::TokenPosition,
393 _ => PositionMode::Full, });
395 }
396 _ => {}
397 }
398}
399
400fn parse_query_config_block(config: &mut IndexConfig, pair: pest::iterators::Pair<Rule>) {
402 for inner in pair.into_inner() {
403 if inner.as_rule() == Rule::query_config_params {
404 for param in inner.into_inner() {
405 if param.as_rule() == Rule::query_config_param {
406 for p in param.into_inner() {
407 match p.as_rule() {
408 Rule::query_tokenizer_kwarg => {
409 if let Some(path) = p.into_inner().next()
411 && let Some(inner_path) = path.into_inner().next()
412 {
413 config.query_tokenizer = Some(inner_path.as_str().to_string());
414 }
415 }
416 Rule::query_weighting_kwarg => {
417 if let Some(w) = p.into_inner().next() {
419 config.query_weighting = Some(match w.as_str() {
420 "one" => QueryWeighting::One,
421 "idf" => QueryWeighting::Idf,
422 "idf_file" => QueryWeighting::IdfFile,
423 _ => QueryWeighting::One,
424 });
425 }
426 }
427 Rule::query_weight_threshold_kwarg => {
428 if let Some(t) = p.into_inner().next() {
429 config.query_weight_threshold =
430 Some(t.as_str().parse().unwrap_or(0.0));
431 }
432 }
433 Rule::query_max_dims_kwarg => {
434 if let Some(t) = p.into_inner().next() {
435 config.query_max_dims = Some(t.as_str().parse().unwrap_or(0));
436 }
437 }
438 Rule::query_pruning_kwarg => {
439 if let Some(t) = p.into_inner().next() {
440 config.query_pruning = Some(t.as_str().parse().unwrap_or(1.0));
441 }
442 }
443 _ => {}
444 }
445 }
446 }
447 }
448 }
449 }
450}
451
452fn parse_field_def(pair: pest::iterators::Pair<Rule>) -> Result<FieldDef> {
454 let mut inner = pair.into_inner();
455
456 let name = inner
457 .next()
458 .ok_or_else(|| Error::Schema("Missing field name".to_string()))?
459 .as_str()
460 .to_string();
461
462 let field_type_str = inner
463 .next()
464 .ok_or_else(|| Error::Schema("Missing field type".to_string()))?
465 .as_str();
466
467 let field_type = parse_field_type(field_type_str)?;
468
469 let mut tokenizer = None;
471 let mut sparse_vector_config = None;
472 let mut dense_vector_config = None;
473 let mut indexed = true;
474 let mut stored = true;
475 let mut multi = false;
476 let mut fast = false;
477 let mut primary = false;
478 let mut index_config: Option<IndexConfig> = None;
479
480 for item in inner {
481 match item.as_rule() {
482 Rule::tokenizer_spec => {
483 if let Some(tok_name) = item.into_inner().next() {
485 tokenizer = Some(tok_name.as_str().to_string());
486 }
487 }
488 Rule::sparse_vector_config => {
489 sparse_vector_config = Some(parse_sparse_vector_config(item));
491 }
492 Rule::dense_vector_config => {
493 dense_vector_config = Some(parse_dense_vector_config(item));
495 }
496 Rule::attributes => {
497 let (idx, sto, mul, fst, pri, idx_cfg) = parse_attributes(item);
498 indexed = idx;
499 stored = sto;
500 multi = mul;
501 fast = fst;
502 primary = pri;
503 index_config = idx_cfg;
504 }
505 _ => {}
506 }
507 }
508
509 if primary {
511 fast = true;
512 indexed = true;
513 }
514
515 let mut positions = None;
517 if let Some(idx_cfg) = index_config {
518 positions = idx_cfg.positions;
519 if let Some(ref mut dv_config) = dense_vector_config {
520 apply_index_config_to_dense_vector(dv_config, idx_cfg);
521 } else if field_type == FieldType::SparseVector {
522 let sv_config = sparse_vector_config.get_or_insert(SparseVectorConfig::default());
524 apply_index_config_to_sparse_vector(sv_config, idx_cfg);
525 }
526 }
527
528 Ok(FieldDef {
529 name,
530 field_type,
531 indexed,
532 stored,
533 tokenizer,
534 multi,
535 positions,
536 sparse_vector_config,
537 dense_vector_config,
538 fast,
539 primary,
540 })
541}
542
543fn apply_index_config_to_dense_vector(config: &mut DenseVectorConfig, idx_cfg: IndexConfig) {
545 if let Some(index_type) = idx_cfg.index_type {
547 config.index_type = index_type;
548 }
549
550 if idx_cfg.num_clusters.is_some() {
552 config.num_clusters = idx_cfg.num_clusters;
553 }
554
555 if let Some(nprobe) = idx_cfg.nprobe {
557 config.nprobe = nprobe;
558 }
559
560 if idx_cfg.build_threshold.is_some() {
562 config.build_threshold = idx_cfg.build_threshold;
563 }
564}
565
566fn parse_sparse_vector_config(pair: pest::iterators::Pair<Rule>) -> SparseVectorConfig {
569 let mut index_size = IndexSize::default();
570
571 for inner in pair.into_inner() {
573 if inner.as_rule() == Rule::index_size_spec {
574 index_size = match inner.as_str() {
575 "u16" => IndexSize::U16,
576 "u32" => IndexSize::U32,
577 _ => IndexSize::default(),
578 };
579 }
580 }
581
582 SparseVectorConfig {
583 index_size,
584 weight_quantization: WeightQuantization::default(),
585 weight_threshold: 0.0,
586 block_size: 128,
587 pruning: None,
588 query_config: None,
589 }
590}
591
592fn apply_index_config_to_sparse_vector(config: &mut SparseVectorConfig, idx_cfg: IndexConfig) {
594 if let Some(q) = idx_cfg.quantization {
595 config.weight_quantization = q;
596 }
597 if let Some(t) = idx_cfg.weight_threshold {
598 config.weight_threshold = t;
599 }
600 if let Some(bs) = idx_cfg.block_size {
601 let adjusted = bs.next_power_of_two();
602 if adjusted != bs {
603 log::warn!(
604 "block_size {} adjusted to next power of two: {}",
605 bs,
606 adjusted
607 );
608 }
609 config.block_size = adjusted;
610 }
611 if let Some(p) = idx_cfg.pruning {
612 let clamped = p.clamp(0.0, 1.0);
613 if (clamped - p).abs() > f32::EPSILON {
614 log::warn!(
615 "pruning {} clamped to valid range [0.0, 1.0]: {}",
616 p,
617 clamped
618 );
619 }
620 config.pruning = Some(clamped);
621 }
622 if idx_cfg.query_tokenizer.is_some()
624 || idx_cfg.query_weighting.is_some()
625 || idx_cfg.query_weight_threshold.is_some()
626 || idx_cfg.query_max_dims.is_some()
627 || idx_cfg.query_pruning.is_some()
628 {
629 let query_config = config
630 .query_config
631 .get_or_insert(SparseQueryConfig::default());
632 if let Some(tokenizer) = idx_cfg.query_tokenizer {
633 query_config.tokenizer = Some(tokenizer);
634 }
635 if let Some(weighting) = idx_cfg.query_weighting {
636 query_config.weighting = weighting;
637 }
638 if let Some(t) = idx_cfg.query_weight_threshold {
639 query_config.weight_threshold = t;
640 }
641 if let Some(d) = idx_cfg.query_max_dims {
642 query_config.max_query_dims = Some(d);
643 }
644 if let Some(p) = idx_cfg.query_pruning {
645 query_config.pruning = Some(p);
646 }
647 }
648}
649
650fn parse_dense_vector_config(pair: pest::iterators::Pair<Rule>) -> DenseVectorConfig {
653 let mut dim: usize = 0;
654 let mut quantization = DenseVectorQuantization::F32;
655
656 for params in pair.into_inner() {
658 if params.as_rule() == Rule::dense_vector_params {
659 for inner in params.into_inner() {
660 match inner.as_rule() {
661 Rule::dense_vector_keyword_params => {
662 for kwarg in inner.into_inner() {
663 match kwarg.as_rule() {
664 Rule::dims_kwarg => {
665 if let Some(d) = kwarg.into_inner().next() {
666 dim = d.as_str().parse().unwrap_or(0);
667 }
668 }
669 Rule::quant_type_spec => {
670 quantization = parse_quant_type(kwarg.as_str());
671 }
672 _ => {}
673 }
674 }
675 }
676 Rule::dense_vector_positional_params => {
677 for item in inner.into_inner() {
678 match item.as_rule() {
679 Rule::dimension_spec => {
680 dim = item.as_str().parse().unwrap_or(0);
681 }
682 Rule::quant_type_spec => {
683 quantization = parse_quant_type(item.as_str());
684 }
685 _ => {}
686 }
687 }
688 }
689 _ => {}
690 }
691 }
692 }
693 }
694
695 DenseVectorConfig::new(dim).with_quantization(quantization)
696}
697
698fn parse_quant_type(s: &str) -> DenseVectorQuantization {
699 match s.trim() {
700 "f16" => DenseVectorQuantization::F16,
701 "uint8" | "u8" => DenseVectorQuantization::UInt8,
702 _ => DenseVectorQuantization::F32,
703 }
704}
705
706fn parse_default_fields_def(pair: pest::iterators::Pair<Rule>) -> Vec<String> {
708 pair.into_inner().map(|p| p.as_str().to_string()).collect()
709}
710
711fn parse_query_router_def(pair: pest::iterators::Pair<Rule>) -> Result<QueryRouterRule> {
713 let mut pattern = String::new();
714 let mut substitution = String::new();
715 let mut target_field = String::new();
716 let mut mode = RoutingMode::Additional;
717
718 for prop in pair.into_inner() {
719 if prop.as_rule() != Rule::query_router_prop {
720 continue;
721 }
722
723 for inner in prop.into_inner() {
724 match inner.as_rule() {
725 Rule::query_router_pattern => {
726 if let Some(regex_str) = inner.into_inner().next() {
727 pattern = parse_string_value(regex_str);
728 }
729 }
730 Rule::query_router_substitution => {
731 if let Some(quoted) = inner.into_inner().next() {
732 substitution = parse_string_value(quoted);
733 }
734 }
735 Rule::query_router_target => {
736 if let Some(ident) = inner.into_inner().next() {
737 target_field = ident.as_str().to_string();
738 }
739 }
740 Rule::query_router_mode => {
741 if let Some(mode_val) = inner.into_inner().next() {
742 mode = match mode_val.as_str() {
743 "exclusive" => RoutingMode::Exclusive,
744 "additional" => RoutingMode::Additional,
745 _ => RoutingMode::Additional,
746 };
747 }
748 }
749 _ => {}
750 }
751 }
752 }
753
754 if pattern.is_empty() {
755 return Err(Error::Schema("query_router missing 'pattern'".to_string()));
756 }
757 if substitution.is_empty() {
758 return Err(Error::Schema(
759 "query_router missing 'substitution'".to_string(),
760 ));
761 }
762 if target_field.is_empty() {
763 return Err(Error::Schema(
764 "query_router missing 'target_field'".to_string(),
765 ));
766 }
767
768 Ok(QueryRouterRule {
769 pattern,
770 substitution,
771 target_field,
772 mode,
773 })
774}
775
776fn parse_string_value(pair: pest::iterators::Pair<Rule>) -> String {
778 let s = pair.as_str();
779 match pair.as_rule() {
780 Rule::regex_string => {
781 if let Some(inner) = pair.into_inner().next() {
783 parse_string_value(inner)
784 } else {
785 s.to_string()
786 }
787 }
788 Rule::raw_string => {
789 s[2..s.len() - 1].to_string()
791 }
792 Rule::quoted_string => {
793 let inner = &s[1..s.len() - 1];
795 inner
797 .replace("\\n", "\n")
798 .replace("\\t", "\t")
799 .replace("\\\"", "\"")
800 .replace("\\\\", "\\")
801 }
802 _ => s.to_string(),
803 }
804}
805
806fn parse_index_def(pair: pest::iterators::Pair<Rule>) -> Result<IndexDef> {
808 let mut inner = pair.into_inner();
809
810 let name = inner
811 .next()
812 .ok_or_else(|| Error::Schema("Missing index name".to_string()))?
813 .as_str()
814 .to_string();
815
816 let mut fields = Vec::new();
817 let mut default_fields = Vec::new();
818 let mut query_routers = Vec::new();
819
820 for item in inner {
821 match item.as_rule() {
822 Rule::field_def => {
823 fields.push(parse_field_def(item)?);
824 }
825 Rule::default_fields_def => {
826 default_fields = parse_default_fields_def(item);
827 }
828 Rule::query_router_def => {
829 query_routers.push(parse_query_router_def(item)?);
830 }
831 _ => {}
832 }
833 }
834
835 let primary_fields: Vec<&FieldDef> = fields.iter().filter(|f| f.primary).collect();
837 if primary_fields.len() > 1 {
838 return Err(Error::Schema(format!(
839 "Index '{}' has {} primary key fields, but at most one is allowed",
840 name,
841 primary_fields.len()
842 )));
843 }
844 if let Some(pk) = primary_fields.first() {
845 if pk.field_type != FieldType::Text {
846 return Err(Error::Schema(format!(
847 "Primary key field '{}' must be of type text, got {:?}",
848 pk.name, pk.field_type
849 )));
850 }
851 if pk.multi {
852 return Err(Error::Schema(format!(
853 "Primary key field '{}' cannot be multi-valued",
854 pk.name
855 )));
856 }
857 }
858
859 Ok(IndexDef {
860 name,
861 fields,
862 default_fields,
863 query_routers,
864 })
865}
866
867pub fn parse_sdl(input: &str) -> Result<Vec<IndexDef>> {
869 let pairs = SdlParser::parse(Rule::file, input)
870 .map_err(|e| Error::Schema(format!("Parse error: {}", e)))?;
871
872 let mut indexes = Vec::new();
873
874 for pair in pairs {
875 if pair.as_rule() == Rule::file {
876 for inner in pair.into_inner() {
877 if inner.as_rule() == Rule::index_def {
878 indexes.push(parse_index_def(inner)?);
879 }
880 }
881 }
882 }
883
884 Ok(indexes)
885}
886
887pub fn parse_single_index(input: &str) -> Result<IndexDef> {
889 let indexes = parse_sdl(input)?;
890
891 if indexes.is_empty() {
892 return Err(Error::Schema("No index definition found".to_string()));
893 }
894
895 if indexes.len() > 1 {
896 return Err(Error::Schema(
897 "Multiple index definitions found, expected one".to_string(),
898 ));
899 }
900
901 Ok(indexes.into_iter().next().unwrap())
902}
903
904#[cfg(test)]
905mod tests {
906 use super::*;
907
908 #[test]
909 fn test_parse_simple_schema() {
910 let sdl = r#"
911 index articles {
912 field title: text [indexed, stored]
913 field body: text [indexed]
914 }
915 "#;
916
917 let indexes = parse_sdl(sdl).unwrap();
918 assert_eq!(indexes.len(), 1);
919
920 let index = &indexes[0];
921 assert_eq!(index.name, "articles");
922 assert_eq!(index.fields.len(), 2);
923
924 assert_eq!(index.fields[0].name, "title");
925 assert!(matches!(index.fields[0].field_type, FieldType::Text));
926 assert!(index.fields[0].indexed);
927 assert!(index.fields[0].stored);
928
929 assert_eq!(index.fields[1].name, "body");
930 assert!(matches!(index.fields[1].field_type, FieldType::Text));
931 assert!(index.fields[1].indexed);
932 assert!(!index.fields[1].stored);
933 }
934
935 #[test]
936 fn test_parse_all_field_types() {
937 let sdl = r#"
938 index test {
939 field text_field: text [indexed, stored]
940 field u64_field: u64 [indexed, stored]
941 field i64_field: i64 [indexed, stored]
942 field f64_field: f64 [indexed, stored]
943 field bytes_field: bytes [stored]
944 }
945 "#;
946
947 let indexes = parse_sdl(sdl).unwrap();
948 let index = &indexes[0];
949
950 assert!(matches!(index.fields[0].field_type, FieldType::Text));
951 assert!(matches!(index.fields[1].field_type, FieldType::U64));
952 assert!(matches!(index.fields[2].field_type, FieldType::I64));
953 assert!(matches!(index.fields[3].field_type, FieldType::F64));
954 assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
955 }
956
957 #[test]
958 fn test_parse_with_comments() {
959 let sdl = r#"
960 # This is a comment
961 index articles {
962 # Title field
963 field title: text [indexed, stored]
964 field body: text [indexed] # inline comment not supported yet
965 }
966 "#;
967
968 let indexes = parse_sdl(sdl).unwrap();
969 assert_eq!(indexes[0].fields.len(), 2);
970 }
971
972 #[test]
973 fn test_parse_type_aliases() {
974 let sdl = r#"
975 index test {
976 field a: string [indexed]
977 field b: int [indexed]
978 field c: uint [indexed]
979 field d: float [indexed]
980 field e: binary [stored]
981 }
982 "#;
983
984 let indexes = parse_sdl(sdl).unwrap();
985 let index = &indexes[0];
986
987 assert!(matches!(index.fields[0].field_type, FieldType::Text));
988 assert!(matches!(index.fields[1].field_type, FieldType::I64));
989 assert!(matches!(index.fields[2].field_type, FieldType::U64));
990 assert!(matches!(index.fields[3].field_type, FieldType::F64));
991 assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
992 }
993
994 #[test]
995 fn test_to_schema() {
996 let sdl = r#"
997 index articles {
998 field title: text [indexed, stored]
999 field views: u64 [indexed, stored]
1000 }
1001 "#;
1002
1003 let indexes = parse_sdl(sdl).unwrap();
1004 let schema = indexes[0].to_schema();
1005
1006 assert!(schema.get_field("title").is_some());
1007 assert!(schema.get_field("views").is_some());
1008 assert!(schema.get_field("nonexistent").is_none());
1009 }
1010
1011 #[test]
1012 fn test_default_attributes() {
1013 let sdl = r#"
1014 index test {
1015 field title: text
1016 }
1017 "#;
1018
1019 let indexes = parse_sdl(sdl).unwrap();
1020 let field = &indexes[0].fields[0];
1021
1022 assert!(field.indexed);
1024 assert!(field.stored);
1025 }
1026
1027 #[test]
1028 fn test_multiple_indexes() {
1029 let sdl = r#"
1030 index articles {
1031 field title: text [indexed, stored]
1032 }
1033
1034 index users {
1035 field name: text [indexed, stored]
1036 field email: text [indexed, stored]
1037 }
1038 "#;
1039
1040 let indexes = parse_sdl(sdl).unwrap();
1041 assert_eq!(indexes.len(), 2);
1042 assert_eq!(indexes[0].name, "articles");
1043 assert_eq!(indexes[1].name, "users");
1044 }
1045
1046 #[test]
1047 fn test_tokenizer_spec() {
1048 let sdl = r#"
1049 index articles {
1050 field title: text<en_stem> [indexed, stored]
1051 field body: text<simple> [indexed]
1052 field author: text [indexed, stored]
1053 }
1054 "#;
1055
1056 let indexes = parse_sdl(sdl).unwrap();
1057 let index = &indexes[0];
1058
1059 assert_eq!(index.fields[0].name, "title");
1060 assert_eq!(index.fields[0].tokenizer, Some("en_stem".to_string()));
1061
1062 assert_eq!(index.fields[1].name, "body");
1063 assert_eq!(index.fields[1].tokenizer, Some("simple".to_string()));
1064
1065 assert_eq!(index.fields[2].name, "author");
1066 assert_eq!(index.fields[2].tokenizer, None); }
1068
1069 #[test]
1070 fn test_tokenizer_in_schema() {
1071 let sdl = r#"
1072 index articles {
1073 field title: text<german> [indexed, stored]
1074 field body: text<en_stem> [indexed]
1075 }
1076 "#;
1077
1078 let indexes = parse_sdl(sdl).unwrap();
1079 let schema = indexes[0].to_schema();
1080
1081 let title_field = schema.get_field("title").unwrap();
1082 let title_entry = schema.get_field_entry(title_field).unwrap();
1083 assert_eq!(title_entry.tokenizer, Some("german".to_string()));
1084
1085 let body_field = schema.get_field("body").unwrap();
1086 let body_entry = schema.get_field_entry(body_field).unwrap();
1087 assert_eq!(body_entry.tokenizer, Some("en_stem".to_string()));
1088 }
1089
1090 #[test]
1091 fn test_query_router_basic() {
1092 let sdl = r#"
1093 index documents {
1094 field title: text [indexed, stored]
1095 field uri: text [indexed, stored]
1096
1097 query_router {
1098 pattern: "10\\.\\d{4,}/[^\\s]+"
1099 substitution: "doi://{0}"
1100 target_field: uris
1101 mode: exclusive
1102 }
1103 }
1104 "#;
1105
1106 let indexes = parse_sdl(sdl).unwrap();
1107 let index = &indexes[0];
1108
1109 assert_eq!(index.query_routers.len(), 1);
1110 let router = &index.query_routers[0];
1111 assert_eq!(router.pattern, r"10\.\d{4,}/[^\s]+");
1112 assert_eq!(router.substitution, "doi://{0}");
1113 assert_eq!(router.target_field, "uris");
1114 assert_eq!(router.mode, RoutingMode::Exclusive);
1115 }
1116
1117 #[test]
1118 fn test_query_router_raw_string() {
1119 let sdl = r#"
1120 index documents {
1121 field uris: text [indexed, stored]
1122
1123 query_router {
1124 pattern: r"^pmid:(\d+)$"
1125 substitution: "pubmed://{1}"
1126 target_field: uris
1127 mode: additional
1128 }
1129 }
1130 "#;
1131
1132 let indexes = parse_sdl(sdl).unwrap();
1133 let router = &indexes[0].query_routers[0];
1134
1135 assert_eq!(router.pattern, r"^pmid:(\d+)$");
1136 assert_eq!(router.substitution, "pubmed://{1}");
1137 assert_eq!(router.mode, RoutingMode::Additional);
1138 }
1139
1140 #[test]
1141 fn test_multiple_query_routers() {
1142 let sdl = r#"
1143 index documents {
1144 field uris: text [indexed, stored]
1145
1146 query_router {
1147 pattern: r"^doi:(10\.\d{4,}/[^\s]+)$"
1148 substitution: "doi://{1}"
1149 target_field: uris
1150 mode: exclusive
1151 }
1152
1153 query_router {
1154 pattern: r"^pmid:(\d+)$"
1155 substitution: "pubmed://{1}"
1156 target_field: uris
1157 mode: exclusive
1158 }
1159
1160 query_router {
1161 pattern: r"^arxiv:(\d+\.\d+)$"
1162 substitution: "arxiv://{1}"
1163 target_field: uris
1164 mode: additional
1165 }
1166 }
1167 "#;
1168
1169 let indexes = parse_sdl(sdl).unwrap();
1170 assert_eq!(indexes[0].query_routers.len(), 3);
1171 }
1172
1173 #[test]
1174 fn test_query_router_default_mode() {
1175 let sdl = r#"
1176 index documents {
1177 field uris: text [indexed, stored]
1178
1179 query_router {
1180 pattern: r"test"
1181 substitution: "{0}"
1182 target_field: uris
1183 }
1184 }
1185 "#;
1186
1187 let indexes = parse_sdl(sdl).unwrap();
1188 assert_eq!(indexes[0].query_routers[0].mode, RoutingMode::Additional);
1190 }
1191
1192 #[test]
1193 fn test_multi_attribute() {
1194 let sdl = r#"
1195 index documents {
1196 field uris: text [indexed, stored<multi>]
1197 field title: text [indexed, stored]
1198 }
1199 "#;
1200
1201 let indexes = parse_sdl(sdl).unwrap();
1202 assert_eq!(indexes.len(), 1);
1203
1204 let fields = &indexes[0].fields;
1205 assert_eq!(fields.len(), 2);
1206
1207 assert_eq!(fields[0].name, "uris");
1209 assert!(fields[0].multi, "uris field should have multi=true");
1210
1211 assert_eq!(fields[1].name, "title");
1213 assert!(!fields[1].multi, "title field should have multi=false");
1214
1215 let schema = indexes[0].to_schema();
1217 let uris_field = schema.get_field("uris").unwrap();
1218 let title_field = schema.get_field("title").unwrap();
1219
1220 assert!(schema.get_field_entry(uris_field).unwrap().multi);
1221 assert!(!schema.get_field_entry(title_field).unwrap().multi);
1222 }
1223
1224 #[test]
1225 fn test_sparse_vector_field() {
1226 let sdl = r#"
1227 index documents {
1228 field embedding: sparse_vector [indexed, stored]
1229 }
1230 "#;
1231
1232 let indexes = parse_sdl(sdl).unwrap();
1233 assert_eq!(indexes.len(), 1);
1234 assert_eq!(indexes[0].fields.len(), 1);
1235 assert_eq!(indexes[0].fields[0].name, "embedding");
1236 assert_eq!(indexes[0].fields[0].field_type, FieldType::SparseVector);
1237 assert!(indexes[0].fields[0].sparse_vector_config.is_none());
1238 }
1239
1240 #[test]
1241 fn test_sparse_vector_with_config() {
1242 let sdl = r#"
1243 index documents {
1244 field embedding: sparse_vector<u16> [indexed<quantization: uint8>, stored]
1245 field dense: sparse_vector<u32> [indexed<quantization: float32>]
1246 }
1247 "#;
1248
1249 let indexes = parse_sdl(sdl).unwrap();
1250 assert_eq!(indexes[0].fields.len(), 2);
1251
1252 let f1 = &indexes[0].fields[0];
1254 assert_eq!(f1.name, "embedding");
1255 let config1 = f1.sparse_vector_config.as_ref().unwrap();
1256 assert_eq!(config1.index_size, IndexSize::U16);
1257 assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);
1258
1259 let f2 = &indexes[0].fields[1];
1261 assert_eq!(f2.name, "dense");
1262 let config2 = f2.sparse_vector_config.as_ref().unwrap();
1263 assert_eq!(config2.index_size, IndexSize::U32);
1264 assert_eq!(config2.weight_quantization, WeightQuantization::Float32);
1265 }
1266
1267 #[test]
1268 fn test_sparse_vector_with_weight_threshold() {
1269 let sdl = r#"
1270 index documents {
1271 field embedding: sparse_vector<u16> [indexed<quantization: uint8, weight_threshold: 0.1>, stored]
1272 field embedding2: sparse_vector<u32> [indexed<quantization: float16, weight_threshold: 0.05>]
1273 }
1274 "#;
1275
1276 let indexes = parse_sdl(sdl).unwrap();
1277 assert_eq!(indexes[0].fields.len(), 2);
1278
1279 let f1 = &indexes[0].fields[0];
1281 assert_eq!(f1.name, "embedding");
1282 let config1 = f1.sparse_vector_config.as_ref().unwrap();
1283 assert_eq!(config1.index_size, IndexSize::U16);
1284 assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);
1285 assert!((config1.weight_threshold - 0.1).abs() < 0.001);
1286
1287 let f2 = &indexes[0].fields[1];
1289 assert_eq!(f2.name, "embedding2");
1290 let config2 = f2.sparse_vector_config.as_ref().unwrap();
1291 assert_eq!(config2.index_size, IndexSize::U32);
1292 assert_eq!(config2.weight_quantization, WeightQuantization::Float16);
1293 assert!((config2.weight_threshold - 0.05).abs() < 0.001);
1294 }
1295
1296 #[test]
1297 fn test_sparse_vector_with_pruning() {
1298 let sdl = r#"
1299 index documents {
1300 field embedding: sparse_vector [indexed<quantization: uint8, pruning: 0.1>, stored]
1301 }
1302 "#;
1303
1304 let indexes = parse_sdl(sdl).unwrap();
1305 let f = &indexes[0].fields[0];
1306 assert_eq!(f.name, "embedding");
1307 let config = f.sparse_vector_config.as_ref().unwrap();
1308 assert_eq!(config.weight_quantization, WeightQuantization::UInt8);
1309 assert_eq!(config.pruning, Some(0.1));
1310 }
1311
1312 #[test]
1313 fn test_dense_vector_field() {
1314 let sdl = r#"
1315 index documents {
1316 field embedding: dense_vector<768> [indexed, stored]
1317 }
1318 "#;
1319
1320 let indexes = parse_sdl(sdl).unwrap();
1321 assert_eq!(indexes.len(), 1);
1322 assert_eq!(indexes[0].fields.len(), 1);
1323
1324 let f = &indexes[0].fields[0];
1325 assert_eq!(f.name, "embedding");
1326 assert_eq!(f.field_type, FieldType::DenseVector);
1327
1328 let config = f.dense_vector_config.as_ref().unwrap();
1329 assert_eq!(config.dim, 768);
1330 }
1331
1332 #[test]
1333 fn test_dense_vector_alias() {
1334 let sdl = r#"
1335 index documents {
1336 field embedding: vector<1536> [indexed]
1337 }
1338 "#;
1339
1340 let indexes = parse_sdl(sdl).unwrap();
1341 assert_eq!(indexes[0].fields[0].field_type, FieldType::DenseVector);
1342 assert_eq!(
1343 indexes[0].fields[0]
1344 .dense_vector_config
1345 .as_ref()
1346 .unwrap()
1347 .dim,
1348 1536
1349 );
1350 }
1351
1352 #[test]
1353 fn test_dense_vector_with_num_clusters() {
1354 let sdl = r#"
1355 index documents {
1356 field embedding: dense_vector<768> [indexed<ivf_rabitq, num_clusters: 256>, stored]
1357 }
1358 "#;
1359
1360 let indexes = parse_sdl(sdl).unwrap();
1361 assert_eq!(indexes.len(), 1);
1362
1363 let f = &indexes[0].fields[0];
1364 assert_eq!(f.name, "embedding");
1365 assert_eq!(f.field_type, FieldType::DenseVector);
1366
1367 let config = f.dense_vector_config.as_ref().unwrap();
1368 assert_eq!(config.dim, 768);
1369 assert_eq!(config.num_clusters, Some(256));
1370 assert_eq!(config.nprobe, 32); }
1372
1373 #[test]
1374 fn test_dense_vector_with_num_clusters_and_nprobe() {
1375 let sdl = r#"
1376 index documents {
1377 field embedding: dense_vector<1536> [indexed<ivf_rabitq, num_clusters: 512, nprobe: 64>]
1378 }
1379 "#;
1380
1381 let indexes = parse_sdl(sdl).unwrap();
1382 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1383
1384 assert_eq!(config.dim, 1536);
1385 assert_eq!(config.num_clusters, Some(512));
1386 assert_eq!(config.nprobe, 64);
1387 }
1388
1389 #[test]
1390 fn test_dense_vector_keyword_syntax() {
1391 let sdl = r#"
1392 index documents {
1393 field embedding: dense_vector<dims: 1536> [indexed, stored]
1394 }
1395 "#;
1396
1397 let indexes = parse_sdl(sdl).unwrap();
1398 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1399
1400 assert_eq!(config.dim, 1536);
1401 assert!(config.num_clusters.is_none());
1402 }
1403
1404 #[test]
1405 fn test_dense_vector_keyword_syntax_full() {
1406 let sdl = r#"
1407 index documents {
1408 field embedding: dense_vector<dims: 1536> [indexed<ivf_rabitq, num_clusters: 256, nprobe: 64>]
1409 }
1410 "#;
1411
1412 let indexes = parse_sdl(sdl).unwrap();
1413 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1414
1415 assert_eq!(config.dim, 1536);
1416 assert_eq!(config.num_clusters, Some(256));
1417 assert_eq!(config.nprobe, 64);
1418 }
1419
1420 #[test]
1421 fn test_dense_vector_keyword_syntax_partial() {
1422 let sdl = r#"
1423 index documents {
1424 field embedding: dense_vector<dims: 768> [indexed<ivf_rabitq, num_clusters: 128>]
1425 }
1426 "#;
1427
1428 let indexes = parse_sdl(sdl).unwrap();
1429 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1430
1431 assert_eq!(config.dim, 768);
1432 assert_eq!(config.num_clusters, Some(128));
1433 assert_eq!(config.nprobe, 32); }
1435
1436 #[test]
1437 fn test_dense_vector_scann_index() {
1438 use crate::dsl::schema::VectorIndexType;
1439
1440 let sdl = r#"
1441 index documents {
1442 field embedding: dense_vector<dims: 768> [indexed<scann, num_clusters: 256, nprobe: 64>]
1443 }
1444 "#;
1445
1446 let indexes = parse_sdl(sdl).unwrap();
1447 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1448
1449 assert_eq!(config.dim, 768);
1450 assert_eq!(config.index_type, VectorIndexType::ScaNN);
1451 assert_eq!(config.num_clusters, Some(256));
1452 assert_eq!(config.nprobe, 64);
1453 }
1454
1455 #[test]
1456 fn test_dense_vector_ivf_rabitq_index() {
1457 use crate::dsl::schema::VectorIndexType;
1458
1459 let sdl = r#"
1460 index documents {
1461 field embedding: dense_vector<dims: 1536> [indexed<ivf_rabitq, num_clusters: 512>]
1462 }
1463 "#;
1464
1465 let indexes = parse_sdl(sdl).unwrap();
1466 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1467
1468 assert_eq!(config.dim, 1536);
1469 assert_eq!(config.index_type, VectorIndexType::IvfRaBitQ);
1470 assert_eq!(config.num_clusters, Some(512));
1471 }
1472
1473 #[test]
1474 fn test_dense_vector_rabitq_no_clusters() {
1475 use crate::dsl::schema::VectorIndexType;
1476
1477 let sdl = r#"
1478 index documents {
1479 field embedding: dense_vector<dims: 768> [indexed<rabitq>]
1480 }
1481 "#;
1482
1483 let indexes = parse_sdl(sdl).unwrap();
1484 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1485
1486 assert_eq!(config.dim, 768);
1487 assert_eq!(config.index_type, VectorIndexType::RaBitQ);
1488 assert!(config.num_clusters.is_none());
1489 }
1490
1491 #[test]
1492 fn test_dense_vector_flat_index() {
1493 use crate::dsl::schema::VectorIndexType;
1494
1495 let sdl = r#"
1496 index documents {
1497 field embedding: dense_vector<dims: 768> [indexed<flat>]
1498 }
1499 "#;
1500
1501 let indexes = parse_sdl(sdl).unwrap();
1502 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1503
1504 assert_eq!(config.dim, 768);
1505 assert_eq!(config.index_type, VectorIndexType::Flat);
1506 }
1507
1508 #[test]
1509 fn test_dense_vector_default_index_type() {
1510 use crate::dsl::schema::VectorIndexType;
1511
1512 let sdl = r#"
1514 index documents {
1515 field embedding: dense_vector<dims: 768> [indexed]
1516 }
1517 "#;
1518
1519 let indexes = parse_sdl(sdl).unwrap();
1520 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1521
1522 assert_eq!(config.dim, 768);
1523 assert_eq!(config.index_type, VectorIndexType::RaBitQ);
1524 }
1525
1526 #[test]
1527 fn test_dense_vector_f16_quantization() {
1528 use crate::dsl::schema::{DenseVectorQuantization, VectorIndexType};
1529
1530 let sdl = r#"
1531 index documents {
1532 field embedding: dense_vector<768, f16> [indexed]
1533 }
1534 "#;
1535
1536 let indexes = parse_sdl(sdl).unwrap();
1537 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1538
1539 assert_eq!(config.dim, 768);
1540 assert_eq!(config.quantization, DenseVectorQuantization::F16);
1541 assert_eq!(config.index_type, VectorIndexType::RaBitQ);
1542 }
1543
1544 #[test]
1545 fn test_dense_vector_uint8_quantization() {
1546 use crate::dsl::schema::DenseVectorQuantization;
1547
1548 let sdl = r#"
1549 index documents {
1550 field embedding: dense_vector<1024, uint8> [indexed<ivf_rabitq>]
1551 }
1552 "#;
1553
1554 let indexes = parse_sdl(sdl).unwrap();
1555 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1556
1557 assert_eq!(config.dim, 1024);
1558 assert_eq!(config.quantization, DenseVectorQuantization::UInt8);
1559 }
1560
1561 #[test]
1562 fn test_dense_vector_u8_alias() {
1563 use crate::dsl::schema::DenseVectorQuantization;
1564
1565 let sdl = r#"
1566 index documents {
1567 field embedding: dense_vector<512, u8> [indexed]
1568 }
1569 "#;
1570
1571 let indexes = parse_sdl(sdl).unwrap();
1572 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1573
1574 assert_eq!(config.dim, 512);
1575 assert_eq!(config.quantization, DenseVectorQuantization::UInt8);
1576 }
1577
1578 #[test]
1579 fn test_dense_vector_default_f32_quantization() {
1580 use crate::dsl::schema::DenseVectorQuantization;
1581
1582 let sdl = r#"
1584 index documents {
1585 field embedding: dense_vector<768> [indexed]
1586 }
1587 "#;
1588
1589 let indexes = parse_sdl(sdl).unwrap();
1590 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1591
1592 assert_eq!(config.dim, 768);
1593 assert_eq!(config.quantization, DenseVectorQuantization::F32);
1594 }
1595
1596 #[test]
1597 fn test_dense_vector_keyword_with_quantization() {
1598 use crate::dsl::schema::DenseVectorQuantization;
1599
1600 let sdl = r#"
1601 index documents {
1602 field embedding: dense_vector<dims: 768, f16> [indexed]
1603 }
1604 "#;
1605
1606 let indexes = parse_sdl(sdl).unwrap();
1607 let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1608
1609 assert_eq!(config.dim, 768);
1610 assert_eq!(config.quantization, DenseVectorQuantization::F16);
1611 }
1612
1613 #[test]
1614 fn test_json_field_type() {
1615 let sdl = r#"
1616 index documents {
1617 field title: text [indexed, stored]
1618 field metadata: json [stored]
1619 field extra: json
1620 }
1621 "#;
1622
1623 let indexes = parse_sdl(sdl).unwrap();
1624 let index = &indexes[0];
1625
1626 assert_eq!(index.fields.len(), 3);
1627
1628 assert_eq!(index.fields[1].name, "metadata");
1630 assert!(matches!(index.fields[1].field_type, FieldType::Json));
1631 assert!(index.fields[1].stored);
1632 assert_eq!(index.fields[2].name, "extra");
1636 assert!(matches!(index.fields[2].field_type, FieldType::Json));
1637
1638 let schema = index.to_schema();
1640 let metadata_field = schema.get_field("metadata").unwrap();
1641 let entry = schema.get_field_entry(metadata_field).unwrap();
1642 assert_eq!(entry.field_type, FieldType::Json);
1643 assert!(!entry.indexed); assert!(entry.stored);
1645 }
1646
1647 #[test]
1648 fn test_sparse_vector_query_config() {
1649 use crate::structures::QueryWeighting;
1650
1651 let sdl = r#"
1652 index documents {
1653 field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<tokenizer: "Alibaba-NLP/gte-Qwen2-1.5B-instruct", weighting: idf>>]
1654 }
1655 "#;
1656
1657 let indexes = parse_sdl(sdl).unwrap();
1658 let index = &indexes[0];
1659
1660 assert_eq!(index.fields.len(), 1);
1661 assert_eq!(index.fields[0].name, "embedding");
1662 assert!(matches!(
1663 index.fields[0].field_type,
1664 FieldType::SparseVector
1665 ));
1666
1667 let config = index.fields[0].sparse_vector_config.as_ref().unwrap();
1668 assert_eq!(config.index_size, IndexSize::U16);
1669 assert_eq!(config.weight_quantization, WeightQuantization::UInt8);
1670
1671 let query_config = config.query_config.as_ref().unwrap();
1673 assert_eq!(
1674 query_config.tokenizer.as_deref(),
1675 Some("Alibaba-NLP/gte-Qwen2-1.5B-instruct")
1676 );
1677 assert_eq!(query_config.weighting, QueryWeighting::Idf);
1678
1679 let schema = index.to_schema();
1681 let embedding_field = schema.get_field("embedding").unwrap();
1682 let entry = schema.get_field_entry(embedding_field).unwrap();
1683 let sv_config = entry.sparse_vector_config.as_ref().unwrap();
1684 let qc = sv_config.query_config.as_ref().unwrap();
1685 assert_eq!(
1686 qc.tokenizer.as_deref(),
1687 Some("Alibaba-NLP/gte-Qwen2-1.5B-instruct")
1688 );
1689 assert_eq!(qc.weighting, QueryWeighting::Idf);
1690 }
1691
1692 #[test]
1693 fn test_sparse_vector_query_config_weighting_one() {
1694 use crate::structures::QueryWeighting;
1695
1696 let sdl = r#"
1697 index documents {
1698 field embedding: sparse_vector [indexed<query<weighting: one>>]
1699 }
1700 "#;
1701
1702 let indexes = parse_sdl(sdl).unwrap();
1703 let config = indexes[0].fields[0].sparse_vector_config.as_ref().unwrap();
1704
1705 let query_config = config.query_config.as_ref().unwrap();
1706 assert!(query_config.tokenizer.is_none());
1707 assert_eq!(query_config.weighting, QueryWeighting::One);
1708 }
1709
1710 #[test]
1711 fn test_sparse_vector_query_config_weighting_idf_file() {
1712 use crate::structures::QueryWeighting;
1713
1714 let sdl = r#"
1715 index documents {
1716 field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<tokenizer: "opensearch-neural-sparse-encoding-v1", weighting: idf_file>>]
1717 }
1718 "#;
1719
1720 let indexes = parse_sdl(sdl).unwrap();
1721 let config = indexes[0].fields[0].sparse_vector_config.as_ref().unwrap();
1722
1723 let query_config = config.query_config.as_ref().unwrap();
1724 assert_eq!(
1725 query_config.tokenizer.as_deref(),
1726 Some("opensearch-neural-sparse-encoding-v1")
1727 );
1728 assert_eq!(query_config.weighting, QueryWeighting::IdfFile);
1729
1730 let schema = indexes[0].to_schema();
1732 let field = schema.get_field("embedding").unwrap();
1733 let entry = schema.get_field_entry(field).unwrap();
1734 let sc = entry.sparse_vector_config.as_ref().unwrap();
1735 let qc = sc.query_config.as_ref().unwrap();
1736 assert_eq!(qc.weighting, QueryWeighting::IdfFile);
1737 }
1738
1739 #[test]
1740 fn test_sparse_vector_query_config_pruning_params() {
1741 let sdl = r#"
1742 index documents {
1743 field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<weighting: idf, weight_threshold: 0.03, max_dims: 25, pruning: 0.2>>]
1744 }
1745 "#;
1746
1747 let indexes = parse_sdl(sdl).unwrap();
1748 let config = indexes[0].fields[0].sparse_vector_config.as_ref().unwrap();
1749
1750 let qc = config.query_config.as_ref().unwrap();
1751 assert_eq!(qc.weighting, QueryWeighting::Idf);
1752 assert!((qc.weight_threshold - 0.03).abs() < 0.001);
1753 assert_eq!(qc.max_query_dims, Some(25));
1754 assert!((qc.pruning.unwrap() - 0.2).abs() < 0.001);
1755
1756 let schema = indexes[0].to_schema();
1758 let field = schema.get_field("embedding").unwrap();
1759 let entry = schema.get_field_entry(field).unwrap();
1760 let sc = entry.sparse_vector_config.as_ref().unwrap();
1761 let rqc = sc.query_config.as_ref().unwrap();
1762 assert!((rqc.weight_threshold - 0.03).abs() < 0.001);
1763 assert_eq!(rqc.max_query_dims, Some(25));
1764 assert!((rqc.pruning.unwrap() - 0.2).abs() < 0.001);
1765 }
1766
1767 #[test]
1768 fn test_fast_attribute() {
1769 let sdl = r#"
1770 index products {
1771 field name: text [indexed, stored]
1772 field price: f64 [indexed, fast]
1773 field category: text [indexed, stored, fast]
1774 field count: u64 [fast]
1775 field score: i64 [indexed, stored, fast]
1776 }
1777 "#;
1778
1779 let indexes = parse_sdl(sdl).unwrap();
1780 assert_eq!(indexes.len(), 1);
1781 let index = &indexes[0];
1782 assert_eq!(index.fields.len(), 5);
1783
1784 assert!(!index.fields[0].fast);
1786 assert!(index.fields[1].fast);
1788 assert!(matches!(index.fields[1].field_type, FieldType::F64));
1789 assert!(index.fields[2].fast);
1791 assert!(matches!(index.fields[2].field_type, FieldType::Text));
1792 assert!(index.fields[3].fast);
1794 assert!(matches!(index.fields[3].field_type, FieldType::U64));
1795 assert!(index.fields[4].fast);
1797 assert!(matches!(index.fields[4].field_type, FieldType::I64));
1798
1799 let schema = index.to_schema();
1801 let price_field = schema.get_field("price").unwrap();
1802 assert!(schema.get_field_entry(price_field).unwrap().fast);
1803
1804 let category_field = schema.get_field("category").unwrap();
1805 assert!(schema.get_field_entry(category_field).unwrap().fast);
1806
1807 let name_field = schema.get_field("name").unwrap();
1808 assert!(!schema.get_field_entry(name_field).unwrap().fast);
1809 }
1810
1811 #[test]
1812 fn test_primary_attribute() {
1813 let sdl = r#"
1814 index documents {
1815 field id: text [primary, stored]
1816 field title: text [indexed, stored]
1817 }
1818 "#;
1819
1820 let indexes = parse_sdl(sdl).unwrap();
1821 assert_eq!(indexes.len(), 1);
1822 let index = &indexes[0];
1823 assert_eq!(index.fields.len(), 2);
1824
1825 let id_field = &index.fields[0];
1827 assert!(id_field.primary, "id should be primary");
1828 assert!(id_field.fast, "primary implies fast");
1829 assert!(id_field.indexed, "primary implies indexed");
1830
1831 assert!(!index.fields[1].primary);
1833
1834 let schema = index.to_schema();
1836 let id = schema.get_field("id").unwrap();
1837 let id_entry = schema.get_field_entry(id).unwrap();
1838 assert!(id_entry.primary_key);
1839 assert!(id_entry.fast);
1840 assert!(id_entry.indexed);
1841
1842 let title = schema.get_field("title").unwrap();
1843 assert!(!schema.get_field_entry(title).unwrap().primary_key);
1844
1845 assert_eq!(schema.primary_field(), Some(id));
1847 }
1848
1849 #[test]
1850 fn test_primary_with_other_attributes() {
1851 let sdl = r#"
1852 index documents {
1853 field id: text<simple> [primary, indexed, stored]
1854 field body: text [indexed]
1855 }
1856 "#;
1857
1858 let indexes = parse_sdl(sdl).unwrap();
1859 let id_field = &indexes[0].fields[0];
1860 assert!(id_field.primary);
1861 assert!(id_field.indexed);
1862 assert!(id_field.stored);
1863 assert!(id_field.fast);
1864 assert_eq!(id_field.tokenizer, Some("simple".to_string()));
1865 }
1866
1867 #[test]
1868 fn test_primary_only_one_allowed() {
1869 let sdl = r#"
1870 index documents {
1871 field id: text [primary]
1872 field alt_id: text [primary]
1873 }
1874 "#;
1875
1876 let result = parse_sdl(sdl);
1877 assert!(result.is_err());
1878 let err = result.unwrap_err().to_string();
1879 assert!(
1880 err.contains("primary key"),
1881 "Error should mention primary key: {}",
1882 err
1883 );
1884 }
1885
1886 #[test]
1887 fn test_primary_must_be_text() {
1888 let sdl = r#"
1889 index documents {
1890 field id: u64 [primary]
1891 }
1892 "#;
1893
1894 let result = parse_sdl(sdl);
1895 assert!(result.is_err());
1896 let err = result.unwrap_err().to_string();
1897 assert!(
1898 err.contains("text"),
1899 "Error should mention text type: {}",
1900 err
1901 );
1902 }
1903
1904 #[test]
1905 fn test_primary_cannot_be_multi() {
1906 let sdl = r#"
1907 index documents {
1908 field id: text [primary, stored<multi>]
1909 }
1910 "#;
1911
1912 let result = parse_sdl(sdl);
1913 assert!(result.is_err());
1914 let err = result.unwrap_err().to_string();
1915 assert!(err.contains("multi"), "Error should mention multi: {}", err);
1916 }
1917
1918 #[test]
1919 fn test_no_primary_field() {
1920 let sdl = r#"
1922 index documents {
1923 field title: text [indexed, stored]
1924 }
1925 "#;
1926
1927 let indexes = parse_sdl(sdl).unwrap();
1928 let schema = indexes[0].to_schema();
1929 assert!(schema.primary_field().is_none());
1930 }
1931}