1use pest::Parser;
34use pest_derive::Parser;
35
36use super::query_field_router::{QueryRouterRule, RoutingMode};
37use super::schema::{FieldType, Schema, SchemaBuilder};
38use crate::Result;
39use crate::error::Error;
40
/// Parser for the SDL grammar stored in `dsl/sdl/sdl.pest`.
///
/// The `pest_derive::Parser` derive implements `pest::Parser` for this unit
/// struct; the helper functions in this file match on the `Rule` values it
/// produces.
#[derive(Parser)]
#[grammar = "dsl/sdl/sdl.pest"]
pub struct SdlParser;
44
45use super::schema::DenseVectorConfig;
46use crate::structures::{IndexSize, SparseVectorConfig, WeightQuantization};
47
/// A single field declaration as parsed from an index definition.
#[derive(Debug, Clone)]
pub struct FieldDef {
    /// Field name as written in the definition.
    pub name: String,
    /// Field type resolved from the type keyword (or one of its aliases).
    pub field_type: FieldType,
    /// Whether the field is indexed.
    pub indexed: bool,
    /// Whether the field is stored.
    pub stored: bool,
    /// Tokenizer name for text fields; `to_schema` uses `"default"` when absent.
    pub tokenizer: Option<String>,
    /// Whether the field was declared with the `multi` attribute.
    pub multi: bool,
    /// Sparse vector configuration, if one was given in the definition.
    pub sparse_vector_config: Option<SparseVectorConfig>,
    /// Dense vector configuration; `to_schema` requires it for `DenseVector` fields.
    pub dense_vector_config: Option<DenseVectorConfig>,
}
64
/// A parsed index definition: its name, fields, and query-related settings.
#[derive(Debug, Clone)]
pub struct IndexDef {
    /// Index name as written in the definition.
    pub name: String,
    /// Field declarations in the order they appear in the definition.
    pub fields: Vec<FieldDef>,
    /// Names listed in the index's default-fields declaration (empty if none).
    pub default_fields: Vec<String>,
    /// Query router rules in declaration order (empty if none).
    pub query_routers: Vec<QueryRouterRule>,
}
74
75impl IndexDef {
76 pub fn to_schema(&self) -> Schema {
78 let mut builder = SchemaBuilder::default();
79
80 for field in &self.fields {
81 let f = match field.field_type {
82 FieldType::Text => {
83 let tokenizer = field.tokenizer.as_deref().unwrap_or("default");
84 builder.add_text_field_with_tokenizer(
85 &field.name,
86 field.indexed,
87 field.stored,
88 tokenizer,
89 )
90 }
91 FieldType::U64 => builder.add_u64_field(&field.name, field.indexed, field.stored),
92 FieldType::I64 => builder.add_i64_field(&field.name, field.indexed, field.stored),
93 FieldType::F64 => builder.add_f64_field(&field.name, field.indexed, field.stored),
94 FieldType::Bytes => builder.add_bytes_field(&field.name, field.stored),
95 FieldType::SparseVector => {
96 if let Some(config) = &field.sparse_vector_config {
97 builder.add_sparse_vector_field_with_config(
98 &field.name,
99 field.indexed,
100 field.stored,
101 *config,
102 )
103 } else {
104 builder.add_sparse_vector_field(&field.name, field.indexed, field.stored)
105 }
106 }
107 FieldType::DenseVector => {
108 let config = field
110 .dense_vector_config
111 .as_ref()
112 .expect("DenseVector field requires dimension to be specified");
113 builder.add_dense_vector_field_with_config(
114 &field.name,
115 field.indexed,
116 field.stored,
117 config.clone(),
118 )
119 }
120 };
121 if field.multi {
122 builder.set_multi(f, true);
123 }
124 }
125
126 if !self.default_fields.is_empty() {
128 builder.set_default_fields(self.default_fields.clone());
129 }
130
131 if !self.query_routers.is_empty() {
133 builder.set_query_routers(self.query_routers.clone());
134 }
135
136 builder.build()
137 }
138
139 pub fn to_query_router(&self) -> Result<Option<super::query_field_router::QueryFieldRouter>> {
144 if self.query_routers.is_empty() {
145 return Ok(None);
146 }
147
148 super::query_field_router::QueryFieldRouter::from_rules(&self.query_routers)
149 .map(Some)
150 .map_err(Error::Schema)
151 }
152}
153
154fn parse_field_type(type_str: &str) -> Result<FieldType> {
156 match type_str {
157 "text" | "string" | "str" => Ok(FieldType::Text),
158 "u64" | "uint" | "unsigned" => Ok(FieldType::U64),
159 "i64" | "int" | "integer" => Ok(FieldType::I64),
160 "f64" | "float" | "double" => Ok(FieldType::F64),
161 "bytes" | "binary" | "blob" => Ok(FieldType::Bytes),
162 "sparse_vector" => Ok(FieldType::SparseVector),
163 "dense_vector" | "vector" => Ok(FieldType::DenseVector),
164 _ => Err(Error::Schema(format!("Unknown field type: {}", type_str))),
165 }
166}
167
168fn parse_attributes(pair: pest::iterators::Pair<Rule>) -> (bool, bool, bool) {
171 let mut indexed = false;
172 let mut stored = false;
173 let mut multi = false;
174
175 for attr in pair.into_inner() {
176 match attr.as_str() {
177 "indexed" => indexed = true,
178 "stored" => stored = true,
179 "multi" => multi = true,
180 _ => {}
181 }
182 }
183
184 (indexed, stored, multi)
185}
186
187fn parse_field_def(pair: pest::iterators::Pair<Rule>) -> Result<FieldDef> {
189 let mut inner = pair.into_inner();
190
191 let name = inner
192 .next()
193 .ok_or_else(|| Error::Schema("Missing field name".to_string()))?
194 .as_str()
195 .to_string();
196
197 let field_type_str = inner
198 .next()
199 .ok_or_else(|| Error::Schema("Missing field type".to_string()))?
200 .as_str();
201
202 let field_type = parse_field_type(field_type_str)?;
203
204 let mut tokenizer = None;
206 let mut sparse_vector_config = None;
207 let mut dense_vector_config = None;
208 let mut indexed = true;
209 let mut stored = true;
210 let mut multi = false;
211
212 for item in inner {
213 match item.as_rule() {
214 Rule::tokenizer_spec => {
215 if let Some(tok_name) = item.into_inner().next() {
217 tokenizer = Some(tok_name.as_str().to_string());
218 }
219 }
220 Rule::sparse_vector_config => {
221 let mut config_inner = item.into_inner();
223 let index_size = if let Some(size_pair) = config_inner.next() {
224 match size_pair.as_str() {
225 "u16" => IndexSize::U16,
226 "u32" => IndexSize::U32,
227 _ => IndexSize::default(),
228 }
229 } else {
230 IndexSize::default()
231 };
232 let quantization = if let Some(quant_pair) = config_inner.next() {
233 match quant_pair.as_str() {
234 "float32" | "f32" => WeightQuantization::Float32,
235 "float16" | "f16" => WeightQuantization::Float16,
236 "uint8" | "u8" => WeightQuantization::UInt8,
237 "uint4" | "u4" => WeightQuantization::UInt4,
238 _ => WeightQuantization::default(),
239 }
240 } else {
241 WeightQuantization::default()
242 };
243 sparse_vector_config = Some(SparseVectorConfig {
244 index_size,
245 weight_quantization: quantization,
246 });
247 }
248 Rule::dense_vector_config => {
249 dense_vector_config = Some(parse_dense_vector_config(item));
251 }
252 Rule::attributes => {
253 let (idx, sto, mul) = parse_attributes(item);
254 indexed = idx;
255 stored = sto;
256 multi = mul;
257 }
258 _ => {}
259 }
260 }
261
262 Ok(FieldDef {
263 name,
264 field_type,
265 indexed,
266 stored,
267 tokenizer,
268 multi,
269 sparse_vector_config,
270 dense_vector_config,
271 })
272}
273
274fn parse_dense_vector_config(pair: pest::iterators::Pair<Rule>) -> DenseVectorConfig {
276 let mut dim: usize = 0;
277 let mut centroids_path: Option<String> = None;
278 let mut nprobe: usize = 32;
279
280 for params in pair.into_inner() {
282 if params.as_rule() == Rule::dense_vector_params {
283 for inner in params.into_inner() {
284 match inner.as_rule() {
285 Rule::dense_vector_keyword_params => {
286 for kwarg in inner.into_inner() {
288 if kwarg.as_rule() == Rule::dense_vector_kwarg {
289 for kw in kwarg.into_inner() {
290 match kw.as_rule() {
291 Rule::dims_kwarg => {
292 if let Some(d) = kw.into_inner().next() {
293 dim = d.as_str().parse().unwrap_or(0);
294 }
295 }
296 Rule::centroids_kwarg => {
297 if let Some(path) = kw.into_inner().next()
298 && let Some(inner_path) = path.into_inner().next()
299 {
300 centroids_path =
301 Some(inner_path.as_str().to_string());
302 }
303 }
304 Rule::nprobe_kwarg => {
305 if let Some(n) = kw.into_inner().next() {
306 nprobe = n.as_str().parse().unwrap_or(32);
307 }
308 }
309 _ => {}
310 }
311 }
312 }
313 }
314 }
315 Rule::dense_vector_positional_params => {
316 let mut positional = inner.into_inner();
318 if let Some(dim_pair) = positional.next() {
319 dim = dim_pair.as_str().parse().unwrap_or(0);
320 }
321 if let Some(path_pair) = positional.next()
322 && let Some(inner_path) = path_pair.into_inner().next()
323 {
324 centroids_path = Some(inner_path.as_str().to_string());
325 }
326 if let Some(nprobe_pair) = positional.next() {
327 nprobe = nprobe_pair.as_str().parse().unwrap_or(32);
328 }
329 }
330 _ => {}
331 }
332 }
333 }
334 }
335
336 if let Some(path) = centroids_path {
337 DenseVectorConfig::with_ivf(dim, path, nprobe)
338 } else {
339 DenseVectorConfig::new(dim)
340 }
341}
342
343fn parse_default_fields_def(pair: pest::iterators::Pair<Rule>) -> Vec<String> {
345 pair.into_inner().map(|p| p.as_str().to_string()).collect()
346}
347
348fn parse_query_router_def(pair: pest::iterators::Pair<Rule>) -> Result<QueryRouterRule> {
350 let mut pattern = String::new();
351 let mut substitution = String::new();
352 let mut target_field = String::new();
353 let mut mode = RoutingMode::Additional;
354
355 for prop in pair.into_inner() {
356 if prop.as_rule() != Rule::query_router_prop {
357 continue;
358 }
359
360 for inner in prop.into_inner() {
361 match inner.as_rule() {
362 Rule::query_router_pattern => {
363 if let Some(regex_str) = inner.into_inner().next() {
364 pattern = parse_string_value(regex_str);
365 }
366 }
367 Rule::query_router_substitution => {
368 if let Some(quoted) = inner.into_inner().next() {
369 substitution = parse_string_value(quoted);
370 }
371 }
372 Rule::query_router_target => {
373 if let Some(ident) = inner.into_inner().next() {
374 target_field = ident.as_str().to_string();
375 }
376 }
377 Rule::query_router_mode => {
378 if let Some(mode_val) = inner.into_inner().next() {
379 mode = match mode_val.as_str() {
380 "exclusive" => RoutingMode::Exclusive,
381 "additional" => RoutingMode::Additional,
382 _ => RoutingMode::Additional,
383 };
384 }
385 }
386 _ => {}
387 }
388 }
389 }
390
391 if pattern.is_empty() {
392 return Err(Error::Schema("query_router missing 'pattern'".to_string()));
393 }
394 if substitution.is_empty() {
395 return Err(Error::Schema(
396 "query_router missing 'substitution'".to_string(),
397 ));
398 }
399 if target_field.is_empty() {
400 return Err(Error::Schema(
401 "query_router missing 'target_field'".to_string(),
402 ));
403 }
404
405 Ok(QueryRouterRule {
406 pattern,
407 substitution,
408 target_field,
409 mode,
410 })
411}
412
413fn parse_string_value(pair: pest::iterators::Pair<Rule>) -> String {
415 let s = pair.as_str();
416 match pair.as_rule() {
417 Rule::regex_string => {
418 if let Some(inner) = pair.into_inner().next() {
420 parse_string_value(inner)
421 } else {
422 s.to_string()
423 }
424 }
425 Rule::raw_string => {
426 s[2..s.len() - 1].to_string()
428 }
429 Rule::quoted_string => {
430 let inner = &s[1..s.len() - 1];
432 inner
434 .replace("\\n", "\n")
435 .replace("\\t", "\t")
436 .replace("\\\"", "\"")
437 .replace("\\\\", "\\")
438 }
439 _ => s.to_string(),
440 }
441}
442
443fn parse_index_def(pair: pest::iterators::Pair<Rule>) -> Result<IndexDef> {
445 let mut inner = pair.into_inner();
446
447 let name = inner
448 .next()
449 .ok_or_else(|| Error::Schema("Missing index name".to_string()))?
450 .as_str()
451 .to_string();
452
453 let mut fields = Vec::new();
454 let mut default_fields = Vec::new();
455 let mut query_routers = Vec::new();
456
457 for item in inner {
458 match item.as_rule() {
459 Rule::field_def => {
460 fields.push(parse_field_def(item)?);
461 }
462 Rule::default_fields_def => {
463 default_fields = parse_default_fields_def(item);
464 }
465 Rule::query_router_def => {
466 query_routers.push(parse_query_router_def(item)?);
467 }
468 _ => {}
469 }
470 }
471
472 Ok(IndexDef {
473 name,
474 fields,
475 default_fields,
476 query_routers,
477 })
478}
479
480pub fn parse_sdl(input: &str) -> Result<Vec<IndexDef>> {
482 let pairs = SdlParser::parse(Rule::file, input)
483 .map_err(|e| Error::Schema(format!("Parse error: {}", e)))?;
484
485 let mut indexes = Vec::new();
486
487 for pair in pairs {
488 if pair.as_rule() == Rule::file {
489 for inner in pair.into_inner() {
490 if inner.as_rule() == Rule::index_def {
491 indexes.push(parse_index_def(inner)?);
492 }
493 }
494 }
495 }
496
497 Ok(indexes)
498}
499
500pub fn parse_single_index(input: &str) -> Result<IndexDef> {
502 let indexes = parse_sdl(input)?;
503
504 if indexes.is_empty() {
505 return Err(Error::Schema("No index definition found".to_string()));
506 }
507
508 if indexes.len() > 1 {
509 return Err(Error::Schema(
510 "Multiple index definitions found, expected one".to_string(),
511 ));
512 }
513
514 Ok(indexes.into_iter().next().unwrap())
515}
516
// Unit tests for the SDL parser: each test parses a small SDL snippet and
// checks the resulting definitions or the schema built from them.
#[cfg(test)]
mod tests {
    use super::*;

    // Two text fields; an explicit attribute list replaces the defaults, so
    // `[indexed]` alone leaves `stored` false.
    #[test]
    fn test_parse_simple_schema() {
        let sdl = r#"
 index articles {
 field title: text [indexed, stored]
 field body: text [indexed]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let index = &indexes[0];
        assert_eq!(index.name, "articles");
        assert_eq!(index.fields.len(), 2);

        assert_eq!(index.fields[0].name, "title");
        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(index.fields[0].indexed);
        assert!(index.fields[0].stored);

        assert_eq!(index.fields[1].name, "body");
        assert!(matches!(index.fields[1].field_type, FieldType::Text));
        assert!(index.fields[1].indexed);
        assert!(!index.fields[1].stored);
    }

    // The canonical keyword of each scalar type maps to the matching `FieldType`.
    #[test]
    fn test_parse_all_field_types() {
        let sdl = r#"
 index test {
 field text_field: text [indexed, stored]
 field u64_field: u64 [indexed, stored]
 field i64_field: i64 [indexed, stored]
 field f64_field: f64 [indexed, stored]
 field bytes_field: bytes [stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(matches!(index.fields[1].field_type, FieldType::U64));
        assert!(matches!(index.fields[2].field_type, FieldType::I64));
        assert!(matches!(index.fields[3].field_type, FieldType::F64));
        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
    }

    // `#` comments on their own line and after a field must not break parsing.
    // NOTE(review): the SDL text below says inline comments are "not supported
    // yet", but the test expects that line to parse — confirm which is current.
    #[test]
    fn test_parse_with_comments() {
        let sdl = r#"
 # This is a comment
 index articles {
 # Title field
 field title: text [indexed, stored]
 field body: text [indexed] # inline comment not supported yet
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].fields.len(), 2);
    }

    // Alias keywords (`string`, `int`, `uint`, `float`, `binary`) resolve to
    // the same `FieldType` as their canonical names.
    #[test]
    fn test_parse_type_aliases() {
        let sdl = r#"
 index test {
 field a: string [indexed]
 field b: int [indexed]
 field c: uint [indexed]
 field d: float [indexed]
 field e: binary [stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(matches!(index.fields[1].field_type, FieldType::I64));
        assert!(matches!(index.fields[2].field_type, FieldType::U64));
        assert!(matches!(index.fields[3].field_type, FieldType::F64));
        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
    }

    // `to_schema` exposes declared fields by name and nothing else.
    #[test]
    fn test_to_schema() {
        let sdl = r#"
 index articles {
 field title: text [indexed, stored]
 field views: u64 [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let schema = indexes[0].to_schema();

        assert!(schema.get_field("title").is_some());
        assert!(schema.get_field("views").is_some());
        assert!(schema.get_field("nonexistent").is_none());
    }

    // A field without an attribute list is both indexed and stored.
    #[test]
    fn test_default_attributes() {
        let sdl = r#"
 index test {
 field title: text
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let field = &indexes[0].fields[0];

        assert!(field.indexed);
        assert!(field.stored);
    }

    // Several index definitions in one input are returned in source order.
    #[test]
    fn test_multiple_indexes() {
        let sdl = r#"
 index articles {
 field title: text [indexed, stored]
 }

 index users {
 field name: text [indexed, stored]
 field email: text [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 2);
        assert_eq!(indexes[0].name, "articles");
        assert_eq!(indexes[1].name, "users");
    }

    // `text<name>` records the tokenizer name; plain `text` leaves it unset.
    #[test]
    fn test_tokenizer_spec() {
        let sdl = r#"
 index articles {
 field title: text<en_stem> [indexed, stored]
 field body: text<default> [indexed]
 field author: text [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert_eq!(index.fields[0].name, "title");
        assert_eq!(index.fields[0].tokenizer, Some("en_stem".to_string()));

        assert_eq!(index.fields[1].name, "body");
        assert_eq!(index.fields[1].tokenizer, Some("default".to_string()));

        assert_eq!(index.fields[2].name, "author");
        assert_eq!(index.fields[2].tokenizer, None);
    }

    // The tokenizer chosen in the SDL is carried through to the schema entry.
    #[test]
    fn test_tokenizer_in_schema() {
        let sdl = r#"
 index articles {
 field title: text<german> [indexed, stored]
 field body: text<en_stem> [indexed]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let schema = indexes[0].to_schema();

        let title_field = schema.get_field("title").unwrap();
        let title_entry = schema.get_field_entry(title_field).unwrap();
        assert_eq!(title_entry.tokenizer, Some("german".to_string()));

        let body_field = schema.get_field("body").unwrap();
        let body_entry = schema.get_field_entry(body_field).unwrap();
        assert_eq!(body_entry.tokenizer, Some("en_stem".to_string()));
    }

    // A quoted pattern has its `\\` escapes decoded to single backslashes.
    #[test]
    fn test_query_router_basic() {
        let sdl = r#"
 index documents {
 field title: text [indexed, stored]
 field uri: text [indexed, stored]

 query_router {
 pattern: "10\\.\\d{4,}/[^\\s]+"
 substitution: "doi://{0}"
 target_field: uris
 mode: exclusive
 }
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert_eq!(index.query_routers.len(), 1);
        let router = &index.query_routers[0];
        assert_eq!(router.pattern, r"10\.\d{4,}/[^\s]+");
        assert_eq!(router.substitution, "doi://{0}");
        assert_eq!(router.target_field, "uris");
        assert_eq!(router.mode, RoutingMode::Exclusive);
    }

    // A raw `r"..."` pattern is taken verbatim, with no escape processing.
    #[test]
    fn test_query_router_raw_string() {
        let sdl = r#"
 index documents {
 field uris: text [indexed, stored]

 query_router {
 pattern: r"^pmid:(\d+)$"
 substitution: "pubmed://{1}"
 target_field: uris
 mode: additional
 }
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let router = &indexes[0].query_routers[0];

        assert_eq!(router.pattern, r"^pmid:(\d+)$");
        assert_eq!(router.substitution, "pubmed://{1}");
        assert_eq!(router.mode, RoutingMode::Additional);
    }

    // Every `query_router` block in an index is collected.
    #[test]
    fn test_multiple_query_routers() {
        let sdl = r#"
 index documents {
 field uris: text [indexed, stored]

 query_router {
 pattern: r"^doi:(10\.\d{4,}/[^\s]+)$"
 substitution: "doi://{1}"
 target_field: uris
 mode: exclusive
 }

 query_router {
 pattern: r"^pmid:(\d+)$"
 substitution: "pubmed://{1}"
 target_field: uris
 mode: exclusive
 }

 query_router {
 pattern: r"^arxiv:(\d+\.\d+)$"
 substitution: "arxiv://{1}"
 target_field: uris
 mode: additional
 }
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].query_routers.len(), 3);
    }

    // Omitting `mode` yields `RoutingMode::Additional`.
    #[test]
    fn test_query_router_default_mode() {
        let sdl = r#"
 index documents {
 field uris: text [indexed, stored]

 query_router {
 pattern: r"test"
 substitution: "{0}"
 target_field: uris
 }
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].query_routers[0].mode, RoutingMode::Additional);
    }

    // The `multi` attribute is set on the field definition and reaches the schema entry.
    #[test]
    fn test_multi_attribute() {
        let sdl = r#"
 index documents {
 field uris: text [indexed, stored, multi]
 field title: text [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let fields = &indexes[0].fields;
        assert_eq!(fields.len(), 2);

        assert_eq!(fields[0].name, "uris");
        assert!(fields[0].multi, "uris field should have multi=true");

        assert_eq!(fields[1].name, "title");
        assert!(!fields[1].multi, "title field should have multi=false");

        let schema = indexes[0].to_schema();
        let uris_field = schema.get_field("uris").unwrap();
        let title_field = schema.get_field("title").unwrap();

        assert!(schema.get_field_entry(uris_field).unwrap().multi);
        assert!(!schema.get_field_entry(title_field).unwrap().multi);
    }

    // A bare `sparse_vector` field parses with no sparse config attached.
    #[test]
    fn test_sparse_vector_field() {
        let sdl = r#"
 index documents {
 field embedding: sparse_vector [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);
        assert_eq!(indexes[0].fields.len(), 1);
        assert_eq!(indexes[0].fields[0].name, "embedding");
        assert_eq!(indexes[0].fields[0].field_type, FieldType::SparseVector);
        assert!(indexes[0].fields[0].sparse_vector_config.is_none());
    }

    // `sparse_vector<index_size, quantization>` fills both config values.
    #[test]
    fn test_sparse_vector_with_config() {
        let sdl = r#"
 index documents {
 field embedding: sparse_vector<u16, uint8> [indexed, stored]
 field dense: sparse_vector<u32, float32> [indexed]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].fields.len(), 2);

        let f1 = &indexes[0].fields[0];
        assert_eq!(f1.name, "embedding");
        let config1 = f1.sparse_vector_config.as_ref().unwrap();
        assert_eq!(config1.index_size, IndexSize::U16);
        assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);

        let f2 = &indexes[0].fields[1];
        assert_eq!(f2.name, "dense");
        let config2 = f2.sparse_vector_config.as_ref().unwrap();
        assert_eq!(config2.index_size, IndexSize::U32);
        assert_eq!(config2.weight_quantization, WeightQuantization::Float32);
    }

    // Positional form with only a dimension.
    #[test]
    fn test_dense_vector_field() {
        let sdl = r#"
 index documents {
 field embedding: dense_vector<768> [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);
        assert_eq!(indexes[0].fields.len(), 1);

        let f = &indexes[0].fields[0];
        assert_eq!(f.name, "embedding");
        assert_eq!(f.field_type, FieldType::DenseVector);

        let config = f.dense_vector_config.as_ref().unwrap();
        assert_eq!(config.dim, 768);
    }

    // `vector` is an alias for `dense_vector`.
    #[test]
    fn test_dense_vector_alias() {
        let sdl = r#"
 index documents {
 field embedding: vector<1536> [indexed]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].fields[0].field_type, FieldType::DenseVector);
        assert_eq!(
            indexes[0].fields[0]
                .dense_vector_config
                .as_ref()
                .unwrap()
                .dim,
            1536
        );
    }

    // Positional form with a centroids path; `nprobe` keeps its default of 32.
    #[test]
    fn test_dense_vector_with_centroids() {
        let sdl = r#"
 index documents {
 field embedding: dense_vector<768, "centroids.bin"> [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let f = &indexes[0].fields[0];
        assert_eq!(f.name, "embedding");
        assert_eq!(f.field_type, FieldType::DenseVector);

        let config = f.dense_vector_config.as_ref().unwrap();
        assert_eq!(config.dim, 768);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("centroids.bin")
        );
        assert_eq!(config.nprobe, 32);
    }

    // Positional form with all three values given.
    #[test]
    fn test_dense_vector_with_centroids_and_nprobe() {
        let sdl = r#"
 index documents {
 field embedding: dense_vector<1536, "/path/to/centroids.bin", 64> [indexed]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 1536);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("/path/to/centroids.bin")
        );
        assert_eq!(config.nprobe, 64);
    }

    // Keyword form with only `dims`; no centroids path is recorded.
    #[test]
    fn test_dense_vector_keyword_syntax() {
        let sdl = r#"
 index documents {
 field embedding: dense_vector<dims: 1536> [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 1536);
        assert!(config.coarse_centroids_path.is_none());
    }

    // Keyword form with `dims`, `centroids`, and `nprobe` all given.
    #[test]
    fn test_dense_vector_keyword_syntax_full() {
        let sdl = r#"
 index documents {
 field embedding: dense_vector<dims: 1536, centroids: "/path/to/centroids.bin", nprobe: 64> [indexed]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 1536);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("/path/to/centroids.bin")
        );
        assert_eq!(config.nprobe, 64);
    }

    // Keyword form without `nprobe`; it keeps its default of 32.
    #[test]
    fn test_dense_vector_keyword_syntax_partial() {
        let sdl = r#"
 index documents {
 field embedding: dense_vector<dims: 768, centroids: "centroids.bin"> [indexed]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();

        assert_eq!(config.dim, 768);
        assert_eq!(
            config.coarse_centroids_path.as_deref(),
            Some("centroids.bin")
        );
        assert_eq!(config.nprobe, 32);
    }
}