hermes_core/dsl/sdl/
mod.rs

1//! Schema Definition Language (SDL) for Hermes
2//!
3//! A simple, readable format for defining index schemas using pest parser.
4//!
5//! # Example SDL
6//!
7//! ```text
8//! # Article index schema
9//! index articles {
10//!     # Primary text field for full-text search
11//!     field title: text [indexed, stored]
12//!
13//!     # Body content - indexed but not stored (save space)
14//!     field body: text [indexed]
15//!
16//!     # Author name
17//!     field author: text [indexed, stored]
18//!
19//!     # Publication timestamp
20//!     field published_at: i64 [indexed, stored]
21//!
22//!     # View count
23//!     field views: u64 [indexed, stored]
24//!
25//!     # Rating score
26//!     field rating: f64 [indexed, stored]
27//!
28//!     # Raw content hash (not indexed, just stored)
29//!     field content_hash: bytes [stored]
30//! }
31//! ```
32
33use pest::Parser;
34use pest_derive::Parser;
35
36use super::query_field_router::{QueryRouterRule, RoutingMode};
37use super::schema::{FieldType, Schema, SchemaBuilder};
38use crate::Result;
39use crate::error::Error;
40
41#[derive(Parser)]
42#[grammar = "dsl/sdl/sdl.pest"]
43pub struct SdlParser;
44
45use super::schema::DenseVectorConfig;
46use crate::structures::{IndexSize, SparseVectorConfig, WeightQuantization};
47
48/// Parsed field definition
49#[derive(Debug, Clone)]
50pub struct FieldDef {
51    pub name: String,
52    pub field_type: FieldType,
53    pub indexed: bool,
54    pub stored: bool,
55    /// Tokenizer name for text fields (e.g., "default", "en_stem", "german")
56    pub tokenizer: Option<String>,
57    /// Whether this field can have multiple values (serialized as array in JSON)
58    pub multi: bool,
59    /// Configuration for sparse vector fields
60    pub sparse_vector_config: Option<SparseVectorConfig>,
61    /// Configuration for dense vector fields
62    pub dense_vector_config: Option<DenseVectorConfig>,
63}
64
65/// Parsed index definition
66#[derive(Debug, Clone)]
67pub struct IndexDef {
68    pub name: String,
69    pub fields: Vec<FieldDef>,
70    pub default_fields: Vec<String>,
71    /// Query router rules for routing queries to specific fields
72    pub query_routers: Vec<QueryRouterRule>,
73}
74
75impl IndexDef {
76    /// Convert to a Schema
77    pub fn to_schema(&self) -> Schema {
78        let mut builder = SchemaBuilder::default();
79
80        for field in &self.fields {
81            let f = match field.field_type {
82                FieldType::Text => {
83                    let tokenizer = field.tokenizer.as_deref().unwrap_or("default");
84                    builder.add_text_field_with_tokenizer(
85                        &field.name,
86                        field.indexed,
87                        field.stored,
88                        tokenizer,
89                    )
90                }
91                FieldType::U64 => builder.add_u64_field(&field.name, field.indexed, field.stored),
92                FieldType::I64 => builder.add_i64_field(&field.name, field.indexed, field.stored),
93                FieldType::F64 => builder.add_f64_field(&field.name, field.indexed, field.stored),
94                FieldType::Bytes => builder.add_bytes_field(&field.name, field.stored),
95                FieldType::SparseVector => {
96                    if let Some(config) = &field.sparse_vector_config {
97                        builder.add_sparse_vector_field_with_config(
98                            &field.name,
99                            field.indexed,
100                            field.stored,
101                            *config,
102                        )
103                    } else {
104                        builder.add_sparse_vector_field(&field.name, field.indexed, field.stored)
105                    }
106                }
107                FieldType::DenseVector => {
108                    // Dense vector dimension must be specified via config
109                    let config = field
110                        .dense_vector_config
111                        .as_ref()
112                        .expect("DenseVector field requires dimension to be specified");
113                    builder.add_dense_vector_field_with_config(
114                        &field.name,
115                        field.indexed,
116                        field.stored,
117                        config.clone(),
118                    )
119                }
120            };
121            if field.multi {
122                builder.set_multi(f, true);
123            }
124        }
125
126        // Set default fields if specified
127        if !self.default_fields.is_empty() {
128            builder.set_default_fields(self.default_fields.clone());
129        }
130
131        // Set query routers if specified
132        if !self.query_routers.is_empty() {
133            builder.set_query_routers(self.query_routers.clone());
134        }
135
136        builder.build()
137    }
138
139    /// Create a QueryFieldRouter from the query router rules
140    ///
141    /// Returns None if there are no query router rules defined.
142    /// Returns Err if any regex pattern is invalid.
143    pub fn to_query_router(&self) -> Result<Option<super::query_field_router::QueryFieldRouter>> {
144        if self.query_routers.is_empty() {
145            return Ok(None);
146        }
147
148        super::query_field_router::QueryFieldRouter::from_rules(&self.query_routers)
149            .map(Some)
150            .map_err(Error::Schema)
151    }
152}
153
154/// Parse field type from string
155fn parse_field_type(type_str: &str) -> Result<FieldType> {
156    match type_str {
157        "text" | "string" | "str" => Ok(FieldType::Text),
158        "u64" | "uint" | "unsigned" => Ok(FieldType::U64),
159        "i64" | "int" | "integer" => Ok(FieldType::I64),
160        "f64" | "float" | "double" => Ok(FieldType::F64),
161        "bytes" | "binary" | "blob" => Ok(FieldType::Bytes),
162        "sparse_vector" => Ok(FieldType::SparseVector),
163        "dense_vector" | "vector" => Ok(FieldType::DenseVector),
164        _ => Err(Error::Schema(format!("Unknown field type: {}", type_str))),
165    }
166}
167
168/// Parse attributes from pest pair
169/// Returns (indexed, stored, multi)
170fn parse_attributes(pair: pest::iterators::Pair<Rule>) -> (bool, bool, bool) {
171    let mut indexed = false;
172    let mut stored = false;
173    let mut multi = false;
174
175    for attr in pair.into_inner() {
176        match attr.as_str() {
177            "indexed" => indexed = true,
178            "stored" => stored = true,
179            "multi" => multi = true,
180            _ => {}
181        }
182    }
183
184    (indexed, stored, multi)
185}
186
187/// Parse a field definition from pest pair
188fn parse_field_def(pair: pest::iterators::Pair<Rule>) -> Result<FieldDef> {
189    let mut inner = pair.into_inner();
190
191    let name = inner
192        .next()
193        .ok_or_else(|| Error::Schema("Missing field name".to_string()))?
194        .as_str()
195        .to_string();
196
197    let field_type_str = inner
198        .next()
199        .ok_or_else(|| Error::Schema("Missing field type".to_string()))?
200        .as_str();
201
202    let field_type = parse_field_type(field_type_str)?;
203
204    // Parse optional tokenizer spec, sparse_vector_config, dense_vector_config, and attributes
205    let mut tokenizer = None;
206    let mut sparse_vector_config = None;
207    let mut dense_vector_config = None;
208    let mut indexed = true;
209    let mut stored = true;
210    let mut multi = false;
211
212    for item in inner {
213        match item.as_rule() {
214            Rule::tokenizer_spec => {
215                // Extract tokenizer name from <name>
216                if let Some(tok_name) = item.into_inner().next() {
217                    tokenizer = Some(tok_name.as_str().to_string());
218                }
219            }
220            Rule::sparse_vector_config => {
221                // Parse <index_size, quantization>
222                let mut config_inner = item.into_inner();
223                let index_size = if let Some(size_pair) = config_inner.next() {
224                    match size_pair.as_str() {
225                        "u16" => IndexSize::U16,
226                        "u32" => IndexSize::U32,
227                        _ => IndexSize::default(),
228                    }
229                } else {
230                    IndexSize::default()
231                };
232                let quantization = if let Some(quant_pair) = config_inner.next() {
233                    match quant_pair.as_str() {
234                        "float32" | "f32" => WeightQuantization::Float32,
235                        "float16" | "f16" => WeightQuantization::Float16,
236                        "uint8" | "u8" => WeightQuantization::UInt8,
237                        "uint4" | "u4" => WeightQuantization::UInt4,
238                        _ => WeightQuantization::default(),
239                    }
240                } else {
241                    WeightQuantization::default()
242                };
243                sparse_vector_config = Some(SparseVectorConfig {
244                    index_size,
245                    weight_quantization: quantization,
246                });
247            }
248            Rule::dense_vector_config => {
249                // Parse dense_vector_params (keyword or positional)
250                dense_vector_config = Some(parse_dense_vector_config(item));
251            }
252            Rule::attributes => {
253                let (idx, sto, mul) = parse_attributes(item);
254                indexed = idx;
255                stored = sto;
256                multi = mul;
257            }
258            _ => {}
259        }
260    }
261
262    Ok(FieldDef {
263        name,
264        field_type,
265        indexed,
266        stored,
267        tokenizer,
268        multi,
269        sparse_vector_config,
270        dense_vector_config,
271    })
272}
273
274/// Parse dense_vector_config with keyword or positional params
275fn parse_dense_vector_config(pair: pest::iterators::Pair<Rule>) -> DenseVectorConfig {
276    let mut dim: usize = 0;
277    let mut centroids_path: Option<String> = None;
278    let mut nprobe: usize = 32;
279
280    // Navigate to dense_vector_params
281    for params in pair.into_inner() {
282        if params.as_rule() == Rule::dense_vector_params {
283            for inner in params.into_inner() {
284                match inner.as_rule() {
285                    Rule::dense_vector_keyword_params => {
286                        // Parse keyword args: dims: N, centroids: "path", nprobe: N
287                        for kwarg in inner.into_inner() {
288                            if kwarg.as_rule() == Rule::dense_vector_kwarg {
289                                for kw in kwarg.into_inner() {
290                                    match kw.as_rule() {
291                                        Rule::dims_kwarg => {
292                                            if let Some(d) = kw.into_inner().next() {
293                                                dim = d.as_str().parse().unwrap_or(0);
294                                            }
295                                        }
296                                        Rule::centroids_kwarg => {
297                                            if let Some(path) = kw.into_inner().next()
298                                                && let Some(inner_path) = path.into_inner().next()
299                                            {
300                                                centroids_path =
301                                                    Some(inner_path.as_str().to_string());
302                                            }
303                                        }
304                                        Rule::nprobe_kwarg => {
305                                            if let Some(n) = kw.into_inner().next() {
306                                                nprobe = n.as_str().parse().unwrap_or(32);
307                                            }
308                                        }
309                                        _ => {}
310                                    }
311                                }
312                            }
313                        }
314                    }
315                    Rule::dense_vector_positional_params => {
316                        // Parse positional: dimension, "path", nprobe
317                        let mut positional = inner.into_inner();
318                        if let Some(dim_pair) = positional.next() {
319                            dim = dim_pair.as_str().parse().unwrap_or(0);
320                        }
321                        if let Some(path_pair) = positional.next()
322                            && let Some(inner_path) = path_pair.into_inner().next()
323                        {
324                            centroids_path = Some(inner_path.as_str().to_string());
325                        }
326                        if let Some(nprobe_pair) = positional.next() {
327                            nprobe = nprobe_pair.as_str().parse().unwrap_or(32);
328                        }
329                    }
330                    _ => {}
331                }
332            }
333        }
334    }
335
336    if let Some(path) = centroids_path {
337        DenseVectorConfig::with_ivf(dim, path, nprobe)
338    } else {
339        DenseVectorConfig::new(dim)
340    }
341}
342
343/// Parse default_fields definition
344fn parse_default_fields_def(pair: pest::iterators::Pair<Rule>) -> Vec<String> {
345    pair.into_inner().map(|p| p.as_str().to_string()).collect()
346}
347
348/// Parse a query router definition
349fn parse_query_router_def(pair: pest::iterators::Pair<Rule>) -> Result<QueryRouterRule> {
350    let mut pattern = String::new();
351    let mut substitution = String::new();
352    let mut target_field = String::new();
353    let mut mode = RoutingMode::Additional;
354
355    for prop in pair.into_inner() {
356        if prop.as_rule() != Rule::query_router_prop {
357            continue;
358        }
359
360        for inner in prop.into_inner() {
361            match inner.as_rule() {
362                Rule::query_router_pattern => {
363                    if let Some(regex_str) = inner.into_inner().next() {
364                        pattern = parse_string_value(regex_str);
365                    }
366                }
367                Rule::query_router_substitution => {
368                    if let Some(quoted) = inner.into_inner().next() {
369                        substitution = parse_string_value(quoted);
370                    }
371                }
372                Rule::query_router_target => {
373                    if let Some(ident) = inner.into_inner().next() {
374                        target_field = ident.as_str().to_string();
375                    }
376                }
377                Rule::query_router_mode => {
378                    if let Some(mode_val) = inner.into_inner().next() {
379                        mode = match mode_val.as_str() {
380                            "exclusive" => RoutingMode::Exclusive,
381                            "additional" => RoutingMode::Additional,
382                            _ => RoutingMode::Additional,
383                        };
384                    }
385                }
386                _ => {}
387            }
388        }
389    }
390
391    if pattern.is_empty() {
392        return Err(Error::Schema("query_router missing 'pattern'".to_string()));
393    }
394    if substitution.is_empty() {
395        return Err(Error::Schema(
396            "query_router missing 'substitution'".to_string(),
397        ));
398    }
399    if target_field.is_empty() {
400        return Err(Error::Schema(
401            "query_router missing 'target_field'".to_string(),
402        ));
403    }
404
405    Ok(QueryRouterRule {
406        pattern,
407        substitution,
408        target_field,
409        mode,
410    })
411}
412
413/// Parse a string value from quoted_string, raw_string, or regex_string
414fn parse_string_value(pair: pest::iterators::Pair<Rule>) -> String {
415    let s = pair.as_str();
416    match pair.as_rule() {
417        Rule::regex_string => {
418            // regex_string contains either raw_string or quoted_string
419            if let Some(inner) = pair.into_inner().next() {
420                parse_string_value(inner)
421            } else {
422                s.to_string()
423            }
424        }
425        Rule::raw_string => {
426            // r"..." - strip r" prefix and " suffix
427            s[2..s.len() - 1].to_string()
428        }
429        Rule::quoted_string => {
430            // "..." - strip quotes and handle escapes
431            let inner = &s[1..s.len() - 1];
432            // Simple escape handling
433            inner
434                .replace("\\n", "\n")
435                .replace("\\t", "\t")
436                .replace("\\\"", "\"")
437                .replace("\\\\", "\\")
438        }
439        _ => s.to_string(),
440    }
441}
442
443/// Parse an index definition from pest pair
444fn parse_index_def(pair: pest::iterators::Pair<Rule>) -> Result<IndexDef> {
445    let mut inner = pair.into_inner();
446
447    let name = inner
448        .next()
449        .ok_or_else(|| Error::Schema("Missing index name".to_string()))?
450        .as_str()
451        .to_string();
452
453    let mut fields = Vec::new();
454    let mut default_fields = Vec::new();
455    let mut query_routers = Vec::new();
456
457    for item in inner {
458        match item.as_rule() {
459            Rule::field_def => {
460                fields.push(parse_field_def(item)?);
461            }
462            Rule::default_fields_def => {
463                default_fields = parse_default_fields_def(item);
464            }
465            Rule::query_router_def => {
466                query_routers.push(parse_query_router_def(item)?);
467            }
468            _ => {}
469        }
470    }
471
472    Ok(IndexDef {
473        name,
474        fields,
475        default_fields,
476        query_routers,
477    })
478}
479
480/// Parse SDL from a string
481pub fn parse_sdl(input: &str) -> Result<Vec<IndexDef>> {
482    let pairs = SdlParser::parse(Rule::file, input)
483        .map_err(|e| Error::Schema(format!("Parse error: {}", e)))?;
484
485    let mut indexes = Vec::new();
486
487    for pair in pairs {
488        if pair.as_rule() == Rule::file {
489            for inner in pair.into_inner() {
490                if inner.as_rule() == Rule::index_def {
491                    indexes.push(parse_index_def(inner)?);
492                }
493            }
494        }
495    }
496
497    Ok(indexes)
498}
499
500/// Parse SDL and return a single index definition
501pub fn parse_single_index(input: &str) -> Result<IndexDef> {
502    let indexes = parse_sdl(input)?;
503
504    if indexes.is_empty() {
505        return Err(Error::Schema("No index definition found".to_string()));
506    }
507
508    if indexes.len() > 1 {
509        return Err(Error::Schema(
510            "Multiple index definitions found, expected one".to_string(),
511        ));
512    }
513
514    Ok(indexes.into_iter().next().unwrap())
515}
516
517#[cfg(test)]
518mod tests {
519    use super::*;
520
521    #[test]
522    fn test_parse_simple_schema() {
523        let sdl = r#"
524            index articles {
525                field title: text [indexed, stored]
526                field body: text [indexed]
527            }
528        "#;
529
530        let indexes = parse_sdl(sdl).unwrap();
531        assert_eq!(indexes.len(), 1);
532
533        let index = &indexes[0];
534        assert_eq!(index.name, "articles");
535        assert_eq!(index.fields.len(), 2);
536
537        assert_eq!(index.fields[0].name, "title");
538        assert!(matches!(index.fields[0].field_type, FieldType::Text));
539        assert!(index.fields[0].indexed);
540        assert!(index.fields[0].stored);
541
542        assert_eq!(index.fields[1].name, "body");
543        assert!(matches!(index.fields[1].field_type, FieldType::Text));
544        assert!(index.fields[1].indexed);
545        assert!(!index.fields[1].stored);
546    }
547
548    #[test]
549    fn test_parse_all_field_types() {
550        let sdl = r#"
551            index test {
552                field text_field: text [indexed, stored]
553                field u64_field: u64 [indexed, stored]
554                field i64_field: i64 [indexed, stored]
555                field f64_field: f64 [indexed, stored]
556                field bytes_field: bytes [stored]
557            }
558        "#;
559
560        let indexes = parse_sdl(sdl).unwrap();
561        let index = &indexes[0];
562
563        assert!(matches!(index.fields[0].field_type, FieldType::Text));
564        assert!(matches!(index.fields[1].field_type, FieldType::U64));
565        assert!(matches!(index.fields[2].field_type, FieldType::I64));
566        assert!(matches!(index.fields[3].field_type, FieldType::F64));
567        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
568    }
569
570    #[test]
571    fn test_parse_with_comments() {
572        let sdl = r#"
573            # This is a comment
574            index articles {
575                # Title field
576                field title: text [indexed, stored]
577                field body: text [indexed] # inline comment not supported yet
578            }
579        "#;
580
581        let indexes = parse_sdl(sdl).unwrap();
582        assert_eq!(indexes[0].fields.len(), 2);
583    }
584
585    #[test]
586    fn test_parse_type_aliases() {
587        let sdl = r#"
588            index test {
589                field a: string [indexed]
590                field b: int [indexed]
591                field c: uint [indexed]
592                field d: float [indexed]
593                field e: binary [stored]
594            }
595        "#;
596
597        let indexes = parse_sdl(sdl).unwrap();
598        let index = &indexes[0];
599
600        assert!(matches!(index.fields[0].field_type, FieldType::Text));
601        assert!(matches!(index.fields[1].field_type, FieldType::I64));
602        assert!(matches!(index.fields[2].field_type, FieldType::U64));
603        assert!(matches!(index.fields[3].field_type, FieldType::F64));
604        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
605    }
606
607    #[test]
608    fn test_to_schema() {
609        let sdl = r#"
610            index articles {
611                field title: text [indexed, stored]
612                field views: u64 [indexed, stored]
613            }
614        "#;
615
616        let indexes = parse_sdl(sdl).unwrap();
617        let schema = indexes[0].to_schema();
618
619        assert!(schema.get_field("title").is_some());
620        assert!(schema.get_field("views").is_some());
621        assert!(schema.get_field("nonexistent").is_none());
622    }
623
624    #[test]
625    fn test_default_attributes() {
626        let sdl = r#"
627            index test {
628                field title: text
629            }
630        "#;
631
632        let indexes = parse_sdl(sdl).unwrap();
633        let field = &indexes[0].fields[0];
634
635        // Default should be indexed and stored
636        assert!(field.indexed);
637        assert!(field.stored);
638    }
639
640    #[test]
641    fn test_multiple_indexes() {
642        let sdl = r#"
643            index articles {
644                field title: text [indexed, stored]
645            }
646
647            index users {
648                field name: text [indexed, stored]
649                field email: text [indexed, stored]
650            }
651        "#;
652
653        let indexes = parse_sdl(sdl).unwrap();
654        assert_eq!(indexes.len(), 2);
655        assert_eq!(indexes[0].name, "articles");
656        assert_eq!(indexes[1].name, "users");
657    }
658
659    #[test]
660    fn test_tokenizer_spec() {
661        let sdl = r#"
662            index articles {
663                field title: text<en_stem> [indexed, stored]
664                field body: text<default> [indexed]
665                field author: text [indexed, stored]
666            }
667        "#;
668
669        let indexes = parse_sdl(sdl).unwrap();
670        let index = &indexes[0];
671
672        assert_eq!(index.fields[0].name, "title");
673        assert_eq!(index.fields[0].tokenizer, Some("en_stem".to_string()));
674
675        assert_eq!(index.fields[1].name, "body");
676        assert_eq!(index.fields[1].tokenizer, Some("default".to_string()));
677
678        assert_eq!(index.fields[2].name, "author");
679        assert_eq!(index.fields[2].tokenizer, None); // No tokenizer specified
680    }
681
682    #[test]
683    fn test_tokenizer_in_schema() {
684        let sdl = r#"
685            index articles {
686                field title: text<german> [indexed, stored]
687                field body: text<en_stem> [indexed]
688            }
689        "#;
690
691        let indexes = parse_sdl(sdl).unwrap();
692        let schema = indexes[0].to_schema();
693
694        let title_field = schema.get_field("title").unwrap();
695        let title_entry = schema.get_field_entry(title_field).unwrap();
696        assert_eq!(title_entry.tokenizer, Some("german".to_string()));
697
698        let body_field = schema.get_field("body").unwrap();
699        let body_entry = schema.get_field_entry(body_field).unwrap();
700        assert_eq!(body_entry.tokenizer, Some("en_stem".to_string()));
701    }
702
703    #[test]
704    fn test_query_router_basic() {
705        let sdl = r#"
706            index documents {
707                field title: text [indexed, stored]
708                field uri: text [indexed, stored]
709
710                query_router {
711                    pattern: "10\\.\\d{4,}/[^\\s]+"
712                    substitution: "doi://{0}"
713                    target_field: uris
714                    mode: exclusive
715                }
716            }
717        "#;
718
719        let indexes = parse_sdl(sdl).unwrap();
720        let index = &indexes[0];
721
722        assert_eq!(index.query_routers.len(), 1);
723        let router = &index.query_routers[0];
724        assert_eq!(router.pattern, r"10\.\d{4,}/[^\s]+");
725        assert_eq!(router.substitution, "doi://{0}");
726        assert_eq!(router.target_field, "uris");
727        assert_eq!(router.mode, RoutingMode::Exclusive);
728    }
729
730    #[test]
731    fn test_query_router_raw_string() {
732        let sdl = r#"
733            index documents {
734                field uris: text [indexed, stored]
735
736                query_router {
737                    pattern: r"^pmid:(\d+)$"
738                    substitution: "pubmed://{1}"
739                    target_field: uris
740                    mode: additional
741                }
742            }
743        "#;
744
745        let indexes = parse_sdl(sdl).unwrap();
746        let router = &indexes[0].query_routers[0];
747
748        assert_eq!(router.pattern, r"^pmid:(\d+)$");
749        assert_eq!(router.substitution, "pubmed://{1}");
750        assert_eq!(router.mode, RoutingMode::Additional);
751    }
752
753    #[test]
754    fn test_multiple_query_routers() {
755        let sdl = r#"
756            index documents {
757                field uris: text [indexed, stored]
758
759                query_router {
760                    pattern: r"^doi:(10\.\d{4,}/[^\s]+)$"
761                    substitution: "doi://{1}"
762                    target_field: uris
763                    mode: exclusive
764                }
765
766                query_router {
767                    pattern: r"^pmid:(\d+)$"
768                    substitution: "pubmed://{1}"
769                    target_field: uris
770                    mode: exclusive
771                }
772
773                query_router {
774                    pattern: r"^arxiv:(\d+\.\d+)$"
775                    substitution: "arxiv://{1}"
776                    target_field: uris
777                    mode: additional
778                }
779            }
780        "#;
781
782        let indexes = parse_sdl(sdl).unwrap();
783        assert_eq!(indexes[0].query_routers.len(), 3);
784    }
785
786    #[test]
787    fn test_query_router_default_mode() {
788        let sdl = r#"
789            index documents {
790                field uris: text [indexed, stored]
791
792                query_router {
793                    pattern: r"test"
794                    substitution: "{0}"
795                    target_field: uris
796                }
797            }
798        "#;
799
800        let indexes = parse_sdl(sdl).unwrap();
801        // Default mode should be Additional
802        assert_eq!(indexes[0].query_routers[0].mode, RoutingMode::Additional);
803    }
804
805    #[test]
806    fn test_multi_attribute() {
807        let sdl = r#"
808            index documents {
809                field uris: text [indexed, stored, multi]
810                field title: text [indexed, stored]
811            }
812        "#;
813
814        let indexes = parse_sdl(sdl).unwrap();
815        assert_eq!(indexes.len(), 1);
816
817        let fields = &indexes[0].fields;
818        assert_eq!(fields.len(), 2);
819
820        // uris should have multi=true
821        assert_eq!(fields[0].name, "uris");
822        assert!(fields[0].multi, "uris field should have multi=true");
823
824        // title should have multi=false
825        assert_eq!(fields[1].name, "title");
826        assert!(!fields[1].multi, "title field should have multi=false");
827
828        // Verify schema conversion preserves multi attribute
829        let schema = indexes[0].to_schema();
830        let uris_field = schema.get_field("uris").unwrap();
831        let title_field = schema.get_field("title").unwrap();
832
833        assert!(schema.get_field_entry(uris_field).unwrap().multi);
834        assert!(!schema.get_field_entry(title_field).unwrap().multi);
835    }
836
837    #[test]
838    fn test_sparse_vector_field() {
839        let sdl = r#"
840            index documents {
841                field embedding: sparse_vector [indexed, stored]
842            }
843        "#;
844
845        let indexes = parse_sdl(sdl).unwrap();
846        assert_eq!(indexes.len(), 1);
847        assert_eq!(indexes[0].fields.len(), 1);
848        assert_eq!(indexes[0].fields[0].name, "embedding");
849        assert_eq!(indexes[0].fields[0].field_type, FieldType::SparseVector);
850        assert!(indexes[0].fields[0].sparse_vector_config.is_none());
851    }
852
/// `sparse_vector<index_size, quantization>` should populate the field's
/// sparse vector config with the matching `IndexSize` and
/// `WeightQuantization` variants.
#[test]
fn test_sparse_vector_with_config() {
    let source = r#"
            index documents {
                field embedding: sparse_vector<u16, uint8> [indexed, stored]
                field dense: sparse_vector<u32, float32> [indexed]
            }
        "#;

    let parsed = parse_sdl(source).unwrap();
    let fields = &parsed[0].fields;
    assert_eq!(fields.len(), 2);

    // Shared check: field at `position` has the given name and config values.
    let expect_config = |position: usize,
                         name: &str,
                         index_size: IndexSize,
                         quantization: WeightQuantization| {
        let field = &fields[position];
        assert_eq!(field.name, name);
        let config = field.sparse_vector_config.as_ref().unwrap();
        assert_eq!(config.index_size, index_size);
        assert_eq!(config.weight_quantization, quantization);
    };

    // First field: u16 indices, uint8 quantization
    expect_config(0, "embedding", IndexSize::U16, WeightQuantization::UInt8);

    // Second field: u32 indices, float32 quantization
    expect_config(1, "dense", IndexSize::U32, WeightQuantization::Float32);
}
879
/// `dense_vector<768>` should parse into a `FieldType::DenseVector` field
/// whose dense vector config reports a dimension of 768.
#[test]
fn test_dense_vector_field() {
    let source = r#"
            index documents {
                field embedding: dense_vector<768> [indexed, stored]
            }
        "#;

    let parsed = parse_sdl(source).unwrap();
    assert_eq!(parsed.len(), 1);

    let fields = &parsed[0].fields;
    assert_eq!(fields.len(), 1);

    let embedding = &fields[0];
    assert_eq!(embedding.name, "embedding");
    assert_eq!(embedding.field_type, FieldType::DenseVector);

    // The dimension comes from the single type parameter.
    let dense = embedding.dense_vector_config.as_ref().unwrap();
    assert_eq!(dense.dim, 768);
}
899
/// The `vector<N>` spelling should behave like `dense_vector<N>`: the field
/// type is `FieldType::DenseVector` and the config dimension is `N`.
#[test]
fn test_dense_vector_alias() {
    let source = r#"
            index documents {
                field embedding: vector<1536> [indexed]
            }
        "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    assert_eq!(embedding.field_type, FieldType::DenseVector);

    let dense = embedding.dense_vector_config.as_ref().unwrap();
    assert_eq!(dense.dim, 1536);
}
919
/// Positional form `dense_vector<dim, "path">`: the centroids path is
/// captured and `nprobe` is expected to be 32 when it is not given.
#[test]
fn test_dense_vector_with_centroids() {
    let source = r#"
            index documents {
                field embedding: dense_vector<768, "centroids.bin"> [indexed, stored]
            }
        "#;

    let parsed = parse_sdl(source).unwrap();
    assert_eq!(parsed.len(), 1);

    let embedding = &parsed[0].fields[0];
    assert_eq!(embedding.name, "embedding");
    assert_eq!(embedding.field_type, FieldType::DenseVector);

    let dense = embedding.dense_vector_config.as_ref().unwrap();
    assert_eq!(dense.dim, 768);

    let centroids = dense.coarse_centroids_path.as_deref();
    assert_eq!(centroids, Some("centroids.bin"));

    // nprobe was omitted in the SDL, so the default applies.
    assert_eq!(dense.nprobe, 32);
}
943
/// Positional form `dense_vector<dim, "path", nprobe>`: all three values
/// should be carried into the dense vector config.
#[test]
fn test_dense_vector_with_centroids_and_nprobe() {
    let source = r#"
            index documents {
                field embedding: dense_vector<1536, "/path/to/centroids.bin", 64> [indexed]
            }
        "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);

    let centroids = dense.coarse_centroids_path.as_deref();
    assert_eq!(centroids, Some("/path/to/centroids.bin"));

    assert_eq!(dense.nprobe, 64);
}
962
/// Keyword form with only `dims:` given: the dimension is set and no
/// centroids path is recorded.
#[test]
fn test_dense_vector_keyword_syntax() {
    let source = r#"
            index documents {
                field embedding: dense_vector<dims: 1536> [indexed, stored]
            }
        "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);
    assert!(dense.coarse_centroids_path.is_none());
}
977
/// Keyword form with `dims:`, `centroids:` and `nprobe:` all given: each
/// value should appear in the dense vector config.
#[test]
fn test_dense_vector_keyword_syntax_full() {
    let source = r#"
            index documents {
                field embedding: dense_vector<dims: 1536, centroids: "/path/to/centroids.bin", nprobe: 64> [indexed]
            }
        "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 1536);

    let centroids = dense.coarse_centroids_path.as_deref();
    assert_eq!(centroids, Some("/path/to/centroids.bin"));

    assert_eq!(dense.nprobe, 64);
}
996
/// Keyword form with `dims:` and `centroids:` but no `nprobe:`: the path is
/// captured and `nprobe` is expected to be 32.
#[test]
fn test_dense_vector_keyword_syntax_partial() {
    let source = r#"
            index documents {
                field embedding: dense_vector<dims: 768, centroids: "centroids.bin"> [indexed]
            }
        "#;

    let parsed = parse_sdl(source).unwrap();
    let embedding = &parsed[0].fields[0];
    let dense = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(dense.dim, 768);

    let centroids = dense.coarse_centroids_path.as_deref();
    assert_eq!(centroids, Some("centroids.bin"));

    // nprobe was omitted in the SDL, so the default applies.
    assert_eq!(dense.nprobe, 32);
}
1015}