Skip to main content

hermes_core/dsl/sdl/
mod.rs

1//! Schema Definition Language (SDL) for Hermes
2//!
3//! A simple, readable format for defining index schemas using pest parser.
4//!
5//! # Example SDL
6//!
7//! ```text
8//! # Article index schema
9//! index articles {
10//!     # Primary text field for full-text search
11//!     field title: text [indexed, stored]
12//!
13//!     # Body content - indexed but not stored (save space)
14//!     field body: text [indexed]
15//!
16//!     # Author name
17//!     field author: text [indexed, stored]
18//!
19//!     # Publication timestamp
20//!     field published_at: i64 [indexed, stored]
21//!
22//!     # View count
23//!     field views: u64 [indexed, stored]
24//!
25//!     # Rating score
26//!     field rating: f64 [indexed, stored]
27//!
28//!     # Raw content hash (not indexed, just stored)
29//!     field content_hash: bytes [stored]
30//!
31//!     # Dense vector with IVF-RaBitQ index
32//!     field embedding: dense_vector<768> [indexed<rabitq, centroids: "centroids.bin", nprobe: 32>]
33//!
34//! }
35//! ```
36//!
37//! # Dense Vector Index Configuration
38//!
39//! Index-related parameters for dense vectors are specified in `indexed<...>`:
40//! - `rabitq` or `scann` - index type
41//! - `centroids: "path"` - path to pre-trained centroids file
42//! - `codebook: "path"` - path to PQ codebook (ScaNN only)
43//! - `nprobe: N` - number of clusters to probe (default: 32)
44
45use pest::Parser;
46use pest_derive::Parser;
47
48use super::query_field_router::{QueryRouterRule, RoutingMode};
49use super::schema::{DenseVectorQuantization, FieldType, Schema, SchemaBuilder};
50use crate::Result;
51use crate::error::Error;
52
/// SDL parser type; its parsing code and the `Rule` enum are derived by
/// `pest_derive` from the grammar file named in the `grammar` attribute.
#[derive(Parser)]
#[grammar = "dsl/sdl/sdl.pest"]
pub struct SdlParser;
56
57use super::schema::DenseVectorConfig;
58use crate::structures::{
59    IndexSize, QueryWeighting, SparseFormat, SparseQueryConfig, SparseVectorConfig,
60    WeightQuantization,
61};
62
/// Parsed field definition
///
/// One `field ...` declaration from an SDL index block, as produced by
/// `parse_field_def` and consumed by `IndexDef::to_schema`.
#[derive(Debug, Clone)]
pub struct FieldDef {
    /// Field name as written in the SDL
    pub name: String,
    /// Field type resolved from the SDL type keyword
    pub field_type: FieldType,
    /// Whether the field is indexed
    pub indexed: bool,
    /// Whether the field is stored
    pub stored: bool,
    /// Tokenizer name for text fields (e.g., "simple", "en_stem", "german")
    pub tokenizer: Option<String>,
    /// Whether this field can have multiple values (serialized as array in JSON)
    pub multi: bool,
    /// Position tracking mode for phrase queries and multi-field element tracking
    pub positions: Option<super::schema::PositionMode>,
    /// Configuration for sparse vector fields
    pub sparse_vector_config: Option<SparseVectorConfig>,
    /// Configuration for dense vector fields
    pub dense_vector_config: Option<DenseVectorConfig>,
    /// Whether this field has columnar fast-field storage
    pub fast: bool,
    /// Whether this field is a primary key (unique constraint)
    pub primary: bool,
    /// Whether build-time document reordering (BP) is enabled for BMP fields
    pub reorder: bool,
}
87
/// Parsed index definition
///
/// Holds everything declared inside one SDL `index <name> { ... }` block and
/// can be converted into a `Schema` via `to_schema`.
#[derive(Debug, Clone)]
pub struct IndexDef {
    /// Index name
    pub name: String,
    /// Field definitions, in the order they are stored here
    pub fields: Vec<FieldDef>,
    /// Names of the default fields; applied to the schema only when non-empty
    pub default_fields: Vec<String>,
    /// Query router rules for routing queries to specific fields
    pub query_routers: Vec<QueryRouterRule>,
}
97
98impl IndexDef {
99    /// Convert to a Schema
100    pub fn to_schema(&self) -> Schema {
101        let mut builder = SchemaBuilder::default();
102
103        for field in &self.fields {
104            let f = match field.field_type {
105                FieldType::Text => {
106                    let tokenizer = field.tokenizer.as_deref().unwrap_or("simple");
107                    builder.add_text_field_with_tokenizer(
108                        &field.name,
109                        field.indexed,
110                        field.stored,
111                        tokenizer,
112                    )
113                }
114                FieldType::U64 => builder.add_u64_field(&field.name, field.indexed, field.stored),
115                FieldType::I64 => builder.add_i64_field(&field.name, field.indexed, field.stored),
116                FieldType::F64 => builder.add_f64_field(&field.name, field.indexed, field.stored),
117                FieldType::Bytes => builder.add_bytes_field(&field.name, field.stored),
118                FieldType::Json => builder.add_json_field(&field.name, field.stored),
119                FieldType::SparseVector => {
120                    if let Some(config) = &field.sparse_vector_config {
121                        builder.add_sparse_vector_field_with_config(
122                            &field.name,
123                            field.indexed,
124                            field.stored,
125                            config.clone(),
126                        )
127                    } else {
128                        builder.add_sparse_vector_field(&field.name, field.indexed, field.stored)
129                    }
130                }
131                FieldType::DenseVector => {
132                    // Dense vector dimension must be specified via config
133                    let config = field
134                        .dense_vector_config
135                        .as_ref()
136                        .expect("DenseVector field requires dimension to be specified");
137                    builder.add_dense_vector_field_with_config(
138                        &field.name,
139                        field.indexed,
140                        field.stored,
141                        config.clone(),
142                    )
143                }
144            };
145            if field.multi {
146                builder.set_multi(f, true);
147            }
148            if field.fast {
149                builder.set_fast(f, true);
150            }
151            if field.primary {
152                builder.set_primary_key(f);
153            }
154            if field.reorder {
155                builder.set_reorder(f, true);
156            }
157            // Set positions: explicit > auto (ordinal for multi vectors)
158            let positions = field.positions.or({
159                // Auto-set ordinal positions for multi-valued vector fields
160                if field.multi
161                    && matches!(
162                        field.field_type,
163                        FieldType::SparseVector | FieldType::DenseVector
164                    )
165                {
166                    Some(super::schema::PositionMode::Ordinal)
167                } else {
168                    None
169                }
170            });
171            if let Some(mode) = positions {
172                builder.set_positions(f, mode);
173            }
174        }
175
176        // Set default fields if specified
177        if !self.default_fields.is_empty() {
178            builder.set_default_fields(self.default_fields.clone());
179        }
180
181        // Set query routers if specified
182        if !self.query_routers.is_empty() {
183            builder.set_query_routers(self.query_routers.clone());
184        }
185
186        builder.build()
187    }
188
189    /// Create a QueryFieldRouter from the query router rules
190    ///
191    /// Returns None if there are no query router rules defined.
192    /// Returns Err if any regex pattern is invalid.
193    pub fn to_query_router(&self) -> Result<Option<super::query_field_router::QueryFieldRouter>> {
194        if self.query_routers.is_empty() {
195            return Ok(None);
196        }
197
198        super::query_field_router::QueryFieldRouter::from_rules(&self.query_routers)
199            .map(Some)
200            .map_err(Error::Schema)
201    }
202}
203
204/// Parse field type from string
205fn parse_field_type(type_str: &str) -> Result<FieldType> {
206    match type_str {
207        "text" | "string" | "str" => Ok(FieldType::Text),
208        "u64" | "uint" | "unsigned" => Ok(FieldType::U64),
209        "i64" | "int" | "integer" => Ok(FieldType::I64),
210        "f64" | "float" | "double" => Ok(FieldType::F64),
211        "bytes" | "binary" | "blob" => Ok(FieldType::Bytes),
212        "json" => Ok(FieldType::Json),
213        "sparse_vector" => Ok(FieldType::SparseVector),
214        "dense_vector" | "vector" => Ok(FieldType::DenseVector),
215        _ => Err(Error::Schema(format!("Unknown field type: {}", type_str))),
216    }
217}
218
/// Index configuration parsed from indexed<...> attribute
///
/// Every field is optional: `None` means the parameter was not given in the
/// SDL, so the target vector config keeps its existing value when this is
/// applied to it.
#[derive(Debug, Clone, Default)]
struct IndexConfig {
    // Dense vector index params
    index_type: Option<super::schema::VectorIndexType>,
    num_clusters: Option<usize>,
    nprobe: Option<usize>,
    build_threshold: Option<usize>,
    // Sparse vector index params
    sparse_format: Option<SparseFormat>,
    quantization: Option<WeightQuantization>,
    weight_threshold: Option<f32>,
    block_size: Option<usize>,
    pruning: Option<f32>,
    min_terms: Option<usize>,
    // Sparse vector query-time config
    query_tokenizer: Option<String>,
    query_weighting: Option<QueryWeighting>,
    query_weight_threshold: Option<f32>,
    query_max_dims: Option<usize>,
    query_pruning: Option<f32>,
    query_min_query_dims: Option<usize>,
    // BMP fixed dims (vocabulary size) and max weight scale
    dims: Option<u32>,
    max_weight: Option<f32>,
    // Position tracking mode for phrase queries
    positions: Option<super::schema::PositionMode>,
}
246
/// Parsed attributes from SDL field definition
///
/// Result of `parse_attributes` for one `[...]` attribute list.
struct ParsedAttributes {
    /// Set by `indexed` or `indexed<...>`
    indexed: bool,
    /// Set by `stored` or `stored<...>`
    stored: bool,
    /// Set by the `stored<...>` form
    multi: bool,
    /// Set by `fast`
    fast: bool,
    /// Set by `primary`
    primary: bool,
    /// Set by `reorder`
    reorder: bool,
    /// Parameters parsed from `indexed<...>`, if that form was used
    index_config: Option<IndexConfig>,
}
257
258/// Parse attributes from pest pair
259fn parse_attributes(pair: pest::iterators::Pair<Rule>) -> ParsedAttributes {
260    let mut attrs = ParsedAttributes {
261        indexed: false,
262        stored: false,
263        multi: false,
264        fast: false,
265        primary: false,
266        reorder: false,
267        index_config: None,
268    };
269
270    for attr in pair.into_inner() {
271        if attr.as_rule() == Rule::attribute {
272            let mut found_config = false;
273            for inner in attr.clone().into_inner() {
274                match inner.as_rule() {
275                    Rule::indexed_with_config => {
276                        attrs.indexed = true;
277                        attrs.index_config = Some(parse_index_config(inner));
278                        found_config = true;
279                        break;
280                    }
281                    Rule::stored_with_config => {
282                        attrs.stored = true;
283                        attrs.multi = true; // stored<multi>
284                        found_config = true;
285                        break;
286                    }
287                    _ => {}
288                }
289            }
290            if !found_config {
291                match attr.as_str() {
292                    "indexed" => attrs.indexed = true,
293                    "stored" => attrs.stored = true,
294                    "fast" => attrs.fast = true,
295                    "primary" => attrs.primary = true,
296                    "reorder" => attrs.reorder = true,
297                    _ => {}
298                }
299            }
300        }
301    }
302
303    attrs
304}
305
306/// Parse index configuration from indexed<...> attribute
307fn parse_index_config(pair: pest::iterators::Pair<Rule>) -> IndexConfig {
308    let mut config = IndexConfig::default();
309
310    // indexed_with_config = { "indexed" ~ "<" ~ index_config_params ~ ">" }
311    // index_config_params = { index_config_param ~ ("," ~ index_config_param)* }
312    // index_config_param = { index_type_kwarg | centroids_kwarg | codebook_kwarg | nprobe_kwarg | index_type_spec }
313
314    for inner in pair.into_inner() {
315        if inner.as_rule() == Rule::index_config_params {
316            for param in inner.into_inner() {
317                if param.as_rule() == Rule::index_config_param {
318                    for p in param.into_inner() {
319                        parse_single_index_config_param(&mut config, p);
320                    }
321                }
322            }
323        }
324    }
325
326    config
327}
328
329/// Parse a single index config parameter
330fn parse_single_index_config_param(config: &mut IndexConfig, p: pest::iterators::Pair<Rule>) {
331    use super::schema::VectorIndexType;
332
333    match p.as_rule() {
334        Rule::index_type_spec => {
335            config.index_type = Some(match p.as_str() {
336                "flat" => VectorIndexType::Flat,
337                "rabitq" => VectorIndexType::RaBitQ,
338                "ivf_rabitq" => VectorIndexType::IvfRaBitQ,
339                "scann" => VectorIndexType::ScaNN,
340                _ => VectorIndexType::RaBitQ,
341            });
342        }
343        Rule::index_type_kwarg => {
344            // index_type_kwarg = { "index" ~ ":" ~ index_type_spec }
345            if let Some(t) = p.into_inner().next() {
346                config.index_type = Some(match t.as_str() {
347                    "flat" => VectorIndexType::Flat,
348                    "rabitq" => VectorIndexType::RaBitQ,
349                    "ivf_rabitq" => VectorIndexType::IvfRaBitQ,
350                    "scann" => VectorIndexType::ScaNN,
351                    _ => VectorIndexType::RaBitQ,
352                });
353            }
354        }
355        Rule::num_clusters_kwarg => {
356            // num_clusters_kwarg = { "num_clusters" ~ ":" ~ num_clusters_spec }
357            if let Some(n) = p.into_inner().next() {
358                config.num_clusters = Some(n.as_str().parse().unwrap_or_else(|_| {
359                    log::warn!(
360                        "Invalid num_clusters value '{}', using default 256",
361                        n.as_str()
362                    );
363                    256
364                }));
365            }
366        }
367        Rule::build_threshold_kwarg => {
368            // build_threshold_kwarg = { "build_threshold" ~ ":" ~ build_threshold_spec }
369            if let Some(n) = p.into_inner().next() {
370                config.build_threshold = Some(n.as_str().parse().unwrap_or_else(|_| {
371                    log::warn!(
372                        "Invalid build_threshold value '{}', using default 10000",
373                        n.as_str()
374                    );
375                    10000
376                }));
377            }
378        }
379        Rule::nprobe_kwarg => {
380            // nprobe_kwarg = { "nprobe" ~ ":" ~ nprobe_spec }
381            if let Some(n) = p.into_inner().next() {
382                config.nprobe = Some(n.as_str().parse().unwrap_or_else(|_| {
383                    log::warn!("Invalid nprobe value '{}', using default 32", n.as_str());
384                    32
385                }));
386            }
387        }
388        Rule::quantization_kwarg => {
389            // quantization_kwarg = { "quantization" ~ ":" ~ quantization_spec }
390            if let Some(q) = p.into_inner().next() {
391                config.quantization = Some(match q.as_str() {
392                    "float32" | "f32" => WeightQuantization::Float32,
393                    "float16" | "f16" => WeightQuantization::Float16,
394                    "uint8" | "u8" => WeightQuantization::UInt8,
395                    "uint4" | "u4" => WeightQuantization::UInt4,
396                    _ => WeightQuantization::default(),
397                });
398            }
399        }
400        Rule::weight_threshold_kwarg => {
401            // weight_threshold_kwarg = { "weight_threshold" ~ ":" ~ weight_threshold_spec }
402            if let Some(t) = p.into_inner().next() {
403                config.weight_threshold = Some(t.as_str().parse().unwrap_or_else(|_| {
404                    log::warn!(
405                        "Invalid weight_threshold value '{}', using default 0.0",
406                        t.as_str()
407                    );
408                    0.0
409                }));
410            }
411        }
412        Rule::block_size_kwarg => {
413            // block_size_kwarg = { "block_size" ~ ":" ~ block_size_spec }
414            if let Some(n) = p.into_inner().next() {
415                config.block_size = Some(n.as_str().parse().unwrap_or_else(|_| {
416                    log::warn!(
417                        "Invalid block_size value '{}', using default 128",
418                        n.as_str()
419                    );
420                    128
421                }));
422            }
423        }
424        Rule::pruning_kwarg => {
425            // pruning_kwarg = { "pruning" ~ ":" ~ pruning_spec }
426            if let Some(f) = p.into_inner().next() {
427                config.pruning = Some(f.as_str().parse().unwrap_or_else(|_| {
428                    log::warn!("Invalid pruning value '{}', using default 1.0", f.as_str());
429                    1.0
430                }));
431            }
432        }
433        Rule::min_terms_kwarg => {
434            if let Some(n) = p.into_inner().next() {
435                config.min_terms = Some(n.as_str().parse().unwrap_or_else(|_| {
436                    log::warn!("Invalid min_terms value '{}', using default 4", n.as_str());
437                    4
438                }));
439            }
440        }
441        Rule::sparse_format_kwarg => {
442            // sparse_format_kwarg = { "format" ~ ":" ~ sparse_format_spec }
443            if let Some(f) = p.into_inner().next() {
444                config.sparse_format = Some(match f.as_str() {
445                    "bmp" => SparseFormat::Bmp,
446                    "maxscore" => SparseFormat::MaxScore,
447                    _ => SparseFormat::default(),
448                });
449            }
450        }
451        Rule::sparse_dims_kwarg => {
452            if let Some(n) = p.into_inner().next() {
453                config.dims = Some(n.as_str().parse().unwrap_or_else(|_| {
454                    log::warn!("Invalid dims value '{}', using default 105879", n.as_str());
455                    105879
456                }));
457            }
458        }
459        Rule::sparse_max_weight_kwarg => {
460            if let Some(f) = p.into_inner().next() {
461                config.max_weight = Some(f.as_str().parse().unwrap_or_else(|_| {
462                    log::warn!(
463                        "Invalid max_weight value '{}', using default 5.0",
464                        f.as_str()
465                    );
466                    5.0
467                }));
468            }
469        }
470        Rule::query_config_block => {
471            // query_config_block = { "query" ~ "<" ~ query_config_params ~ ">" }
472            parse_query_config_block(config, p);
473        }
474        Rule::positions_kwarg => {
475            // positions_kwarg = { "positions" | "ordinal" | "token_position" }
476            use super::schema::PositionMode;
477            config.positions = Some(match p.as_str() {
478                "ordinal" => PositionMode::Ordinal,
479                "token_position" => PositionMode::TokenPosition,
480                _ => PositionMode::Full, // "positions" or any other value defaults to Full
481            });
482        }
483        _ => {}
484    }
485}
486
487/// Parse query configuration block: query<tokenizer: "...", weighting: idf>
488fn parse_query_config_block(config: &mut IndexConfig, pair: pest::iterators::Pair<Rule>) {
489    for inner in pair.into_inner() {
490        if inner.as_rule() == Rule::query_config_params {
491            for param in inner.into_inner() {
492                if param.as_rule() == Rule::query_config_param {
493                    for p in param.into_inner() {
494                        match p.as_rule() {
495                            Rule::query_tokenizer_kwarg => {
496                                // query_tokenizer_kwarg = { "tokenizer" ~ ":" ~ tokenizer_path }
497                                if let Some(path) = p.into_inner().next()
498                                    && let Some(inner_path) = path.into_inner().next()
499                                {
500                                    config.query_tokenizer = Some(inner_path.as_str().to_string());
501                                }
502                            }
503                            Rule::query_weighting_kwarg => {
504                                // query_weighting_kwarg = { "weighting" ~ ":" ~ weighting_spec }
505                                if let Some(w) = p.into_inner().next() {
506                                    config.query_weighting = Some(match w.as_str() {
507                                        "one" => QueryWeighting::One,
508                                        "idf" => QueryWeighting::Idf,
509                                        "idf_file" => QueryWeighting::IdfFile,
510                                        _ => QueryWeighting::One,
511                                    });
512                                }
513                            }
514                            Rule::query_weight_threshold_kwarg => {
515                                if let Some(t) = p.into_inner().next() {
516                                    config.query_weight_threshold =
517                                        Some(t.as_str().parse().unwrap_or_else(|_| {
518                                            log::warn!(
519                                                "Invalid query weight_threshold '{}', using 0.0",
520                                                t.as_str()
521                                            );
522                                            0.0
523                                        }));
524                                }
525                            }
526                            Rule::query_max_dims_kwarg => {
527                                if let Some(t) = p.into_inner().next() {
528                                    config.query_max_dims =
529                                        Some(t.as_str().parse().unwrap_or_else(|_| {
530                                            log::warn!(
531                                                "Invalid query max_dims '{}', using 0",
532                                                t.as_str()
533                                            );
534                                            0
535                                        }));
536                                }
537                            }
538                            Rule::query_pruning_kwarg => {
539                                if let Some(t) = p.into_inner().next() {
540                                    config.query_pruning =
541                                        Some(t.as_str().parse().unwrap_or_else(|_| {
542                                            log::warn!(
543                                                "Invalid query pruning '{}', using 1.0",
544                                                t.as_str()
545                                            );
546                                            1.0
547                                        }));
548                                }
549                            }
550                            Rule::query_min_query_dims_kwarg => {
551                                if let Some(t) = p.into_inner().next() {
552                                    config.query_min_query_dims =
553                                        Some(t.as_str().parse().unwrap_or_else(|_| {
554                                            log::warn!(
555                                                "Invalid query min_query_dims '{}', using 4",
556                                                t.as_str()
557                                            );
558                                            4
559                                        }));
560                                }
561                            }
562                            _ => {}
563                        }
564                    }
565                }
566            }
567        }
568    }
569}
570
571/// Parse a field definition from pest pair
572fn parse_field_def(pair: pest::iterators::Pair<Rule>) -> Result<FieldDef> {
573    let mut inner = pair.into_inner();
574
575    let name = inner
576        .next()
577        .ok_or_else(|| Error::Schema("Missing field name".to_string()))?
578        .as_str()
579        .to_string();
580
581    let field_type_str = inner
582        .next()
583        .ok_or_else(|| Error::Schema("Missing field type".to_string()))?
584        .as_str();
585
586    let field_type = parse_field_type(field_type_str)?;
587
588    // Parse optional tokenizer spec, sparse_vector_config, dense_vector_config, and attributes
589    let mut tokenizer = None;
590    let mut sparse_vector_config = None;
591    let mut dense_vector_config = None;
592    let mut indexed = true;
593    let mut stored = true;
594    let mut multi = false;
595    let mut fast = false;
596    let mut primary = false;
597    let mut reorder = false;
598    let mut index_config: Option<IndexConfig> = None;
599
600    for item in inner {
601        match item.as_rule() {
602            Rule::tokenizer_spec => {
603                // Extract tokenizer name from <name>
604                if let Some(tok_name) = item.into_inner().next() {
605                    tokenizer = Some(tok_name.as_str().to_string());
606                }
607            }
608            Rule::sparse_vector_config => {
609                // Parse named parameters: <index_size: u16, quantization: uint8, weight_threshold: 0.1>
610                sparse_vector_config = Some(parse_sparse_vector_config(item));
611            }
612            Rule::dense_vector_config => {
613                // Parse dense_vector_params (keyword or positional) - only dims
614                dense_vector_config = Some(parse_dense_vector_config(item));
615            }
616            Rule::attributes => {
617                let attrs = parse_attributes(item);
618                indexed = attrs.indexed;
619                stored = attrs.stored;
620                multi = attrs.multi;
621                fast = attrs.fast;
622                primary = attrs.primary;
623                reorder = attrs.reorder;
624                index_config = attrs.index_config;
625            }
626            _ => {}
627        }
628    }
629
630    // Primary key implies fast + indexed (needed for dedup lookups)
631    if primary {
632        fast = true;
633        indexed = true;
634    }
635
636    // Merge index config into vector configs if both exist
637    let mut positions = None;
638    if let Some(idx_cfg) = index_config {
639        positions = idx_cfg.positions;
640        if let Some(ref mut dv_config) = dense_vector_config {
641            apply_index_config_to_dense_vector(dv_config, idx_cfg);
642        } else if field_type == FieldType::SparseVector {
643            // For sparse vectors, create default config if not present and apply index params
644            let sv_config = sparse_vector_config.get_or_insert(SparseVectorConfig::default());
645            apply_index_config_to_sparse_vector(sv_config, idx_cfg);
646        }
647    }
648
649    Ok(FieldDef {
650        name,
651        field_type,
652        indexed,
653        stored,
654        tokenizer,
655        multi,
656        positions,
657        sparse_vector_config,
658        dense_vector_config,
659        fast,
660        primary,
661        reorder,
662    })
663}
664
665/// Apply index configuration from indexed<...> to DenseVectorConfig
666fn apply_index_config_to_dense_vector(config: &mut DenseVectorConfig, idx_cfg: IndexConfig) {
667    // Apply index type if specified
668    if let Some(index_type) = idx_cfg.index_type {
669        config.index_type = index_type;
670    }
671
672    // Apply num_clusters for IVF-based indexes
673    if idx_cfg.num_clusters.is_some() {
674        config.num_clusters = idx_cfg.num_clusters;
675    }
676
677    // Apply nprobe if specified
678    if let Some(nprobe) = idx_cfg.nprobe {
679        config.nprobe = nprobe;
680    }
681
682    // Apply build_threshold if specified
683    if idx_cfg.build_threshold.is_some() {
684        config.build_threshold = idx_cfg.build_threshold;
685    }
686}
687
688/// Parse sparse_vector_config - only index_size (positional)
689/// Example: <u16> or <u32>
690fn parse_sparse_vector_config(pair: pest::iterators::Pair<Rule>) -> SparseVectorConfig {
691    let mut index_size = IndexSize::default();
692
693    // Parse positional index_size_spec
694    for inner in pair.into_inner() {
695        if inner.as_rule() == Rule::index_size_spec {
696            index_size = match inner.as_str() {
697                "u16" => IndexSize::U16,
698                "u32" => IndexSize::U32,
699                _ => IndexSize::default(),
700            };
701        }
702    }
703
704    SparseVectorConfig {
705        format: SparseFormat::default(),
706        index_size,
707        weight_quantization: WeightQuantization::default(),
708        weight_threshold: 0.0,
709        block_size: 128,
710        bmp_block_size: 64,
711        max_bmp_grid_bytes: 0,
712        bmp_superblock_size: 64,
713        pruning: None,
714        query_config: None,
715        dims: None,
716        max_weight: None,
717        min_terms: 4,
718    }
719}
720
721/// Apply index configuration from indexed<...> to SparseVectorConfig
722fn apply_index_config_to_sparse_vector(config: &mut SparseVectorConfig, idx_cfg: IndexConfig) {
723    if let Some(f) = idx_cfg.sparse_format {
724        config.format = f;
725    }
726    if let Some(q) = idx_cfg.quantization {
727        config.weight_quantization = q;
728    }
729    if let Some(t) = idx_cfg.weight_threshold {
730        config.weight_threshold = t;
731    }
732    if let Some(bs) = idx_cfg.block_size {
733        let adjusted = bs.next_power_of_two();
734        if adjusted != bs {
735            log::warn!(
736                "block_size {} adjusted to next power of two: {}",
737                bs,
738                adjusted
739            );
740        }
741        config.block_size = adjusted;
742    }
743    if let Some(p) = idx_cfg.pruning {
744        let clamped = p.clamp(0.0, 1.0);
745        if (clamped - p).abs() > f32::EPSILON {
746            log::warn!(
747                "pruning {} clamped to valid range [0.0, 1.0]: {}",
748                p,
749                clamped
750            );
751        }
752        config.pruning = Some(clamped);
753    }
754    if let Some(mt) = idx_cfg.min_terms {
755        config.min_terms = mt;
756    }
757    if let Some(d) = idx_cfg.dims {
758        config.dims = Some(d);
759    }
760    if let Some(mw) = idx_cfg.max_weight {
761        config.max_weight = Some(mw);
762    }
763    // Apply query-time configuration if present
764    if idx_cfg.query_tokenizer.is_some()
765        || idx_cfg.query_weighting.is_some()
766        || idx_cfg.query_weight_threshold.is_some()
767        || idx_cfg.query_max_dims.is_some()
768        || idx_cfg.query_pruning.is_some()
769        || idx_cfg.query_min_query_dims.is_some()
770    {
771        let query_config = config
772            .query_config
773            .get_or_insert(SparseQueryConfig::default());
774        if let Some(tokenizer) = idx_cfg.query_tokenizer {
775            query_config.tokenizer = Some(tokenizer);
776        }
777        if let Some(weighting) = idx_cfg.query_weighting {
778            query_config.weighting = weighting;
779        }
780        if let Some(t) = idx_cfg.query_weight_threshold {
781            query_config.weight_threshold = t;
782        }
783        if let Some(d) = idx_cfg.query_max_dims {
784            query_config.max_query_dims = Some(d);
785        }
786        if let Some(p) = idx_cfg.query_pruning {
787            query_config.pruning = Some(p);
788        }
789        if let Some(m) = idx_cfg.query_min_query_dims {
790            query_config.min_query_dims = m;
791        }
792    }
793}
794
795/// Parse dense_vector_config - dims and optional quantization type
796/// All index-related params are in indexed<...> attribute
797fn parse_dense_vector_config(pair: pest::iterators::Pair<Rule>) -> DenseVectorConfig {
798    let mut dim: usize = 0;
799    let mut quantization = DenseVectorQuantization::F32;
800
801    // Navigate to dense_vector_params
802    for params in pair.into_inner() {
803        if params.as_rule() == Rule::dense_vector_params {
804            for inner in params.into_inner() {
805                match inner.as_rule() {
806                    Rule::dense_vector_keyword_params => {
807                        for kwarg in inner.into_inner() {
808                            match kwarg.as_rule() {
809                                Rule::dims_kwarg => {
810                                    if let Some(d) = kwarg.into_inner().next() {
811                                        dim = d.as_str().parse().unwrap_or(0);
812                                    }
813                                }
814                                Rule::quant_type_spec => {
815                                    quantization = parse_quant_type(kwarg.as_str());
816                                }
817                                _ => {}
818                            }
819                        }
820                    }
821                    Rule::dense_vector_positional_params => {
822                        for item in inner.into_inner() {
823                            match item.as_rule() {
824                                Rule::dimension_spec => {
825                                    dim = item.as_str().parse().unwrap_or(0);
826                                }
827                                Rule::quant_type_spec => {
828                                    quantization = parse_quant_type(item.as_str());
829                                }
830                                _ => {}
831                            }
832                        }
833                    }
834                    _ => {}
835                }
836            }
837        }
838    }
839
840    DenseVectorConfig::new(dim).with_quantization(quantization)
841}
842
843fn parse_quant_type(s: &str) -> DenseVectorQuantization {
844    match s.trim() {
845        "f16" => DenseVectorQuantization::F16,
846        "uint8" | "u8" => DenseVectorQuantization::UInt8,
847        _ => DenseVectorQuantization::F32,
848    }
849}
850
851/// Parse default_fields definition
852fn parse_default_fields_def(pair: pest::iterators::Pair<Rule>) -> Vec<String> {
853    pair.into_inner().map(|p| p.as_str().to_string()).collect()
854}
855
856/// Parse a query router definition
857fn parse_query_router_def(pair: pest::iterators::Pair<Rule>) -> Result<QueryRouterRule> {
858    let mut pattern = String::new();
859    let mut substitution = String::new();
860    let mut target_field = String::new();
861    let mut mode = RoutingMode::Additional;
862
863    for prop in pair.into_inner() {
864        if prop.as_rule() != Rule::query_router_prop {
865            continue;
866        }
867
868        for inner in prop.into_inner() {
869            match inner.as_rule() {
870                Rule::query_router_pattern => {
871                    if let Some(regex_str) = inner.into_inner().next() {
872                        pattern = parse_string_value(regex_str);
873                    }
874                }
875                Rule::query_router_substitution => {
876                    if let Some(quoted) = inner.into_inner().next() {
877                        substitution = parse_string_value(quoted);
878                    }
879                }
880                Rule::query_router_target => {
881                    if let Some(ident) = inner.into_inner().next() {
882                        target_field = ident.as_str().to_string();
883                    }
884                }
885                Rule::query_router_mode => {
886                    if let Some(mode_val) = inner.into_inner().next() {
887                        mode = match mode_val.as_str() {
888                            "exclusive" => RoutingMode::Exclusive,
889                            "additional" => RoutingMode::Additional,
890                            _ => RoutingMode::Additional,
891                        };
892                    }
893                }
894                _ => {}
895            }
896        }
897    }
898
899    if pattern.is_empty() {
900        return Err(Error::Schema("query_router missing 'pattern'".to_string()));
901    }
902    if substitution.is_empty() {
903        return Err(Error::Schema(
904            "query_router missing 'substitution'".to_string(),
905        ));
906    }
907    if target_field.is_empty() {
908        return Err(Error::Schema(
909            "query_router missing 'target_field'".to_string(),
910        ));
911    }
912
913    Ok(QueryRouterRule {
914        pattern,
915        substitution,
916        target_field,
917        mode,
918    })
919}
920
921/// Parse a string value from quoted_string, raw_string, or regex_string
922fn parse_string_value(pair: pest::iterators::Pair<Rule>) -> String {
923    let s = pair.as_str();
924    match pair.as_rule() {
925        Rule::regex_string => {
926            // regex_string contains either raw_string or quoted_string
927            if let Some(inner) = pair.into_inner().next() {
928                parse_string_value(inner)
929            } else {
930                s.to_string()
931            }
932        }
933        Rule::raw_string => {
934            // r"..." - strip r" prefix and " suffix
935            s[2..s.len() - 1].to_string()
936        }
937        Rule::quoted_string => {
938            // "..." - strip quotes and handle escapes
939            let inner = &s[1..s.len() - 1];
940            // Simple escape handling
941            inner
942                .replace("\\n", "\n")
943                .replace("\\t", "\t")
944                .replace("\\\"", "\"")
945                .replace("\\\\", "\\")
946        }
947        _ => s.to_string(),
948    }
949}
950
951/// Parse an index definition from pest pair
952fn parse_index_def(pair: pest::iterators::Pair<Rule>) -> Result<IndexDef> {
953    let mut inner = pair.into_inner();
954
955    let name = inner
956        .next()
957        .ok_or_else(|| Error::Schema("Missing index name".to_string()))?
958        .as_str()
959        .to_string();
960
961    let mut fields = Vec::new();
962    let mut default_fields = Vec::new();
963    let mut query_routers = Vec::new();
964
965    for item in inner {
966        match item.as_rule() {
967            Rule::field_def => {
968                fields.push(parse_field_def(item)?);
969            }
970            Rule::default_fields_def => {
971                default_fields = parse_default_fields_def(item);
972            }
973            Rule::query_router_def => {
974                query_routers.push(parse_query_router_def(item)?);
975            }
976            _ => {}
977        }
978    }
979
980    // Validate primary key constraints
981    let primary_fields: Vec<&FieldDef> = fields.iter().filter(|f| f.primary).collect();
982    if primary_fields.len() > 1 {
983        return Err(Error::Schema(format!(
984            "Index '{}' has {} primary key fields, but at most one is allowed",
985            name,
986            primary_fields.len()
987        )));
988    }
989    if let Some(pk) = primary_fields.first() {
990        if pk.field_type != FieldType::Text {
991            return Err(Error::Schema(format!(
992                "Primary key field '{}' must be of type text, got {:?}",
993                pk.name, pk.field_type
994            )));
995        }
996        if pk.multi {
997            return Err(Error::Schema(format!(
998                "Primary key field '{}' cannot be multi-valued",
999                pk.name
1000            )));
1001        }
1002    }
1003
1004    Ok(IndexDef {
1005        name,
1006        fields,
1007        default_fields,
1008        query_routers,
1009    })
1010}
1011
1012/// Parse SDL from a string
1013pub fn parse_sdl(input: &str) -> Result<Vec<IndexDef>> {
1014    let pairs = SdlParser::parse(Rule::file, input)
1015        .map_err(|e| Error::Schema(format!("Parse error: {}", e)))?;
1016
1017    let mut indexes = Vec::new();
1018
1019    for pair in pairs {
1020        if pair.as_rule() == Rule::file {
1021            for inner in pair.into_inner() {
1022                if inner.as_rule() == Rule::index_def {
1023                    indexes.push(parse_index_def(inner)?);
1024                }
1025            }
1026        }
1027    }
1028
1029    Ok(indexes)
1030}
1031
1032/// Parse SDL and return a single index definition
1033pub fn parse_single_index(input: &str) -> Result<IndexDef> {
1034    let indexes = parse_sdl(input)?;
1035
1036    if indexes.is_empty() {
1037        return Err(Error::Schema("No index definition found".to_string()));
1038    }
1039
1040    if indexes.len() > 1 {
1041        return Err(Error::Schema(
1042            "Multiple index definitions found, expected one".to_string(),
1043        ));
1044    }
1045
1046    Ok(indexes.into_iter().next().unwrap())
1047}
1048
1049#[cfg(test)]
1050mod tests {
1051    use super::*;
1052
1053    #[test]
1054    fn test_parse_simple_schema() {
1055        let sdl = r#"
1056            index articles {
1057                field title: text [indexed, stored]
1058                field body: text [indexed]
1059            }
1060        "#;
1061
1062        let indexes = parse_sdl(sdl).unwrap();
1063        assert_eq!(indexes.len(), 1);
1064
1065        let index = &indexes[0];
1066        assert_eq!(index.name, "articles");
1067        assert_eq!(index.fields.len(), 2);
1068
1069        assert_eq!(index.fields[0].name, "title");
1070        assert!(matches!(index.fields[0].field_type, FieldType::Text));
1071        assert!(index.fields[0].indexed);
1072        assert!(index.fields[0].stored);
1073
1074        assert_eq!(index.fields[1].name, "body");
1075        assert!(matches!(index.fields[1].field_type, FieldType::Text));
1076        assert!(index.fields[1].indexed);
1077        assert!(!index.fields[1].stored);
1078    }
1079
1080    #[test]
1081    fn test_parse_all_field_types() {
1082        let sdl = r#"
1083            index test {
1084                field text_field: text [indexed, stored]
1085                field u64_field: u64 [indexed, stored]
1086                field i64_field: i64 [indexed, stored]
1087                field f64_field: f64 [indexed, stored]
1088                field bytes_field: bytes [stored]
1089            }
1090        "#;
1091
1092        let indexes = parse_sdl(sdl).unwrap();
1093        let index = &indexes[0];
1094
1095        assert!(matches!(index.fields[0].field_type, FieldType::Text));
1096        assert!(matches!(index.fields[1].field_type, FieldType::U64));
1097        assert!(matches!(index.fields[2].field_type, FieldType::I64));
1098        assert!(matches!(index.fields[3].field_type, FieldType::F64));
1099        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
1100    }
1101
1102    #[test]
1103    fn test_parse_with_comments() {
1104        let sdl = r#"
1105            # This is a comment
1106            index articles {
1107                # Title field
1108                field title: text [indexed, stored]
1109                field body: text [indexed] # inline comment not supported yet
1110            }
1111        "#;
1112
1113        let indexes = parse_sdl(sdl).unwrap();
1114        assert_eq!(indexes[0].fields.len(), 2);
1115    }
1116
1117    #[test]
1118    fn test_parse_type_aliases() {
1119        let sdl = r#"
1120            index test {
1121                field a: string [indexed]
1122                field b: int [indexed]
1123                field c: uint [indexed]
1124                field d: float [indexed]
1125                field e: binary [stored]
1126            }
1127        "#;
1128
1129        let indexes = parse_sdl(sdl).unwrap();
1130        let index = &indexes[0];
1131
1132        assert!(matches!(index.fields[0].field_type, FieldType::Text));
1133        assert!(matches!(index.fields[1].field_type, FieldType::I64));
1134        assert!(matches!(index.fields[2].field_type, FieldType::U64));
1135        assert!(matches!(index.fields[3].field_type, FieldType::F64));
1136        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
1137    }
1138
1139    #[test]
1140    fn test_to_schema() {
1141        let sdl = r#"
1142            index articles {
1143                field title: text [indexed, stored]
1144                field views: u64 [indexed, stored]
1145            }
1146        "#;
1147
1148        let indexes = parse_sdl(sdl).unwrap();
1149        let schema = indexes[0].to_schema();
1150
1151        assert!(schema.get_field("title").is_some());
1152        assert!(schema.get_field("views").is_some());
1153        assert!(schema.get_field("nonexistent").is_none());
1154    }
1155
1156    #[test]
1157    fn test_default_attributes() {
1158        let sdl = r#"
1159            index test {
1160                field title: text
1161            }
1162        "#;
1163
1164        let indexes = parse_sdl(sdl).unwrap();
1165        let field = &indexes[0].fields[0];
1166
1167        // Default should be indexed and stored
1168        assert!(field.indexed);
1169        assert!(field.stored);
1170    }
1171
1172    #[test]
1173    fn test_multiple_indexes() {
1174        let sdl = r#"
1175            index articles {
1176                field title: text [indexed, stored]
1177            }
1178
1179            index users {
1180                field name: text [indexed, stored]
1181                field email: text [indexed, stored]
1182            }
1183        "#;
1184
1185        let indexes = parse_sdl(sdl).unwrap();
1186        assert_eq!(indexes.len(), 2);
1187        assert_eq!(indexes[0].name, "articles");
1188        assert_eq!(indexes[1].name, "users");
1189    }
1190
1191    #[test]
1192    fn test_tokenizer_spec() {
1193        let sdl = r#"
1194            index articles {
1195                field title: text<en_stem> [indexed, stored]
1196                field body: text<simple> [indexed]
1197                field author: text [indexed, stored]
1198            }
1199        "#;
1200
1201        let indexes = parse_sdl(sdl).unwrap();
1202        let index = &indexes[0];
1203
1204        assert_eq!(index.fields[0].name, "title");
1205        assert_eq!(index.fields[0].tokenizer, Some("en_stem".to_string()));
1206
1207        assert_eq!(index.fields[1].name, "body");
1208        assert_eq!(index.fields[1].tokenizer, Some("simple".to_string()));
1209
1210        assert_eq!(index.fields[2].name, "author");
1211        assert_eq!(index.fields[2].tokenizer, None); // No tokenizer specified
1212    }
1213
1214    #[test]
1215    fn test_tokenizer_in_schema() {
1216        let sdl = r#"
1217            index articles {
1218                field title: text<german> [indexed, stored]
1219                field body: text<en_stem> [indexed]
1220            }
1221        "#;
1222
1223        let indexes = parse_sdl(sdl).unwrap();
1224        let schema = indexes[0].to_schema();
1225
1226        let title_field = schema.get_field("title").unwrap();
1227        let title_entry = schema.get_field_entry(title_field).unwrap();
1228        assert_eq!(title_entry.tokenizer, Some("german".to_string()));
1229
1230        let body_field = schema.get_field("body").unwrap();
1231        let body_entry = schema.get_field_entry(body_field).unwrap();
1232        assert_eq!(body_entry.tokenizer, Some("en_stem".to_string()));
1233    }
1234
1235    #[test]
1236    fn test_query_router_basic() {
1237        let sdl = r#"
1238            index documents {
1239                field title: text [indexed, stored]
1240                field uri: text [indexed, stored]
1241
1242                query_router {
1243                    pattern: "10\\.\\d{4,}/[^\\s]+"
1244                    substitution: "doi://{0}"
1245                    target_field: uris
1246                    mode: exclusive
1247                }
1248            }
1249        "#;
1250
1251        let indexes = parse_sdl(sdl).unwrap();
1252        let index = &indexes[0];
1253
1254        assert_eq!(index.query_routers.len(), 1);
1255        let router = &index.query_routers[0];
1256        assert_eq!(router.pattern, r"10\.\d{4,}/[^\s]+");
1257        assert_eq!(router.substitution, "doi://{0}");
1258        assert_eq!(router.target_field, "uris");
1259        assert_eq!(router.mode, RoutingMode::Exclusive);
1260    }
1261
1262    #[test]
1263    fn test_query_router_raw_string() {
1264        let sdl = r#"
1265            index documents {
1266                field uris: text [indexed, stored]
1267
1268                query_router {
1269                    pattern: r"^pmid:(\d+)$"
1270                    substitution: "pubmed://{1}"
1271                    target_field: uris
1272                    mode: additional
1273                }
1274            }
1275        "#;
1276
1277        let indexes = parse_sdl(sdl).unwrap();
1278        let router = &indexes[0].query_routers[0];
1279
1280        assert_eq!(router.pattern, r"^pmid:(\d+)$");
1281        assert_eq!(router.substitution, "pubmed://{1}");
1282        assert_eq!(router.mode, RoutingMode::Additional);
1283    }
1284
1285    #[test]
1286    fn test_multiple_query_routers() {
1287        let sdl = r#"
1288            index documents {
1289                field uris: text [indexed, stored]
1290
1291                query_router {
1292                    pattern: r"^doi:(10\.\d{4,}/[^\s]+)$"
1293                    substitution: "doi://{1}"
1294                    target_field: uris
1295                    mode: exclusive
1296                }
1297
1298                query_router {
1299                    pattern: r"^pmid:(\d+)$"
1300                    substitution: "pubmed://{1}"
1301                    target_field: uris
1302                    mode: exclusive
1303                }
1304
1305                query_router {
1306                    pattern: r"^arxiv:(\d+\.\d+)$"
1307                    substitution: "arxiv://{1}"
1308                    target_field: uris
1309                    mode: additional
1310                }
1311            }
1312        "#;
1313
1314        let indexes = parse_sdl(sdl).unwrap();
1315        assert_eq!(indexes[0].query_routers.len(), 3);
1316    }
1317
1318    #[test]
1319    fn test_query_router_default_mode() {
1320        let sdl = r#"
1321            index documents {
1322                field uris: text [indexed, stored]
1323
1324                query_router {
1325                    pattern: r"test"
1326                    substitution: "{0}"
1327                    target_field: uris
1328                }
1329            }
1330        "#;
1331
1332        let indexes = parse_sdl(sdl).unwrap();
1333        // Default mode should be Additional
1334        assert_eq!(indexes[0].query_routers[0].mode, RoutingMode::Additional);
1335    }
1336
1337    #[test]
1338    fn test_multi_attribute() {
1339        let sdl = r#"
1340            index documents {
1341                field uris: text [indexed, stored<multi>]
1342                field title: text [indexed, stored]
1343            }
1344        "#;
1345
1346        let indexes = parse_sdl(sdl).unwrap();
1347        assert_eq!(indexes.len(), 1);
1348
1349        let fields = &indexes[0].fields;
1350        assert_eq!(fields.len(), 2);
1351
1352        // uris should have multi=true
1353        assert_eq!(fields[0].name, "uris");
1354        assert!(fields[0].multi, "uris field should have multi=true");
1355
1356        // title should have multi=false
1357        assert_eq!(fields[1].name, "title");
1358        assert!(!fields[1].multi, "title field should have multi=false");
1359
1360        // Verify schema conversion preserves multi attribute
1361        let schema = indexes[0].to_schema();
1362        let uris_field = schema.get_field("uris").unwrap();
1363        let title_field = schema.get_field("title").unwrap();
1364
1365        assert!(schema.get_field_entry(uris_field).unwrap().multi);
1366        assert!(!schema.get_field_entry(title_field).unwrap().multi);
1367    }
1368
1369    #[test]
1370    fn test_sparse_vector_field() {
1371        let sdl = r#"
1372            index documents {
1373                field embedding: sparse_vector [indexed, stored]
1374            }
1375        "#;
1376
1377        let indexes = parse_sdl(sdl).unwrap();
1378        assert_eq!(indexes.len(), 1);
1379        assert_eq!(indexes[0].fields.len(), 1);
1380        assert_eq!(indexes[0].fields[0].name, "embedding");
1381        assert_eq!(indexes[0].fields[0].field_type, FieldType::SparseVector);
1382        assert!(indexes[0].fields[0].sparse_vector_config.is_none());
1383    }
1384
1385    #[test]
1386    fn test_sparse_vector_with_config() {
1387        let sdl = r#"
1388            index documents {
1389                field embedding: sparse_vector<u16> [indexed<quantization: uint8>, stored]
1390                field dense: sparse_vector<u32> [indexed<quantization: float32>]
1391            }
1392        "#;
1393
1394        let indexes = parse_sdl(sdl).unwrap();
1395        assert_eq!(indexes[0].fields.len(), 2);
1396
1397        // First field: u16 indices, uint8 quantization
1398        let f1 = &indexes[0].fields[0];
1399        assert_eq!(f1.name, "embedding");
1400        let config1 = f1.sparse_vector_config.as_ref().unwrap();
1401        assert_eq!(config1.index_size, IndexSize::U16);
1402        assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);
1403
1404        // Second field: u32 indices, float32 quantization
1405        let f2 = &indexes[0].fields[1];
1406        assert_eq!(f2.name, "dense");
1407        let config2 = f2.sparse_vector_config.as_ref().unwrap();
1408        assert_eq!(config2.index_size, IndexSize::U32);
1409        assert_eq!(config2.weight_quantization, WeightQuantization::Float32);
1410    }
1411
1412    #[test]
1413    fn test_sparse_vector_with_weight_threshold() {
1414        let sdl = r#"
1415            index documents {
1416                field embedding: sparse_vector<u16> [indexed<quantization: uint8, weight_threshold: 0.1>, stored]
1417                field embedding2: sparse_vector<u32> [indexed<quantization: float16, weight_threshold: 0.05>]
1418            }
1419        "#;
1420
1421        let indexes = parse_sdl(sdl).unwrap();
1422        assert_eq!(indexes[0].fields.len(), 2);
1423
1424        // First field: u16 indices, uint8 quantization, threshold 0.1
1425        let f1 = &indexes[0].fields[0];
1426        assert_eq!(f1.name, "embedding");
1427        let config1 = f1.sparse_vector_config.as_ref().unwrap();
1428        assert_eq!(config1.index_size, IndexSize::U16);
1429        assert_eq!(config1.weight_quantization, WeightQuantization::UInt8);
1430        assert!((config1.weight_threshold - 0.1).abs() < 0.001);
1431
1432        // Second field: u32 indices, float16 quantization, threshold 0.05
1433        let f2 = &indexes[0].fields[1];
1434        assert_eq!(f2.name, "embedding2");
1435        let config2 = f2.sparse_vector_config.as_ref().unwrap();
1436        assert_eq!(config2.index_size, IndexSize::U32);
1437        assert_eq!(config2.weight_quantization, WeightQuantization::Float16);
1438        assert!((config2.weight_threshold - 0.05).abs() < 0.001);
1439    }
1440
1441    #[test]
1442    fn test_sparse_vector_with_pruning() {
1443        let sdl = r#"
1444            index documents {
1445                field embedding: sparse_vector [indexed<quantization: uint8, pruning: 0.1>, stored]
1446            }
1447        "#;
1448
1449        let indexes = parse_sdl(sdl).unwrap();
1450        let f = &indexes[0].fields[0];
1451        assert_eq!(f.name, "embedding");
1452        let config = f.sparse_vector_config.as_ref().unwrap();
1453        assert_eq!(config.weight_quantization, WeightQuantization::UInt8);
1454        assert_eq!(config.pruning, Some(0.1));
1455    }
1456
1457    #[test]
1458    fn test_dense_vector_field() {
1459        let sdl = r#"
1460            index documents {
1461                field embedding: dense_vector<768> [indexed, stored]
1462            }
1463        "#;
1464
1465        let indexes = parse_sdl(sdl).unwrap();
1466        assert_eq!(indexes.len(), 1);
1467        assert_eq!(indexes[0].fields.len(), 1);
1468
1469        let f = &indexes[0].fields[0];
1470        assert_eq!(f.name, "embedding");
1471        assert_eq!(f.field_type, FieldType::DenseVector);
1472
1473        let config = f.dense_vector_config.as_ref().unwrap();
1474        assert_eq!(config.dim, 768);
1475    }
1476
1477    #[test]
1478    fn test_dense_vector_alias() {
1479        let sdl = r#"
1480            index documents {
1481                field embedding: vector<1536> [indexed]
1482            }
1483        "#;
1484
1485        let indexes = parse_sdl(sdl).unwrap();
1486        assert_eq!(indexes[0].fields[0].field_type, FieldType::DenseVector);
1487        assert_eq!(
1488            indexes[0].fields[0]
1489                .dense_vector_config
1490                .as_ref()
1491                .unwrap()
1492                .dim,
1493            1536
1494        );
1495    }
1496
1497    #[test]
1498    fn test_dense_vector_with_num_clusters() {
1499        let sdl = r#"
1500            index documents {
1501                field embedding: dense_vector<768> [indexed<ivf_rabitq, num_clusters: 256>, stored]
1502            }
1503        "#;
1504
1505        let indexes = parse_sdl(sdl).unwrap();
1506        assert_eq!(indexes.len(), 1);
1507
1508        let f = &indexes[0].fields[0];
1509        assert_eq!(f.name, "embedding");
1510        assert_eq!(f.field_type, FieldType::DenseVector);
1511
1512        let config = f.dense_vector_config.as_ref().unwrap();
1513        assert_eq!(config.dim, 768);
1514        assert_eq!(config.num_clusters, Some(256));
1515        assert_eq!(config.nprobe, 32); // default
1516    }
1517
1518    #[test]
1519    fn test_dense_vector_with_num_clusters_and_nprobe() {
1520        let sdl = r#"
1521            index documents {
1522                field embedding: dense_vector<1536> [indexed<ivf_rabitq, num_clusters: 512, nprobe: 64>]
1523            }
1524        "#;
1525
1526        let indexes = parse_sdl(sdl).unwrap();
1527        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1528
1529        assert_eq!(config.dim, 1536);
1530        assert_eq!(config.num_clusters, Some(512));
1531        assert_eq!(config.nprobe, 64);
1532    }
1533
1534    #[test]
1535    fn test_dense_vector_keyword_syntax() {
1536        let sdl = r#"
1537            index documents {
1538                field embedding: dense_vector<dims: 1536> [indexed, stored]
1539            }
1540        "#;
1541
1542        let indexes = parse_sdl(sdl).unwrap();
1543        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1544
1545        assert_eq!(config.dim, 1536);
1546        assert!(config.num_clusters.is_none());
1547    }
1548
1549    #[test]
1550    fn test_dense_vector_keyword_syntax_full() {
1551        let sdl = r#"
1552            index documents {
1553                field embedding: dense_vector<dims: 1536> [indexed<ivf_rabitq, num_clusters: 256, nprobe: 64>]
1554            }
1555        "#;
1556
1557        let indexes = parse_sdl(sdl).unwrap();
1558        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1559
1560        assert_eq!(config.dim, 1536);
1561        assert_eq!(config.num_clusters, Some(256));
1562        assert_eq!(config.nprobe, 64);
1563    }
1564
1565    #[test]
1566    fn test_dense_vector_keyword_syntax_partial() {
1567        let sdl = r#"
1568            index documents {
1569                field embedding: dense_vector<dims: 768> [indexed<ivf_rabitq, num_clusters: 128>]
1570            }
1571        "#;
1572
1573        let indexes = parse_sdl(sdl).unwrap();
1574        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1575
1576        assert_eq!(config.dim, 768);
1577        assert_eq!(config.num_clusters, Some(128));
1578        assert_eq!(config.nprobe, 32); // default
1579    }
1580
1581    #[test]
1582    fn test_dense_vector_scann_index() {
1583        use crate::dsl::schema::VectorIndexType;
1584
1585        let sdl = r#"
1586            index documents {
1587                field embedding: dense_vector<dims: 768> [indexed<scann, num_clusters: 256, nprobe: 64>]
1588            }
1589        "#;
1590
1591        let indexes = parse_sdl(sdl).unwrap();
1592        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1593
1594        assert_eq!(config.dim, 768);
1595        assert_eq!(config.index_type, VectorIndexType::ScaNN);
1596        assert_eq!(config.num_clusters, Some(256));
1597        assert_eq!(config.nprobe, 64);
1598    }
1599
1600    #[test]
1601    fn test_dense_vector_ivf_rabitq_index() {
1602        use crate::dsl::schema::VectorIndexType;
1603
1604        let sdl = r#"
1605            index documents {
1606                field embedding: dense_vector<dims: 1536> [indexed<ivf_rabitq, num_clusters: 512>]
1607            }
1608        "#;
1609
1610        let indexes = parse_sdl(sdl).unwrap();
1611        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1612
1613        assert_eq!(config.dim, 1536);
1614        assert_eq!(config.index_type, VectorIndexType::IvfRaBitQ);
1615        assert_eq!(config.num_clusters, Some(512));
1616    }
1617
1618    #[test]
1619    fn test_dense_vector_rabitq_no_clusters() {
1620        use crate::dsl::schema::VectorIndexType;
1621
1622        let sdl = r#"
1623            index documents {
1624                field embedding: dense_vector<dims: 768> [indexed<rabitq>]
1625            }
1626        "#;
1627
1628        let indexes = parse_sdl(sdl).unwrap();
1629        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1630
1631        assert_eq!(config.dim, 768);
1632        assert_eq!(config.index_type, VectorIndexType::RaBitQ);
1633        assert!(config.num_clusters.is_none());
1634    }
1635
1636    #[test]
1637    fn test_dense_vector_flat_index() {
1638        use crate::dsl::schema::VectorIndexType;
1639
1640        let sdl = r#"
1641            index documents {
1642                field embedding: dense_vector<dims: 768> [indexed<flat>]
1643            }
1644        "#;
1645
1646        let indexes = parse_sdl(sdl).unwrap();
1647        let config = indexes[0].fields[0].dense_vector_config.as_ref().unwrap();
1648
1649        assert_eq!(config.dim, 768);
1650        assert_eq!(config.index_type, VectorIndexType::Flat);
1651    }
1652
/// A bare `indexed` attribute (no `<...>` parameters) on a dense vector
/// falls back to the basic RaBitQ index type.
#[test]
fn test_dense_vector_default_index_type() {
    use crate::dsl::schema::VectorIndexType;

    // The attribute below names no index type at all.
    let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 768> [indexed]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let vector_cfg = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(vector_cfg.dim, 768);
    assert_eq!(vector_cfg.index_type, VectorIndexType::RaBitQ);
}
1670
/// The positional form `dense_vector<768, f16>` sets F16 quantization while
/// the index type stays at RaBitQ for a bare `indexed`.
#[test]
fn test_dense_vector_f16_quantization() {
    use crate::dsl::schema::{DenseVectorQuantization, VectorIndexType};

    let sdl = r#"
            index documents {
                field embedding: dense_vector<768, f16> [indexed]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let vector_cfg = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(vector_cfg.dim, 768);
    assert_eq!(vector_cfg.quantization, DenseVectorQuantization::F16);
    assert_eq!(vector_cfg.index_type, VectorIndexType::RaBitQ);
}
1688
/// `dense_vector<1024, uint8>` parses to UInt8 quantization with dimension 1024.
#[test]
fn test_dense_vector_uint8_quantization() {
    use crate::dsl::schema::DenseVectorQuantization;

    let sdl = r#"
            index documents {
                field embedding: dense_vector<1024, uint8> [indexed<ivf_rabitq>]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let vector_cfg = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(vector_cfg.dim, 1024);
    assert_eq!(vector_cfg.quantization, DenseVectorQuantization::UInt8);
}
1705
/// The short spelling `u8` is accepted and maps to UInt8 quantization.
#[test]
fn test_dense_vector_u8_alias() {
    use crate::dsl::schema::DenseVectorQuantization;

    let sdl = r#"
            index documents {
                field embedding: dense_vector<512, u8> [indexed]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let vector_cfg = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(vector_cfg.dim, 512);
    assert_eq!(vector_cfg.quantization, DenseVectorQuantization::UInt8);
}
1722
/// Omitting the quantization argument leaves a dense vector at F32.
#[test]
fn test_dense_vector_default_f32_quantization() {
    use crate::dsl::schema::DenseVectorQuantization;

    // Only the dimension is given; no quantization type is spelled out.
    let sdl = r#"
            index documents {
                field embedding: dense_vector<768> [indexed]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let vector_cfg = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(vector_cfg.dim, 768);
    assert_eq!(vector_cfg.quantization, DenseVectorQuantization::F32);
}
1740
/// The keyword form `dims: 768` can be combined with a quantization type.
#[test]
fn test_dense_vector_keyword_with_quantization() {
    use crate::dsl::schema::DenseVectorQuantization;

    let sdl = r#"
            index documents {
                field embedding: dense_vector<dims: 768, f16> [indexed]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let vector_cfg = embedding.dense_vector_config.as_ref().unwrap();

    assert_eq!(vector_cfg.dim, 768);
    assert_eq!(vector_cfg.quantization, DenseVectorQuantization::F16);
}
1757
/// `json` fields parse with and without an attribute list, and the converted
/// schema reports a stored, non-indexed JSON entry for `metadata`.
#[test]
fn test_json_field_type() {
    let sdl = r#"
            index documents {
                field title: text [indexed, stored]
                field metadata: json [stored]
                field extra: json
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let index = &parsed[0];

    assert_eq!(index.fields.len(), 3);

    // JSON field declared with an explicit `[stored]` attribute.
    let metadata = &index.fields[1];
    assert_eq!(metadata.name, "metadata");
    assert!(matches!(metadata.field_type, FieldType::Json));
    assert!(metadata.stored);

    // JSON field declared with no attribute list at all.
    let extra = &index.fields[2];
    assert_eq!(extra.name, "extra");
    assert!(matches!(extra.field_type, FieldType::Json));

    // The schema built from the parsed definition must agree.
    let schema = index.to_schema();
    let metadata_handle = schema.get_field("metadata").unwrap();
    let metadata_entry = schema.get_field_entry(metadata_handle).unwrap();
    assert_eq!(metadata_entry.field_type, FieldType::Json);
    assert!(!metadata_entry.indexed); // JSON fields are never indexed
    assert!(metadata_entry.stored);
}
1791
/// A nested `query<tokenizer: ..., weighting: idf>` block is parsed into the
/// sparse vector's query config and survives conversion to a schema.
#[test]
fn test_sparse_vector_query_config() {
    use crate::structures::QueryWeighting;

    let sdl = r#"
            index documents {
                field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<tokenizer: "Alibaba-NLP/gte-Qwen2-1.5B-instruct", weighting: idf>>]
            }
        "#;
    // Tokenizer name the SDL above declares.
    let expected_tokenizer = "Alibaba-NLP/gte-Qwen2-1.5B-instruct";

    let parsed = parse_sdl(sdl).unwrap();
    let index = &parsed[0];

    assert_eq!(index.fields.len(), 1);
    let embedding = &index.fields[0];
    assert_eq!(embedding.name, "embedding");
    assert!(matches!(embedding.field_type, FieldType::SparseVector));

    let sparse_cfg = embedding.sparse_vector_config.as_ref().unwrap();
    assert_eq!(sparse_cfg.index_size, IndexSize::U16);
    assert_eq!(sparse_cfg.weight_quantization, WeightQuantization::UInt8);

    // Query-time settings on the parsed definition.
    let parsed_query = sparse_cfg.query_config.as_ref().unwrap();
    assert_eq!(parsed_query.tokenizer.as_deref(), Some(expected_tokenizer));
    assert_eq!(parsed_query.weighting, QueryWeighting::Idf);

    // Same settings again, read back from the converted schema.
    let schema = index.to_schema();
    let handle = schema.get_field("embedding").unwrap();
    let schema_entry = schema.get_field_entry(handle).unwrap();
    let schema_sparse_cfg = schema_entry.sparse_vector_config.as_ref().unwrap();
    let schema_query = schema_sparse_cfg.query_config.as_ref().unwrap();
    assert_eq!(schema_query.tokenizer.as_deref(), Some(expected_tokenizer));
    assert_eq!(schema_query.weighting, QueryWeighting::Idf);
}
1836
/// `query<weighting: one>` with no tokenizer parses to `QueryWeighting::One`
/// and leaves the tokenizer unset.
#[test]
fn test_sparse_vector_query_config_weighting_one() {
    use crate::structures::QueryWeighting;

    let sdl = r#"
            index documents {
                field embedding: sparse_vector [indexed<query<weighting: one>>]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let sparse_cfg = embedding.sparse_vector_config.as_ref().unwrap();

    let query_cfg = sparse_cfg.query_config.as_ref().unwrap();
    assert!(query_cfg.tokenizer.is_none());
    assert_eq!(query_cfg.weighting, QueryWeighting::One);
}
1854
/// `weighting: idf_file` parses to `QueryWeighting::IdfFile` and is kept by
/// the schema conversion.
#[test]
fn test_sparse_vector_query_config_weighting_idf_file() {
    use crate::structures::QueryWeighting;

    let sdl = r#"
            index documents {
                field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<tokenizer: "opensearch-neural-sparse-encoding-v1", weighting: idf_file>>]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let sparse_cfg = embedding.sparse_vector_config.as_ref().unwrap();

    let parsed_query = sparse_cfg.query_config.as_ref().unwrap();
    assert_eq!(
        parsed_query.tokenizer.as_deref(),
        Some("opensearch-neural-sparse-encoding-v1")
    );
    assert_eq!(parsed_query.weighting, QueryWeighting::IdfFile);

    // The weighting mode must still be idf_file after building the schema.
    let schema = parsed[0].to_schema();
    let handle = schema.get_field("embedding").unwrap();
    let schema_entry = schema.get_field_entry(handle).unwrap();
    let schema_sparse_cfg = schema_entry.sparse_vector_config.as_ref().unwrap();
    let schema_query = schema_sparse_cfg.query_config.as_ref().unwrap();
    assert_eq!(schema_query.weighting, QueryWeighting::IdfFile);
}
1883
/// The query-side pruning knobs (`weight_threshold`, `max_dims`, `pruning`)
/// are parsed and carried through to the schema.
#[test]
fn test_sparse_vector_query_config_pruning_params() {
    let sdl = r#"
            index documents {
                field embedding: sparse_vector<u16> [indexed<quantization: uint8, query<weighting: idf, weight_threshold: 0.03, max_dims: 25, pruning: 0.2>>]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let sparse_cfg = embedding.sparse_vector_config.as_ref().unwrap();

    // Floats are compared within a small tolerance rather than exactly.
    let parsed_query = sparse_cfg.query_config.as_ref().unwrap();
    assert_eq!(parsed_query.weighting, QueryWeighting::Idf);
    assert!((parsed_query.weight_threshold - 0.03).abs() < 0.001);
    assert_eq!(parsed_query.max_query_dims, Some(25));
    assert!((parsed_query.pruning.unwrap() - 0.2).abs() < 0.001);

    // The same three values, read back from the converted schema.
    let schema = parsed[0].to_schema();
    let handle = schema.get_field("embedding").unwrap();
    let schema_entry = schema.get_field_entry(handle).unwrap();
    let schema_sparse_cfg = schema_entry.sparse_vector_config.as_ref().unwrap();
    let schema_query = schema_sparse_cfg.query_config.as_ref().unwrap();
    assert!((schema_query.weight_threshold - 0.03).abs() < 0.001);
    assert_eq!(schema_query.max_query_dims, Some(25));
    assert!((schema_query.pruning.unwrap() - 0.2).abs() < 0.001);
}
1911
/// `format: maxscore` selects `SparseFormat::MaxScore`, both on the parsed
/// definition and on the schema built from it.
#[test]
fn test_sparse_vector_format_maxscore() {
    let sdl = r#"
            index documents {
                field embedding: sparse_vector<u16> [indexed<format: maxscore, quantization: uint8>]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let sparse_cfg = embedding.sparse_vector_config.as_ref().unwrap();
    assert_eq!(sparse_cfg.format, SparseFormat::MaxScore);
    assert_eq!(sparse_cfg.weight_quantization, WeightQuantization::UInt8);

    // The format must be unchanged after conversion to a schema.
    let schema = parsed[0].to_schema();
    let handle = schema.get_field("embedding").unwrap();
    let schema_entry = schema.get_field_entry(handle).unwrap();
    let schema_sparse_cfg = schema_entry.sparse_vector_config.as_ref().unwrap();
    assert_eq!(schema_sparse_cfg.format, SparseFormat::MaxScore);
}
1932
/// `format: bmp` selects `SparseFormat::Bmp`.
#[test]
fn test_sparse_vector_format_bmp() {
    let sdl = r#"
            index documents {
                field embedding: sparse_vector<u16> [indexed<format: bmp, quantization: uint8>]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let embedding = &parsed[0].fields[0];
    let sparse_cfg = embedding.sparse_vector_config.as_ref().unwrap();
    assert_eq!(sparse_cfg.format, SparseFormat::Bmp);
}
1945
/// The `fast` attribute is recorded per field for several field types and is
/// preserved when the definition is converted to a schema.
#[test]
fn test_fast_attribute() {
    let sdl = r#"
            index products {
                field name: text [indexed, stored]
                field price: f64 [indexed, fast]
                field category: text [indexed, stored, fast]
                field count: u64 [fast]
                field score: i64 [indexed, stored, fast]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    assert_eq!(parsed.len(), 1);
    let index = &parsed[0];
    assert_eq!(index.fields.len(), 5);

    // `name` carries no `fast` attribute.
    let name = &index.fields[0];
    assert!(!name.fast);

    // `price` is a fast f64.
    let price = &index.fields[1];
    assert!(price.fast);
    assert!(matches!(price.field_type, FieldType::F64));

    // `category` is a fast text field.
    let category = &index.fields[2];
    assert!(category.fast);
    assert!(matches!(category.field_type, FieldType::Text));

    // `count` has `fast` as its only attribute.
    let count = &index.fields[3];
    assert!(count.fast);
    assert!(matches!(count.field_type, FieldType::U64));

    // `score` is a fast i64.
    let score = &index.fields[4];
    assert!(score.fast);
    assert!(matches!(score.field_type, FieldType::I64));

    // The flag must read the same on the converted schema's entries.
    let schema = index.to_schema();
    let price_handle = schema.get_field("price").unwrap();
    assert!(schema.get_field_entry(price_handle).unwrap().fast);

    let category_handle = schema.get_field("category").unwrap();
    assert!(schema.get_field_entry(category_handle).unwrap().fast);

    let name_handle = schema.get_field("name").unwrap();
    assert!(!schema.get_field_entry(name_handle).unwrap().fast);
}
1989
/// `primary` marks a field as the primary key and also turns on `fast` and
/// `indexed`, even though the SDL lists neither; the schema exposes it via
/// `primary_field()`.
#[test]
fn test_primary_attribute() {
    let sdl = r#"
            index documents {
                field id: text [primary, stored]
                field title: text [indexed, stored]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    assert_eq!(parsed.len(), 1);
    let index = &parsed[0];
    assert_eq!(index.fields.len(), 2);

    // `id` is declared `[primary, stored]` only; fast/indexed are implied.
    let id_def = &index.fields[0];
    assert!(id_def.primary, "id should be primary");
    assert!(id_def.fast, "primary implies fast");
    assert!(id_def.indexed, "primary implies indexed");

    // `title` has no `primary` attribute.
    let title_def = &index.fields[1];
    assert!(!title_def.primary);

    // The converted schema must carry the same primary-key information.
    let schema = index.to_schema();
    let id = schema.get_field("id").unwrap();
    let id_entry = schema.get_field_entry(id).unwrap();
    assert!(id_entry.primary_key);
    assert!(id_entry.fast);
    assert!(id_entry.indexed);

    let title = schema.get_field("title").unwrap();
    let title_entry = schema.get_field_entry(title).unwrap();
    assert!(!title_entry.primary_key);

    // The schema-level accessor points at `id`.
    assert_eq!(schema.primary_field(), Some(id));
}
2027
/// `primary` can be combined with `indexed`, `stored` and an explicit
/// tokenizer without losing any of them.
#[test]
fn test_primary_with_other_attributes() {
    let sdl = r#"
            index documents {
                field id: text<simple> [primary, indexed, stored]
                field body: text [indexed]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let id_def = &parsed[0].fields[0];
    assert!(id_def.primary);
    assert!(id_def.indexed);
    assert!(id_def.stored);
    assert!(id_def.fast);
    // Tokenizer comes from the `text<simple>` type argument.
    assert_eq!(id_def.tokenizer.as_deref(), Some("simple"));
}
2045
/// Declaring two `primary` fields in one index is rejected with an error
/// whose text mentions "primary key".
#[test]
fn test_primary_only_one_allowed() {
    let sdl = r#"
            index documents {
                field id: text [primary]
                field alt_id: text [primary]
            }
        "#;

    let outcome = parse_sdl(sdl);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(
        message.contains("primary key"),
        "Error should mention primary key: {}",
        message
    );
}
2064
/// A `primary` attribute on a `u64` field is rejected with an error whose
/// text mentions "text".
#[test]
fn test_primary_must_be_text() {
    let sdl = r#"
            index documents {
                field id: u64 [primary]
            }
        "#;

    let outcome = parse_sdl(sdl);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(
        message.contains("text"),
        "Error should mention text type: {}",
        message
    );
}
2082
/// Combining `primary` with `stored<multi>` is rejected with an error whose
/// text mentions "multi".
#[test]
fn test_primary_cannot_be_multi() {
    let sdl = r#"
            index documents {
                field id: text [primary, stored<multi>]
            }
        "#;

    let outcome = parse_sdl(sdl);
    assert!(outcome.is_err());
    let message = outcome.unwrap_err().to_string();
    assert!(
        message.contains("multi"),
        "Error should mention multi: {}",
        message
    );
}
2096
/// An index with no `primary` field parses normally and its schema reports
/// no primary field.
#[test]
fn test_no_primary_field() {
    // No field below carries the `primary` attribute.
    let sdl = r#"
            index documents {
                field title: text [indexed, stored]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let index = &parsed[0];
    let schema = index.to_schema();
    assert!(schema.primary_field().is_none());
}
2110
/// The `reorder` attribute is set only on fields that list it, and the flag
/// is preserved by the schema conversion.
#[test]
fn test_reorder_attribute() {
    let sdl = r#"
            index documents {
                field embedding: sparse_vector<u16> [indexed<format: bmp, quantization: uint8>, reorder]
                field embedding2: sparse_vector [indexed<format: bmp>]
            }
        "#;

    let parsed = parse_sdl(sdl).unwrap();
    let index = &parsed[0];
    assert_eq!(index.fields.len(), 2);

    // `embedding` lists `reorder`.
    let with_reorder = &index.fields[0];
    assert!(with_reorder.reorder);
    // `embedding2` does not.
    let without_reorder = &index.fields[1];
    assert!(!without_reorder.reorder);

    // Both flags must read the same on the converted schema.
    let schema = index.to_schema();
    let first_handle = schema.get_field("embedding").unwrap();
    assert!(schema.get_field_entry(first_handle).unwrap().reorder);

    let second_handle = schema.get_field("embedding2").unwrap();
    assert!(!schema.get_field_entry(second_handle).unwrap().reorder);
}
2136}