1use pest::Parser;
34use pest_derive::Parser;
35
36use super::query_field_router::{QueryRouterRule, RoutingMode};
37use super::schema::{FieldType, Schema, SchemaBuilder};
38use crate::Result;
39use crate::error::Error;
40
/// Parser for the schema definition language (SDL).
///
/// The implementation is generated by `pest_derive` from the grammar file
/// named in the `#[grammar]` attribute; the rest of this file walks the parse
/// tree that `SdlParser::parse` produces.
#[derive(Parser)]
#[grammar = "dsl/sdl/sdl.pest"]
pub struct SdlParser;
44
/// One `field` declaration parsed from an index definition.
#[derive(Debug, Clone)]
pub struct FieldDef {
    /// Field name exactly as written in the SDL source.
    pub name: String,
    /// Data type resolved from the type keyword (see `parse_field_type`).
    pub field_type: FieldType,
    /// `true` when the field is indexed. Defaults to `true` when the
    /// declaration has no attribute list.
    pub indexed: bool,
    /// `true` when the field is stored. Defaults to `true` when the
    /// declaration has no attribute list.
    pub stored: bool,
    /// Tokenizer name taken from a tokenizer spec such as `text<en_stem>`;
    /// `None` when the declaration does not name one.
    pub tokenizer: Option<String>,
    /// `true` when the attribute list contains `multi`.
    pub multi: bool,
}
57
/// One `index <name> { ... }` definition parsed from SDL source.
#[derive(Debug, Clone)]
pub struct IndexDef {
    /// Index name exactly as written in the SDL source.
    pub name: String,
    /// Field declarations, in source order.
    pub fields: Vec<FieldDef>,
    /// Field names taken from the default-fields declaration; empty when the
    /// index has none. If the declaration appears more than once, the last
    /// one wins.
    pub default_fields: Vec<String>,
    /// Query router rules, one per `query_router { ... }` block, in source
    /// order.
    pub query_routers: Vec<QueryRouterRule>,
}
67
68impl IndexDef {
69 pub fn to_schema(&self) -> Schema {
71 let mut builder = SchemaBuilder::default();
72
73 for field in &self.fields {
74 let f = match field.field_type {
75 FieldType::Text => {
76 let tokenizer = field.tokenizer.as_deref().unwrap_or("default");
77 builder.add_text_field_with_tokenizer(
78 &field.name,
79 field.indexed,
80 field.stored,
81 tokenizer,
82 )
83 }
84 FieldType::U64 => {
85 builder.add_u64_field(&field.name, field.indexed, field.stored)
86 }
87 FieldType::I64 => {
88 builder.add_i64_field(&field.name, field.indexed, field.stored)
89 }
90 FieldType::F64 => {
91 builder.add_f64_field(&field.name, field.indexed, field.stored)
92 }
93 FieldType::Bytes => builder.add_bytes_field(&field.name, field.stored),
94 };
95 if field.multi {
96 builder.set_multi(f, true);
97 }
98 }
99
100 if !self.default_fields.is_empty() {
102 builder.set_default_fields(self.default_fields.clone());
103 }
104
105 if !self.query_routers.is_empty() {
107 builder.set_query_routers(self.query_routers.clone());
108 }
109
110 builder.build()
111 }
112
113 pub fn to_query_router(
118 &self,
119 ) -> Result<Option<super::query_field_router::QueryFieldRouter>> {
120 if self.query_routers.is_empty() {
121 return Ok(None);
122 }
123
124 super::query_field_router::QueryFieldRouter::from_rules(&self.query_routers)
125 .map(Some)
126 .map_err(|e| Error::Schema(e))
127 }
128}
129
130fn parse_field_type(type_str: &str) -> Result<FieldType> {
132 match type_str {
133 "text" | "string" | "str" => Ok(FieldType::Text),
134 "u64" | "uint" | "unsigned" => Ok(FieldType::U64),
135 "i64" | "int" | "integer" => Ok(FieldType::I64),
136 "f64" | "float" | "double" => Ok(FieldType::F64),
137 "bytes" | "binary" | "blob" => Ok(FieldType::Bytes),
138 _ => Err(Error::Schema(format!("Unknown field type: {}", type_str))),
139 }
140}
141
142fn parse_attributes(pair: pest::iterators::Pair<Rule>) -> (bool, bool, bool) {
145 let mut indexed = false;
146 let mut stored = false;
147 let mut multi = false;
148
149 for attr in pair.into_inner() {
150 match attr.as_str() {
151 "indexed" => indexed = true,
152 "stored" => stored = true,
153 "multi" => multi = true,
154 _ => {}
155 }
156 }
157
158 (indexed, stored, multi)
159}
160
161fn parse_field_def(pair: pest::iterators::Pair<Rule>) -> Result<FieldDef> {
163 let mut inner = pair.into_inner();
164
165 let name = inner
166 .next()
167 .ok_or_else(|| Error::Schema("Missing field name".to_string()))?
168 .as_str()
169 .to_string();
170
171 let field_type_str = inner
172 .next()
173 .ok_or_else(|| Error::Schema("Missing field type".to_string()))?
174 .as_str();
175
176 let field_type = parse_field_type(field_type_str)?;
177
178 let mut tokenizer = None;
180 let mut indexed = true;
181 let mut stored = true;
182 let mut multi = false;
183
184 for item in inner {
185 match item.as_rule() {
186 Rule::tokenizer_spec => {
187 if let Some(tok_name) = item.into_inner().next() {
189 tokenizer = Some(tok_name.as_str().to_string());
190 }
191 }
192 Rule::attributes => {
193 let (idx, sto, mul) = parse_attributes(item);
194 indexed = idx;
195 stored = sto;
196 multi = mul;
197 }
198 _ => {}
199 }
200 }
201
202 Ok(FieldDef {
203 name,
204 field_type,
205 indexed,
206 stored,
207 tokenizer,
208 multi,
209 })
210}
211
212fn parse_default_fields_def(pair: pest::iterators::Pair<Rule>) -> Vec<String> {
214 pair.into_inner().map(|p| p.as_str().to_string()).collect()
215}
216
217fn parse_query_router_def(pair: pest::iterators::Pair<Rule>) -> Result<QueryRouterRule> {
219 let mut pattern = String::new();
220 let mut substitution = String::new();
221 let mut target_field = String::new();
222 let mut mode = RoutingMode::Additional;
223
224 for prop in pair.into_inner() {
225 if prop.as_rule() != Rule::query_router_prop {
226 continue;
227 }
228
229 for inner in prop.into_inner() {
230 match inner.as_rule() {
231 Rule::query_router_pattern => {
232 if let Some(regex_str) = inner.into_inner().next() {
233 pattern = parse_string_value(regex_str);
234 }
235 }
236 Rule::query_router_substitution => {
237 if let Some(quoted) = inner.into_inner().next() {
238 substitution = parse_string_value(quoted);
239 }
240 }
241 Rule::query_router_target => {
242 if let Some(ident) = inner.into_inner().next() {
243 target_field = ident.as_str().to_string();
244 }
245 }
246 Rule::query_router_mode => {
247 if let Some(mode_val) = inner.into_inner().next() {
248 mode = match mode_val.as_str() {
249 "exclusive" => RoutingMode::Exclusive,
250 "additional" => RoutingMode::Additional,
251 _ => RoutingMode::Additional,
252 };
253 }
254 }
255 _ => {}
256 }
257 }
258 }
259
260 if pattern.is_empty() {
261 return Err(Error::Schema("query_router missing 'pattern'".to_string()));
262 }
263 if substitution.is_empty() {
264 return Err(Error::Schema(
265 "query_router missing 'substitution'".to_string(),
266 ));
267 }
268 if target_field.is_empty() {
269 return Err(Error::Schema(
270 "query_router missing 'target_field'".to_string(),
271 ));
272 }
273
274 Ok(QueryRouterRule {
275 pattern,
276 substitution,
277 target_field,
278 mode,
279 })
280}
281
282fn parse_string_value(pair: pest::iterators::Pair<Rule>) -> String {
284 let s = pair.as_str();
285 match pair.as_rule() {
286 Rule::regex_string => {
287 if let Some(inner) = pair.into_inner().next() {
289 parse_string_value(inner)
290 } else {
291 s.to_string()
292 }
293 }
294 Rule::raw_string => {
295 s[2..s.len() - 1].to_string()
297 }
298 Rule::quoted_string => {
299 let inner = &s[1..s.len() - 1];
301 inner
303 .replace("\\n", "\n")
304 .replace("\\t", "\t")
305 .replace("\\\"", "\"")
306 .replace("\\\\", "\\")
307 }
308 _ => s.to_string(),
309 }
310}
311
312fn parse_index_def(pair: pest::iterators::Pair<Rule>) -> Result<IndexDef> {
314 let mut inner = pair.into_inner();
315
316 let name = inner
317 .next()
318 .ok_or_else(|| Error::Schema("Missing index name".to_string()))?
319 .as_str()
320 .to_string();
321
322 let mut fields = Vec::new();
323 let mut default_fields = Vec::new();
324 let mut query_routers = Vec::new();
325
326 for item in inner {
327 match item.as_rule() {
328 Rule::field_def => {
329 fields.push(parse_field_def(item)?);
330 }
331 Rule::default_fields_def => {
332 default_fields = parse_default_fields_def(item);
333 }
334 Rule::query_router_def => {
335 query_routers.push(parse_query_router_def(item)?);
336 }
337 _ => {}
338 }
339 }
340
341 Ok(IndexDef {
342 name,
343 fields,
344 default_fields,
345 query_routers,
346 })
347}
348
349pub fn parse_sdl(input: &str) -> Result<Vec<IndexDef>> {
351 let pairs = SdlParser::parse(Rule::file, input)
352 .map_err(|e| Error::Schema(format!("Parse error: {}", e)))?;
353
354 let mut indexes = Vec::new();
355
356 for pair in pairs {
357 if pair.as_rule() == Rule::file {
358 for inner in pair.into_inner() {
359 if inner.as_rule() == Rule::index_def {
360 indexes.push(parse_index_def(inner)?);
361 }
362 }
363 }
364 }
365
366 Ok(indexes)
367}
368
369pub fn parse_single_index(input: &str) -> Result<IndexDef> {
371 let indexes = parse_sdl(input)?;
372
373 if indexes.is_empty() {
374 return Err(Error::Schema("No index definition found".to_string()));
375 }
376
377 if indexes.len() > 1 {
378 return Err(Error::Schema(
379 "Multiple index definitions found, expected one".to_string(),
380 ));
381 }
382
383 Ok(indexes.into_iter().next().unwrap())
384}
385
/// Unit tests for SDL parsing and for converting parsed definitions into a
/// schema.
#[cfg(test)]
mod tests {
    use super::*;

    /// A two-field index parses with the expected names, types and flags; a
    /// flag missing from an explicit attribute list is `false`.
    #[test]
    fn test_parse_simple_schema() {
        let sdl = r#"
            index articles {
                field title: text [indexed, stored]
                field body: text [indexed]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let index = &indexes[0];
        assert_eq!(index.name, "articles");
        assert_eq!(index.fields.len(), 2);

        assert_eq!(index.fields[0].name, "title");
        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(index.fields[0].indexed);
        assert!(index.fields[0].stored);

        assert_eq!(index.fields[1].name, "body");
        assert!(matches!(index.fields[1].field_type, FieldType::Text));
        assert!(index.fields[1].indexed);
        // `[indexed]` alone replaces the defaults, so `stored` is off.
        assert!(!index.fields[1].stored);
    }

    /// Each canonical type keyword maps to its `FieldType` variant.
    #[test]
    fn test_parse_all_field_types() {
        let sdl = r#"
            index test {
                field text_field: text [indexed, stored]
                field u64_field: u64 [indexed, stored]
                field i64_field: i64 [indexed, stored]
                field f64_field: f64 [indexed, stored]
                field bytes_field: bytes [stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(matches!(index.fields[1].field_type, FieldType::U64));
        assert!(matches!(index.fields[2].field_type, FieldType::I64));
        assert!(matches!(index.fields[3].field_type, FieldType::F64));
        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
    }

    /// `#` comments before the index, between fields and after a field
    /// declaration do not disturb parsing.
    ///
    /// NOTE(review): the fixture's trailing comment reads "inline comment not
    /// supported yet", yet this test only passes if that line parses. The
    /// wording looks stale; confirm against the grammar.
    #[test]
    fn test_parse_with_comments() {
        let sdl = r#"
            # This is a comment
            index articles {
                # Title field
                field title: text [indexed, stored]
                field body: text [indexed] # inline comment not supported yet
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].fields.len(), 2);
    }

    /// Alternative type spellings resolve to the same variants as the
    /// canonical keywords.
    #[test]
    fn test_parse_type_aliases() {
        let sdl = r#"
            index test {
                field a: string [indexed]
                field b: int [indexed]
                field c: uint [indexed]
                field d: float [indexed]
                field e: binary [stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(matches!(index.fields[1].field_type, FieldType::I64));
        assert!(matches!(index.fields[2].field_type, FieldType::U64));
        assert!(matches!(index.fields[3].field_type, FieldType::F64));
        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
    }

    /// Declared fields can be looked up by name in the built schema, and an
    /// undeclared name is absent.
    #[test]
    fn test_to_schema() {
        let sdl = r#"
            index articles {
                field title: text [indexed, stored]
                field views: u64 [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let schema = indexes[0].to_schema();

        assert!(schema.get_field("title").is_some());
        assert!(schema.get_field("views").is_some());
        assert!(schema.get_field("nonexistent").is_none());
    }

    /// A field declared without an attribute list is both indexed and stored.
    #[test]
    fn test_default_attributes() {
        let sdl = r#"
            index test {
                field title: text
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let field = &indexes[0].fields[0];

        assert!(field.indexed);
        assert!(field.stored);
    }

    /// Several index blocks in one source are returned in source order.
    #[test]
    fn test_multiple_indexes() {
        let sdl = r#"
            index articles {
                field title: text [indexed, stored]
            }

            index users {
                field name: text [indexed, stored]
                field email: text [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 2);
        assert_eq!(indexes[0].name, "articles");
        assert_eq!(indexes[1].name, "users");
    }

    /// A `text<name>` spec records the tokenizer name; without a spec the
    /// tokenizer stays `None`.
    #[test]
    fn test_tokenizer_spec() {
        let sdl = r#"
            index articles {
                field title: text<en_stem> [indexed, stored]
                field body: text<default> [indexed]
                field author: text [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert_eq!(index.fields[0].name, "title");
        assert_eq!(index.fields[0].tokenizer, Some("en_stem".to_string()));

        assert_eq!(index.fields[1].name, "body");
        assert_eq!(index.fields[1].tokenizer, Some("default".to_string()));

        assert_eq!(index.fields[2].name, "author");
        assert_eq!(index.fields[2].tokenizer, None);
    }

    /// Tokenizer names given in the SDL reach the field entries of the built
    /// schema.
    #[test]
    fn test_tokenizer_in_schema() {
        let sdl = r#"
            index articles {
                field title: text<german> [indexed, stored]
                field body: text<en_stem> [indexed]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let schema = indexes[0].to_schema();

        let title_field = schema.get_field("title").unwrap();
        let title_entry = schema.get_field_entry(title_field).unwrap();
        assert_eq!(title_entry.tokenizer, Some("german".to_string()));

        let body_field = schema.get_field("body").unwrap();
        let body_entry = schema.get_field_entry(body_field).unwrap();
        assert_eq!(body_entry.tokenizer, Some("en_stem".to_string()));
    }

    /// A router with a quoted pattern parses, and `\\` in the quoted pattern
    /// decodes to a single backslash.
    ///
    /// NOTE(review): the fixture declares a field named `uri` but routes to
    /// `uris`. Parsing does not cross-check the target against the declared
    /// fields, so the test passes; confirm whether the mismatch is intended.
    #[test]
    fn test_query_router_basic() {
        let sdl = r#"
            index documents {
                field title: text [indexed, stored]
                field uri: text [indexed, stored]

                query_router {
                    pattern: "10\\.\\d{4,}/[^\\s]+"
                    substitution: "doi://{0}"
                    target_field: uris
                    mode: exclusive
                }
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert_eq!(index.query_routers.len(), 1);
        let router = &index.query_routers[0];
        assert_eq!(router.pattern, r"10\.\d{4,}/[^\s]+");
        assert_eq!(router.substitution, "doi://{0}");
        assert_eq!(router.target_field, "uris");
        assert_eq!(router.mode, RoutingMode::Exclusive);
    }

    /// A raw-string pattern (`r"..."`) is taken verbatim, backslashes
    /// included.
    #[test]
    fn test_query_router_raw_string() {
        let sdl = r#"
            index documents {
                field uris: text [indexed, stored]

                query_router {
                    pattern: r"^pmid:(\d+)$"
                    substitution: "pubmed://{1}"
                    target_field: uris
                    mode: additional
                }
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        let router = &indexes[0].query_routers[0];

        assert_eq!(router.pattern, r"^pmid:(\d+)$");
        assert_eq!(router.substitution, "pubmed://{1}");
        assert_eq!(router.mode, RoutingMode::Additional);
    }

    /// Every `query_router` block in an index yields its own rule.
    #[test]
    fn test_multiple_query_routers() {
        let sdl = r#"
            index documents {
                field uris: text [indexed, stored]

                query_router {
                    pattern: r"^doi:(10\.\d{4,}/[^\s]+)$"
                    substitution: "doi://{1}"
                    target_field: uris
                    mode: exclusive
                }

                query_router {
                    pattern: r"^pmid:(\d+)$"
                    substitution: "pubmed://{1}"
                    target_field: uris
                    mode: exclusive
                }

                query_router {
                    pattern: r"^arxiv:(\d+\.\d+)$"
                    substitution: "arxiv://{1}"
                    target_field: uris
                    mode: additional
                }
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].query_routers.len(), 3);
    }

    /// A router without a `mode` property uses `RoutingMode::Additional`.
    #[test]
    fn test_query_router_default_mode() {
        let sdl = r#"
            index documents {
                field uris: text [indexed, stored]

                query_router {
                    pattern: r"test"
                    substitution: "{0}"
                    target_field: uris
                }
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].query_routers[0].mode, RoutingMode::Additional);
    }

    /// The `multi` attribute is recorded on the parsed field and carried
    /// through to the schema's field entry.
    #[test]
    fn test_multi_attribute() {
        let sdl = r#"
            index documents {
                field uris: text [indexed, stored, multi]
                field title: text [indexed, stored]
            }
        "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let fields = &indexes[0].fields;
        assert_eq!(fields.len(), 2);

        assert_eq!(fields[0].name, "uris");
        assert!(fields[0].multi, "uris field should have multi=true");

        assert_eq!(fields[1].name, "title");
        assert!(!fields[1].multi, "title field should have multi=false");

        // The flag must also survive the conversion into a schema.
        let schema = indexes[0].to_schema();
        let uris_field = schema.get_field("uris").unwrap();
        let title_field = schema.get_field("title").unwrap();

        assert!(schema.get_field_entry(uris_field).unwrap().multi);
        assert!(!schema.get_field_entry(title_field).unwrap().multi);
    }
}