1use pest::Parser;
34use pest_derive::Parser;
35
36use super::query_field_router::{QueryRouterRule, RoutingMode};
37use super::schema::{FieldType, Schema, SchemaBuilder};
38use crate::Result;
39use crate::error::Error;
40
/// Parser entry point for the schema definition language (SDL).
///
/// The parser implementation and the `Rule` enum used throughout this file
/// are generated by the `Parser` derive from the grammar file named in the
/// `grammar` attribute below.
#[derive(Parser)]
#[grammar = "dsl/sdl/sdl.pest"]
pub struct SdlParser;
44
45#[derive(Debug, Clone)]
47pub struct FieldDef {
48 pub name: String,
49 pub field_type: FieldType,
50 pub indexed: bool,
51 pub stored: bool,
52 pub tokenizer: Option<String>,
54 pub multi: bool,
56}
57
58#[derive(Debug, Clone)]
60pub struct IndexDef {
61 pub name: String,
62 pub fields: Vec<FieldDef>,
63 pub default_fields: Vec<String>,
64 pub query_routers: Vec<QueryRouterRule>,
66}
67
68impl IndexDef {
69 pub fn to_schema(&self) -> Schema {
71 let mut builder = SchemaBuilder::default();
72
73 for field in &self.fields {
74 let f = match field.field_type {
75 FieldType::Text => {
76 let tokenizer = field.tokenizer.as_deref().unwrap_or("default");
77 builder.add_text_field_with_tokenizer(
78 &field.name,
79 field.indexed,
80 field.stored,
81 tokenizer,
82 )
83 }
84 FieldType::U64 => builder.add_u64_field(&field.name, field.indexed, field.stored),
85 FieldType::I64 => builder.add_i64_field(&field.name, field.indexed, field.stored),
86 FieldType::F64 => builder.add_f64_field(&field.name, field.indexed, field.stored),
87 FieldType::Bytes => builder.add_bytes_field(&field.name, field.stored),
88 };
89 if field.multi {
90 builder.set_multi(f, true);
91 }
92 }
93
94 if !self.default_fields.is_empty() {
96 builder.set_default_fields(self.default_fields.clone());
97 }
98
99 if !self.query_routers.is_empty() {
101 builder.set_query_routers(self.query_routers.clone());
102 }
103
104 builder.build()
105 }
106
107 pub fn to_query_router(&self) -> Result<Option<super::query_field_router::QueryFieldRouter>> {
112 if self.query_routers.is_empty() {
113 return Ok(None);
114 }
115
116 super::query_field_router::QueryFieldRouter::from_rules(&self.query_routers)
117 .map(Some)
118 .map_err(Error::Schema)
119 }
120}
121
122fn parse_field_type(type_str: &str) -> Result<FieldType> {
124 match type_str {
125 "text" | "string" | "str" => Ok(FieldType::Text),
126 "u64" | "uint" | "unsigned" => Ok(FieldType::U64),
127 "i64" | "int" | "integer" => Ok(FieldType::I64),
128 "f64" | "float" | "double" => Ok(FieldType::F64),
129 "bytes" | "binary" | "blob" => Ok(FieldType::Bytes),
130 _ => Err(Error::Schema(format!("Unknown field type: {}", type_str))),
131 }
132}
133
134fn parse_attributes(pair: pest::iterators::Pair<Rule>) -> (bool, bool, bool) {
137 let mut indexed = false;
138 let mut stored = false;
139 let mut multi = false;
140
141 for attr in pair.into_inner() {
142 match attr.as_str() {
143 "indexed" => indexed = true,
144 "stored" => stored = true,
145 "multi" => multi = true,
146 _ => {}
147 }
148 }
149
150 (indexed, stored, multi)
151}
152
153fn parse_field_def(pair: pest::iterators::Pair<Rule>) -> Result<FieldDef> {
155 let mut inner = pair.into_inner();
156
157 let name = inner
158 .next()
159 .ok_or_else(|| Error::Schema("Missing field name".to_string()))?
160 .as_str()
161 .to_string();
162
163 let field_type_str = inner
164 .next()
165 .ok_or_else(|| Error::Schema("Missing field type".to_string()))?
166 .as_str();
167
168 let field_type = parse_field_type(field_type_str)?;
169
170 let mut tokenizer = None;
172 let mut indexed = true;
173 let mut stored = true;
174 let mut multi = false;
175
176 for item in inner {
177 match item.as_rule() {
178 Rule::tokenizer_spec => {
179 if let Some(tok_name) = item.into_inner().next() {
181 tokenizer = Some(tok_name.as_str().to_string());
182 }
183 }
184 Rule::attributes => {
185 let (idx, sto, mul) = parse_attributes(item);
186 indexed = idx;
187 stored = sto;
188 multi = mul;
189 }
190 _ => {}
191 }
192 }
193
194 Ok(FieldDef {
195 name,
196 field_type,
197 indexed,
198 stored,
199 tokenizer,
200 multi,
201 })
202}
203
204fn parse_default_fields_def(pair: pest::iterators::Pair<Rule>) -> Vec<String> {
206 pair.into_inner().map(|p| p.as_str().to_string()).collect()
207}
208
209fn parse_query_router_def(pair: pest::iterators::Pair<Rule>) -> Result<QueryRouterRule> {
211 let mut pattern = String::new();
212 let mut substitution = String::new();
213 let mut target_field = String::new();
214 let mut mode = RoutingMode::Additional;
215
216 for prop in pair.into_inner() {
217 if prop.as_rule() != Rule::query_router_prop {
218 continue;
219 }
220
221 for inner in prop.into_inner() {
222 match inner.as_rule() {
223 Rule::query_router_pattern => {
224 if let Some(regex_str) = inner.into_inner().next() {
225 pattern = parse_string_value(regex_str);
226 }
227 }
228 Rule::query_router_substitution => {
229 if let Some(quoted) = inner.into_inner().next() {
230 substitution = parse_string_value(quoted);
231 }
232 }
233 Rule::query_router_target => {
234 if let Some(ident) = inner.into_inner().next() {
235 target_field = ident.as_str().to_string();
236 }
237 }
238 Rule::query_router_mode => {
239 if let Some(mode_val) = inner.into_inner().next() {
240 mode = match mode_val.as_str() {
241 "exclusive" => RoutingMode::Exclusive,
242 "additional" => RoutingMode::Additional,
243 _ => RoutingMode::Additional,
244 };
245 }
246 }
247 _ => {}
248 }
249 }
250 }
251
252 if pattern.is_empty() {
253 return Err(Error::Schema("query_router missing 'pattern'".to_string()));
254 }
255 if substitution.is_empty() {
256 return Err(Error::Schema(
257 "query_router missing 'substitution'".to_string(),
258 ));
259 }
260 if target_field.is_empty() {
261 return Err(Error::Schema(
262 "query_router missing 'target_field'".to_string(),
263 ));
264 }
265
266 Ok(QueryRouterRule {
267 pattern,
268 substitution,
269 target_field,
270 mode,
271 })
272}
273
274fn parse_string_value(pair: pest::iterators::Pair<Rule>) -> String {
276 let s = pair.as_str();
277 match pair.as_rule() {
278 Rule::regex_string => {
279 if let Some(inner) = pair.into_inner().next() {
281 parse_string_value(inner)
282 } else {
283 s.to_string()
284 }
285 }
286 Rule::raw_string => {
287 s[2..s.len() - 1].to_string()
289 }
290 Rule::quoted_string => {
291 let inner = &s[1..s.len() - 1];
293 inner
295 .replace("\\n", "\n")
296 .replace("\\t", "\t")
297 .replace("\\\"", "\"")
298 .replace("\\\\", "\\")
299 }
300 _ => s.to_string(),
301 }
302}
303
304fn parse_index_def(pair: pest::iterators::Pair<Rule>) -> Result<IndexDef> {
306 let mut inner = pair.into_inner();
307
308 let name = inner
309 .next()
310 .ok_or_else(|| Error::Schema("Missing index name".to_string()))?
311 .as_str()
312 .to_string();
313
314 let mut fields = Vec::new();
315 let mut default_fields = Vec::new();
316 let mut query_routers = Vec::new();
317
318 for item in inner {
319 match item.as_rule() {
320 Rule::field_def => {
321 fields.push(parse_field_def(item)?);
322 }
323 Rule::default_fields_def => {
324 default_fields = parse_default_fields_def(item);
325 }
326 Rule::query_router_def => {
327 query_routers.push(parse_query_router_def(item)?);
328 }
329 _ => {}
330 }
331 }
332
333 Ok(IndexDef {
334 name,
335 fields,
336 default_fields,
337 query_routers,
338 })
339}
340
341pub fn parse_sdl(input: &str) -> Result<Vec<IndexDef>> {
343 let pairs = SdlParser::parse(Rule::file, input)
344 .map_err(|e| Error::Schema(format!("Parse error: {}", e)))?;
345
346 let mut indexes = Vec::new();
347
348 for pair in pairs {
349 if pair.as_rule() == Rule::file {
350 for inner in pair.into_inner() {
351 if inner.as_rule() == Rule::index_def {
352 indexes.push(parse_index_def(inner)?);
353 }
354 }
355 }
356 }
357
358 Ok(indexes)
359}
360
361pub fn parse_single_index(input: &str) -> Result<IndexDef> {
363 let indexes = parse_sdl(input)?;
364
365 if indexes.is_empty() {
366 return Err(Error::Schema("No index definition found".to_string()));
367 }
368
369 if indexes.len() > 1 {
370 return Err(Error::Schema(
371 "Multiple index definitions found, expected one".to_string(),
372 ));
373 }
374
375 Ok(indexes.into_iter().next().unwrap())
376}
377
// Unit tests for the SDL parser. Each test feeds an SDL snippet to
// `parse_sdl` and checks the parsed definitions or the schema built from
// them.
#[cfg(test)]
mod tests {
    use super::*;

    // Two text fields with explicit attribute lists: the name, type and the
    // indexed/stored flags must reflect exactly what each list contains.
    #[test]
    fn test_parse_simple_schema() {
        let sdl = r#"
 index articles {
 field title: text [indexed, stored]
 field body: text [indexed]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let index = &indexes[0];
        assert_eq!(index.name, "articles");
        assert_eq!(index.fields.len(), 2);

        assert_eq!(index.fields[0].name, "title");
        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(index.fields[0].indexed);
        assert!(index.fields[0].stored);

        // `body` lists only `indexed`, so `stored` must be false.
        assert_eq!(index.fields[1].name, "body");
        assert!(matches!(index.fields[1].field_type, FieldType::Text));
        assert!(index.fields[1].indexed);
        assert!(!index.fields[1].stored);
    }

    // Every canonical type keyword maps to its `FieldType` variant.
    #[test]
    fn test_parse_all_field_types() {
        let sdl = r#"
 index test {
 field text_field: text [indexed, stored]
 field u64_field: u64 [indexed, stored]
 field i64_field: i64 [indexed, stored]
 field f64_field: f64 [indexed, stored]
 field bytes_field: bytes [stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(matches!(index.fields[1].field_type, FieldType::U64));
        assert!(matches!(index.fields[2].field_type, FieldType::I64));
        assert!(matches!(index.fields[3].field_type, FieldType::F64));
        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
    }

    // `#` comments on their own line and after a field declaration must not
    // prevent parsing or add fields.
    // NOTE(review): the fixture text says inline comments are "not supported
    // yet", but this test requires the line carrying one to parse
    // successfully — confirm which statement is current.
    #[test]
    fn test_parse_with_comments() {
        let sdl = r#"
 # This is a comment
 index articles {
 # Title field
 field title: text [indexed, stored]
 field body: text [indexed] # inline comment not supported yet
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].fields.len(), 2);
    }

    // Alias keywords resolve to the same variants as the canonical names.
    #[test]
    fn test_parse_type_aliases() {
        let sdl = r#"
 index test {
 field a: string [indexed]
 field b: int [indexed]
 field c: uint [indexed]
 field d: float [indexed]
 field e: binary [stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert!(matches!(index.fields[0].field_type, FieldType::Text));
        assert!(matches!(index.fields[1].field_type, FieldType::I64));
        assert!(matches!(index.fields[2].field_type, FieldType::U64));
        assert!(matches!(index.fields[3].field_type, FieldType::F64));
        assert!(matches!(index.fields[4].field_type, FieldType::Bytes));
    }

    // A schema built from a parsed index exposes the declared fields by name
    // and nothing else.
    #[test]
    fn test_to_schema() {
        let sdl = r#"
 index articles {
 field title: text [indexed, stored]
 field views: u64 [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let schema = indexes[0].to_schema();

        assert!(schema.get_field("title").is_some());
        assert!(schema.get_field("views").is_some());
        assert!(schema.get_field("nonexistent").is_none());
    }

    // A field declared without an attribute list is indexed and stored.
    #[test]
    fn test_default_attributes() {
        let sdl = r#"
 index test {
 field title: text
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let field = &indexes[0].fields[0];

        assert!(field.indexed);
        assert!(field.stored);
    }

    // Several index blocks in one input are returned in source order.
    #[test]
    fn test_multiple_indexes() {
        let sdl = r#"
 index articles {
 field title: text [indexed, stored]
 }

 index users {
 field name: text [indexed, stored]
 field email: text [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 2);
        assert_eq!(indexes[0].name, "articles");
        assert_eq!(indexes[1].name, "users");
    }

    // `text<name>` records the tokenizer name; plain `text` leaves it unset.
    #[test]
    fn test_tokenizer_spec() {
        let sdl = r#"
 index articles {
 field title: text<en_stem> [indexed, stored]
 field body: text<default> [indexed]
 field author: text [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert_eq!(index.fields[0].name, "title");
        assert_eq!(index.fields[0].tokenizer, Some("en_stem".to_string()));

        assert_eq!(index.fields[1].name, "body");
        assert_eq!(index.fields[1].tokenizer, Some("default".to_string()));

        assert_eq!(index.fields[2].name, "author");
        assert_eq!(index.fields[2].tokenizer, None);
    }

    // The declared tokenizer name is carried through into the built schema's
    // field entries.
    #[test]
    fn test_tokenizer_in_schema() {
        let sdl = r#"
 index articles {
 field title: text<german> [indexed, stored]
 field body: text<en_stem> [indexed]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let schema = indexes[0].to_schema();

        let title_field = schema.get_field("title").unwrap();
        let title_entry = schema.get_field_entry(title_field).unwrap();
        assert_eq!(title_entry.tokenizer, Some("german".to_string()));

        let body_field = schema.get_field("body").unwrap();
        let body_entry = schema.get_field_entry(body_field).unwrap();
        assert_eq!(body_entry.tokenizer, Some("en_stem".to_string()));
    }

    // A query router with a quoted pattern: `\\` in the SDL text must decode
    // to a single backslash, and all four properties must be captured.
    // NOTE(review): `target_field` is `uris` while the index declares `uri`;
    // the parser does not cross-check the name, so this passes — possibly a
    // typo in the fixture, confirm.
    #[test]
    fn test_query_router_basic() {
        let sdl = r#"
 index documents {
 field title: text [indexed, stored]
 field uri: text [indexed, stored]

 query_router {
 pattern: "10\\.\\d{4,}/[^\\s]+"
 substitution: "doi://{0}"
 target_field: uris
 mode: exclusive
 }
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let index = &indexes[0];

        assert_eq!(index.query_routers.len(), 1);
        let router = &index.query_routers[0];
        assert_eq!(router.pattern, r"10\.\d{4,}/[^\s]+");
        assert_eq!(router.substitution, "doi://{0}");
        assert_eq!(router.target_field, "uris");
        assert_eq!(router.mode, RoutingMode::Exclusive);
    }

    // A raw-string pattern (`r"..."`) is taken verbatim, backslashes included.
    #[test]
    fn test_query_router_raw_string() {
        let sdl = r#"
 index documents {
 field uris: text [indexed, stored]

 query_router {
 pattern: r"^pmid:(\d+)$"
 substitution: "pubmed://{1}"
 target_field: uris
 mode: additional
 }
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        let router = &indexes[0].query_routers[0];

        assert_eq!(router.pattern, r"^pmid:(\d+)$");
        assert_eq!(router.substitution, "pubmed://{1}");
        assert_eq!(router.mode, RoutingMode::Additional);
    }

    // Every `query_router` block in an index produces its own rule.
    #[test]
    fn test_multiple_query_routers() {
        let sdl = r#"
 index documents {
 field uris: text [indexed, stored]

 query_router {
 pattern: r"^doi:(10\.\d{4,}/[^\s]+)$"
 substitution: "doi://{1}"
 target_field: uris
 mode: exclusive
 }

 query_router {
 pattern: r"^pmid:(\d+)$"
 substitution: "pubmed://{1}"
 target_field: uris
 mode: exclusive
 }

 query_router {
 pattern: r"^arxiv:(\d+\.\d+)$"
 substitution: "arxiv://{1}"
 target_field: uris
 mode: additional
 }
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].query_routers.len(), 3);
    }

    // Omitting `mode` yields `RoutingMode::Additional`.
    #[test]
    fn test_query_router_default_mode() {
        let sdl = r#"
 index documents {
 field uris: text [indexed, stored]

 query_router {
 pattern: r"test"
 substitution: "{0}"
 target_field: uris
 }
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes[0].query_routers[0].mode, RoutingMode::Additional);
    }

    // The `multi` attribute is recorded on the parsed field and carried into
    // the built schema's field entry; fields without it stay non-multi.
    #[test]
    fn test_multi_attribute() {
        let sdl = r#"
 index documents {
 field uris: text [indexed, stored, multi]
 field title: text [indexed, stored]
 }
 "#;

        let indexes = parse_sdl(sdl).unwrap();
        assert_eq!(indexes.len(), 1);

        let fields = &indexes[0].fields;
        assert_eq!(fields.len(), 2);

        assert_eq!(fields[0].name, "uris");
        assert!(fields[0].multi, "uris field should have multi=true");

        assert_eq!(fields[1].name, "title");
        assert!(!fields[1].multi, "title field should have multi=false");

        // The flag must also be present on the schema entries.
        let schema = indexes[0].to_schema();
        let uris_field = schema.get_field("uris").unwrap();
        let title_field = schema.get_field("title").unwrap();

        assert!(schema.get_field_entry(uris_field).unwrap().multi);
        assert!(!schema.get_field_entry(title_field).unwrap().multi);
    }
}