1use regex::Regex;
4
5use code_moniker_core::core::moniker::Moniker;
6use code_moniker_core::core::uri::{UriConfig, from_uri};
7
8#[derive(Debug, Clone, Copy, Eq, PartialEq)]
9pub(super) enum Lhs {
10 Name,
11 Lines,
12 Kind,
13 Shape,
14 Visibility,
15 Text,
16 Moniker,
17 Depth,
18 Confidence,
19 ParentName,
20 ParentKind,
21 ParentShape,
22 SourceName,
23 SourceKind,
24 SourceShape,
25 SourceVisibility,
26 SourceMoniker,
27 TargetName,
28 TargetKind,
29 TargetShape,
30 TargetVisibility,
31 TargetMoniker,
32 SegmentName,
33 SegmentKind,
34}
35
36impl Lhs {
37 pub(super) fn as_str(self) -> &'static str {
38 match self {
39 Self::Name => "name",
40 Self::Lines => "lines",
41 Self::Kind => "kind",
42 Self::Shape => "shape",
43 Self::Visibility => "visibility",
44 Self::Text => "text",
45 Self::Moniker => "moniker",
46 Self::Depth => "depth",
47 Self::Confidence => "confidence",
48 Self::ParentName => "parent.name",
49 Self::ParentKind => "parent.kind",
50 Self::ParentShape => "parent.shape",
51 Self::SourceName => "source.name",
52 Self::SourceKind => "source.kind",
53 Self::SourceShape => "source.shape",
54 Self::SourceVisibility => "source.visibility",
55 Self::SourceMoniker => "source",
56 Self::TargetName => "target.name",
57 Self::TargetKind => "target.kind",
58 Self::TargetShape => "target.shape",
59 Self::TargetVisibility => "target.visibility",
60 Self::TargetMoniker => "target",
61 Self::SegmentName => "segment.name",
62 Self::SegmentKind => "segment.kind",
63 }
64 }
65}
66
67pub(super) const TWO_CHAR_OPS: &[&str] = &["<=", ">=", "!=", "=~", "!~", "<@", "@>", "?="];
68
69#[derive(Debug, Clone)]
70pub(super) enum LhsExpr {
71 Attr(Lhs),
72 Count {
73 domain: Domain,
74 filter: Option<Box<Node>>,
75 },
76 SegmentOf {
77 scope: SegmentScope,
78 kind: String,
79 },
80}
81
82#[derive(Debug, Clone, Copy, Eq, PartialEq)]
83pub(super) enum SegmentScope {
84 Def,
85 Source,
86 Target,
87}
88
89#[derive(Debug, Clone, Eq, PartialEq)]
90pub(super) enum Domain {
91 Children(String),
92 Segments,
93 OutRefs,
94 InRefs,
95}
96
97#[derive(Debug, Clone, Copy, Eq, PartialEq)]
98pub(super) enum QuantKind {
99 Any,
100 All,
101 None,
102}
103
104#[derive(Debug, Clone, Copy, Eq, PartialEq)]
105pub(super) enum Op {
106 Eq,
107 Ne,
108 Lt,
109 Le,
110 Gt,
111 Ge,
112 RegexMatch,
113 RegexNoMatch,
114 AncestorOf,
115 DescendantOf,
116 BindMatch,
117 PathMatch,
118}
119
120#[derive(Debug, Clone)]
121pub(super) enum Rhs {
122 Number(u32),
123 RegexStr(String),
124 Moniker(Moniker),
125 Str(String),
126 PathPattern(super::path::Pattern),
127 Projection(Lhs),
128}
129
130#[derive(Debug, Clone)]
131pub(super) struct Atom {
132 pub lhs: LhsExpr,
133 pub op: Op,
134 pub rhs: Rhs,
135 pub raw: String,
136 pub regex: Option<Regex>,
137}
138
139#[derive(Debug, Clone)]
140pub(super) enum Node {
141 Atom(Atom),
142 And(Vec<Node>),
143 Or(Vec<Node>),
144 Not(Box<Node>),
145 Implies(Box<Node>, Box<Node>),
146 Quantifier {
147 kind: QuantKind,
148 domain: Domain,
149 filter: Box<Node>,
150 },
151}
152
153#[derive(Debug, Clone)]
154pub(super) struct Expr {
155 pub root: Node,
156}
157
158#[derive(Debug, Clone, thiserror::Error)]
159pub enum ParseError {
160 #[error("expression `{expr}`: {msg}")]
161 BadExpr { expr: String, msg: String },
162}
163
164pub(super) fn parse(input: &str, scheme: &str, allowed_kinds: &[&str]) -> Result<Expr, ParseError> {
165 let raw = input.to_string();
166 let mut p = Parser {
167 input,
168 pos: 0,
169 scheme,
170 allowed_kinds,
171 raw: &raw,
172 };
173 let root = p.parse_expr()?;
174 p.skip_ws();
175 if p.pos < p.input.len() {
176 let msg = format!("trailing input at byte {}: `{}`", p.pos, &p.input[p.pos..]);
177 return Err(ParseError::BadExpr { expr: raw, msg });
178 }
179 Ok(Expr { root })
180}
181
/// Cursor-based recursive-descent parser state.
struct Parser<'a> {
    /// The text being parsed.
    input: &'a str,
    /// Byte offset of the cursor into `input`.
    pos: usize,
    /// Scheme handed to moniker-URI parsing.
    scheme: &'a str,
    /// Kind names accepted as a `count(...)` / quantifier domain.
    allowed_kinds: &'a [&'a str],
    /// The complete expression text, quoted in error messages.
    raw: &'a str,
}
189
190impl<'a> Parser<'a> {
191 fn parse_expr(&mut self) -> Result<Node, ParseError> {
192 let lhs = self.parse_or()?;
193 self.skip_ws();
194 if self.eat_keyword("=>") {
195 let rhs = self.parse_or()?;
196 return Ok(Node::Implies(Box::new(lhs), Box::new(rhs)));
197 }
198 Ok(lhs)
199 }
200
201 fn parse_or(&mut self) -> Result<Node, ParseError> {
202 let mut nodes = vec![self.parse_and()?];
203 loop {
204 self.skip_ws();
205 if !self.eat_keyword("OR") {
206 break;
207 }
208 nodes.push(self.parse_and()?);
209 }
210 Ok(if nodes.len() == 1 {
211 nodes.pop().unwrap()
212 } else {
213 Node::Or(nodes)
214 })
215 }
216
217 fn parse_and(&mut self) -> Result<Node, ParseError> {
218 let mut nodes = vec![self.parse_not()?];
219 loop {
220 self.skip_ws();
221 if !self.eat_keyword("AND") {
222 break;
223 }
224 nodes.push(self.parse_not()?);
225 }
226 Ok(if nodes.len() == 1 {
227 nodes.pop().unwrap()
228 } else {
229 Node::And(nodes)
230 })
231 }
232
233 fn parse_not(&mut self) -> Result<Node, ParseError> {
234 self.skip_ws();
235 if self.eat_keyword("NOT") {
236 let inner = self.parse_not()?;
237 return Ok(Node::Not(Box::new(inner)));
238 }
239 self.parse_primary()
240 }
241
242 fn parse_primary(&mut self) -> Result<Node, ParseError> {
243 self.skip_ws();
244 if self.peek_byte() == Some(b'(') {
245 self.pos += 1;
246 let inner = self.parse_expr()?;
247 self.skip_ws();
248 if self.peek_byte() != Some(b')') {
249 return Err(ParseError::BadExpr {
250 expr: self.raw.to_string(),
251 msg: format!("missing `)` at byte {}", self.pos),
252 });
253 }
254 self.pos += 1;
255 return Ok(inner);
256 }
257 if let Some(q) = self.try_parse_quantifier()? {
258 return Ok(q);
259 }
260 if let Some(atom) = self.try_parse_count_atom()? {
261 return Ok(Node::Atom(atom));
262 }
263 if let Some(atom) = self.try_parse_segment_atom()? {
264 return Ok(Node::Atom(atom));
265 }
266 let atom_end = self.find_atom_end();
267 if atom_end == self.pos {
268 return Err(ParseError::BadExpr {
269 expr: self.raw.to_string(),
270 msg: format!("expected atom at byte {}", self.pos),
271 });
272 }
273 let atom_str = &self.input[self.pos..atom_end];
274 let atom = parse_atom(atom_str, self.scheme, self.raw)?;
275 self.pos = atom_end;
276 Ok(Node::Atom(atom))
277 }
278
279 fn try_parse_segment_atom(&mut self) -> Result<Option<Atom>, ParseError> {
280 self.skip_ws();
281 let rest = &self.input[self.pos..];
282 let (scope, prefix_len) = if rest.starts_with("source.segment(") {
283 (SegmentScope::Source, "source.segment(".len())
284 } else if rest.starts_with("target.segment(") {
285 (SegmentScope::Target, "target.segment(".len())
286 } else if rest.starts_with("segment(") {
287 (SegmentScope::Def, "segment(".len())
288 } else {
289 return Ok(None);
290 };
291 let raw_start = self.pos;
292 self.pos += prefix_len;
293 let bytes = self.input.as_bytes();
294 let arg_start = self.pos;
295 while self.pos < bytes.len() && bytes[self.pos] != b')' {
296 self.pos += 1;
297 }
298 if self.pos == bytes.len() {
299 return Err(ParseError::BadExpr {
300 expr: self.raw.to_string(),
301 msg: "unclosed `segment(...)` projection".to_string(),
302 });
303 }
304 let arg = self.input[arg_start..self.pos].trim();
305 let kind = unquote(arg).to_string();
306 if kind.is_empty() {
307 return Err(ParseError::BadExpr {
308 expr: self.raw.to_string(),
309 msg: "segment(<kind>) needs a kind argument".to_string(),
310 });
311 }
312 self.pos += 1;
313 self.skip_ws();
314 let (op_str, op_len) = self.eat_op().ok_or_else(|| ParseError::BadExpr {
315 expr: self.raw.to_string(),
316 msg: format!(
317 "expected `<op> <rhs>` after `segment(...)` at byte {}",
318 self.pos
319 ),
320 })?;
321 self.pos += op_len;
322 let op = parse_op(op_str, self.raw)?;
323 let rhs_end = self.find_atom_end();
324 let rhs_str = self.input[self.pos..rhs_end].trim();
325 if rhs_str.is_empty() {
326 return Err(ParseError::BadExpr {
327 expr: self.raw.to_string(),
328 msg: "empty RHS after `segment(...)` op".to_string(),
329 });
330 }
331 let rhs = parse_rhs(rhs_str, op, self.scheme, self.raw)?;
332 let regex = match (&op, &rhs) {
333 (Op::RegexMatch | Op::RegexNoMatch, Rhs::RegexStr(p)) => {
334 Some(Regex::new(p).map_err(|e| ParseError::BadExpr {
335 expr: self.raw.to_string(),
336 msg: format!("invalid regex `{p}`: {e}"),
337 })?)
338 }
339 _ => None,
340 };
341 match op {
342 Op::Eq | Op::Ne | Op::RegexMatch | Op::RegexNoMatch => {}
343 _ => {
344 return Err(ParseError::BadExpr {
345 expr: self.raw.to_string(),
346 msg: format!("operator {op:?} not valid for segment(...) projection"),
347 });
348 }
349 }
350 self.pos = rhs_end;
351 let raw = self.input[raw_start..self.pos].to_string();
352 Ok(Some(Atom {
353 lhs: LhsExpr::SegmentOf { scope, kind },
354 op,
355 rhs,
356 raw,
357 regex,
358 }))
359 }
360
361 fn try_parse_count_atom(&mut self) -> Result<Option<Atom>, ParseError> {
362 self.skip_ws();
363 if !self.input[self.pos..].starts_with("count(") {
364 return Ok(None);
365 }
366 let raw_start = self.pos;
367 self.pos += "count".len();
368 let (domain, filter) = self.parse_quantifier_body()?;
369 self.skip_ws();
370 let (op_str, op_len) = self.eat_op().ok_or_else(|| ParseError::BadExpr {
371 expr: self.raw.to_string(),
372 msg: format!(
373 "expected numeric comparison after `count(...)` at byte {}",
374 self.pos
375 ),
376 })?;
377 self.pos += op_len;
378 let op = parse_op(op_str, self.raw)?;
379 self.skip_ws();
380 let num_start = self.pos;
381 let bytes = self.input.as_bytes();
382 while self.pos < bytes.len() && bytes[self.pos].is_ascii_digit() {
383 self.pos += 1;
384 }
385 let num_str = &self.input[num_start..self.pos];
386 let n: u32 = num_str.parse().map_err(|_| ParseError::BadExpr {
387 expr: self.raw.to_string(),
388 msg: format!(
389 "expected number after `count(...) {op_str}` at byte {num_start}, got `{num_str}`"
390 ),
391 })?;
392 let raw = self.input[raw_start..self.pos].to_string();
393 Ok(Some(Atom {
394 lhs: LhsExpr::Count {
395 domain,
396 filter: filter.map(Box::new),
397 },
398 op,
399 rhs: Rhs::Number(n),
400 raw,
401 regex: None,
402 }))
403 }
404
405 fn try_parse_quantifier(&mut self) -> Result<Option<Node>, ParseError> {
406 self.skip_ws();
407 for (kw, qk) in [
408 ("any", QuantKind::Any),
409 ("all", QuantKind::All),
410 ("none", QuantKind::None),
411 ] {
412 if let Some(rest) = self.input[self.pos..].strip_prefix(kw)
413 && rest.starts_with('(')
414 {
415 self.pos += kw.len();
416 let (domain, filter) = self.parse_quantifier_body()?;
417 let filter = filter.ok_or_else(|| ParseError::BadExpr {
418 expr: self.raw.to_string(),
419 msg: format!("`{kw}` requires a filter expression: `{kw}(<domain>, <expr>)`"),
420 })?;
421 return Ok(Some(Node::Quantifier {
422 kind: qk,
423 domain,
424 filter: Box::new(filter),
425 }));
426 }
427 }
428 Ok(None)
429 }
430
431 fn parse_quantifier_body(&mut self) -> Result<(Domain, Option<Node>), ParseError> {
432 if self.peek_byte() != Some(b'(') {
433 return Err(ParseError::BadExpr {
434 expr: self.raw.to_string(),
435 msg: format!("expected `(` at byte {}", self.pos),
436 });
437 }
438 self.pos += 1;
439 self.skip_ws();
440 let start = self.pos;
441 let bytes = self.input.as_bytes();
442 while self.pos < bytes.len()
443 && (bytes[self.pos].is_ascii_alphanumeric() || bytes[self.pos] == b'_')
444 {
445 self.pos += 1;
446 }
447 let domain_ident = self.input[start..self.pos].to_string();
448 if domain_ident.is_empty() {
449 return Err(ParseError::BadExpr {
450 expr: self.raw.to_string(),
451 msg: format!("expected domain identifier at byte {}", start),
452 });
453 }
454 let domain = match domain_ident.as_str() {
455 "segment" => Domain::Segments,
456 "out_refs" => Domain::OutRefs,
457 "in_refs" => Domain::InRefs,
458 other => {
459 if !self.allowed_kinds.contains(&other) {
460 return Err(ParseError::BadExpr {
461 expr: self.raw.to_string(),
462 msg: format!(
463 "unknown domain `{other}` (allowed: segment, out_refs, in_refs, or one of {})",
464 self.allowed_kinds.join(", ")
465 ),
466 });
467 }
468 Domain::Children(other.to_string())
469 }
470 };
471 self.skip_ws();
472 let filter = if self.peek_byte() == Some(b',') {
473 self.pos += 1;
474 let f = self.parse_expr()?;
475 self.skip_ws();
476 Some(f)
477 } else {
478 None
479 };
480 if self.peek_byte() != Some(b')') {
481 return Err(ParseError::BadExpr {
482 expr: self.raw.to_string(),
483 msg: format!("missing `)` for quantifier at byte {}", self.pos),
484 });
485 }
486 self.pos += 1;
487 Ok((domain, filter))
488 }
489
490 fn find_atom_end(&self) -> usize {
491 let bytes = self.input.as_bytes();
492 let mut i = self.pos;
493 let mut depth: i32 = 0;
494 let mut in_string: Option<u8> = None;
495 while i < bytes.len() {
496 let c = bytes[i];
497 if let Some(q) = in_string {
498 if c == q {
499 in_string = None;
500 }
501 i += 1;
502 continue;
503 }
504 match c {
505 b'\'' | b'"' => {
506 in_string = Some(c);
507 i += 1;
508 }
509 b'(' => {
510 depth += 1;
511 i += 1;
512 }
513 b')' => {
514 if depth == 0 {
515 return i;
516 }
517 depth -= 1;
518 i += 1;
519 }
520 _ => {
521 if depth == 0 && self.boundary_at(i) {
522 return i;
523 }
524 i += 1;
525 }
526 }
527 }
528 i
529 }
530
531 fn boundary_at(&self, i: usize) -> bool {
532 let rest = &self.input[i..];
533 rest.starts_with(" AND ")
534 || rest.starts_with(" OR ")
535 || rest.starts_with(" => ")
536 || rest.starts_with(" AND\t")
537 || rest.starts_with(" OR\t")
538 || rest.starts_with(" =>\t")
539 }
540
541 fn skip_ws(&mut self) {
542 let bytes = self.input.as_bytes();
543 while self.pos < bytes.len() && bytes[self.pos].is_ascii_whitespace() {
544 self.pos += 1;
545 }
546 }
547
548 fn peek_byte(&self) -> Option<u8> {
549 self.input.as_bytes().get(self.pos).copied()
550 }
551
552 fn eat_op(&self) -> Option<(&'static str, usize)> {
553 let rest = &self.input[self.pos..];
554 for op in TWO_CHAR_OPS {
555 if rest.starts_with(op) {
556 return Some((*op, op.len()));
557 }
558 }
559 for op in ["<", ">", "=", "~"] {
560 if rest.starts_with(op) {
561 return Some((op, 1));
562 }
563 }
564 None
565 }
566
567 fn eat_keyword(&mut self, kw: &str) -> bool {
568 let rest = &self.input[self.pos..];
569 if let Some(after) = rest.strip_prefix(kw) {
570 let next_ok = after.is_empty()
571 || after.starts_with(|c: char| c.is_ascii_whitespace())
572 || after.starts_with('(');
573 if next_ok {
574 self.pos += kw.len();
575 return true;
576 }
577 }
578 false
579 }
580}
581
582fn parse_atom(input: &str, scheme: &str, full: &str) -> Result<Atom, ParseError> {
583 let raw = input.trim().to_string();
584 if let Some(atom) = parse_has_segment(&raw, full)? {
585 return Ok(atom);
586 }
587 let (lhs_str, op_str, rhs_str) = split_atom(&raw, full)?;
588 let lhs = parse_lhs(lhs_str, full)?;
589 let op = parse_op(op_str, full)?;
590 check_type(&lhs, op, full)?;
591 let rhs = parse_rhs(rhs_str, op, scheme, full)?;
592 let regex = match (&op, &rhs) {
593 (Op::RegexMatch | Op::RegexNoMatch, Rhs::RegexStr(p)) => {
594 Some(Regex::new(p).map_err(|e| ParseError::BadExpr {
595 expr: full.to_string(),
596 msg: format!("invalid regex `{p}`: {e}"),
597 })?)
598 }
599 _ => None,
600 };
601 Ok(Atom {
602 lhs,
603 op,
604 rhs,
605 raw,
606 regex,
607 })
608}
609
610fn parse_has_segment(raw: &str, full: &str) -> Result<Option<Atom>, ParseError> {
612 let Some(args) = raw
613 .strip_prefix("has_segment(")
614 .and_then(|s| s.strip_suffix(')'))
615 else {
616 return Ok(None);
617 };
618 let bail = |msg: String| ParseError::BadExpr {
619 expr: full.to_string(),
620 msg,
621 };
622 let mut parts = args.splitn(2, ',').map(str::trim);
623 let kind = parts
624 .next()
625 .ok_or_else(|| bail("has_segment(kind, name) needs two args".to_string()))?;
626 let name = parts
627 .next()
628 .ok_or_else(|| bail("has_segment(kind, name) needs two args".to_string()))?;
629 let kind = unquote(kind);
630 let name = unquote(name);
631 if kind.is_empty() || name.is_empty() {
632 return Err(bail(
633 "has_segment(kind, name) args must be non-empty strings".to_string(),
634 ));
635 }
636 let pat_src = format!("**/{kind}:{name}/**");
637 let pattern = super::path::parse(&pat_src).map_err(|e| bail(format!("{e}")))?;
638 Ok(Some(Atom {
639 lhs: LhsExpr::Attr(Lhs::Moniker),
640 op: Op::PathMatch,
641 rhs: Rhs::PathPattern(pattern),
642 raw: raw.to_string(),
643 regex: None,
644 }))
645}
646
647fn projection_name_to_lhs(s: &str) -> Option<Lhs> {
648 Some(match s {
649 "name" => Lhs::Name,
650 "lines" => Lhs::Lines,
651 "kind" => Lhs::Kind,
652 "shape" => Lhs::Shape,
653 "visibility" => Lhs::Visibility,
654 "text" => Lhs::Text,
655 "moniker" => Lhs::Moniker,
656 "depth" => Lhs::Depth,
657 "confidence" => Lhs::Confidence,
658 "parent.name" => Lhs::ParentName,
659 "parent.kind" => Lhs::ParentKind,
660 "parent.shape" => Lhs::ParentShape,
661 "source" => Lhs::SourceMoniker,
662 "source.name" => Lhs::SourceName,
663 "source.kind" => Lhs::SourceKind,
664 "source.shape" => Lhs::SourceShape,
665 "source.visibility" => Lhs::SourceVisibility,
666 "target" => Lhs::TargetMoniker,
667 "target.name" => Lhs::TargetName,
668 "target.kind" => Lhs::TargetKind,
669 "target.shape" => Lhs::TargetShape,
670 "target.visibility" => Lhs::TargetVisibility,
671 "segment.name" => Lhs::SegmentName,
672 "segment.kind" => Lhs::SegmentKind,
673 _ => return None,
674 })
675}
676
/// Trims `s` and removes one pair of matching surrounding quotes (`'...'`
/// or `"..."`), if present.
///
/// A lone quote character or mismatched quotes are left untouched.
fn unquote(s: &str) -> &str {
    let trimmed = s.trim();
    for quote in ['\'', '"'] {
        // `strip_suffix` runs on what is left after the opening quote, so a
        // single quote character cannot count as both ends.
        let inner = trimmed
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote));
        if let Some(inner) = inner {
            return inner;
        }
    }
    trimmed
}
687
688fn split_atom<'a>(s: &'a str, full: &str) -> Result<(&'a str, &'a str, &'a str), ParseError> {
691 let bail = || ParseError::BadExpr {
692 expr: full.to_string(),
693 msg: format!("expected `<lhs> <op> <rhs>` in `{s}`"),
694 };
695 let bytes = s.as_bytes();
696 let lhs_end = lhs_token_end(bytes).ok_or_else(bail)?;
697 let after_lhs = s[lhs_end..].trim_start();
698 let op_offset = s.len() - after_lhs.len();
699 for op in TWO_CHAR_OPS {
700 if let Some(rest) = after_lhs.strip_prefix(op) {
701 let lhs = s[..lhs_end].trim();
702 let rhs = rest.trim();
703 if lhs.is_empty() || rhs.is_empty() {
704 return Err(bail());
705 }
706 return Ok((lhs, &s[op_offset..op_offset + op.len()], rhs));
707 }
708 }
709 for op in ['<', '>', '=', '~'] {
710 if let Some(rest) = after_lhs.strip_prefix(op) {
711 let lhs = s[..lhs_end].trim();
712 let rhs = rest.trim();
713 if lhs.is_empty() || rhs.is_empty() {
714 return Err(bail());
715 }
716 return Ok((lhs, &s[op_offset..op_offset + op.len_utf8()], rhs));
717 }
718 }
719 Err(bail())
720}
721
/// Returns the byte offset just past the left-hand token at the start of
/// `bytes`.
///
/// After optional leading ASCII whitespace the token is a non-empty run of
/// ASCII letters, `_` and `.`, optionally followed by a parenthesised
/// argument list that extends to the first `)`. Returns `None` when the
/// run is empty or an opening `(` is never closed.
fn lhs_token_end(bytes: &[u8]) -> Option<usize> {
    let start = bytes
        .iter()
        .position(|b| !b.is_ascii_whitespace())
        .unwrap_or(bytes.len());
    let ident_len = bytes[start..]
        .iter()
        .take_while(|b| b.is_ascii_alphabetic() || **b == b'_' || **b == b'.')
        .count();
    if ident_len == 0 {
        return None;
    }

    let ident_end = start + ident_len;
    if bytes.get(ident_end) != Some(&b'(') {
        return Some(ident_end);
    }
    // Skip the `(`, locate the first `)`, and step past it.
    let args_start = ident_end + 1;
    bytes[args_start..]
        .iter()
        .position(|&b| b == b')')
        .map(|offset| args_start + offset + 1)
}
748
749fn parse_lhs(s: &str, full: &str) -> Result<LhsExpr, ParseError> {
750 if s.starts_with("count(") {
751 return Err(ParseError::BadExpr {
752 expr: full.to_string(),
753 msg: "internal: count(...) reached parse_lhs; should be handled at primary level"
754 .to_string(),
755 });
756 }
757 match projection_name_to_lhs(s) {
758 Some(lhs) => Ok(LhsExpr::Attr(lhs)),
759 None => Err(ParseError::BadExpr {
760 expr: full.to_string(),
761 msg: format!("unknown lhs `{s}`"),
762 }),
763 }
764}
765
766fn parse_op(s: &str, full: &str) -> Result<Op, ParseError> {
767 Ok(match s {
768 "=" => Op::Eq,
769 "!=" => Op::Ne,
770 "<" => Op::Lt,
771 "<=" => Op::Le,
772 ">" => Op::Gt,
773 ">=" => Op::Ge,
774 "=~" => Op::RegexMatch,
775 "!~" => Op::RegexNoMatch,
776 "@>" => Op::AncestorOf,
777 "<@" => Op::DescendantOf,
778 "?=" => Op::BindMatch,
779 "~" => Op::PathMatch,
780 other => {
781 return Err(ParseError::BadExpr {
782 expr: full.to_string(),
783 msg: format!("unknown operator `{other}`"),
784 });
785 }
786 })
787}
788
789fn check_type(lhs: &LhsExpr, op: Op, full: &str) -> Result<(), ParseError> {
790 use Lhs::*;
791 use Op::*;
792 let lhs_attr = match lhs {
793 LhsExpr::Attr(a) => *a,
794 LhsExpr::Count { .. } => {
795 return match op {
796 Lt | Le | Gt | Ge | Eq | Ne => Ok(()),
797 _ => Err(ParseError::BadExpr {
798 expr: full.to_string(),
799 msg: format!("count(...) only accepts numeric operators, got {op:?}"),
800 }),
801 };
802 }
803 LhsExpr::SegmentOf { .. } => {
804 return match op {
805 Eq | Ne | RegexMatch | RegexNoMatch => Ok(()),
806 _ => Err(ParseError::BadExpr {
807 expr: full.to_string(),
808 msg: format!("segment(...) only accepts string operators, got {op:?}"),
809 }),
810 };
811 }
812 };
813 let ok =
814 matches!(
815 (lhs_attr, op),
816 (
817 Name | Kind
818 | Shape | Visibility
819 | Text | Confidence
820 | ParentName | ParentKind
821 | ParentShape | SourceName
822 | SourceKind | SourceShape
823 | SourceVisibility
824 | TargetName | TargetKind
825 | TargetShape | TargetVisibility
826 | SegmentName | SegmentKind,
827 Eq | Ne | RegexMatch | RegexNoMatch,
828 ) | (Lines | Depth, Lt | Le | Gt | Ge | Eq | Ne)
829 | (
830 Moniker | SourceMoniker | TargetMoniker,
831 Eq | Ne | AncestorOf | DescendantOf | BindMatch | PathMatch,
832 )
833 );
834 if !ok {
835 return Err(ParseError::BadExpr {
836 expr: full.to_string(),
837 msg: format!("operator {op:?} not valid for lhs {lhs_attr:?}"),
838 });
839 }
840 Ok(())
841}
842
843fn parse_rhs(s: &str, op: Op, scheme: &str, full: &str) -> Result<Rhs, ParseError> {
844 let s = s.trim();
845 let s = if (s.starts_with('"') && s.ends_with('"') && s.len() >= 2)
846 || (s.starts_with('\'') && s.ends_with('\'') && s.len() >= 2)
847 {
848 &s[1..s.len() - 1]
849 } else {
850 s
851 };
852 Ok(match op {
853 Op::RegexMatch | Op::RegexNoMatch => Rhs::RegexStr(s.to_string()),
854 Op::PathMatch => {
855 let pattern = super::path::parse(s).map_err(|e| ParseError::BadExpr {
856 expr: full.to_string(),
857 msg: format!("{e}"),
858 })?;
859 Rhs::PathPattern(pattern)
860 }
861 Op::AncestorOf | Op::DescendantOf | Op::BindMatch => {
862 let cfg = UriConfig { scheme };
863 let m = from_uri(s, &cfg).map_err(|e| ParseError::BadExpr {
864 expr: full.to_string(),
865 msg: format!("invalid moniker URI `{s}`: {e}"),
866 })?;
867 Rhs::Moniker(m)
868 }
869 Op::Lt | Op::Le | Op::Gt | Op::Ge => {
870 let n: u32 = s.parse().map_err(|_| ParseError::BadExpr {
871 expr: full.to_string(),
872 msg: format!("expected number, got `{s}`"),
873 })?;
874 Rhs::Number(n)
875 }
876 Op::Eq | Op::Ne => {
877 if let Ok(n) = s.parse::<u32>() {
878 Rhs::Number(n)
879 } else if s.contains("+moniker://") {
880 let cfg = UriConfig { scheme };
881 let m = from_uri(s, &cfg).map_err(|e| ParseError::BadExpr {
882 expr: full.to_string(),
883 msg: format!("invalid moniker URI `{s}`: {e}"),
884 })?;
885 Rhs::Moniker(m)
886 } else if let Some(lhs) = projection_name_to_lhs(s) {
887 Rhs::Projection(lhs)
888 } else {
889 Rhs::Str(s.to_string())
890 }
891 }
892 })
893}
894
#[cfg(test)]
mod tests {
    use super::*;

    const TS: &str = "code+moniker://";
    const KINDS: &[&str] = &["class", "method", "function", "module"];

    /// Parses `src` with the test scheme and kinds, panicking on failure.
    fn parsed(src: &str) -> Expr {
        parse(src, TS, KINDS).unwrap()
    }

    /// Reports whether `src` is rejected by the parser.
    fn rejected(src: &str) -> bool {
        parse(src, TS, KINDS).is_err()
    }

    /// Returns the root atom, panicking when the root is anything else.
    fn solo(e: &Expr) -> &Atom {
        match &e.root {
            Node::Atom(a) => a,
            other => panic!("expected solo Atom, got {other:?}"),
        }
    }

    /// Returns the atoms under a root `And` (or the single root atom).
    fn and_arms(e: &Expr) -> Vec<&Atom> {
        let children = match &e.root {
            Node::And(children) => children,
            Node::Atom(a) => return vec![a],
            other => panic!("expected And or Atom root, got {other:?}"),
        };
        let mut atoms = Vec::with_capacity(children.len());
        for child in children {
            match child {
                Node::Atom(a) => atoms.push(a),
                other => panic!("expected Atom under And, got {other:?}"),
            }
        }
        atoms
    }

    #[test]
    fn parses_name_regex() {
        let expr = parsed("name =~ ^[A-Z]");
        let atom = solo(&expr);
        match (&atom.lhs, atom.op, &atom.rhs) {
            (LhsExpr::Attr(Lhs::Name), Op::RegexMatch, Rhs::RegexStr(_)) => {}
            other => panic!("unexpected: {other:?}"),
        }
        assert!(atom.regex.is_some());
    }

    #[test]
    fn parses_shape_eq() {
        let expr = parsed("shape = 'callable'");
        let atom = solo(&expr);
        match (&atom.lhs, atom.op) {
            (LhsExpr::Attr(Lhs::Shape), Op::Eq) => {}
            other => panic!("unexpected: {other:?}"),
        }
        match &atom.rhs {
            Rhs::Str(text) => assert_eq!(text, "callable"),
            other => panic!("expected Str rhs, got {other:?}"),
        }
    }

    #[test]
    fn parses_parent_shape_eq() {
        let expr = parsed("parent.shape = 'type'");
        assert!(matches!(solo(&expr).lhs, LhsExpr::Attr(Lhs::ParentShape)));
    }

    #[test]
    fn parses_target_shape_regex() {
        let expr = parsed("target.shape =~ ^(type|callable)$");
        let atom = solo(&expr);
        match (&atom.lhs, atom.op) {
            (LhsExpr::Attr(Lhs::TargetShape), Op::RegexMatch) => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn shape_rejects_numeric_operator() {
        assert!(rejected("shape < 'callable'"));
    }

    #[test]
    fn parses_lines_le() {
        let expr = parsed("lines <= 60");
        let atom = solo(&expr);
        match (&atom.lhs, &atom.op, &atom.rhs) {
            (LhsExpr::Attr(Lhs::Lines), Op::Le, Rhs::Number(60)) => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn parses_moniker_descendant() {
        let expr = parsed("moniker <@ code+moniker://./class:Foo");
        let atom = solo(&expr);
        match (&atom.lhs, &atom.op, &atom.rhs) {
            (LhsExpr::Attr(Lhs::Moniker), Op::DescendantOf, Rhs::Moniker(_)) => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn parses_count() {
        let expr = parsed("count(method) <= 20");
        let atom = solo(&expr);
        match (&atom.lhs, &atom.op, &atom.rhs) {
            (
                LhsExpr::Count {
                    domain: Domain::Children(kind),
                    filter: None,
                },
                Op::Le,
                Rhs::Number(20),
            ) if kind == "method" => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn parses_count_with_filter() {
        let expr = parsed("count(method, name =~ ^get) <= 5");
        let atom = solo(&expr);
        match (&atom.lhs, &atom.op) {
            (
                LhsExpr::Count {
                    domain: Domain::Children(kind),
                    filter: Some(_),
                },
                Op::Le,
            ) if kind == "method" => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn parses_any_quantifier() {
        let expr = parsed("any(method, name = 'execute')");
        match &expr.root {
            Node::Quantifier {
                kind: QuantKind::Any,
                domain: Domain::Children(kind),
                ..
            } if kind == "method" => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn parses_all_quantifier_on_segment() {
        let expr = parsed("all(segment, segment.kind = 'module')");
        match &expr.root {
            Node::Quantifier {
                kind: QuantKind::All,
                domain: Domain::Segments,
                ..
            } => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn parses_none_quantifier_on_out_refs() {
        let expr = parsed("none(out_refs, kind = 'imports')");
        match &expr.root {
            Node::Quantifier {
                kind: QuantKind::None,
                domain: Domain::OutRefs,
                ..
            } => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn rejects_quantifier_without_filter() {
        assert!(rejected("any(method)"));
    }

    #[test]
    fn parses_and_combination() {
        let expr = parsed("name =~ ^[A-Z] AND lines <= 60");
        assert_eq!(and_arms(&expr).len(), 2);
    }

    #[test]
    fn rejects_op_lhs_type_mismatch() {
        assert!(
            rejected("lines =~ foo"),
            "lines is numeric, =~ should be rejected"
        );
    }

    #[test]
    fn rejects_unknown_lhs() {
        assert!(rejected("bogus = foo"));
    }

    #[test]
    fn rejects_count_with_regex_op() {
        assert!(rejected("count(method) =~ foo"));
    }

    #[test]
    fn rejects_count_kind_typo() {
        match parse("count(methdo) <= 20", TS, KINDS) {
            Err(ParseError::BadExpr { msg, .. }) => {
                assert!(msg.contains("methdo"), "{msg}");
                assert!(msg.contains("unknown domain"), "{msg}");
            }
            other => panic!("expected BadExpr, got {other:?}"),
        }
    }

    #[test]
    fn rejects_invalid_regex() {
        assert!(rejected("name =~ [unclosed"));
    }

    #[test]
    fn rejects_invalid_moniker_uri() {
        assert!(rejected("moniker <@ not-a-uri"));
    }

    #[test]
    fn rejects_non_numeric_for_lines() {
        assert!(rejected("lines <= forty"));
    }

    #[test]
    fn regex_rhs_containing_op_chars_is_not_split_on_rhs() {
        let expr = parsed("text =~ ^count\\(.+\\) <= 20$");
        let atom = solo(&expr);
        match (&atom.lhs, &atom.op) {
            (LhsExpr::Attr(Lhs::Text), Op::RegexMatch) => {}
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn regex_rhs_with_neq_token_is_not_split_on_rhs() {
        let expr = parsed("text =~ foo!=bar");
        let atom = solo(&expr);
        assert!(matches!(atom.op, Op::RegexMatch));
        match &atom.rhs {
            Rhs::RegexStr(source) => assert_eq!(source, "foo!=bar"),
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn strips_surrounding_quotes_on_rhs() {
        let expr = parsed("name =~ \"^foo$\"");
        match &solo(&expr).rhs {
            Rhs::RegexStr(source) => assert_eq!(source, "^foo$"),
            other => panic!("unexpected: {other:?}"),
        }
    }

    #[test]
    fn parses_or() {
        let expr = parsed("name = 'Foo' OR name = 'Bar'");
        match &expr.root {
            Node::Or(children) => assert_eq!(children.len(), 2),
            other => panic!("expected Or, got {other:?}"),
        }
    }

    #[test]
    fn parses_not() {
        let expr = parsed("NOT name = 'Foo'");
        assert!(matches!(expr.root, Node::Not(_)));
    }

    #[test]
    fn parses_implies() {
        let expr = parsed("name = 'Foo' => kind = 'class'");
        assert!(matches!(expr.root, Node::Implies(_, _)));
    }

    #[test]
    fn parses_parens_override_precedence() {
        let expr = parsed("(name = 'X' OR name = 'Y') AND lines <= 10");
        assert!(matches!(expr.root, Node::And(_)));
    }

    #[test]
    fn precedence_implies_is_lowest() {
        let expr = parsed("name = 'X' OR name = 'Y' => lines <= 10 AND kind = 'class'");
        match expr.root {
            Node::Implies(lhs, rhs) => {
                assert!(matches!(*lhs, Node::Or(_)));
                assert!(matches!(*rhs, Node::And(_)));
            }
            other => panic!("expected Implies at root, got {other:?}"),
        }
    }

    #[test]
    fn precedence_not_binds_tighter_than_and() {
        let expr = parsed("NOT name = 'X' AND lines <= 10");
        match expr.root {
            Node::And(children) => {
                assert!(matches!(children[0], Node::Not(_)));
                assert!(matches!(children[1], Node::Atom(_)));
            }
            other => panic!("expected And, got {other:?}"),
        }
    }

    #[test]
    fn rejects_unmatched_paren() {
        assert!(rejected("(name = 'X'"));
        assert!(rejected("name = 'X')"));
    }

    #[test]
    fn parses_path_match() {
        let expr = parsed("moniker ~ '**/class:Foo/**'");
        let atom = solo(&expr);
        assert!(matches!(atom.op, Op::PathMatch));
        assert!(matches!(atom.rhs, Rhs::PathPattern(_)));
    }

    #[test]
    fn parses_path_match_with_regex_step() {
        let expr = parsed("moniker ~ '**/class:/Port$/'");
        assert!(matches!(solo(&expr).op, Op::PathMatch));
    }

    #[test]
    fn has_segment_desugars_to_path_match() {
        let expr = parsed("has_segment('module', 'domain')");
        let atom = solo(&expr);
        assert!(matches!(atom.op, Op::PathMatch));
        match &atom.rhs {
            Rhs::PathPattern(pattern) => assert_eq!(pattern.raw, "**/module:domain/**"),
            other => panic!("expected PathPattern, got {other:?}"),
        }
    }

    #[test]
    fn rejects_path_match_on_non_moniker_lhs() {
        assert!(rejected("name ~ 'foo'"));
    }

    #[test]
    fn rejects_invalid_path_pattern() {
        assert!(rejected("moniker ~ ''"));
        assert!(rejected("moniker ~ 'no-colon-step'"));
    }
}