Skip to main content

code_moniker_cli/check/
expr.rs

1//! Rule DSL for `code-moniker check`. Full reference: docs/cli/check-dsl.md.
2
3use regex::Regex;
4
5use code_moniker_core::core::moniker::Moniker;
6use code_moniker_core::core::uri::{UriConfig, from_uri};
7
8#[derive(Debug, Clone, Copy, Eq, PartialEq)]
9pub(super) enum Lhs {
10	Name,
11	Lines,
12	Kind,
13	Shape,
14	Visibility,
15	Text,
16	Moniker,
17	Depth,
18	Confidence,
19	ParentName,
20	ParentKind,
21	ParentShape,
22	SourceName,
23	SourceKind,
24	SourceShape,
25	SourceVisibility,
26	SourceMoniker,
27	TargetName,
28	TargetKind,
29	TargetShape,
30	TargetVisibility,
31	TargetMoniker,
32	SegmentName,
33	SegmentKind,
34}
35
36impl Lhs {
37	pub(super) fn as_str(self) -> &'static str {
38		match self {
39			Self::Name => "name",
40			Self::Lines => "lines",
41			Self::Kind => "kind",
42			Self::Shape => "shape",
43			Self::Visibility => "visibility",
44			Self::Text => "text",
45			Self::Moniker => "moniker",
46			Self::Depth => "depth",
47			Self::Confidence => "confidence",
48			Self::ParentName => "parent.name",
49			Self::ParentKind => "parent.kind",
50			Self::ParentShape => "parent.shape",
51			Self::SourceName => "source.name",
52			Self::SourceKind => "source.kind",
53			Self::SourceShape => "source.shape",
54			Self::SourceVisibility => "source.visibility",
55			Self::SourceMoniker => "source",
56			Self::TargetName => "target.name",
57			Self::TargetKind => "target.kind",
58			Self::TargetShape => "target.shape",
59			Self::TargetVisibility => "target.visibility",
60			Self::TargetMoniker => "target",
61			Self::SegmentName => "segment.name",
62			Self::SegmentKind => "segment.kind",
63		}
64	}
65}
66
67pub(super) const TWO_CHAR_OPS: &[&str] = &["<=", ">=", "!=", "=~", "!~", "<@", "@>", "?="];
68
69#[derive(Debug, Clone)]
70pub(super) enum LhsExpr {
71	Attr(Lhs),
72	Count {
73		domain: Domain,
74		filter: Option<Box<Node>>,
75	},
76	SegmentOf {
77		scope: SegmentScope,
78		kind: String,
79	},
80}
81
82#[derive(Debug, Clone, Copy, Eq, PartialEq)]
83pub(super) enum SegmentScope {
84	Def,
85	Source,
86	Target,
87}
88
89#[derive(Debug, Clone, Eq, PartialEq)]
90pub(super) enum Domain {
91	Children(String),
92	Segments,
93	OutRefs,
94	InRefs,
95}
96
97#[derive(Debug, Clone, Copy, Eq, PartialEq)]
98pub(super) enum QuantKind {
99	Any,
100	All,
101	None,
102}
103
104#[derive(Debug, Clone, Copy, Eq, PartialEq)]
105pub(super) enum Op {
106	Eq,
107	Ne,
108	Lt,
109	Le,
110	Gt,
111	Ge,
112	RegexMatch,
113	RegexNoMatch,
114	AncestorOf,
115	DescendantOf,
116	BindMatch,
117	PathMatch,
118}
119
120#[derive(Debug, Clone)]
121pub(super) enum Rhs {
122	Number(u32),
123	RegexStr(String),
124	Moniker(Moniker),
125	Str(String),
126	PathPattern(super::path::Pattern),
127	Projection(Lhs),
128}
129
130#[derive(Debug, Clone)]
131pub(super) struct Atom {
132	pub lhs: LhsExpr,
133	pub op: Op,
134	pub rhs: Rhs,
135	pub raw: String,
136	pub regex: Option<Regex>,
137}
138
139#[derive(Debug, Clone)]
140pub(super) enum Node {
141	Atom(Atom),
142	And(Vec<Node>),
143	Or(Vec<Node>),
144	Not(Box<Node>),
145	Implies(Box<Node>, Box<Node>),
146	Quantifier {
147		kind: QuantKind,
148		domain: Domain,
149		filter: Box<Node>,
150	},
151}
152
153#[derive(Debug, Clone)]
154pub(super) struct Expr {
155	pub root: Node,
156}
157
158#[derive(Debug, Clone, thiserror::Error)]
159pub enum ParseError {
160	#[error("expression `{expr}`: {msg}")]
161	BadExpr { expr: String, msg: String },
162}
163
164pub(super) fn parse(input: &str, scheme: &str, allowed_kinds: &[&str]) -> Result<Expr, ParseError> {
165	let raw = input.to_string();
166	let mut p = Parser {
167		input,
168		pos: 0,
169		scheme,
170		allowed_kinds,
171		raw: &raw,
172	};
173	let root = p.parse_expr()?;
174	p.skip_ws();
175	if p.pos < p.input.len() {
176		let msg = format!("trailing input at byte {}: `{}`", p.pos, &p.input[p.pos..]);
177		return Err(ParseError::BadExpr { expr: raw, msg });
178	}
179	Ok(Expr { root })
180}
181
/// Cursor-based recursive-descent parser state.
struct Parser<'a> {
	/// Text being parsed.
	input: &'a str,
	/// Current byte offset into `input`.
	pos: usize,
	/// URI scheme forwarded to moniker parsing.
	scheme: &'a str,
	/// Kind names accepted as quantifier / `count` domains.
	allowed_kinds: &'a [&'a str],
	/// Full expression text, quoted in error messages.
	raw: &'a str,
}
189
190impl<'a> Parser<'a> {
191	fn parse_expr(&mut self) -> Result<Node, ParseError> {
192		let lhs = self.parse_or()?;
193		self.skip_ws();
194		if self.eat_keyword("=>") {
195			let rhs = self.parse_or()?;
196			return Ok(Node::Implies(Box::new(lhs), Box::new(rhs)));
197		}
198		Ok(lhs)
199	}
200
201	fn parse_or(&mut self) -> Result<Node, ParseError> {
202		let mut nodes = vec![self.parse_and()?];
203		loop {
204			self.skip_ws();
205			if !self.eat_keyword("OR") {
206				break;
207			}
208			nodes.push(self.parse_and()?);
209		}
210		Ok(if nodes.len() == 1 {
211			nodes.pop().unwrap()
212		} else {
213			Node::Or(nodes)
214		})
215	}
216
217	fn parse_and(&mut self) -> Result<Node, ParseError> {
218		let mut nodes = vec![self.parse_not()?];
219		loop {
220			self.skip_ws();
221			if !self.eat_keyword("AND") {
222				break;
223			}
224			nodes.push(self.parse_not()?);
225		}
226		Ok(if nodes.len() == 1 {
227			nodes.pop().unwrap()
228		} else {
229			Node::And(nodes)
230		})
231	}
232
233	fn parse_not(&mut self) -> Result<Node, ParseError> {
234		self.skip_ws();
235		if self.eat_keyword("NOT") {
236			let inner = self.parse_not()?;
237			return Ok(Node::Not(Box::new(inner)));
238		}
239		self.parse_primary()
240	}
241
242	fn parse_primary(&mut self) -> Result<Node, ParseError> {
243		self.skip_ws();
244		if self.peek_byte() == Some(b'(') {
245			self.pos += 1;
246			let inner = self.parse_expr()?;
247			self.skip_ws();
248			if self.peek_byte() != Some(b')') {
249				return Err(ParseError::BadExpr {
250					expr: self.raw.to_string(),
251					msg: format!("missing `)` at byte {}", self.pos),
252				});
253			}
254			self.pos += 1;
255			return Ok(inner);
256		}
257		if let Some(q) = self.try_parse_quantifier()? {
258			return Ok(q);
259		}
260		if let Some(atom) = self.try_parse_count_atom()? {
261			return Ok(Node::Atom(atom));
262		}
263		if let Some(atom) = self.try_parse_segment_atom()? {
264			return Ok(Node::Atom(atom));
265		}
266		let atom_end = self.find_atom_end();
267		if atom_end == self.pos {
268			return Err(ParseError::BadExpr {
269				expr: self.raw.to_string(),
270				msg: format!("expected atom at byte {}", self.pos),
271			});
272		}
273		let atom_str = &self.input[self.pos..atom_end];
274		let atom = parse_atom(atom_str, self.scheme, self.raw)?;
275		self.pos = atom_end;
276		Ok(Node::Atom(atom))
277	}
278
279	fn try_parse_segment_atom(&mut self) -> Result<Option<Atom>, ParseError> {
280		self.skip_ws();
281		let rest = &self.input[self.pos..];
282		let (scope, prefix_len) = if rest.starts_with("source.segment(") {
283			(SegmentScope::Source, "source.segment(".len())
284		} else if rest.starts_with("target.segment(") {
285			(SegmentScope::Target, "target.segment(".len())
286		} else if rest.starts_with("segment(") {
287			(SegmentScope::Def, "segment(".len())
288		} else {
289			return Ok(None);
290		};
291		let raw_start = self.pos;
292		self.pos += prefix_len;
293		let bytes = self.input.as_bytes();
294		let arg_start = self.pos;
295		while self.pos < bytes.len() && bytes[self.pos] != b')' {
296			self.pos += 1;
297		}
298		if self.pos == bytes.len() {
299			return Err(ParseError::BadExpr {
300				expr: self.raw.to_string(),
301				msg: "unclosed `segment(...)` projection".to_string(),
302			});
303		}
304		let arg = self.input[arg_start..self.pos].trim();
305		let kind = unquote(arg).to_string();
306		if kind.is_empty() {
307			return Err(ParseError::BadExpr {
308				expr: self.raw.to_string(),
309				msg: "segment(<kind>) needs a kind argument".to_string(),
310			});
311		}
312		self.pos += 1;
313		self.skip_ws();
314		let (op_str, op_len) = self.eat_op().ok_or_else(|| ParseError::BadExpr {
315			expr: self.raw.to_string(),
316			msg: format!(
317				"expected `<op> <rhs>` after `segment(...)` at byte {}",
318				self.pos
319			),
320		})?;
321		self.pos += op_len;
322		let op = parse_op(op_str, self.raw)?;
323		let rhs_end = self.find_atom_end();
324		let rhs_str = self.input[self.pos..rhs_end].trim();
325		if rhs_str.is_empty() {
326			return Err(ParseError::BadExpr {
327				expr: self.raw.to_string(),
328				msg: "empty RHS after `segment(...)` op".to_string(),
329			});
330		}
331		let rhs = parse_rhs(rhs_str, op, self.scheme, self.raw)?;
332		let regex = match (&op, &rhs) {
333			(Op::RegexMatch | Op::RegexNoMatch, Rhs::RegexStr(p)) => {
334				Some(Regex::new(p).map_err(|e| ParseError::BadExpr {
335					expr: self.raw.to_string(),
336					msg: format!("invalid regex `{p}`: {e}"),
337				})?)
338			}
339			_ => None,
340		};
341		match op {
342			Op::Eq | Op::Ne | Op::RegexMatch | Op::RegexNoMatch => {}
343			_ => {
344				return Err(ParseError::BadExpr {
345					expr: self.raw.to_string(),
346					msg: format!("operator {op:?} not valid for segment(...) projection"),
347				});
348			}
349		}
350		self.pos = rhs_end;
351		let raw = self.input[raw_start..self.pos].to_string();
352		Ok(Some(Atom {
353			lhs: LhsExpr::SegmentOf { scope, kind },
354			op,
355			rhs,
356			raw,
357			regex,
358		}))
359	}
360
361	fn try_parse_count_atom(&mut self) -> Result<Option<Atom>, ParseError> {
362		self.skip_ws();
363		if !self.input[self.pos..].starts_with("count(") {
364			return Ok(None);
365		}
366		let raw_start = self.pos;
367		self.pos += "count".len();
368		let (domain, filter) = self.parse_quantifier_body()?;
369		self.skip_ws();
370		let (op_str, op_len) = self.eat_op().ok_or_else(|| ParseError::BadExpr {
371			expr: self.raw.to_string(),
372			msg: format!(
373				"expected numeric comparison after `count(...)` at byte {}",
374				self.pos
375			),
376		})?;
377		self.pos += op_len;
378		let op = parse_op(op_str, self.raw)?;
379		self.skip_ws();
380		let num_start = self.pos;
381		let bytes = self.input.as_bytes();
382		while self.pos < bytes.len() && bytes[self.pos].is_ascii_digit() {
383			self.pos += 1;
384		}
385		let num_str = &self.input[num_start..self.pos];
386		let n: u32 = num_str.parse().map_err(|_| ParseError::BadExpr {
387			expr: self.raw.to_string(),
388			msg: format!(
389				"expected number after `count(...) {op_str}` at byte {num_start}, got `{num_str}`"
390			),
391		})?;
392		let raw = self.input[raw_start..self.pos].to_string();
393		Ok(Some(Atom {
394			lhs: LhsExpr::Count {
395				domain,
396				filter: filter.map(Box::new),
397			},
398			op,
399			rhs: Rhs::Number(n),
400			raw,
401			regex: None,
402		}))
403	}
404
405	fn try_parse_quantifier(&mut self) -> Result<Option<Node>, ParseError> {
406		self.skip_ws();
407		for (kw, qk) in [
408			("any", QuantKind::Any),
409			("all", QuantKind::All),
410			("none", QuantKind::None),
411		] {
412			if let Some(rest) = self.input[self.pos..].strip_prefix(kw)
413				&& rest.starts_with('(')
414			{
415				self.pos += kw.len();
416				let (domain, filter) = self.parse_quantifier_body()?;
417				let filter = filter.ok_or_else(|| ParseError::BadExpr {
418					expr: self.raw.to_string(),
419					msg: format!("`{kw}` requires a filter expression: `{kw}(<domain>, <expr>)`"),
420				})?;
421				return Ok(Some(Node::Quantifier {
422					kind: qk,
423					domain,
424					filter: Box::new(filter),
425				}));
426			}
427		}
428		Ok(None)
429	}
430
431	fn parse_quantifier_body(&mut self) -> Result<(Domain, Option<Node>), ParseError> {
432		if self.peek_byte() != Some(b'(') {
433			return Err(ParseError::BadExpr {
434				expr: self.raw.to_string(),
435				msg: format!("expected `(` at byte {}", self.pos),
436			});
437		}
438		self.pos += 1;
439		self.skip_ws();
440		let start = self.pos;
441		let bytes = self.input.as_bytes();
442		while self.pos < bytes.len()
443			&& (bytes[self.pos].is_ascii_alphanumeric() || bytes[self.pos] == b'_')
444		{
445			self.pos += 1;
446		}
447		let domain_ident = self.input[start..self.pos].to_string();
448		if domain_ident.is_empty() {
449			return Err(ParseError::BadExpr {
450				expr: self.raw.to_string(),
451				msg: format!("expected domain identifier at byte {}", start),
452			});
453		}
454		let domain = match domain_ident.as_str() {
455			"segment" => Domain::Segments,
456			"out_refs" => Domain::OutRefs,
457			"in_refs" => Domain::InRefs,
458			other => {
459				if !self.allowed_kinds.contains(&other) {
460					return Err(ParseError::BadExpr {
461						expr: self.raw.to_string(),
462						msg: format!(
463							"unknown domain `{other}` (allowed: segment, out_refs, in_refs, or one of {})",
464							self.allowed_kinds.join(", ")
465						),
466					});
467				}
468				Domain::Children(other.to_string())
469			}
470		};
471		self.skip_ws();
472		let filter = if self.peek_byte() == Some(b',') {
473			self.pos += 1;
474			let f = self.parse_expr()?;
475			self.skip_ws();
476			Some(f)
477		} else {
478			None
479		};
480		if self.peek_byte() != Some(b')') {
481			return Err(ParseError::BadExpr {
482				expr: self.raw.to_string(),
483				msg: format!("missing `)` for quantifier at byte {}", self.pos),
484			});
485		}
486		self.pos += 1;
487		Ok((domain, filter))
488	}
489
490	fn find_atom_end(&self) -> usize {
491		let bytes = self.input.as_bytes();
492		let mut i = self.pos;
493		let mut depth: i32 = 0;
494		let mut in_string: Option<u8> = None;
495		while i < bytes.len() {
496			let c = bytes[i];
497			if let Some(q) = in_string {
498				if c == q {
499					in_string = None;
500				}
501				i += 1;
502				continue;
503			}
504			match c {
505				b'\'' | b'"' => {
506					in_string = Some(c);
507					i += 1;
508				}
509				b'(' => {
510					depth += 1;
511					i += 1;
512				}
513				b')' => {
514					if depth == 0 {
515						return i;
516					}
517					depth -= 1;
518					i += 1;
519				}
520				_ => {
521					if depth == 0 && self.boundary_at(i) {
522						return i;
523					}
524					i += 1;
525				}
526			}
527		}
528		i
529	}
530
531	fn boundary_at(&self, i: usize) -> bool {
532		let rest = &self.input[i..];
533		rest.starts_with(" AND ")
534			|| rest.starts_with(" OR ")
535			|| rest.starts_with(" => ")
536			|| rest.starts_with(" AND\t")
537			|| rest.starts_with(" OR\t")
538			|| rest.starts_with(" =>\t")
539	}
540
541	fn skip_ws(&mut self) {
542		let bytes = self.input.as_bytes();
543		while self.pos < bytes.len() && bytes[self.pos].is_ascii_whitespace() {
544			self.pos += 1;
545		}
546	}
547
548	fn peek_byte(&self) -> Option<u8> {
549		self.input.as_bytes().get(self.pos).copied()
550	}
551
552	fn eat_op(&self) -> Option<(&'static str, usize)> {
553		let rest = &self.input[self.pos..];
554		for op in TWO_CHAR_OPS {
555			if rest.starts_with(op) {
556				return Some((*op, op.len()));
557			}
558		}
559		for op in ["<", ">", "=", "~"] {
560			if rest.starts_with(op) {
561				return Some((op, 1));
562			}
563		}
564		None
565	}
566
567	fn eat_keyword(&mut self, kw: &str) -> bool {
568		let rest = &self.input[self.pos..];
569		if let Some(after) = rest.strip_prefix(kw) {
570			let next_ok = after.is_empty()
571				|| after.starts_with(|c: char| c.is_ascii_whitespace())
572				|| after.starts_with('(');
573			if next_ok {
574				self.pos += kw.len();
575				return true;
576			}
577		}
578		false
579	}
580}
581
582fn parse_atom(input: &str, scheme: &str, full: &str) -> Result<Atom, ParseError> {
583	let raw = input.trim().to_string();
584	if let Some(atom) = parse_has_segment(&raw, full)? {
585		return Ok(atom);
586	}
587	let (lhs_str, op_str, rhs_str) = split_atom(&raw, full)?;
588	let lhs = parse_lhs(lhs_str, full)?;
589	let op = parse_op(op_str, full)?;
590	check_type(&lhs, op, full)?;
591	let rhs = parse_rhs(rhs_str, op, scheme, full)?;
592	let regex = match (&op, &rhs) {
593		(Op::RegexMatch | Op::RegexNoMatch, Rhs::RegexStr(p)) => {
594			Some(Regex::new(p).map_err(|e| ParseError::BadExpr {
595				expr: full.to_string(),
596				msg: format!("invalid regex `{p}`: {e}"),
597			})?)
598		}
599		_ => None,
600	};
601	Ok(Atom {
602		lhs,
603		op,
604		rhs,
605		raw,
606		regex,
607	})
608}
609
610/// `has_segment("kind", "name")` is sugar for `moniker ~ '**/kind:name/**'`.
611fn parse_has_segment(raw: &str, full: &str) -> Result<Option<Atom>, ParseError> {
612	let Some(args) = raw
613		.strip_prefix("has_segment(")
614		.and_then(|s| s.strip_suffix(')'))
615	else {
616		return Ok(None);
617	};
618	let bail = |msg: String| ParseError::BadExpr {
619		expr: full.to_string(),
620		msg,
621	};
622	let mut parts = args.splitn(2, ',').map(str::trim);
623	let kind = parts
624		.next()
625		.ok_or_else(|| bail("has_segment(kind, name) needs two args".to_string()))?;
626	let name = parts
627		.next()
628		.ok_or_else(|| bail("has_segment(kind, name) needs two args".to_string()))?;
629	let kind = unquote(kind);
630	let name = unquote(name);
631	if kind.is_empty() || name.is_empty() {
632		return Err(bail(
633			"has_segment(kind, name) args must be non-empty strings".to_string(),
634		));
635	}
636	let pat_src = format!("**/{kind}:{name}/**");
637	let pattern = super::path::parse(&pat_src).map_err(|e| bail(format!("{e}")))?;
638	Ok(Some(Atom {
639		lhs: LhsExpr::Attr(Lhs::Moniker),
640		op: Op::PathMatch,
641		rhs: Rhs::PathPattern(pattern),
642		raw: raw.to_string(),
643		regex: None,
644	}))
645}
646
647fn projection_name_to_lhs(s: &str) -> Option<Lhs> {
648	Some(match s {
649		"name" => Lhs::Name,
650		"lines" => Lhs::Lines,
651		"kind" => Lhs::Kind,
652		"shape" => Lhs::Shape,
653		"visibility" => Lhs::Visibility,
654		"text" => Lhs::Text,
655		"moniker" => Lhs::Moniker,
656		"depth" => Lhs::Depth,
657		"confidence" => Lhs::Confidence,
658		"parent.name" => Lhs::ParentName,
659		"parent.kind" => Lhs::ParentKind,
660		"parent.shape" => Lhs::ParentShape,
661		"source" => Lhs::SourceMoniker,
662		"source.name" => Lhs::SourceName,
663		"source.kind" => Lhs::SourceKind,
664		"source.shape" => Lhs::SourceShape,
665		"source.visibility" => Lhs::SourceVisibility,
666		"target" => Lhs::TargetMoniker,
667		"target.name" => Lhs::TargetName,
668		"target.kind" => Lhs::TargetKind,
669		"target.shape" => Lhs::TargetShape,
670		"target.visibility" => Lhs::TargetVisibility,
671		"segment.name" => Lhs::SegmentName,
672		"segment.kind" => Lhs::SegmentKind,
673		_ => return None,
674	})
675}
676
/// Trims `s` and strips one pair of matching surrounding quotes (`'...'` or
/// `"..."`). Input without a matching pair is returned trimmed but otherwise
/// unchanged; a lone quote character is not treated as a pair.
fn unquote(s: &str) -> &str {
	let trimmed = s.trim();
	let single = trimmed
		.strip_prefix('\'')
		.and_then(|rest| rest.strip_suffix('\''));
	let stripped = single.or_else(|| {
		trimmed
			.strip_prefix('"')
			.and_then(|rest| rest.strip_suffix('"'))
	});
	stripped.unwrap_or(trimmed)
}
687
688/// Operator search restricted to the LHS prefix so an op char inside a regex
689/// RHS (`^[a-z]+$`, `foo<=bar`) can't be mistaken for the main operator.
690fn split_atom<'a>(s: &'a str, full: &str) -> Result<(&'a str, &'a str, &'a str), ParseError> {
691	let bail = || ParseError::BadExpr {
692		expr: full.to_string(),
693		msg: format!("expected `<lhs> <op> <rhs>` in `{s}`"),
694	};
695	let bytes = s.as_bytes();
696	let lhs_end = lhs_token_end(bytes).ok_or_else(bail)?;
697	let after_lhs = s[lhs_end..].trim_start();
698	let op_offset = s.len() - after_lhs.len();
699	for op in TWO_CHAR_OPS {
700		if let Some(rest) = after_lhs.strip_prefix(op) {
701			let lhs = s[..lhs_end].trim();
702			let rhs = rest.trim();
703			if lhs.is_empty() || rhs.is_empty() {
704				return Err(bail());
705			}
706			return Ok((lhs, &s[op_offset..op_offset + op.len()], rhs));
707		}
708	}
709	for op in ['<', '>', '=', '~'] {
710		if let Some(rest) = after_lhs.strip_prefix(op) {
711			let lhs = s[..lhs_end].trim();
712			let rhs = rest.trim();
713			if lhs.is_empty() || rhs.is_empty() {
714				return Err(bail());
715			}
716			return Ok((lhs, &s[op_offset..op_offset + op.len_utf8()], rhs));
717		}
718	}
719	Err(bail())
720}
721
/// Returns the byte offset just past the LHS token at the start of `bytes`.
///
/// Leading ASCII whitespace is skipped. The token is a non-empty run of ASCII
/// letters, `_` and `.`, optionally followed by a parenthesised argument that
/// extends through the first `)`. Returns `None` when there is no identifier
/// or when an opened parenthesis is never closed.
fn lhs_token_end(bytes: &[u8]) -> Option<usize> {
	let ident_start = bytes
		.iter()
		.take_while(|b| b.is_ascii_whitespace())
		.count();
	let ident_len = bytes[ident_start..]
		.iter()
		.take_while(|&&b| b.is_ascii_alphabetic() || b == b'_' || b == b'.')
		.count();
	if ident_len == 0 {
		return None;
	}

	let ident_end = ident_start + ident_len;
	if bytes.get(ident_end) != Some(&b'(') {
		return Some(ident_end);
	}

	// Call-like LHS: include everything through the first closing paren.
	let args_start = ident_end + 1;
	let close = bytes[args_start..].iter().position(|&b| b == b')')?;
	Some(args_start + close + 1)
}
748
749fn parse_lhs(s: &str, full: &str) -> Result<LhsExpr, ParseError> {
750	if s.starts_with("count(") {
751		return Err(ParseError::BadExpr {
752			expr: full.to_string(),
753			msg: "internal: count(...) reached parse_lhs; should be handled at primary level"
754				.to_string(),
755		});
756	}
757	match projection_name_to_lhs(s) {
758		Some(lhs) => Ok(LhsExpr::Attr(lhs)),
759		None => Err(ParseError::BadExpr {
760			expr: full.to_string(),
761			msg: format!("unknown lhs `{s}`"),
762		}),
763	}
764}
765
766fn parse_op(s: &str, full: &str) -> Result<Op, ParseError> {
767	Ok(match s {
768		"=" => Op::Eq,
769		"!=" => Op::Ne,
770		"<" => Op::Lt,
771		"<=" => Op::Le,
772		">" => Op::Gt,
773		">=" => Op::Ge,
774		"=~" => Op::RegexMatch,
775		"!~" => Op::RegexNoMatch,
776		"@>" => Op::AncestorOf,
777		"<@" => Op::DescendantOf,
778		"?=" => Op::BindMatch,
779		"~" => Op::PathMatch,
780		other => {
781			return Err(ParseError::BadExpr {
782				expr: full.to_string(),
783				msg: format!("unknown operator `{other}`"),
784			});
785		}
786	})
787}
788
789fn check_type(lhs: &LhsExpr, op: Op, full: &str) -> Result<(), ParseError> {
790	use Lhs::*;
791	use Op::*;
792	let lhs_attr = match lhs {
793		LhsExpr::Attr(a) => *a,
794		LhsExpr::Count { .. } => {
795			return match op {
796				Lt | Le | Gt | Ge | Eq | Ne => Ok(()),
797				_ => Err(ParseError::BadExpr {
798					expr: full.to_string(),
799					msg: format!("count(...) only accepts numeric operators, got {op:?}"),
800				}),
801			};
802		}
803		LhsExpr::SegmentOf { .. } => {
804			return match op {
805				Eq | Ne | RegexMatch | RegexNoMatch => Ok(()),
806				_ => Err(ParseError::BadExpr {
807					expr: full.to_string(),
808					msg: format!("segment(...) only accepts string operators, got {op:?}"),
809				}),
810			};
811		}
812	};
813	let ok =
814		matches!(
815			(lhs_attr, op),
816			(
817				Name | Kind
818					| Shape | Visibility
819					| Text | Confidence
820					| ParentName | ParentKind
821					| ParentShape | SourceName
822					| SourceKind | SourceShape
823					| SourceVisibility
824					| TargetName | TargetKind
825					| TargetShape | TargetVisibility
826					| SegmentName | SegmentKind,
827				Eq | Ne | RegexMatch | RegexNoMatch,
828			) | (Lines | Depth, Lt | Le | Gt | Ge | Eq | Ne)
829				| (
830					Moniker | SourceMoniker | TargetMoniker,
831					Eq | Ne | AncestorOf | DescendantOf | BindMatch | PathMatch,
832				)
833		);
834	if !ok {
835		return Err(ParseError::BadExpr {
836			expr: full.to_string(),
837			msg: format!("operator {op:?} not valid for lhs {lhs_attr:?}"),
838		});
839	}
840	Ok(())
841}
842
843fn parse_rhs(s: &str, op: Op, scheme: &str, full: &str) -> Result<Rhs, ParseError> {
844	let s = s.trim();
845	let s = if (s.starts_with('"') && s.ends_with('"') && s.len() >= 2)
846		|| (s.starts_with('\'') && s.ends_with('\'') && s.len() >= 2)
847	{
848		&s[1..s.len() - 1]
849	} else {
850		s
851	};
852	Ok(match op {
853		Op::RegexMatch | Op::RegexNoMatch => Rhs::RegexStr(s.to_string()),
854		Op::PathMatch => {
855			let pattern = super::path::parse(s).map_err(|e| ParseError::BadExpr {
856				expr: full.to_string(),
857				msg: format!("{e}"),
858			})?;
859			Rhs::PathPattern(pattern)
860		}
861		Op::AncestorOf | Op::DescendantOf | Op::BindMatch => {
862			let cfg = UriConfig { scheme };
863			let m = from_uri(s, &cfg).map_err(|e| ParseError::BadExpr {
864				expr: full.to_string(),
865				msg: format!("invalid moniker URI `{s}`: {e}"),
866			})?;
867			Rhs::Moniker(m)
868		}
869		Op::Lt | Op::Le | Op::Gt | Op::Ge => {
870			let n: u32 = s.parse().map_err(|_| ParseError::BadExpr {
871				expr: full.to_string(),
872				msg: format!("expected number, got `{s}`"),
873			})?;
874			Rhs::Number(n)
875		}
876		Op::Eq | Op::Ne => {
877			if let Ok(n) = s.parse::<u32>() {
878				Rhs::Number(n)
879			} else if s.contains("+moniker://") {
880				let cfg = UriConfig { scheme };
881				let m = from_uri(s, &cfg).map_err(|e| ParseError::BadExpr {
882					expr: full.to_string(),
883					msg: format!("invalid moniker URI `{s}`: {e}"),
884				})?;
885				Rhs::Moniker(m)
886			} else if let Some(lhs) = projection_name_to_lhs(s) {
887				Rhs::Projection(lhs)
888			} else {
889				Rhs::Str(s.to_string())
890			}
891		}
892	})
893}
894
#[cfg(test)]
mod tests {
	use super::*;

	const TS: &str = "code+moniker://";
	const KINDS: &[&str] = &["class", "method", "function", "module"];

	/// Parses `src` with the test scheme and kinds, panicking on failure.
	fn ok(src: &str) -> Expr {
		parse(src, TS, KINDS).unwrap()
	}

	/// True when the parser rejects `src`.
	fn rejected(src: &str) -> bool {
		parse(src, TS, KINDS).is_err()
	}

	/// Returns the root atom of an expression that is a single atom.
	fn solo(e: &Expr) -> &Atom {
		if let Node::Atom(a) = &e.root {
			return a;
		}
		let other = &e.root;
		panic!("expected solo Atom, got {other:?}")
	}

	/// Returns the atoms under a root `And` (or the single root atom).
	fn and_arms(e: &Expr) -> Vec<&Atom> {
		let children = match &e.root {
			Node::And(children) => children,
			Node::Atom(a) => return vec![a],
			other => panic!("expected And or Atom root, got {other:?}"),
		};
		let mut arms = Vec::with_capacity(children.len());
		for other in children {
			let Node::Atom(a) = other else {
				panic!("expected Atom under And, got {other:?}")
			};
			arms.push(a);
		}
		arms
	}

	#[test]
	fn parses_name_regex() {
		let e = ok("name =~ ^[A-Z]");
		let atom = solo(&e);
		assert!(matches!(atom.lhs, LhsExpr::Attr(Lhs::Name)));
		assert!(matches!(atom.op, Op::RegexMatch));
		assert!(matches!(atom.rhs, Rhs::RegexStr(_)));
		assert!(atom.regex.is_some());
	}

	#[test]
	fn parses_shape_eq() {
		let e = ok("shape = 'callable'");
		let atom = solo(&e);
		assert!(matches!(atom.lhs, LhsExpr::Attr(Lhs::Shape)));
		assert!(matches!(atom.op, Op::Eq));
		let Rhs::Str(s) = &atom.rhs else {
			let other = &atom.rhs;
			panic!("expected Str rhs, got {other:?}")
		};
		assert_eq!(s, "callable");
	}

	#[test]
	fn parses_parent_shape_eq() {
		let e = ok("parent.shape = 'type'");
		assert!(matches!(solo(&e).lhs, LhsExpr::Attr(Lhs::ParentShape)));
	}

	#[test]
	fn parses_target_shape_regex() {
		let e = ok("target.shape =~ ^(type|callable)$");
		let atom = solo(&e);
		assert!(matches!(atom.lhs, LhsExpr::Attr(Lhs::TargetShape)));
		assert!(matches!(atom.op, Op::RegexMatch));
	}

	#[test]
	fn shape_rejects_numeric_operator() {
		assert!(rejected("shape < 'callable'"));
	}

	#[test]
	fn parses_lines_le() {
		let e = ok("lines <= 60");
		let atom = solo(&e);
		let other = (&atom.lhs, &atom.op, &atom.rhs);
		assert!(
			matches!(
				other,
				(LhsExpr::Attr(Lhs::Lines), Op::Le, Rhs::Number(60))
			),
			"unexpected: {other:?}"
		);
	}

	#[test]
	fn parses_moniker_descendant() {
		let e = ok("moniker <@ code+moniker://./class:Foo");
		let atom = solo(&e);
		let other = (&atom.lhs, &atom.op, &atom.rhs);
		assert!(
			matches!(
				other,
				(
					LhsExpr::Attr(Lhs::Moniker),
					Op::DescendantOf,
					Rhs::Moniker(_)
				)
			),
			"unexpected: {other:?}"
		);
	}

	#[test]
	fn parses_count() {
		let e = ok("count(method) <= 20");
		let atom = solo(&e);
		let other = (&atom.lhs, &atom.op, &atom.rhs);
		let as_expected = matches!(
			other,
			(
				LhsExpr::Count {
					domain: Domain::Children(k),
					filter: None,
				},
				Op::Le,
				Rhs::Number(20),
			) if k == "method"
		);
		assert!(as_expected, "unexpected: {other:?}");
	}

	#[test]
	fn parses_count_with_filter() {
		let e = ok("count(method, name =~ ^get) <= 5");
		let atom = solo(&e);
		let other = (&atom.lhs, &atom.op);
		let as_expected = matches!(
			other,
			(
				LhsExpr::Count {
					domain: Domain::Children(k),
					filter: Some(_),
				},
				Op::Le,
			) if k == "method"
		);
		assert!(as_expected, "unexpected: {other:?}");
	}

	#[test]
	fn parses_any_quantifier() {
		let e = ok("any(method, name = 'execute')");
		let other = &e.root;
		let as_expected = matches!(
			other,
			Node::Quantifier {
				kind: QuantKind::Any,
				domain: Domain::Children(k),
				..
			} if k == "method"
		);
		assert!(as_expected, "unexpected: {other:?}");
	}

	#[test]
	fn parses_all_quantifier_on_segment() {
		let e = ok("all(segment, segment.kind = 'module')");
		let other = &e.root;
		let as_expected = matches!(
			other,
			Node::Quantifier {
				kind: QuantKind::All,
				domain: Domain::Segments,
				..
			}
		);
		assert!(as_expected, "unexpected: {other:?}");
	}

	#[test]
	fn parses_none_quantifier_on_out_refs() {
		let e = ok("none(out_refs, kind = 'imports')");
		let other = &e.root;
		let as_expected = matches!(
			other,
			Node::Quantifier {
				kind: QuantKind::None,
				domain: Domain::OutRefs,
				..
			}
		);
		assert!(as_expected, "unexpected: {other:?}");
	}

	#[test]
	fn rejects_quantifier_without_filter() {
		assert!(rejected("any(method)"));
	}

	#[test]
	fn parses_and_combination() {
		let e = ok("name =~ ^[A-Z] AND lines <= 60");
		assert_eq!(and_arms(&e).len(), 2);
	}

	#[test]
	fn rejects_op_lhs_type_mismatch() {
		assert!(
			rejected("lines =~ foo"),
			"lines is numeric, =~ should be rejected"
		);
	}

	#[test]
	fn rejects_unknown_lhs() {
		assert!(rejected("bogus = foo"));
	}

	#[test]
	fn rejects_count_with_regex_op() {
		assert!(rejected("count(method) =~ foo"));
	}

	#[test]
	fn rejects_count_kind_typo() {
		let r = parse("count(methdo) <= 20", TS, KINDS);
		let Err(ParseError::BadExpr { msg, .. }) = &r else {
			let other = &r;
			panic!("expected BadExpr, got {other:?}")
		};
		assert!(msg.contains("methdo"), "{msg}");
		assert!(msg.contains("unknown domain"), "{msg}");
	}

	#[test]
	fn rejects_invalid_regex() {
		assert!(rejected("name =~ [unclosed"));
	}

	#[test]
	fn rejects_invalid_moniker_uri() {
		assert!(rejected("moniker <@ not-a-uri"));
	}

	#[test]
	fn rejects_non_numeric_for_lines() {
		assert!(rejected("lines <= forty"));
	}

	#[test]
	fn regex_rhs_containing_op_chars_is_not_split_on_rhs() {
		// The RHS contains `<=`, which must NOT be taken as the main op.
		let e = ok("text =~ ^count\\(.+\\) <= 20$");
		let atom = solo(&e);
		let other = (&atom.lhs, &atom.op);
		assert!(
			matches!(other, (LhsExpr::Attr(Lhs::Text), Op::RegexMatch)),
			"unexpected: {other:?}"
		);
	}

	#[test]
	fn regex_rhs_with_neq_token_is_not_split_on_rhs() {
		let e = ok("text =~ foo!=bar");
		let atom = solo(&e);
		assert!(matches!(atom.op, Op::RegexMatch));
		let Rhs::RegexStr(s) = &atom.rhs else {
			let other = &atom.rhs;
			panic!("unexpected: {other:?}")
		};
		assert_eq!(s, "foo!=bar");
	}

	#[test]
	fn strips_surrounding_quotes_on_rhs() {
		let e = ok("name =~ \"^foo$\"");
		let Rhs::RegexStr(s) = &solo(&e).rhs else {
			let other = &solo(&e).rhs;
			panic!("unexpected: {other:?}")
		};
		assert_eq!(s, "^foo$");
	}

	// ─── booleans + implication ─────────────────────────────────────────

	#[test]
	fn parses_or() {
		let e = ok("name = 'Foo' OR name = 'Bar'");
		let Node::Or(children) = &e.root else {
			let other = &e.root;
			panic!("expected Or, got {other:?}")
		};
		assert_eq!(children.len(), 2);
	}

	#[test]
	fn parses_not() {
		let e = ok("NOT name = 'Foo'");
		assert!(matches!(e.root, Node::Not(_)));
	}

	#[test]
	fn parses_implies() {
		let e = ok("name = 'Foo' => kind = 'class'");
		assert!(matches!(e.root, Node::Implies(_, _)));
	}

	#[test]
	fn parses_parens_override_precedence() {
		// Without parentheses `A OR B AND C` groups as `A OR (B AND C)`;
		// `(A OR B) AND C` must therefore put an And at the root.
		let e = ok("(name = 'X' OR name = 'Y') AND lines <= 10");
		assert!(matches!(e.root, Node::And(_)));
	}

	#[test]
	fn precedence_implies_is_lowest() {
		// `A OR B => C AND D` ≡ `(A OR B) => (C AND D)`
		let e = ok("name = 'X' OR name = 'Y' => lines <= 10 AND kind = 'class'");
		let Node::Implies(lhs, rhs) = &e.root else {
			let other = &e.root;
			panic!("expected Implies at root, got {other:?}")
		};
		assert!(matches!(**lhs, Node::Or(_)));
		assert!(matches!(**rhs, Node::And(_)));
	}

	#[test]
	fn precedence_not_binds_tighter_than_and() {
		// `NOT A AND B` ≡ `(NOT A) AND B`
		let e = ok("NOT name = 'X' AND lines <= 10");
		let Node::And(children) = &e.root else {
			let other = &e.root;
			panic!("expected And, got {other:?}")
		};
		assert!(matches!(children[0], Node::Not(_)));
		assert!(matches!(children[1], Node::Atom(_)));
	}

	#[test]
	fn rejects_unmatched_paren() {
		assert!(rejected("(name = 'X'"));
		assert!(rejected("name = 'X')"));
	}

	// ─── path patterns ──────────────────────────────────────────────────

	#[test]
	fn parses_path_match() {
		let e = ok("moniker ~ '**/class:Foo/**'");
		let atom = solo(&e);
		assert!(matches!(atom.op, Op::PathMatch));
		assert!(matches!(atom.rhs, Rhs::PathPattern(_)));
	}

	#[test]
	fn parses_path_match_with_regex_step() {
		let e = ok("moniker ~ '**/class:/Port$/'");
		assert!(matches!(solo(&e).op, Op::PathMatch));
	}

	#[test]
	fn has_segment_desugars_to_path_match() {
		let e = ok("has_segment('module', 'domain')");
		let atom = solo(&e);
		assert!(matches!(atom.op, Op::PathMatch));
		let Rhs::PathPattern(p) = &atom.rhs else {
			let other = &atom.rhs;
			panic!("expected PathPattern, got {other:?}")
		};
		assert_eq!(p.raw, "**/module:domain/**");
	}

	#[test]
	fn rejects_path_match_on_non_moniker_lhs() {
		assert!(rejected("name ~ 'foo'"));
	}

	#[test]
	fn rejects_invalid_path_pattern() {
		assert!(rejected("moniker ~ ''"));
		assert!(rejected("moniker ~ 'no-colon-step'"));
	}
}