Skip to main content

code_moniker_cli/check/
path.rs

//! Path patterns matched against the segments of a moniker.
//!
//! Examples: `'module:domain'`, `'**/class:*'`, `'class:/^[A-Z].*Port$/'`, `'**/module:a/**'`.
//! `**` matches zero or more segments; every other step form matches exactly one.
5
6use regex::Regex;
7
8use code_moniker_core::core::moniker::Moniker;
9
/// One step of a parsed path pattern. Apart from `DoubleStar`, a step is
/// compared against the `(kind, name)` byte pair of a single moniker segment.
#[derive(Debug, Clone)]
pub enum Step {
	/// `kind:name` — both the kind and the name must equal the segment's bytes.
	Literal { kind: Vec<u8>, name: Vec<u8> },
	/// `kind:*` — holds the required kind; the segment's name is not checked.
	KindWildcard(Vec<u8>),
	/// `*:name` — holds the required name; the segment's kind is not checked.
	NameWildcard(Vec<u8>),
	/// `*` — accepts any one segment.
	AnySegment,
	/// `kind:/re/` — the kind must be equal and the name, read as UTF-8, must
	/// match `re`.
	Regex { kind: Vec<u8>, re: Regex },
	/// `**` — stands for zero or more segments.
	DoubleStar,
}
19
/// A parsed path pattern, as produced by `parse`.
#[derive(Debug, Clone)]
pub struct Pattern {
	/// The steps, in the order they appear in the pattern text.
	pub steps: Vec<Step>,
	/// The pattern text exactly as it was handed to `parse`.
	pub raw: String,
}
25
/// Error produced when a path pattern cannot be parsed.
#[derive(Debug, Clone, thiserror::Error)]
pub enum PatternError {
	/// The pattern is malformed. `pattern` is the full pattern text and `msg`
	/// describes what was wrong with it.
	#[error("path pattern `{pattern}`: {msg}")]
	Bad { pattern: String, msg: String },
}
31
32pub fn parse(input: &str) -> Result<Pattern, PatternError> {
33	let raw = input.to_string();
34	if input.is_empty() {
35		return Err(PatternError::Bad {
36			pattern: raw,
37			msg: "empty path pattern".to_string(),
38		});
39	}
40	let mut steps = Vec::new();
41	for raw_step in split_outer(input) {
42		steps.push(parse_step(&raw_step, &raw)?);
43	}
44	Ok(Pattern { steps, raw })
45}
46
/// Cuts a pattern into its step strings.
///
/// A `/` normally separates two steps and is dropped. The exception is a `/`
/// that directly follows `:`: it opens a regex, which runs up to and including
/// the next `/` that is not escaped by a backslash. That closing `/` stays in
/// the step (`class:/Entity$/` is one step) and also ends it, so the character
/// after it begins a fresh step without a further separator.
///
/// The result always holds at least one string; a leading, trailing or doubled
/// separator shows up as an empty string.
fn split_outer(s: &str) -> Vec<String> {
	let mut pieces: Vec<String> = Vec::new();
	let mut current = String::new();
	let mut inside_regex = false;
	let mut after_colon = false;
	let mut escaped = false;
	let mut regex_just_ended = false;

	for ch in s.chars() {
		// The step holding a regex is flushed lazily, when the next character
		// arrives, so a regex at the very end yields no extra empty step.
		if regex_just_ended {
			regex_just_ended = false;
			pieces.push(std::mem::take(&mut current));
		}
		match (inside_regex, ch == '/') {
			// A slash inside a regex: kept, and closes the regex unless escaped.
			(true, true) => {
				current.push(ch);
				if !escaped {
					inside_regex = false;
					regex_just_ended = true;
				}
			}
			// A slash right after `:` opens a regex and belongs to the step.
			(false, true) if after_colon => {
				current.push(ch);
				inside_regex = true;
			}
			// Any other slash is a plain separator.
			(false, true) => {
				pieces.push(std::mem::take(&mut current));
				after_colon = false;
				escaped = false;
				continue;
			}
			(_, false) => current.push(ch),
		}
		after_colon = ch == ':';
		// Two backslashes in a row cancel out, so `\\/` still closes a regex.
		escaped = ch == '\\' && !escaped;
	}
	pieces.push(current);
	pieces
}
84
85fn parse_step(s: &str, full: &str) -> Result<Step, PatternError> {
86	if s == "**" {
87		return Ok(Step::DoubleStar);
88	}
89	if s == "*" {
90		return Ok(Step::AnySegment);
91	}
92	let Some(colon) = s.find(':') else {
93		return Err(PatternError::Bad {
94			pattern: full.to_string(),
95			msg: format!("step `{s}` is missing the `kind:name` separator"),
96		});
97	};
98	let kind = &s[..colon];
99	let name = &s[colon + 1..];
100	if kind.is_empty() {
101		return Err(PatternError::Bad {
102			pattern: full.to_string(),
103			msg: format!("step `{s}` has empty kind"),
104		});
105	}
106	if name.is_empty() {
107		return Err(PatternError::Bad {
108			pattern: full.to_string(),
109			msg: format!("step `{s}` has empty name"),
110		});
111	}
112	if kind == "*" {
113		return Ok(Step::NameWildcard(name.as_bytes().to_vec()));
114	}
115	if name == "*" {
116		return Ok(Step::KindWildcard(kind.as_bytes().to_vec()));
117	}
118	if let Some(stripped) = name.strip_prefix('/').and_then(|r| r.strip_suffix('/')) {
119		let re = Regex::new(stripped).map_err(|e| PatternError::Bad {
120			pattern: full.to_string(),
121			msg: format!("invalid regex `{stripped}`: {e}"),
122		})?;
123		return Ok(Step::Regex {
124			kind: kind.as_bytes().to_vec(),
125			re,
126		});
127	}
128	Ok(Step::Literal {
129		kind: kind.as_bytes().to_vec(),
130		name: name.as_bytes().to_vec(),
131	})
132}
133
134/// Matches `pattern` against the segments of `m`. `**` is greedy non-deterministic
135/// — recursive backtracking, O(2^n) worst case but moniker depth ≤ ~10 in practice.
136pub fn matches(pattern: &Pattern, m: &Moniker) -> bool {
137	let view = m.as_view();
138	let segs: Vec<(&[u8], &[u8])> = view.segments().map(|s| (s.kind, s.name)).collect();
139	match_steps(&pattern.steps, &segs)
140}
141
142fn match_steps(steps: &[Step], segs: &[(&[u8], &[u8])]) -> bool {
143	match steps.split_first() {
144		None => segs.is_empty(),
145		Some((Step::DoubleStar, rest)) => (0..=segs.len()).any(|k| match_steps(rest, &segs[k..])),
146		Some((step, rest)) => match segs.split_first() {
147			None => false,
148			Some((seg, segs_rest)) => match_step(step, seg) && match_steps(rest, segs_rest),
149		},
150	}
151}
152
153fn match_step(step: &Step, seg: &(&[u8], &[u8])) -> bool {
154	let (k, n) = *seg;
155	match step {
156		Step::Literal { kind, name } => k == kind.as_slice() && n == name.as_slice(),
157		Step::KindWildcard(kind) => k == kind.as_slice(),
158		Step::NameWildcard(name) => n == name.as_slice(),
159		Step::AnySegment => true,
160		Step::Regex { kind, re } => {
161			k == kind.as_slice() && {
162				match std::str::from_utf8(n) {
163					Ok(s) => re.is_match(s),
164					Err(_) => false,
165				}
166			}
167		}
168		Step::DoubleStar => unreachable!("DoubleStar handled in match_steps"),
169	}
170}
171
// Unit tests for pattern parsing and matching.
#[cfg(test)]
mod tests {
	use super::*;
	use code_moniker_core::core::moniker::MonikerBuilder;

	/// Builds a moniker with project `.` and one segment per `(kind, name)`
	/// pair, in the given order.
	fn build(steps: &[(&[u8], &[u8])]) -> Moniker {
		let mut b = MonikerBuilder::new();
		b.project(b".");
		for (k, n) in steps {
			b.segment(k, n);
		}
		b.build()
	}

	/// Parses `pat` (panicking if it does not parse) and asserts it matches `m`.
	fn assert_match(pat: &str, m: &Moniker) {
		let p = parse(pat).expect("pattern parses");
		assert!(matches(&p, m), "pattern `{pat}` should match {m:?}");
	}

	/// Parses `pat` (panicking if it does not parse) and asserts it does not
	/// match `m`.
	fn assert_no_match(pat: &str, m: &Moniker) {
		let p = parse(pat).expect("pattern parses");
		assert!(!matches(&p, m), "pattern `{pat}` should NOT match {m:?}");
	}

	// A pattern made only of literal steps matches a path with the same segments.
	#[test]
	fn literal_anchored_matches_exact() {
		let m = build(&[(b"lang", b"ts"), (b"module", b"domain")]);
		assert_match("lang:ts/module:domain", &m);
	}

	#[test]
	fn literal_anchored_does_not_match_with_extra_tail() {
		let m = build(&[(b"lang", b"ts"), (b"module", b"domain"), (b"class", b"Foo")]);
		// Without `/**`, pattern must match the WHOLE path.
		assert_no_match("lang:ts/module:domain", &m);
	}

	// `**` at the start, at the end and in the middle of a pattern.
	#[test]
	fn double_star_matches_any_depth() {
		let m = build(&[(b"lang", b"ts"), (b"module", b"a"), (b"class", b"Foo")]);
		assert_match("**/class:Foo", &m);
		assert_match("**/class:Foo/**", &m);
		assert_match("lang:ts/**/class:Foo", &m);
	}

	#[test]
	fn double_star_matches_zero_segments() {
		let m = build(&[(b"class", b"Foo")]);
		assert_match("**/class:Foo", &m); // `**` matches 0 segments
	}

	// `kind:*` fixes the kind and accepts any name.
	#[test]
	fn kind_wildcard_matches_any_name() {
		let m = build(&[(b"lang", b"ts"), (b"class", b"Anything")]);
		assert_match("lang:ts/class:*", &m);
	}

	// `*:name` fixes the name and accepts any kind.
	#[test]
	fn name_wildcard_matches_any_kind() {
		let m1 = build(&[(b"lang", b"ts"), (b"class", b"Foo")]);
		let m2 = build(&[(b"lang", b"ts"), (b"interface", b"Foo")]);
		assert_match("lang:ts/*:Foo", &m1);
		assert_match("lang:ts/*:Foo", &m2);
	}

	// A bare `*` stands for exactly one segment of any kind and name.
	#[test]
	fn any_segment_matches_one() {
		let m = build(&[(b"lang", b"ts"), (b"module", b"x"), (b"class", b"Y")]);
		assert_match("lang:ts/*/class:Y", &m);
	}

	// `kind:/re/` applies the regex to the segment name.
	#[test]
	fn regex_step_matches_name() {
		let m = build(&[(b"class", b"UserPort")]);
		assert_match("class:/Port$/", &m);
		assert_no_match("class:/Adapter$/", &m);
	}

	// Patterns combining `**`, regex and wildcard steps on a four-segment path.
	#[test]
	fn ddd_aliases_against_real_moniker_shape() {
		let m = build(&[
			(b"lang", b"ts"),
			(b"module", b"domain"),
			(b"class", b"OrderEntity"),
			(b"method", b"validate"),
		]);
		assert_match("**/module:domain/**", &m);
		// `**/class:/Entity$/**` — has a class:…Entity segment somewhere.
		// Without the trailing `**`, the pattern requires the regex step to
		// be the LAST segment.
		assert_match("**/class:/Entity$/**", &m);
		assert_match("**/class:/Entity$/method:*", &m);
		assert_no_match("**/module:infrastructure/**", &m);
	}

	#[test]
	fn rejects_empty_pattern() {
		assert!(parse("").is_err());
	}

	// Neither `foo` nor `bar` contains the `:` that separates kind from name.
	#[test]
	fn rejects_step_without_colon() {
		assert!(parse("foo/bar").is_err());
	}

	// `[unclosed` is not a valid regex, so compiling it must fail.
	#[test]
	fn rejects_bad_regex() {
		assert!(parse("class:/[unclosed/").is_err());
	}
}