Skip to main content

aristo_cli/
filter.rs

1//! J2 unified filter grammar shared across `aristo list` / `verify` /
2//! `graph` / `critique`.
3//!
4//! Form: `<key>=<value>`. Allowed keys: `id`, `file`, `parent`, `status`.
5//! Multiple `--filter` flags AND together at the call site (not modeled
6//! here — this type represents a single filter clause).
7//!
8//! The `id`, `parent`, and `status` values may be comma-separated
9//! (`id=a,b,c`): the members form a value-level OR — the clause matches
10//! an annotation whose id (resp. parent / status) equals ANY listed
11//! member. `file` takes a single path (its optional `:<LO>-<HI>` range
12//! suffix would make a comma ambiguous, and a path may legitimately
13//! contain a comma).
14
15use std::str::FromStr;
16
/// A single parsed `<key>=<value>` filter clause.
///
/// Each variant corresponds to one of the allowed keys (`id`, `file`,
/// `parent`, `status`). The list-carrying variants hold the members of a
/// comma-separated value in the order they were written.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Filter {
    /// `id=<id>[,<id>...]` — matches when the annotation id is equal to
    /// at least one listed member (`id=a,b` holds `["a", "b"]`, `id=a`
    /// holds `["a"]`).
    Id(Vec<String>),
    /// `file=<path>` or `file=<path>:<LO>-<HI>` — matches on file path,
    /// with an optional closed (inclusive) line range.
    File {
        /// The path exactly as written, without any range suffix.
        path: String,
        /// `Some((lo, hi))` when a `:<LO>-<HI>` suffix was supplied,
        /// `None` for a path-only clause.
        line_range: Option<(u32, u32)>,
    },
    /// `parent=<id>[,<id>...]` — matches when any parent of the
    /// annotation is equal to at least one listed member.
    Parent(Vec<String>),
    /// `status=<state>[,<state>...]` — matches when the annotation's
    /// status label is equal to at least one listed member.
    Status(Vec<String>),
}
33
/// Reasons a filter expression can be rejected while parsing.
#[derive(Debug, Eq, PartialEq)]
pub enum FilterParseError {
    /// The expression contains no `=` between key and value; `input` is
    /// the whole expression as given.
    MissingEquals { input: String },
    /// The text before `=` is not a supported key (a typo, or a key
    /// this grammar does not know).
    UnknownKey { key: String },
    /// Nothing usable follows the `=` (for example `id=`).
    EmptyValue { key: String },
    /// A `file=<path>:<LO>-<HI>` range could not be accepted: a bound is
    /// not a valid integer, LO exceeds HI, or the syntax does not fit.
    /// `input` is the whole expression; `detail` explains the failure.
    BadLineRange { input: String, detail: String },
}
46
47impl std::fmt::Display for FilterParseError {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        match self {
50            FilterParseError::MissingEquals { input } => write!(
51                f,
52                "filter `{input}` is missing `=`; expected one of: \
53                 id=<id>, file=<path>[:<LO>-<HI>], parent=<id>, status=<state>"
54            ),
55            FilterParseError::UnknownKey { key } => write!(
56                f,
57                "unknown filter key `{key}`; expected one of: id, file, parent, status"
58            ),
59            FilterParseError::EmptyValue { key } => {
60                write!(f, "filter `{key}=` has no value")
61            }
62            FilterParseError::BadLineRange { input, detail } => write!(
63                f,
64                "filter `{input}` has a bad line range: {detail} \
65                 (expected `file=<path>:<LO>-<HI>` where LO and HI are positive \
66                 integers and LO ≤ HI)"
67            ),
68        }
69    }
70}
71
72impl std::error::Error for FilterParseError {}
73
74impl FromStr for Filter {
75    type Err = FilterParseError;
76
77    fn from_str(s: &str) -> Result<Self, Self::Err> {
78        let (key, value) = s
79            .split_once('=')
80            .ok_or_else(|| FilterParseError::MissingEquals {
81                input: s.to_string(),
82            })?;
83        if value.is_empty() {
84            return Err(FilterParseError::EmptyValue {
85                key: key.to_string(),
86            });
87        }
88        match key {
89            "id" => parse_value_set(key, value).map(Filter::Id),
90            "file" => parse_file_value(s, value),
91            "parent" => parse_value_set(key, value).map(Filter::Parent),
92            "status" => parse_value_set(key, value).map(Filter::Status),
93            other => Err(FilterParseError::UnknownKey {
94                key: other.to_string(),
95            }),
96        }
97    }
98}
99
100#[aristo::intent(
101    "`id`, `parent`, and `status` values split on `,` into a value-level \
102     OR set (`id=a,b` matches a OR b); members are trimmed and empties \
103     dropped, and an all-empty value (`id=,`) is `EmptyValue`. `file` is \
104     deliberately NOT comma-split — its optional `:<LO>-<HI>` range \
105     suffix and the fact that a path may contain a `,` make splitting \
106     ambiguous. A refactor that routed `file` through this helper \"for \
107     consistency\" would silently break range parsing and comma-bearing \
108     paths.",
109    verify = "test",
110    id = "filter_value_set_comma_splits_scalar_keys_not_file"
111)]
112fn parse_value_set(key: &str, value: &str) -> Result<Vec<String>, FilterParseError> {
113    let members: Vec<String> = value
114        .split(',')
115        .map(str::trim)
116        .filter(|m| !m.is_empty())
117        .map(String::from)
118        .collect();
119    if members.is_empty() {
120        return Err(FilterParseError::EmptyValue {
121            key: key.to_string(),
122        });
123    }
124    Ok(members)
125}
126
127/// Split `<path>[:<LO>-<HI>]` into a `Filter::File`. The `:` separator
128/// for the range is the LAST `:` in the string so paths containing `:`
129/// (Windows drive letters, future namespacing) are accepted as-is when
130/// no range follows.
131fn parse_file_value(full_input: &str, value: &str) -> Result<Filter, FilterParseError> {
132    // Detect range presence via a trailing `:N-M` suffix on the value.
133    if let Some((path, range_str)) = split_off_trailing_range(value) {
134        let (lo, hi) =
135            parse_line_range(range_str).map_err(|detail| FilterParseError::BadLineRange {
136                input: full_input.to_string(),
137                detail,
138            })?;
139        Ok(Filter::File {
140            path: path.to_string(),
141            line_range: Some((lo, hi)),
142        })
143    } else {
144        Ok(Filter::File {
145            path: value.to_string(),
146            line_range: None,
147        })
148    }
149}
150
/// Detect a trailing `:<digits>-<digits>` suffix on `value`.
///
/// On a match, returns the text before the LAST `:` together with the
/// `LO-HI` text after it. Returns `None` when there is no `:`, when the
/// text after the last `:` has no `-`, or when either side of the first
/// `-` is empty or contains anything other than ASCII digits — in all
/// those cases the caller should treat `value` as a bare path.
fn split_off_trailing_range(value: &str) -> Option<(&str, &str)> {
    let (path, suffix) = value.rsplit_once(':')?;
    let (lo_digits, hi_digits) = suffix.split_once('-')?;

    // A bound qualifies only if it is a non-empty run of ASCII digits.
    let is_digit_run = |text: &str| !text.is_empty() && text.bytes().all(|b| b.is_ascii_digit());

    if is_digit_run(lo_digits) && is_digit_run(hi_digits) {
        Some((path, suffix))
    } else {
        None
    }
}
168
/// Parse `LO-HI` into a pair of 1-indexed, inclusive line bounds.
///
/// The text is split at the first `-`. Checks run in this order, and the
/// first failure is reported as a human-readable message: missing `-`,
/// LO not a `u32`, HI not a `u32`, either bound zero, LO greater than HI.
fn parse_line_range(range_str: &str) -> Result<(u32, u32), String> {
    let (lo_str, hi_str) = match range_str.split_once('-') {
        Some(halves) => halves,
        None => return Err(format!("range `{range_str}` is missing `-` separator")),
    };

    let lo = match lo_str.parse::<u32>() {
        Ok(number) => number,
        Err(e) => return Err(format!("LO `{lo_str}` is not a u32: {e}")),
    };
    let hi = match hi_str.parse::<u32>() {
        Ok(number) => number,
        Err(e) => return Err(format!("HI `{hi_str}` is not a u32: {e}")),
    };

    match (lo, hi) {
        // Line numbers start at 1, so a zero bound can never match.
        (0, _) | (_, 0) => Err("LO and HI must be ≥ 1 (line numbers are 1-indexed)".into()),
        _ if lo > hi => Err(format!("LO ({lo}) is greater than HI ({hi})")),
        bounds => Ok(bounds),
    }
}
187
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `input`, panicking if the parser rejects it.
    fn accepted(input: &str) -> Filter {
        input.parse::<Filter>().unwrap()
    }

    /// Parse `input`, panicking if the parser accepts it.
    fn rejected(input: &str) -> FilterParseError {
        input.parse::<Filter>().unwrap_err()
    }

    /// Build the owned member list carried by `Id` / `Parent` / `Status`.
    fn members(items: &[&str]) -> Vec<String> {
        items.iter().map(|item| item.to_string()).collect()
    }

    /// Build the expected `Filter::File` for a path and optional range.
    fn file(path: &str, line_range: Option<(u32, u32)>) -> Filter {
        Filter::File {
            path: path.into(),
            line_range,
        }
    }

    #[test]
    fn parses_id() {
        assert_eq!(accepted("id=foo"), Filter::Id(members(&["foo"])));
    }

    #[test]
    fn parses_id_comma_list_into_value_set() {
        // J2 grammar: a comma list is an OR set of ids, not one literal id
        // "a,b,c". Regression guard for the bug where the value was kept
        // verbatim and therefore matched nothing.
        assert_eq!(accepted("id=a,b,c"), Filter::Id(members(&["a", "b", "c"])));
    }

    #[test]
    fn comma_members_are_trimmed_and_empties_dropped() {
        assert_eq!(
            accepted("id=a, b ,,c,"),
            Filter::Id(members(&["a", "b", "c"]))
        );
    }

    #[test]
    fn all_empty_comma_value_rejected() {
        // Neither `id=,` nor `id=, ,` contains a non-empty member.
        for input in ["id=,", "id=, ,"] {
            assert!(matches!(
                rejected(input),
                FilterParseError::EmptyValue { .. }
            ));
        }
    }

    #[test]
    fn parent_and_status_also_comma_split() {
        assert_eq!(accepted("parent=a,b"), Filter::Parent(members(&["a", "b"])));
        assert_eq!(
            accepted("status=verified,stale"),
            Filter::Status(members(&["verified", "stale"]))
        );
    }

    #[test]
    fn file_value_is_not_comma_split() {
        // A path may contain a comma, and the optional `:<LO>-<HI>` range
        // grammar would make splitting ambiguous, so `file` keeps commas
        // verbatim. Guards the intent
        // filter_value_set_comma_splits_scalar_keys_not_file.
        assert_eq!(accepted("file=a,b.rs"), file("a,b.rs", None));
    }

    #[test]
    fn parses_file_with_slashes() {
        assert_eq!(accepted("file=src/lib.rs"), file("src/lib.rs", None));
    }

    #[test]
    fn parses_file_with_line_range() {
        assert_eq!(
            accepted("file=src/lib.rs:10-50"),
            file("src/lib.rs", Some((10, 50)))
        );
    }

    #[test]
    fn parses_file_with_single_line_range() {
        // LO == HI selects exactly one line (e.g. `file=x.rs:42-42`).
        assert_eq!(
            accepted("file=src/lib.rs:42-42"),
            file("src/lib.rs", Some((42, 42)))
        );
    }

    #[test]
    fn file_with_colon_but_no_range_treated_as_path() {
        // A `:` that is not followed by a digit range belongs to the path
        // (rare on Unix, but Windows drive letters and future namespacing
        // must not be swallowed by the range parser).
        assert_eq!(accepted("file=C:Users/foo.rs"), file("C:Users/foo.rs", None));
    }

    #[test]
    fn file_range_zero_rejected() {
        let err = rejected("file=src/x.rs:0-10");
        assert!(matches!(err, FilterParseError::BadLineRange { .. }));
        assert!(err.to_string().contains("≥ 1"));
    }

    #[test]
    fn file_range_inverted_rejected() {
        let err = rejected("file=src/x.rs:50-10");
        assert!(matches!(err, FilterParseError::BadLineRange { .. }));
        assert!(err.to_string().contains("greater than"));
    }

    #[test]
    fn parses_parent() {
        assert_eq!(
            accepted("parent=root_invariants"),
            Filter::Parent(members(&["root_invariants"]))
        );
    }

    #[test]
    fn parses_status() {
        assert_eq!(
            accepted("status=verified"),
            Filter::Status(members(&["verified"]))
        );
    }

    #[test]
    fn value_may_contain_equals_sign() {
        // Only the first `=` separates key from value, so an `=` inside
        // the value (rare for ids/paths but possible) is preserved.
        assert_eq!(accepted("id=foo=bar"), Filter::Id(members(&["foo=bar"])));
    }

    #[test]
    fn aristos_namespaced_id_parses() {
        // The `aristos:` prefix contains a colon, not an equals sign, and
        // must come through unchanged.
        assert_eq!(
            accepted("id=aristos:my_thing"),
            Filter::Id(members(&["aristos:my_thing"]))
        );
    }

    #[test]
    fn missing_equals_rejected() {
        let err = rejected("id");
        assert!(matches!(err, FilterParseError::MissingEquals { .. }));
        assert!(err.to_string().contains("missing `=`"));
    }

    #[test]
    fn unknown_key_rejected_with_helpful_message() {
        let err = rejected("kind=intent");
        assert!(matches!(err, FilterParseError::UnknownKey { .. }));
        let rendered = err.to_string();
        assert!(rendered.contains("kind"));
        assert!(rendered.contains("id, file, parent, status"));
    }

    #[test]
    fn empty_value_rejected() {
        assert!(matches!(
            rejected("id="),
            FilterParseError::EmptyValue { .. }
        ));
    }
}