omne-cli 0.2.0

CLI for managing omne volumes: init, upgrade, and validate kernel and distro releases
Documentation
//! Reserved-token scanner for reconstructed assistant lines.
//!
//! Unit 10 of the v1 kernel plan. Consumed by the loop controller in
//! Unit 11: for every line yielded by `claude_proc::ClaudeProcess`, the
//! controller feeds the `text` into a [`Scanner`] and decides what to
//! do with the returned [`Hit`].
//!
//! Vocabulary in v1:
//!
//! - [`BLOCKED`] — always reserved. Signals the pipe run is waiting on
//!   a human or an external condition. Detected regardless of whether
//!   the active loop registers it.
//! - User-supplied `until` tokens — carried on `loop.until` in the
//!   pipe YAML. `ALL_TASKS_COMPLETE` is the canonical one for
//!   ship-or-halt loops, but any string the user registered is fair
//!   game.
//!
//! Match rule: the scanner returns `Some(Hit)` **only** when the
//! input line, after `trim`, is exactly equal to a registered token.
//! A narrative like `I am BLOCKED by a dependency` must not match; a
//! standalone `BLOCKED` line (or one padded with whitespace) must.
//!
//! Priority: when a stream contains both a `BLOCKED` and an `until`
//! line, `BLOCKED` wins. Per-line [`Scanner::feed`] is unaware of
//! prior lines, so callers that want this guarantee either use
//! [`Scanner::scan`] (stateful pass over an iterator) or track the
//! precedence themselves via [`Kind::Blocked`] vs [`Kind::Until`].
//!
//! Wrap-form tokens (`<promise>TOKEN</promise>`) are deferred past v1
//! — the plan explicitly narrows the surface to standalone lines.

// NOTE(review): presumably kept because the Unit 11 loop controller that
// consumes this module is not wired in yet — confirm, and drop this allow
// once every public item here has a caller.
#![allow(dead_code)]

/// The reserved sentinel token.
///
/// A line that consists solely of this text (surrounding whitespace
/// ignored) is always recognised, whether or not the caller registered
/// it, and it takes priority over every user-supplied `until` token.
pub const BLOCKED: &str = "BLOCKED";

/// Which family of token a [`Hit`] belongs to.
///
/// Callers can apply the "`BLOCKED` outranks `until`" rule by matching
/// on the variant rather than comparing the token text again.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Kind {
    /// The line was the reserved [`BLOCKED`] sentinel.
    Blocked,
    /// The line was one of the user-registered `until` tokens.
    Until,
}

/// A successful sentinel match: which token was seen and what kind it is.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct Hit {
    /// The matched token verbatim (e.g. `"BLOCKED"` or
    /// `"ALL_TASKS_COMPLETE"`). Held as an owned `String` because the
    /// source line is usually borrowed from a streaming iterator whose
    /// buffer gets reused.
    pub token: String,
    /// Whether the token is the reserved sentinel or a user `until` token.
    pub kind: Kind,
}

/// Matcher for sentinel lines; carries configuration only, no run state.
///
/// Construct one with `Scanner::new(until_tokens)`. Use
/// [`feed`](Scanner::feed) to test one reconstructed line at a time, or
/// [`scan`](Scanner::scan) to reduce an iterator of lines to a single
/// outcome that already honours `BLOCKED` priority.
///
/// Nothing inside is interiorly mutable, so one `Scanner` can be shared
/// between threads in the uncommon case where several streams feed the
/// same loop controller.
#[derive(Clone, Debug)]
pub struct Scanner {
    /// The caller's `until` tokens, owned so no borrow lifetime shows up
    /// in the public API. Construction strips out duplicates and empty
    /// strings; `feed` would never report a blank line anyway, but
    /// filtering up front is free and makes the invariant explicit.
    until: Vec<String>,
}

impl Scanner {
    /// Create a scanner that watches for [`BLOCKED`] and for each
    /// distinct, non-blank entry of `tokens`.
    ///
    /// Entries are trimmed first. Anything that trims to the empty
    /// string is discarded, repeats are kept only once (first occurrence
    /// wins the position), and an explicit `BLOCKED` entry is ignored
    /// because that token is reserved and always active.
    pub fn new<S: AsRef<str>>(tokens: &[S]) -> Self {
        let mut until: Vec<String> = Vec::with_capacity(tokens.len());
        let candidates = tokens
            .iter()
            .map(|raw| raw.as_ref().trim())
            .filter(|tok| !tok.is_empty() && *tok != BLOCKED);
        for tok in candidates {
            let already_known = until.iter().any(|known| known.as_str() == tok);
            if !already_known {
                until.push(tok.to_owned());
            }
        }
        Self { until }
    }

    /// Check one reconstructed line for an exact token match.
    ///
    /// The line is trimmed and compared as a whole:
    ///
    /// - blank lines yield `None`;
    /// - a line equal to [`BLOCKED`] yields a [`Kind::Blocked`] hit;
    /// - a line equal to a registered `until` token yields a
    ///   [`Kind::Until`] hit carrying that token;
    /// - everything else yields `None`.
    ///
    /// The reserved token is tested before the user tokens, so it is
    /// always reported as [`Kind::Blocked`].
    pub fn feed(&self, line: &str) -> Option<Hit> {
        match line.trim() {
            "" => None,
            text if text == BLOCKED => Some(Hit {
                token: BLOCKED.to_owned(),
                kind: Kind::Blocked,
            }),
            text => self
                .until
                .iter()
                .find(|registered| registered.as_str() == text)
                .map(|registered| Hit {
                    token: registered.clone(),
                    kind: Kind::Until,
                }),
        }
    }

    /// Reduce a sequence of lines to one priority-respecting result.
    ///
    /// The first [`Kind::Until`] hit is remembered, but scanning
    /// continues in case a `BLOCKED` line follows. As soon as a
    /// [`Kind::Blocked`] hit appears it is returned immediately and the
    /// remaining lines are not pulled from the iterator, since nothing
    /// can outrank `BLOCKED`. If the input ends without a `BLOCKED`
    /// line, the remembered `until` hit (if any) is returned.
    pub fn scan<L, I>(&self, lines: I) -> Option<Hit>
    where
        L: AsRef<str>,
        I: IntoIterator<Item = L>,
    {
        let mut first_until: Option<Hit> = None;
        let hits = lines
            .into_iter()
            .filter_map(|line| self.feed(line.as_ref()));
        for hit in hits {
            match hit.kind {
                // Highest priority: stop consuming input right here.
                Kind::Blocked => return Some(hit),
                // Only the earliest `until` hit is retained.
                Kind::Until => first_until = first_until.or(Some(hit)),
            }
        }
        first_until
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    // ---- basic feed semantics -----------------------------------------

    #[test]
    fn feed_hits_on_standalone_reserved_token() {
        let s = Scanner::new::<&str>(&[]);
        let hit = s.feed("BLOCKED").expect("standalone BLOCKED line hits");
        assert_eq!(hit.token, "BLOCKED");
        assert_eq!(hit.kind, Kind::Blocked);
    }

    #[test]
    fn feed_hits_on_whitespace_padded_reserved_token() {
        let s = Scanner::new::<&str>(&[]);
        let hit = s.feed("   BLOCKED   ").expect("trim() before compare");
        assert_eq!(hit.kind, Kind::Blocked);
    }

    #[test]
    fn feed_misses_on_substring_containing_token() {
        let s = Scanner::new::<&str>(&[]);
        assert!(
            s.feed("I am BLOCKED by a dependency").is_none(),
            "narrative mention must not match"
        );
        assert!(
            s.feed("BLOCKED: yesterday").is_none(),
            "prefix-only no match"
        );
        assert!(s.feed("un-BLOCKED").is_none(), "hyphen-prefixed no match");
    }

    #[test]
    fn feed_misses_on_empty_or_whitespace_line() {
        let s = Scanner::new::<&str>(&[]);
        assert!(s.feed("").is_none());
        assert!(s.feed("   ").is_none());
        assert!(s.feed("\t\t").is_none());
    }

    #[test]
    fn feed_is_case_sensitive() {
        let s = Scanner::new::<&str>(&[]);
        assert!(s.feed("blocked").is_none(), "lowercase must not match");
        assert!(s.feed("Blocked").is_none(), "mixed case must not match");
    }

    // ---- user `until` tokens ------------------------------------------

    #[test]
    fn user_until_token_hits_with_kind_until() {
        let s = Scanner::new(&["ALL_TASKS_COMPLETE"]);
        let hit = s.feed("ALL_TASKS_COMPLETE").expect("registered until hits");
        assert_eq!(hit.token, "ALL_TASKS_COMPLETE");
        assert_eq!(hit.kind, Kind::Until);
    }

    #[test]
    fn multiple_until_tokens_register() {
        let s = Scanner::new(&["READY", "APPROVED_LATER"]);
        assert_eq!(s.feed("READY").unwrap().kind, Kind::Until);
        assert_eq!(s.feed("APPROVED_LATER").unwrap().kind, Kind::Until);
        assert!(s.feed("UNREGISTERED").is_none());
    }

    #[test]
    fn explicit_blocked_in_tokens_is_still_classified_as_blocked() {
        // A caller that naively passes BLOCKED in the until slice
        // still gets Kind::Blocked back — the reserved check runs
        // first in `feed`.
        let s = Scanner::new(&["BLOCKED"]);
        assert_eq!(s.feed("BLOCKED").unwrap().kind, Kind::Blocked);
    }

    #[test]
    fn constructor_drops_empty_and_duplicate_until_tokens() {
        let s = Scanner::new(&["READY", "", "READY", "  ", "DONE"]);
        // No direct getter — assert via hits instead.
        assert!(s.feed("READY").is_some());
        assert!(s.feed("DONE").is_some());
        // Empty string must not hit (feed rejects empty lines anyway,
        // but the constructor-level filter documents the invariant).
        assert!(s.feed("").is_none());
    }

    // ---- cross-line priority via scan() -------------------------------

    #[test]
    fn scan_returns_blocked_when_both_tokens_appear() {
        let s = Scanner::new(&["ALL_TASKS_COMPLETE"]);
        let lines = ["noise", "ALL_TASKS_COMPLETE", "more noise", "BLOCKED"];
        let hit = s.scan(lines).expect("some hit");
        assert_eq!(hit.kind, Kind::Blocked, "BLOCKED beats Until");
    }

    #[test]
    fn scan_returns_blocked_short_circuit_even_if_until_follows() {
        // BLOCKED appears first; scan should return it without
        // consulting later lines. Use a panicking iterator to prove
        // short-circuit.
        struct OnceThenPanic {
            yielded: bool,
        }
        impl Iterator for OnceThenPanic {
            type Item = &'static str;
            fn next(&mut self) -> Option<&'static str> {
                if !self.yielded {
                    self.yielded = true;
                    Some("BLOCKED")
                } else {
                    panic!("scan did not short-circuit on BLOCKED");
                }
            }
        }
        let s = Scanner::new(&["ALL_TASKS_COMPLETE"]);
        let hit = s
            .scan(OnceThenPanic { yielded: false })
            .expect("BLOCKED returned");
        assert_eq!(hit.kind, Kind::Blocked);
    }

    #[test]
    fn scan_returns_first_until_when_no_blocked_appears() {
        let s = Scanner::new(&["READY", "ALL_TASKS_COMPLETE"]);
        let hit = s
            .scan(["noise", "READY", "ALL_TASKS_COMPLETE", "tail"])
            .expect("some hit");
        assert_eq!(hit.kind, Kind::Until);
        assert_eq!(hit.token, "READY", "first Until wins");
    }

    #[test]
    fn scan_returns_none_on_no_hits() {
        let s = Scanner::new(&["READY"]);
        assert!(s.scan(["foo", "bar", "baz"]).is_none());
    }

    // ---- fuzz: zero false positives around token boundaries -----------

    #[test]
    fn fuzz_lines_around_tokens_never_false_positive() {
        // A scanner with two registered tokens must never match a
        // line that is not exactly one of {BLOCKED, READY,
        // ALL_TASKS_COMPLETE}. Every template embeds the token (`{}`)
        // next to other text plus a seed-derived noise fragment
        // (`{n}`), so 10 templates x 100 seeds give 1000 distinct
        // perturbations of each token (prefixes, suffixes, internal
        // punctuation) as required by plan Unit 10 verification.
        // Seed 0 uses empty noise, which keeps the bare forms such as
        // `BLOCKED.` and `(READY)` in the set.
        let s = Scanner::new(&["READY", "ALL_TASKS_COMPLETE"]);
        let templates = [
            "{}.{n}",
            "{} now{n}",
            "now{n} {}",
            "{}?{n}",
            "({}{n})",
            " prefix{n} {} suffix ",
            "{}!{n}",
            "maybe{n}-{}",
            "{}_x{n}",
            "x{n}_{}",
        ];
        let tokens = ["BLOCKED", "READY", "ALL_TASKS_COMPLETE"];
        let seeds = 100u32;
        let mut seen: HashSet<String> = HashSet::new();
        for seed in 0..seeds {
            let noise = if seed == 0 {
                String::new()
            } else {
                seed.to_string()
            };
            for token in tokens {
                for pat in templates {
                    let line = pat.replace("{}", token).replace("{n}", &noise);
                    assert!(
                        s.feed(&line).is_none(),
                        "false positive at seed={seed} token={token} line={line:?}"
                    );
                    seen.insert(line);
                }
            }
        }
        // Guard the coverage claim itself: if two (template, seed)
        // pairs ever collapsed to the same line, the loop would be
        // re-testing inputs instead of exploring new ones.
        assert_eq!(
            seen.len(),
            tokens.len() * templates.len() * seeds as usize,
            "every generated perturbation must be distinct"
        );
        // Sanity-check the exact forms still hit, so the fuzz's
        // null result is meaningful and not caused by a broken scanner.
        assert!(s.feed("BLOCKED").is_some());
        assert!(s.feed("READY").is_some());
        assert!(s.feed("ALL_TASKS_COMPLETE").is_some());
    }
}