Skip to main content

sqry_core/query/
cost_gate.rs

1//! Pre-flight cost gate (P0-1 mitigation per `B_cost_gate.md` §§1–6
2//! and `00_contracts.md` §3.CC-2).
3//!
4//! Inspects a parsed query AST plus the current snapshot's arena
5//! size and rejects shapes whose evaluator cost is structurally
6//! unbounded — e.g. an unanchored regex over the full node arena
7//! with no scope coupling. Runs synchronously **before** the
8//! executor enters [`tokio::task::spawn_blocking`] so the blocking
9//! pool can never be filled by a query this gate rejects.
10//!
11//! The gate is a wire-stable contract:
12//! [`CostGateError::QueryTooBroad`] surfaces through the MCP layer
13//! as the canonical 4-key envelope with `kind: "query_too_broad"`,
14//! JSON-RPC code `-32602`. The CC-2 7-key `details` payload (the
//! caller's responsibility to assemble — see
//! [`CostGateError::to_query_too_broad_details`]) is round-tripped verbatim
17//! across both transports (`sqry-mcp::RpcError::query_too_broad`
18//! and `sqry-daemon::DaemonError::QueryTooBroad`).
19
20// The `QueryTooBroad` variant deliberately carries diagnostic
21// context (field name, operator, sanitised pattern, configured node
22// limit, scope hint, doc URL) so the wire envelope and the human
23// message stay coherent. Boxing would obscure the API for a single
24// per-query allocation that only happens on the rejection path.
25#![allow(clippy::result_large_err)]
26
27use crate::query::types::{Condition, Expr, Operator, Query, Value};
28use thiserror::Error;
29
/// Doc URL surfaced in the canonical `details.doc_url` field (per
/// `B_cost_gate.md` §3 + `00_contracts.md` §3.CC-2). Mirrored as
/// `sqry_mcp::error::QUERY_TOO_BROAD_DOC_URL` for the wire envelope.
///
/// Every [`CostGateError::QueryTooBroad`] built in this module carries
/// this value in its `doc_url` field.
pub const QUERY_TOO_BROAD_DOC_URL: &str = "https://docs.verivus.dev/sqry/query-cost-gate";

/// Kind tag for the cost-gate rejection envelope. Mirrored across
/// `sqry_mcp::error::KIND_QUERY_TOO_BROAD` and
/// `sqry_daemon::error::KIND_QUERY_TOO_BROAD`.
///
/// Emitted as the `kind` key of the `details` payload.
pub const KIND_QUERY_TOO_BROAD: &str = "query_too_broad";

/// Source discriminator value for static-estimate rejections (per
/// CC-2). The runtime-budget path (cluster-C `QueryBudget`) uses
/// `"runtime_budget"` instead.
///
/// Emitted as the `source` key of the `details` payload.
pub const SOURCE_STATIC_ESTIMATE: &str = "static_estimate";

/// Fields that satisfy the "scope coupling" rule (per
/// `B_cost_gate.md` §B5 + `00_contracts.md` §3.CC-2). When the
/// arena-size cap is engaged, a prohibitive regex predicate passes
/// the gate only if an enclosing `Expr::And` chain contains at least
/// one `Condition` whose `Field::as_str()` is one of these.
///
/// Consumed verbatim by cluster-F's user-facing recovery copy;
/// the wire envelope's `details.suggested_predicates` field and the
/// human-readable `scope_hint` are both computed from this list.
pub const SCOPE_FILTER_FIELDS: &[&str] = &["kind", "lang", "language", "path", "file"];
55
/// Tunable thresholds for the gate. Defaults are the
/// `CostGateConfig::DEFAULT_*` associated constants (see the
/// `Default` impl). Each threshold is also overridable via the
/// `SQRY_COST_GATE_*` environment variables consumed at config
/// load (the daemon's `DaemonConfig` already plumbs these — see
/// `sqry-daemon/src/config.rs::CostGateConfigView` for the source
/// of truth, and per `B_cost_gate.md` §B6 + `00_contracts.md`
/// §3.CC-3).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CostGateConfig {
    /// Literal-prefix length threshold. A regex whose extracted
    /// literal prefix is **strictly longer** than this value is not
    /// treated as "prohibitive" (the comparison is `>`, not `>=`).
    /// Default [`CostGateConfig::DEFAULT_MIN_PREFIX_LEN`] (`3`).
    pub min_prefix_len: usize,
    /// Minimum-match-length threshold used when no usable prefix
    /// exists. A regex whose `Hir` `minimum_len` is **strictly
    /// greater** than this value is not treated as "prohibitive".
    /// Default [`CostGateConfig::DEFAULT_MIN_LITERAL_LEN`] (`4`).
    pub min_literal_len: usize,
    /// Arena-size cap. Prohibitive shapes are allowed without scope
    /// coupling while the snapshot's node count is at or below this
    /// value. `None` (or `Some(0)`) disables the cap entirely, in
    /// which case the gate never rejects.
    /// Default `Some(50_000)` per `B_cost_gate.md` §1 + `§B6`.
    pub node_count_threshold: Option<usize>,
}
77
/// Documented defaults — the standalone-MCP and daemon-default
/// configurations both use these values (per
/// `00_contracts.md` §3.CC-3 "the standalone default matches
/// the daemon default exactly").
impl CostGateConfig {
    /// Default `min_prefix_len` threshold (per `B_cost_gate.md` §1).
    /// Because the gate compares with strict `>`, a literal prefix
    /// must be at least one byte longer than this to be accepted.
    pub const DEFAULT_MIN_PREFIX_LEN: usize = 3;
    /// Default minimum-literal-length threshold. Set to `4` so the
    /// `B_cost_gate.md` §6 reject rows for `.*foo.*` (3-char
    /// literal) and `.*_set$` (4-char literal) both reject under
    /// strict `>` comparison while `.*deserialize.*` (11-char
    /// literal) still accepts. The design's iter-3 §1 prose
    /// mentioned `MIN_LITERAL_LEN = 3` but the test-row pair
    /// resolves to `4` — recorded as design-prose vs test-row
    /// discrepancy in
    /// `docs/development/sqry-mcp-flakiness-fix-impl/b/04_PROGRESS-cost_gate.md`.
    pub const DEFAULT_MIN_LITERAL_LEN: usize = 4;
    /// Default arena-size cap above which prohibitive shapes need
    /// scope coupling (per `B_cost_gate.md` §1).
    pub const DEFAULT_NODE_COUNT_THRESHOLD: usize = 50_000;
}
99
100impl Default for CostGateConfig {
101    fn default() -> Self {
102        Self {
103            min_prefix_len: Self::DEFAULT_MIN_PREFIX_LEN,
104            min_literal_len: Self::DEFAULT_MIN_LITERAL_LEN,
105            node_count_threshold: Some(Self::DEFAULT_NODE_COUNT_THRESHOLD),
106        }
107    }
108}
109
110/// Verdict the gate returns to the caller.
111///
112/// The MCP boundary (in `sqry-mcp/src/server.rs` for the standalone
113/// path and `sqry-daemon/src/mcp_host/error_map.rs` for the daemon
114/// path) downcasts this and reshapes it into the canonical CC-2
115/// `query_too_broad` envelope.
116#[derive(Debug, Clone, Error, PartialEq, Eq)]
117pub enum CostGateError {
118    /// A predicate's evaluator cost is structurally unbounded over
119    /// the current snapshot's arena and the query lacks the scope
120    /// coupling that would narrow it.
121    #[error(
122        "query rejected: predicate `{field}{op}{pattern}` is unbounded over {node_count} nodes; \
123         add a scope filter (one of: {scope_hint}) or anchor the regex with `^` / a literal \
124         prefix \u{2265} {min_prefix_len} chars. See {doc_url}"
125    )]
126    QueryTooBroad {
127        /// Offending predicate's field name (e.g. `name`).
128        field: String,
129        /// Operator string (`":"` for `Equal`, `"~="` for `Regex`).
130        op: &'static str,
131        /// Offending value/regex pattern, surrounded by `/.../` for
132        /// regexes (matches `B_cost_gate.md` §5 user-message shape).
133        /// The raw pattern is RETAINED for the human message but is
134        /// **not** echoed into the structured `predicate_shape` field
135        /// (cluster-B iter-2 fix — codex review flagged the raw-value
136        /// leak).
137        pattern: String,
138        /// Snapshot arena size at gate time — surfaces in the
139        /// envelope as `details.estimated_visited_nodes` and in the
140        /// human message as the literal node count.
141        node_count: usize,
142        /// Configured static node-limit threshold (the value the
143        /// gate compared `node_count` against). Surfaces as
144        /// `details.limit`. Distinct from `node_count` so the wire
145        /// envelope reports both the cap and the snapshot size
146        /// (cluster-B iter-2 fix — codex review flagged the
147        /// `limit = node_count` mistake).
148        node_limit: usize,
149        /// Comma-joined list of fields that would satisfy coupling.
150        /// Always derived from [`SCOPE_FILTER_FIELDS`].
151        scope_hint: String,
152        /// Threshold the gate compared `min_prefix_len` against —
153        /// echoes the active config so MCP clients can render
154        /// specific recovery suggestions.
155        min_prefix_len: usize,
156        /// Doc URL for the recovery flow ([`QUERY_TOO_BROAD_DOC_URL`]).
157        doc_url: &'static str,
158    },
159}
160
161impl CostGateError {
162    /// Build the canonical CC-2 7-key `details` payload for the MCP
163    /// envelope. Source discriminator is hard-wired to
164    /// `"static_estimate"` since this error class is the pre-flight
165    /// path; runtime-budget rejections (cluster-C) construct their
166    /// own variant with `source = "runtime_budget"` while reusing the
167    /// same other six keys (per `00_contracts.md` §3.CC-2 "B extends
168    /// `details.source` in place").
169    #[must_use]
170    pub fn to_query_too_broad_details(&self) -> serde_json::Value {
171        let Self::QueryTooBroad {
172            field,
173            op,
174            pattern: _,
175            node_count,
176            node_limit,
177            scope_hint: _,
178            min_prefix_len: _,
179            doc_url,
180        } = self;
181        // `suggested_predicates` is the canonical scope-filter list
182        // (the user-message `scope_hint` is the same data rendered
183        // as a comma-string; the structured `details` field is an
184        // array so MCP clients can render their own suggestion UI).
185        let suggested: Vec<&str> = SCOPE_FILTER_FIELDS.to_vec();
186        // Cluster-B iter-2 BLOCKER 2: emit a sanitized
187        // field+operator-only `predicate_shape` (no raw user pattern,
188        // no path values). The 256-byte cap matches
189        // `Expr::shape_summary` (cluster-C). We elide the value with
190        // `<elided>` so consumers can still distinguish regex
191        // (`name~=<elided>`) from literal (`name:<elided>`) without
192        // any user-influenced bytes reaching the wire.
193        let mut predicate_shape = format!("{field}{op}<elided>");
194        if predicate_shape.len() > 256 {
195            predicate_shape.truncate(253);
196            predicate_shape.push('\u{2026}');
197        }
198        serde_json::json!({
199            "source": SOURCE_STATIC_ESTIMATE,
200            "kind": KIND_QUERY_TOO_BROAD,
201            // `limit` is the configured static node-count threshold
202            // (`cfg.node_count_threshold`); `estimated_visited_nodes`
203            // is the snapshot's actual node count. Cluster-B iter-2
204            // BLOCKER 2: previously both fields carried the same
205            // value, hiding the cap from the wire envelope.
206            "estimated_visited_nodes": node_count,
207            "limit": node_limit,
208            "predicate_shape": predicate_shape,
209            "suggested_predicates": suggested,
210            "doc_url": doc_url,
211        })
212    }
213}
214
215/// Top-level gate entrypoint.
216///
217/// Takes a post-variable-substitution `Expr` (the executor's shared
218/// `execute_evaluate_with` body resolves variables before invoking
219/// the gate, per `B_cost_gate.md` §2 "Designed shared body"). The
220/// two-arg [`check_query_root`] convenience wrapper exists for
221/// callers (e.g. CLI ad-hoc usages) that have a `&Query` and no
222/// variable map.
223///
224/// # Errors
225///
226/// Returns [`CostGateError::QueryTooBroad`] when the query shape is
227/// structurally unbounded over an arena of the given size and the
228/// scope-coupling rule is not satisfied.
229pub fn check_query(
230    expr: &Expr,
231    node_count: usize,
232    cfg: &CostGateConfig,
233) -> Result<(), CostGateError> {
234    walk_expr(expr, /*scope_in_scope=*/ false, node_count, cfg)
235}
236
237/// Convenience wrapper for callers that hold a [`Query`] root.
238///
239/// # Errors
240///
241/// As [`check_query`].
242pub fn check_query_root(
243    query: &Query,
244    node_count: usize,
245    cfg: &CostGateConfig,
246) -> Result<(), CostGateError> {
247    check_query(&query.root, node_count, cfg)
248}
249
250/// Standalone shape check for a regex pattern with no surrounding
251/// AST (used by `sqry-cli`'s `sqry search` subcommand at
252/// `commands/search.rs:527`, which has no parsed query context but
253/// still needs to refuse pathologically broad regexes before
254/// `RegexBuilder::build`).
255///
256/// `B_cost_gate.md` §4 "CLI sqry search" + §B5 / §1: skips the
257/// scope-coupling rule and applies only the anchor / prefix /
258/// minimum-length checks. `node_count_threshold` still applies —
259/// passing `None` (or `Some(0)`) disables the cap entirely.
260///
261/// # Errors
262///
263/// Returns [`CostGateError::QueryTooBroad`] when the pattern fails
264/// every shape check AND the node-count threshold is exceeded.
265pub fn check_regex_pattern_text(
266    pattern: &str,
267    node_count: usize,
268    cfg: &CostGateConfig,
269) -> Result<(), CostGateError> {
270    if !cap_engaged(node_count, cfg) {
271        return Ok(());
272    }
273    if regex_shape_is_acceptable(pattern, cfg) {
274        return Ok(());
275    }
276    Err(CostGateError::QueryTooBroad {
277        field: "search".to_string(),
278        op: " ",
279        pattern: format!("/{pattern}/"),
280        node_count,
281        node_limit: cfg.node_count_threshold.unwrap_or(0),
282        scope_hint: SCOPE_FILTER_FIELDS.join(", "),
283        min_prefix_len: cfg.min_prefix_len,
284        doc_url: QUERY_TOO_BROAD_DOC_URL,
285    })
286}
287
288// ────────────────────────────── internals ─────────────────────────────────
289
/// Cost class of a single condition. The gate only needs three
/// classes: cheap (always fine), medium (fine), prohibitive
/// (requires coupling). Within prohibitive there is no further
/// distinction — see `B_cost_gate.md` §1 for the cost-class table.
///
/// `walk_condition` treats `Cheap` and `Medium` identically (both
/// pass); only `Prohibitive` can lead to a rejection.
enum Class {
    /// Passes the gate unconditionally.
    Cheap,
    /// Passes the gate unconditionally; kept distinct from `Cheap`
    /// to mirror the design's cost-class table.
    Medium,
    /// Rejected when the arena-size cap is engaged and no scope
    /// coupling is in effect.
    Prohibitive,
}
299
300fn cap_engaged(node_count: usize, cfg: &CostGateConfig) -> bool {
301    match cfg.node_count_threshold {
302        Some(0) | None => false,
303        Some(threshold) => node_count > threshold,
304    }
305}
306
307fn walk_expr(
308    expr: &Expr,
309    scope_in_scope: bool,
310    node_count: usize,
311    cfg: &CostGateConfig,
312) -> Result<(), CostGateError> {
313    match expr {
314        Expr::Condition(cond) => walk_condition(cond, scope_in_scope, node_count, cfg),
315        Expr::And(operands) => {
316            // Coupling: at least one operand at THIS level must be a
317            // cheap scope-filter `Condition`. Inherit from outer
318            // scope; do NOT compute a cumulative coupling state
319            // across nested AND levels (the design's §"Coupling
320            // rule" is per-level: an AND chain that contains a
321            // cheap kind/lang/path/file at any nesting depth is
322            // coupled).
323            let coupled = scope_in_scope || operands.iter().any(is_scope_filter_at);
324            for op in operands {
325                walk_expr(op, coupled, node_count, cfg)?;
326            }
327            Ok(())
328        }
329        Expr::Or(branches) => {
330            // Inside Or, each branch must independently satisfy the
331            // rule. An Or branch with a prohibitive leaf and no
332            // cheap sibling fails the whole query.
333            for br in branches {
334                walk_expr(br, scope_in_scope, node_count, cfg)?;
335            }
336            Ok(())
337        }
338        Expr::Not(inner) => {
339            // Negation does not reduce cost (negating a cheap
340            // filter still requires evaluating the inner predicate).
341            // Inspect the inner with the same coupling state.
342            walk_expr(inner, scope_in_scope, node_count, cfg)
343        }
344        Expr::Join(join) => {
345            // Both sides walked independently; the join evaluator
346            // itself bounds row count via per-side selectivity, so a
347            // per-side check is sufficient (per `B_cost_gate.md`
348            // §"Coupling rule").
349            walk_expr(&join.left, scope_in_scope, node_count, cfg)?;
350            walk_expr(&join.right, scope_in_scope, node_count, cfg)
351        }
352    }
353}
354
355fn walk_condition(
356    cond: &Condition,
357    scope_in_scope: bool,
358    node_count: usize,
359    cfg: &CostGateConfig,
360) -> Result<(), CostGateError> {
361    // Recurse into subqueries: a `callers:(<inner>)` predicate
362    // inherits the worst class of its inner expression; the
363    // subquery is walked under the SAME coupling state because
364    // (per `B_cost_gate.md` §"Coupling rule") subquery results are
365    // joined back into the outer match set rather than independently
366    // selecting rows.
367    if let Value::Subquery(inner) = &cond.value {
368        walk_expr(inner, scope_in_scope, node_count, cfg)?;
369    }
370
371    // Variables resolve to one of the other Value variants before
372    // the gate runs (cluster A's executor calls
373    // `resolve_variables` first). If a Variable somehow reaches
374    // here it must be `Cheap` to avoid spurious rejections.
375    if matches!(cond.value, Value::Variable(_)) {
376        return Ok(());
377    }
378
379    let class = classify_condition(cond, cfg);
380    match class {
381        Class::Cheap | Class::Medium => Ok(()),
382        Class::Prohibitive => {
383            if !cap_engaged(node_count, cfg) {
384                // Below the arena-size cap: prohibitive shapes are
385                // allowed unconditionally so the gate never fires
386                // on small test fixtures.
387                return Ok(());
388            }
389            if scope_in_scope {
390                return Ok(());
391            }
392            Err(build_query_too_broad(cond, node_count, cfg))
393        }
394    }
395}
396
397fn classify_condition(cond: &Condition, cfg: &CostGateConfig) -> Class {
398    let field = cond.field.as_str();
399    match (&cond.value, &cond.operator) {
400        // Equal-operator conditions on indexed fields are always
401        // cheap regardless of value.
402        (Value::String(_), Operator::Equal)
403        | (Value::Boolean(_), Operator::Equal)
404        | (Value::Number(_), Operator::Equal) => Class::Cheap,
405        // String literal and `Equal` against a name field is cheap
406        // (auxiliary `name_index` hit). Same for path globs.
407        (Value::Regex(rv), Operator::Regex) => regex_class(field, &rv.pattern, cfg),
408        // Range comparisons on numeric fields are bounded by index
409        // count.
410        (_, Operator::Greater | Operator::Less | Operator::GreaterEq | Operator::LessEq) => {
411            Class::Medium
412        }
413        // Subquery values: the subquery walk above already validated
414        // the inner; the outer condition's classification is medium
415        // (the executor walks the subquery's matched-set and joins
416        // against the outer field's index — bounded by the smaller
417        // side).
418        (Value::Subquery(_), _) => Class::Medium,
419        // Variable values (only reachable if `resolve_variables`
420        // skipped them). Conservative cheap classification.
421        (Value::Variable(_), _) => Class::Cheap,
422        // Default: anything else is medium (single-equal on a
423        // non-name field).
424        _ => Class::Medium,
425    }
426}
427
428/// Classify a regex value against a target field. Combines anchor
429/// detection + literal-prefix extraction + `Hir::minimum_len`
430/// (per `B_cost_gate.md` §"Regex shape rules").
431fn regex_class(field: &str, pattern: &str, cfg: &CostGateConfig) -> Class {
432    // Some fields (e.g. `kind`, `lang`) have a small enumerated
433    // value space, so a regex-over-the-value is medium even if
434    // unanchored.
435    if matches!(field, "kind" | "lang" | "language") {
436        return Class::Medium;
437    }
438    if regex_shape_is_acceptable(pattern, cfg) {
439        Class::Medium
440    } else {
441        Class::Prohibitive
442    }
443}
444
445/// Returns `true` when the regex pattern is shape-acceptable:
446/// either anchored with a sufficient literal prefix OR has a
447/// `Hir::minimum_len` ≥ `cfg.min_literal_len`.
448fn regex_shape_is_acceptable(pattern: &str, cfg: &CostGateConfig) -> bool {
449    let Ok(hir) = regex_syntax::parse(pattern) else {
450        // A pattern that fails parse-time cannot reach the executor
451        // (the validator rejects it earlier); be permissive here so
452        // the gate never produces false positives on syntactically
453        // valid-but-unusual patterns the validator accepted.
454        return true;
455    };
456
457    // Literal-prefix extraction. `Extractor::extract` returns a
458    // `Seq` of literal candidates; the longest one is the
459    // contribution we care about.
460    let mut extractor = regex_syntax::hir::literal::Extractor::new();
461    extractor.kind(regex_syntax::hir::literal::ExtractKind::Prefix);
462    let prefixes = extractor.extract(&hir);
463    let longest_prefix = prefixes
464        .literals()
465        .map(|lits| {
466            lits.iter()
467                .map(|lit| lit.as_bytes().len())
468                .max()
469                .unwrap_or(0)
470        })
471        .unwrap_or(0);
472    // Strict `>` comparison: a literal prefix of EXACTLY
473    // `min_prefix_len` chars is the "border-tight" case the design
474    // §6 row `gate_rejects_short_anchored_regex_below_prefix_len`
475    // pins as REJECT (a 1-char prefix at threshold 3 must reject;
476    // a 4-char prefix at threshold 3 must accept). Strict `>`
477    // satisfies both directions cleanly.
478    if longest_prefix > cfg.min_prefix_len {
479        return true;
480    }
481
482    // Fallback: `Hir::minimum_len()`. Pattern with `min_len >
483    // min_literal_len` (e.g. `/.*deserialize.*/`) is acceptable
484    // even without a usable prefix. Strict `>` matches the §6
485    // row pair `gate_rejects_bare_unanchored_substring_regex`
486    // (`/.*foo.*/`, len=3, threshold=3 → REJECT) vs
487    // `gate_allows_long_required_literal_without_anchor`
488    // (`/.*deserialize.*/`, len=11, threshold=3 → ACCEPT).
489    if let Some(min_len) = hir.properties().minimum_len()
490        && min_len > cfg.min_literal_len
491    {
492        return true;
493    }
494
495    false
496}
497
498fn is_scope_filter_at(expr: &Expr) -> bool {
499    if let Expr::Condition(cond) = expr {
500        let f = cond.field.as_str();
501        if SCOPE_FILTER_FIELDS.contains(&f) {
502            // Bare-presence (any operator + value) of one of the
503            // scope-filter fields is sufficient — the design's
504            // §"Coupling rule" treats `kind:function` and
505            // `kind~=function|method` symmetrically (both narrow
506            // the arena via the `kind_index`).
507            return true;
508        }
509    }
510    false
511}
512
513fn build_query_too_broad(
514    cond: &Condition,
515    node_count: usize,
516    cfg: &CostGateConfig,
517) -> CostGateError {
518    let field = cond.field.as_str().to_string();
519    let op = match cond.operator {
520        Operator::Equal => ":",
521        Operator::Regex => "~=",
522        // Comparison operators are never prohibitive in the current
523        // classification, but if a future change reaches here keep
524        // a stable mapping.
525        Operator::Greater => ">",
526        Operator::Less => "<",
527        Operator::GreaterEq => ">=",
528        Operator::LessEq => "<=",
529    };
530    let pattern = match &cond.value {
531        Value::String(s) => s.clone(),
532        Value::Regex(rv) => format!("/{}/", rv.pattern),
533        Value::Number(n) => n.to_string(),
534        Value::Boolean(b) => b.to_string(),
535        Value::Variable(name) => format!("${name}"),
536        Value::Subquery(_) => "(<subquery>)".to_string(),
537    };
538    CostGateError::QueryTooBroad {
539        field,
540        op,
541        pattern,
542        node_count,
543        node_limit: cfg.node_count_threshold.unwrap_or(0),
544        scope_hint: SCOPE_FILTER_FIELDS.join(", "),
545        min_prefix_len: cfg.min_prefix_len,
546        doc_url: QUERY_TOO_BROAD_DOC_URL,
547    }
548}
549
#[cfg(test)]
mod tests {
    use super::*;
    use crate::query::QueryParser;

    /// Parses `q` into a [`Query`], panicking on a parse error.
    fn parse(q: &str) -> Query {
        QueryParser::parse_query(q).expect("parse")
    }

    /// Default gate configuration (arena-size cap enabled).
    fn cfg() -> CostGateConfig {
        CostGateConfig::default()
    }

    /// Default thresholds with the arena-size cap disabled.
    fn cfg_no_cap() -> CostGateConfig {
        CostGateConfig {
            node_count_threshold: None,
            ..CostGateConfig::default()
        }
    }

    // ────────── §6 unit-test rows ──────────

    #[test]
    fn gate_rejects_bare_unanchored_suffix_regex() {
        let q = parse("name~=/.*_set$/");
        let err = check_query_root(&q, 200_000, &cfg()).expect_err("must reject");
        assert!(
            matches!(err, CostGateError::QueryTooBroad { ref field, .. } if field == "name"),
            "expected name-field rejection, got {err:?}"
        );
    }

    #[test]
    fn gate_rejects_bare_unanchored_substring_regex() {
        let q = parse("name~=/.*foo.*/");
        let err = check_query_root(&q, 200_000, &cfg()).expect_err("must reject");
        let CostGateError::QueryTooBroad { ref pattern, .. } = err;
        assert!(
            pattern.contains(".*foo.*"),
            "envelope must echo the offending pattern, got {pattern}"
        );
    }

    #[test]
    fn gate_allows_unanchored_regex_below_node_threshold() {
        let q = parse("name~=/.*_set$/");
        check_query_root(&q, 1_000, &cfg()).expect("below threshold must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_kind_coupling() {
        let q = parse("kind:function AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("kind coupling must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_lang_coupling() {
        let q = parse("lang:rust AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("lang coupling must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_path_coupling() {
        let q = parse("path:src/**/*.rs AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("path coupling must pass");
    }

    #[test]
    fn gate_allows_anchored_prefix_regex_without_coupling() {
        // `^get_` literal prefix is 4 chars, strictly greater than
        // DEFAULT_MIN_PREFIX_LEN (3) as the gate's `>` check requires.
        let q = parse("name~=/^get_/");
        check_query_root(&q, 1_000_000, &cfg()).expect("anchored prefix must pass");
    }

    #[test]
    fn gate_allows_long_required_literal_without_anchor() {
        // `deserialize` is 11 chars > DEFAULT_MIN_LITERAL_LEN (4).
        let q = parse("name~=/.*deserialize.*/");
        check_query_root(&q, 1_000_000, &cfg()).expect("long literal must pass");
    }

    #[test]
    fn gate_rejects_short_anchored_regex_below_prefix_len() {
        // `^a` prefix is 1 char, below DEFAULT_MIN_PREFIX_LEN (3).
        let q = parse("name~=/^a/");
        let err = check_query_root(&q, 1_000_000, &cfg()).expect_err("short prefix must reject");
        assert!(matches!(err, CostGateError::QueryTooBroad { .. }));
    }

    #[test]
    fn gate_rejects_or_branch_with_uncoupled_prohibitive() {
        // First branch is coupled, second is not — Or branches walk
        // independently so the whole query is rejected.
        let q = parse("(kind:function AND name~=/.*_set$/) OR (name~=/.*foo.*/)");
        let err = check_query_root(&q, 1_000_000, &cfg()).expect_err("uncoupled Or must reject");
        let CostGateError::QueryTooBroad { ref pattern, .. } = err;
        assert!(
            pattern.contains(".*foo.*"),
            "rejection must point at the uncoupled branch, got {pattern}"
        );
    }

    #[test]
    fn gate_passes_known_good_canonical_queries() {
        let canonical = [
            "kind:function",
            "name:foo",
            "path:src/**/*.rs",
            "lang:rust AND kind:method",
            "kind:method AND callers:foo",
        ];
        for q in canonical {
            let parsed = parse(q);
            check_query_root(&parsed, 1_000_000, &cfg())
                .unwrap_or_else(|e| panic!("canonical query {q:?} must pass; got {e:?}"));
        }
    }

    #[test]
    fn gate_threshold_disabled_when_node_count_threshold_is_none() {
        let q = parse("name~=/.*_set$/");
        check_query_root(&q, 1_000_000_000, &cfg_no_cap())
            .expect("None threshold must disable cap entirely");
    }

    #[test]
    fn gate_threshold_disabled_when_node_count_threshold_is_zero() {
        let q = parse("name~=/.*_set$/");
        let cfg = CostGateConfig {
            node_count_threshold: Some(0),
            ..CostGateConfig::default()
        };
        check_query_root(&q, 1_000_000_000, &cfg).expect("Some(0) threshold must disable cap");
    }

    #[test]
    fn gate_recurses_into_subquery_value() {
        // `callers:(<inner>)` — the inner expression is walked by
        // the gate. Here the inner is a prohibitive unanchored regex
        // and the outer AND chain carries `kind:function`.
        let q = parse("kind:function AND callers:(name~=/.*foo.*/)");
        let err = check_query_root(&q, 1_000_000, &cfg());
        // This test deliberately accepts EITHER outcome: the design
        // (`B_cost_gate.md` §"Coupling rule") allows both readings
        // of whether a subquery inherits the outer coupling state.
        // `walk_condition` currently walks the subquery under the
        // outer `scope_in_scope` flag, so — assuming the parser
        // yields an AND of the two conditions — the outer
        // `kind:function` couples the inner `name~=` and the query
        // passes. The only thing pinned here is that IF the gate
        // rejects, the rejection names the inner `name` predicate.
        //
        // NOTE(review): as written this test cannot fail on an `Ok`
        // result; tighten it once the intended interpretation is
        // settled. When cluster-C's runtime budget lands, the inner
        // subquery will also be bounded by the per-call row budget.
        if let Err(CostGateError::QueryTooBroad { ref field, .. }) = err {
            assert_eq!(field, "name");
        }
    }

    // ────────── envelope helpers ──────────

    #[test]
    fn to_query_too_broad_details_emits_canonical_cc2_seven_keys() {
        let err = CostGateError::QueryTooBroad {
            field: "name".into(),
            op: "~=",
            pattern: "/.*_set$/".into(),
            node_count: 312_487,
            node_limit: 50_000,
            scope_hint: SCOPE_FILTER_FIELDS.join(", "),
            min_prefix_len: 3,
            doc_url: QUERY_TOO_BROAD_DOC_URL,
        };
        let details = err.to_query_too_broad_details();
        assert_eq!(details["source"], SOURCE_STATIC_ESTIMATE);
        assert_eq!(details["kind"], KIND_QUERY_TOO_BROAD);
        assert_eq!(details["estimated_visited_nodes"], 312_487);
        // Cluster-B iter-2: `limit` is the configured threshold, NOT
        // the snapshot's node_count.
        assert_eq!(details["limit"], 50_000);
        // Cluster-B iter-2: predicate_shape is field+op-only, value
        // elided. No raw user pattern reaches the wire.
        let shape = details["predicate_shape"].as_str().unwrap();
        assert_eq!(shape, "name~=<elided>");
        assert!(!shape.contains("_set"));
        assert!(details["suggested_predicates"].is_array());
        assert_eq!(details["doc_url"], QUERY_TOO_BROAD_DOC_URL);
    }

    // ────────── standalone regex shape check (CLI search) ──────────

    #[test]
    fn cli_search_shape_check_rejects_unanchored_substring() {
        let err = check_regex_pattern_text(".*foo.*", 1_000_000, &cfg())
            .expect_err("CLI shape check must reject .*foo.*");
        assert!(matches!(err, CostGateError::QueryTooBroad { .. }));
    }

    #[test]
    fn cli_search_shape_check_passes_anchored_prefix() {
        check_regex_pattern_text("^get_", 1_000_000, &cfg())
            .expect("anchored prefix must pass CLI shape check");
    }

    #[test]
    fn cli_search_shape_check_passes_long_literal() {
        check_regex_pattern_text(".*deserialize.*", 1_000_000, &cfg())
            .expect("long literal must pass CLI shape check");
    }

    #[test]
    fn cli_search_shape_check_below_threshold_passes() {
        check_regex_pattern_text(".*foo.*", 1_000, &cfg())
            .expect("below cap must pass shape check");
    }
}