sqry_core/query/cost_gate.rs
1//! Pre-flight cost gate (P0-1 mitigation per `B_cost_gate.md` §§1–6
2//! and `00_contracts.md` §3.CC-2).
3//!
4//! Inspects a parsed query AST plus the current snapshot's arena
5//! size and rejects shapes whose evaluator cost is structurally
6//! unbounded — e.g. an unanchored regex over the full node arena
7//! with no scope coupling. Runs synchronously **before** the
8//! executor enters [`tokio::task::spawn_blocking`] so the blocking
9//! pool can never be filled by a query this gate rejects.
10//!
11//! The gate is a wire-stable contract:
12//! [`CostGateError::QueryTooBroad`] surfaces through the MCP layer
13//! as the canonical 4-key envelope with `kind: "query_too_broad"`,
14//! JSON-RPC code `-32602`. The CC-2 7-key `details` payload (the
15//! caller's responsibility to assemble — see
//! [`CostGateError::to_query_too_broad_details`]) is round-tripped verbatim
17//! across both transports (`sqry-mcp::RpcError::query_too_broad`
18//! and `sqry-daemon::DaemonError::QueryTooBroad`).
19
20// The `QueryTooBroad` variant deliberately carries diagnostic
21// context (field name, operator, sanitised pattern, configured node
22// limit, scope hint, doc URL) so the wire envelope and the human
23// message stay coherent. Boxing would obscure the API for a single
24// per-query allocation that only happens on the rejection path.
25#![allow(clippy::result_large_err)]
26
27use crate::query::types::{Condition, Expr, Operator, Query, Value};
28use thiserror::Error;
29
/// Doc URL surfaced in the canonical `details.doc_url` field (per
/// `B_cost_gate.md` §3 + `00_contracts.md` §3.CC-2). Mirrored as
/// `sqry_mcp::error::QUERY_TOO_BROAD_DOC_URL` for the wire envelope.
///
/// Every `QueryTooBroad` error constructed in this module stores this
/// value in its `doc_url` field.
pub const QUERY_TOO_BROAD_DOC_URL: &str = "https://docs.verivus.dev/sqry/query-cost-gate";

/// Kind tag for the cost-gate rejection envelope. Mirrored across
/// `sqry_mcp::error::KIND_QUERY_TOO_BROAD` and
/// `sqry_daemon::error::KIND_QUERY_TOO_BROAD`.
///
/// Emitted as the `kind` key of the `details` payload.
pub const KIND_QUERY_TOO_BROAD: &str = "query_too_broad";

/// Source discriminator value for static-estimate rejections (per
/// CC-2). The runtime-budget path (cluster-C `QueryBudget`) uses
/// `"runtime_budget"` instead.
///
/// Emitted as the `source` key of the `details` payload.
pub const SOURCE_STATIC_ESTIMATE: &str = "static_estimate";

/// Fields that satisfy the "scope coupling" rule (per
/// `B_cost_gate.md` §B5 + `00_contracts.md` §3.CC-2). A prohibitive
/// regex predicate passes the gate iff an enclosing `Expr::And`
/// chain contains at least one direct `Condition` operand whose
/// `Field::as_str()` is one of these.
///
/// Consumed verbatim by cluster-F's user-facing recovery copy;
/// the wire envelope's `details.suggested_predicates` field is
/// computed from this list, and the human-readable `scope_hint` is
/// this list joined with `", "`.
pub const SCOPE_FILTER_FIELDS: &[&str] = &["kind", "lang", "language", "path", "file"];
55
/// Tunable thresholds for the gate. Defaults match the design's
/// §B6 / §1.4 numbers. Each threshold is also overridable via the
/// `SQRY_COST_GATE_*` environment variables consumed at config
/// load (the daemon's `DaemonConfig` already plumbs these — see
/// `sqry-daemon/src/config.rs::CostGateConfigView` for the source
/// of truth, and per `B_cost_gate.md` §B6 + `00_contracts.md`
/// §3.CC-3).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CostGateConfig {
    /// Literal-prefix length threshold. A regex stops being
    /// "prohibitive" when its longest extracted literal prefix is
    /// STRICTLY longer than this value (a prefix of exactly this
    /// length is still rejected). Default `3`
    /// (`DEFAULT_MIN_PREFIX_LEN`) per `B_cost_gate.md` §1.
    pub min_prefix_len: usize,
    /// `Hir::minimum_len` threshold consulted when the prefix check
    /// fails. The regex is accepted when its minimum match length is
    /// STRICTLY greater than this value. Default `4`
    /// (`DEFAULT_MIN_LITERAL_LEN`).
    pub min_literal_len: usize,
    /// Arena-size cap at or below which prohibitive shapes are
    /// allowed without scope coupling; the gate only engages when the
    /// snapshot's node count is strictly greater than this value.
    /// `None` (or `Some(0)`) means the cap never engages, so nothing
    /// is ever rejected. Default `Some(50_000)` per `B_cost_gate.md`
    /// §1 + `§B6`.
    pub node_count_threshold: Option<usize>,
}
77
/// Documented defaults — the standalone-MCP and daemon-default
/// configurations both use these values (per
/// `00_contracts.md` §3.CC-3 "the standalone default matches
/// the daemon default exactly").
impl CostGateConfig {
    /// Default `min_prefix_len` threshold (per `B_cost_gate.md` §1).
    pub const DEFAULT_MIN_PREFIX_LEN: usize = 3;
    /// Default minimum-literal-length threshold. Set to `4` so the
    /// `B_cost_gate.md` §6 reject rows for `.*foo.*` (3-char
    /// literal) and `.*_set$` (4-char literal) both reject under
    /// strict `>` comparison while `.*deserialize.*` (11-char
    /// literal) still accepts. The design's iter-3 §1 prose
    /// mentioned `MIN_LITERAL_LEN = 3` but the test-row pair
    /// resolves to `4` — recorded as design-prose vs test-row
    /// discrepancy in
    /// `docs/development/sqry-mcp-flakiness-fix-impl/b/04_PROGRESS-cost_gate.md`.
    pub const DEFAULT_MIN_LITERAL_LEN: usize = 4;
    /// Default arena-size cap above which prohibitive shapes need
    /// scope coupling (per `B_cost_gate.md` §1).
    pub const DEFAULT_NODE_COUNT_THRESHOLD: usize = 50_000;
}
99
100impl Default for CostGateConfig {
101 fn default() -> Self {
102 Self {
103 min_prefix_len: Self::DEFAULT_MIN_PREFIX_LEN,
104 min_literal_len: Self::DEFAULT_MIN_LITERAL_LEN,
105 node_count_threshold: Some(Self::DEFAULT_NODE_COUNT_THRESHOLD),
106 }
107 }
108}
109
110/// Verdict the gate returns to the caller.
111///
112/// The MCP boundary (in `sqry-mcp/src/server.rs` for the standalone
113/// path and `sqry-daemon/src/mcp_host/error_map.rs` for the daemon
114/// path) downcasts this and reshapes it into the canonical CC-2
115/// `query_too_broad` envelope.
116#[derive(Debug, Clone, Error, PartialEq, Eq)]
117pub enum CostGateError {
118 /// A predicate's evaluator cost is structurally unbounded over
119 /// the current snapshot's arena and the query lacks the scope
120 /// coupling that would narrow it.
121 #[error(
122 "query rejected: predicate `{field}{op}{pattern}` is unbounded over {node_count} nodes; \
123 add a scope filter (one of: {scope_hint}) or anchor the regex with `^` / a literal \
124 prefix \u{2265} {min_prefix_len} chars. See {doc_url}"
125 )]
126 QueryTooBroad {
127 /// Offending predicate's field name (e.g. `name`).
128 field: String,
129 /// Operator string (`":"` for `Equal`, `"~="` for `Regex`).
130 op: &'static str,
131 /// Offending value/regex pattern, surrounded by `/.../` for
132 /// regexes (matches `B_cost_gate.md` §5 user-message shape).
133 /// The raw pattern is RETAINED for the human message but is
134 /// **not** echoed into the structured `predicate_shape` field
135 /// (cluster-B iter-2 fix — codex review flagged the raw-value
136 /// leak).
137 pattern: String,
138 /// Snapshot arena size at gate time — surfaces in the
139 /// envelope as `details.estimated_visited_nodes` and in the
140 /// human message as the literal node count.
141 node_count: usize,
142 /// Configured static node-limit threshold (the value the
143 /// gate compared `node_count` against). Surfaces as
144 /// `details.limit`. Distinct from `node_count` so the wire
145 /// envelope reports both the cap and the snapshot size
146 /// (cluster-B iter-2 fix — codex review flagged the
147 /// `limit = node_count` mistake).
148 node_limit: usize,
149 /// Comma-joined list of fields that would satisfy coupling.
150 /// Always derived from [`SCOPE_FILTER_FIELDS`].
151 scope_hint: String,
152 /// Threshold the gate compared `min_prefix_len` against —
153 /// echoes the active config so MCP clients can render
154 /// specific recovery suggestions.
155 min_prefix_len: usize,
156 /// Doc URL for the recovery flow ([`QUERY_TOO_BROAD_DOC_URL`]).
157 doc_url: &'static str,
158 },
159}
160
161impl CostGateError {
162 /// Build the canonical CC-2 7-key `details` payload for the MCP
163 /// envelope. Source discriminator is hard-wired to
164 /// `"static_estimate"` since this error class is the pre-flight
165 /// path; runtime-budget rejections (cluster-C) construct their
166 /// own variant with `source = "runtime_budget"` while reusing the
167 /// same other six keys (per `00_contracts.md` §3.CC-2 "B extends
168 /// `details.source` in place").
169 #[must_use]
170 pub fn to_query_too_broad_details(&self) -> serde_json::Value {
171 let Self::QueryTooBroad {
172 field,
173 op,
174 pattern: _,
175 node_count,
176 node_limit,
177 scope_hint: _,
178 min_prefix_len: _,
179 doc_url,
180 } = self;
181 // `suggested_predicates` is the canonical scope-filter list
182 // (the user-message `scope_hint` is the same data rendered
183 // as a comma-string; the structured `details` field is an
184 // array so MCP clients can render their own suggestion UI).
185 let suggested: Vec<&str> = SCOPE_FILTER_FIELDS.to_vec();
186 // Cluster-B iter-2 BLOCKER 2: emit a sanitized
187 // field+operator-only `predicate_shape` (no raw user pattern,
188 // no path values). The 256-byte cap matches
189 // `Expr::shape_summary` (cluster-C). We elide the value with
190 // `<elided>` so consumers can still distinguish regex
191 // (`name~=<elided>`) from literal (`name:<elided>`) without
192 // any user-influenced bytes reaching the wire.
193 let mut predicate_shape = format!("{field}{op}<elided>");
194 if predicate_shape.len() > 256 {
195 predicate_shape.truncate(253);
196 predicate_shape.push('\u{2026}');
197 }
198 serde_json::json!({
199 "source": SOURCE_STATIC_ESTIMATE,
200 "kind": KIND_QUERY_TOO_BROAD,
201 // `limit` is the configured static node-count threshold
202 // (`cfg.node_count_threshold`); `estimated_visited_nodes`
203 // is the snapshot's actual node count. Cluster-B iter-2
204 // BLOCKER 2: previously both fields carried the same
205 // value, hiding the cap from the wire envelope.
206 "estimated_visited_nodes": node_count,
207 "limit": node_limit,
208 "predicate_shape": predicate_shape,
209 "suggested_predicates": suggested,
210 "doc_url": doc_url,
211 })
212 }
213}
214
215/// Top-level gate entrypoint.
216///
217/// Takes a post-variable-substitution `Expr` (the executor's shared
218/// `execute_evaluate_with` body resolves variables before invoking
219/// the gate, per `B_cost_gate.md` §2 "Designed shared body"). The
220/// two-arg [`check_query_root`] convenience wrapper exists for
221/// callers (e.g. CLI ad-hoc usages) that have a `&Query` and no
222/// variable map.
223///
224/// # Errors
225///
226/// Returns [`CostGateError::QueryTooBroad`] when the query shape is
227/// structurally unbounded over an arena of the given size and the
228/// scope-coupling rule is not satisfied.
229pub fn check_query(
230 expr: &Expr,
231 node_count: usize,
232 cfg: &CostGateConfig,
233) -> Result<(), CostGateError> {
234 walk_expr(expr, /*scope_in_scope=*/ false, node_count, cfg)
235}
236
237/// Convenience wrapper for callers that hold a [`Query`] root.
238///
239/// # Errors
240///
241/// As [`check_query`].
242pub fn check_query_root(
243 query: &Query,
244 node_count: usize,
245 cfg: &CostGateConfig,
246) -> Result<(), CostGateError> {
247 check_query(&query.root, node_count, cfg)
248}
249
250/// Standalone shape check for a regex pattern with no surrounding
251/// AST (used by `sqry-cli`'s `sqry search` subcommand at
252/// `commands/search.rs:527`, which has no parsed query context but
253/// still needs to refuse pathologically broad regexes before
254/// `RegexBuilder::build`).
255///
256/// `B_cost_gate.md` §4 "CLI sqry search" + §B5 / §1: skips the
257/// scope-coupling rule and applies only the anchor / prefix /
258/// minimum-length checks. `node_count_threshold` still applies —
259/// passing `None` (or `Some(0)`) disables the cap entirely.
260///
261/// # Errors
262///
263/// Returns [`CostGateError::QueryTooBroad`] when the pattern fails
264/// every shape check AND the node-count threshold is exceeded.
265pub fn check_regex_pattern_text(
266 pattern: &str,
267 node_count: usize,
268 cfg: &CostGateConfig,
269) -> Result<(), CostGateError> {
270 if !cap_engaged(node_count, cfg) {
271 return Ok(());
272 }
273 if regex_shape_is_acceptable(pattern, cfg) {
274 return Ok(());
275 }
276 Err(CostGateError::QueryTooBroad {
277 field: "search".to_string(),
278 op: " ",
279 pattern: format!("/{pattern}/"),
280 node_count,
281 node_limit: cfg.node_count_threshold.unwrap_or(0),
282 scope_hint: SCOPE_FILTER_FIELDS.join(", "),
283 min_prefix_len: cfg.min_prefix_len,
284 doc_url: QUERY_TOO_BROAD_DOC_URL,
285 })
286}
287
288// ────────────────────────────── internals ─────────────────────────────────
289
/// Cost class of a single condition. The gate only needs three
/// classes: cheap (always fine), medium (fine), prohibitive
/// (requires coupling). Within prohibitive there is no further
/// distinction — see `B_cost_gate.md` §1 for the cost-class table.
///
/// The gate treats `Cheap` and `Medium` identically (both pass);
/// only `Prohibitive` can lead to a rejection.
enum Class {
    /// Always accepted.
    Cheap,
    /// Accepted; costlier than `Cheap` but still considered bounded.
    Medium,
    /// Rejected unless the arena is under the cap or a scope filter
    /// is in effect.
    Prohibitive,
}
299
300fn cap_engaged(node_count: usize, cfg: &CostGateConfig) -> bool {
301 match cfg.node_count_threshold {
302 Some(0) | None => false,
303 Some(threshold) => node_count > threshold,
304 }
305}
306
307fn walk_expr(
308 expr: &Expr,
309 scope_in_scope: bool,
310 node_count: usize,
311 cfg: &CostGateConfig,
312) -> Result<(), CostGateError> {
313 match expr {
314 Expr::Condition(cond) => walk_condition(cond, scope_in_scope, node_count, cfg),
315 Expr::And(operands) => {
316 // Coupling: at least one operand at THIS level must be a
317 // cheap scope-filter `Condition`. Inherit from outer
318 // scope; do NOT compute a cumulative coupling state
319 // across nested AND levels (the design's §"Coupling
320 // rule" is per-level: an AND chain that contains a
321 // cheap kind/lang/path/file at any nesting depth is
322 // coupled).
323 let coupled = scope_in_scope || operands.iter().any(is_scope_filter_at);
324 for op in operands {
325 walk_expr(op, coupled, node_count, cfg)?;
326 }
327 Ok(())
328 }
329 Expr::Or(branches) => {
330 // Inside Or, each branch must independently satisfy the
331 // rule. An Or branch with a prohibitive leaf and no
332 // cheap sibling fails the whole query.
333 for br in branches {
334 walk_expr(br, scope_in_scope, node_count, cfg)?;
335 }
336 Ok(())
337 }
338 Expr::Not(inner) => {
339 // Negation does not reduce cost (negating a cheap
340 // filter still requires evaluating the inner predicate).
341 // Inspect the inner with the same coupling state.
342 walk_expr(inner, scope_in_scope, node_count, cfg)
343 }
344 Expr::Join(join) => {
345 // Both sides walked independently; the join evaluator
346 // itself bounds row count via per-side selectivity, so a
347 // per-side check is sufficient (per `B_cost_gate.md`
348 // §"Coupling rule").
349 walk_expr(&join.left, scope_in_scope, node_count, cfg)?;
350 walk_expr(&join.right, scope_in_scope, node_count, cfg)
351 }
352 }
353}
354
355fn walk_condition(
356 cond: &Condition,
357 scope_in_scope: bool,
358 node_count: usize,
359 cfg: &CostGateConfig,
360) -> Result<(), CostGateError> {
361 // Recurse into subqueries: a `callers:(<inner>)` predicate
362 // inherits the worst class of its inner expression; the
363 // subquery is walked under the SAME coupling state because
364 // (per `B_cost_gate.md` §"Coupling rule") subquery results are
365 // joined back into the outer match set rather than independently
366 // selecting rows.
367 if let Value::Subquery(inner) = &cond.value {
368 walk_expr(inner, scope_in_scope, node_count, cfg)?;
369 }
370
371 // Variables resolve to one of the other Value variants before
372 // the gate runs (cluster A's executor calls
373 // `resolve_variables` first). If a Variable somehow reaches
374 // here it must be `Cheap` to avoid spurious rejections.
375 if matches!(cond.value, Value::Variable(_)) {
376 return Ok(());
377 }
378
379 let class = classify_condition(cond, cfg);
380 match class {
381 Class::Cheap | Class::Medium => Ok(()),
382 Class::Prohibitive => {
383 if !cap_engaged(node_count, cfg) {
384 // Below the arena-size cap: prohibitive shapes are
385 // allowed unconditionally so the gate never fires
386 // on small test fixtures.
387 return Ok(());
388 }
389 if scope_in_scope {
390 return Ok(());
391 }
392 Err(build_query_too_broad(cond, node_count, cfg))
393 }
394 }
395}
396
397fn classify_condition(cond: &Condition, cfg: &CostGateConfig) -> Class {
398 let field = cond.field.as_str();
399 match (&cond.value, &cond.operator) {
400 // Equal-operator conditions on indexed fields are always
401 // cheap regardless of value.
402 (Value::String(_), Operator::Equal)
403 | (Value::Boolean(_), Operator::Equal)
404 | (Value::Number(_), Operator::Equal) => Class::Cheap,
405 // String literal and `Equal` against a name field is cheap
406 // (auxiliary `name_index` hit). Same for path globs.
407 (Value::Regex(rv), Operator::Regex) => regex_class(field, &rv.pattern, cfg),
408 // Range comparisons on numeric fields are bounded by index
409 // count.
410 (_, Operator::Greater | Operator::Less | Operator::GreaterEq | Operator::LessEq) => {
411 Class::Medium
412 }
413 // Subquery values: the subquery walk above already validated
414 // the inner; the outer condition's classification is medium
415 // (the executor walks the subquery's matched-set and joins
416 // against the outer field's index — bounded by the smaller
417 // side).
418 (Value::Subquery(_), _) => Class::Medium,
419 // Variable values (only reachable if `resolve_variables`
420 // skipped them). Conservative cheap classification.
421 (Value::Variable(_), _) => Class::Cheap,
422 // Default: anything else is medium (single-equal on a
423 // non-name field).
424 _ => Class::Medium,
425 }
426}
427
428/// Classify a regex value against a target field. Combines anchor
429/// detection + literal-prefix extraction + `Hir::minimum_len`
430/// (per `B_cost_gate.md` §"Regex shape rules").
431fn regex_class(field: &str, pattern: &str, cfg: &CostGateConfig) -> Class {
432 // Some fields (e.g. `kind`, `lang`) have a small enumerated
433 // value space, so a regex-over-the-value is medium even if
434 // unanchored.
435 if matches!(field, "kind" | "lang" | "language") {
436 return Class::Medium;
437 }
438 if regex_shape_is_acceptable(pattern, cfg) {
439 Class::Medium
440 } else {
441 Class::Prohibitive
442 }
443}
444
445/// Returns `true` when the regex pattern is shape-acceptable:
446/// either anchored with a sufficient literal prefix OR has a
447/// `Hir::minimum_len` ≥ `cfg.min_literal_len`.
448fn regex_shape_is_acceptable(pattern: &str, cfg: &CostGateConfig) -> bool {
449 let Ok(hir) = regex_syntax::parse(pattern) else {
450 // A pattern that fails parse-time cannot reach the executor
451 // (the validator rejects it earlier); be permissive here so
452 // the gate never produces false positives on syntactically
453 // valid-but-unusual patterns the validator accepted.
454 return true;
455 };
456
457 // Literal-prefix extraction. `Extractor::extract` returns a
458 // `Seq` of literal candidates; the longest one is the
459 // contribution we care about.
460 let mut extractor = regex_syntax::hir::literal::Extractor::new();
461 extractor.kind(regex_syntax::hir::literal::ExtractKind::Prefix);
462 let prefixes = extractor.extract(&hir);
463 let longest_prefix = prefixes
464 .literals()
465 .map(|lits| {
466 lits.iter()
467 .map(|lit| lit.as_bytes().len())
468 .max()
469 .unwrap_or(0)
470 })
471 .unwrap_or(0);
472 // Strict `>` comparison: a literal prefix of EXACTLY
473 // `min_prefix_len` chars is the "border-tight" case the design
474 // §6 row `gate_rejects_short_anchored_regex_below_prefix_len`
475 // pins as REJECT (a 1-char prefix at threshold 3 must reject;
476 // a 4-char prefix at threshold 3 must accept). Strict `>`
477 // satisfies both directions cleanly.
478 if longest_prefix > cfg.min_prefix_len {
479 return true;
480 }
481
482 // Fallback: `Hir::minimum_len()`. Pattern with `min_len >
483 // min_literal_len` (e.g. `/.*deserialize.*/`) is acceptable
484 // even without a usable prefix. Strict `>` matches the §6
485 // row pair `gate_rejects_bare_unanchored_substring_regex`
486 // (`/.*foo.*/`, len=3, threshold=3 → REJECT) vs
487 // `gate_allows_long_required_literal_without_anchor`
488 // (`/.*deserialize.*/`, len=11, threshold=3 → ACCEPT).
489 if let Some(min_len) = hir.properties().minimum_len()
490 && min_len > cfg.min_literal_len
491 {
492 return true;
493 }
494
495 false
496}
497
498fn is_scope_filter_at(expr: &Expr) -> bool {
499 if let Expr::Condition(cond) = expr {
500 let f = cond.field.as_str();
501 if SCOPE_FILTER_FIELDS.contains(&f) {
502 // Bare-presence (any operator + value) of one of the
503 // scope-filter fields is sufficient — the design's
504 // §"Coupling rule" treats `kind:function` and
505 // `kind~=function|method` symmetrically (both narrow
506 // the arena via the `kind_index`).
507 return true;
508 }
509 }
510 false
511}
512
513fn build_query_too_broad(
514 cond: &Condition,
515 node_count: usize,
516 cfg: &CostGateConfig,
517) -> CostGateError {
518 let field = cond.field.as_str().to_string();
519 let op = match cond.operator {
520 Operator::Equal => ":",
521 Operator::Regex => "~=",
522 // Comparison operators are never prohibitive in the current
523 // classification, but if a future change reaches here keep
524 // a stable mapping.
525 Operator::Greater => ">",
526 Operator::Less => "<",
527 Operator::GreaterEq => ">=",
528 Operator::LessEq => "<=",
529 };
530 let pattern = match &cond.value {
531 Value::String(s) => s.clone(),
532 Value::Regex(rv) => format!("/{}/", rv.pattern),
533 Value::Number(n) => n.to_string(),
534 Value::Boolean(b) => b.to_string(),
535 Value::Variable(name) => format!("${name}"),
536 Value::Subquery(_) => "(<subquery>)".to_string(),
537 };
538 CostGateError::QueryTooBroad {
539 field,
540 op,
541 pattern,
542 node_count,
543 node_limit: cfg.node_count_threshold.unwrap_or(0),
544 scope_hint: SCOPE_FILTER_FIELDS.join(", "),
545 min_prefix_len: cfg.min_prefix_len,
546 doc_url: QUERY_TOO_BROAD_DOC_URL,
547 }
548}
549
#[cfg(test)]
mod tests {
    use super::*;
    use crate::query::QueryParser;

    /// Parses `q`, panicking on a syntax error.
    fn parse(q: &str) -> Query {
        QueryParser::parse_query(q).expect("parse")
    }

    /// Default gate configuration.
    fn cfg() -> CostGateConfig {
        CostGateConfig::default()
    }

    /// Default configuration with the node-count cap removed.
    fn cfg_no_cap() -> CostGateConfig {
        CostGateConfig {
            node_count_threshold: None,
            ..CostGateConfig::default()
        }
    }

    // ────────── §6 unit-test rows ──────────

    #[test]
    fn gate_rejects_bare_unanchored_suffix_regex() {
        let q = parse("name~=/.*_set$/");
        let err = check_query_root(&q, 200_000, &cfg()).expect_err("must reject");
        assert!(
            matches!(err, CostGateError::QueryTooBroad { ref field, .. } if field == "name"),
            "expected name-field rejection, got {err:?}"
        );
    }

    #[test]
    fn gate_rejects_bare_unanchored_substring_regex() {
        let q = parse("name~=/.*foo.*/");
        let err = check_query_root(&q, 200_000, &cfg()).expect_err("must reject");
        let CostGateError::QueryTooBroad { ref pattern, .. } = err;
        assert!(
            pattern.contains(".*foo.*"),
            "envelope must echo the offending pattern, got {pattern}"
        );
    }

    #[test]
    fn gate_allows_unanchored_regex_below_node_threshold() {
        let q = parse("name~=/.*_set$/");
        check_query_root(&q, 1_000, &cfg()).expect("below threshold must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_kind_coupling() {
        let q = parse("kind:function AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("kind coupling must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_lang_coupling() {
        let q = parse("lang:rust AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("lang coupling must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_path_coupling() {
        let q = parse("path:src/**/*.rs AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("path coupling must pass");
    }

    #[test]
    fn gate_allows_anchored_prefix_regex_without_coupling() {
        // `^get_` literal prefix is 4 chars > DEFAULT_MIN_PREFIX_LEN (3);
        // the prefix comparison is strict.
        let q = parse("name~=/^get_/");
        check_query_root(&q, 1_000_000, &cfg()).expect("anchored prefix must pass");
    }

    #[test]
    fn gate_allows_long_required_literal_without_anchor() {
        // `deserialize` is 11 chars > DEFAULT_MIN_LITERAL_LEN (4).
        let q = parse("name~=/.*deserialize.*/");
        check_query_root(&q, 1_000_000, &cfg()).expect("long literal must pass");
    }

    #[test]
    fn gate_rejects_short_anchored_regex_below_prefix_len() {
        // `^a` prefix is 1 char, below DEFAULT_MIN_PREFIX_LEN (3).
        let q = parse("name~=/^a/");
        let err = check_query_root(&q, 1_000_000, &cfg()).expect_err("short prefix must reject");
        assert!(matches!(err, CostGateError::QueryTooBroad { .. }));
    }

    #[test]
    fn gate_rejects_or_branch_with_uncoupled_prohibitive() {
        // First branch is coupled, second is not — Or branches walk
        // independently so the whole query is rejected.
        let q = parse("(kind:function AND name~=/.*_set$/) OR (name~=/.*foo.*/)");
        let err = check_query_root(&q, 1_000_000, &cfg()).expect_err("uncoupled Or must reject");
        let CostGateError::QueryTooBroad { ref pattern, .. } = err;
        assert!(
            pattern.contains(".*foo.*"),
            "rejection must point at the uncoupled branch, got {pattern}"
        );
    }

    #[test]
    fn gate_passes_known_good_canonical_queries() {
        let canonical = [
            "kind:function",
            "name:foo",
            "path:src/**/*.rs",
            "lang:rust AND kind:method",
            "kind:method AND callers:foo",
        ];
        for q in canonical {
            let parsed = parse(q);
            check_query_root(&parsed, 1_000_000, &cfg())
                .unwrap_or_else(|e| panic!("canonical query {q:?} must pass; got {e:?}"));
        }
    }

    #[test]
    fn gate_threshold_disabled_when_node_count_threshold_is_none() {
        let q = parse("name~=/.*_set$/");
        check_query_root(&q, 1_000_000_000, &cfg_no_cap())
            .expect("None threshold must disable cap entirely");
    }

    #[test]
    fn gate_threshold_disabled_when_node_count_threshold_is_zero() {
        let q = parse("name~=/.*_set$/");
        let cfg = CostGateConfig {
            node_count_threshold: Some(0),
            ..CostGateConfig::default()
        };
        check_query_root(&q, 1_000_000_000, &cfg).expect("Some(0) threshold must disable cap");
    }

    #[test]
    fn gate_recurses_into_subquery_value() {
        // Uncoupled case: there is no scope filter anywhere, so the
        // prohibitive regex INSIDE the subquery must surface as a
        // rejection naming the inner field. This holds under either
        // reading of the coupling rule and proves the gate actually
        // descends into `Value::Subquery`.
        let uncoupled = parse("callers:(name~=/.*foo.*/)");
        let err = check_query_root(&uncoupled, 1_000_000, &cfg())
            .expect_err("uncoupled prohibitive subquery must reject");
        let CostGateError::QueryTooBroad { ref field, .. } = err;
        assert_eq!(field, "name");

        // Coupled case: `callers:(<inner>)` sits next to an outer
        // `kind:function`. Per `B_cost_gate.md` §"Coupling rule" the
        // design allows both interpretations (inner inherits the
        // outer coupling, or coupling applies only at the same
        // level); the current implementation inherits the outer
        // scope. Either outcome is accepted here, but IF the gate
        // rejects, the rejection must point at the inner `name`
        // predicate. When cluster-C's runtime budget lands, the inner
        // subquery will also be bounded by the per-call row budget.
        let coupled = parse("kind:function AND callers:(name~=/.*foo.*/)");
        let outcome = check_query_root(&coupled, 1_000_000, &cfg());
        if let Err(CostGateError::QueryTooBroad { ref field, .. }) = outcome {
            assert_eq!(field, "name");
        }
    }

    // ────────── envelope helpers ──────────

    #[test]
    fn to_query_too_broad_details_emits_canonical_cc2_seven_keys() {
        let err = CostGateError::QueryTooBroad {
            field: "name".into(),
            op: "~=",
            pattern: "/.*_set$/".into(),
            node_count: 312_487,
            node_limit: 50_000,
            scope_hint: SCOPE_FILTER_FIELDS.join(", "),
            min_prefix_len: 3,
            doc_url: QUERY_TOO_BROAD_DOC_URL,
        };
        let details = err.to_query_too_broad_details();
        assert_eq!(details["source"], SOURCE_STATIC_ESTIMATE);
        assert_eq!(details["kind"], KIND_QUERY_TOO_BROAD);
        assert_eq!(details["estimated_visited_nodes"], 312_487);
        // Cluster-B iter-2: `limit` is the configured threshold, NOT
        // the snapshot's node_count.
        assert_eq!(details["limit"], 50_000);
        // Cluster-B iter-2: predicate_shape is field+op-only, value
        // elided. No raw user pattern reaches the wire.
        let shape = details["predicate_shape"].as_str().unwrap();
        assert_eq!(shape, "name~=<elided>");
        assert!(!shape.contains("_set"));
        assert!(details["suggested_predicates"].is_array());
        assert_eq!(details["doc_url"], QUERY_TOO_BROAD_DOC_URL);
    }

    #[test]
    fn cli_search_shape_check_rejects_unanchored_substring() {
        let err = check_regex_pattern_text(".*foo.*", 1_000_000, &cfg())
            .expect_err("CLI shape check must reject .*foo.*");
        assert!(matches!(err, CostGateError::QueryTooBroad { .. }));
    }

    #[test]
    fn cli_search_shape_check_passes_anchored_prefix() {
        check_regex_pattern_text("^get_", 1_000_000, &cfg())
            .expect("anchored prefix must pass CLI shape check");
    }

    #[test]
    fn cli_search_shape_check_passes_long_literal() {
        check_regex_pattern_text(".*deserialize.*", 1_000_000, &cfg())
            .expect("long literal must pass CLI shape check");
    }

    #[test]
    fn cli_search_shape_check_below_threshold_passes() {
        check_regex_pattern_text(".*foo.*", 1_000, &cfg())
            .expect("below cap must pass shape check");
    }

    #[test]
    fn cli_search_shape_check_engages_only_strictly_above_threshold() {
        // The cap comparison is strict: an arena of exactly the
        // threshold size is still exempt, one node more is not.
        let at_cap = CostGateConfig::DEFAULT_NODE_COUNT_THRESHOLD;
        check_regex_pattern_text(".*foo.*", at_cap, &cfg())
            .expect("node_count == threshold must pass");
        check_regex_pattern_text(".*foo.*", at_cap + 1, &cfg())
            .expect_err("node_count above threshold must reject");
    }
}