sqry_core/query/cost_gate.rs
1//! Pre-flight cost gate (P0-1 mitigation per `B_cost_gate.md` §§1–6
2//! and `00_contracts.md` §3.CC-2).
3//!
4//! Inspects a parsed query AST plus the current snapshot's arena
5//! size and rejects shapes whose evaluator cost is structurally
6//! unbounded — e.g. an unanchored regex over the full node arena
7//! with no scope coupling. Runs synchronously **before** the
8//! executor enters [`tokio::task::spawn_blocking`] so the blocking
9//! pool can never be filled by a query this gate rejects.
10//!
11//! The gate is a wire-stable contract:
12//! [`CostGateError::QueryTooBroad`] surfaces through the MCP layer
13//! as the canonical 4-key envelope with `kind: "query_too_broad"`,
14//! JSON-RPC code `-32602`. The CC-2 7-key `details` payload (the
//! caller's responsibility to assemble — see
//! [`CostGateError::to_query_too_broad_details`]) is round-tripped verbatim
17//! across both transports (`sqry-mcp::RpcError::query_too_broad`
18//! and `sqry-daemon::DaemonError::QueryTooBroad`).
19
20// The `QueryTooBroad` variant deliberately carries diagnostic
21// context (field name, operator, sanitised pattern, configured node
22// limit, scope hint, doc URL) so the wire envelope and the human
23// message stay coherent. Boxing would obscure the API for a single
24// per-query allocation that only happens on the rejection path.
25#![allow(clippy::result_large_err)]
26
27use crate::query::types::{Condition, Expr, Operator, Query, Value};
28use thiserror::Error;
29
/// Documentation URL surfaced in the canonical `details.doc_url` field
/// (per `B_cost_gate.md` §3 + `00_contracts.md` §3.CC-2). Mirrored as
/// `sqry_mcp::error::QUERY_TOO_BROAD_DOC_URL` for the wire envelope.
pub const QUERY_TOO_BROAD_DOC_URL: &str = "https://docs.verivus.dev/sqry/query-cost-gate";

/// Kind tag for the cost-gate rejection envelope. Mirrored across
/// `sqry_mcp::error::KIND_QUERY_TOO_BROAD` and
/// `sqry_daemon::error::KIND_QUERY_TOO_BROAD`.
pub const KIND_QUERY_TOO_BROAD: &str = "query_too_broad";

/// Source discriminator value for static-estimate rejections (per
/// CC-2). The runtime-budget path (cluster-C `QueryBudget`) uses
/// `"runtime_budget"` instead.
pub const SOURCE_STATIC_ESTIMATE: &str = "static_estimate";

/// Fields that satisfy the "scope coupling" rule (per
/// `B_cost_gate.md` §B5 + `00_contracts.md` §3.CC-2). When the
/// node-count cap is engaged, a prohibitive regex predicate passes
/// the gate only if an enclosing `Expr::And` has a direct
/// `Condition` operand whose `Field::as_str()` is one of these.
/// Both the `lang` and `language` spellings are listed.
///
/// Consumed verbatim by cluster-F's user-facing recovery copy;
/// the wire envelope's `details.suggested_predicates` field is
/// computed from this list.
pub const SCOPE_FILTER_FIELDS: &[&str] = &["kind", "lang", "language", "path", "file"];
55
/// Tunable thresholds for the gate. Defaults are the
/// `CostGateConfig::DEFAULT_*` associated constants. Each threshold
/// is also overridable via the `SQRY_COST_GATE_*` environment
/// variables consumed at config load (the daemon's `DaemonConfig`
/// already plumbs these — see
/// `sqry-daemon/src/config.rs::CostGateConfigView` for the source
/// of truth, and per `B_cost_gate.md` §B6 + `00_contracts.md`
/// §3.CC-3).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CostGateConfig {
    /// Literal-prefix length threshold. A regex whose longest
    /// extracted literal prefix is **strictly longer** than this is
    /// not treated as prohibitive. Default
    /// [`Self::DEFAULT_MIN_PREFIX_LEN`] (`3`).
    pub min_prefix_len: usize,
    /// Minimum-match-length threshold, consulted when no usable
    /// prefix exists. A regex whose `Hir` minimum length is
    /// **strictly greater** than this is not treated as
    /// prohibitive. Default [`Self::DEFAULT_MIN_LITERAL_LEN`] (`4`).
    pub min_literal_len: usize,
    /// Arena-size cap: prohibitive shapes are allowed without scope
    /// coupling unless the node count is strictly greater than this
    /// value. `None` (or `Some(0)`) disables the gate entirely —
    /// prohibitive shapes are then never rejected, whatever the
    /// arena size. Default `Some(50_000)` per `B_cost_gate.md` §1 +
    /// `§B6`.
    pub node_count_threshold: Option<usize>,
}
77
// Documented defaults — the standalone-MCP and daemon-default
// configurations both use these values (per `00_contracts.md`
// §3.CC-3 "the standalone default matches the daemon default
// exactly").
impl CostGateConfig {
    /// Default `min_prefix_len` threshold (per `B_cost_gate.md` §1).
    pub const DEFAULT_MIN_PREFIX_LEN: usize = 3;
    /// Default minimum-literal-length threshold. Set to `4` so the
    /// `B_cost_gate.md` §6 reject rows for `.*foo.*` (3-char
    /// literal) and `.*_set$` (4-char literal) both reject under
    /// strict `>` comparison while `.*deserialize.*` (11-char
    /// literal) still accepts. The design's iter-3 §1 prose
    /// mentioned `MIN_LITERAL_LEN = 3` but the test-row pair
    /// resolves to `4` — recorded as a design-prose vs test-row
    /// discrepancy in
    /// `docs/development/sqry-mcp-flakiness-fix-impl/b/04_PROGRESS-cost_gate.md`.
    pub const DEFAULT_MIN_LITERAL_LEN: usize = 4;
    /// Default arena-size cap above which prohibitive shapes need
    /// scope coupling (per `B_cost_gate.md` §1).
    pub const DEFAULT_NODE_COUNT_THRESHOLD: usize = 50_000;
}
99
100impl Default for CostGateConfig {
101 fn default() -> Self {
102 Self {
103 min_prefix_len: Self::DEFAULT_MIN_PREFIX_LEN,
104 min_literal_len: Self::DEFAULT_MIN_LITERAL_LEN,
105 node_count_threshold: Some(Self::DEFAULT_NODE_COUNT_THRESHOLD),
106 }
107 }
108}
109
/// Verdict the gate returns to the caller.
///
/// The MCP boundary (in `sqry-mcp/src/server.rs` for the standalone
/// path and `sqry-daemon/src/mcp_host/error_map.rs` for the daemon
/// path) downcasts this and reshapes it into the canonical CC-2
/// `query_too_broad` envelope.
#[derive(Debug, Clone, Error, PartialEq, Eq)]
pub enum CostGateError {
    /// A predicate's evaluator cost is structurally unbounded over
    /// the current snapshot's arena and the query lacks the scope
    /// coupling that would narrow it.
    //
    // NOTE(review): the message advertises a literal prefix of
    // "≥ {min_prefix_len} chars", but `regex_shape_is_acceptable`
    // compares with strict `>`, so a prefix of exactly
    // `min_prefix_len` chars is still rejected. Confirm which side
    // the design intends before changing either.
    #[error(
        "query rejected: predicate `{field}{op}{pattern}` is unbounded over {node_count} nodes; \
         add a scope filter (one of: {scope_hint}) or anchor the regex with `^` / a literal \
         prefix \u{2265} {min_prefix_len} chars. See {doc_url}"
    )]
    QueryTooBroad {
        /// Offending predicate's field name (e.g. `name`). The
        /// standalone regex check reports `search`.
        field: String,
        /// Operator string (`":"` for `Equal`, `"~="` for `Regex`;
        /// the standalone regex check uses a single space).
        op: &'static str,
        /// Offending value/regex pattern, surrounded by `/.../` for
        /// regexes (matches `B_cost_gate.md` §5 user-message shape).
        /// The raw pattern is RETAINED for the human message but is
        /// **not** echoed into the structured `predicate_shape` field
        /// (cluster-B iter-2 fix — codex review flagged the raw-value
        /// leak).
        pattern: String,
        /// Snapshot arena size at gate time — surfaces in the
        /// envelope as `details.estimated_visited_nodes` and in the
        /// human message as the literal node count.
        node_count: usize,
        /// Configured static node-limit threshold (the value the
        /// gate compared `node_count` against). Surfaces as
        /// `details.limit`. Distinct from `node_count` so the wire
        /// envelope reports both the cap and the snapshot size
        /// (cluster-B iter-2 fix — codex review flagged the
        /// `limit = node_count` mistake).
        node_limit: usize,
        /// Comma-joined list of fields that would satisfy coupling.
        /// Always derived from [`SCOPE_FILTER_FIELDS`].
        scope_hint: String,
        /// Active `CostGateConfig::min_prefix_len` — the threshold
        /// the regex's literal-prefix length was compared against.
        /// Echoed so MCP clients can render specific recovery
        /// suggestions.
        min_prefix_len: usize,
        /// Doc URL for the recovery flow ([`QUERY_TOO_BROAD_DOC_URL`]).
        doc_url: &'static str,
    },
}
160
161impl CostGateError {
162 /// Build the canonical CC-2 7-key `details` payload for the MCP
163 /// envelope. Source discriminator is hard-wired to
164 /// `"static_estimate"` since this error class is the pre-flight
165 /// path; runtime-budget rejections (cluster-C) construct their
166 /// own variant with `source = "runtime_budget"` while reusing the
167 /// same other six keys (per `00_contracts.md` §3.CC-2 "B extends
168 /// `details.source` in place").
169 #[must_use]
170 pub fn to_query_too_broad_details(&self) -> serde_json::Value {
171 let Self::QueryTooBroad {
172 field,
173 op,
174 pattern: _,
175 node_count,
176 node_limit,
177 scope_hint: _,
178 min_prefix_len: _,
179 doc_url,
180 } = self;
181 // `suggested_predicates` is the canonical scope-filter list
182 // (the user-message `scope_hint` is the same data rendered
183 // as a comma-string; the structured `details` field is an
184 // array so MCP clients can render their own suggestion UI).
185 let suggested: Vec<&str> = SCOPE_FILTER_FIELDS.to_vec();
186 // Cluster-B iter-2 BLOCKER 2: emit a sanitized
187 // field+operator-only `predicate_shape` (no raw user pattern,
188 // no path values). The 256-byte cap matches
189 // `Expr::shape_summary` (cluster-C). We elide the value with
190 // `<elided>` so consumers can still distinguish regex
191 // (`name~=<elided>`) from literal (`name:<elided>`) without
192 // any user-influenced bytes reaching the wire.
193 let mut predicate_shape = format!("{field}{op}<elided>");
194 if predicate_shape.len() > 256 {
195 predicate_shape.truncate(253);
196 predicate_shape.push('\u{2026}');
197 }
198 serde_json::json!({
199 "source": SOURCE_STATIC_ESTIMATE,
200 "kind": KIND_QUERY_TOO_BROAD,
201 // `limit` is the configured static node-count threshold
202 // (`cfg.node_count_threshold`); `estimated_visited_nodes`
203 // is the snapshot's actual node count. Cluster-B iter-2
204 // BLOCKER 2: previously both fields carried the same
205 // value, hiding the cap from the wire envelope.
206 "estimated_visited_nodes": node_count,
207 "limit": node_limit,
208 "predicate_shape": predicate_shape,
209 "suggested_predicates": suggested,
210 "doc_url": doc_url,
211 })
212 }
213}
214
215/// Top-level gate entrypoint.
216///
217/// Takes a post-variable-substitution `Expr` (the executor's shared
218/// `execute_evaluate_with` body resolves variables before invoking
219/// the gate, per `B_cost_gate.md` §2 "Designed shared body"). The
220/// two-arg [`check_query_root`] convenience wrapper exists for
221/// callers (e.g. CLI ad-hoc usages) that have a `&Query` and no
222/// variable map.
223///
224/// # Errors
225///
226/// Returns [`CostGateError::QueryTooBroad`] when the query shape is
227/// structurally unbounded over an arena of the given size and the
228/// scope-coupling rule is not satisfied.
229pub fn check_query(
230 expr: &Expr,
231 node_count: usize,
232 cfg: &CostGateConfig,
233) -> Result<(), CostGateError> {
234 walk_expr(expr, /*scope_in_scope=*/ false, node_count, cfg)
235}
236
237/// Convenience wrapper for callers that hold a [`Query`] root.
238///
239/// # Errors
240///
241/// As [`check_query`].
242pub fn check_query_root(
243 query: &Query,
244 node_count: usize,
245 cfg: &CostGateConfig,
246) -> Result<(), CostGateError> {
247 check_query(&query.root, node_count, cfg)
248}
249
250/// Standalone shape check for a regex pattern with no surrounding
251/// AST (used by `sqry-cli`'s `sqry search` subcommand at
252/// `commands/search.rs:527`, which has no parsed query context but
253/// still needs to refuse pathologically broad regexes before
254/// `RegexBuilder::build`).
255///
256/// `B_cost_gate.md` §4 "CLI sqry search" + §B5 / §1: skips the
257/// scope-coupling rule and applies only the anchor / prefix /
258/// minimum-length checks. `node_count_threshold` still applies —
259/// passing `None` (or `Some(0)`) disables the cap entirely.
260///
261/// # Errors
262///
263/// Returns [`CostGateError::QueryTooBroad`] when the pattern fails
264/// every shape check AND the node-count threshold is exceeded.
265pub fn check_regex_pattern_text(
266 pattern: &str,
267 node_count: usize,
268 cfg: &CostGateConfig,
269) -> Result<(), CostGateError> {
270 if !cap_engaged(node_count, cfg) {
271 return Ok(());
272 }
273 if regex_shape_is_acceptable(pattern, cfg) {
274 return Ok(());
275 }
276 Err(CostGateError::QueryTooBroad {
277 field: "search".to_string(),
278 op: " ",
279 pattern: format!("/{pattern}/"),
280 node_count,
281 node_limit: cfg.node_count_threshold.unwrap_or(0),
282 scope_hint: SCOPE_FILTER_FIELDS.join(", "),
283 min_prefix_len: cfg.min_prefix_len,
284 doc_url: QUERY_TOO_BROAD_DOC_URL,
285 })
286}
287
288// ────────────────────────────── internals ─────────────────────────────────
289
/// Cost class of a single condition. The gate only needs three
/// classes: cheap (always fine), medium (fine), prohibitive
/// (requires coupling). Within prohibitive there is no further
/// distinction — see `B_cost_gate.md` §1 for the cost-class table.
enum Class {
    /// Always accepted by the gate.
    Cheap,
    /// Accepted by the gate; currently handled exactly like `Cheap`.
    Medium,
    /// Rejected when the node-count cap is engaged and no scope
    /// filter couples the predicate.
    Prohibitive,
}
299
300fn cap_engaged(node_count: usize, cfg: &CostGateConfig) -> bool {
301 match cfg.node_count_threshold {
302 Some(0) | None => false,
303 Some(threshold) => node_count > threshold,
304 }
305}
306
307fn walk_expr(
308 expr: &Expr,
309 scope_in_scope: bool,
310 node_count: usize,
311 cfg: &CostGateConfig,
312) -> Result<(), CostGateError> {
313 match expr {
314 Expr::Condition(cond) => walk_condition(cond, scope_in_scope, node_count, cfg),
315 Expr::And(operands) => {
316 // Coupling: at least one operand at THIS level must be a
317 // cheap scope-filter `Condition`. Inherit from outer
318 // scope; do NOT compute a cumulative coupling state
319 // across nested AND levels (the design's §"Coupling
320 // rule" is per-level: an AND chain that contains a
321 // cheap kind/lang/path/file at any nesting depth is
322 // coupled).
323 let coupled = scope_in_scope || operands.iter().any(is_scope_filter_at);
324 for op in operands {
325 walk_expr(op, coupled, node_count, cfg)?;
326 }
327 Ok(())
328 }
329 Expr::Or(branches) => {
330 // Inside Or, each branch must independently satisfy the
331 // rule. An Or branch with a prohibitive leaf and no
332 // cheap sibling fails the whole query.
333 for br in branches {
334 walk_expr(br, scope_in_scope, node_count, cfg)?;
335 }
336 Ok(())
337 }
338 Expr::Not(inner) => {
339 // Negation does not reduce cost (negating a cheap
340 // filter still requires evaluating the inner predicate).
341 // Inspect the inner with the same coupling state.
342 walk_expr(inner, scope_in_scope, node_count, cfg)
343 }
344 Expr::Join(join) => {
345 // Both sides walked independently; the join evaluator
346 // itself bounds row count via per-side selectivity, so a
347 // per-side check is sufficient (per `B_cost_gate.md`
348 // §"Coupling rule").
349 walk_expr(&join.left, scope_in_scope, node_count, cfg)?;
350 walk_expr(&join.right, scope_in_scope, node_count, cfg)
351 }
352 }
353}
354
355fn walk_condition(
356 cond: &Condition,
357 scope_in_scope: bool,
358 node_count: usize,
359 cfg: &CostGateConfig,
360) -> Result<(), CostGateError> {
361 // Recurse into subqueries: a `callers:(<inner>)` predicate
362 // inherits the worst class of its inner expression; the
363 // subquery is walked under the SAME coupling state because
364 // (per `B_cost_gate.md` §"Coupling rule") subquery results are
365 // joined back into the outer match set rather than independently
366 // selecting rows.
367 if let Value::Subquery(inner) = &cond.value {
368 walk_expr(inner, scope_in_scope, node_count, cfg)?;
369 }
370
371 // Variables resolve to one of the other Value variants before
372 // the gate runs (cluster A's executor calls
373 // `resolve_variables` first). If a Variable somehow reaches
374 // here it must be `Cheap` to avoid spurious rejections.
375 if matches!(cond.value, Value::Variable(_)) {
376 return Ok(());
377 }
378
379 let class = classify_condition(cond, cfg);
380 match class {
381 Class::Cheap | Class::Medium => Ok(()),
382 Class::Prohibitive => {
383 if !cap_engaged(node_count, cfg) {
384 // Below the arena-size cap: prohibitive shapes are
385 // allowed unconditionally so the gate never fires
386 // on small test fixtures.
387 return Ok(());
388 }
389 if scope_in_scope {
390 return Ok(());
391 }
392 Err(build_query_too_broad(cond, node_count, cfg))
393 }
394 }
395}
396
397fn classify_condition(cond: &Condition, cfg: &CostGateConfig) -> Class {
398 let field = cond.field.as_str();
399 match (&cond.value, &cond.operator) {
400 // Equal-operator conditions on indexed fields are always cheap
401 // regardless of value. Variable values are only reachable if
402 // `resolve_variables` skipped them, so keep their conservative
403 // cheap classification independent of the operator.
404 (Value::String(_) | Value::Boolean(_) | Value::Number(_), Operator::Equal)
405 | (Value::Variable(_), _) => Class::Cheap,
406 // String literal and `Equal` against a name field is cheap
407 // (auxiliary `name_index` hit). Same for path globs.
408 (Value::Regex(rv), Operator::Regex) => regex_class(field, &rv.pattern, cfg),
409 // Default: range comparisons, subquery joins, and anything
410 // else are medium (bounded by an index count or by the smaller
411 // matched-set side).
412 _ => Class::Medium,
413 }
414}
415
416/// Classify a regex value against a target field. Combines anchor
417/// detection + literal-prefix extraction + `Hir::minimum_len`
418/// (per `B_cost_gate.md` §"Regex shape rules").
419fn regex_class(field: &str, pattern: &str, cfg: &CostGateConfig) -> Class {
420 // Some fields (e.g. `kind`, `lang`) have a small enumerated
421 // value space, so a regex-over-the-value is medium even if
422 // unanchored.
423 if matches!(field, "kind" | "lang" | "language") {
424 return Class::Medium;
425 }
426 if regex_shape_is_acceptable(pattern, cfg) {
427 Class::Medium
428 } else {
429 Class::Prohibitive
430 }
431}
432
433/// Returns `true` when the regex pattern is shape-acceptable:
434/// either anchored with a sufficient literal prefix OR has a
435/// `Hir::minimum_len` ≥ `cfg.min_literal_len`.
436fn regex_shape_is_acceptable(pattern: &str, cfg: &CostGateConfig) -> bool {
437 let Ok(hir) = regex_syntax::parse(pattern) else {
438 // A pattern that fails parse-time cannot reach the executor
439 // (the validator rejects it earlier); be permissive here so
440 // the gate never produces false positives on syntactically
441 // valid-but-unusual patterns the validator accepted.
442 return true;
443 };
444
445 // Literal-prefix extraction. `Extractor::extract` returns a
446 // `Seq` of literal candidates; the longest one is the
447 // contribution we care about.
448 let mut extractor = regex_syntax::hir::literal::Extractor::new();
449 extractor.kind(regex_syntax::hir::literal::ExtractKind::Prefix);
450 let prefixes = extractor.extract(&hir);
451 let longest_prefix = prefixes.literals().map_or(0, |lits| {
452 lits.iter()
453 .map(|lit| lit.as_bytes().len())
454 .max()
455 .unwrap_or(0)
456 });
457 // Strict `>` comparison: a literal prefix of EXACTLY
458 // `min_prefix_len` chars is the "border-tight" case the design
459 // §6 row `gate_rejects_short_anchored_regex_below_prefix_len`
460 // pins as REJECT (a 1-char prefix at threshold 3 must reject;
461 // a 4-char prefix at threshold 3 must accept). Strict `>`
462 // satisfies both directions cleanly.
463 if longest_prefix > cfg.min_prefix_len {
464 return true;
465 }
466
467 // Fallback: `Hir::minimum_len()`. Pattern with `min_len >
468 // min_literal_len` (e.g. `/.*deserialize.*/`) is acceptable
469 // even without a usable prefix. Strict `>` matches the §6
470 // row pair `gate_rejects_bare_unanchored_substring_regex`
471 // (`/.*foo.*/`, len=3, threshold=3 → REJECT) vs
472 // `gate_allows_long_required_literal_without_anchor`
473 // (`/.*deserialize.*/`, len=11, threshold=3 → ACCEPT).
474 if let Some(min_len) = hir.properties().minimum_len()
475 && min_len > cfg.min_literal_len
476 {
477 return true;
478 }
479
480 false
481}
482
483fn is_scope_filter_at(expr: &Expr) -> bool {
484 if let Expr::Condition(cond) = expr {
485 let f = cond.field.as_str();
486 if SCOPE_FILTER_FIELDS.contains(&f) {
487 // Bare-presence (any operator + value) of one of the
488 // scope-filter fields is sufficient — the design's
489 // §"Coupling rule" treats `kind:function` and
490 // `kind~=function|method` symmetrically (both narrow
491 // the arena via the `kind_index`).
492 return true;
493 }
494 }
495 false
496}
497
498fn build_query_too_broad(
499 cond: &Condition,
500 node_count: usize,
501 cfg: &CostGateConfig,
502) -> CostGateError {
503 let field = cond.field.as_str().to_string();
504 let op = match cond.operator {
505 Operator::Equal => ":",
506 Operator::Regex => "~=",
507 // Comparison operators are never prohibitive in the current
508 // classification, but if a future change reaches here keep
509 // a stable mapping.
510 Operator::Greater => ">",
511 Operator::Less => "<",
512 Operator::GreaterEq => ">=",
513 Operator::LessEq => "<=",
514 };
515 let pattern = match &cond.value {
516 Value::String(s) => s.clone(),
517 Value::Regex(rv) => format!("/{}/", rv.pattern),
518 Value::Number(n) => n.to_string(),
519 Value::Boolean(b) => b.to_string(),
520 Value::Variable(name) => format!("${name}"),
521 Value::Subquery(_) => "(<subquery>)".to_string(),
522 };
523 CostGateError::QueryTooBroad {
524 field,
525 op,
526 pattern,
527 node_count,
528 node_limit: cfg.node_count_threshold.unwrap_or(0),
529 scope_hint: SCOPE_FILTER_FIELDS.join(", "),
530 min_prefix_len: cfg.min_prefix_len,
531 doc_url: QUERY_TOO_BROAD_DOC_URL,
532 }
533}
534
#[cfg(test)]
mod tests {
    use super::*;
    use crate::query::QueryParser;

    /// Parses `q`, panicking on a syntax error (test-only helper).
    fn parse(q: &str) -> Query {
        QueryParser::parse_query(q).expect("parse")
    }

    /// Default gate configuration.
    fn cfg() -> CostGateConfig {
        CostGateConfig::default()
    }

    /// Default configuration with the node-count cap disabled.
    fn cfg_no_cap() -> CostGateConfig {
        CostGateConfig {
            node_count_threshold: None,
            ..CostGateConfig::default()
        }
    }

    // ────────── §6 unit-test rows ──────────

    #[test]
    fn gate_rejects_bare_unanchored_suffix_regex() {
        let q = parse("name~=/.*_set$/");
        let err = check_query_root(&q, 200_000, &cfg()).expect_err("must reject");
        assert!(
            matches!(err, CostGateError::QueryTooBroad { ref field, .. } if field == "name"),
            "expected name-field rejection, got {err:?}"
        );
    }

    #[test]
    fn gate_rejects_bare_unanchored_substring_regex() {
        let q = parse("name~=/.*foo.*/");
        let err = check_query_root(&q, 200_000, &cfg()).expect_err("must reject");
        let CostGateError::QueryTooBroad { ref pattern, .. } = err;
        assert!(
            pattern.contains(".*foo.*"),
            "envelope must echo the offending pattern, got {pattern}"
        );
    }

    #[test]
    fn gate_allows_unanchored_regex_below_node_threshold() {
        let q = parse("name~=/.*_set$/");
        check_query_root(&q, 1_000, &cfg()).expect("below threshold must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_kind_coupling() {
        let q = parse("kind:function AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("kind coupling must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_lang_coupling() {
        let q = parse("lang:rust AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("lang coupling must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_path_coupling() {
        let q = parse("path:src/**/*.rs AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("path coupling must pass");
    }

    #[test]
    fn gate_allows_anchored_prefix_regex_without_coupling() {
        // `^get_` literal prefix is 4 chars, strictly greater than
        // DEFAULT_MIN_PREFIX_LEN (3).
        let q = parse("name~=/^get_/");
        check_query_root(&q, 1_000_000, &cfg()).expect("anchored prefix must pass");
    }

    #[test]
    fn gate_allows_long_required_literal_without_anchor() {
        // `deserialize` is 11 chars > DEFAULT_MIN_LITERAL_LEN (4).
        let q = parse("name~=/.*deserialize.*/");
        check_query_root(&q, 1_000_000, &cfg()).expect("long literal must pass");
    }

    #[test]
    fn gate_rejects_short_anchored_regex_below_prefix_len() {
        // `^a` prefix is 1 char, below DEFAULT_MIN_PREFIX_LEN (3).
        let q = parse("name~=/^a/");
        let err = check_query_root(&q, 1_000_000, &cfg()).expect_err("short prefix must reject");
        assert!(matches!(err, CostGateError::QueryTooBroad { .. }));
    }

    #[test]
    fn gate_rejects_or_branch_with_uncoupled_prohibitive() {
        // First branch is coupled, second is not — Or branches walk
        // independently so the whole query is rejected.
        let q = parse("(kind:function AND name~=/.*_set$/) OR (name~=/.*foo.*/)");
        let err = check_query_root(&q, 1_000_000, &cfg()).expect_err("uncoupled Or must reject");
        let CostGateError::QueryTooBroad { ref pattern, .. } = err;
        assert!(
            pattern.contains(".*foo.*"),
            "rejection must point at the uncoupled branch, got {pattern}"
        );
    }

    #[test]
    fn gate_passes_known_good_canonical_queries() {
        let canonical = [
            "kind:function",
            "name:foo",
            "path:src/**/*.rs",
            "lang:rust AND kind:method",
            "kind:method AND callers:foo",
        ];
        for q in canonical {
            let parsed = parse(q);
            check_query_root(&parsed, 1_000_000, &cfg())
                .unwrap_or_else(|e| panic!("canonical query {q:?} must pass; got {e:?}"));
        }
    }

    #[test]
    fn gate_threshold_disabled_when_node_count_threshold_is_none() {
        let q = parse("name~=/.*_set$/");
        check_query_root(&q, 1_000_000_000, &cfg_no_cap())
            .expect("None threshold must disable cap entirely");
    }

    #[test]
    fn gate_threshold_disabled_when_node_count_threshold_is_zero() {
        let q = parse("name~=/.*_set$/");
        let cfg = CostGateConfig {
            node_count_threshold: Some(0),
            ..CostGateConfig::default()
        };
        check_query_root(&q, 1_000_000_000, &cfg).expect("Some(0) threshold must disable cap");
    }

    #[test]
    fn gate_recurses_into_subquery_value() {
        // `callers:(<inner>)` — the inner expression is walked under
        // the coupling state of the condition that carries it (per
        // `B_cost_gate.md` §"Coupling rule": subquery results are
        // joined back into the outer match set). The outer
        // `kind:function` therefore couples the inner unanchored
        // regex and the query passes.
        //
        // This pins the implemented "inherit outer scope" choice.
        // The previous form of this test only asserted inside an
        // `if let Err(..)`, so it passed vacuously whichever way the
        // gate decided.
        let q = parse("kind:function AND callers:(name~=/.*foo.*/)");
        check_query_root(&q, 1_000_000, &cfg())
            .expect("outer kind coupling must extend into the subquery");
    }

    // ────────── envelope helpers ──────────

    #[test]
    fn to_query_too_broad_details_emits_canonical_cc2_seven_keys() {
        let err = CostGateError::QueryTooBroad {
            field: "name".into(),
            op: "~=",
            pattern: "/.*_set$/".into(),
            node_count: 312_487,
            node_limit: 50_000,
            scope_hint: SCOPE_FILTER_FIELDS.join(", "),
            min_prefix_len: 3,
            doc_url: QUERY_TOO_BROAD_DOC_URL,
        };
        let details = err.to_query_too_broad_details();
        assert_eq!(details["source"], SOURCE_STATIC_ESTIMATE);
        assert_eq!(details["kind"], KIND_QUERY_TOO_BROAD);
        assert_eq!(details["estimated_visited_nodes"], 312_487);
        // Cluster-B iter-2: `limit` is the configured threshold, NOT
        // the snapshot's node_count.
        assert_eq!(details["limit"], 50_000);
        // Cluster-B iter-2: predicate_shape is field+op-only, value
        // elided. No raw user pattern reaches the wire.
        let shape = details["predicate_shape"].as_str().unwrap();
        assert_eq!(shape, "name~=<elided>");
        assert!(!shape.contains("_set"));
        assert!(details["suggested_predicates"].is_array());
        assert_eq!(details["doc_url"], QUERY_TOO_BROAD_DOC_URL);
    }

    #[test]
    fn cli_search_shape_check_rejects_unanchored_substring() {
        let err = check_regex_pattern_text(".*foo.*", 1_000_000, &cfg())
            .expect_err("CLI shape check must reject .*foo.*");
        assert!(matches!(err, CostGateError::QueryTooBroad { .. }));
    }

    #[test]
    fn cli_search_shape_check_passes_anchored_prefix() {
        check_regex_pattern_text("^get_", 1_000_000, &cfg())
            .expect("anchored prefix must pass CLI shape check");
    }

    #[test]
    fn cli_search_shape_check_passes_long_literal() {
        check_regex_pattern_text(".*deserialize.*", 1_000_000, &cfg())
            .expect("long literal must pass CLI shape check");
    }

    #[test]
    fn cli_search_shape_check_below_threshold_passes() {
        check_regex_pattern_text(".*foo.*", 1_000, &cfg())
            .expect("below cap must pass shape check");
    }
}