sqry-core 14.0.4

Core library for sqry - semantic code search engine
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
//! Pre-flight cost gate (P0-1 mitigation per `B_cost_gate.md` §§1–6
//! and `00_contracts.md` §3.CC-2).
//!
//! Inspects a parsed query AST plus the current snapshot's arena
//! size and rejects shapes whose evaluator cost is structurally
//! unbounded — e.g. an unanchored regex over the full node arena
//! with no scope coupling. Runs synchronously **before** the
//! executor enters [`tokio::task::spawn_blocking`] so the blocking
//! pool can never be filled by a query this gate rejects.
//!
//! The gate is a wire-stable contract:
//! [`CostGateError::QueryTooBroad`] surfaces through the MCP layer
//! as the canonical 4-key envelope with `kind: "query_too_broad"`,
//! JSON-RPC code `-32602`. The CC-2 7-key `details` payload (the
//! caller's responsibility to assemble — see
//! [`CostGateError::to_query_too_broad_details`]) is round-tripped
//! verbatim across both transports
//! (`sqry-mcp::RpcError::query_too_broad` and
//! `sqry-daemon::DaemonError::QueryTooBroad`).

// The `QueryTooBroad` variant deliberately carries diagnostic
// context (field name, operator, sanitised pattern, configured node
// limit, scope hint, doc URL) so the wire envelope and the human
// message stay coherent. Boxing would obscure the API for a single
// per-query allocation that only happens on the rejection path.
#![allow(clippy::result_large_err)]

use crate::query::types::{Condition, Expr, Operator, Query, Value};
use thiserror::Error;

/// Doc URL surfaced in the canonical `details.doc_url` field (per
/// `B_cost_gate.md` §3 + `00_contracts.md` §3.CC-2). Mirrored as
/// `sqry_mcp::error::QUERY_TOO_BROAD_DOC_URL` for the wire envelope.
///
/// Every [`CostGateError::QueryTooBroad`] constructed in this module
/// stores this value in its `doc_url` field, so it also closes the
/// human-readable message ("See <url>").
pub const QUERY_TOO_BROAD_DOC_URL: &str = "https://docs.verivus.dev/sqry/query-cost-gate";

/// Kind tag for the cost-gate rejection envelope. Mirrored across
/// `sqry_mcp::error::KIND_QUERY_TOO_BROAD` and
/// `sqry_daemon::error::KIND_QUERY_TOO_BROAD`.
///
/// Emitted as the `kind` key of the payload built by
/// [`CostGateError::to_query_too_broad_details`].
pub const KIND_QUERY_TOO_BROAD: &str = "query_too_broad";

/// Source discriminator value for static-estimate rejections (per
/// CC-2). The runtime-budget path (cluster-C `QueryBudget`) uses
/// `"runtime_budget"` instead.
///
/// Emitted as the `source` key of the payload built by
/// [`CostGateError::to_query_too_broad_details`].
pub const SOURCE_STATIC_ESTIMATE: &str = "static_estimate";

/// Fields that satisfy the "scope coupling" rule (per
/// `B_cost_gate.md` §B5 + `00_contracts.md` §3.CC-2). Once the
/// arena-size cap is engaged, a prohibitive regex predicate passes
/// the gate only if an enclosing `Expr::And` has a direct
/// `Condition` operand whose `Field::as_str()` is one of these.
///
/// Matching is by field name only: the operand's operator and value
/// are not inspected, and the coupling state is inherited by every
/// expression nested below that `And`.
///
/// NOTE(review): because only the name is checked, an unanchored
/// `path`/`file` regex appears to couple both itself and its
/// siblings — confirm this is intended.
///
/// Consumed verbatim by cluster-F's user-facing recovery copy;
/// the wire envelope's `details.suggested_predicates` field is
/// computed from this list, and the human message's `scope_hint` is
/// this list joined with `", "`.
pub const SCOPE_FILTER_FIELDS: &[&str] = &["kind", "lang", "language", "path", "file"];

/// Tunable thresholds for the gate. Defaults follow the design's
/// §B6 / §1.4 numbers, except `min_literal_len`, whose default is
/// `4` (see [`CostGateConfig::DEFAULT_MIN_LITERAL_LEN`] for the
/// rationale). Each threshold is also overridable via the
/// `SQRY_COST_GATE_*` environment variables consumed at config
/// load (the daemon's `DaemonConfig` already plumbs these — see
/// `sqry-daemon/src/config.rs::CostGateConfigView` for the source
/// of truth, and per `B_cost_gate.md` §B6 + `00_contracts.md`
/// §3.CC-3).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CostGateConfig {
    /// Literal-prefix length threshold. A regex is shape-acceptable
    /// when its longest extracted literal prefix is **strictly
    /// longer** than this value. Default `3`
    /// ([`CostGateConfig::DEFAULT_MIN_PREFIX_LEN`]).
    pub min_prefix_len: usize,
    /// `Hir::minimum_len` threshold used when the prefix check does
    /// not pass. A regex is shape-acceptable when its minimum match
    /// length is **strictly greater** than this value. Default `4`
    /// ([`CostGateConfig::DEFAULT_MIN_LITERAL_LEN`]).
    pub min_literal_len: usize,
    /// Arena-size cap. Prohibitive shapes are only rejected when the
    /// snapshot's node count strictly exceeds this value; at or
    /// below it they are allowed without scope coupling. `None` (or
    /// `Some(0)`) disables the gate's rejections entirely, because
    /// the cap is never considered engaged.
    /// Default `Some(50_000)` per `B_cost_gate.md` §1 + `§B6`.
    pub node_count_threshold: Option<usize>,
}

impl CostGateConfig {
    // Documented defaults — the standalone-MCP and daemon-default
    // configurations both use these values (per
    // `00_contracts.md` §3.CC-3 "the standalone default matches
    // the daemon default exactly").

    /// Default `min_prefix_len` threshold (per `B_cost_gate.md` §1).
    pub const DEFAULT_MIN_PREFIX_LEN: usize = 3;
    /// Default minimum-literal-length threshold. Set to `4` so the
    /// `B_cost_gate.md` §6 reject rows for `.*foo.*` (3-char
    /// literal) and `.*_set$` (4-char literal) both reject under
    /// strict `>` comparison while `.*deserialize.*` (11-char
    /// literal) still accepts. The design's iter-3 §1 prose
    /// mentioned `MIN_LITERAL_LEN = 3` but the test-row pair
    /// resolves to `4` — recorded as design-prose vs test-row
    /// discrepancy in
    /// `docs/development/sqry-mcp-flakiness-fix-impl/b/04_PROGRESS-cost_gate.md`.
    pub const DEFAULT_MIN_LITERAL_LEN: usize = 4;
    /// Default arena-size cap above which prohibitive shapes need
    /// scope coupling (per `B_cost_gate.md` §1).
    pub const DEFAULT_NODE_COUNT_THRESHOLD: usize = 50_000;
}

impl Default for CostGateConfig {
    /// Returns the documented default thresholds: the three
    /// `DEFAULT_*` associated constants, with the node-count cap
    /// enabled.
    fn default() -> Self {
        let node_count_threshold = Some(Self::DEFAULT_NODE_COUNT_THRESHOLD);
        Self {
            node_count_threshold,
            min_literal_len: Self::DEFAULT_MIN_LITERAL_LEN,
            min_prefix_len: Self::DEFAULT_MIN_PREFIX_LEN,
        }
    }
}

/// Verdict the gate returns to the caller.
///
/// The MCP boundary (in `sqry-mcp/src/server.rs` for the standalone
/// path and `sqry-daemon/src/mcp_host/error_map.rs` for the daemon
/// path) downcasts this and reshapes it into the canonical CC-2
/// `query_too_broad` envelope.
///
/// NOTE(review): the rendered message advises a literal prefix
/// "≥ `min_prefix_len` chars", while `regex_shape_is_acceptable`
/// accepts a prefix only when it is strictly longer than
/// `min_prefix_len`. A prefix of exactly `min_prefix_len` chars is
/// therefore still rejected — confirm which side is intended.
#[derive(Debug, Clone, Error, PartialEq, Eq)]
pub enum CostGateError {
    /// A predicate's evaluator cost is structurally unbounded over
    /// the current snapshot's arena and the query lacks the scope
    /// coupling that would narrow it.
    #[error(
        "query rejected: predicate `{field}{op}{pattern}` is unbounded over {node_count} nodes; \
         add a scope filter (one of: {scope_hint}) or anchor the regex with `^` / a literal \
         prefix \u{2265} {min_prefix_len} chars. See {doc_url}"
    )]
    QueryTooBroad {
        /// Offending predicate's field name (e.g. `name`). The
        /// standalone pattern check (`check_regex_pattern_text`)
        /// uses the fixed label `search`.
        field: String,
        /// Operator token rendered between field and pattern:
        /// `":"` for `Equal`, `"~="` for `Regex`, `">"` / `"<"` /
        /// `">="` / `"<="` for the comparison operators, and `" "`
        /// for the standalone pattern check.
        op: &'static str,
        /// Offending value/regex pattern, surrounded by `/.../` for
        /// regexes (matches `B_cost_gate.md` §5 user-message shape).
        /// The raw pattern is RETAINED for the human message but is
        /// **not** echoed into the structured `predicate_shape` field
        /// (cluster-B iter-2 fix — codex review flagged the raw-value
        /// leak).
        pattern: String,
        /// Snapshot arena size at gate time — surfaces in the
        /// envelope as `details.estimated_visited_nodes` and in the
        /// human message as the literal node count.
        node_count: usize,
        /// Configured static node-limit threshold (the value the
        /// gate compared `node_count` against). Surfaces as
        /// `details.limit`. Distinct from `node_count` so the wire
        /// envelope reports both the cap and the snapshot size
        /// (cluster-B iter-2 fix — codex review flagged the
        /// `limit = node_count` mistake).
        node_limit: usize,
        /// Comma-joined list of fields that would satisfy coupling.
        /// Always derived from [`SCOPE_FILTER_FIELDS`].
        scope_hint: String,
        /// The active `CostGateConfig::min_prefix_len` at gate time,
        /// i.e. the threshold the regex's literal-prefix length was
        /// compared against. Echoed so the message and MCP clients
        /// can render specific recovery suggestions.
        min_prefix_len: usize,
        /// Doc URL for the recovery flow ([`QUERY_TOO_BROAD_DOC_URL`]).
        doc_url: &'static str,
    },
}

impl CostGateError {
    /// Build the canonical CC-2 7-key `details` payload for the MCP
    /// envelope.
    ///
    /// The emitted keys are `source`, `kind`,
    /// `estimated_visited_nodes`, `limit`, `predicate_shape`,
    /// `suggested_predicates` and `doc_url`. The source
    /// discriminator is hard-wired to `"static_estimate"` since this
    /// error class is the pre-flight path; runtime-budget rejections
    /// (cluster-C) construct their own variant with
    /// `source = "runtime_budget"` while reusing the same other six
    /// keys (per `00_contracts.md` §3.CC-2 "B extends
    /// `details.source` in place").
    ///
    /// `predicate_shape` is `"{field}{op}<elided>"`, capped at 256
    /// bytes. The raw pattern is never included. When the cap is
    /// exceeded the string is cut on a UTF-8 character boundary and
    /// terminated with `…`, so this method never panics on a
    /// non-ASCII field name.
    #[must_use]
    pub fn to_query_too_broad_details(&self) -> serde_json::Value {
        /// Byte cap for `predicate_shape`; matches
        /// `Expr::shape_summary` (cluster-C).
        const SHAPE_MAX_BYTES: usize = 256;
        /// Marker appended when `predicate_shape` is truncated.
        const ELLIPSIS: char = '\u{2026}';

        // Every field is named explicitly (no `..`) so adding a
        // field to the variant forces a decision here about whether
        // it belongs on the wire.
        let Self::QueryTooBroad {
            field,
            op,
            pattern: _,
            node_count,
            node_limit,
            scope_hint: _,
            min_prefix_len: _,
            doc_url,
        } = self;

        // Field + operator only; the value is replaced by
        // `<elided>` so consumers can still tell regex
        // (`name~=<elided>`) from literal (`name:<elided>`) without
        // the raw pattern reaching the wire.
        let mut predicate_shape = format!("{field}{op}<elided>");
        if predicate_shape.len() > SHAPE_MAX_BYTES {
            // `String::truncate` panics when the cut is not on a
            // char boundary, so back off to the nearest one at or
            // below the budget. Index 0 is always a boundary, which
            // bounds the loop.
            let mut cut = SHAPE_MAX_BYTES - ELLIPSIS.len_utf8();
            while !predicate_shape.is_char_boundary(cut) {
                cut -= 1;
            }
            predicate_shape.truncate(cut);
            predicate_shape.push(ELLIPSIS);
        }

        serde_json::json!({
            "source": SOURCE_STATIC_ESTIMATE,
            "kind": KIND_QUERY_TOO_BROAD,
            // `estimated_visited_nodes` is the snapshot's actual
            // node count; `limit` is the configured static
            // threshold it was compared against. They are distinct
            // so the envelope reports both.
            "estimated_visited_nodes": node_count,
            "limit": node_limit,
            "predicate_shape": predicate_shape,
            // Same data as the human message's `scope_hint`, but as
            // an array so MCP clients can render their own
            // suggestion UI.
            "suggested_predicates": SCOPE_FILTER_FIELDS,
            "doc_url": doc_url,
        })
    }
}

/// Primary entry point of the pre-flight cost gate.
///
/// Expects an `Expr` whose variables have already been substituted
/// (the executor's shared `execute_evaluate_with` body resolves
/// them before calling in, per `B_cost_gate.md` §2 "Designed shared
/// body"). Callers that only hold a `&Query` and no variable map
/// (e.g. CLI ad-hoc usages) can use the [`check_query_root`]
/// convenience wrapper instead.
///
/// # Errors
///
/// Returns [`CostGateError::QueryTooBroad`] when the query shape is
/// structurally unbounded over an arena of the given size and the
/// scope-coupling rule is not satisfied.
pub fn check_query(
    expr: &Expr,
    node_count: usize,
    cfg: &CostGateConfig,
) -> Result<(), CostGateError> {
    // The walk starts uncoupled: coupling is only ever introduced
    // by an `And` encountered during the walk.
    let coupled_at_root = false;
    walk_expr(expr, coupled_at_root, node_count, cfg)
}

/// Runs the gate over the root expression of a parsed [`Query`].
///
/// Thin adapter around [`check_query`] for callers that hold a
/// whole query rather than a bare expression.
///
/// # Errors
///
/// As [`check_query`].
pub fn check_query_root(
    query: &Query,
    node_count: usize,
    cfg: &CostGateConfig,
) -> Result<(), CostGateError> {
    let root = &query.root;
    check_query(root, node_count, cfg)
}

/// Shape-only gate for a bare regex pattern that has no surrounding
/// query AST (used by `sqry-cli`'s `sqry search` subcommand at
/// `commands/search.rs:527`, which needs to refuse pathologically
/// broad regexes before `RegexBuilder::build`).
///
/// Per `B_cost_gate.md` §4 "CLI sqry search" + §B5 / §1 there is no
/// scope-coupling rule on this path: only the literal-prefix and
/// minimum-length checks are applied. The node-count cap still
/// applies — a `node_count_threshold` of `None` (or `Some(0)`)
/// disables rejection entirely, and so does a `node_count` that does
/// not exceed the threshold.
///
/// The rejection reports the fixed field label `search`, a single
/// space as the operator, and the pattern wrapped in `/.../`.
///
/// # Errors
///
/// Returns [`CostGateError::QueryTooBroad`] when the node-count
/// threshold is exceeded AND the pattern fails every shape check.
pub fn check_regex_pattern_text(
    pattern: &str,
    node_count: usize,
    cfg: &CostGateConfig,
) -> Result<(), CostGateError> {
    // Cap check first: when it is not engaged the pattern is never
    // parsed at all.
    let passes = !cap_engaged(node_count, cfg) || regex_shape_is_acceptable(pattern, cfg);
    if passes {
        return Ok(());
    }

    let rejection = CostGateError::QueryTooBroad {
        field: String::from("search"),
        op: " ",
        pattern: format!("/{pattern}/"),
        node_count,
        node_limit: cfg.node_count_threshold.unwrap_or(0),
        scope_hint: SCOPE_FILTER_FIELDS.join(", "),
        min_prefix_len: cfg.min_prefix_len,
        doc_url: QUERY_TOO_BROAD_DOC_URL,
    };
    Err(rejection)
}

// ────────────────────────────── internals ─────────────────────────────────

/// Cost class of a single condition. The gate only needs three
/// classes: cheap (always fine), medium (fine), prohibitive
/// (requires coupling). Within prohibitive there is no further
/// distinction — see `B_cost_gate.md` §1 for the cost-class table.
///
/// `walk_condition` treats `Cheap` and `Medium` identically (both
/// pass); the split is informational only.
enum Class {
    /// Always passes the gate.
    Cheap,
    /// Passes the gate; reported separately from `Cheap` but handled
    /// the same way.
    Medium,
    /// Rejected unless the arena-size cap is not engaged or the
    /// condition is scope-coupled.
    Prohibitive,
}

/// Reports whether the arena-size cap applies to this snapshot.
///
/// The cap is engaged only when a non-zero threshold is configured
/// and `node_count` strictly exceeds it. `None` and `Some(0)` both
/// mean "cap disabled".
fn cap_engaged(node_count: usize, cfg: &CostGateConfig) -> bool {
    cfg.node_count_threshold
        .is_some_and(|threshold| threshold != 0 && node_count > threshold)
}

/// Recursively applies the gate to `expr`.
///
/// `scope_in_scope` is the coupling state inherited from enclosing
/// expressions: `true` once some enclosing `And` had a direct
/// scope-filter operand. The first rejection found aborts the walk
/// and is returned.
fn walk_expr(
    expr: &Expr,
    scope_in_scope: bool,
    node_count: usize,
    cfg: &CostGateConfig,
) -> Result<(), CostGateError> {
    match expr {
        Expr::Condition(leaf) => walk_condition(leaf, scope_in_scope, node_count, cfg),
        // Negation does not reduce cost (the inner predicate still
        // has to be evaluated), so the inner expression is checked
        // under the unchanged coupling state.
        Expr::Not(negated) => walk_expr(negated, scope_in_scope, node_count, cfg),
        // Every `Or` branch must pass on its own; branches do not
        // couple each other. A single uncoupled prohibitive branch
        // rejects the whole query.
        Expr::Or(alternatives) => alternatives
            .iter()
            .try_for_each(|alt| walk_expr(alt, scope_in_scope, node_count, cfg)),
        // Coupling is established when THIS `And` has a direct
        // operand that is a scope-filter condition, or when it was
        // already established by an enclosing expression. Once set
        // it is passed down to every operand, so nested expressions
        // inherit it; it never propagates upwards or sideways.
        Expr::And(conjuncts) => {
            let coupled = scope_in_scope || conjuncts.iter().any(is_scope_filter_at);
            conjuncts
                .iter()
                .try_for_each(|operand| walk_expr(operand, coupled, node_count, cfg))
        }
        // Each join side is checked independently, left first; the
        // join evaluator bounds row count via per-side selectivity
        // (per `B_cost_gate.md` §"Coupling rule").
        Expr::Join(join) => {
            walk_expr(&join.left, scope_in_scope, node_count, cfg)?;
            walk_expr(&join.right, scope_in_scope, node_count, cfg)
        }
    }
}

/// Applies the gate to a single leaf condition.
///
/// A condition is rejected only when all three hold: it classifies
/// as prohibitive, the arena-size cap is engaged, and no enclosing
/// scope filter couples it.
fn walk_condition(
    cond: &Condition,
    scope_in_scope: bool,
    node_count: usize,
    cfg: &CostGateConfig,
) -> Result<(), CostGateError> {
    match &cond.value {
        // A `callers:(<inner>)`-style predicate: the inner
        // expression is checked under the SAME coupling state,
        // because (per `B_cost_gate.md` §"Coupling rule") subquery
        // results are joined back into the outer match set rather
        // than selecting rows independently.
        Value::Subquery(inner) => walk_expr(inner, scope_in_scope, node_count, cfg)?,
        // Variables are expected to be resolved before the gate
        // runs (cluster A's executor calls `resolve_variables`
        // first). One that slips through is let pass so the gate
        // cannot reject spuriously.
        Value::Variable(_) => return Ok(()),
        _ => {}
    }

    // Below the arena-size cap prohibitive shapes are allowed
    // unconditionally, so the gate never fires on small fixtures.
    let rejected = matches!(classify_condition(cond, cfg), Class::Prohibitive)
        && cap_engaged(node_count, cfg)
        && !scope_in_scope;

    if rejected {
        Err(build_query_too_broad(cond, node_count, cfg))
    } else {
        Ok(())
    }
}

/// Assigns a cost [`Class`] to one condition from its value kind
/// and operator.
///
/// Only a regex value with the regex operator can ever be
/// `Prohibitive`; that decision is delegated to `regex_class`.
/// Arm order matters: the first matching arm wins.
fn classify_condition(cond: &Condition, cfg: &CostGateConfig) -> Class {
    match (&cond.value, &cond.operator) {
        // Equality against a string / boolean / number literal is
        // cheap regardless of the value.
        (Value::String(_) | Value::Boolean(_) | Value::Number(_), Operator::Equal) => Class::Cheap,
        // Regex predicates are classified by field and pattern
        // shape.
        (Value::Regex(regex), Operator::Regex) => {
            regex_class(cond.field.as_str(), &regex.pattern, cfg)
        }
        // Range comparisons (bounded by index count) take priority
        // over the value-kind arms below. Subquery values are also
        // medium: their inner expression is validated separately by
        // the subquery walk in `walk_condition`.
        (_, Operator::Greater | Operator::Less | Operator::GreaterEq | Operator::LessEq)
        | (Value::Subquery(_), _) => Class::Medium,
        // Variable values are only reachable if variable resolution
        // skipped them; classified cheap to stay conservative.
        (Value::Variable(_), _) => Class::Cheap,
        // Any remaining value/operator combination is medium.
        _ => Class::Medium,
    }
}

/// Classifies a regex predicate on `field`.
///
/// Regexes over `kind`, `lang` or `language` are always medium:
/// those fields have a small enumerated value space, so even an
/// unanchored pattern is bounded. For every other field the verdict
/// comes from `regex_shape_is_acceptable` (literal-prefix
/// extraction plus `Hir::minimum_len`, per `B_cost_gate.md`
/// §"Regex shape rules"): acceptable shapes are medium, the rest
/// prohibitive.
fn regex_class(field: &str, pattern: &str, cfg: &CostGateConfig) -> Class {
    let enumerated_field = matches!(field, "kind" | "lang" | "language");
    // Short-circuit: the pattern is only parsed for non-enumerated
    // fields.
    if enumerated_field || regex_shape_is_acceptable(pattern, cfg) {
        Class::Medium
    } else {
        Class::Prohibitive
    }
}

/// Returns `true` when the regex pattern is shape-acceptable.
///
/// A pattern is acceptable when ANY of the following holds:
///
/// 1. it does not parse with `regex_syntax::parse` (the gate stays
///    permissive; rejecting invalid patterns is left to the
///    validator, which per the design runs earlier);
/// 2. the longest literal produced by prefix extraction is
///    **strictly longer** than `cfg.min_prefix_len`;
/// 3. `Hir::minimum_len` is known and **strictly greater** than
///    `cfg.min_literal_len`.
///
/// Both comparisons are strict, so a value exactly at the threshold
/// does not qualify. With the defaults (`3` / `4`): `/^get_/`
/// passes on its 4-byte prefix, `/.*deserialize.*/` passes on its
/// minimum length of 11, while `/.*foo.*/` (3) and `/.*_set$/` (4)
/// are rejected. This function does not inspect anchors itself; it
/// relies entirely on the extractor's prefix literals.
fn regex_shape_is_acceptable(pattern: &str, cfg: &CostGateConfig) -> bool {
    use regex_syntax::hir::literal::{ExtractKind, Extractor};

    let hir = match regex_syntax::parse(pattern) {
        Ok(parsed) => parsed,
        // Never produce a false positive on a pattern this parser
        // cannot handle.
        Err(_) => return true,
    };

    // Prefix literals: when the extractor yields a finite set of
    // candidates, the longest one (in bytes) is what counts; no
    // finite set means a prefix length of 0.
    let mut prefix_extractor = Extractor::new();
    prefix_extractor.kind(ExtractKind::Prefix);
    let prefix_seq = prefix_extractor.extract(&hir);
    let longest_prefix = prefix_seq
        .literals()
        .into_iter()
        .flatten()
        .map(|literal| literal.as_bytes().len())
        .max()
        .unwrap_or(0);

    let prefix_is_long_enough = longest_prefix > cfg.min_prefix_len;

    // Fallback on the minimum match length, for patterns such as
    // `/.*deserialize.*/` that have no usable prefix but still
    // require a long literal somewhere.
    prefix_is_long_enough
        || hir
            .properties()
            .minimum_len()
            .is_some_and(|min_len| min_len > cfg.min_literal_len)
}

/// Reports whether `expr` is itself a condition on one of the
/// [`SCOPE_FILTER_FIELDS`].
///
/// Only the field name is examined: any operator and any value
/// qualify, so `kind:function` and `kind~=function|method` are
/// treated symmetrically (per the design's §"Coupling rule", both
/// narrow the arena via the `kind_index`). Non-condition
/// expressions (`And`, `Or`, `Not`, joins) never qualify, even if
/// they contain such a condition.
fn is_scope_filter_at(expr: &Expr) -> bool {
    matches!(
        expr,
        Expr::Condition(cond) if SCOPE_FILTER_FIELDS.contains(&cond.field.as_str())
    )
}

/// Assembles the [`CostGateError::QueryTooBroad`] rejection for
/// `cond`.
///
/// The condition's field, operator and value are rendered into
/// their textual forms (regexes wrapped in `/.../`, variables
/// prefixed with `$`, subqueries shown as a placeholder). The
/// snapshot size and the active thresholds from `cfg` are attached
/// alongside; a missing node-count threshold is reported as `0`.
fn build_query_too_broad(
    cond: &Condition,
    node_count: usize,
    cfg: &CostGateConfig,
) -> CostGateError {
    let rendered_value = match &cond.value {
        Value::Regex(regex) => format!("/{}/", regex.pattern),
        Value::String(text) => text.clone(),
        Value::Number(number) => number.to_string(),
        Value::Boolean(flag) => flag.to_string(),
        Value::Variable(name) => format!("${name}"),
        Value::Subquery(_) => "(<subquery>)".to_string(),
    };

    // Comparison operators are never prohibitive in the current
    // classification; they are mapped anyway so the rendering stays
    // stable if that ever changes.
    let op_token: &'static str = match cond.operator {
        Operator::Regex => "~=",
        Operator::Equal => ":",
        Operator::GreaterEq => ">=",
        Operator::LessEq => "<=",
        Operator::Greater => ">",
        Operator::Less => "<",
    };

    CostGateError::QueryTooBroad {
        field: cond.field.as_str().to_owned(),
        op: op_token,
        pattern: rendered_value,
        node_count,
        node_limit: cfg.node_count_threshold.unwrap_or(0),
        scope_hint: SCOPE_FILTER_FIELDS.join(", "),
        min_prefix_len: cfg.min_prefix_len,
        doc_url: QUERY_TOO_BROAD_DOC_URL,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::query::QueryParser;

    /// Parses a query string, panicking on parse failure.
    fn parse(q: &str) -> Query {
        QueryParser::parse_query(q).expect("parse")
    }

    /// Default gate configuration.
    fn cfg() -> CostGateConfig {
        CostGateConfig::default()
    }

    /// Default configuration with the node-count cap disabled.
    fn cfg_no_cap() -> CostGateConfig {
        CostGateConfig {
            node_count_threshold: None,
            ..CostGateConfig::default()
        }
    }

    // ────────── §6 unit-test rows ──────────

    #[test]
    fn gate_rejects_bare_unanchored_suffix_regex() {
        let q = parse("name~=/.*_set$/");
        let err = check_query_root(&q, 200_000, &cfg()).expect_err("must reject");
        assert!(
            matches!(err, CostGateError::QueryTooBroad { ref field, .. } if field == "name"),
            "expected name-field rejection, got {err:?}"
        );
    }

    #[test]
    fn gate_rejects_bare_unanchored_substring_regex() {
        let q = parse("name~=/.*foo.*/");
        let err = check_query_root(&q, 200_000, &cfg()).expect_err("must reject");
        let CostGateError::QueryTooBroad { ref pattern, .. } = err;
        assert!(
            pattern.contains(".*foo.*"),
            "envelope must echo the offending pattern, got {pattern}"
        );
    }

    #[test]
    fn gate_allows_unanchored_regex_below_node_threshold() {
        let q = parse("name~=/.*_set$/");
        check_query_root(&q, 1_000, &cfg()).expect("below threshold must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_kind_coupling() {
        let q = parse("kind:function AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("kind coupling must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_lang_coupling() {
        let q = parse("lang:rust AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("lang coupling must pass");
    }

    #[test]
    fn gate_allows_unanchored_regex_with_path_coupling() {
        let q = parse("path:src/**/*.rs AND name~=/.*_set$/");
        check_query_root(&q, 1_000_000, &cfg()).expect("path coupling must pass");
    }

    #[test]
    fn gate_allows_anchored_prefix_regex_without_coupling() {
        // `^get_` literal prefix is 4 chars, strictly greater than
        // DEFAULT_MIN_PREFIX_LEN (3) — the gate's comparison is
        // strict `>`.
        let q = parse("name~=/^get_/");
        check_query_root(&q, 1_000_000, &cfg()).expect("anchored prefix must pass");
    }

    #[test]
    fn gate_allows_long_required_literal_without_anchor() {
        // `deserialize` is 11 chars > DEFAULT_MIN_LITERAL_LEN (4).
        let q = parse("name~=/.*deserialize.*/");
        check_query_root(&q, 1_000_000, &cfg()).expect("long literal must pass");
    }

    #[test]
    fn gate_rejects_short_anchored_regex_below_prefix_len() {
        // `^a` prefix is 1 char, below DEFAULT_MIN_PREFIX_LEN (3).
        let q = parse("name~=/^a/");
        let err = check_query_root(&q, 1_000_000, &cfg()).expect_err("short prefix must reject");
        assert!(matches!(err, CostGateError::QueryTooBroad { .. }));
    }

    #[test]
    fn gate_rejects_or_branch_with_uncoupled_prohibitive() {
        // First branch is coupled, second is not — Or branches walk
        // independently so the whole query is rejected.
        let q = parse("(kind:function AND name~=/.*_set$/) OR (name~=/.*foo.*/)");
        let err = check_query_root(&q, 1_000_000, &cfg()).expect_err("uncoupled Or must reject");
        let CostGateError::QueryTooBroad { ref pattern, .. } = err;
        assert!(
            pattern.contains(".*foo.*"),
            "rejection must point at the uncoupled branch, got {pattern}"
        );
    }

    #[test]
    fn gate_passes_known_good_canonical_queries() {
        let canonical = [
            "kind:function",
            "name:foo",
            "path:src/**/*.rs",
            "lang:rust AND kind:method",
            "kind:method AND callers:foo",
        ];
        for q in canonical {
            let parsed = parse(q);
            check_query_root(&parsed, 1_000_000, &cfg())
                .unwrap_or_else(|e| panic!("canonical query {q:?} must pass; got {e:?}"));
        }
    }

    #[test]
    fn gate_threshold_disabled_when_node_count_threshold_is_none() {
        let q = parse("name~=/.*_set$/");
        check_query_root(&q, 1_000_000_000, &cfg_no_cap())
            .expect("None threshold must disable cap entirely");
    }

    #[test]
    fn gate_threshold_disabled_when_node_count_threshold_is_zero() {
        let q = parse("name~=/.*_set$/");
        let cfg = CostGateConfig {
            node_count_threshold: Some(0),
            ..CostGateConfig::default()
        };
        check_query_root(&q, 1_000_000_000, &cfg).expect("Some(0) threshold must disable cap");
    }

    #[test]
    fn gate_recurses_into_subquery_value() {
        // `callers:(<inner>)` — the gate walks into the subquery
        // value. The inner expression is an unanchored regex with no
        // scope filter of its own; the only scope filter is the
        // outer `kind:function`.
        let q = parse("kind:function AND callers:(name~=/.*foo.*/)");
        let err = check_query_root(&q, 1_000_000, &cfg());
        // The design (`B_cost_gate.md` §"Coupling rule") admits two
        // readings: coupling applies only at the same level (this
        // query would reject), or subqueries share the outer scope
        // (this query would pass). `walk_condition` implements the
        // second: the subquery is walked under the outer coupling
        // state.
        //
        // This test deliberately does NOT pin accept vs reject. It
        // only checks that, IF the gate rejects, the rejection names
        // the inner `name` predicate.
        //
        // NOTE(review): when the gate returns `Ok` the `if let`
        // below is skipped and the test passes vacuously — consider
        // pinning the chosen interpretation. When cluster-C's
        // runtime budget lands, the inner subquery will also be
        // bounded by the per-call row budget.
        if let Err(CostGateError::QueryTooBroad { ref field, .. }) = err {
            assert_eq!(field, "name");
        }
    }

    // ────────── envelope helpers ──────────

    #[test]
    fn to_query_too_broad_details_emits_canonical_cc2_seven_keys() {
        let err = CostGateError::QueryTooBroad {
            field: "name".into(),
            op: "~=",
            pattern: "/.*_set$/".into(),
            node_count: 312_487,
            node_limit: 50_000,
            scope_hint: SCOPE_FILTER_FIELDS.join(", "),
            min_prefix_len: 3,
            doc_url: QUERY_TOO_BROAD_DOC_URL,
        };
        let details = err.to_query_too_broad_details();
        assert_eq!(details["source"], SOURCE_STATIC_ESTIMATE);
        assert_eq!(details["kind"], KIND_QUERY_TOO_BROAD);
        assert_eq!(details["estimated_visited_nodes"], 312_487);
        // Cluster-B iter-2: `limit` is the configured threshold, NOT
        // the snapshot's node_count.
        assert_eq!(details["limit"], 50_000);
        // Cluster-B iter-2: predicate_shape is field+op-only, value
        // elided. No raw user pattern reaches the wire.
        let shape = details["predicate_shape"].as_str().unwrap();
        assert_eq!(shape, "name~=<elided>");
        assert!(!shape.contains("_set"));
        assert!(details["suggested_predicates"].is_array());
        assert_eq!(details["doc_url"], QUERY_TOO_BROAD_DOC_URL);
    }

    // ────────── standalone pattern check (CLI `sqry search`) ──────────

    #[test]
    fn cli_search_shape_check_rejects_unanchored_substring() {
        let err = check_regex_pattern_text(".*foo.*", 1_000_000, &cfg())
            .expect_err("CLI shape check must reject .*foo.*");
        assert!(matches!(err, CostGateError::QueryTooBroad { .. }));
    }

    #[test]
    fn cli_search_shape_check_passes_anchored_prefix() {
        check_regex_pattern_text("^get_", 1_000_000, &cfg())
            .expect("anchored prefix must pass CLI shape check");
    }

    #[test]
    fn cli_search_shape_check_passes_long_literal() {
        check_regex_pattern_text(".*deserialize.*", 1_000_000, &cfg())
            .expect("long literal must pass CLI shape check");
    }

    #[test]
    fn cli_search_shape_check_below_threshold_passes() {
        check_regex_pattern_text(".*foo.*", 1_000, &cfg())
            .expect("below cap must pass shape check");
    }
}