Skip to main content

chio_data_guards/
vector_guard.rs

1//! Vector database guard (roadmap phase 7.2).
2//!
3//! `VectorDbGuard` inspects tool calls that target a vector database --
4//! Pinecone, Weaviate, Qdrant, Chroma, or any database the operator
5//! configures as vector-flavored -- and enforces four categories of
6//! policy that the SQL guard cannot:
7//!
8//! 1. **Collection allowlist.** A query to a collection that is not on the
9//!    operator's allowlist is denied.
10//! 2. **Namespace scoping.** A query whose `namespace` field disagrees
11//!    with the grant's active namespace is denied.  Empty/missing
12//!    namespaces collapse to a single shared bucket.
13//! 3. **Operation class.** Upsert, delete, or index-mutation verbs are
14//!    denied when the active grant carries
15//!    [`SqlOperationClass::ReadOnly`](chio_core::capability::SqlOperationClass::ReadOnly).
16//!    The reuse of `SqlOperationClass` is deliberate -- see
17//!    `docs/ROADMAP.md` phase 7.2 -- so a single constraint enum covers
18//!    every database-shaped grant.
19//! 4. **`top_k` ceiling.** A query whose `top_k` exceeds the grant's
20//!    [`Constraint::MaxRowsReturned`](chio_core::capability::Constraint::MaxRowsReturned)
21//!    is denied.  The guard fails closed when `top_k` is missing from the
22//!    arguments and a ceiling is configured.
23//!
24//! # Fail-closed rules
25//!
26//! Like every other guard in this crate, the vector guard is fail-closed:
27//!
28//! - JSON parse errors in the arguments deny.
//! - Missing required fields (collection on every vector-shaped call,
//!   namespace when a non-empty namespace allowlist is configured, `top_k`
//!   when a ceiling is configured) deny.
//! - A configuration with no collection allowlist, no namespace allowlist
//!   and no denied operations denies every request (nothing is implicitly
//!   allowed).  Operators can opt into an open configuration via
//!   [`VectorGuardConfig::allow_all`].
35//!
36//! # Action detection
37//!
38//! `chio-guards` already categorises some vector flows as
39//! [`ToolAction::MemoryRead`]/[`ToolAction::MemoryWrite`]; this guard
40//! primarily drives off [`ToolAction::DatabaseQuery`] with a
41//! vector-flavored `database` (or a tool name that matches a configured
42//! vendor substring) so it can enforce the same policy against bespoke
43//! vendor-adapted SDK tools as well.  The memory-read/write actions are
44//! handled as a second pass -- they carry the store and optional key but
45//! no `top_k` or `operation` hint, so we lift those from the raw
46//! arguments JSON.
47//!
48//! # Tool argument schema
49//!
50//! The guard extracts four fields from the tool arguments by JSON path:
51//!
52//! | field         | default arg keys                        |
53//! |---------------|-----------------------------------------|
54//! | collection    | `collection`, `index`, `class`, `store` |
55//! | namespace     | `namespace`, `tenant`, `partition`      |
56//! | operation     | `operation`, `op`, `action`             |
57//! | top_k         | `top_k`, `topK`, `k`, `limit`           |
58//!
59//! All paths are configurable via [`VectorGuardConfig::field_paths`].
60
61use std::collections::HashSet;
62
63use serde::{Deserialize, Serialize};
64use serde_json::Value;
65use tracing::warn;
66
67use chio_core::capability::{ChioScope, Constraint, SqlOperationClass, ToolGrant};
68use chio_guards::{extract_action, ToolAction};
69use chio_kernel::{GuardContext, KernelError, Verdict};
70use thiserror::Error;
71
72/// Structured reason for a [`VectorDbGuard`] denial.
73#[derive(Clone, Debug, Error, PartialEq, Eq)]
74pub enum VectorGuardDenyReason {
75    /// The tool action is not a database/memory style access the guard
76    /// can reason about, but policy requires one.  Emitted only in tests;
77    /// the guard passes through unknown actions at runtime.
78    #[error("tool action is not a vector-database access")]
79    NotAVectorAccess,
80
81    /// The request does not target a vector database according to the
82    /// configured vendor substrings and `allow_all` is disabled.
83    #[error("database '{database}' is not flagged as vector-shaped")]
84    NotVectorFlavored {
85        /// The database identifier reported by the tool call.
86        database: String,
87    },
88
89    /// A referenced collection is not on the operator's allowlist.
90    #[error("collection '{collection}' is not in the allowlist")]
91    CollectionNotAllowed {
92        /// The offending collection name.
93        collection: String,
94    },
95
96    /// The collection allowlist is empty and `allow_all` is false.
97    #[error("vector guard has no configured collection allowlist and allow_all is false")]
98    NoConfig,
99
100    /// The request targets a namespace that is not permitted by the
101    /// active grant.
102    #[error("namespace '{namespace}' is not in the allowlist")]
103    NamespaceNotAllowed {
104        /// The offending namespace name.
105        namespace: String,
106    },
107
108    /// The operation verb was denied (for example an `upsert` under a
109    /// read-only grant).
110    #[error("operation '{operation}' is not allowed by the active operation class")]
111    OperationNotAllowed {
112        /// The offending operation verb.
113        operation: String,
114    },
115
116    /// A `top_k` (or equivalent) value exceeds the configured ceiling.
117    #[error("top_k {requested} exceeds max_rows_returned {max}")]
118    TopKExceedsLimit {
119        /// The requested top-k value.
120        requested: u64,
121        /// The configured ceiling.
122        max: u64,
123    },
124
125    /// The arguments could not be parsed.
126    #[error("vector guard argument parse error: {error}")]
127    ParseError {
128        /// Human readable error message.
129        error: String,
130    },
131}
132
133impl VectorGuardDenyReason {
134    /// Short stable tag suitable for metrics labels.
135    pub fn code(&self) -> &'static str {
136        match self {
137            Self::NotAVectorAccess => "not_a_vector_access",
138            Self::NotVectorFlavored { .. } => "not_vector_flavored",
139            Self::CollectionNotAllowed { .. } => "collection_not_allowed",
140            Self::NoConfig => "no_config",
141            Self::NamespaceNotAllowed { .. } => "namespace_not_allowed",
142            Self::OperationNotAllowed { .. } => "operation_not_allowed",
143            Self::TopKExceedsLimit { .. } => "top_k_exceeds_limit",
144            Self::ParseError { .. } => "parse_error",
145        }
146    }
147}
148
149/// Configurable JSON field paths for the argument extractor.
150#[derive(Clone, Debug, Serialize, Deserialize)]
151pub struct VectorFieldPaths {
152    /// Keys scanned in order for the collection/index name.
153    pub collection: Vec<String>,
154    /// Keys scanned in order for the namespace.
155    pub namespace: Vec<String>,
156    /// Keys scanned in order for the operation verb.
157    pub operation: Vec<String>,
158    /// Keys scanned in order for the top-k value.
159    pub top_k: Vec<String>,
160}
161
162impl Default for VectorFieldPaths {
163    fn default() -> Self {
164        Self {
165            collection: vec![
166                "collection".into(),
167                "index".into(),
168                "class".into(),
169                "store".into(),
170            ],
171            namespace: vec!["namespace".into(), "tenant".into(), "partition".into()],
172            operation: vec!["operation".into(), "op".into(), "action".into()],
173            top_k: vec!["top_k".into(), "topK".into(), "k".into(), "limit".into()],
174        }
175    }
176}
177
178/// Configuration for [`VectorDbGuard`].
179///
180/// The guard is fail-closed by default: an empty `collection_allowlist`
181/// denies every call unless `allow_all` is set.
182#[derive(Clone, Debug, Serialize, Deserialize)]
183pub struct VectorGuardConfig {
184    /// Substrings that mark a database identifier (or tool name) as
185    /// vector-flavored.  Defaults to the four vendors called out in the
186    /// roadmap plus the generic `"vector"` sentinel.
187    #[serde(default = "default_vendor_markers")]
188    pub vendor_markers: Vec<String>,
189
190    /// Collections the grant may touch.  Case-insensitive.
191    #[serde(default)]
192    pub collection_allowlist: Vec<String>,
193
194    /// Optional namespace allowlist.  `None` disables namespace
195    /// enforcement; `Some(empty)` denies every namespaced request.
196    #[serde(default)]
197    pub namespace_allowlist: Option<Vec<String>>,
198
199    /// Operation verbs that are always denied regardless of the active
200    /// operation class (for example: `"drop_index"`).  Case-insensitive.
201    #[serde(default)]
202    pub denied_operations: Vec<String>,
203
204    /// Operation verbs considered "mutating" for the purposes of
205    /// [`SqlOperationClass::ReadOnly`] enforcement.  Case-insensitive.
206    #[serde(default = "default_mutating_operations")]
207    pub mutating_operations: Vec<String>,
208
209    /// JSON field path overrides.
210    #[serde(default)]
211    pub field_paths: VectorFieldPaths,
212
213    /// Allow every request that passes field-path parsing, ignoring the
214    /// allowlists.  Parse errors still deny.
215    #[serde(default)]
216    pub allow_all: bool,
217}
218
/// Built-in vendor substrings used when the configuration does not supply
/// its own `vendor_markers`.
fn default_vendor_markers() -> Vec<String> {
    ["vector", "pinecone", "weaviate", "qdrant", "chroma", "milvus"]
        .iter()
        .map(|marker| marker.to_string())
        .collect()
}
229
/// Built-in list of verbs regarded as mutating, used when the
/// configuration does not supply its own `mutating_operations`.
fn default_mutating_operations() -> Vec<String> {
    const VERBS: [&str; 11] = [
        "upsert",
        "insert",
        "update",
        "delete",
        "write",
        "index",
        "reindex",
        "drop",
        "drop_index",
        "create_collection",
        "delete_collection",
    ];

    VERBS.iter().map(|verb| verb.to_string()).collect()
}
245
246impl Default for VectorGuardConfig {
247    fn default() -> Self {
248        Self {
249            vendor_markers: default_vendor_markers(),
250            collection_allowlist: Vec::new(),
251            namespace_allowlist: None,
252            denied_operations: Vec::new(),
253            mutating_operations: default_mutating_operations(),
254            field_paths: VectorFieldPaths::default(),
255            allow_all: false,
256        }
257    }
258}
259
260impl VectorGuardConfig {
261    /// Returns true when the operator has not configured any allowlist.
262    pub fn is_empty(&self) -> bool {
263        self.collection_allowlist.is_empty()
264            && self
265                .namespace_allowlist
266                .as_ref()
267                .map(|v| v.is_empty())
268                .unwrap_or(true)
269            && self.denied_operations.is_empty()
270    }
271
272    /// Case-insensitive collection match.
273    pub fn collection_allowed(&self, name: &str) -> bool {
274        let lower = name.to_ascii_lowercase();
275        self.collection_allowlist
276            .iter()
277            .any(|c| c.to_ascii_lowercase() == lower)
278    }
279
280    /// Case-insensitive namespace match.  Returns `true` when namespace
281    /// enforcement is disabled.
282    pub fn namespace_allowed(&self, name: &str) -> bool {
283        match &self.namespace_allowlist {
284            None => true,
285            Some(list) => {
286                let lower = name.to_ascii_lowercase();
287                list.iter().any(|c| c.to_ascii_lowercase() == lower)
288            }
289        }
290    }
291
292    /// Returns true when the tool name or database identifier matches any
293    /// configured vendor substring (case-insensitive).
294    pub fn looks_like_vector(&self, database: &str, tool: &str) -> bool {
295        let db = database.to_ascii_lowercase();
296        let tl = tool.to_ascii_lowercase();
297        self.vendor_markers.iter().any(|m| {
298            !m.is_empty()
299                && (db.contains(&m.to_ascii_lowercase()) || tl.contains(&m.to_ascii_lowercase()))
300        })
301    }
302}
303
/// The fields of a vector-database tool call that the guard evaluates.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VectorCall {
    /// Collection name; `VectorDbGuard::extract_call` stores it
    /// ASCII-lowercased.
    pub collection: String,
    /// Namespace taken from the arguments, if any.
    pub namespace: Option<String>,
    /// Operation verb taken from the arguments, if any.
    pub operation: Option<String>,
    /// Requested `top_k` taken from the arguments, if any.
    pub top_k: Option<u64>,
}
316
317/// Vector database guard (roadmap phase 7.2).
318pub struct VectorDbGuard {
319    config: VectorGuardConfig,
320}
321
322impl VectorDbGuard {
323    /// Construct a new guard with the given configuration.
324    pub fn new(config: VectorGuardConfig) -> Self {
325        if config.allow_all {
326            warn!(
327                target: "chio.data-guards.vector",
328                "vector-db-guard constructed with allow_all=true; fail-closed default disabled"
329            );
330        }
331        Self { config }
332    }
333
334    /// Read-only access to the configuration.
335    pub fn config(&self) -> &VectorGuardConfig {
336        &self.config
337    }
338
339    /// Evaluate an already-extracted [`VectorCall`] against the configured
340    /// policy and the active capability scope.
341    ///
342    /// Returns `Ok(())` to allow; `Err(VectorGuardDenyReason)` to deny.
343    pub fn check(&self, call: &VectorCall, scope: &ChioScope) -> Result<(), VectorGuardDenyReason> {
344        self.check_with_matched_grant(call, scope, None)
345    }
346
347    fn check_with_matched_grant(
348        &self,
349        call: &VectorCall,
350        scope: &ChioScope,
351        matched_grant_index: Option<usize>,
352    ) -> Result<(), VectorGuardDenyReason> {
353        if self.config.allow_all {
354            return Ok(());
355        }
356
357        if self.config.is_empty() {
358            return Err(VectorGuardDenyReason::NoConfig);
359        }
360
361        // Collection allowlist.
362        if !self.config.collection_allowlist.is_empty()
363            && !self.config.collection_allowed(&call.collection)
364        {
365            return Err(VectorGuardDenyReason::CollectionNotAllowed {
366                collection: call.collection.clone(),
367            });
368        }
369
370        // Namespace allowlist.
371        if let Some(ns) = &call.namespace {
372            if !self.config.namespace_allowed(ns) {
373                return Err(VectorGuardDenyReason::NamespaceNotAllowed {
374                    namespace: ns.clone(),
375                });
376            }
377        } else if self
378            .config
379            .namespace_allowlist
380            .as_ref()
381            .map(|v| !v.is_empty())
382            .unwrap_or(false)
383        {
384            // Namespaces are being enforced but the call did not provide
385            // one: fail-closed.
386            return Err(VectorGuardDenyReason::NamespaceNotAllowed {
387                namespace: String::new(),
388            });
389        }
390
391        // Operation verb handling.
392        if let Some(op) = &call.operation {
393            let op_lower = op.to_ascii_lowercase();
394
395            // Hard denylist always wins.
396            if self
397                .config
398                .denied_operations
399                .iter()
400                .any(|d| d.to_ascii_lowercase() == op_lower)
401            {
402                return Err(VectorGuardDenyReason::OperationNotAllowed {
403                    operation: op.clone(),
404                });
405            }
406
407            // Inspect the active grant's operation class.
408            let class = operation_class_for_request(scope, matched_grant_index);
409            if let Some(class) = class {
410                let is_mutation = self
411                    .config
412                    .mutating_operations
413                    .iter()
414                    .any(|m| m.to_ascii_lowercase() == op_lower);
415
416                match (class, is_mutation) {
417                    (SqlOperationClass::ReadOnly, true) => {
418                        return Err(VectorGuardDenyReason::OperationNotAllowed {
419                            operation: op.clone(),
420                        })
421                    }
422                    (SqlOperationClass::ReadWrite, _) if op_lower == "drop_index" => {
423                        // DDL-ish verbs even under read-write need Admin.
424                        return Err(VectorGuardDenyReason::OperationNotAllowed {
425                            operation: op.clone(),
426                        });
427                    }
428                    _ => {}
429                }
430            }
431        } else if let Some(class) = operation_class_for_request(scope, matched_grant_index) {
432            // No explicit operation verb was provided, but the active grant
433            // narrows to a specific OperationClass. Fail closed: a ReadOnly
434            // scope must not allow a call that could be a write, and any
435            // OperationClass stricter than Admin requires knowing the verb
436            // to make an allow decision. This closes the vector-write bypass
437            // where a MemoryWrite-shaped call omits `operation` and would
438            // otherwise skip the mutation gate entirely.
439            if matches!(
440                class,
441                SqlOperationClass::ReadOnly | SqlOperationClass::ReadWrite
442            ) {
443                return Err(VectorGuardDenyReason::OperationNotAllowed {
444                    operation: String::new(),
445                });
446            }
447        }
448
449        // top_k ceiling.
450        if let Some(max) = max_rows_for_request(scope, matched_grant_index) {
451            match call.top_k {
452                Some(k) if k > max => {
453                    return Err(VectorGuardDenyReason::TopKExceedsLimit { requested: k, max });
454                }
455                None => {
456                    // A ceiling is set but the call did not declare top_k.
457                    // Fail-closed.
458                    return Err(VectorGuardDenyReason::TopKExceedsLimit {
459                        requested: u64::MAX,
460                        max,
461                    });
462                }
463                _ => {}
464            }
465        }
466
467        Ok(())
468    }
469
470    /// Extract a [`VectorCall`] from the tool arguments value.
471    pub fn extract_call(&self, arguments: &Value) -> Result<VectorCall, VectorGuardDenyReason> {
472        if !arguments.is_object() && !arguments.is_null() {
473            return Err(VectorGuardDenyReason::ParseError {
474                error: "arguments must be a JSON object".into(),
475            });
476        }
477        let collection = pick_string(arguments, &self.config.field_paths.collection)
478            .map(|s| s.to_ascii_lowercase())
479            .ok_or(VectorGuardDenyReason::ParseError {
480                error: "missing collection/index field".into(),
481            })?;
482        let namespace = pick_string(arguments, &self.config.field_paths.namespace);
483        let operation = pick_string(arguments, &self.config.field_paths.operation);
484        let top_k = pick_number(arguments, &self.config.field_paths.top_k);
485
486        Ok(VectorCall {
487            collection,
488            namespace,
489            operation,
490            top_k,
491        })
492    }
493}
494
495impl chio_kernel::Guard for VectorDbGuard {
496    fn name(&self) -> &str {
497        "vector-db"
498    }
499
500    fn evaluate(&self, ctx: &GuardContext) -> Result<Verdict, KernelError> {
501        let tool = &ctx.request.tool_name;
502        let args = &ctx.request.arguments;
503        let action = extract_action(tool, args);
504
505        let database = match &action {
506            ToolAction::DatabaseQuery { database, .. } => database.clone(),
507            ToolAction::MemoryRead { store, .. } | ToolAction::MemoryWrite { store, .. } => {
508                store.clone()
509            }
510            // Fall back to inspecting the tool name directly: not every
511            // bespoke vector SDK tool is wired up in `extract_action`.
512            _ => tool.clone(),
513        };
514
515        // Non-vector traffic always short-circuits to Allow regardless of
516        // `allow_all`. The old `!allow_all && !looks_like_vector` gate
517        // inverted the bypass intent: enabling `allow_all` forced every
518        // tool call (including non-vector ones) through `extract_call`,
519        // which then denied any call lacking vector-specific fields.
520        // Split the condition so `allow_all` only governs whether vector
521        // policy is enforced on vector-shaped requests.
522        if !self.config.looks_like_vector(&database, tool) {
523            return Ok(Verdict::Allow);
524        }
525        let call = match self.extract_call(args) {
526            Ok(c) => c,
527            Err(reason) => {
528                warn!(
529                    target: "chio.data-guards.vector",
530                    code = reason.code(),
531                    reason = %reason,
532                    database = %database,
533                    "vector-db-guard denied: parse failed"
534                );
535                return Ok(Verdict::Deny);
536            }
537        };
538
539        if self.config.allow_all {
540            // Dry-run/debug mode still validates vector call shape so
541            // malformed payloads fail closed, but skips allowlist and
542            // scope enforcement once parsing succeeds.
543            return Ok(Verdict::Allow);
544        }
545
546        match self.check_with_matched_grant(&call, ctx.scope, ctx.matched_grant_index) {
547            Ok(()) => Ok(Verdict::Allow),
548            Err(reason) => {
549                warn!(
550                    target: "chio.data-guards.vector",
551                    code = reason.code(),
552                    reason = %reason,
553                    database = %database,
554                    collection = %call.collection,
555                    "vector-db-guard denied"
556                );
557                Ok(Verdict::Deny)
558            }
559        }
560    }
561}
562
563// ---------------------------------------------------------------------------
564// Helpers
565// ---------------------------------------------------------------------------
566
567fn active_grant(scope: &ChioScope, matched_grant_index: Option<usize>) -> Option<&ToolGrant> {
568    matched_grant_index.and_then(|index| scope.grants.get(index))
569}
570
571fn operation_class_for_constraints(constraints: &[Constraint]) -> Option<SqlOperationClass> {
572    let mut strongest: Option<SqlOperationClass> = None;
573    for c in constraints {
574        if let Constraint::OperationClass(class) = c {
575            strongest = Some(match (strongest, *class) {
576                (None, new) => new,
577                (Some(SqlOperationClass::ReadOnly), _) => SqlOperationClass::ReadOnly,
578                (_, SqlOperationClass::ReadOnly) => SqlOperationClass::ReadOnly,
579                (Some(SqlOperationClass::ReadWrite), _) => SqlOperationClass::ReadWrite,
580                (_, SqlOperationClass::ReadWrite) => SqlOperationClass::ReadWrite,
581                (Some(SqlOperationClass::Admin), SqlOperationClass::Admin) => {
582                    SqlOperationClass::Admin
583                }
584            });
585        }
586    }
587    strongest
588}
589
590/// Return the strictest [`SqlOperationClass`] for the matched grant when
591/// available, or across the full scope as a conservative fallback for
592/// direct callers that do not supply grant attribution.
593fn operation_class_for_request(
594    scope: &ChioScope,
595    matched_grant_index: Option<usize>,
596) -> Option<SqlOperationClass> {
597    if let Some(grant) = active_grant(scope, matched_grant_index) {
598        return operation_class_for_constraints(&grant.constraints);
599    }
600
601    let mut strongest: Option<SqlOperationClass> = None;
602    for grant in &scope.grants {
603        strongest = match (
604            strongest,
605            operation_class_for_constraints(&grant.constraints),
606        ) {
607            (Some(SqlOperationClass::ReadOnly), _) => Some(SqlOperationClass::ReadOnly),
608            (_, Some(SqlOperationClass::ReadOnly)) => Some(SqlOperationClass::ReadOnly),
609            (Some(SqlOperationClass::ReadWrite), _) => Some(SqlOperationClass::ReadWrite),
610            (_, Some(SqlOperationClass::ReadWrite)) => Some(SqlOperationClass::ReadWrite),
611            (None, Some(class)) => Some(class),
612            (current, None) => current,
613            (Some(SqlOperationClass::Admin), Some(SqlOperationClass::Admin)) => {
614                Some(SqlOperationClass::Admin)
615            }
616        };
617    }
618    strongest
619}
620
621fn max_rows_for_constraints(constraints: &[Constraint]) -> Option<u64> {
622    let mut min: Option<u64> = None;
623    for c in constraints {
624        if let Constraint::MaxRowsReturned(n) = c {
625            min = Some(min.map_or(*n, |m| m.min(*n)));
626        }
627    }
628    min
629}
630
631/// Return the lowest `MaxRowsReturned` from the matched grant when
632/// available, or across the full scope as a conservative fallback for
633/// direct callers that do not supply grant attribution.
634fn max_rows_for_request(scope: &ChioScope, matched_grant_index: Option<usize>) -> Option<u64> {
635    if let Some(grant) = active_grant(scope, matched_grant_index) {
636        return max_rows_for_constraints(&grant.constraints);
637    }
638
639    let mut min: Option<u64> = None;
640    for grant in &scope.grants {
641        if let Some(grant_min) = max_rows_for_constraints(&grant.constraints) {
642            min = Some(min.map_or(grant_min, |current| current.min(grant_min)));
643        }
644    }
645    min
646}
647
648/// Walk `keys` over the top level of `value` and return the first string
649/// we find.
650fn pick_string(value: &Value, keys: &[String]) -> Option<String> {
651    for key in keys {
652        if let Some(s) = value.get(key).and_then(|v| v.as_str()) {
653            if !s.is_empty() {
654                return Some(s.to_string());
655            }
656        }
657    }
658    None
659}
660
661/// Walk `keys` over the top level of `value` and return the first unsigned
662/// integer.
663fn pick_number(value: &Value, keys: &[String]) -> Option<u64> {
664    for key in keys {
665        if let Some(n) = value.get(key).and_then(|v| v.as_u64()) {
666            return Some(n);
667        }
668        // Accept stringified numbers too for SDKs that over-quote.
669        if let Some(s) = value.get(key).and_then(|v| v.as_str()) {
670            if let Ok(n) = s.parse::<u64>() {
671                return Some(n);
672            }
673        }
674    }
675    None
676}
677
/// Convenience: collect string-like items into a set of their
/// ASCII-lowercased forms, for callers that build their own filters.
/// Items that differ only by ASCII case collapse into one entry.
#[doc(hidden)]
pub fn lowercase_set<I, S>(items: I) -> HashSet<String>
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let mut normalised = HashSet::new();
    for item in items {
        normalised.insert(item.as_ref().to_ascii_lowercase());
    }
    normalised
}
691
692#[cfg(test)]
693mod tests {
694    use super::*;
695    use chio_core::capability::{CapabilityToken, CapabilityTokenBody, Operation, ToolGrant};
696    use chio_core::crypto::Keypair;
697    use chio_kernel::{Guard, GuardContext, ToolCallRequest, Verdict};
698
699    fn grant_with_constraints(constraints: Vec<Constraint>) -> ToolGrant {
700        ToolGrant {
701            server_id: "srv".into(),
702            tool_name: "*".into(),
703            operations: vec![Operation::Invoke],
704            constraints,
705            max_invocations: None,
706            max_cost_per_invocation: None,
707            max_total_cost: None,
708            dpop_required: None,
709        }
710    }
711
712    fn scope_with(constraints: Vec<Constraint>) -> ChioScope {
713        ChioScope {
714            grants: vec![grant_with_constraints(constraints)],
715            resource_grants: vec![],
716            prompt_grants: vec![],
717        }
718    }
719
720    fn test_capability() -> CapabilityToken {
721        let kp = Keypair::generate();
722        CapabilityToken::sign(
723            CapabilityTokenBody {
724                id: "cap-vector-guard".into(),
725                issuer: kp.public_key(),
726                subject: kp.public_key(),
727                scope: ChioScope::default(),
728                issued_at: 0,
729                expires_at: u64::MAX,
730                delegation_chain: vec![],
731            },
732            &kp,
733        )
734        .unwrap()
735    }
736
737    fn base_cfg() -> VectorGuardConfig {
738        VectorGuardConfig {
739            collection_allowlist: vec!["docs".into()],
740            ..Default::default()
741        }
742    }
743
744    #[test]
745    fn deny_collection_not_in_allowlist() {
746        let g = VectorDbGuard::new(base_cfg());
747        let call = VectorCall {
748            collection: "secrets".into(),
749            namespace: None,
750            operation: Some("query".into()),
751            top_k: Some(10),
752        };
753        let err = g.check(&call, &ChioScope::default()).unwrap_err();
754        assert!(matches!(
755            err,
756            VectorGuardDenyReason::CollectionNotAllowed { .. }
757        ));
758    }
759
760    #[test]
761    fn allow_collection_in_allowlist() {
762        let g = VectorDbGuard::new(base_cfg());
763        let call = VectorCall {
764            collection: "docs".into(),
765            namespace: None,
766            operation: Some("query".into()),
767            top_k: Some(5),
768        };
769        g.check(&call, &ChioScope::default()).unwrap();
770    }
771
772    #[test]
773    fn deny_cross_namespace() {
774        let cfg = VectorGuardConfig {
775            collection_allowlist: vec!["docs".into()],
776            namespace_allowlist: Some(vec!["tenant-a".into()]),
777            ..Default::default()
778        };
779        let g = VectorDbGuard::new(cfg);
780        let call = VectorCall {
781            collection: "docs".into(),
782            namespace: Some("tenant-b".into()),
783            operation: None,
784            top_k: None,
785        };
786        let err = g.check(&call, &ChioScope::default()).unwrap_err();
787        assert!(matches!(
788            err,
789            VectorGuardDenyReason::NamespaceNotAllowed { .. }
790        ));
791    }
792
793    #[test]
794    fn deny_upsert_under_readonly() {
795        let g = VectorDbGuard::new(base_cfg());
796        let call = VectorCall {
797            collection: "docs".into(),
798            namespace: None,
799            operation: Some("upsert".into()),
800            top_k: None,
801        };
802        let scope = scope_with(vec![Constraint::OperationClass(
803            SqlOperationClass::ReadOnly,
804        )]);
805        let err = g.check(&call, &scope).unwrap_err();
806        assert!(matches!(
807            err,
808            VectorGuardDenyReason::OperationNotAllowed { .. }
809        ));
810    }
811
/// A `query` with `top_k = 1` passes `check` under a scope that carries
/// both `OperationClass(ReadOnly)` and `MaxRowsReturned(50)`.
#[test]
fn allow_query_under_readonly() {
    let constraints = vec![
        Constraint::OperationClass(SqlOperationClass::ReadOnly),
        Constraint::MaxRowsReturned(50),
    ];
    let scope = scope_with(constraints);
    let query = VectorCall {
        collection: "docs".into(),
        namespace: None,
        operation: Some("query".into()),
        top_k: Some(1),
    };

    let guard = VectorDbGuard::new(base_cfg());
    guard.check(&query, &scope).unwrap();
}
827
/// A `top_k` of 500 under `MaxRowsReturned(50)` must be rejected, and the
/// deny reason must report both the requested value and the ceiling.
#[test]
fn deny_top_k_over_max_rows() {
    let capped = scope_with(vec![Constraint::MaxRowsReturned(50)]);
    let oversized = VectorCall {
        collection: "docs".into(),
        namespace: None,
        operation: Some("query".into()),
        top_k: Some(500),
    };
    let guard = VectorDbGuard::new(base_cfg());

    match guard.check(&oversized, &capped).unwrap_err() {
        VectorGuardDenyReason::TopKExceedsLimit { requested, max } => {
            assert_eq!(requested, 500);
            assert_eq!(max, 50);
        }
        // Any other reason means the wrong policy fired.
        other => panic!("unexpected reason: {other:?}"),
    }
}
847
/// Omitting `top_k` while the scope carries `MaxRowsReturned(50)` must be
/// rejected; the test expects the `TopKExceedsLimit` reason.
#[test]
fn deny_missing_top_k_when_ceiling_set() {
    let capped = scope_with(vec![Constraint::MaxRowsReturned(50)]);
    let no_top_k = VectorCall {
        collection: "docs".into(),
        namespace: None,
        operation: Some("query".into()),
        top_k: None,
    };
    let guard = VectorDbGuard::new(base_cfg());

    let reason = guard.check(&no_top_k, &capped).unwrap_err();
    assert!(matches!(
        reason,
        VectorGuardDenyReason::TopKExceedsLimit { .. }
    ));
}
864
/// A guard built from `VectorGuardConfig::default()` must reject a call
/// with the `NoConfig` reason.
#[test]
fn empty_config_denies() {
    let any_call = VectorCall {
        collection: "docs".into(),
        namespace: None,
        operation: None,
        top_k: None,
    };
    let scope = ChioScope::default();
    let guard = VectorDbGuard::new(VectorGuardConfig::default());

    let reason = guard.check(&any_call, &scope).unwrap_err();
    assert!(matches!(reason, VectorGuardDenyReason::NoConfig));
}
877
/// With `allow_all` set on an otherwise default config, a call naming an
/// arbitrary collection and namespace passes `check` under the default
/// scope.
#[test]
fn allow_all_skips_allowlists() {
    let open_cfg = VectorGuardConfig {
        allow_all: true,
        ..Default::default()
    };
    let guard = VectorDbGuard::new(open_cfg);

    // None of these values appear in any allowlist.
    let arbitrary = VectorCall {
        collection: "anything".into(),
        namespace: Some("anywhere".into()),
        operation: Some("upsert".into()),
        top_k: Some(10_000),
    };
    let scope = ChioScope::default();
    guard.check(&arbitrary, &scope).unwrap();
}
892
/// Even with `allow_all` set, `evaluate` must return `Verdict::Deny` for
/// a request whose arguments carry no `collection` field.
#[test]
fn allow_all_still_denies_parse_errors() {
    let open_cfg = VectorGuardConfig {
        allow_all: true,
        ..Default::default()
    };
    let guard = VectorDbGuard::new(open_cfg);

    // The arguments deliberately omit `collection`.
    let request = ToolCallRequest {
        request_id: String::from("req-vector-allow-all-parse"),
        capability: test_capability(),
        tool_name: String::from("pinecone_query"),
        server_id: String::from("srv"),
        agent_id: String::from("agent"),
        arguments: serde_json::json!({"namespace": "tenant-a"}),
        dpop_proof: None,
        governed_intent: None,
        approval_token: None,
        model_metadata: None,
        federated_origin_kernel_id: None,
    };

    let scope = ChioScope::default();
    let agent_id = "agent".to_string();
    let server_id = "srv".to_string();
    let ctx = GuardContext {
        request: &request,
        scope: &scope,
        agent_id: &agent_id,
        server_id: &server_id,
        session_filesystem_roots: None,
        matched_grant_index: None,
    };

    let verdict = guard.evaluate(&ctx).unwrap();
    assert_eq!(verdict, Verdict::Deny);
}
927
/// `extract_call` must lift `collection`, `namespace`, `operation` and
/// `top_k` out of a JSON argument object into the parsed call.
#[test]
fn extract_call_parses_defaults() {
    let payload = serde_json::json!({
        "collection": "docs",
        "namespace": "tenant-a",
        "operation": "query",
        "top_k": 42
    });
    let guard = VectorDbGuard::new(base_cfg());

    let parsed = guard.extract_call(&payload).unwrap();

    assert_eq!(parsed.collection, "docs");
    assert_eq!(parsed.namespace.as_deref(), Some("tenant-a"));
    assert_eq!(parsed.operation.as_deref(), Some("query"));
    assert_eq!(parsed.top_k, Some(42));
}
943
/// `extract_call` must fail with `ParseError` when the argument object
/// has no `collection` key.
#[test]
fn extract_call_missing_collection_errors() {
    let payload = serde_json::json!({"namespace": "tenant-a"});
    let guard = VectorDbGuard::new(base_cfg());

    let reason = guard.extract_call(&payload).unwrap_err();
    assert!(matches!(reason, VectorGuardDenyReason::ParseError { .. }));
}
951
/// The default config's `looks_like_vector` must accept argument pairs
/// that mention a vector vendor or the word "vector", and must reject an
/// unrelated SQL pair.
#[test]
fn looks_like_vector_matches_vendor_substring() {
    let defaults = VectorGuardConfig::default();

    // Pairs that are expected to be recognised as vector-flavored.
    let vector_pairs = [
        ("pinecone-prod", "query"),
        ("main", "weaviate_search"),
        ("vector-store", "query"),
    ];
    for (first, second) in vector_pairs {
        assert!(
            defaults.looks_like_vector(first, second),
            "expected a vector match for ({first:?}, {second:?})"
        );
    }

    assert!(!defaults.looks_like_vector("postgres", "sql"));
}
960
/// Pins the string codes returned by `code()` for three deny reasons so
/// that an accidental rename is caught.
#[test]
fn reason_codes_are_stable() {
    let no_config = VectorGuardDenyReason::NoConfig;
    assert_eq!(no_config.code(), "no_config");

    let bad_collection = VectorGuardDenyReason::CollectionNotAllowed {
        collection: "x".into(),
    };
    assert_eq!(bad_collection.code(), "collection_not_allowed");

    let over_limit = VectorGuardDenyReason::TopKExceedsLimit {
        requested: 1,
        max: 0,
    };
    assert_eq!(over_limit.code(), "top_k_exceeds_limit");
}
980
/// `lowercase_set` must store mixed-case inputs under their lowercase
/// form.
#[test]
fn lowercase_set_normalises() {
    let normalised = lowercase_set(["Foo", "BAR"]);
    for expected in ["foo", "bar"] {
        assert!(normalised.contains(expected));
    }
}
987}