1use std::borrow::Cow;
54
55use regex::{Regex, RegexBuilder};
56use serde::{Deserialize, Serialize};
57use serde_json::Value;
58use tracing::warn;
59
60use chio_core::capability::{ChioScope, Constraint};
61use chio_guards::post_invocation::{
62 PostInvocationContext, PostInvocationHook, PostInvocationVerdict,
63};
64use chio_kernel::{GuardContext, KernelError, Verdict};
65
/// Replacement text used for redacted values unless the configuration overrides it.
pub const DEFAULT_REDACTION_MARKER: &str = "[REDACTED]";
/// Upper bound on the number of entries accepted in `redact_pii_patterns`.
const MAX_REDACT_PII_PATTERNS: usize = 64;
/// Upper bound on a single trimmed pattern's `str::len()` (i.e. its byte length).
const MAX_REDACT_PII_PATTERN_LEN: usize = 512;
/// Upper bound on the score `pii_pattern_complexity` may assign to one pattern.
const MAX_REDACT_PII_PATTERN_COMPLEXITY: usize = 96;
/// Value handed to `RegexBuilder::size_limit` when compiling each PII pattern.
const REDACT_PII_REGEX_SIZE_LIMIT: usize = 1 << 20;
/// Value handed to `RegexBuilder::dfa_size_limit` when compiling each PII pattern.
const REDACT_PII_DFA_SIZE_LIMIT: usize = 1 << 20;
73
/// Serializable settings for [`QueryResultGuard`].
///
/// Every field has a serde default, so an empty document deserializes successfully.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct QueryResultGuardConfig {
    /// Text written in place of any redacted value or PII match.
    /// Defaults to [`DEFAULT_REDACTION_MARKER`].
    #[serde(default = "default_redaction_marker")]
    pub redaction_marker: String,

    /// Regular-expression sources whose matches are replaced inside string values.
    /// They are validated and compiled (case-insensitively) by `QueryResultGuard::new`.
    /// Defaults to an empty list.
    #[serde(default)]
    pub redact_pii_patterns: Vec<String>,

    /// Object keys probed, in order, to find the array of rows in a response.
    /// Defaults to `rows`, `results`, `records`, `data`.
    #[serde(default = "default_rows_keys")]
    pub rows_keys: Vec<String>,
}
94
95fn default_redaction_marker() -> String {
96 DEFAULT_REDACTION_MARKER.to_string()
97}
98
/// Serde default for `rows_keys`: the object keys probed for a rows array, in
/// priority order.
fn default_rows_keys() -> Vec<String> {
    ["rows", "results", "records", "data"]
        .iter()
        .map(|key| (*key).to_owned())
        .collect()
}
107
108impl Default for QueryResultGuardConfig {
109 fn default() -> Self {
110 Self {
111 redaction_marker: default_redaction_marker(),
112 redact_pii_patterns: Vec::new(),
113 rows_keys: default_rows_keys(),
114 }
115 }
116}
117
/// Redacts query results according to scope constraints and configured PII patterns.
///
/// Construct it with `QueryResultGuard::new`, which validates and compiles the
/// patterns held in the configuration.
#[derive(Debug)]
pub struct QueryResultGuard {
    /// The configuration this guard was built from.
    config: QueryResultGuardConfig,
    /// Pairs of (trimmed pattern source, compiled regex), in configuration order.
    pii_regex: Vec<(String, Regex)>,
}
125
126impl QueryResultGuard {
127 pub fn new(config: QueryResultGuardConfig) -> Result<Self, String> {
132 if config.redact_pii_patterns.len() > MAX_REDACT_PII_PATTERNS {
133 return Err(format!(
134 "query_result.redact_pii_patterns allows at most {MAX_REDACT_PII_PATTERNS} patterns"
135 ));
136 }
137 let mut pii_regex = Vec::with_capacity(config.redact_pii_patterns.len());
138 for pattern in &config.redact_pii_patterns {
139 let trimmed = pattern.trim();
140 if trimmed.is_empty() {
141 return Err(
142 "query_result.redact_pii_patterns cannot contain empty patterns".to_string(),
143 );
144 }
145 if trimmed.len() > MAX_REDACT_PII_PATTERN_LEN {
146 return Err(format!(
147 "query_result.redact_pii_patterns entries must be at most {MAX_REDACT_PII_PATTERN_LEN} characters"
148 ));
149 }
150 let complexity = pii_pattern_complexity(trimmed);
151 if complexity > MAX_REDACT_PII_PATTERN_COMPLEXITY {
152 return Err(format!(
153 "query_result.redact_pii_patterns entries must have complexity at most {MAX_REDACT_PII_PATTERN_COMPLEXITY}"
154 ));
155 }
156 let re = RegexBuilder::new(trimmed)
157 .case_insensitive(true)
158 .size_limit(REDACT_PII_REGEX_SIZE_LIMIT)
159 .dfa_size_limit(REDACT_PII_DFA_SIZE_LIMIT)
160 .build()
161 .map_err(|error| {
162 format!("invalid query_result.redact_pii_patterns entry `{trimmed}`: {error}")
163 })?;
164 pii_regex.push((trimmed.to_string(), re));
165 }
166 Ok(Self { config, pii_regex })
167 }
168
169 pub fn config(&self) -> &QueryResultGuardConfig {
171 &self.config
172 }
173
174 pub fn redact_result(&self, scope: &ChioScope, value: &mut Value) {
192 self.redact_result_for_request(scope, None, value);
193 }
194
195 pub fn redact_result_for_request(
198 &self,
199 scope: &ChioScope,
200 matched_grant_index: Option<usize>,
201 value: &mut Value,
202 ) {
203 let constraints = constraints_for_request(scope, matched_grant_index);
204 let max_rows = min_max_rows(&constraints);
205 let denied = column_denylist(&constraints);
206 let requires_row_shape = max_rows.is_some() || !denied.is_empty();
207
208 if let Some(array) = locate_rows_array_mut(value, &self.config.rows_keys) {
210 if let Some(limit) = max_rows {
211 if array.len() > limit as usize {
212 array.truncate(limit as usize);
213 }
214 }
215 if !denied.is_empty() {
216 for row in array.iter_mut() {
217 redact_columns(row, &denied, &self.config.redaction_marker);
218 }
219 }
220 } else if requires_row_shape {
221 redact_unstructured_result(value, &self.config.redaction_marker);
222 }
223
224 if !self.pii_regex.is_empty() {
227 redact_pii_in_place(value, &self.pii_regex, &self.config.redaction_marker);
228 }
229 }
230
231 fn redact_result_cloned_for_request(
233 &self,
234 scope: &ChioScope,
235 matched_grant_index: Option<usize>,
236 value: &Value,
237 ) -> Value {
238 let mut out = value.clone();
239 self.redact_result_for_request(scope, matched_grant_index, &mut out);
240 out
241 }
242}
243
/// Scores how "expensive" a regex source looks, using a purely syntactic heuristic.
///
/// Unescaped `|`, `*`, `+` and `?` add 4 each; unescaped `{`, `[` and `(` add 2
/// each. A backslash and the character that follows it contribute nothing. The
/// sum saturates instead of overflowing.
fn pii_pattern_complexity(pattern: &str) -> usize {
    let mut remaining = pattern.chars();
    let mut total: usize = 0;
    while let Some(symbol) = remaining.next() {
        let weight = match symbol {
            '\\' => {
                // Consume the escaped character (if any) so it is never scored.
                let _ = remaining.next();
                0
            }
            '|' | '*' | '+' | '?' => 4,
            '{' | '[' | '(' => 2,
            _ => 0,
        };
        total = total.saturating_add(weight);
    }
    total
}
261
262impl QueryResultGuard {
264 pub fn as_hook(&self, scope: ChioScope) -> QueryResultHook<'_> {
271 QueryResultHook { guard: self, scope }
272 }
273
274 pub fn into_owned_hook(self, scope: ChioScope) -> OwnedQueryResultHook {
277 OwnedQueryResultHook { guard: self, scope }
278 }
279}
280
/// Post-invocation hook that borrows a [`QueryResultGuard`].
pub struct QueryResultHook<'a> {
    /// Guard that performs the redaction.
    guard: &'a QueryResultGuard,
    /// Scope used when the invocation context does not carry one.
    scope: ChioScope,
}
286
287impl<'a> PostInvocationHook for QueryResultHook<'a> {
288 fn name(&self) -> &str {
289 "query-result"
290 }
291
292 fn inspect(&self, ctx: &PostInvocationContext<'_>, response: &Value) -> PostInvocationVerdict {
293 let scope = ctx.scope.unwrap_or(&self.scope);
294 let redacted =
295 self.guard
296 .redact_result_cloned_for_request(scope, ctx.matched_grant_index, response);
297 if redacted == *response {
298 PostInvocationVerdict::Allow
299 } else {
300 PostInvocationVerdict::Redact(redacted)
301 }
302 }
303}
304
/// Post-invocation hook that owns its [`QueryResultGuard`].
pub struct OwnedQueryResultHook {
    /// Guard that performs the redaction.
    guard: QueryResultGuard,
    /// Scope used when the invocation context does not carry one.
    scope: ChioScope,
}
311
312impl PostInvocationHook for OwnedQueryResultHook {
313 fn name(&self) -> &str {
314 "query-result"
315 }
316
317 fn inspect(&self, ctx: &PostInvocationContext<'_>, response: &Value) -> PostInvocationVerdict {
318 let scope = ctx.scope.unwrap_or(&self.scope);
319 let redacted =
320 self.guard
321 .redact_result_cloned_for_request(scope, ctx.matched_grant_index, response);
322 if redacted == *response {
323 PostInvocationVerdict::Allow
324 } else {
325 PostInvocationVerdict::Redact(redacted)
326 }
327 }
328}
329
/// Kernel `Guard` implementation for [`QueryResultGuard`].
///
/// `evaluate` ignores its context and always allows; the redaction logic in this
/// file runs through the post-invocation hooks instead.
impl chio_kernel::Guard for QueryResultGuard {
    fn name(&self) -> &str {
        "query-result"
    }

    fn evaluate(&self, _ctx: &GuardContext) -> Result<Verdict, KernelError> {
        // Nothing is inspected here: unconditionally allow.
        Ok(Verdict::Allow)
    }
}
343
344fn constraints_for_request(
349 scope: &ChioScope,
350 matched_grant_index: Option<usize>,
351) -> Vec<&Constraint> {
352 if let Some(index) = matched_grant_index {
353 if let Some(grant) = scope.grants.get(index) {
354 return grant.constraints.iter().collect();
355 }
356 warn!(
357 target: "chio.data-guards.result",
358 matched_grant_index = index,
359 grant_count = scope.grants.len(),
360 "matched grant index missing from scope, falling back to full scope"
361 );
362 }
363
364 scope
365 .grants
366 .iter()
367 .flat_map(|grant| grant.constraints.iter())
368 .collect()
369}
370
371fn min_max_rows(constraints: &[&Constraint]) -> Option<u64> {
372 let mut min: Option<u64> = None;
373 for constraint in constraints {
374 if let Constraint::MaxRowsReturned(n) = constraint {
375 min = Some(min.map_or(*n, |m| m.min(*n)));
376 }
377 }
378 min
379}
380
381fn column_denylist(constraints: &[&Constraint]) -> Vec<String> {
382 let mut out = Vec::new();
383 for constraint in constraints {
384 if let Constraint::ColumnDenylist(list) = constraint {
385 for entry in list {
386 out.push(entry.to_ascii_lowercase());
387 }
388 }
389 }
390 out
391}
392
393fn locate_rows_array_mut<'a>(
394 value: &'a mut Value,
395 rows_keys: &[String],
396) -> Option<&'a mut Vec<Value>> {
397 let is_value_envelope = value
398 .as_object()
399 .and_then(|object| object.get("kind"))
400 .and_then(Value::as_str)
401 == Some("value");
402 let value = if is_value_envelope {
403 value.get_mut("value")?
404 } else {
405 value
406 };
407
408 if value.is_array() {
409 return value.as_array_mut();
410 }
411
412 let obj = value.as_object_mut()?;
413 let rows_key = rows_keys
414 .iter()
415 .find(|key| obj.get(*key).map(Value::is_array).unwrap_or(false))?
416 .clone();
417 obj.get_mut(&rows_key).and_then(Value::as_array_mut)
418}
419
420fn redact_unstructured_result(value: &mut Value, marker: &str) {
421 match value {
422 Value::Object(map) => {
423 for field in map.values_mut() {
424 redact_nested_values(field, marker);
425 }
426 }
427 _ => redact_nested_values(value, marker),
428 }
429}
430
431fn redact_nested_values(value: &mut Value, marker: &str) {
432 match value {
433 Value::Array(items) => {
434 for item in items {
435 redact_nested_values(item, marker);
436 }
437 }
438 Value::Object(map) => {
439 for field in map.values_mut() {
440 redact_nested_values(field, marker);
441 }
442 }
443 _ => *value = Value::String(marker.to_string()),
444 }
445}
446
447fn redact_columns(row: &mut Value, denied: &[String], marker: &str) {
461 let Some(map) = row.as_object_mut() else {
462 *row = Value::String(marker.to_string());
463 return;
464 };
465
466 let bare: Vec<Cow<'_, str>> = denied
469 .iter()
470 .map(|s| match s.rsplit_once('.') {
471 Some((_, col)) => Cow::Borrowed(col),
472 None => Cow::Borrowed(s.as_str()),
473 })
474 .collect();
475
476 let truly_bare: Vec<&str> = denied
482 .iter()
483 .filter(|s| !s.contains('.'))
484 .map(|s| s.as_str())
485 .collect();
486
487 let keys: Vec<String> = map.keys().cloned().collect();
488 for key in &keys {
489 let lower = key.to_ascii_lowercase();
490
491 let match_bare = bare.iter().any(|b| b.as_ref() == lower);
493 if match_bare {
494 if let Some(v) = map.get_mut(key) {
495 *v = Value::String(marker.to_string());
496 }
497 continue;
498 }
499
500 if let Some(Value::Object(inner)) = map.get_mut(key) {
506 let inner_keys: Vec<String> = inner.keys().cloned().collect();
507 for col in inner_keys {
508 let col_lower = col.to_ascii_lowercase();
509 let dotted = format!("{}.{}", lower, col_lower);
510 let hit = denied.iter().any(|d| d == &dotted)
511 || truly_bare.iter().any(|b| *b == col_lower);
512 if hit {
513 if let Some(v) = inner.get_mut(&col) {
514 *v = Value::String(marker.to_string());
515 }
516 }
517 }
518 }
519 }
520}
521
522fn redact_pii_in_place(value: &mut Value, patterns: &[(String, Regex)], marker: &str) {
527 match value {
528 Value::String(s) => {
529 let mut out: Cow<'_, str> = Cow::Borrowed(s.as_str());
530 for (_, re) in patterns {
531 if re.is_match(out.as_ref()) {
532 out = Cow::Owned(re.replace_all(out.as_ref(), marker).into_owned());
533 }
534 }
535 if !matches!(&out, Cow::Borrowed(_)) {
536 *s = out.into_owned();
537 }
538 }
539 Value::Array(items) => {
540 for item in items {
541 redact_pii_in_place(item, patterns, marker);
542 }
543 }
544 Value::Object(map) => {
545 for (_k, v) in map.iter_mut() {
546 redact_pii_in_place(v, patterns, marker);
547 }
548 }
549 _ => {}
550 }
551}
552
// Unit tests covering row truncation, column redaction, PII pattern redaction,
// the post-invocation hook verdicts, and grant selection.
#[cfg(test)]
mod tests {
    use super::*;
    use chio_core::capability::{Operation, ToolGrant};

    // Builds a wildcard-tool grant on server "srv" carrying the given constraints.
    fn grant(constraints: Vec<Constraint>) -> ToolGrant {
        ToolGrant {
            server_id: "srv".into(),
            tool_name: "*".into(),
            operations: vec![Operation::Invoke],
            constraints,
            max_invocations: None,
            max_cost_per_invocation: None,
            max_total_cost: None,
            dpop_required: None,
        }
    }

    // Builds a scope holding a single grant with the given constraints.
    fn scope(constraints: Vec<Constraint>) -> ChioScope {
        ChioScope {
            grants: vec![grant(constraints)],
            ..Default::default()
        }
    }

    // A row limit shortens the rows array to that many entries.
    #[test]
    fn truncates_rows_to_max_rows_returned() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![Constraint::MaxRowsReturned(2)]);
        let mut value = serde_json::json!({
            "rows": [
                {"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}
            ]
        });
        guard.redact_result(&scope, &mut value);
        let rows = value.get("rows").and_then(|v| v.as_array()).unwrap();
        assert_eq!(rows.len(), 2);
    }

    // Without a row limit the rows array keeps its length.
    #[test]
    fn leaves_rows_untouched_when_no_max_rows() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![]);
        let mut value = serde_json::json!({"rows": [{"id": 1}, {"id": 2}]});
        guard.redact_result(&scope, &mut value);
        assert_eq!(value["rows"].as_array().unwrap().len(), 2);
    }

    // An unqualified denylist entry hides the matching column and nothing else.
    #[test]
    fn redacts_bare_column_name() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![Constraint::ColumnDenylist(vec!["email".into()])]);
        let mut value = serde_json::json!({
            "rows": [
                {"id": 1, "email": "a@b.com"},
                {"id": 2, "email": "c@d.com"}
            ]
        });
        guard.redact_result(&scope, &mut value);
        for row in value["rows"].as_array().unwrap() {
            assert_eq!(row["email"], "[REDACTED]");
            assert_ne!(row["id"], "[REDACTED]");
        }
    }

    // A qualified entry (`users.email`) also hides a flat `email` column.
    #[test]
    fn redacts_qualified_column_name_on_flat_row() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![Constraint::ColumnDenylist(vec!["users.email".into()])]);
        let mut value = serde_json::json!({
            "rows": [{"id": 1, "email": "a@b.com"}]
        });
        guard.redact_result(&scope, &mut value);
        assert_eq!(value["rows"][0]["email"], "[REDACTED]");
    }

    // A qualified entry hides the column inside a nested per-table object.
    #[test]
    fn redacts_qualified_column_name_on_nested_row() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![Constraint::ColumnDenylist(vec!["users.email".into()])]);
        let mut value = serde_json::json!({
            "rows": [
                {"users": {"id": 1, "email": "a@b.com"}},
                {"users": {"id": 2, "email": "c@d.com"}}
            ]
        });
        guard.redact_result(&scope, &mut value);
        for row in value["rows"].as_array().unwrap() {
            assert_eq!(row["users"]["email"], "[REDACTED]");
            assert_ne!(row["users"]["id"], "[REDACTED]");
        }
    }

    // Row truncation and column redaction are both applied to the same response.
    #[test]
    fn truncation_then_redaction_compose() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![
            Constraint::MaxRowsReturned(1),
            Constraint::ColumnDenylist(vec!["email".into()]),
        ]);
        let mut value = serde_json::json!({
            "rows": [
                {"id": 1, "email": "a@b.com"},
                {"id": 2, "email": "c@d.com"}
            ]
        });
        guard.redact_result(&scope, &mut value);
        let rows = value["rows"].as_array().unwrap();
        assert_eq!(rows.len(), 1);
        assert_eq!(rows[0]["email"], "[REDACTED]");
    }

    // A configured PII pattern replaces matches inside string values.
    #[test]
    fn pii_patterns_redact_strings() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig {
            redact_pii_patterns: vec![r"\b\d{3}-\d{2}-\d{4}\b".into()],
            ..Default::default()
        })
        .unwrap();
        let scope = scope(vec![]);
        let mut value = serde_json::json!({
            "rows": [{"id": 1, "note": "SSN: 123-45-6789"}]
        });
        guard.redact_result(&scope, &mut value);
        let note = value["rows"][0]["note"].as_str().unwrap();
        assert!(note.contains("[REDACTED]"));
        assert!(!note.contains("123-45-6789"));
    }

    // A pattern that does not compile makes guard construction fail.
    #[test]
    fn invalid_pii_pattern_rejects_guard_construction() {
        let error = QueryResultGuard::new(QueryResultGuardConfig {
            redact_pii_patterns: vec!["[".into()],
            ..Default::default()
        })
        .unwrap_err();
        assert!(error.contains("invalid query_result.redact_pii_patterns entry"));
    }

    // A response that is itself an array is treated as the rows.
    #[test]
    fn top_level_array_is_treated_as_rows() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![Constraint::MaxRowsReturned(1)]);
        let mut value = serde_json::json!([1, 2, 3]);
        guard.redact_result(&scope, &mut value);
        assert_eq!(value, serde_json::json!([1]));
    }

    // With constraints present but no recognised rows key, every scalar is redacted.
    #[test]
    fn constrained_unknown_row_key_is_redacted_fail_closed() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![Constraint::ColumnDenylist(vec!["email".into()])]);
        let mut value = serde_json::json!({
            "items": [
                {"id": 1, "email": "a@b.com"},
                {"id": 2, "email": "c@d.com"}
            ],
            "count": 2
        });
        guard.redact_result(&scope, &mut value);
        assert_eq!(
            value,
            serde_json::json!({
                "items": [
                    {"id": "[REDACTED]", "email": "[REDACTED]"},
                    {"id": "[REDACTED]", "email": "[REDACTED]"}
                ],
                "count": "[REDACTED]"
            })
        );
    }

    // The hook returns `Redact` carrying the modified response when something changed.
    #[test]
    fn post_invocation_hook_returns_redact_when_modified() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![Constraint::MaxRowsReturned(1)]);
        let hook = guard.as_hook(scope);
        let value = serde_json::json!({"rows": [{"id": 1}, {"id": 2}]});
        let context = PostInvocationContext::synthetic("sql");
        match hook.inspect(&context, &value) {
            PostInvocationVerdict::Redact(v) => {
                assert_eq!(v["rows"].as_array().unwrap().len(), 1);
            }
            other => panic!("expected Redact, got {other:?}"),
        }
    }

    // The hook returns `Allow` when redaction leaves the response unchanged.
    #[test]
    fn post_invocation_hook_returns_allow_when_unchanged() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![]);
        let hook = guard.as_hook(scope);
        let value = serde_json::json!({"rows": [{"id": 1}]});
        let context = PostInvocationContext::synthetic("sql");
        match hook.inspect(&context, &value) {
            PostInvocationVerdict::Allow => {}
            other => panic!("expected Allow, got {other:?}"),
        }
    }

    // Only the guard's registered name is asserted here; `evaluate` is not called.
    #[test]
    fn pre_invocation_guard_impl_allows_everything() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        assert_eq!(
            <QueryResultGuard as chio_kernel::Guard>::name(&guard),
            "query-result"
        );
    }

    // With no matched grant, the smallest row limit across all grants applies.
    #[test]
    fn strictest_max_rows_wins() {
        let g = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope_multi = ChioScope {
            grants: vec![
                grant(vec![Constraint::MaxRowsReturned(10)]),
                grant(vec![Constraint::MaxRowsReturned(3)]),
            ],
            ..Default::default()
        };
        let mut value = serde_json::json!({
            "rows": [
                {"id": 1}, {"id": 2}, {"id": 3}, {"id": 4}, {"id": 5}
            ]
        });
        g.redact_result(&scope_multi, &mut value);
        assert_eq!(value["rows"].as_array().unwrap().len(), 3);
    }

    // With a matched grant index, only that grant's constraints are enforced.
    #[test]
    fn matched_grant_constraints_override_other_grants() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope_multi = ChioScope {
            grants: vec![
                grant(vec![
                    Constraint::MaxRowsReturned(1),
                    Constraint::ColumnDenylist(vec!["email".into()]),
                ]),
                grant(vec![
                    Constraint::MaxRowsReturned(5),
                    Constraint::ColumnDenylist(vec!["ssn".into()]),
                ]),
            ],
            ..Default::default()
        };
        let mut value = serde_json::json!({
            "rows": [
                {"id": 1, "email": "a@b.com", "ssn": "123-45-6789"},
                {"id": 2, "email": "c@d.com", "ssn": "987-65-4321"}
            ]
        });

        guard.redact_result_for_request(&scope_multi, Some(1), &mut value);

        let rows = value["rows"].as_array().unwrap();
        assert_eq!(rows.len(), 2);
        assert_eq!(rows[0]["ssn"], "[REDACTED]");
        assert_eq!(rows[1]["ssn"], "[REDACTED]");
        assert_ne!(rows[0]["email"], "[REDACTED]");
        assert_ne!(rows[1]["email"], "[REDACTED]");
    }

    // Rows stored under another default key ("results") are located and truncated.
    #[test]
    fn alternative_rows_key_respected() {
        let guard = QueryResultGuard::new(QueryResultGuardConfig::default()).unwrap();
        let scope = scope(vec![Constraint::MaxRowsReturned(1)]);
        let mut value = serde_json::json!({
            "results": [{"id": 1}, {"id": 2}, {"id": 3}]
        });
        guard.redact_result(&scope, &mut value);
        assert_eq!(value["results"].as_array().unwrap().len(), 1);
    }
}