Skip to main content

heartbit_core/config/
guardrails.rs

1use serde::{Deserialize, Serialize};
2
3use crate::Error;
4
5/// Top-level guardrails configuration.
6///
7/// Enables declarative guardrail setup via TOML. Each sub-section creates
8/// the corresponding guardrail and adds it to the agent's guardrail chain.
9///
10/// Use [`GuardrailsConfig::build`] to convert this config into runtime
11/// guardrail instances.
12#[derive(Debug, Clone, Default, Deserialize, Serialize)]
13pub struct GuardrailsConfig {
14    /// Prompt injection classifier configuration.
15    #[serde(default)]
16    pub injection: Option<InjectionConfig>,
17    /// PII detection and redaction configuration.
18    #[serde(default)]
19    pub pii: Option<PiiConfig>,
20    /// Declarative tool access control rules.
21    #[serde(default)]
22    pub tool_policy: Option<ToolPolicyConfig>,
23    /// LLM-as-judge safety evaluation.
24    #[serde(default)]
25    pub llm_judge: Option<LlmJudgeConfig>,
26    /// Secret scanning configuration.
27    #[serde(default)]
28    pub secret_scan: Option<SecretScanConfig>,
29    /// Behavioral monitoring configuration.
30    #[serde(default)]
31    pub behavioral: Option<BehavioralConfig>,
32    /// Action budget guardrail configuration.
33    #[serde(default)]
34    pub action_budget: Option<ActionBudgetConfig>,
35}
36
37/// Configuration for the injection classifier guardrail.
38#[derive(Debug, Clone, Deserialize, Serialize)]
39pub struct InjectionConfig {
40    /// Detection threshold (0.0–1.0). Default: 0.5.
41    #[serde(default = "default_injection_threshold")]
42    pub threshold: f32,
43    /// `"warn"` or `"deny"`. Default: `"deny"`.
44    #[serde(default = "default_injection_mode")]
45    pub mode: String,
46}
47
/// Serde default for [`InjectionConfig::threshold`].
fn default_injection_threshold() -> f32 {
    0.5_f32
}
51
/// Serde default for [`InjectionConfig::mode`].
fn default_injection_mode() -> String {
    String::from("deny")
}
55
56/// Configuration for the PII detection guardrail.
57#[derive(Debug, Clone, Deserialize, Serialize)]
58pub struct PiiConfig {
59    /// `"redact"`, `"warn"`, or `"deny"`. Default: `"redact"`.
60    #[serde(default = "default_pii_action")]
61    pub action: String,
62    /// Which detectors to enable. Default: all built-in.
63    #[serde(default = "default_pii_detectors")]
64    pub detectors: Vec<String>,
65}
66
/// Serde default for [`PiiConfig::action`].
fn default_pii_action() -> String {
    String::from("redact")
}
70
71pub(super) fn default_pii_detectors() -> Vec<String> {
72    vec![
73        "email".into(),
74        "phone".into(),
75        "ssn".into(),
76        "credit_card".into(),
77    ]
78}
79
80/// Configuration for the secret scanning guardrail.
81#[derive(Debug, Clone, Deserialize, Serialize)]
82pub struct SecretScanConfig {
83    /// Action: `"redact"` or `"deny"`. Default: `"redact"`.
84    #[serde(default = "default_secret_action")]
85    pub action: String,
86    /// Additional custom patterns as label+regex pairs.
87    #[serde(default)]
88    pub custom_patterns: Vec<SecretPatternConfig>,
89}
90
91/// A custom secret pattern (label + regex).
92#[derive(Debug, Clone, Deserialize, Serialize)]
93pub struct SecretPatternConfig {
94    /// Human-readable label for the secret type.
95    pub label: String,
96    /// Regex pattern to match.
97    pub pattern: String,
98}
99
/// Serde default for [`SecretScanConfig::action`].
fn default_secret_action() -> String {
    String::from("redact")
}
103
104/// A single behavioral rule in TOML format.
105#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
106pub struct BehavioralRuleConfig {
107    /// Rule type: `"frequency_limit"`, `"suspicious_sequence"`, or `"denial_spike"`.
108    #[serde(rename = "type")]
109    pub rule_type: String,
110    /// Tool name pattern (for `frequency_limit`).
111    #[serde(default)]
112    pub tool_pattern: Option<String>,
113    /// Maximum count threshold (for `frequency_limit`).
114    #[serde(default)]
115    pub max_count: Option<usize>,
116    /// Time window in seconds (for `frequency_limit` and `denial_spike`).
117    #[serde(default)]
118    pub window_seconds: Option<u64>,
119    /// First tool pattern in a suspicious sequence.
120    #[serde(default)]
121    pub first: Option<String>,
122    /// Second tool pattern in a suspicious sequence.
123    #[serde(default)]
124    pub then: Option<String>,
125    /// Turn window for suspicious sequences.
126    #[serde(default)]
127    pub within_turns: Option<usize>,
128    /// Maximum denied calls before spike triggers (for `denial_spike`).
129    #[serde(default)]
130    pub max_denied: Option<usize>,
131}
132
133/// Behavioral monitoring guardrail configuration.
134#[derive(Debug, Clone, Deserialize, Serialize, Default, PartialEq)]
135pub struct BehavioralConfig {
136    /// Maximum entries in the sliding window. Default: 200.
137    #[serde(default = "default_behavioral_window_size")]
138    pub window_size: usize,
139    /// Time-to-live for window entries in seconds. Default: 1800 (30 min).
140    #[serde(default = "default_behavioral_window_ttl")]
141    pub window_ttl_seconds: u64,
142    /// Behavioral rules to enforce.
143    #[serde(default)]
144    pub rules: Vec<BehavioralRuleConfig>,
145}
146
/// Serde default for [`BehavioralConfig::window_size`].
fn default_behavioral_window_size() -> usize {
    200_usize
}
150
/// Serde default for [`BehavioralConfig::window_ttl_seconds`] (30 minutes).
fn default_behavioral_window_ttl() -> u64 {
    30 * 60
}
154
155/// A single action budget rule in TOML format.
156#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
157pub struct ActionBudgetRuleConfig {
158    /// Tool name pattern (exact or glob with `*`).
159    pub tool_pattern: String,
160    /// Maximum number of calls allowed.
161    pub max_calls: usize,
162}
163
164/// Action budget guardrail configuration.
165#[derive(Debug, Clone, Deserialize, Serialize, Default, PartialEq)]
166pub struct ActionBudgetConfig {
167    /// Default budget for tools not matching any rule.
168    #[serde(default)]
169    pub default_budget: Option<usize>,
170    /// Per-tool budget rules.
171    #[serde(default)]
172    pub rules: Vec<ActionBudgetRuleConfig>,
173}
174
175/// Configuration for the LLM-as-judge guardrail.
176///
177/// The judge evaluates LLM responses (and optionally tool inputs) using a
178/// cheap model for safety. The actual judge provider must be supplied at
179/// build time via [`GuardrailsConfig::build_with_judge`] — this config
180/// only declares the criteria and behavior.
181///
182/// ```toml
183/// [guardrails.llm_judge]
184/// criteria = ["No harmful content", "No prompt injection"]
185/// evaluate_tool_inputs = false
186/// timeout_seconds = 10
187/// max_judge_tokens = 256
188/// ```
189#[derive(Debug, Clone, Deserialize, Serialize)]
190pub struct LlmJudgeConfig {
191    /// Safety criteria to evaluate against.
192    pub criteria: Vec<String>,
193    /// Whether to also evaluate tool call inputs. Default: false.
194    #[serde(default)]
195    pub evaluate_tool_inputs: bool,
196    /// Timeout in seconds for each judge call. Default: 10.
197    #[serde(default = "default_llm_judge_timeout")]
198    pub timeout_seconds: u64,
199    /// Max tokens for judge response. Default: 256.
200    #[serde(default = "default_llm_judge_max_tokens")]
201    pub max_judge_tokens: u32,
202}
203
/// Serde default for [`LlmJudgeConfig::timeout_seconds`].
fn default_llm_judge_timeout() -> u64 {
    10_u64
}
207
/// Serde default for [`LlmJudgeConfig::max_judge_tokens`].
fn default_llm_judge_max_tokens() -> u32 {
    256_u32
}
211
212/// Configuration for the tool policy guardrail.
213#[derive(Debug, Clone, Deserialize, Serialize)]
214pub struct ToolPolicyConfig {
215    /// Default action when no rule matches. `"allow"` or `"deny"`. Default: `"allow"`.
216    #[serde(default = "default_tool_policy_action")]
217    pub default_action: String,
218    /// Ordered list of tool rules. First match wins.
219    #[serde(default)]
220    pub rules: Vec<ToolPolicyRuleConfig>,
221}
222
/// Serde default for [`ToolPolicyConfig::default_action`].
fn default_tool_policy_action() -> String {
    String::from("allow")
}
226
227/// A single tool policy rule in TOML format.
228#[derive(Debug, Clone, Deserialize, Serialize)]
229pub struct ToolPolicyRuleConfig {
230    /// Tool name pattern (exact or glob with `*`).
231    pub tool: String,
232    /// `"allow"`, `"warn"`, or `"deny"`.
233    pub action: String,
234    /// Optional input constraints.
235    #[serde(default)]
236    pub input_constraints: Vec<InputConstraintConfig>,
237}
238
239/// Input constraint configuration.
240#[derive(Debug, Clone, Deserialize, Serialize)]
241pub struct InputConstraintConfig {
242    /// JSON path to the field (e.g., `"command"`, `"path"`).
243    pub path: String,
244    /// Regex pattern — if the field matches, the constraint is violated.
245    #[serde(default)]
246    pub deny_pattern: Option<String>,
247    /// Maximum byte length for the field's string value.
248    #[serde(default)]
249    pub max_length: Option<usize>,
250}
251
252impl GuardrailsConfig {
253    /// Returns `true` if no guardrails are configured.
254    pub fn is_empty(&self) -> bool {
255        self.injection.is_none()
256            && self.pii.is_none()
257            && self.tool_policy.is_none()
258            && self.llm_judge.is_none()
259            && self.secret_scan.is_none()
260            && self.behavioral.is_none()
261            && self.action_budget.is_none()
262    }
263
264    /// Build runtime guardrail instances from this configuration.
265    ///
266    /// Returns a `Vec<Arc<dyn Guardrail>>` ready to be passed to
267    /// `AgentRunnerBuilder::guardrails()` or `OrchestratorBuilder::guardrails()`.
268    ///
269    /// Order: injection → PII → tool policy → LLM judge. Each section that
270    /// is `Some` creates the corresponding guardrail instance.
271    ///
272    /// **Note:** If `[guardrails.llm_judge]` is configured, you must use
273    /// [`build_with_judge`](Self::build_with_judge) instead, passing the
274    /// judge provider. This method ignores the `llm_judge` section.
275    pub fn build(
276        &self,
277    ) -> Result<Vec<std::sync::Arc<dyn crate::agent::guardrail::Guardrail>>, Error> {
278        self.build_with_judge(None)
279    }
280
281    /// Build runtime guardrail instances, optionally including the LLM judge.
282    ///
283    /// Pass `Some(provider)` to enable the LLM-as-judge guardrail when
284    /// `[guardrails.llm_judge]` is configured. The provider should be a
285    /// cheap model (e.g., Haiku, Gemini Flash) separate from the main agent.
286    pub fn build_with_judge(
287        &self,
288        judge_provider: Option<std::sync::Arc<crate::llm::BoxedProvider>>,
289    ) -> Result<Vec<std::sync::Arc<dyn crate::agent::guardrail::Guardrail>>, Error> {
290        use std::sync::Arc;
291
292        use crate::agent::guardrail::Guardrail;
293        use crate::agent::guardrails::injection::{GuardrailMode, InjectionClassifierGuardrail};
294        use crate::agent::guardrails::pii::{PiiAction, PiiDetector, PiiGuardrail};
295        use crate::agent::guardrails::tool_policy::{
296            InputConstraint, ToolPolicyGuardrail, ToolRule,
297        };
298
299        let mut guardrails: Vec<Arc<dyn Guardrail>> = Vec::new();
300
301        // 1. Injection classifier
302        if let Some(cfg) = &self.injection {
303            let mode = match cfg.mode.as_str() {
304                "warn" => GuardrailMode::Warn,
305                "deny" => GuardrailMode::Deny,
306                other => {
307                    return Err(Error::Config(format!(
308                        "invalid injection mode: `{other}` (expected \"warn\" or \"deny\")"
309                    )));
310                }
311            };
312            guardrails.push(Arc::new(InjectionClassifierGuardrail::new(
313                cfg.threshold,
314                mode,
315            )));
316        }
317
318        // 2. PII detection
319        if let Some(cfg) = &self.pii {
320            let action = match cfg.action.as_str() {
321                "redact" => PiiAction::Redact,
322                "warn" => PiiAction::Warn,
323                "deny" => PiiAction::Deny,
324                other => {
325                    return Err(Error::Config(format!(
326                        "invalid PII action: `{other}` (expected \"redact\", \"warn\", or \"deny\")"
327                    )));
328                }
329            };
330            let detectors: Vec<PiiDetector> = cfg
331                .detectors
332                .iter()
333                .map(|name| match name.as_str() {
334                    "email" => Ok(PiiDetector::Email),
335                    "phone" => Ok(PiiDetector::Phone),
336                    "ssn" => Ok(PiiDetector::Ssn),
337                    "credit_card" => Ok(PiiDetector::CreditCard),
338                    other => Err(Error::Config(format!(
339                        "unknown PII detector: `{other}` (expected email, phone, ssn, or credit_card)"
340                    ))),
341                })
342                .collect::<Result<_, _>>()?;
343            guardrails.push(Arc::new(PiiGuardrail::new(detectors, action)));
344        }
345
346        // 3. Tool policy
347        if let Some(cfg) = &self.tool_policy {
348            let default_action = parse_guard_action(&cfg.default_action)?;
349            let mut rules = Vec::with_capacity(cfg.rules.len());
350            for rule_cfg in &cfg.rules {
351                let action = parse_guard_action(&rule_cfg.action)?;
352                let mut constraints = Vec::new();
353                for ic in &rule_cfg.input_constraints {
354                    if let Some(pattern_str) = &ic.deny_pattern {
355                        let pattern = regex::Regex::new(pattern_str).map_err(|e| {
356                            Error::Config(format!("invalid deny_pattern `{pattern_str}`: {e}"))
357                        })?;
358                        constraints.push(InputConstraint::FieldDenied {
359                            path: ic.path.clone(),
360                            pattern,
361                        });
362                    }
363                    if let Some(max) = ic.max_length {
364                        constraints.push(InputConstraint::MaxFieldLength {
365                            path: ic.path.clone(),
366                            max_bytes: max,
367                        });
368                    }
369                }
370                rules.push(ToolRule {
371                    tool_pattern: rule_cfg.tool.clone(),
372                    action,
373                    input_constraints: constraints,
374                });
375            }
376            guardrails.push(Arc::new(ToolPolicyGuardrail::new(rules, default_action)));
377        }
378
379        // 4. LLM-as-judge
380        if let Some(cfg) = &self.llm_judge {
381            if let Some(provider) = judge_provider {
382                let mut builder =
383                    crate::agent::guardrails::llm_judge::LlmJudgeGuardrail::builder(provider)
384                        .criteria(cfg.criteria.clone())
385                        .timeout(std::time::Duration::from_secs(cfg.timeout_seconds))
386                        .max_judge_tokens(cfg.max_judge_tokens);
387                if cfg.evaluate_tool_inputs {
388                    builder = builder.evaluate_tool_inputs(true);
389                }
390                let judge = builder
391                    .build()
392                    .map_err(|e| Error::Config(format!("llm_judge guardrail build failed: {e}")))?;
393                guardrails.push(Arc::new(judge));
394            } else {
395                tracing::warn!(
396                    "[guardrails.llm_judge] is configured but no judge provider was supplied — \
397                     LLM judge guardrail will NOT be active. Use build_with_judge(Some(provider))."
398                );
399            }
400        }
401
402        // 5. Secret scanner
403        if let Some(cfg) = &self.secret_scan {
404            use crate::agent::guardrails::secret_scanner::{SecretAction, SecretScannerGuardrail};
405
406            let action = match cfg.action.as_str() {
407                "redact" => SecretAction::Redact,
408                "deny" => SecretAction::Deny,
409                other => {
410                    return Err(Error::Config(format!(
411                        "invalid secret_scan action: `{other}` (expected \"redact\" or \"deny\")"
412                    )));
413                }
414            };
415            let mut builder = SecretScannerGuardrail::builder().action(action);
416            for cp in &cfg.custom_patterns {
417                let re = regex::Regex::new(&cp.pattern).map_err(|e| {
418                    Error::Config(format!(
419                        "invalid secret_scan custom pattern `{}`: {e}",
420                        cp.label
421                    ))
422                })?;
423                builder = builder.custom_pattern(&cp.label, re);
424            }
425            guardrails.push(Arc::new(builder.build()));
426        }
427
428        // 6. Behavioral monitor
429        if let Some(cfg) = &self.behavioral {
430            use crate::agent::guardrails::behavioral::{BehaviorRule, BehavioralMonitorGuardrail};
431
432            let mut builder = BehavioralMonitorGuardrail::builder()
433                .window_size(cfg.window_size)
434                .window_ttl(std::time::Duration::from_secs(cfg.window_ttl_seconds));
435
436            for rule_cfg in &cfg.rules {
437                let rule = match rule_cfg.rule_type.as_str() {
438                    "frequency_limit" => BehaviorRule::FrequencyLimit {
439                        tool_pattern: rule_cfg.tool_pattern.clone().unwrap_or_else(|| "*".into()),
440                        max_count: rule_cfg.max_count.unwrap_or(10),
441                        window: std::time::Duration::from_secs(
442                            rule_cfg.window_seconds.unwrap_or(60),
443                        ),
444                    },
445                    "suspicious_sequence" => BehaviorRule::SuspiciousSequence {
446                        first: rule_cfg.first.clone().unwrap_or_default(),
447                        then: rule_cfg.then.clone().unwrap_or_default(),
448                        within_turns: rule_cfg.within_turns.unwrap_or(3),
449                    },
450                    "denial_spike" => BehaviorRule::DenialSpike {
451                        max_denied: rule_cfg.max_denied.unwrap_or(5),
452                        window: std::time::Duration::from_secs(
453                            rule_cfg.window_seconds.unwrap_or(60),
454                        ),
455                    },
456                    other => {
457                        return Err(Error::Config(format!(
458                            "unknown behavioral rule type: `{other}` \
459                             (expected \"frequency_limit\", \"suspicious_sequence\", or \"denial_spike\")"
460                        )));
461                    }
462                };
463                builder = builder.rule(rule);
464            }
465            guardrails.push(Arc::new(builder.build()));
466        }
467
468        // 7. Action budget
469        if let Some(cfg) = &self.action_budget {
470            use crate::agent::guardrails::action_budget::ActionBudgetGuardrail;
471
472            let mut builder = ActionBudgetGuardrail::builder();
473            if let Some(default) = cfg.default_budget {
474                builder = builder.default_budget(default);
475            }
476            for rule in &cfg.rules {
477                builder = builder.rule(&rule.tool_pattern, rule.max_calls);
478            }
479            guardrails.push(Arc::new(builder.build()));
480        }
481
482        Ok(guardrails)
483    }
484}
485
486/// Parse a guard action string from config (`"allow"`, `"warn"`, `"deny"`).
487fn parse_guard_action(s: &str) -> Result<crate::agent::guardrail::GuardAction, Error> {
488    match s {
489        "allow" => Ok(crate::agent::guardrail::GuardAction::Allow),
490        "warn" => Ok(crate::agent::guardrail::GuardAction::warn(String::new())),
491        "deny" => Ok(crate::agent::guardrail::GuardAction::deny(String::new())),
492        other => Err(Error::Config(format!(
493            "invalid action: `{other}` (expected \"allow\", \"warn\", or \"deny\")"
494        ))),
495    }
496}