Skip to main content

gaze/
policy.rs

1use std::env;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::str::FromStr;
5
6use serde::Deserialize;
7use thiserror::Error;
8
9use crate::{Action, LocaleTag, PiiClass, RulepackDict};
10
/// Value used for `ner.threshold` when a policy's `[ner]` table does not set one.
///
/// Also the threshold produced by `NerPolicy::default()`.
pub const DEFAULT_NER_THRESHOLD: f32 = 0.3;
12
/// Loaded redaction policy from a TOML configuration file.
///
/// Defines which rulepacks activate, which recognizers are enabled, and the locale chain.
/// Load with [`Policy::load`] for library use or [`Policy::load_for_cli`] for CLI hosts.
/// Both signatures take `&std::path::Path`.
///
/// Production deployments **must** use a policy -- the no-policy builder path is for
/// development smoke-testing only and has an unauditable detection posture.
///
/// See `docs/policy.md` in the repository for the full TOML schema reference.
#[derive(Debug, Clone, PartialEq, Default)]
#[non_exhaustive]
pub struct Policy {
    /// Session settings parsed from the `[session]` table.
    pub session: SessionPolicy,
    /// Custom recognizers parsed from `[[policy.custom_recognizers]]`, in file order.
    pub detectors: Vec<DetectorSpec>,
    /// Term dictionaries built for dictionary recognizers that supply their own terms
    /// (inline `terms` and/or `terms_file`).
    pub dictionaries: Vec<RulepackDict>,
    /// Rules parsed from `[[rule]]`, in file order. A policy loaded from TOML always has
    /// at least one; the derived `Default` leaves this empty.
    pub rules: Vec<RuleSpec>,
    /// Settings from the optional `[ner]` table; `None` when the table is absent.
    pub ner: Option<NerPolicy>,
    /// Rulepack selection from `[policy.rulepacks]`. When that table is absent, loading
    /// falls back to the bundled `core` rulepack.
    pub rulepacks: RulepackPolicy,
    /// Locale tags from `[locale].active`; `None` when the table is absent or the list is empty.
    pub locale: Option<Vec<LocaleTag>>,
}
34
/// Session settings taken from the `[session]` table of a policy file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct SessionPolicy {
    /// Session scope selected by `session.scope`.
    pub scope: SessionScope,
    /// Value of `session.ttl_secs`. Loading requires it (and requires it to be non-zero)
    /// for the persistent scope; for other scopes it is optional but still must not be zero.
    pub ttl_secs: Option<u64>,
}
41
42impl Default for SessionPolicy {
43    fn default() -> Self {
44        Self {
45            scope: SessionScope::Ephemeral,
46            ttl_secs: None,
47        }
48    }
49}
50
/// Session scope named by `session.scope` in a policy file.
///
/// The accepted spellings are the lowercase variant names; see [`SessionScope::parse`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum SessionScope {
    /// Written as `"ephemeral"`; the scope used by `SessionPolicy::default()`.
    Ephemeral,
    /// Written as `"conversation"`.
    Conversation,
    /// Written as `"persistent"`; policy loading requires `session.ttl_secs` for this scope.
    Persistent,
}
58
59impl SessionScope {
60    pub fn parse(value: &str) -> Result<Self, PolicyError> {
61        match value {
62            "ephemeral" => Ok(SessionScope::Ephemeral),
63            "conversation" => Ok(SessionScope::Conversation),
64            "persistent" => Ok(SessionScope::Persistent),
65            other => Err(PolicyError::SessionScopeUnknown {
66                value: other.to_string(),
67            }),
68        }
69    }
70}
71
72impl FromStr for SessionScope {
73    type Err = PolicyError;
74
75    fn from_str(value: &str) -> Result<Self, Self::Err> {
76        Self::parse(value)
77    }
78}
79
/// One custom recognizer declared under `[[policy.custom_recognizers]]`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct DetectorSpec {
    /// Recognizer implementation selected by the `kind` key.
    pub kind: DetectorKind,
    /// Recognizer name from the `name` key.
    pub name: String,
    /// Regex source text. `Some` for regex recognizers and `None` for dictionary
    /// recognizers; for unrecognised kinds it is passed through as written.
    pub pattern: Option<String>,
    /// PII class assigned to matches.
    pub class: PiiClass,
    /// For dictionary recognizers, the dictionary to match against: `terms_from_context`
    /// if set, else `dictionary`, else the recognizer's own name. `None` otherwise.
    pub dictionary_name: Option<String>,
    /// Value of `case_sensitive` for dictionary and unrecognised kinds; the loader
    /// always stores `false` for regex recognizers.
    pub case_sensitive: bool,
    /// Value of `token_family`; the loader substitutes `"counter"` when the key is omitted.
    pub token_family: String,
}
91
92impl Default for DetectorSpec {
93    fn default() -> Self {
94        Self {
95            kind: DetectorKind::Regex,
96            name: String::new(),
97            pattern: None,
98            class: PiiClass::Email,
99            dictionary_name: None,
100            case_sensitive: false,
101            token_family: "counter".to_string(),
102        }
103    }
104}
105
/// Implementation kind of a custom recognizer, taken from its `kind` key.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum DetectorKind {
    /// `kind = "regex"`.
    Regex,
    /// `kind = "dictionary"`.
    Dictionary,
    /// Any other `kind` string, kept verbatim. The loader accepts such recognizers
    /// without validating them.
    Unknown(String),
}
113
/// Settings from the optional `[ner]` table of a policy file.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct NerPolicy {
    /// Value of `ner.model_dir`, with a leading `~/` expanded against `$HOME` at load time.
    pub model_dir: Option<PathBuf>,
    /// Value of `ner.locale`; when loaded from TOML it has been accepted by
    /// [`validate_ner_locale`].
    pub locale: Option<String>,
    /// Value of `ner.threshold`, or [`DEFAULT_NER_THRESHOLD`] when omitted. Loading
    /// rejects values outside `0.0..=1.0`.
    pub threshold: f32,
}
121
122impl Default for NerPolicy {
123    fn default() -> Self {
124        Self {
125            model_dir: None,
126            locale: None,
127            threshold: DEFAULT_NER_THRESHOLD,
128        }
129    }
130}
131
/// Rulepack selection from the `[policy.rulepacks]` table.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[non_exhaustive]
pub struct RulepackPolicy {
    /// Names listed under `bundled`. Not validated in this module.
    pub bundled: Vec<String>,
    /// Entries listed under `paths`, each with a leading `~/` expanded against `$HOME`.
    pub paths: Vec<PathBuf>,
}
138
/// One `[[rule]]` entry of a policy file, discriminated by its `kind` key.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum RuleSpec {
    /// `kind = "class"`: applies `action` to the given PII class.
    Class { class: PiiClass, action: Action },
    /// `kind = "column"`: applies `action` to the named column. Rejected by
    /// [`Policy::load_for_cli`].
    Column { column: String, action: Action },
    /// `kind = "default"`: carries only an action.
    Default { action: Action },
}
146
/// Errors produced while reading, parsing, or validating a policy file.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum PolicyError {
    /// The file contents could not be deserialized into the policy schema (invalid TOML,
    /// wrong types, or unknown keys).
    #[error("failed to parse policy.toml: {0}")]
    TomlParse(#[source] toml::de::Error),
    /// Reading the policy file, or a dictionary `terms_file`, failed.
    #[error("failed to read policy file: {0}")]
    Io(#[source] std::io::Error),
    /// A `class` value was not recognised; also used when a class rule omits `class`.
    #[error("unknown pii class: {0}")]
    UnknownClass(String),
    /// A regex recognizer's `pattern` failed to compile.
    #[error("invalid regex for detector '{name}': {source}")]
    BadRegex {
        name: String,
        #[source]
        source: regex::Error,
    },
    /// A regex recognizer was rejected by the token-shape shadowing check.
    #[error(
        "regex detector '{name}' shadows Gaze token shape sample '{shadowed_shape}' with pattern '{pattern}'"
    )]
    TokenShapeShadow {
        name: String,
        pattern: String,
        shadowed_shape: String,
    },
    /// A dictionary recognizer is misconfigured. NOTE(review): the loader also uses this
    /// variant for a regex recognizer that omits `pattern`.
    #[error("invalid dictionary detector '{name}': {reason}")]
    BadDictionary { name: String, reason: String },
    /// `session.scope = "persistent"` was given without `session.ttl_secs`.
    #[error("session.ttl_secs is required when session.scope = \"persistent\"")]
    MissingTtl,
    /// `session.ttl_secs` is invalid (zero). NOTE(review): this module also reuses the
    /// variant for unrelated failures (unknown rule kind/action, missing `rule.column`,
    /// unsupported locale tag, unset `HOME`), so the rendered prefix can be misleading.
    #[error("invalid session.ttl_secs: {0}")]
    BadTtl(String),
    /// The policy contains no `[[rule]]` entries.
    #[error("policy must define at least one rule")]
    NoRules,
    /// The policy has no custom recognizers and selects no rulepacks.
    #[error("policy must define at least one detector")]
    NoDetectors,
    /// The policy still uses top-level `[[detector]]` tables; the payload is a
    /// migration-guide URL.
    #[error(
        "legacy [[detector]] is unsupported in v0.4; migrate to [[policy.custom_recognizers]]: {0}"
    )]
    LegacyDetectorUnsupported(&'static str),
    /// NER loading failed. Not constructed in this file.
    #[error("ner load error: {0}")]
    NerLoad(String),
    /// `ner.threshold` is outside `0.0..=1.0` (this includes NaN).
    #[error("ner.threshold must be between 0.0 and 1.0 inclusive, got {value}")]
    NerThresholdOutOfRange { value: f32 },
    /// `session.scope` is not one of the accepted scope names.
    #[error("session.scope must be one of ephemeral, conversation, persistent, got {value}")]
    SessionScopeUnknown { value: String },
    /// `ner.locale` could not be parsed as a locale tag.
    #[error("ner.locale must be a BCP47 locale tag, got {value}")]
    NerLocaleUnsupported { value: String },
    /// A bundled rulepack name is not known. Not constructed in this file.
    #[error("unknown bundled rulepack: {value}")]
    BundledRulepackUnknown { value: String },
    /// A locale bucket name is not known. Not constructed in this file.
    #[error("unknown locale bucket: {name}")]
    UnknownLocaleBucket { name: String },
    /// The policy uses a rule kind the current host does not support; the payload is the
    /// full message (for example, column rules in CLI mode).
    #[error("{0}")]
    UnsupportedRuleKind(String),
}
199
200impl Policy {
201    pub fn load(path: &Path) -> Result<Policy, PolicyError> {
202        let raw = fs::read_to_string(path).map_err(PolicyError::Io)?;
203        let raw: RawPolicy = toml::from_str(&raw).map_err(PolicyError::TomlParse)?;
204        raw.try_into()
205    }
206
207    pub fn load_for_cli(path: &Path) -> Result<Policy, PolicyError> {
208        let policy = Self::load(path)?;
209        if policy
210            .rules
211            .iter()
212            .any(|rule| matches!(rule, RuleSpec::Column { .. }))
213        {
214            return Err(PolicyError::UnsupportedRuleKind(
215                "column rules not supported in CLI mode".to_string(),
216            ));
217        }
218        Ok(policy)
219    }
220}
221
/// Top-level shape of a policy TOML document as deserialized by serde, before validation.
///
/// Unknown top-level keys are rejected.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawPolicy {
    /// Required `[session]` table.
    session: RawSessionPolicy,
    /// Legacy `[[detector]]` tables. Still deserialized so that a non-empty list can be
    /// rejected with a migration hint during conversion.
    #[serde(rename = "detector", default)]
    detectors: Vec<RawDetectorSpec>,
    /// `[[rule]]` tables.
    #[serde(rename = "rule", default)]
    rules: Vec<RawRuleSpec>,
    /// Optional `[ner]` table.
    #[serde(default)]
    ner: Option<RawNerPolicy>,
    /// Optional `[locale]` table.
    #[serde(default)]
    locale: Option<RawLocalePolicy>,
    /// Optional `[policy]` table holding rulepacks and custom recognizers.
    #[serde(default)]
    policy: Option<RawPolicyTables>,
}
237
/// Unvalidated `[session]` table.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawSessionPolicy {
    /// Scope name; validated later by `SessionScope::parse`.
    scope: String,
    /// Optional TTL in seconds; presence and non-zero checks happen in `parse_session`.
    ttl_secs: Option<u64>,
}
244
/// Unvalidated recognizer table, shared by legacy `[[detector]]` and
/// `[[policy.custom_recognizers]]` entries.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawDetectorSpec {
    /// Recognizer kind; `"regex"` and `"dictionary"` get dedicated validation.
    kind: String,
    /// Recognizer name.
    name: String,
    /// Regex source; required for regex recognizers, forbidden for dictionary ones.
    pattern: Option<String>,
    /// PII class name; resolved by `parse_class`.
    class: String,
    /// Explicit dictionary name for dictionary recognizers.
    dictionary: Option<String>,
    /// Inline dictionary terms.
    #[serde(default)]
    terms: Vec<String>,
    /// Path to a file with one term per line; blank lines and `#` lines are skipped.
    terms_file: Option<String>,
    /// Name of a dictionary whose terms are not supplied by this policy file.
    terms_from_context: Option<String>,
    /// Whether dictionary matching is case-sensitive; defaults to `false`.
    #[serde(default)]
    case_sensitive: bool,
    /// Token family; the loader substitutes `"counter"` when omitted.
    token_family: Option<String>,
}
261
/// Unvalidated `[ner]` table.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawNerPolicy {
    /// Model directory; a leading `~/` is expanded during parsing.
    model_dir: Option<String>,
    /// Locale hint; validated by `validate_ner_locale`.
    locale: Option<String>,
    /// Threshold; range-checked during parsing, defaulted when omitted.
    #[serde(default)]
    threshold: Option<f32>,
}
270
/// Unvalidated `[locale]` table.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawLocalePolicy {
    /// Locale tags to activate; an empty or omitted list is treated as "no locale policy".
    #[serde(default)]
    active: Vec<String>,
}
277
/// Unvalidated `[policy]` table.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawPolicyTables {
    /// Optional `[policy.rulepacks]` table.
    #[serde(default)]
    rulepacks: Option<RawRulepackPolicy>,
    /// `[[policy.custom_recognizers]]` entries.
    #[serde(default)]
    custom_recognizers: Vec<RawDetectorSpec>,
}
286
/// Unvalidated `[policy.rulepacks]` table.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawRulepackPolicy {
    /// Names of bundled rulepacks.
    #[serde(default)]
    bundled: Vec<String>,
    /// Rulepack paths as written; a leading `~/` is expanded during parsing.
    #[serde(default)]
    paths: Vec<String>,
}
295
/// Unvalidated `[[rule]]` table.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawRuleSpec {
    /// Rule kind: `"class"`, `"column"`, or `"default"`.
    kind: String,
    /// PII class name; required when `kind = "class"`.
    class: Option<String>,
    /// Column name; required when `kind = "column"`.
    column: Option<String>,
    /// Action name; resolved by `parse_action`.
    action: String,
}
304
305impl TryFrom<RawPolicy> for Policy {
306    type Error = PolicyError;
307
308    fn try_from(raw: RawPolicy) -> Result<Self, Self::Error> {
309        let session = parse_session(raw.session)?;
310
311        if !raw.detectors.is_empty() {
312            return Err(PolicyError::LegacyDetectorUnsupported(
313                "https://github.com/EmpireTwo/gaze/blob/main/docs/policy.md#migrating-detector",
314            ));
315        }
316
317        let policy_tables = raw.policy.unwrap_or_default();
318        let RawPolicyTables {
319            rulepacks: raw_rulepacks,
320            custom_recognizers,
321        } = policy_tables;
322
323        let ner = raw.ner.map(parse_ner).transpose()?;
324        let mut detectors = Vec::with_capacity(custom_recognizers.len());
325        let mut dictionaries = Vec::new();
326        for detector in custom_recognizers {
327            let (detector, dictionary) = parse_detector(detector)?;
328            if let Some(dictionary) = dictionary {
329                dictionaries.push(dictionary);
330            }
331            detectors.push(detector);
332        }
333        let rulepacks = raw_rulepacks
334            .map(parse_rulepack_policy)
335            .transpose()?
336            .unwrap_or_else(|| RulepackPolicy {
337                bundled: vec!["core".to_string()],
338                paths: Vec::new(),
339            });
340
341        if detectors.is_empty() && rulepacks.bundled.is_empty() && rulepacks.paths.is_empty() {
342            return Err(PolicyError::NoDetectors);
343        }
344
345        let mut rules = Vec::with_capacity(raw.rules.len());
346        for rule in raw.rules {
347            rules.push(parse_rule(rule)?);
348        }
349        if rules.is_empty() {
350            return Err(PolicyError::NoRules);
351        }
352
353        let locale = raw.locale.map(parse_locale_policy).transpose()?.flatten();
354
355        Ok(Self {
356            session,
357            detectors,
358            dictionaries,
359            rules,
360            ner,
361            rulepacks,
362            locale,
363        })
364    }
365}
366
367fn parse_session(raw: RawSessionPolicy) -> Result<SessionPolicy, PolicyError> {
368    let scope = SessionScope::parse(&raw.scope)?;
369
370    match scope {
371        SessionScope::Persistent => match raw.ttl_secs {
372            Some(0) => Err(PolicyError::BadTtl(
373                "session.ttl_secs must be greater than zero".to_string(),
374            )),
375            Some(ttl_secs) => Ok(SessionPolicy {
376                scope,
377                ttl_secs: Some(ttl_secs),
378            }),
379            None => Err(PolicyError::MissingTtl),
380        },
381        _ => {
382            if raw.ttl_secs == Some(0) {
383                return Err(PolicyError::BadTtl(
384                    "session.ttl_secs must be greater than zero".to_string(),
385                ));
386            }
387            Ok(SessionPolicy {
388                scope,
389                ttl_secs: raw.ttl_secs,
390            })
391        }
392    }
393}
394
395fn parse_detector(
396    raw: RawDetectorSpec,
397) -> Result<(DetectorSpec, Option<RulepackDict>), PolicyError> {
398    let class = parse_class(&raw.class)?;
399    match raw.kind.as_str() {
400        "regex" => parse_regex_detector(raw, class),
401        "dictionary" => parse_dictionary_detector(raw, class),
402        other => Ok((
403            DetectorSpec {
404                kind: DetectorKind::Unknown(other.to_string()),
405                name: raw.name,
406                pattern: raw.pattern,
407                class,
408                dictionary_name: None,
409                case_sensitive: raw.case_sensitive,
410                token_family: raw.token_family.unwrap_or_else(|| "counter".to_string()),
411            },
412            None,
413        )),
414    }
415}
416
417fn parse_regex_detector(
418    raw: RawDetectorSpec,
419    class: PiiClass,
420) -> Result<(DetectorSpec, Option<RulepackDict>), PolicyError> {
421    let pattern = raw.pattern.ok_or_else(|| PolicyError::BadDictionary {
422        name: raw.name.clone(),
423        reason: "regex recognizers require pattern".to_string(),
424    })?;
425    let compiled = regex::Regex::new(&pattern).map_err(|source| PolicyError::BadRegex {
426        name: raw.name.clone(),
427        source,
428    })?;
429    crate::token_shape::reject_if_shadows_token_shape(&compiled, &raw.name).map_err(|shadow| {
430        PolicyError::TokenShapeShadow {
431            name: shadow.recognizer_id,
432            pattern: shadow.offending_pattern,
433            shadowed_shape: shadow.shadowed_shape,
434        }
435    })?;
436
437    Ok((
438        DetectorSpec {
439            kind: DetectorKind::Regex,
440            name: raw.name,
441            pattern: Some(pattern),
442            class,
443            dictionary_name: None,
444            case_sensitive: false,
445            token_family: raw.token_family.unwrap_or_else(|| "counter".to_string()),
446        },
447        None,
448    ))
449}
450
451fn parse_dictionary_detector(
452    raw: RawDetectorSpec,
453    class: PiiClass,
454) -> Result<(DetectorSpec, Option<RulepackDict>), PolicyError> {
455    if raw.pattern.is_some() {
456        return Err(PolicyError::BadDictionary {
457            name: raw.name,
458            reason: "dictionary recognizers must not set pattern".to_string(),
459        });
460    }
461
462    let dictionary_name = raw
463        .terms_from_context
464        .clone()
465        .or(raw.dictionary.clone())
466        .unwrap_or_else(|| raw.name.clone());
467    let mut terms = raw.terms;
468    if let Some(path) = raw.terms_file {
469        let path = expand_home(path)?;
470        let file = fs::read_to_string(&path).map_err(PolicyError::Io)?;
471        terms.extend(
472            file.lines()
473                .map(str::trim)
474                .filter(|line| !line.is_empty() && !line.starts_with('#'))
475                .map(str::to_string),
476        );
477    }
478
479    let dictionary = if raw.terms_from_context.is_some() {
480        if !terms.is_empty() {
481            return Err(PolicyError::BadDictionary {
482                name: raw.name.clone(),
483                reason: "terms_from_context cannot be combined with terms or terms_file"
484                    .to_string(),
485            });
486        }
487        None
488    } else {
489        if terms.is_empty() {
490            return Err(PolicyError::BadDictionary {
491                name: raw.name.clone(),
492                reason: "dictionary recognizers require terms, terms_file, or terms_from_context"
493                    .to_string(),
494            });
495        }
496        if !raw.case_sensitive && terms.iter().any(|term| !term.is_ascii()) {
497            return Err(PolicyError::BadDictionary {
498                name: raw.name.clone(),
499                reason:
500                    "unicode dictionary insensitive matching unsupported in v0.4.0, use case_sensitive = true"
501                        .to_string(),
502            });
503        }
504        Some(RulepackDict::new(
505            dictionary_name.clone(),
506            terms,
507            raw.case_sensitive,
508        ))
509    };
510
511    Ok((
512        DetectorSpec {
513            kind: DetectorKind::Dictionary,
514            name: raw.name,
515            pattern: None,
516            class,
517            dictionary_name: Some(dictionary_name),
518            case_sensitive: raw.case_sensitive,
519            token_family: raw.token_family.unwrap_or_else(|| "counter".to_string()),
520        },
521        dictionary,
522    ))
523}
524
525fn parse_rule(raw: RawRuleSpec) -> Result<RuleSpec, PolicyError> {
526    let action = parse_action(&raw.action)?;
527    match raw.kind.as_str() {
528        "class" => {
529            let class = raw
530                .class
531                .ok_or_else(|| PolicyError::UnknownClass("missing rule.class".to_string()))?;
532            Ok(RuleSpec::Class {
533                class: parse_class(&class)?,
534                action,
535            })
536        }
537        "column" => Ok(RuleSpec::Column {
538            column: raw
539                .column
540                .ok_or_else(|| PolicyError::BadTtl("missing rule.column".to_string()))?,
541            action,
542        }),
543        "default" => Ok(RuleSpec::Default { action }),
544        other => Err(PolicyError::BadTtl(format!("unknown rule.kind '{other}'"))),
545    }
546}
547
548fn parse_ner(raw: RawNerPolicy) -> Result<NerPolicy, PolicyError> {
549    let threshold = raw.threshold.unwrap_or(DEFAULT_NER_THRESHOLD);
550    if !(0.0..=1.0).contains(&threshold) {
551        return Err(PolicyError::NerThresholdOutOfRange { value: threshold });
552    }
553    if let Some(locale) = &raw.locale {
554        validate_ner_locale(locale)?;
555    }
556    Ok(NerPolicy {
557        model_dir: raw.model_dir.map(expand_home).transpose()?,
558        locale: raw.locale,
559        threshold,
560    })
561}
562
563pub fn validate_ner_locale(locale: &str) -> Result<(), PolicyError> {
564    LocaleTag::parse(locale)
565        .map(|_| ())
566        .map_err(|_| PolicyError::NerLocaleUnsupported {
567            value: locale.to_string(),
568        })
569}
570
571fn parse_locale_policy(raw: RawLocalePolicy) -> Result<Option<Vec<LocaleTag>>, PolicyError> {
572    if raw.active.is_empty() {
573        return Ok(None);
574    }
575    raw.active
576        .into_iter()
577        .map(|locale| {
578            LocaleTag::parse(&locale)
579                .map_err(|_| PolicyError::BadTtl(format!("unsupported locale tag '{locale}'")))
580        })
581        .collect::<Result<Vec<_>, _>>()
582        .map(Some)
583}
584
585fn parse_rulepack_policy(raw: RawRulepackPolicy) -> Result<RulepackPolicy, PolicyError> {
586    Ok(RulepackPolicy {
587        bundled: raw.bundled,
588        paths: raw
589            .paths
590            .into_iter()
591            .map(expand_home)
592            .collect::<Result<_, _>>()?,
593    })
594}
595
596fn expand_home(path: String) -> Result<PathBuf, PolicyError> {
597    if let Some(rest) = path.strip_prefix("~/") {
598        let home = env::var("HOME")
599            .map_err(|_| PolicyError::BadTtl("HOME is not set for ~/ expansion".to_string()))?;
600        Ok(PathBuf::from(home).join(rest))
601    } else {
602        Ok(PathBuf::from(path))
603    }
604}
605
606fn parse_class(input: &str) -> Result<PiiClass, PolicyError> {
607    let lower = input.trim().to_ascii_lowercase();
608    match lower.as_str() {
609        "email" => Ok(PiiClass::Email),
610        "name" => Ok(PiiClass::Name),
611        "location" => Ok(PiiClass::Location),
612        "organization" => Ok(PiiClass::Organization),
613        custom if custom.starts_with("custom:") => {
614            let name = input
615                .trim()
616                .split_once(':')
617                .map(|(_, name)| name)
618                .unwrap_or_default();
619            if name.trim().is_empty() {
620                return Err(PolicyError::UnknownClass(input.to_string()));
621            }
622            Ok(PiiClass::custom(name))
623        }
624        _ => Err(PolicyError::UnknownClass(input.to_string())),
625    }
626}
627
628fn parse_action(input: &str) -> Result<Action, PolicyError> {
629    match input {
630        "tokenize" => Ok(Action::Tokenize),
631        "redact" => Ok(Action::Redact),
632        "format_preserve" => Ok(Action::FormatPreserve),
633        "generalize" => Ok(Action::Generalize),
634        "preserve" => Ok(Action::Preserve),
635        other => Err(PolicyError::BadTtl(format!(
636            "unknown rule.action '{other}'"
637        ))),
638    }
639}
640
// Unit tests for policy loading: TOML fixtures are either written to a temporary
// directory and read through `Policy::load`, or deserialized directly into `RawPolicy`
// and converted with `Policy::try_from`.
#[cfg(test)]
mod tests {
    use std::fs;

    use tempfile::tempdir;

    use super::*;

    /// A complete policy loads, and `~/` in `ner.model_dir` is expanded against `HOME`.
    #[test]
    fn loads_policy_and_expands_home() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("policy.toml");
        fs::write(
            &path,
            r#"
[session]
scope = "persistent"
ttl_secs = 86400

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = 'alice@example\.invalid'
class = "email"

[ner]
model_dir = "~/.cache/gaze/model"
locale = "de"
threshold = 0.4

[[rule]]
kind = "class"
class = "email"
action = "tokenize"

[[rule]]
kind = "default"
action = "preserve"
"#,
        )
        .unwrap();

        // NOTE(review): HOME is process-global state; it is overridden only for the
        // duration of the load and then restored. Tests running in parallel that read
        // HOME could observe the temporary value.
        let old_home = env::var_os("HOME");
        env::set_var("HOME", "/tmp/gaze-home");
        let policy = Policy::load(&path).unwrap();
        match old_home {
            Some(value) => env::set_var("HOME", value),
            None => env::remove_var("HOME"),
        }

        assert_eq!(policy.session.scope, SessionScope::Persistent);
        assert_eq!(policy.session.ttl_secs, Some(86400));
        assert_eq!(policy.detectors.len(), 1);
        assert_eq!(policy.rules.len(), 2);
        let ner = policy.ner.unwrap();
        assert_eq!(
            ner.model_dir,
            Some(PathBuf::from("/tmp/gaze-home/.cache/gaze/model"))
        );
        assert_eq!(ner.threshold, 0.4);
    }

    /// A threshold above 1.0 is rejected with the typed range error. The `[ner]` table is
    /// validated before custom recognizers, so the recognizer below is never inspected.
    #[test]
    fn rejects_ner_threshold_out_of_range() {
        let raw = r#"
[session]
scope = "ephemeral"

[ner]
threshold = 1.1

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = ".+"
class = "email"

[[rule]]
kind = "default"
action = "preserve"
"#;
        let raw: RawPolicy = toml::from_str(raw).expect("raw policy");

        assert!(matches!(
            Policy::try_from(raw),
            Err(PolicyError::NerThresholdOutOfRange { value }) if value == 1.1
        ));
    }

    /// Well-formed locale tags pass NER locale validation; malformed text is rejected
    /// with the offending value attached.
    #[test]
    fn accepts_bcp47_ner_locale_hints() {
        for locale in ["de", "en-US", "pt-BR", "zh-Hant"] {
            assert!(
                validate_ner_locale(locale).is_ok(),
                "NER locale hints should accept BCP47-shaped tag {locale}"
            );
        }

        assert!(matches!(
            validate_ner_locale("bad locale!"),
            Err(PolicyError::NerLocaleUnsupported { value }) if value == "bad locale!"
        ));
    }

    /// An unrecognised `session.scope` yields `SessionScopeUnknown`. The session table is
    /// validated first, so nothing else in the fixture is inspected.
    #[test]
    fn rejects_unknown_session_scope_with_typed_error() {
        let raw = r#"
[session]
scope = "forever"

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = ".+"
class = "email"

[[rule]]
kind = "default"
action = "preserve"
"#;

        let raw = toml::from_str::<RawPolicy>(raw).unwrap();
        let err = Policy::try_from(raw).unwrap_err();

        assert!(matches!(
            err,
            PolicyError::SessionScopeUnknown { value } if value == "forever"
        ));
    }

    /// A custom regex recognizer for the email class loads when the only rule is a
    /// default `preserve` rule.
    #[test]
    fn custom_email_recognizer_loads_under_preservation() {
        let raw = r#"
[session]
scope = "ephemeral"

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = 'alice@example\.invalid'
class = "email"

[[rule]]
kind = "default"
action = "preserve"
"#;

        let raw = toml::from_str::<RawPolicy>(raw).unwrap();
        let policy = Policy::try_from(raw).unwrap();

        assert_eq!(policy.detectors.len(), 1);
        assert_eq!(policy.detectors[0].name, "emails");
    }

    /// An unknown key inside `[session]` makes deserialization fail (`deny_unknown_fields`),
    /// surfacing as `TomlParse`.
    #[test]
    fn rejects_unknown_keys() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("policy.toml");
        fs::write(
            &path,
            r#"
[session]
scope = "ephemeral"
bogus = true

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = ".+"
class = "email"

[[rule]]
kind = "default"
action = "preserve"
"#,
        )
        .unwrap();

        assert!(matches!(
            Policy::load(&path),
            Err(PolicyError::TomlParse(_))
        ));
    }

    /// A dictionary recognizer with inline terms produces both a detector (whose
    /// dictionary name falls back to the recognizer name) and a matching dictionary.
    #[test]
    fn loads_dictionary_custom_recognizer_terms() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("policy.toml");
        fs::write(
            &path,
            r#"
[session]
scope = "ephemeral"

[[policy.custom_recognizers]]
kind = "dictionary"
name = "songs"
class = "custom:song"
terms = ["Song A"]
case_sensitive = true

[[rule]]
kind = "class"
class = "custom:song"
action = "tokenize"

[[rule]]
kind = "default"
action = "preserve"
"#,
        )
        .unwrap();

        let policy = Policy::load(&path).unwrap();
        assert_eq!(policy.detectors[0].kind, DetectorKind::Dictionary);
        assert_eq!(
            policy.detectors[0].dictionary_name.as_deref(),
            Some("songs")
        );
        assert_eq!(policy.dictionaries[0].terms, vec!["Song A"]);
    }
}