1use std::env;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::str::FromStr;
5
6use serde::Deserialize;
7use thiserror::Error;
8
9use crate::{Action, LocaleTag, PiiClass, RulepackDict};
10
/// Default value of [`NerPolicy::threshold`], used when the `[ner]` table omits `threshold`.
pub const DEFAULT_NER_THRESHOLD: f32 = 0.3;
12
/// Validated, in-memory form of a policy file.
///
/// Produced from the raw TOML representation by `TryFrom<RawPolicy>`; see [`Policy::load`].
#[derive(Debug, Clone, PartialEq, Default)]
#[non_exhaustive]
pub struct Policy {
    /// Session settings from the `[session]` table.
    pub session: SessionPolicy,
    /// Recognizers parsed from `[[policy.custom_recognizers]]`, in file order.
    pub detectors: Vec<DetectorSpec>,
    /// Term dictionaries built for dictionary recognizers that supply `terms` or `terms_file`.
    pub dictionaries: Vec<RulepackDict>,
    /// Rules parsed from `[[rule]]`, in file order.
    pub rules: Vec<RuleSpec>,
    /// Settings from the optional `[ner]` table.
    pub ner: Option<NerPolicy>,
    /// Rulepack selection from `[policy.rulepacks]`. When a file omits that table the loader
    /// substitutes the bundled `core` pack.
    pub rulepacks: RulepackPolicy,
    /// Locales from `[locale].active`; `None` when the table is absent or the list is empty.
    pub locale: Option<Vec<LocaleTag>>,
}
34
/// Session settings parsed from the `[session]` table of a policy file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct SessionPolicy {
    /// Value of `session.scope`.
    pub scope: SessionScope,
    /// Value of `session.ttl_secs` (presumably seconds, going by the key name). When loaded
    /// from a file this is never `Some(0)` and is always `Some` for
    /// [`SessionScope::Persistent`].
    pub ttl_secs: Option<u64>,
}
41
42impl Default for SessionPolicy {
43 fn default() -> Self {
44 Self {
45 scope: SessionScope::Ephemeral,
46 ttl_secs: None,
47 }
48 }
49}
50
/// Value of `session.scope` in a policy file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum SessionScope {
    /// Spelled `ephemeral`; the scope used by `SessionPolicy::default()`.
    Ephemeral,
    /// Spelled `conversation`.
    Conversation,
    /// Spelled `persistent`; the loader requires a non-zero `session.ttl_secs` for this scope.
    Persistent,
}
58
59impl SessionScope {
60 pub fn parse(value: &str) -> Result<Self, PolicyError> {
61 match value {
62 "ephemeral" => Ok(SessionScope::Ephemeral),
63 "conversation" => Ok(SessionScope::Conversation),
64 "persistent" => Ok(SessionScope::Persistent),
65 other => Err(PolicyError::SessionScopeUnknown {
66 value: other.to_string(),
67 }),
68 }
69 }
70}
71
72impl FromStr for SessionScope {
73 type Err = PolicyError;
74
75 fn from_str(value: &str) -> Result<Self, Self::Err> {
76 Self::parse(value)
77 }
78}
79
/// One validated entry of `[[policy.custom_recognizers]]`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct DetectorSpec {
    /// Recognizer kind taken from the entry's `kind` key.
    pub kind: DetectorKind,
    /// The entry's `name`.
    pub name: String,
    /// Regex source for regex recognizers; `None` for dictionary recognizers.
    pub pattern: Option<String>,
    /// PII class the recognizer reports.
    pub class: PiiClass,
    /// For dictionary recognizers: `terms_from_context` if set, otherwise `dictionary`,
    /// otherwise the recognizer name. `None` for other kinds.
    pub dictionary_name: Option<String>,
    /// The configured `case_sensitive` flag. The loader always stores `false` here for regex
    /// recognizers.
    pub case_sensitive: bool,
    /// The configured `token_family`, or `"counter"` when omitted.
    pub token_family: String,
}
91
92impl Default for DetectorSpec {
93 fn default() -> Self {
94 Self {
95 kind: DetectorKind::Regex,
96 name: String::new(),
97 pattern: None,
98 class: PiiClass::Email,
99 dictionary_name: None,
100 case_sensitive: false,
101 token_family: "counter".to_string(),
102 }
103 }
104}
105
/// Kind of a custom recognizer, taken from its `kind` key.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum DetectorKind {
    /// `kind = "regex"`.
    Regex,
    /// `kind = "dictionary"`.
    Dictionary,
    /// Any other `kind` string, kept verbatim; the loader does not reject it.
    Unknown(String),
}
113
/// Settings parsed from the optional `[ner]` table.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub struct NerPolicy {
    /// `ner.model_dir`, with a leading `~/` expanded using `HOME` when loaded from a file.
    pub model_dir: Option<PathBuf>,
    /// `ner.locale`; when loaded from a file it has passed [`validate_ner_locale`].
    pub locale: Option<String>,
    /// `ner.threshold`, defaulting to [`DEFAULT_NER_THRESHOLD`]; the loader only accepts
    /// values within `0.0..=1.0`.
    pub threshold: f32,
}
121
122impl Default for NerPolicy {
123 fn default() -> Self {
124 Self {
125 model_dir: None,
126 locale: None,
127 threshold: DEFAULT_NER_THRESHOLD,
128 }
129 }
130}
131
/// Rulepack selection parsed from `[policy.rulepacks]`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[non_exhaustive]
pub struct RulepackPolicy {
    /// Names listed under `bundled`; stored as written, not validated by the parser in this
    /// file.
    pub bundled: Vec<String>,
    /// Entries of `paths`, each with a leading `~/` expanded using `HOME`.
    pub paths: Vec<PathBuf>,
}
138
/// One validated `[[rule]]` entry, discriminated by its `kind` key.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum RuleSpec {
    /// `kind = "class"`: `action` for the PII class named by `rule.class`.
    Class { class: PiiClass, action: Action },
    /// `kind = "column"`: `action` for the column named by `rule.column`.
    /// [`Policy::load_for_cli`] rejects policies containing this variant.
    Column { column: String, action: Action },
    /// `kind = "default"`: `action` with no class or column attached.
    Default { action: Action },
}
146
/// Errors produced while reading, parsing, or validating a policy.
#[derive(Debug, Error)]
#[non_exhaustive]
pub enum PolicyError {
    /// The file is not valid TOML or does not match the expected schema (unknown keys are
    /// rejected).
    #[error("failed to parse policy.toml: {0}")]
    TomlParse(#[source] toml::de::Error),
    /// Reading the policy file, or a recognizer's `terms_file`, failed.
    #[error("failed to read policy file: {0}")]
    Io(#[source] std::io::Error),
    /// A class name is not recognized; also used when a class rule omits `class`.
    #[error("unknown pii class: {0}")]
    UnknownClass(String),
    /// A regex recognizer's pattern failed to compile.
    #[error("invalid regex for detector '{name}': {source}")]
    BadRegex {
        name: String,
        #[source]
        source: regex::Error,
    },
    /// A regex recognizer was rejected by the token-shape shadow check.
    #[error(
        "regex detector '{name}' shadows Gaze token shape sample '{shadowed_shape}' with pattern '{pattern}'"
    )]
    TokenShapeShadow {
        name: String,
        pattern: String,
        shadowed_shape: String,
    },
    /// A recognizer definition is inconsistent. Despite the message wording, this is also
    /// returned for a regex recognizer without `pattern`.
    #[error("invalid dictionary detector '{name}': {reason}")]
    BadDictionary { name: String, reason: String },
    /// `session.scope = "persistent"` without `session.ttl_secs`.
    #[error("session.ttl_secs is required when session.scope = \"persistent\"")]
    MissingTtl,
    /// `session.ttl_secs` is invalid; the payload is the reason.
    ///
    /// NOTE(review): some validation paths in this file that are unrelated to the session
    /// table also construct this variant, so the message prefix can be misleading.
    #[error("invalid session.ttl_secs: {0}")]
    BadTtl(String),
    /// The policy has no `[[rule]]` entries.
    #[error("policy must define at least one rule")]
    NoRules,
    /// The policy has no custom recognizers and an empty rulepack selection.
    #[error("policy must define at least one detector")]
    NoDetectors,
    /// The file uses the legacy `[[detector]]` table; the payload is a migration URL.
    #[error(
        "legacy [[detector]] is unsupported in v0.4; migrate to [[policy.custom_recognizers]]: {0}"
    )]
    LegacyDetectorUnsupported(&'static str),
    /// Not constructed in this file; presumably raised when loading the NER model.
    #[error("ner load error: {0}")]
    NerLoad(String),
    /// `ner.threshold` lies outside `0.0..=1.0`.
    #[error("ner.threshold must be between 0.0 and 1.0 inclusive, got {value}")]
    NerThresholdOutOfRange { value: f32 },
    /// `session.scope` is not one of the accepted spellings.
    #[error("session.scope must be one of ephemeral, conversation, persistent, got {value}")]
    SessionScopeUnknown { value: String },
    /// `ner.locale` was rejected by `LocaleTag::parse`.
    #[error("ner.locale must be a BCP47 locale tag, got {value}")]
    NerLocaleUnsupported { value: String },
    /// Not constructed in this file.
    #[error("unknown bundled rulepack: {value}")]
    BundledRulepackUnknown { value: String },
    /// Not constructed in this file.
    #[error("unknown locale bucket: {name}")]
    UnknownLocaleBucket { name: String },
    /// A rule cannot be used; the payload is the complete message.
    #[error("{0}")]
    UnsupportedRuleKind(String),
}
199
200impl Policy {
201 pub fn load(path: &Path) -> Result<Policy, PolicyError> {
202 let raw = fs::read_to_string(path).map_err(PolicyError::Io)?;
203 let raw: RawPolicy = toml::from_str(&raw).map_err(PolicyError::TomlParse)?;
204 raw.try_into()
205 }
206
207 pub fn load_for_cli(path: &Path) -> Result<Policy, PolicyError> {
208 let policy = Self::load(path)?;
209 if policy
210 .rules
211 .iter()
212 .any(|rule| matches!(rule, RuleSpec::Column { .. }))
213 {
214 return Err(PolicyError::UnsupportedRuleKind(
215 "column rules not supported in CLI mode".to_string(),
216 ));
217 }
218 Ok(policy)
219 }
220}
221
/// Top-level shape of a policy file as deserialized from TOML, before validation.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawPolicy {
    /// Required `[session]` table.
    session: RawSessionPolicy,
    /// Legacy `[[detector]]` entries; any entry makes the conversion to `Policy` fail.
    #[serde(rename = "detector", default)]
    detectors: Vec<RawDetectorSpec>,
    /// `[[rule]]` entries.
    #[serde(rename = "rule", default)]
    rules: Vec<RawRuleSpec>,
    /// Optional `[ner]` table.
    #[serde(default)]
    ner: Option<RawNerPolicy>,
    /// Optional `[locale]` table.
    #[serde(default)]
    locale: Option<RawLocalePolicy>,
    /// Optional `[policy]` table holding rulepacks and custom recognizers.
    #[serde(default)]
    policy: Option<RawPolicyTables>,
}
237
/// Unvalidated `[session]` table.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawSessionPolicy {
    /// Scope name, validated later by `SessionScope::parse`.
    scope: String,
    /// Optional TTL; required (and non-zero) for the persistent scope.
    ttl_secs: Option<u64>,
}
244
/// Unvalidated recognizer entry (`[[policy.custom_recognizers]]` or legacy `[[detector]]`).
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawDetectorSpec {
    /// `"regex"` or `"dictionary"`; other strings are kept as an unknown kind.
    kind: String,
    /// Recognizer name, used in error messages and as the fallback dictionary name.
    name: String,
    /// Regex source; required for regex recognizers, forbidden for dictionary recognizers.
    pattern: Option<String>,
    /// PII class name, resolved by `parse_class`.
    class: String,
    /// Explicit dictionary name for dictionary recognizers.
    dictionary: Option<String>,
    /// Inline dictionary terms.
    #[serde(default)]
    terms: Vec<String>,
    /// Path of a file with one term per line; a leading `~/` is expanded, lines are trimmed,
    /// and blank lines and lines starting with `#` are skipped.
    terms_file: Option<String>,
    /// Name used as the dictionary name when no dictionary is built from policy terms; cannot
    /// be combined with terms.
    terms_from_context: Option<String>,
    /// Case sensitivity flag; defaults to `false`.
    #[serde(default)]
    case_sensitive: bool,
    /// Token family; the loader substitutes `"counter"` when omitted.
    token_family: Option<String>,
}
261
/// Unvalidated `[ner]` table.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawNerPolicy {
    /// Model directory; a leading `~/` is expanded during validation.
    model_dir: Option<String>,
    /// Locale hint, checked with `validate_ner_locale`.
    locale: Option<String>,
    /// Threshold; `DEFAULT_NER_THRESHOLD` is used when omitted.
    #[serde(default)]
    threshold: Option<f32>,
}
270
/// Unvalidated `[locale]` table.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawLocalePolicy {
    /// Locale tags; an empty list is treated the same as an absent table.
    #[serde(default)]
    active: Vec<String>,
}
277
/// Unvalidated `[policy]` table.
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawPolicyTables {
    /// Optional `[policy.rulepacks]` table.
    #[serde(default)]
    rulepacks: Option<RawRulepackPolicy>,
    /// `[[policy.custom_recognizers]]` entries.
    #[serde(default)]
    custom_recognizers: Vec<RawDetectorSpec>,
}
286
/// Unvalidated `[policy.rulepacks]` table.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawRulepackPolicy {
    /// Names of bundled rulepacks.
    #[serde(default)]
    bundled: Vec<String>,
    /// Rulepack paths; a leading `~/` is expanded during validation.
    #[serde(default)]
    paths: Vec<String>,
}
295
/// Unvalidated `[[rule]]` entry.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct RawRuleSpec {
    /// `"class"`, `"column"`, or `"default"`.
    kind: String,
    /// Class name; required when `kind = "class"`.
    class: Option<String>,
    /// Column name; required when `kind = "column"`.
    column: Option<String>,
    /// Action name, resolved by `parse_action`.
    action: String,
}
304
305impl TryFrom<RawPolicy> for Policy {
306 type Error = PolicyError;
307
308 fn try_from(raw: RawPolicy) -> Result<Self, Self::Error> {
309 let session = parse_session(raw.session)?;
310
311 if !raw.detectors.is_empty() {
312 return Err(PolicyError::LegacyDetectorUnsupported(
313 "https://github.com/EmpireTwo/gaze/blob/main/docs/policy.md#migrating-detector",
314 ));
315 }
316
317 let policy_tables = raw.policy.unwrap_or_default();
318 let RawPolicyTables {
319 rulepacks: raw_rulepacks,
320 custom_recognizers,
321 } = policy_tables;
322
323 let ner = raw.ner.map(parse_ner).transpose()?;
324 let mut detectors = Vec::with_capacity(custom_recognizers.len());
325 let mut dictionaries = Vec::new();
326 for detector in custom_recognizers {
327 let (detector, dictionary) = parse_detector(detector)?;
328 if let Some(dictionary) = dictionary {
329 dictionaries.push(dictionary);
330 }
331 detectors.push(detector);
332 }
333 let rulepacks = raw_rulepacks
334 .map(parse_rulepack_policy)
335 .transpose()?
336 .unwrap_or_else(|| RulepackPolicy {
337 bundled: vec!["core".to_string()],
338 paths: Vec::new(),
339 });
340
341 if detectors.is_empty() && rulepacks.bundled.is_empty() && rulepacks.paths.is_empty() {
342 return Err(PolicyError::NoDetectors);
343 }
344
345 let mut rules = Vec::with_capacity(raw.rules.len());
346 for rule in raw.rules {
347 rules.push(parse_rule(rule)?);
348 }
349 if rules.is_empty() {
350 return Err(PolicyError::NoRules);
351 }
352
353 let locale = raw.locale.map(parse_locale_policy).transpose()?.flatten();
354
355 Ok(Self {
356 session,
357 detectors,
358 dictionaries,
359 rules,
360 ner,
361 rulepacks,
362 locale,
363 })
364 }
365}
366
367fn parse_session(raw: RawSessionPolicy) -> Result<SessionPolicy, PolicyError> {
368 let scope = SessionScope::parse(&raw.scope)?;
369
370 match scope {
371 SessionScope::Persistent => match raw.ttl_secs {
372 Some(0) => Err(PolicyError::BadTtl(
373 "session.ttl_secs must be greater than zero".to_string(),
374 )),
375 Some(ttl_secs) => Ok(SessionPolicy {
376 scope,
377 ttl_secs: Some(ttl_secs),
378 }),
379 None => Err(PolicyError::MissingTtl),
380 },
381 _ => {
382 if raw.ttl_secs == Some(0) {
383 return Err(PolicyError::BadTtl(
384 "session.ttl_secs must be greater than zero".to_string(),
385 ));
386 }
387 Ok(SessionPolicy {
388 scope,
389 ttl_secs: raw.ttl_secs,
390 })
391 }
392 }
393}
394
395fn parse_detector(
396 raw: RawDetectorSpec,
397) -> Result<(DetectorSpec, Option<RulepackDict>), PolicyError> {
398 let class = parse_class(&raw.class)?;
399 match raw.kind.as_str() {
400 "regex" => parse_regex_detector(raw, class),
401 "dictionary" => parse_dictionary_detector(raw, class),
402 other => Ok((
403 DetectorSpec {
404 kind: DetectorKind::Unknown(other.to_string()),
405 name: raw.name,
406 pattern: raw.pattern,
407 class,
408 dictionary_name: None,
409 case_sensitive: raw.case_sensitive,
410 token_family: raw.token_family.unwrap_or_else(|| "counter".to_string()),
411 },
412 None,
413 )),
414 }
415}
416
417fn parse_regex_detector(
418 raw: RawDetectorSpec,
419 class: PiiClass,
420) -> Result<(DetectorSpec, Option<RulepackDict>), PolicyError> {
421 let pattern = raw.pattern.ok_or_else(|| PolicyError::BadDictionary {
422 name: raw.name.clone(),
423 reason: "regex recognizers require pattern".to_string(),
424 })?;
425 let compiled = regex::Regex::new(&pattern).map_err(|source| PolicyError::BadRegex {
426 name: raw.name.clone(),
427 source,
428 })?;
429 crate::token_shape::reject_if_shadows_token_shape(&compiled, &raw.name).map_err(|shadow| {
430 PolicyError::TokenShapeShadow {
431 name: shadow.recognizer_id,
432 pattern: shadow.offending_pattern,
433 shadowed_shape: shadow.shadowed_shape,
434 }
435 })?;
436
437 Ok((
438 DetectorSpec {
439 kind: DetectorKind::Regex,
440 name: raw.name,
441 pattern: Some(pattern),
442 class,
443 dictionary_name: None,
444 case_sensitive: false,
445 token_family: raw.token_family.unwrap_or_else(|| "counter".to_string()),
446 },
447 None,
448 ))
449}
450
451fn parse_dictionary_detector(
452 raw: RawDetectorSpec,
453 class: PiiClass,
454) -> Result<(DetectorSpec, Option<RulepackDict>), PolicyError> {
455 if raw.pattern.is_some() {
456 return Err(PolicyError::BadDictionary {
457 name: raw.name,
458 reason: "dictionary recognizers must not set pattern".to_string(),
459 });
460 }
461
462 let dictionary_name = raw
463 .terms_from_context
464 .clone()
465 .or(raw.dictionary.clone())
466 .unwrap_or_else(|| raw.name.clone());
467 let mut terms = raw.terms;
468 if let Some(path) = raw.terms_file {
469 let path = expand_home(path)?;
470 let file = fs::read_to_string(&path).map_err(PolicyError::Io)?;
471 terms.extend(
472 file.lines()
473 .map(str::trim)
474 .filter(|line| !line.is_empty() && !line.starts_with('#'))
475 .map(str::to_string),
476 );
477 }
478
479 let dictionary = if raw.terms_from_context.is_some() {
480 if !terms.is_empty() {
481 return Err(PolicyError::BadDictionary {
482 name: raw.name.clone(),
483 reason: "terms_from_context cannot be combined with terms or terms_file"
484 .to_string(),
485 });
486 }
487 None
488 } else {
489 if terms.is_empty() {
490 return Err(PolicyError::BadDictionary {
491 name: raw.name.clone(),
492 reason: "dictionary recognizers require terms, terms_file, or terms_from_context"
493 .to_string(),
494 });
495 }
496 if !raw.case_sensitive && terms.iter().any(|term| !term.is_ascii()) {
497 return Err(PolicyError::BadDictionary {
498 name: raw.name.clone(),
499 reason:
500 "unicode dictionary insensitive matching unsupported in v0.4.0, use case_sensitive = true"
501 .to_string(),
502 });
503 }
504 Some(RulepackDict::new(
505 dictionary_name.clone(),
506 terms,
507 raw.case_sensitive,
508 ))
509 };
510
511 Ok((
512 DetectorSpec {
513 kind: DetectorKind::Dictionary,
514 name: raw.name,
515 pattern: None,
516 class,
517 dictionary_name: Some(dictionary_name),
518 case_sensitive: raw.case_sensitive,
519 token_family: raw.token_family.unwrap_or_else(|| "counter".to_string()),
520 },
521 dictionary,
522 ))
523}
524
525fn parse_rule(raw: RawRuleSpec) -> Result<RuleSpec, PolicyError> {
526 let action = parse_action(&raw.action)?;
527 match raw.kind.as_str() {
528 "class" => {
529 let class = raw
530 .class
531 .ok_or_else(|| PolicyError::UnknownClass("missing rule.class".to_string()))?;
532 Ok(RuleSpec::Class {
533 class: parse_class(&class)?,
534 action,
535 })
536 }
537 "column" => Ok(RuleSpec::Column {
538 column: raw
539 .column
540 .ok_or_else(|| PolicyError::BadTtl("missing rule.column".to_string()))?,
541 action,
542 }),
543 "default" => Ok(RuleSpec::Default { action }),
544 other => Err(PolicyError::BadTtl(format!("unknown rule.kind '{other}'"))),
545 }
546}
547
548fn parse_ner(raw: RawNerPolicy) -> Result<NerPolicy, PolicyError> {
549 let threshold = raw.threshold.unwrap_or(DEFAULT_NER_THRESHOLD);
550 if !(0.0..=1.0).contains(&threshold) {
551 return Err(PolicyError::NerThresholdOutOfRange { value: threshold });
552 }
553 if let Some(locale) = &raw.locale {
554 validate_ner_locale(locale)?;
555 }
556 Ok(NerPolicy {
557 model_dir: raw.model_dir.map(expand_home).transpose()?,
558 locale: raw.locale,
559 threshold,
560 })
561}
562
563pub fn validate_ner_locale(locale: &str) -> Result<(), PolicyError> {
564 LocaleTag::parse(locale)
565 .map(|_| ())
566 .map_err(|_| PolicyError::NerLocaleUnsupported {
567 value: locale.to_string(),
568 })
569}
570
571fn parse_locale_policy(raw: RawLocalePolicy) -> Result<Option<Vec<LocaleTag>>, PolicyError> {
572 if raw.active.is_empty() {
573 return Ok(None);
574 }
575 raw.active
576 .into_iter()
577 .map(|locale| {
578 LocaleTag::parse(&locale)
579 .map_err(|_| PolicyError::BadTtl(format!("unsupported locale tag '{locale}'")))
580 })
581 .collect::<Result<Vec<_>, _>>()
582 .map(Some)
583}
584
585fn parse_rulepack_policy(raw: RawRulepackPolicy) -> Result<RulepackPolicy, PolicyError> {
586 Ok(RulepackPolicy {
587 bundled: raw.bundled,
588 paths: raw
589 .paths
590 .into_iter()
591 .map(expand_home)
592 .collect::<Result<_, _>>()?,
593 })
594}
595
596fn expand_home(path: String) -> Result<PathBuf, PolicyError> {
597 if let Some(rest) = path.strip_prefix("~/") {
598 let home = env::var("HOME")
599 .map_err(|_| PolicyError::BadTtl("HOME is not set for ~/ expansion".to_string()))?;
600 Ok(PathBuf::from(home).join(rest))
601 } else {
602 Ok(PathBuf::from(path))
603 }
604}
605
606fn parse_class(input: &str) -> Result<PiiClass, PolicyError> {
607 let lower = input.trim().to_ascii_lowercase();
608 match lower.as_str() {
609 "email" => Ok(PiiClass::Email),
610 "name" => Ok(PiiClass::Name),
611 "location" => Ok(PiiClass::Location),
612 "organization" => Ok(PiiClass::Organization),
613 custom if custom.starts_with("custom:") => {
614 let name = input
615 .trim()
616 .split_once(':')
617 .map(|(_, name)| name)
618 .unwrap_or_default();
619 if name.trim().is_empty() {
620 return Err(PolicyError::UnknownClass(input.to_string()));
621 }
622 Ok(PiiClass::custom(name))
623 }
624 _ => Err(PolicyError::UnknownClass(input.to_string())),
625 }
626}
627
628fn parse_action(input: &str) -> Result<Action, PolicyError> {
629 match input {
630 "tokenize" => Ok(Action::Tokenize),
631 "redact" => Ok(Action::Redact),
632 "format_preserve" => Ok(Action::FormatPreserve),
633 "generalize" => Ok(Action::Generalize),
634 "preserve" => Ok(Action::Preserve),
635 other => Err(PolicyError::BadTtl(format!(
636 "unknown rule.action '{other}'"
637 ))),
638 }
639}
640
// Unit tests for policy loading and validation.
#[cfg(test)]
mod tests {
    use std::fs;

    use tempfile::tempdir;

    use super::*;

    // Loads a full policy from disk and checks that `~/` in `ner.model_dir` is expanded.
    #[test]
    fn loads_policy_and_expands_home() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("policy.toml");
        fs::write(
            &path,
            r#"
[session]
scope = "persistent"
ttl_secs = 86400

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = 'alice@example\.invalid'
class = "email"

[ner]
model_dir = "~/.cache/gaze/model"
locale = "de"
threshold = 0.4

[[rule]]
kind = "class"
class = "email"
action = "tokenize"

[[rule]]
kind = "default"
action = "preserve"
"#,
        )
        .unwrap();

        // NOTE(review): this mutates the process-wide HOME variable. If the `unwrap` on
        // `Policy::load` panics, HOME is not restored, and other tests running in parallel
        // can observe the temporary value.
        let old_home = env::var_os("HOME");
        env::set_var("HOME", "/tmp/gaze-home");
        let policy = Policy::load(&path).unwrap();
        match old_home {
            Some(value) => env::set_var("HOME", value),
            None => env::remove_var("HOME"),
        }

        assert_eq!(policy.session.scope, SessionScope::Persistent);
        assert_eq!(policy.session.ttl_secs, Some(86400));
        assert_eq!(policy.detectors.len(), 1);
        assert_eq!(policy.rules.len(), 2);
        let ner = policy.ner.unwrap();
        assert_eq!(
            ner.model_dir,
            Some(PathBuf::from("/tmp/gaze-home/.cache/gaze/model"))
        );
        assert_eq!(ner.threshold, 0.4);
    }

    // A threshold above 1.0 is rejected with the typed range error.
    #[test]
    fn rejects_ner_threshold_out_of_range() {
        let raw = r#"
[session]
scope = "ephemeral"

[ner]
threshold = 1.1

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = ".+"
class = "email"

[[rule]]
kind = "default"
action = "preserve"
"#;
        let raw: RawPolicy = toml::from_str(raw).expect("raw policy");

        assert!(matches!(
            Policy::try_from(raw),
            Err(PolicyError::NerThresholdOutOfRange { value }) if value == 1.1
        ));
    }

    // Well-formed locale tags pass; malformed text yields `NerLocaleUnsupported`.
    #[test]
    fn accepts_bcp47_ner_locale_hints() {
        for locale in ["de", "en-US", "pt-BR", "zh-Hant"] {
            assert!(
                validate_ner_locale(locale).is_ok(),
                "NER locale hints should accept BCP47-shaped tag {locale}"
            );
        }

        assert!(matches!(
            validate_ner_locale("bad locale!"),
            Err(PolicyError::NerLocaleUnsupported { value }) if value == "bad locale!"
        ));
    }

    // An unknown `session.scope` surfaces as `SessionScopeUnknown` carrying the bad value.
    #[test]
    fn rejects_unknown_session_scope_with_typed_error() {
        let raw = r#"
[session]
scope = "forever"

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = ".+"
class = "email"

[[rule]]
kind = "default"
action = "preserve"
"#;

        let raw = toml::from_str::<RawPolicy>(raw).unwrap();
        let err = Policy::try_from(raw).unwrap_err();

        assert!(matches!(
            err,
            PolicyError::SessionScopeUnknown { value } if value == "forever"
        ));
    }

    // A custom email regex recognizer loads even when the only rule is a preserving default.
    #[test]
    fn custom_email_recognizer_loads_under_preservation() {
        let raw = r#"
[session]
scope = "ephemeral"

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = 'alice@example\.invalid'
class = "email"

[[rule]]
kind = "default"
action = "preserve"
"#;

        let raw = toml::from_str::<RawPolicy>(raw).unwrap();
        let policy = Policy::try_from(raw).unwrap();

        assert_eq!(policy.detectors.len(), 1);
        assert_eq!(policy.detectors[0].name, "emails");
    }

    // Unknown keys are rejected at deserialization time (`deny_unknown_fields`).
    #[test]
    fn rejects_unknown_keys() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("policy.toml");
        fs::write(
            &path,
            r#"
[session]
scope = "ephemeral"
bogus = true

[[policy.custom_recognizers]]
kind = "regex"
name = "emails"
pattern = ".+"
class = "email"

[[rule]]
kind = "default"
action = "preserve"
"#,
        )
        .unwrap();

        assert!(matches!(
            Policy::load(&path),
            Err(PolicyError::TomlParse(_))
        ));
    }

    // A dictionary recognizer with inline terms produces a dictionary named after it.
    #[test]
    fn loads_dictionary_custom_recognizer_terms() {
        let dir = tempdir().unwrap();
        let path = dir.path().join("policy.toml");
        fs::write(
            &path,
            r#"
[session]
scope = "ephemeral"

[[policy.custom_recognizers]]
kind = "dictionary"
name = "songs"
class = "custom:song"
terms = ["Song A"]
case_sensitive = true

[[rule]]
kind = "class"
class = "custom:song"
action = "tokenize"

[[rule]]
kind = "default"
action = "preserve"
"#,
        )
        .unwrap();

        let policy = Policy::load(&path).unwrap();
        assert_eq!(policy.detectors[0].kind, DetectorKind::Dictionary);
        assert_eq!(
            policy.detectors[0].dictionary_name.as_deref(),
            Some("songs")
        );
        assert_eq!(policy.dictionaries[0].terms, vec!["Song A"]);
    }
}