Skip to main content

redact_core/recognizers/
pattern.rs

// Copyright 2026 Censgate LLC.
// Licensed under the Apache License, Version 2.0. See the LICENSE file
// in the project root for license information.
4
5use super::{validation::validate_entity, Recognizer, RecognizerResult};
6use crate::types::EntityType;
7use anyhow::Result;
8use lazy_static::lazy_static;
9use regex::Regex;
10use std::collections::HashMap;
11
/// Pattern-based recognizer using regex
///
/// Holds, per entity type, the compiled patterns that `analyze` scans the
/// input text with.
#[derive(Debug, Clone)]
pub struct PatternRecognizer {
    /// Name returned by `Recognizer::name` and passed to every result.
    name: String,
    /// Compiled patterns grouped by the entity type they detect.
    patterns: HashMap<EntityType, Vec<CompiledPattern>>,
    /// Matches whose final score is below this value are dropped by `analyze`.
    min_score: f32,
}
19
/// One compiled detection rule: a regex, its base confidence, and optional
/// context words that can raise the confidence of a match.
#[derive(Debug, Clone)]
struct CompiledPattern {
    /// Compiled regular expression applied to the input text.
    regex: Regex,
    /// Base confidence assigned to every match of `regex`.
    score: f32,
    /// Words looked for near a match to boost its score; may be empty.
    context_words: Vec<String>,
}
26
27impl PatternRecognizer {
28    /// Create a new pattern recognizer with default patterns
29    pub fn new() -> Self {
30        let mut recognizer = Self {
31            name: "PatternRecognizer".to_string(),
32            patterns: HashMap::new(),
33            min_score: 0.5,
34        };
35        recognizer.load_default_patterns();
36        recognizer
37    }
38
39    /// Create a new pattern recognizer with custom name
40    pub fn with_name(name: impl Into<String>) -> Self {
41        let mut recognizer = Self::new();
42        recognizer.name = name.into();
43        recognizer
44    }
45
46    /// Set minimum confidence score
47    pub fn with_min_score(mut self, min_score: f32) -> Self {
48        self.min_score = min_score;
49        self
50    }
51
52    /// Add a custom pattern for an entity type
53    pub fn add_pattern(
54        &mut self,
55        entity_type: EntityType,
56        pattern: &str,
57        score: f32,
58    ) -> Result<()> {
59        let regex = Regex::new(pattern)?;
60        let compiled = CompiledPattern {
61            regex,
62            score,
63            context_words: vec![],
64        };
65        self.patterns.entry(entity_type).or_default().push(compiled);
66        Ok(())
67    }
68
69    /// Add a pattern with context words for score boosting
70    pub fn add_pattern_with_context(
71        &mut self,
72        entity_type: EntityType,
73        pattern: &str,
74        score: f32,
75        context_words: Vec<String>,
76    ) -> Result<()> {
77        let regex = Regex::new(pattern)?;
78        let compiled = CompiledPattern {
79            regex,
80            score,
81            context_words,
82        };
83        self.patterns.entry(entity_type).or_default().push(compiled);
84        Ok(())
85    }
86
87    /// Load default patterns for common PII types
88    fn load_default_patterns(&mut self) {
89        // Email addresses
90        let _ = self.add_pattern(
91            EntityType::EmailAddress,
92            r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
93            0.8,
94        );
95
96        // Phone numbers (US/international format with separators)
97        // Requires at least one separator or parentheses to avoid matching
98        // consecutive digits in credit cards, ISBNs, etc.
99        // Matches: (555) 123-4567, 555-123-4567, 555.123.4567, 555 123 4567
100        // Does NOT match: 5551234567 (no separators - too prone to false positives)
101        let _ = self.add_pattern(
102            EntityType::PhoneNumber,
103            r"\(\d{3}\)[-.\s]?\d{3}[-.\s]?\d{4}\b|\b\d{3}[-.\s]\d{3}[-.\s]?\d{4}\b",
104            0.7,
105        );
106
107        // Credit cards (4 groups of 4 digits)
108        let _ = self.add_pattern(
109            EntityType::CreditCard,
110            r"\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12})\b",
111            0.9,
112        );
113
114        // US SSN (simplified pattern - Rust regex doesn't support lookahead)
115        // Pattern matches XXX-XX-XXXX format
116        let _ = self.add_pattern(EntityType::UsSsn, r"\b\d{3}-\d{2}-\d{4}\b", 0.9);
117
118        // IP Address (IPv4)
119        let _ = self.add_pattern(
120            EntityType::IpAddress,
121            r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b",
122            0.8,
123        );
124
125        // URL
126        let _ = self.add_pattern(
127            EntityType::Url,
128            r"\b(?:https?://|www\.)[a-zA-Z0-9][-a-zA-Z0-9]*(?:\.[a-zA-Z0-9][-a-zA-Z0-9]*)+(?:/[^\s]*)?\b",
129            0.7,
130        );
131
132        // Domain name (standalone, without protocol - avoid overlapping URL)
133        let _ = self.add_pattern(
134            EntityType::DomainName,
135            r"\b(?:[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\.)+[A-Za-z]{2,}\b",
136            0.7,
137        );
138
139        // GUID/UUID
140        let _ = self.add_pattern(
141            EntityType::Guid,
142            r"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b",
143            0.9,
144        );
145
146        // MAC Address
147        let _ = self.add_pattern(
148            EntityType::MacAddress,
149            r"\b(?:[0-9A-Fa-f]{2}[:-]){5}[0-9A-Fa-f]{2}\b",
150            0.9,
151        );
152
153        // UK NHS Number
154        let _ = self.add_pattern_with_context(
155            EntityType::UkNhs,
156            r"\b(?:\d{3}\s?\d{3}\s?\d{4}|\d{10})\b",
157            0.6,
158            vec![
159                "NHS".to_string(),
160                "patient".to_string(),
161                "health".to_string(),
162            ],
163        );
164
165        // UK National Insurance Number
166        let _ = self.add_pattern(
167            EntityType::UkNino,
168            r"\b[A-CEGHJ-PR-TW-Z]{1}[A-CEGHJ-NPR-TW-Z]{1}\d{6}[A-D]{1}\b",
169            0.85,
170        );
171
172        // UK Postcode
173        let _ = self.add_pattern(
174            EntityType::UkPostcode,
175            r"\b[A-Z]{1,2}\d[A-Z\d]?\s?\d[A-Z]{2}\b",
176            0.75,
177        );
178
179        // UK Sort Code
180        let _ = self.add_pattern(EntityType::UkSortCode, r"\b\d{2}-\d{2}-\d{2}\b", 0.7);
181
182        // IBAN
183        let _ = self.add_pattern(
184            EntityType::IbanCode,
185            r"\b[A-Z]{2}\d{2}[A-Z0-9]{1,30}\b",
186            0.75,
187        );
188
189        // Bitcoin Address
190        let _ = self.add_pattern(
191            EntityType::BtcAddress,
192            r"\b(?:bc1|[13])[a-zA-HJ-NP-Z0-9]{25,62}\b",
193            0.85,
194        );
195
196        // Ethereum Address
197        let _ = self.add_pattern(EntityType::EthAddress, r"\b0x[a-fA-F0-9]{40}\b", 0.9);
198
199        // MD5 Hash
200        let _ = self.add_pattern(EntityType::Md5Hash, r"\b[a-fA-F0-9]{32}\b", 0.6);
201
202        // SHA1 Hash
203        let _ = self.add_pattern(EntityType::Sha1Hash, r"\b[a-fA-F0-9]{40}\b", 0.6);
204
205        // SHA256 Hash
206        let _ = self.add_pattern(EntityType::Sha256Hash, r"\b[a-fA-F0-9]{64}\b", 0.6);
207
208        // US ZIP Code (5 digits or ZIP+4 format)
209        let _ = self.add_pattern(
210            EntityType::UsZipCode,
211            r"\b\d{5}(?:-\d{4})?\b",
212            0.6, // Lower confidence as could be other 5-digit numbers
213        );
214
215        // PO Box
216        let _ = self.add_pattern_with_context(
217            EntityType::PoBox,
218            r"\b(?:P\.?\s?O\.?|POST\s+OFFICE)\s*BOX\s+\d+\b",
219            0.85,
220            vec![
221                "address".to_string(),
222                "mail".to_string(),
223                "ship".to_string(),
224            ],
225        );
226
227        // ISBN (10 or 13 digit formats)
228        let _ = self.add_pattern(
229            EntityType::Isbn,
230            r"\b(?:ISBN(?:-1[03])?:?\s*)?(?:\d{9}[\dX]|\d{13})\b",
231            0.8,
232        );
233
234        // Generic Passport Number (alphanumeric, 6-9 characters)
235        let _ = self.add_pattern_with_context(
236            EntityType::PassportNumber,
237            r"\b[A-Z]{1,2}\d{6,9}\b",
238            0.7,
239            vec!["passport".to_string(), "travel".to_string()],
240        );
241
242        // Medical Record Number (various formats with MRN context)
243        let _ = self.add_pattern_with_context(
244            EntityType::MedicalRecordNumber,
245            r"\b(?:MRN|Medical\s*Record|Patient\s*ID):?\s*[A-Z0-9]{6,12}\b",
246            0.85,
247            vec![
248                "patient".to_string(),
249                "medical".to_string(),
250                "hospital".to_string(),
251            ],
252        );
253
254        // Age (with context)
255        let _ = self.add_pattern_with_context(
256            EntityType::Age,
257            r"\b(?:age|aged|years old):?\s*(\d{1,3})\b",
258            0.8,
259            vec!["years".to_string(), "old".to_string(), "age".to_string()],
260        );
261
262        // Date/Time (ISO format and common variants)
263        let _ = self.add_pattern(
264            EntityType::DateTime,
265            r"\b\d{4}-\d{2}-\d{2}(?:[T\s]\d{2}:\d{2}(?::\d{2})?)?\b",
266            0.5,
267        );
268
269        // US Driver's License (varies by state, common formats)
270        // More specific patterns to avoid false positives:
271        // - Letter prefix followed by 6-8 digits (most states)
272        // - State-specific format with dashes
273        // Base score is low (0.4) - requires context to reach min_score
274        let _ = self.add_pattern_with_context(
275            EntityType::UsDriverLicense,
276            r"\b[A-Z]\d{6,8}\b|\b[A-Z]\d{3}-\d{4}-\d{4}\b",
277            0.4,
278            vec![
279                "driver".to_string(),
280                "license".to_string(),
281                "DL".to_string(),
282                "DMV".to_string(),
283            ],
284        );
285
286        // US Passport Number (9 digits, sometimes with letter prefix)
287        // Base score is low - requires context
288        let _ = self.add_pattern_with_context(
289            EntityType::UsPassport,
290            r"\b[A-Z]?\d{9}\b",
291            0.4,
292            vec![
293                "passport".to_string(),
294                "travel".to_string(),
295                "state department".to_string(),
296            ],
297        );
298
299        // US Bank Account Number (typically 8-17 digits)
300        // Very low base score - highly dependent on context
301        let _ = self.add_pattern_with_context(
302            EntityType::UsBankNumber,
303            r"\b\d{8,17}\b",
304            0.3,
305            vec![
306                "account".to_string(),
307                "bank".to_string(),
308                "routing".to_string(),
309                "checking".to_string(),
310                "savings".to_string(),
311            ],
312        );
313
314        // UK Driver's License (DVLA format: 5 letters + 6 digits + 2 letters + 3 digits + 2 letters)
315        // Example: MORGA753116SM9IJ 35
316        let _ = self.add_pattern(
317            EntityType::UkDriverLicense,
318            r"\b[A-Z]{5}\d{6}[A-Z0-9]{2}\d[A-Z]{2}\s?\d{2}\b",
319            0.85,
320        );
321
322        // UK Passport Number (9 digits)
323        // Low base score - requires context to avoid matching random 9-digit numbers
324        let _ = self.add_pattern_with_context(
325            EntityType::UkPassportNumber,
326            r"\b\d{9}\b",
327            0.3,
328            vec![
329                "passport".to_string(),
330                "travel".to_string(),
331                "HMPO".to_string(),
332            ],
333        );
334
335        // UK Phone Number (landline: 01/02/03 prefix)
336        let _ = self.add_pattern(
337            EntityType::UkPhoneNumber,
338            r"\b(?:0[1-3]\d{2,3}\s?\d{3}\s?\d{4}|0[1-3]\d{2,3}\s?\d{6,7})\b",
339            0.75,
340        );
341
342        // UK Mobile Number (07 prefix)
343        let _ = self.add_pattern(
344            EntityType::UkMobileNumber,
345            r"\b07\d{3}\s?\d{3}\s?\d{3}\b",
346            0.8,
347        );
348
349        // UK Company Number (Companies House: 8 digits or 2 letters + 6 digits)
350        // Low base score - requires context to avoid matching random 8-digit numbers
351        let _ = self.add_pattern_with_context(
352            EntityType::UkCompanyNumber,
353            r"\b(?:\d{8}|[A-Z]{2}\d{6})\b",
354            0.3,
355            vec![
356                "company".to_string(),
357                "companies house".to_string(),
358                "registration".to_string(),
359                "CRN".to_string(),
360            ],
361        );
362
363        // Medical License Number (various formats with context)
364        let _ = self.add_pattern_with_context(
365            EntityType::MedicalLicense,
366            r"\b(?:MD|DO|NP|PA|RN|LPN)[-\s]?\d{5,10}\b",
367            0.8,
368            vec![
369                "license".to_string(),
370                "medical".to_string(),
371                "physician".to_string(),
372                "doctor".to_string(),
373                "nurse".to_string(),
374            ],
375        );
376
377        // Generic Crypto Wallet (covers various formats beyond BTC/ETH)
378        // Matches Litecoin (L/M/3), Ripple (r), etc.
379        let _ = self.add_pattern_with_context(
380            EntityType::CryptoWallet,
381            r"\b[LMr3][a-km-zA-HJ-NP-Z1-9]{25,34}\b",
382            0.75,
383            vec![
384                "wallet".to_string(),
385                "crypto".to_string(),
386                "address".to_string(),
387                "coin".to_string(),
388            ],
389        );
390    }
391
392    /// Check context words around a match to boost confidence
393    fn check_context(&self, text: &str, start: usize, end: usize, context_words: &[String]) -> f32 {
394        if context_words.is_empty() {
395            return 0.0;
396        }
397
398        // Get 50 characters before and after the match
399        let context_start = start.saturating_sub(50);
400        let context_end = (end + 50).min(text.len());
401        let context = &text[context_start..context_end].to_lowercase();
402
403        // Count matching context words
404        let matches = context_words
405            .iter()
406            .filter(|word| context.contains(&word.to_lowercase()))
407            .count();
408
409        // Boost score based on context matches (up to +0.3)
410        (matches as f32 / context_words.len() as f32) * 0.3
411    }
412}
413
414impl Default for PatternRecognizer {
415    fn default() -> Self {
416        Self::new()
417    }
418}
419
420impl Recognizer for PatternRecognizer {
421    fn name(&self) -> &str {
422        &self.name
423    }
424
425    fn supported_entities(&self) -> &[EntityType] {
426        lazy_static! {
427            static ref SUPPORTED: Vec<EntityType> = vec![
428                // Contact information
429                EntityType::EmailAddress,
430                EntityType::PhoneNumber,
431                EntityType::IpAddress,
432                EntityType::Url,
433                EntityType::DomainName,
434                // Financial
435                EntityType::CreditCard,
436                EntityType::IbanCode,
437                EntityType::UsBankNumber,
438                // US-specific
439                EntityType::UsSsn,
440                EntityType::UsDriverLicense,
441                EntityType::UsPassport,
442                EntityType::UsZipCode,
443                // UK-specific
444                EntityType::UkNhs,
445                EntityType::UkNino,
446                EntityType::UkPostcode,
447                EntityType::UkSortCode,
448                EntityType::UkDriverLicense,
449                EntityType::UkPassportNumber,
450                EntityType::UkPhoneNumber,
451                EntityType::UkMobileNumber,
452                EntityType::UkCompanyNumber,
453                // Healthcare
454                EntityType::MedicalLicense,
455                EntityType::MedicalRecordNumber,
456                // Generic identifiers
457                EntityType::PassportNumber,
458                EntityType::Age,
459                EntityType::Isbn,
460                EntityType::PoBox,
461                EntityType::DateTime,
462                // Crypto
463                EntityType::CryptoWallet,
464                EntityType::BtcAddress,
465                EntityType::EthAddress,
466                // Technical
467                EntityType::Guid,
468                EntityType::MacAddress,
469                EntityType::Md5Hash,
470                EntityType::Sha1Hash,
471                EntityType::Sha256Hash,
472            ];
473        }
474        &SUPPORTED
475    }
476
477    fn analyze(&self, text: &str, _language: &str) -> Result<Vec<RecognizerResult>> {
478        let mut results = Vec::new();
479
480        for (entity_type, patterns) in &self.patterns {
481            for pattern in patterns {
482                for capture in pattern.regex.captures_iter(text) {
483                    if let Some(matched) = capture.get(0) {
484                        let start = matched.start();
485                        let end = matched.end();
486                        let matched_text = matched.as_str();
487
488                        // Base score from pattern
489                        let mut score = pattern.score;
490
491                        // Boost score based on context if context words are provided
492                        if !pattern.context_words.is_empty() {
493                            score += self.check_context(text, start, end, &pattern.context_words);
494                            score = score.min(1.0); // Cap at 1.0
495                        }
496
497                        // Apply validation (checksum, format validation)
498                        // This can reduce or zero out the score for invalid matches
499                        let validation_factor = validate_entity(entity_type, matched_text);
500                        score *= validation_factor;
501
502                        if score >= self.min_score {
503                            results.push(
504                                RecognizerResult::new(
505                                    entity_type.clone(),
506                                    start,
507                                    end,
508                                    score,
509                                    self.name(),
510                                )
511                                .with_text(text),
512                            );
513                        }
514                    }
515                }
516            }
517        }
518
519        Ok(results)
520    }
521
    /// Minimum final score a match needs to be included in `analyze` results.
    fn min_score(&self) -> f32 {
        self.min_score
    }
525}
526
527#[cfg(test)]
528mod tests {
529    use super::*;
530
531    #[test]
532    fn test_email_detection() {
533        let recognizer = PatternRecognizer::new();
534        let text = "Contact me at john.doe@example.com for details";
535        let results = recognizer.analyze(text, "en").unwrap();
536
537        let email_results: Vec<_> = results
538            .iter()
539            .filter(|r| r.entity_type == EntityType::EmailAddress)
540            .collect();
541        assert_eq!(email_results.len(), 1);
542        assert_eq!(
543            email_results[0].text,
544            Some("john.doe@example.com".to_string())
545        );
546        assert!(email_results[0].score >= 0.8);
547    }
548
549    #[test]
550    fn test_phone_detection() {
551        let recognizer = PatternRecognizer::new();
552        let text = "Call me at (555) 123-4567";
553        let results = recognizer.analyze(text, "en").unwrap();
554
555        assert!(!results.is_empty());
556        let phone_result = results
557            .iter()
558            .find(|r| r.entity_type == EntityType::PhoneNumber);
559        assert!(phone_result.is_some());
560    }
561
562    #[test]
563    fn test_credit_card_detection() {
564        let recognizer = PatternRecognizer::new();
565        let text = "Card number: 4532015112830366";
566        let results = recognizer.analyze(text, "en").unwrap();
567
568        assert!(!results.is_empty());
569        let cc_result = results
570            .iter()
571            .find(|r| r.entity_type == EntityType::CreditCard);
572        assert!(cc_result.is_some());
573    }
574
575    #[test]
576    fn test_ssn_detection() {
577        let recognizer = PatternRecognizer::new();
578        let text = "SSN: 123-45-6789";
579        let results = recognizer.analyze(text, "en").unwrap();
580
581        assert!(!results.is_empty());
582        let ssn_result = results.iter().find(|r| r.entity_type == EntityType::UsSsn);
583        assert!(ssn_result.is_some());
584    }
585
586    #[test]
587    fn test_uk_nhs_with_context() {
588        let recognizer = PatternRecognizer::new();
589        // Use a valid NHS number that passes mod-11 checksum: 943 476 5919
590        // Checksum: 9*10 + 4*9 + 3*8 + 4*7 + 7*6 + 6*5 + 5*4 + 9*3 + 1*2 = 220
591        // 11 - (220 % 11) = 11 - 0 = 11 -> 0, but last digit is 9, so let's use a known valid one
592        // Valid NHS: 401 023 2137 (checksum verified)
593        let text = "NHS patient number is 401 023 2137";
594        let results = recognizer.analyze(text, "en").unwrap();
595
596        assert!(!results.is_empty());
597        let nhs_result = results.iter().find(|r| r.entity_type == EntityType::UkNhs);
598        assert!(
599            nhs_result.is_some(),
600            "Should detect NHS number with context"
601        );
602        // Score should be boosted due to "NHS" context
603        if let Some(result) = nhs_result {
604            assert!(result.score > 0.6);
605        }
606    }
607
608    #[test]
609    fn test_uk_nino_detection() {
610        let recognizer = PatternRecognizer::new();
611        let text = "NINO: AB123456C";
612        let results = recognizer.analyze(text, "en").unwrap();
613
614        assert!(!results.is_empty());
615        let nino_result = results.iter().find(|r| r.entity_type == EntityType::UkNino);
616        assert!(nino_result.is_some());
617    }
618
619    #[test]
620    fn test_multiple_entities() {
621        let recognizer = PatternRecognizer::new();
622        let text = "Email john@example.com, phone (555) 123-4567, SSN 123-45-6789";
623        let results = recognizer.analyze(text, "en").unwrap();
624
625        assert!(results.len() >= 3);
626        assert!(results
627            .iter()
628            .any(|r| r.entity_type == EntityType::EmailAddress));
629        assert!(results
630            .iter()
631            .any(|r| r.entity_type == EntityType::PhoneNumber));
632        assert!(results.iter().any(|r| r.entity_type == EntityType::UsSsn));
633    }
634
635    #[test]
636    fn test_custom_pattern() {
637        let mut recognizer = PatternRecognizer::new();
638        recognizer
639            .add_pattern(
640                EntityType::Custom("CUSTOM_ID".to_string()),
641                r"\bCID-\d{6}\b",
642                0.9,
643            )
644            .unwrap();
645
646        let text = "Your customer ID is CID-123456";
647        let results = recognizer.analyze(text, "en").unwrap();
648
649        let custom_result = results
650            .iter()
651            .find(|r| matches!(r.entity_type, EntityType::Custom(_)));
652        assert!(custom_result.is_some());
653    }
654
655    #[test]
656    fn test_min_score_filtering() {
657        let recognizer = PatternRecognizer::new().with_min_score(0.9);
658        let text = "Date: 2024-01-15"; // Date has score 0.5
659        let results = recognizer.analyze(text, "en").unwrap();
660
661        // Date should be filtered out due to min_score
662        let date_results = results
663            .iter()
664            .filter(|r| r.entity_type == EntityType::DateTime)
665            .count();
666        assert_eq!(date_results, 0);
667    }
668
669    #[test]
670    fn test_uk_driver_license_detection() {
671        let recognizer = PatternRecognizer::new();
672        let text = "UK DL: MORGA753116SM9IJ 35";
673        let results = recognizer.analyze(text, "en").unwrap();
674
675        let dl_result = results
676            .iter()
677            .find(|r| r.entity_type == EntityType::UkDriverLicense);
678        assert!(dl_result.is_some(), "Should detect UK driver's license");
679    }
680
681    #[test]
682    fn test_uk_mobile_detection() {
683        let recognizer = PatternRecognizer::new();
684        let text = "Call me on 07700 900123";
685        let results = recognizer.analyze(text, "en").unwrap();
686
687        let mobile_result = results
688            .iter()
689            .find(|r| r.entity_type == EntityType::UkMobileNumber);
690        assert!(mobile_result.is_some(), "Should detect UK mobile number");
691    }
692
693    #[test]
694    fn test_uk_phone_detection() {
695        let recognizer = PatternRecognizer::new();
696        let text = "Office: 0207 123 4567";
697        let results = recognizer.analyze(text, "en").unwrap();
698
699        let phone_result = results
700            .iter()
701            .find(|r| r.entity_type == EntityType::UkPhoneNumber);
702        assert!(phone_result.is_some(), "Should detect UK phone number");
703    }
704
705    #[test]
706    fn test_medical_license_detection() {
707        let recognizer = PatternRecognizer::new();
708        let text = "Medical license: MD-123456789";
709        let results = recognizer.analyze(text, "en").unwrap();
710
711        let license_result = results
712            .iter()
713            .find(|r| r.entity_type == EntityType::MedicalLicense);
714        assert!(license_result.is_some(), "Should detect medical license");
715    }
716
717    #[test]
718    fn test_supported_entities_count() {
719        let recognizer = PatternRecognizer::new();
720        let supported = recognizer.supported_entities();
721        // Should have 36 pattern-based entity types
722        assert_eq!(
723            supported.len(),
724            36,
725            "Should support 36 pattern-based entity types, got {}",
726            supported.len()
727        );
728    }
729}