memvid_core/enrich/
rules.rs

1//! Rules-based enrichment engine using regex patterns.
2//!
3//! This engine extracts memory cards from text using configurable regex
4//! patterns. It's fast, deterministic, and doesn't require any models.
5
6use super::{EnrichmentContext, EnrichmentEngine, EnrichmentResult};
7use crate::types::{MemoryCard, MemoryCardBuilder, MemoryKind, Polarity};
8use regex::Regex;
9
/// Canonicalize an entity name so that spellings differing only in case or
/// surrounding whitespace produce the same key.
///
/// Leading and trailing whitespace is stripped and the rest is lowercased.
fn normalize_entity(entity: &str) -> String {
    let trimmed = entity.trim();
    trimmed.to_lowercase()
}
15
16/// A rule for extracting memory cards from text.
17#[derive(Debug, Clone)]
18pub struct ExtractionRule {
19    /// Name of the rule (for debugging).
20    pub name: String,
21    /// Regex pattern to match.
22    pub pattern: Regex,
23    /// The kind of memory card to create.
24    pub kind: MemoryKind,
25    /// The entity to use (supports $1, $2 capture groups).
26    pub entity: String,
27    /// The slot to use (supports $1, $2 capture groups).
28    pub slot: String,
29    /// The value template (supports $1, $2 capture groups).
30    pub value: String,
31    /// Optional polarity for preference rules.
32    pub polarity: Option<Polarity>,
33}
34
35impl ExtractionRule {
36    /// Create a new extraction rule.
37    pub fn new(
38        name: impl Into<String>,
39        pattern: &str,
40        kind: MemoryKind,
41        entity: impl Into<String>,
42        slot: impl Into<String>,
43        value: impl Into<String>,
44    ) -> Result<Self, regex::Error> {
45        Ok(Self {
46            name: name.into(),
47            pattern: Regex::new(pattern)?,
48            kind,
49            entity: entity.into(),
50            slot: slot.into(),
51            value: value.into(),
52            polarity: None,
53        })
54    }
55
56    /// Create a preference rule with polarity.
57    pub fn preference(
58        name: impl Into<String>,
59        pattern: &str,
60        entity: impl Into<String>,
61        slot: impl Into<String>,
62        value: impl Into<String>,
63        polarity: Polarity,
64    ) -> Result<Self, regex::Error> {
65        Ok(Self {
66            name: name.into(),
67            pattern: Regex::new(pattern)?,
68            kind: MemoryKind::Preference,
69            entity: entity.into(),
70            slot: slot.into(),
71            value: value.into(),
72            polarity: Some(polarity),
73        })
74    }
75
76    /// Apply the rule to text and return extracted cards.
77    fn apply(&self, ctx: &EnrichmentContext) -> Vec<MemoryCard> {
78        let mut cards = Vec::new();
79
80        for caps in self.pattern.captures_iter(&ctx.text) {
81            // Expand capture groups in entity, slot, value
82            let entity = normalize_entity(&self.expand_captures(&self.entity, &caps));
83            let slot = self.expand_captures(&self.slot, &caps);
84            let value = self.expand_captures(&self.value, &caps).trim().to_string();
85
86            if entity.is_empty() || slot.is_empty() || value.is_empty() {
87                continue;
88            }
89
90            let mut builder = MemoryCardBuilder::new()
91                .kind(self.kind.clone())
92                .entity(&entity)
93                .slot(&slot)
94                .value(&value)
95                .source(ctx.frame_id, Some(ctx.uri.clone()))
96                .engine("rules", "1.0.0");
97
98            if let Some(polarity) = &self.polarity {
99                builder = builder.polarity(polarity.clone());
100            }
101
102            // Build with a placeholder ID (will be assigned by MemoriesTrack)
103            if let Ok(card) = builder.build(0) {
104                cards.push(card);
105            }
106        }
107
108        cards
109    }
110
111    /// Expand capture group references ($1, $2, etc.) in a template.
112    fn expand_captures(&self, template: &str, caps: &regex::Captures) -> String {
113        let mut result = template.to_string();
114        for i in 0..10 {
115            let placeholder = format!("${}", i);
116            if let Some(m) = caps.get(i) {
117                result = result.replace(&placeholder, m.as_str());
118            }
119        }
120        result
121    }
122}
123
124/// Rules-based enrichment engine.
125///
126/// This engine uses a collection of regex-based rules to extract
127/// structured memory cards from text. Rules can target facts,
128/// preferences, events, and other memory types.
129#[derive(Debug)]
130pub struct RulesEngine {
131    rules: Vec<ExtractionRule>,
132    version: String,
133}
134
135impl Default for RulesEngine {
136    fn default() -> Self {
137        Self::new()
138    }
139}
140
141impl RulesEngine {
142    /// Create a new rules engine with default rules.
143    #[must_use]
144    pub fn new() -> Self {
145        let mut engine = Self {
146            rules: Vec::new(),
147            version: "1.0.0".to_string(),
148        };
149        engine.add_default_rules();
150        engine.add_third_person_rules();
151        engine
152    }
153
154    /// Create an empty rules engine (no default rules).
155    #[must_use]
156    pub fn empty() -> Self {
157        Self {
158            rules: Vec::new(),
159            version: "1.0.0".to_string(),
160        }
161    }
162
163    /// Add a rule to the engine.
164    pub fn add_rule(&mut self, rule: ExtractionRule) {
165        self.rules.push(rule);
166    }
167
168    /// Add default rules for common patterns.
169    fn add_default_rules(&mut self) {
170        // Employment facts
171        if let Ok(rule) = ExtractionRule::new(
172            "employer",
173            r"(?i)(?:I work at|I'm employed at|I work for|my employer is|I'm at)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\.|,|!|\?|$)",
174            MemoryKind::Fact,
175            "user",
176            "employer",
177            "$1",
178        ) {
179            self.rules.push(rule);
180        }
181
182        // Job title
183        if let Ok(rule) = ExtractionRule::new(
184            "job_title",
185            r"(?i)(?:I am a|I'm a|I work as a|my job is|my role is|my title is)\s+([A-Za-z][a-zA-Z\s]+?)(?:\.|,|!|\?|$| at)",
186            MemoryKind::Fact,
187            "user",
188            "job_title",
189            "$1",
190        ) {
191            self.rules.push(rule);
192        }
193
194        // Location
195        if let Ok(rule) = ExtractionRule::new(
196            "location",
197            r"(?i)(?:I live in|I'm based in|I'm from|I reside in|my home is in)\s+([A-Z][a-zA-Z\s,]+?)(?:\.|!|\?|$)",
198            MemoryKind::Fact,
199            "user",
200            "location",
201            "$1",
202        ) {
203            self.rules.push(rule);
204        }
205
206        // Name
207        if let Ok(rule) = ExtractionRule::new(
208            "name",
209            r"(?i)(?:my name is|I'm|call me|I am)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)(?:\.|,|!|\?|$)",
210            MemoryKind::Profile,
211            "user",
212            "name",
213            "$1",
214        ) {
215            self.rules.push(rule);
216        }
217
218        // Age
219        if let Ok(rule) = ExtractionRule::new(
220            "age",
221            r"(?i)(?:I am|I'm)\s+(\d{1,3})\s+(?:years old|yrs old|yo)(?:\.|,|!|\?|$|\s)",
222            MemoryKind::Profile,
223            "user",
224            "age",
225            "$1",
226        ) {
227            self.rules.push(rule);
228        }
229
230        // Food preferences (positive)
231        if let Ok(rule) = ExtractionRule::preference(
232            "food_like",
233            r"(?i)(?:I (?:really )?(?:love|like|enjoy|prefer|adore))\s+([\w\s]+?)(?:\.|,|!|\?|$)",
234            "user",
235            "food_preference",
236            "$1",
237            Polarity::Positive,
238        ) {
239            self.rules.push(rule);
240        }
241
242        // Food preferences (negative)
243        if let Ok(rule) = ExtractionRule::preference(
244            "food_dislike",
245            r"(?i)(?:I (?:really )?(?:hate|dislike|can't stand|don't like|avoid))\s+([\w\s]+?)(?:\.|,|!|\?|$)",
246            "user",
247            "food_preference",
248            "$1",
249            Polarity::Negative,
250        ) {
251            self.rules.push(rule);
252        }
253
254        // Allergies
255        if let Ok(rule) = ExtractionRule::new(
256            "allergy",
257            r"(?i)(?:I am|I'm) allergic to\s+([\w\s]+?)(?:\.|,|!|\?|$)",
258            MemoryKind::Profile,
259            "user",
260            "allergy",
261            "$1",
262        ) {
263            self.rules.push(rule);
264        }
265
266        // Programming language preferences
267        if let Ok(rule) = ExtractionRule::preference(
268            "programming_language",
269            r"(?i)(?:I (?:really )?(?:love|like|enjoy|prefer) (?:programming in|coding in|using|writing))\s+([\w\+\#]+)(?:\.|,|!|\?|$|\s)",
270            "user",
271            "programming_language",
272            "$1",
273            Polarity::Positive,
274        ) {
275            self.rules.push(rule);
276        }
277
278        // Hobby/interest
279        if let Ok(rule) = ExtractionRule::new(
280            "hobby",
281            r"(?i)(?:my hobby is|I enjoy|I like to|my favorite hobby is|my favourite hobby is)\s+([\w\s]+?)(?:\.|,|!|\?|$)",
282            MemoryKind::Preference,
283            "user",
284            "hobby",
285            "$1",
286        ) {
287            self.rules.push(rule);
288        }
289
290        // Pet
291        if let Ok(rule) = ExtractionRule::new(
292            "pet",
293            r"(?i)(?:I have a|my pet is a|I own a)\s+([\w\s]+?)(?:\s+named|\.|,|!|\?|$)",
294            MemoryKind::Fact,
295            "user",
296            "pet",
297            "$1",
298        ) {
299            self.rules.push(rule);
300        }
301
302        // Pet name
303        if let Ok(rule) = ExtractionRule::new(
304            "pet_name",
305            r"(?i)(?:my (?:pet|dog|cat|bird|fish|hamster)'?s? name is|I have a [\w\s]+ named)\s+([A-Z][a-z]+)(?:\.|,|!|\?|$)",
306            MemoryKind::Fact,
307            "user",
308            "pet_name",
309            "$1",
310        ) {
311            self.rules.push(rule);
312        }
313
314        // Birthday
315        if let Ok(rule) = ExtractionRule::new(
316            "birthday",
317            r"(?i)(?:my birthday is|I was born on|born on)\s+(\w+\s+\d{1,2}(?:st|nd|rd|th)?(?:,?\s+\d{4})?)(?:\.|,|!|\?|$)",
318            MemoryKind::Profile,
319            "user",
320            "birthday",
321            "$1",
322        ) {
323            self.rules.push(rule);
324        }
325
326        // Email
327        if let Ok(rule) = ExtractionRule::new(
328            "email",
329            r"(?i)(?:my email is|email me at|reach me at)\s+([\w\.\-]+@[\w\.\-]+\.\w+)",
330            MemoryKind::Profile,
331            "user",
332            "email",
333            "$1",
334        ) {
335            self.rules.push(rule);
336        }
337
338        // Family member mentions
339        if let Ok(rule) = ExtractionRule::new(
340            "family",
341            r"(?i)my\s+(wife|husband|spouse|partner|son|daughter|child|brother|sister|mother|father|mom|dad|grandma|grandmother|grandpa|grandfather)'?s?\s+(?:name is|is named)\s+([A-Z][a-z]+)",
342            MemoryKind::Relationship,
343            "user",
344            "$1",
345            "$2",
346        ) {
347            self.rules.push(rule);
348        }
349
350        // Travel/trip events
351        if let Ok(rule) = ExtractionRule::new(
352            "travel",
353            r"(?i)(?:I (?:went|traveled|travelled|visited|am going|will go|am visiting) to)\s+([A-Z][a-zA-Z\s,]+?)(?:\s+(?:last|this|next)|\.|,|!|\?|$)",
354            MemoryKind::Event,
355            "user",
356            "travel",
357            "$1",
358        ) {
359            self.rules.push(rule);
360        }
361    }
362
363    /// Add rules for third-person statements (e.g., "Alice works at Acme Corp").
364    ///
365    /// These patterns extract triplets where the subject is a named person
366    /// rather than "user" (first-person).
367    fn add_third_person_rules(&mut self) {
368        // Common name pattern: Capitalized first name, optional middle/last names
369        // Matches: "Alice", "John Smith", "Mary Jane Watson"
370        let name = r"([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,2})";
371
372        // ============================================================
373        // EMPLOYMENT PATTERNS
374        // ============================================================
375
376        // "Alice works at Acme Corp" / "John is employed at Google"
377        if let Ok(rule) = ExtractionRule::new(
378            "3p_employer_works_at",
379            &format!(r"(?i){name}\s+(?:works at|works for|is employed at|is employed by|joined|is at)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\.|,|!|\?|$|\s+(?:as|in|since))"),
380            MemoryKind::Fact,
381            "$1",
382            "employer",
383            "$2",
384        ) {
385            self.rules.push(rule);
386        }
387
388        // "Alice is the CEO of Acme Corp" / "Bob is the founder of Startup Inc"
389        if let Ok(rule) = ExtractionRule::new(
390            "3p_role_at_company",
391            &format!(r"(?i){name}\s+is\s+(?:the\s+)?([A-Za-z\s]+?)\s+(?:of|at)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\.|,|!|\?|$)"),
392            MemoryKind::Fact,
393            "$1",
394            "role",
395            "$2 at $3",
396        ) {
397            self.rules.push(rule);
398        }
399
400        // "Alice, CEO of Acme" / "Bob, founder of Startup"
401        if let Ok(rule) = ExtractionRule::new(
402            "3p_title_appositive",
403            &format!(r"(?i){name},\s+(?:the\s+)?([A-Za-z\s]+?)\s+(?:of|at)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\.|,|!|\?|$)"),
404            MemoryKind::Fact,
405            "$1",
406            "role",
407            "$2 at $3",
408        ) {
409            self.rules.push(rule);
410        }
411
412        // ============================================================
413        // LOCATION PATTERNS
414        // ============================================================
415
416        // "Alice lives in San Francisco" / "John is based in New York"
417        if let Ok(rule) = ExtractionRule::new(
418            "3p_location_lives",
419            &format!(r"(?i){name}\s+(?:lives in|is based in|resides in|is from|comes from|moved to|relocated to)\s+([A-Z][a-zA-Z\s,]+?)(?:\.|!|\?|$)"),
420            MemoryKind::Fact,
421            "$1",
422            "location",
423            "$2",
424        ) {
425            self.rules.push(rule);
426        }
427
428        // "Alice is a San Francisco resident" / "John is a New Yorker"
429        if let Ok(rule) = ExtractionRule::new(
430            "3p_location_resident",
431            &format!(r"(?i){name}\s+is\s+(?:a\s+)?([A-Z][a-zA-Z\s]+?)(?:\s+resident|\s+native)(?:\.|,|!|\?|$)"),
432            MemoryKind::Fact,
433            "$1",
434            "location",
435            "$2",
436        ) {
437            self.rules.push(rule);
438        }
439
440        // ============================================================
441        // JOB TITLE / PROFESSION PATTERNS
442        // ============================================================
443
444        // "Alice is a software engineer" / "John is an architect"
445        if let Ok(rule) = ExtractionRule::new(
446            "3p_job_title",
447            &format!(r"(?i){name}\s+is\s+(?:a|an)\s+([A-Za-z][a-zA-Z\s]+?)(?:\.|,|!|\?|$|\s+(?:at|who|and|with))"),
448            MemoryKind::Fact,
449            "$1",
450            "job_title",
451            "$2",
452        ) {
453            self.rules.push(rule);
454        }
455
456        // "Alice works as a product manager" / "John works as an engineer"
457        if let Ok(rule) = ExtractionRule::new(
458            "3p_job_works_as",
459            &format!(r"(?i){name}\s+works\s+as\s+(?:a|an)\s+([A-Za-z][a-zA-Z\s]+?)(?:\.|,|!|\?|$|\s+(?:at|in|for))"),
460            MemoryKind::Fact,
461            "$1",
462            "job_title",
463            "$2",
464        ) {
465            self.rules.push(rule);
466        }
467
468        // ============================================================
469        // RELATIONSHIP PATTERNS
470        // ============================================================
471
472        // "Alice is married to Bob" / "John is engaged to Mary"
473        if let Ok(rule) = ExtractionRule::new(
474            "3p_relationship_married",
475            &format!(r"(?i){name}\s+is\s+(?:married to|engaged to|dating|in a relationship with|the (?:wife|husband|partner|spouse) of)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)(?:\.|,|!|\?|$)"),
476            MemoryKind::Relationship,
477            "$1",
478            "spouse",
479            "$2",
480        ) {
481            self.rules.push(rule);
482        }
483
484        // "Alice and Bob are married" / "John and Mary are dating"
485        if let Ok(rule) = ExtractionRule::new(
486            "3p_relationship_pair",
487            &format!(r"(?i){name}\s+and\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)\s+are\s+(?:married|engaged|dating|partners|a couple)(?:\.|,|!|\?|$)"),
488            MemoryKind::Relationship,
489            "$1",
490            "spouse",
491            "$2",
492        ) {
493            self.rules.push(rule);
494        }
495
496        // "Alice is Bob's wife" / "John is Mary's husband"
497        if let Ok(rule) = ExtractionRule::new(
498            "3p_relationship_possessive",
499            &format!(r"(?i){name}\s+is\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)'s\s+(wife|husband|partner|spouse|girlfriend|boyfriend|fiancé|fiancee)(?:\.|,|!|\?|$)"),
500            MemoryKind::Relationship,
501            "$1",
502            "spouse",
503            "$2",
504        ) {
505            self.rules.push(rule);
506        }
507
508        // "Alice's husband is Bob" / "John's wife is Mary"
509        if let Ok(rule) = ExtractionRule::new(
510            "3p_relationship_poss_is",
511            &format!(r"(?i){name}'s\s+(?:wife|husband|partner|spouse|girlfriend|boyfriend)\s+is\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)(?:\.|,|!|\?|$)"),
512            MemoryKind::Relationship,
513            "$1",
514            "spouse",
515            "$2",
516        ) {
517            self.rules.push(rule);
518        }
519
520        // Family relationships: "Alice is Bob's mother/sister/etc."
521        if let Ok(rule) = ExtractionRule::new(
522            "3p_family_member",
523            &format!(r"(?i){name}\s+is\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)'s\s+(mother|father|sister|brother|son|daughter|aunt|uncle|cousin|grandmother|grandfather|grandma|grandpa|mom|dad)(?:\.|,|!|\?|$)"),
524            MemoryKind::Relationship,
525            "$1",
526            "$3",
527            "$2",
528        ) {
529            self.rules.push(rule);
530        }
531
532        // "Alice has a brother named Bob"
533        if let Ok(rule) = ExtractionRule::new(
534            "3p_family_named",
535            &format!(r"(?i){name}\s+has\s+(?:a\s+)?(brother|sister|son|daughter|mother|father)\s+(?:named|called)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)(?:\.|,|!|\?|$)"),
536            MemoryKind::Relationship,
537            "$1",
538            "$2",
539            "$3",
540        ) {
541            self.rules.push(rule);
542        }
543
544        // ============================================================
545        // PREFERENCE PATTERNS
546        // ============================================================
547
548        // "Alice loves pizza" / "John enjoys hiking" (positive)
549        if let Ok(rule) = ExtractionRule::preference(
550            "3p_preference_positive",
551            &format!(r"(?i){name}\s+(?:loves|likes|enjoys|adores|prefers|is fond of)\s+([\w\s]+?)(?:\.|,|!|\?|$)"),
552            "$1",
553            "preference",
554            "$2",
555            Polarity::Positive,
556        ) {
557            self.rules.push(rule);
558        }
559
560        // "Alice hates spiders" / "John dislikes crowds" (negative)
561        if let Ok(rule) = ExtractionRule::preference(
562            "3p_preference_negative",
563            &format!(r"(?i){name}\s+(?:hates|dislikes|despises|can't stand|doesn't like|avoids)\s+([\w\s]+?)(?:\.|,|!|\?|$)"),
564            "$1",
565            "preference",
566            "$2",
567            Polarity::Negative,
568        ) {
569            self.rules.push(rule);
570        }
571
572        // "Alice's favorite food is sushi"
573        if let Ok(rule) = ExtractionRule::new(
574            "3p_favorite",
575            &format!(r"(?i){name}'s\s+(?:favorite|favourite)\s+(\w+)\s+is\s+([\w\s]+?)(?:\.|,|!|\?|$)"),
576            MemoryKind::Preference,
577            "$1",
578            "favorite_$2",
579            "$3",
580        ) {
581            self.rules.push(rule);
582        }
583
584        // ============================================================
585        // EDUCATION PATTERNS
586        // ============================================================
587
588        // "Alice studied at MIT" / "John graduated from Harvard"
589        if let Ok(rule) = ExtractionRule::new(
590            "3p_education_studied",
591            &format!(r"(?i){name}\s+(?:studied at|graduated from|attends|attended|went to|goes to)\s+([A-Z][a-zA-Z\s]+?(?:University|College|Institute|School|Academy)?)(?:\.|,|!|\?|$)"),
592            MemoryKind::Fact,
593            "$1",
594            "education",
595            "$2",
596        ) {
597            self.rules.push(rule);
598        }
599
600        // "Alice has a degree in Computer Science"
601        if let Ok(rule) = ExtractionRule::new(
602            "3p_education_degree",
603            &format!(r"(?i){name}\s+has\s+(?:a\s+)?(?:degree|PhD|doctorate|masters?|bachelors?|BA|BS|MS|MBA)\s+in\s+([A-Za-z\s]+?)(?:\.|,|!|\?|$|\s+from)"),
604            MemoryKind::Fact,
605            "$1",
606            "degree",
607            "$2",
608        ) {
609            self.rules.push(rule);
610        }
611
612        // "Alice majored in Physics"
613        if let Ok(rule) = ExtractionRule::new(
614            "3p_education_major",
615            &format!(r"(?i){name}\s+(?:majored in|minored in|studied)\s+([A-Za-z\s]+?)(?:\.|,|!|\?|$|\s+at)"),
616            MemoryKind::Fact,
617            "$1",
618            "field_of_study",
619            "$2",
620        ) {
621            self.rules.push(rule);
622        }
623
624        // ============================================================
625        // PROFILE / BIO PATTERNS
626        // ============================================================
627
628        // "Alice is 28 years old" / "John is 35"
629        if let Ok(rule) = ExtractionRule::new(
630            "3p_age",
631            &format!(r"(?i){name}\s+is\s+(\d{{1,3}})\s*(?:years old|yrs old|yo)?(?:\.|,|!|\?|$|\s)"),
632            MemoryKind::Profile,
633            "$1",
634            "age",
635            "$2",
636        ) {
637            self.rules.push(rule);
638        }
639
640        // "Alice was born in 1990" / "John was born on March 15"
641        if let Ok(rule) = ExtractionRule::new(
642            "3p_birthdate",
643            &format!(r"(?i){name}\s+was\s+born\s+(?:in|on)\s+(\w+(?:\s+\d{{1,2}}(?:st|nd|rd|th)?)?(?:,?\s+\d{{4}})?)(?:\.|,|!|\?|$)"),
644            MemoryKind::Profile,
645            "$1",
646            "birthdate",
647            "$2",
648        ) {
649            self.rules.push(rule);
650        }
651
652        // "Alice is from Boston" - birthplace
653        if let Ok(rule) = ExtractionRule::new(
654            "3p_birthplace",
655            &format!(r"(?i){name}\s+(?:is|was)\s+(?:originally\s+)?from\s+([A-Z][a-zA-Z\s,]+?)(?:\.|!|\?|$|\s+but)"),
656            MemoryKind::Profile,
657            "$1",
658            "birthplace",
659            "$2",
660        ) {
661            self.rules.push(rule);
662        }
663
664        // "Alice's email is alice@example.com"
665        if let Ok(rule) = ExtractionRule::new(
666            "3p_email",
667            &format!(r"(?i){name}'s\s+email\s+(?:is|address is)\s+([\w\.\-]+@[\w\.\-]+\.\w+)"),
668            MemoryKind::Profile,
669            "$1",
670            "email",
671            "$2",
672        ) {
673            self.rules.push(rule);
674        }
675
676        // ============================================================
677        // HOBBY / INTEREST PATTERNS
678        // ============================================================
679
680        // "Alice plays tennis" / "John plays the piano"
681        if let Ok(rule) = ExtractionRule::new(
682            "3p_hobby_plays",
683            &format!(r"(?i){name}\s+plays\s+(?:the\s+)?([\w\s]+?)(?:\.|,|!|\?|$|\s+(?:every|on|and))"),
684            MemoryKind::Preference,
685            "$1",
686            "hobby",
687            "$2",
688        ) {
689            self.rules.push(rule);
690        }
691
692        // "Alice is into photography" / "John is interested in astronomy"
693        if let Ok(rule) = ExtractionRule::new(
694            "3p_interest",
695            &format!(r"(?i){name}\s+is\s+(?:into|interested in|passionate about|really into)\s+([\w\s]+?)(?:\.|,|!|\?|$)"),
696            MemoryKind::Preference,
697            "$1",
698            "interest",
699            "$2",
700        ) {
701            self.rules.push(rule);
702        }
703
704        // ============================================================
705        // PET PATTERNS
706        // ============================================================
707
708        // "Alice has a cat named Whiskers"
709        if let Ok(rule) = ExtractionRule::new(
710            "3p_pet_named",
711            &format!(r"(?i){name}\s+has\s+(?:a\s+)?(dog|cat|bird|fish|hamster|rabbit|pet)\s+(?:named|called)\s+([A-Z][a-z]+)(?:\.|,|!|\?|$)"),
712            MemoryKind::Fact,
713            "$1",
714            "pet_name",
715            "$3",
716        ) {
717            self.rules.push(rule);
718        }
719
720        // "Alice's dog is named Max"
721        if let Ok(rule) = ExtractionRule::new(
722            "3p_pet_poss_named",
723            &format!(r"(?i){name}'s\s+(dog|cat|bird|fish|hamster|rabbit|pet)\s+is\s+(?:named|called)\s+([A-Z][a-z]+)(?:\.|,|!|\?|$)"),
724            MemoryKind::Fact,
725            "$1",
726            "pet_name",
727            "$3",
728        ) {
729            self.rules.push(rule);
730        }
731
732        // "Alice owns a golden retriever"
733        if let Ok(rule) = ExtractionRule::new(
734            "3p_pet_owns",
735            &format!(r"(?i){name}\s+(?:owns|has)\s+(?:a\s+)?([\w\s]+?)\s+(?:dog|cat|bird|fish|hamster|rabbit)(?:\.|,|!|\?|$)"),
736            MemoryKind::Fact,
737            "$1",
738            "pet",
739            "$2",
740        ) {
741            self.rules.push(rule);
742        }
743
744        // ============================================================
745        // EVENT PATTERNS
746        // ============================================================
747
748        // "Alice visited Paris" / "John traveled to Japan"
749        if let Ok(rule) = ExtractionRule::new(
750            "3p_travel",
751            &format!(r"(?i){name}\s+(?:visited|traveled to|travelled to|went to|is going to|will visit)\s+([A-Z][a-zA-Z\s,]+?)(?:\s+(?:last|this|next)|\.|,|!|\?|$)"),
752            MemoryKind::Event,
753            "$1",
754            "travel",
755            "$2",
756        ) {
757            self.rules.push(rule);
758        }
759
760        // "Alice started at Google in 2020"
761        if let Ok(rule) = ExtractionRule::new(
762            "3p_career_event",
763            &format!(r"(?i){name}\s+(?:started|joined|left|quit|founded)\s+(?:at\s+)?([A-Z][a-zA-Z0-9\s&]+?)(?:\s+in\s+\d{{4}})?(?:\.|,|!|\?|$)"),
764            MemoryKind::Event,
765            "$1",
766            "career_event",
767            "$2",
768        ) {
769            self.rules.push(rule);
770        }
771    }
772
773    /// Get the number of rules in this engine.
774    #[must_use]
775    pub fn rule_count(&self) -> usize {
776        self.rules.len()
777    }
778}
779
780impl EnrichmentEngine for RulesEngine {
781    fn kind(&self) -> &str {
782        "rules"
783    }
784
785    fn version(&self) -> &str {
786        &self.version
787    }
788
789    fn enrich(&self, ctx: &EnrichmentContext) -> EnrichmentResult {
790        let mut all_cards = Vec::new();
791
792        for rule in &self.rules {
793            let cards = rule.apply(ctx);
794            all_cards.extend(cards);
795        }
796
797        EnrichmentResult::success(all_cards)
798    }
799}
800
801#[cfg(test)]
802mod tests {
803    use super::*;
804
805    fn test_context(text: &str) -> EnrichmentContext {
806        EnrichmentContext::new(
807            1,
808            "mv2://test/msg-1".to_string(),
809            text.to_string(),
810            None,
811            1700000000,
812            None,
813        )
814    }
815
816    #[test]
817    fn test_rules_engine_default() {
818        let engine = RulesEngine::new();
819        assert!(engine.rule_count() > 0);
820        assert_eq!(engine.kind(), "rules");
821        assert_eq!(engine.version(), "1.0.0");
822    }
823
824    #[test]
825    fn test_extract_employer() {
826        let engine = RulesEngine::new();
827        let ctx = test_context("Hi, I work at Anthropic.");
828        let result = engine.enrich(&ctx);
829
830        assert!(result.success);
831        // Find the first-person employer card
832        let card = result.cards.iter().find(|c| c.entity == "user" && c.slot == "employer").unwrap();
833        assert_eq!(card.value, "Anthropic");
834    }
835
836    #[test]
837    fn test_extract_location() {
838        let engine = RulesEngine::new();
839        let ctx = test_context("I live in San Francisco.");
840        let result = engine.enrich(&ctx);
841
842        assert!(result.success);
843        // Find the first-person location card
844        let card = result.cards.iter().find(|c| c.entity == "user" && c.slot == "location").unwrap();
845        assert_eq!(card.value, "San Francisco");
846    }
847
848    #[test]
849    fn test_extract_preference_positive() {
850        let engine = RulesEngine::new();
851        let ctx = test_context("I really love sushi.");
852        let result = engine.enrich(&ctx);
853
854        assert!(result.success);
855        assert_eq!(result.cards.len(), 1);
856        assert_eq!(result.cards[0].kind, MemoryKind::Preference);
857        assert_eq!(result.cards[0].slot, "food_preference");
858        assert_eq!(result.cards[0].value, "sushi");
859        assert_eq!(result.cards[0].polarity, Some(Polarity::Positive));
860    }
861
862    #[test]
863    fn test_extract_preference_negative() {
864        let engine = RulesEngine::new();
865        let ctx = test_context("I really hate cilantro.");
866        let result = engine.enrich(&ctx);
867
868        assert!(result.success);
869        assert_eq!(result.cards.len(), 1);
870        assert_eq!(result.cards[0].polarity, Some(Polarity::Negative));
871        assert_eq!(result.cards[0].value, "cilantro");
872    }
873
874    #[test]
875    fn test_multiple_extractions() {
876        let engine = RulesEngine::new();
877        let ctx = test_context("I work at Google. I live in Mountain View. I love programming.");
878        let result = engine.enrich(&ctx);
879
880        assert!(result.success);
881        assert!(result.cards.len() >= 2);
882    }
883
/// Text that matches no rule still succeeds, but produces no cards.
#[test]
fn test_no_matches() {
    let rules = RulesEngine::new();
    let context = test_context("The weather is nice today.");
    let outcome = rules.enrich(&context);

    // Finding nothing is not an error: success is reported with an empty card list.
    assert!(outcome.success);
    assert!(outcome.cards.is_empty());
}
893
/// "My name is John Smith." produces exactly one `name` card holding the full name.
#[test]
fn test_extract_name() {
    let rules = RulesEngine::new();
    let context = test_context("My name is John Smith.");
    let outcome = rules.enrich(&context);

    assert!(outcome.success);
    assert_eq!(outcome.cards.len(), 1);

    let name_card = &outcome.cards[0];
    assert_eq!(name_card.slot, "name");
    assert_eq!(name_card.value, "John Smith");
}
905
/// "I have a golden retriever named Max." yields both a `pet` card and a
/// `pet_name` card, the latter carrying the value "Max".
#[test]
fn test_extract_pet() {
    let engine = RulesEngine::new();
    let ctx = test_context("I have a golden retriever named Max.");
    let result = engine.enrich(&ctx);

    assert!(result.success);

    // Should extract both "pet" and "pet_name". Each check carries its own
    // message so a failure says which card is missing.
    assert!(
        result.cards.iter().any(|c| c.slot == "pet"),
        "expected a `pet` card to be extracted"
    );
    let name_card = result
        .cards
        .iter()
        .find(|c| c.slot == "pet_name")
        .expect("expected a `pet_name` card to be extracted");
    assert_eq!(name_card.value, "Max");
}
920
/// A user-supplied rule added to an empty engine is the only one applied,
/// and its `$1` value template is filled from the first capture group.
#[test]
fn test_custom_rule() {
    let favorite_color = ExtractionRule::new(
        "custom",
        r"(?i)favorite color is\s+(\w+)",
        MemoryKind::Preference,
        "user",
        "favorite_color",
        "$1",
    )
    .unwrap();

    // Start from an engine with no rules so only the custom rule can fire.
    let mut rules = RulesEngine::empty();
    rules.add_rule(favorite_color);

    let context = test_context("My favorite color is blue.");
    let outcome = rules.enrich(&context);

    assert!(outcome.success);
    assert_eq!(outcome.cards.len(), 1);

    let only = &outcome.cards[0];
    assert_eq!(only.slot, "favorite_color");
    assert_eq!(only.value, "blue");
}
943
944    // ========================================================
945    // THIRD-PERSON PATTERN TESTS
946    // ========================================================
947
/// "<Name> works at <Company>" yields an `employer` card keyed by the lowercased name.
#[test]
fn test_3p_employer() {
    let rules = RulesEngine::new();
    let context = test_context("Alice works at Acme Corp.");
    let outcome = rules.enrich(&context);
    assert!(outcome.success);

    let employer = outcome
        .cards
        .iter()
        .find(|card| card.slot == "employer")
        .unwrap();
    assert_eq!(employer.entity, "alice");
    assert_eq!(employer.value, "Acme Corp");
}
959
/// Alternative third-person employment phrasings all map to the `employer` slot.
#[test]
fn test_3p_employer_variations() {
    let rules = RulesEngine::new();

    // (input sentence, expected entity, expected employer)
    let cases = [
        // "is employed at"
        ("John Smith is employed at Google.", "john smith", "Google"),
        // "joined"
        ("Mary joined Microsoft.", "mary", "Microsoft"),
    ];

    for &(sentence, who, company) in cases.iter() {
        let context = test_context(sentence);
        let outcome = rules.enrich(&context);
        let employer = outcome
            .cards
            .iter()
            .find(|card| card.slot == "employer")
            .unwrap();
        assert_eq!(employer.entity, who);
        assert_eq!(employer.value, company);
    }
}
978
/// Third-person residence phrasings map to the `location` slot.
#[test]
fn test_3p_location() {
    let rules = RulesEngine::new();

    // "lives in"
    {
        let context = test_context("Alice lives in San Francisco.");
        let outcome = rules.enrich(&context);
        let location = outcome
            .cards
            .iter()
            .find(|card| card.slot == "location")
            .unwrap();
        assert_eq!(location.entity, "alice");
        assert_eq!(location.value, "San Francisco");
    }

    // "is based in" -- only a substring of the captured value is checked here.
    {
        let context = test_context("Bob is based in New York City.");
        let outcome = rules.enrich(&context);
        let location = outcome
            .cards
            .iter()
            .find(|card| card.slot == "location")
            .unwrap();
        assert_eq!(location.entity, "bob");
        assert!(location.value.contains("New York"));
    }
}
997
/// Third-person occupation phrasings map to the `job_title` slot.
#[test]
fn test_3p_job_title() {
    let rules = RulesEngine::new();

    // (input sentence, expected entity, expected job title)
    let cases = [
        // "is a"
        ("Alice is a software engineer.", "alice", "software engineer"),
        // "works as"
        ("John works as a product manager.", "john", "product manager"),
    ];

    for &(sentence, who, title) in cases.iter() {
        let context = test_context(sentence);
        let outcome = rules.enrich(&context);
        let job = outcome
            .cards
            .iter()
            .find(|card| card.slot == "job_title")
            .unwrap();
        assert_eq!(job.entity, who);
        assert_eq!(job.value, title);
    }
}
1016
/// Both marriage phrasings produce a `spouse` card for the first-named person.
#[test]
fn test_3p_relationship_married() {
    let rules = RulesEngine::new();

    // (input sentence, expected entity, expected spouse)
    let cases = [
        // "is married to"
        ("Alice is married to Bob.", "alice", "Bob"),
        // "and are married"
        ("John and Mary are married.", "john", "Mary"),
    ];

    for &(sentence, who, partner) in cases.iter() {
        let context = test_context(sentence);
        let outcome = rules.enrich(&context);
        let spouse = outcome
            .cards
            .iter()
            .find(|card| card.slot == "spouse")
            .unwrap();
        assert_eq!(spouse.entity, who);
        assert_eq!(spouse.value, partner);
    }
}
1035
/// "<Name> loves <thing>" yields a positive `preference` card for that person.
#[test]
fn test_3p_preference_positive() {
    let rules = RulesEngine::new();
    let context = test_context("Alice loves sushi.");
    let outcome = rules.enrich(&context);

    let liked = outcome
        .cards
        .iter()
        .find(|card| card.slot == "preference")
        .unwrap();
    assert_eq!(liked.entity, "alice");
    assert_eq!(liked.value, "sushi");
    assert_eq!(liked.polarity, Some(Polarity::Positive));
}
1047
/// "<Name> hates <thing>" yields a negative `preference` card for that person.
#[test]
fn test_3p_preference_negative() {
    let rules = RulesEngine::new();
    let context = test_context("Bob hates spiders.");
    let outcome = rules.enrich(&context);

    let disliked = outcome
        .cards
        .iter()
        .find(|card| card.slot == "preference")
        .unwrap();
    assert_eq!(disliked.entity, "bob");
    assert_eq!(disliked.value, "spiders");
    assert_eq!(disliked.polarity, Some(Polarity::Negative));
}
1059
/// Third-person schooling phrasings map to the `education` slot.
#[test]
fn test_3p_education() {
    let rules = RulesEngine::new();

    // "graduated from"
    {
        let context = test_context("Alice graduated from MIT.");
        let outcome = rules.enrich(&context);
        let school = outcome
            .cards
            .iter()
            .find(|card| card.slot == "education")
            .unwrap();
        assert_eq!(school.entity, "alice");
        assert_eq!(school.value, "MIT");
    }

    // "studied at" -- only a substring of the captured value is checked here.
    {
        let context = test_context("John studied at Stanford University.");
        let outcome = rules.enrich(&context);
        let school = outcome
            .cards
            .iter()
            .find(|card| card.slot == "education")
            .unwrap();
        assert_eq!(school.entity, "john");
        assert!(school.value.contains("Stanford"));
    }
}
1078
/// "<Name> is <N> years old" yields an `age` card whose value is just the number.
#[test]
fn test_3p_age() {
    let rules = RulesEngine::new();
    let context = test_context("Alice is 28 years old.");
    let outcome = rules.enrich(&context);

    let age = outcome
        .cards
        .iter()
        .find(|card| card.slot == "age")
        .unwrap();
    assert_eq!(age.entity, "alice");
    assert_eq!(age.value, "28");
}
1089
/// "<Name> visited <Place>" yields a `travel` card for that person.
#[test]
fn test_3p_travel() {
    let rules = RulesEngine::new();
    let context = test_context("Alice visited Paris.");
    let outcome = rules.enrich(&context);

    let trip = outcome
        .cards
        .iter()
        .find(|card| card.slot == "travel")
        .unwrap();
    assert_eq!(trip.entity, "alice");
    assert_eq!(trip.value, "Paris");
}
1100
/// "<Name> plays <activity>" yields a `hobby` card for that person.
#[test]
fn test_3p_hobby() {
    let rules = RulesEngine::new();
    let context = test_context("Bob plays tennis.");
    let outcome = rules.enrich(&context);

    let hobby = outcome
        .cards
        .iter()
        .find(|card| card.slot == "hobby")
        .unwrap();
    assert_eq!(hobby.entity, "bob");
    assert_eq!(hobby.value, "tennis");
}
1111
/// A passage mentioning several people yields cards attributed to each of them.
#[test]
fn test_3p_multiple_extractions() {
    let rules = RulesEngine::new();
    let context = test_context(
        "Alice works at Google. She lives in Mountain View. Bob is a doctor in Seattle.",
    );
    let outcome = rules.enrich(&context);

    assert!(outcome.success);

    // True when some extracted card pairs the given entity with the given slot.
    let has_card = |entity: &str, slot: &str| {
        outcome
            .cards
            .iter()
            .any(|card| card.entity == entity && card.slot == slot)
    };

    // Should extract multiple facts about Alice and Bob
    assert!(has_card("alice", "employer"));
    assert!(has_card("bob", "job_title"));
}
1128
/// Entity names captured from text are stored lowercased, whatever the
/// casing of the input.
#[test]
fn test_entity_normalization() {
    let engine = RulesEngine::new();

    // Entities should be normalized to lowercase for consistent O(1) lookups
    let ctx = test_context("ALICE SMITH works at Acme.");
    let result = engine.enrich(&ctx);

    // One `expect` both asserts presence and unwraps, and says what was
    // missing if it fails.
    let card = result
        .cards
        .iter()
        .find(|c| c.slot == "employer")
        .expect("expected an `employer` card for the upper-case entity");

    // Entity should be lowercase
    assert_eq!(card.entity, "alice smith");
}
1142
/// "<Name> has a <animal> named <PetName>" yields a `pet_name` card for that person.
#[test]
fn test_3p_pet() {
    let rules = RulesEngine::new();
    let context = test_context("Alice has a cat named Whiskers.");
    let outcome = rules.enrich(&context);

    let pet_name = outcome
        .cards
        .iter()
        .find(|card| card.slot == "pet_name")
        .unwrap();
    assert_eq!(pet_name.entity, "alice");
    assert_eq!(pet_name.value, "Whiskers");
}
1153
/// "<Name> has a brother named <Sibling>" yields a card whose slot is the relation itself.
#[test]
fn test_3p_family() {
    let rules = RulesEngine::new();
    let context = test_context("Alice has a brother named Bob.");
    let outcome = rules.enrich(&context);

    let sibling = outcome
        .cards
        .iter()
        .find(|card| card.slot == "brother")
        .unwrap();
    assert_eq!(sibling.entity, "alice");
    assert_eq!(sibling.value, "Bob");
}
1164}