// memvid_core/enrich/rules.rs

1//! Rules-based enrichment engine using regex patterns.
2//!
3//! This engine extracts memory cards from text using configurable regex
4//! patterns. It's fast, deterministic, and doesn't require any models.
5
6use super::{EnrichmentContext, EnrichmentEngine, EnrichmentResult};
7use crate::types::{MemoryCard, MemoryCardBuilder, MemoryKind, Polarity};
8use regex::Regex;
9
/// Canonicalize an entity name so that differently cased or padded
/// spellings of the same entity compare equal.
///
/// Leading and trailing whitespace is stripped first, then the remainder is
/// lowercased.
fn normalize_entity(entity: &str) -> String {
    let trimmed = entity.trim();
    trimmed.to_lowercase()
}
15
16/// A rule for extracting memory cards from text.
17#[derive(Debug, Clone)]
18pub struct ExtractionRule {
19    /// Name of the rule (for debugging).
20    pub name: String,
21    /// Regex pattern to match.
22    pub pattern: Regex,
23    /// The kind of memory card to create.
24    pub kind: MemoryKind,
25    /// The entity to use (supports $1, $2 capture groups).
26    pub entity: String,
27    /// The slot to use (supports $1, $2 capture groups).
28    pub slot: String,
29    /// The value template (supports $1, $2 capture groups).
30    pub value: String,
31    /// Optional polarity for preference rules.
32    pub polarity: Option<Polarity>,
33}
34
35impl ExtractionRule {
36    /// Create a new extraction rule.
37    pub fn new(
38        name: impl Into<String>,
39        pattern: &str,
40        kind: MemoryKind,
41        entity: impl Into<String>,
42        slot: impl Into<String>,
43        value: impl Into<String>,
44    ) -> Result<Self, regex::Error> {
45        Ok(Self {
46            name: name.into(),
47            pattern: Regex::new(pattern)?,
48            kind,
49            entity: entity.into(),
50            slot: slot.into(),
51            value: value.into(),
52            polarity: None,
53        })
54    }
55
56    /// Create a preference rule with polarity.
57    pub fn preference(
58        name: impl Into<String>,
59        pattern: &str,
60        entity: impl Into<String>,
61        slot: impl Into<String>,
62        value: impl Into<String>,
63        polarity: Polarity,
64    ) -> Result<Self, regex::Error> {
65        Ok(Self {
66            name: name.into(),
67            pattern: Regex::new(pattern)?,
68            kind: MemoryKind::Preference,
69            entity: entity.into(),
70            slot: slot.into(),
71            value: value.into(),
72            polarity: Some(polarity),
73        })
74    }
75
76    /// Apply the rule to text and return extracted cards.
77    fn apply(&self, ctx: &EnrichmentContext) -> Vec<MemoryCard> {
78        let mut cards = Vec::new();
79
80        for caps in self.pattern.captures_iter(&ctx.text) {
81            // Expand capture groups in entity, slot, value
82            let entity = normalize_entity(&self.expand_captures(&self.entity, &caps));
83            let slot = self.expand_captures(&self.slot, &caps);
84            let value = self.expand_captures(&self.value, &caps).trim().to_string();
85
86            if entity.is_empty() || slot.is_empty() || value.is_empty() {
87                continue;
88            }
89
90            let mut builder = MemoryCardBuilder::new()
91                .kind(self.kind.clone())
92                .entity(&entity)
93                .slot(&slot)
94                .value(&value)
95                .source(ctx.frame_id, Some(ctx.uri.clone()))
96                .engine("rules", "1.0.0");
97
98            if let Some(polarity) = &self.polarity {
99                builder = builder.polarity(polarity.clone());
100            }
101
102            // Build with a placeholder ID (will be assigned by MemoriesTrack)
103            if let Ok(card) = builder.build(0) {
104                cards.push(card);
105            }
106        }
107
108        cards
109    }
110
111    /// Expand capture group references ($1, $2, etc.) in a template.
112    fn expand_captures(&self, template: &str, caps: &regex::Captures) -> String {
113        let mut result = template.to_string();
114        for i in 0..10 {
115            let placeholder = format!("${}", i);
116            if let Some(m) = caps.get(i) {
117                result = result.replace(&placeholder, m.as_str());
118            }
119        }
120        result
121    }
122}
123
124/// Rules-based enrichment engine.
125///
126/// This engine uses a collection of regex-based rules to extract
127/// structured memory cards from text. Rules can target facts,
128/// preferences, events, and other memory types.
129#[derive(Debug)]
130pub struct RulesEngine {
131    rules: Vec<ExtractionRule>,
132    version: String,
133}
134
135impl Default for RulesEngine {
136    fn default() -> Self {
137        Self::new()
138    }
139}
140
141impl RulesEngine {
142    /// Create a new rules engine with default rules.
143    #[must_use]
144    pub fn new() -> Self {
145        let mut engine = Self {
146            rules: Vec::new(),
147            version: "1.0.0".to_string(),
148        };
149        engine.add_default_rules();
150        engine.add_third_person_rules();
151        engine
152    }
153
154    /// Create an empty rules engine (no default rules).
155    #[must_use]
156    pub fn empty() -> Self {
157        Self {
158            rules: Vec::new(),
159            version: "1.0.0".to_string(),
160        }
161    }
162
163    /// Add a rule to the engine.
164    pub fn add_rule(&mut self, rule: ExtractionRule) {
165        self.rules.push(rule);
166    }
167
168    /// Add default rules for common patterns.
169    fn add_default_rules(&mut self) {
170        // Employment facts
171        if let Ok(rule) = ExtractionRule::new(
172            "employer",
173            r"(?i)(?:I work at|I'm employed at|I work for|my employer is|I'm at)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\.|,|!|\?|$)",
174            MemoryKind::Fact,
175            "user",
176            "employer",
177            "$1",
178        ) {
179            self.rules.push(rule);
180        }
181
182        // Job title
183        if let Ok(rule) = ExtractionRule::new(
184            "job_title",
185            r"(?i)(?:I am a|I'm a|I work as a|my job is|my role is|my title is)\s+([A-Za-z][a-zA-Z\s]+?)(?:\.|,|!|\?|$| at)",
186            MemoryKind::Fact,
187            "user",
188            "job_title",
189            "$1",
190        ) {
191            self.rules.push(rule);
192        }
193
194        // Location
195        if let Ok(rule) = ExtractionRule::new(
196            "location",
197            r"(?i)(?:I live in|I'm based in|I'm from|I reside in|my home is in)\s+([A-Z][a-zA-Z\s,]+?)(?:\.|!|\?|$)",
198            MemoryKind::Fact,
199            "user",
200            "location",
201            "$1",
202        ) {
203            self.rules.push(rule);
204        }
205
206        // Name
207        if let Ok(rule) = ExtractionRule::new(
208            "name",
209            r"(?i)(?:my name is|I'm|call me|I am)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)(?:\.|,|!|\?|$)",
210            MemoryKind::Profile,
211            "user",
212            "name",
213            "$1",
214        ) {
215            self.rules.push(rule);
216        }
217
218        // Age
219        if let Ok(rule) = ExtractionRule::new(
220            "age",
221            r"(?i)(?:I am|I'm)\s+(\d{1,3})\s+(?:years old|yrs old|yo)(?:\.|,|!|\?|$|\s)",
222            MemoryKind::Profile,
223            "user",
224            "age",
225            "$1",
226        ) {
227            self.rules.push(rule);
228        }
229
230        // Food preferences (positive)
231        if let Ok(rule) = ExtractionRule::preference(
232            "food_like",
233            r"(?i)(?:I (?:really )?(?:love|like|enjoy|prefer|adore))\s+([\w\s]+?)(?:\.|,|!|\?|$)",
234            "user",
235            "food_preference",
236            "$1",
237            Polarity::Positive,
238        ) {
239            self.rules.push(rule);
240        }
241
242        // Food preferences (negative)
243        if let Ok(rule) = ExtractionRule::preference(
244            "food_dislike",
245            r"(?i)(?:I (?:really )?(?:hate|dislike|can't stand|don't like|avoid))\s+([\w\s]+?)(?:\.|,|!|\?|$)",
246            "user",
247            "food_preference",
248            "$1",
249            Polarity::Negative,
250        ) {
251            self.rules.push(rule);
252        }
253
254        // Allergies
255        if let Ok(rule) = ExtractionRule::new(
256            "allergy",
257            r"(?i)(?:I am|I'm) allergic to\s+([\w\s]+?)(?:\.|,|!|\?|$)",
258            MemoryKind::Profile,
259            "user",
260            "allergy",
261            "$1",
262        ) {
263            self.rules.push(rule);
264        }
265
266        // Programming language preferences
267        if let Ok(rule) = ExtractionRule::preference(
268            "programming_language",
269            r"(?i)(?:I (?:really )?(?:love|like|enjoy|prefer) (?:programming in|coding in|using|writing))\s+([\w\+\#]+)(?:\.|,|!|\?|$|\s)",
270            "user",
271            "programming_language",
272            "$1",
273            Polarity::Positive,
274        ) {
275            self.rules.push(rule);
276        }
277
278        // Hobby/interest
279        if let Ok(rule) = ExtractionRule::new(
280            "hobby",
281            r"(?i)(?:my hobby is|I enjoy|I like to|my favorite hobby is|my favourite hobby is)\s+([\w\s]+?)(?:\.|,|!|\?|$)",
282            MemoryKind::Preference,
283            "user",
284            "hobby",
285            "$1",
286        ) {
287            self.rules.push(rule);
288        }
289
290        // Pet
291        if let Ok(rule) = ExtractionRule::new(
292            "pet",
293            r"(?i)(?:I have a|my pet is a|I own a)\s+([\w\s]+?)(?:\s+named|\.|,|!|\?|$)",
294            MemoryKind::Fact,
295            "user",
296            "pet",
297            "$1",
298        ) {
299            self.rules.push(rule);
300        }
301
302        // Pet name
303        if let Ok(rule) = ExtractionRule::new(
304            "pet_name",
305            r"(?i)(?:my (?:pet|dog|cat|bird|fish|hamster)'?s? name is|I have a [\w\s]+ named)\s+([A-Z][a-z]+)(?:\.|,|!|\?|$)",
306            MemoryKind::Fact,
307            "user",
308            "pet_name",
309            "$1",
310        ) {
311            self.rules.push(rule);
312        }
313
314        // Birthday
315        if let Ok(rule) = ExtractionRule::new(
316            "birthday",
317            r"(?i)(?:my birthday is|I was born on|born on)\s+(\w+\s+\d{1,2}(?:st|nd|rd|th)?(?:,?\s+\d{4})?)(?:\.|,|!|\?|$)",
318            MemoryKind::Profile,
319            "user",
320            "birthday",
321            "$1",
322        ) {
323            self.rules.push(rule);
324        }
325
326        // Email
327        if let Ok(rule) = ExtractionRule::new(
328            "email",
329            r"(?i)(?:my email is|email me at|reach me at)\s+([\w\.\-]+@[\w\.\-]+\.\w+)",
330            MemoryKind::Profile,
331            "user",
332            "email",
333            "$1",
334        ) {
335            self.rules.push(rule);
336        }
337
338        // Family member mentions
339        if let Ok(rule) = ExtractionRule::new(
340            "family",
341            r"(?i)my\s+(wife|husband|spouse|partner|son|daughter|child|brother|sister|mother|father|mom|dad|grandma|grandmother|grandpa|grandfather)'?s?\s+(?:name is|is named)\s+([A-Z][a-z]+)",
342            MemoryKind::Relationship,
343            "user",
344            "$1",
345            "$2",
346        ) {
347            self.rules.push(rule);
348        }
349
350        // Travel/trip events
351        if let Ok(rule) = ExtractionRule::new(
352            "travel",
353            r"(?i)(?:I (?:went|traveled|travelled|visited|am going|will go|am visiting) to)\s+([A-Z][a-zA-Z\s,]+?)(?:\s+(?:last|this|next)|\.|,|!|\?|$)",
354            MemoryKind::Event,
355            "user",
356            "travel",
357            "$1",
358        ) {
359            self.rules.push(rule);
360        }
361    }
362
363    /// Add rules for third-person statements (e.g., "Alice works at Acme Corp").
364    ///
365    /// These patterns extract triplets where the subject is a named person
366    /// rather than "user" (first-person).
367    fn add_third_person_rules(&mut self) {
368        // Common name pattern: Capitalized first name, optional middle/last names
369        // Matches: "Alice", "John Smith", "Mary Jane Watson"
370        let name = r"([A-Z][a-z]+(?:\s+[A-Z][a-z]+){0,2})";
371
372        // ============================================================
373        // EMPLOYMENT PATTERNS
374        // ============================================================
375
376        // "Alice works at Acme Corp" / "John is employed at Google"
377        if let Ok(rule) = ExtractionRule::new(
378            "3p_employer_works_at",
379            &format!(
380                r"(?i){name}\s+(?:works at|works for|is employed at|is employed by|joined|is at)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\.|,|!|\?|$|\s+(?:as|in|since))"
381            ),
382            MemoryKind::Fact,
383            "$1",
384            "employer",
385            "$2",
386        ) {
387            self.rules.push(rule);
388        }
389
390        // "Alice is the CEO of Acme Corp" / "Bob is the founder of Startup Inc"
391        if let Ok(rule) = ExtractionRule::new(
392            "3p_role_at_company",
393            &format!(
394                r"(?i){name}\s+is\s+(?:the\s+)?([A-Za-z\s]+?)\s+(?:of|at)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\.|,|!|\?|$)"
395            ),
396            MemoryKind::Fact,
397            "$1",
398            "role",
399            "$2 at $3",
400        ) {
401            self.rules.push(rule);
402        }
403
404        // "Alice, CEO of Acme" / "Bob, founder of Startup"
405        if let Ok(rule) = ExtractionRule::new(
406            "3p_title_appositive",
407            &format!(
408                r"(?i){name},\s+(?:the\s+)?([A-Za-z\s]+?)\s+(?:of|at)\s+([A-Z][a-zA-Z0-9\s&]+?)(?:\.|,|!|\?|$)"
409            ),
410            MemoryKind::Fact,
411            "$1",
412            "role",
413            "$2 at $3",
414        ) {
415            self.rules.push(rule);
416        }
417
418        // ============================================================
419        // LOCATION PATTERNS
420        // ============================================================
421
422        // "Alice lives in San Francisco" / "John is based in New York"
423        if let Ok(rule) = ExtractionRule::new(
424            "3p_location_lives",
425            &format!(
426                r"(?i){name}\s+(?:lives in|is based in|resides in|is from|comes from|moved to|relocated to)\s+([A-Z][a-zA-Z\s,]+?)(?:\.|!|\?|$)"
427            ),
428            MemoryKind::Fact,
429            "$1",
430            "location",
431            "$2",
432        ) {
433            self.rules.push(rule);
434        }
435
436        // "Alice is a San Francisco resident" / "John is a New Yorker"
437        if let Ok(rule) = ExtractionRule::new(
438            "3p_location_resident",
439            &format!(
440                r"(?i){name}\s+is\s+(?:a\s+)?([A-Z][a-zA-Z\s]+?)(?:\s+resident|\s+native)(?:\.|,|!|\?|$)"
441            ),
442            MemoryKind::Fact,
443            "$1",
444            "location",
445            "$2",
446        ) {
447            self.rules.push(rule);
448        }
449
450        // ============================================================
451        // JOB TITLE / PROFESSION PATTERNS
452        // ============================================================
453
454        // "Alice is a software engineer" / "John is an architect"
455        if let Ok(rule) = ExtractionRule::new(
456            "3p_job_title",
457            &format!(
458                r"(?i){name}\s+is\s+(?:a|an)\s+([A-Za-z][a-zA-Z\s]+?)(?:\.|,|!|\?|$|\s+(?:at|who|and|with))"
459            ),
460            MemoryKind::Fact,
461            "$1",
462            "job_title",
463            "$2",
464        ) {
465            self.rules.push(rule);
466        }
467
468        // "Alice works as a product manager" / "John works as an engineer"
469        if let Ok(rule) = ExtractionRule::new(
470            "3p_job_works_as",
471            &format!(
472                r"(?i){name}\s+works\s+as\s+(?:a|an)\s+([A-Za-z][a-zA-Z\s]+?)(?:\.|,|!|\?|$|\s+(?:at|in|for))"
473            ),
474            MemoryKind::Fact,
475            "$1",
476            "job_title",
477            "$2",
478        ) {
479            self.rules.push(rule);
480        }
481
482        // ============================================================
483        // RELATIONSHIP PATTERNS
484        // ============================================================
485
486        // "Alice is married to Bob" / "John is engaged to Mary"
487        if let Ok(rule) = ExtractionRule::new(
488            "3p_relationship_married",
489            &format!(
490                r"(?i){name}\s+is\s+(?:married to|engaged to|dating|in a relationship with|the (?:wife|husband|partner|spouse) of)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)(?:\.|,|!|\?|$)"
491            ),
492            MemoryKind::Relationship,
493            "$1",
494            "spouse",
495            "$2",
496        ) {
497            self.rules.push(rule);
498        }
499
500        // "Alice and Bob are married" / "John and Mary are dating"
501        if let Ok(rule) = ExtractionRule::new(
502            "3p_relationship_pair",
503            &format!(
504                r"(?i){name}\s+and\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)\s+are\s+(?:married|engaged|dating|partners|a couple)(?:\.|,|!|\?|$)"
505            ),
506            MemoryKind::Relationship,
507            "$1",
508            "spouse",
509            "$2",
510        ) {
511            self.rules.push(rule);
512        }
513
514        // "Alice is Bob's wife" / "John is Mary's husband"
515        if let Ok(rule) = ExtractionRule::new(
516            "3p_relationship_possessive",
517            &format!(
518                r"(?i){name}\s+is\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)'s\s+(wife|husband|partner|spouse|girlfriend|boyfriend|fiancé|fiancee)(?:\.|,|!|\?|$)"
519            ),
520            MemoryKind::Relationship,
521            "$1",
522            "spouse",
523            "$2",
524        ) {
525            self.rules.push(rule);
526        }
527
528        // "Alice's husband is Bob" / "John's wife is Mary"
529        if let Ok(rule) = ExtractionRule::new(
530            "3p_relationship_poss_is",
531            &format!(
532                r"(?i){name}'s\s+(?:wife|husband|partner|spouse|girlfriend|boyfriend)\s+is\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)(?:\.|,|!|\?|$)"
533            ),
534            MemoryKind::Relationship,
535            "$1",
536            "spouse",
537            "$2",
538        ) {
539            self.rules.push(rule);
540        }
541
542        // Family relationships: "Alice is Bob's mother/sister/etc."
543        if let Ok(rule) = ExtractionRule::new(
544            "3p_family_member",
545            &format!(
546                r"(?i){name}\s+is\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)'s\s+(mother|father|sister|brother|son|daughter|aunt|uncle|cousin|grandmother|grandfather|grandma|grandpa|mom|dad)(?:\.|,|!|\?|$)"
547            ),
548            MemoryKind::Relationship,
549            "$1",
550            "$3",
551            "$2",
552        ) {
553            self.rules.push(rule);
554        }
555
556        // "Alice has a brother named Bob"
557        if let Ok(rule) = ExtractionRule::new(
558            "3p_family_named",
559            &format!(
560                r"(?i){name}\s+has\s+(?:a\s+)?(brother|sister|son|daughter|mother|father)\s+(?:named|called)\s+([A-Z][a-z]+(?:\s+[A-Z][a-z]+)?)(?:\.|,|!|\?|$)"
561            ),
562            MemoryKind::Relationship,
563            "$1",
564            "$2",
565            "$3",
566        ) {
567            self.rules.push(rule);
568        }
569
570        // ============================================================
571        // PREFERENCE PATTERNS
572        // ============================================================
573
574        // "Alice loves pizza" / "John enjoys hiking" (positive)
575        if let Ok(rule) = ExtractionRule::preference(
576            "3p_preference_positive",
577            &format!(
578                r"(?i){name}\s+(?:loves|likes|enjoys|adores|prefers|is fond of)\s+([\w\s]+?)(?:\.|,|!|\?|$)"
579            ),
580            "$1",
581            "preference",
582            "$2",
583            Polarity::Positive,
584        ) {
585            self.rules.push(rule);
586        }
587
588        // "Alice hates spiders" / "John dislikes crowds" (negative)
589        if let Ok(rule) = ExtractionRule::preference(
590            "3p_preference_negative",
591            &format!(
592                r"(?i){name}\s+(?:hates|dislikes|despises|can't stand|doesn't like|avoids)\s+([\w\s]+?)(?:\.|,|!|\?|$)"
593            ),
594            "$1",
595            "preference",
596            "$2",
597            Polarity::Negative,
598        ) {
599            self.rules.push(rule);
600        }
601
602        // "Alice's favorite food is sushi"
603        if let Ok(rule) = ExtractionRule::new(
604            "3p_favorite",
605            &format!(
606                r"(?i){name}'s\s+(?:favorite|favourite)\s+(\w+)\s+is\s+([\w\s]+?)(?:\.|,|!|\?|$)"
607            ),
608            MemoryKind::Preference,
609            "$1",
610            "favorite_$2",
611            "$3",
612        ) {
613            self.rules.push(rule);
614        }
615
616        // ============================================================
617        // EDUCATION PATTERNS
618        // ============================================================
619
620        // "Alice studied at MIT" / "John graduated from Harvard"
621        if let Ok(rule) = ExtractionRule::new(
622            "3p_education_studied",
623            &format!(
624                r"(?i){name}\s+(?:studied at|graduated from|attends|attended|went to|goes to)\s+([A-Z][a-zA-Z\s]+?(?:University|College|Institute|School|Academy)?)(?:\.|,|!|\?|$)"
625            ),
626            MemoryKind::Fact,
627            "$1",
628            "education",
629            "$2",
630        ) {
631            self.rules.push(rule);
632        }
633
634        // "Alice has a degree in Computer Science"
635        if let Ok(rule) = ExtractionRule::new(
636            "3p_education_degree",
637            &format!(
638                r"(?i){name}\s+has\s+(?:a\s+)?(?:degree|PhD|doctorate|masters?|bachelors?|BA|BS|MS|MBA)\s+in\s+([A-Za-z\s]+?)(?:\.|,|!|\?|$|\s+from)"
639            ),
640            MemoryKind::Fact,
641            "$1",
642            "degree",
643            "$2",
644        ) {
645            self.rules.push(rule);
646        }
647
648        // "Alice majored in Physics"
649        if let Ok(rule) = ExtractionRule::new(
650            "3p_education_major",
651            &format!(
652                r"(?i){name}\s+(?:majored in|minored in|studied)\s+([A-Za-z\s]+?)(?:\.|,|!|\?|$|\s+at)"
653            ),
654            MemoryKind::Fact,
655            "$1",
656            "field_of_study",
657            "$2",
658        ) {
659            self.rules.push(rule);
660        }
661
662        // ============================================================
663        // PROFILE / BIO PATTERNS
664        // ============================================================
665
666        // "Alice is 28 years old" / "John is 35"
667        if let Ok(rule) = ExtractionRule::new(
668            "3p_age",
669            &format!(
670                r"(?i){name}\s+is\s+(\d{{1,3}})\s*(?:years old|yrs old|yo)?(?:\.|,|!|\?|$|\s)"
671            ),
672            MemoryKind::Profile,
673            "$1",
674            "age",
675            "$2",
676        ) {
677            self.rules.push(rule);
678        }
679
680        // "Alice was born in 1990" / "John was born on March 15"
681        if let Ok(rule) = ExtractionRule::new(
682            "3p_birthdate",
683            &format!(
684                r"(?i){name}\s+was\s+born\s+(?:in|on)\s+(\w+(?:\s+\d{{1,2}}(?:st|nd|rd|th)?)?(?:,?\s+\d{{4}})?)(?:\.|,|!|\?|$)"
685            ),
686            MemoryKind::Profile,
687            "$1",
688            "birthdate",
689            "$2",
690        ) {
691            self.rules.push(rule);
692        }
693
694        // "Alice is from Boston" - birthplace
695        if let Ok(rule) = ExtractionRule::new(
696            "3p_birthplace",
697            &format!(
698                r"(?i){name}\s+(?:is|was)\s+(?:originally\s+)?from\s+([A-Z][a-zA-Z\s,]+?)(?:\.|!|\?|$|\s+but)"
699            ),
700            MemoryKind::Profile,
701            "$1",
702            "birthplace",
703            "$2",
704        ) {
705            self.rules.push(rule);
706        }
707
708        // "Alice's email is alice@example.com"
709        if let Ok(rule) = ExtractionRule::new(
710            "3p_email",
711            &format!(r"(?i){name}'s\s+email\s+(?:is|address is)\s+([\w\.\-]+@[\w\.\-]+\.\w+)"),
712            MemoryKind::Profile,
713            "$1",
714            "email",
715            "$2",
716        ) {
717            self.rules.push(rule);
718        }
719
720        // ============================================================
721        // HOBBY / INTEREST PATTERNS
722        // ============================================================
723
724        // "Alice plays tennis" / "John plays the piano"
725        if let Ok(rule) = ExtractionRule::new(
726            "3p_hobby_plays",
727            &format!(
728                r"(?i){name}\s+plays\s+(?:the\s+)?([\w\s]+?)(?:\.|,|!|\?|$|\s+(?:every|on|and))"
729            ),
730            MemoryKind::Preference,
731            "$1",
732            "hobby",
733            "$2",
734        ) {
735            self.rules.push(rule);
736        }
737
738        // "Alice is into photography" / "John is interested in astronomy"
739        if let Ok(rule) = ExtractionRule::new(
740            "3p_interest",
741            &format!(
742                r"(?i){name}\s+is\s+(?:into|interested in|passionate about|really into)\s+([\w\s]+?)(?:\.|,|!|\?|$)"
743            ),
744            MemoryKind::Preference,
745            "$1",
746            "interest",
747            "$2",
748        ) {
749            self.rules.push(rule);
750        }
751
752        // ============================================================
753        // PET PATTERNS
754        // ============================================================
755
756        // "Alice has a cat named Whiskers"
757        if let Ok(rule) = ExtractionRule::new(
758            "3p_pet_named",
759            &format!(
760                r"(?i){name}\s+has\s+(?:a\s+)?(dog|cat|bird|fish|hamster|rabbit|pet)\s+(?:named|called)\s+([A-Z][a-z]+)(?:\.|,|!|\?|$)"
761            ),
762            MemoryKind::Fact,
763            "$1",
764            "pet_name",
765            "$3",
766        ) {
767            self.rules.push(rule);
768        }
769
770        // "Alice's dog is named Max"
771        if let Ok(rule) = ExtractionRule::new(
772            "3p_pet_poss_named",
773            &format!(
774                r"(?i){name}'s\s+(dog|cat|bird|fish|hamster|rabbit|pet)\s+is\s+(?:named|called)\s+([A-Z][a-z]+)(?:\.|,|!|\?|$)"
775            ),
776            MemoryKind::Fact,
777            "$1",
778            "pet_name",
779            "$3",
780        ) {
781            self.rules.push(rule);
782        }
783
784        // "Alice owns a golden retriever"
785        if let Ok(rule) = ExtractionRule::new(
786            "3p_pet_owns",
787            &format!(
788                r"(?i){name}\s+(?:owns|has)\s+(?:a\s+)?([\w\s]+?)\s+(?:dog|cat|bird|fish|hamster|rabbit)(?:\.|,|!|\?|$)"
789            ),
790            MemoryKind::Fact,
791            "$1",
792            "pet",
793            "$2",
794        ) {
795            self.rules.push(rule);
796        }
797
798        // ============================================================
799        // EVENT PATTERNS
800        // ============================================================
801
802        // "Alice visited Paris" / "John traveled to Japan"
803        if let Ok(rule) = ExtractionRule::new(
804            "3p_travel",
805            &format!(
806                r"(?i){name}\s+(?:visited|traveled to|travelled to|went to|is going to|will visit)\s+([A-Z][a-zA-Z\s,]+?)(?:\s+(?:last|this|next)|\.|,|!|\?|$)"
807            ),
808            MemoryKind::Event,
809            "$1",
810            "travel",
811            "$2",
812        ) {
813            self.rules.push(rule);
814        }
815
816        // "Alice started at Google in 2020"
817        if let Ok(rule) = ExtractionRule::new(
818            "3p_career_event",
819            &format!(
820                r"(?i){name}\s+(?:started|joined|left|quit|founded)\s+(?:at\s+)?([A-Z][a-zA-Z0-9\s&]+?)(?:\s+in\s+\d{{4}})?(?:\.|,|!|\?|$)"
821            ),
822            MemoryKind::Event,
823            "$1",
824            "career_event",
825            "$2",
826        ) {
827            self.rules.push(rule);
828        }
829    }
830
831    /// Get the number of rules in this engine.
832    #[must_use]
833    pub fn rule_count(&self) -> usize {
834        self.rules.len()
835    }
836}
837
838impl EnrichmentEngine for RulesEngine {
839    fn kind(&self) -> &str {
840        "rules"
841    }
842
843    fn version(&self) -> &str {
844        &self.version
845    }
846
847    fn enrich(&self, ctx: &EnrichmentContext) -> EnrichmentResult {
848        let mut all_cards = Vec::new();
849
850        for rule in &self.rules {
851            let cards = rule.apply(ctx);
852            all_cards.extend(cards);
853        }
854
855        EnrichmentResult::success(all_cards)
856    }
857}
858
859#[cfg(test)]
860mod tests {
861    use super::*;
862
863    fn test_context(text: &str) -> EnrichmentContext {
864        EnrichmentContext::new(
865            1,
866            "mv2://test/msg-1".to_string(),
867            text.to_string(),
868            None,
869            1700000000,
870            None,
871        )
872    }
873
874    #[test]
875    fn test_rules_engine_default() {
876        let engine = RulesEngine::new();
877        assert!(engine.rule_count() > 0);
878        assert_eq!(engine.kind(), "rules");
879        assert_eq!(engine.version(), "1.0.0");
880    }
881
882    #[test]
883    fn test_extract_employer() {
884        let engine = RulesEngine::new();
885        let ctx = test_context("Hi, I work at Anthropic.");
886        let result = engine.enrich(&ctx);
887
888        assert!(result.success);
889        // Find the first-person employer card
890        let card = result
891            .cards
892            .iter()
893            .find(|c| c.entity == "user" && c.slot == "employer")
894            .unwrap();
895        assert_eq!(card.value, "Anthropic");
896    }
897
898    #[test]
899    fn test_extract_location() {
900        let engine = RulesEngine::new();
901        let ctx = test_context("I live in San Francisco.");
902        let result = engine.enrich(&ctx);
903
904        assert!(result.success);
905        // Find the first-person location card
906        let card = result
907            .cards
908            .iter()
909            .find(|c| c.entity == "user" && c.slot == "location")
910            .unwrap();
911        assert_eq!(card.value, "San Francisco");
912    }
913
914    #[test]
915    fn test_extract_preference_positive() {
916        let engine = RulesEngine::new();
917        let ctx = test_context("I really love sushi.");
918        let result = engine.enrich(&ctx);
919
920        assert!(result.success);
921        assert_eq!(result.cards.len(), 1);
922        assert_eq!(result.cards[0].kind, MemoryKind::Preference);
923        assert_eq!(result.cards[0].slot, "food_preference");
924        assert_eq!(result.cards[0].value, "sushi");
925        assert_eq!(result.cards[0].polarity, Some(Polarity::Positive));
926    }
927
928    #[test]
929    fn test_extract_preference_negative() {
930        let engine = RulesEngine::new();
931        let ctx = test_context("I really hate cilantro.");
932        let result = engine.enrich(&ctx);
933
934        assert!(result.success);
935        assert_eq!(result.cards.len(), 1);
936        assert_eq!(result.cards[0].polarity, Some(Polarity::Negative));
937        assert_eq!(result.cards[0].value, "cilantro");
938    }
939
940    #[test]
941    fn test_multiple_extractions() {
942        let engine = RulesEngine::new();
943        let ctx = test_context("I work at Google. I live in Mountain View. I love programming.");
944        let result = engine.enrich(&ctx);
945
946        assert!(result.success);
947        assert!(result.cards.len() >= 2);
948    }
949
950    #[test]
951    fn test_no_matches() {
952        let engine = RulesEngine::new();
953        let ctx = test_context("The weather is nice today.");
954        let result = engine.enrich(&ctx);
955
956        assert!(result.success);
957        assert!(result.cards.is_empty());
958    }
959
960    #[test]
961    fn test_extract_name() {
962        let engine = RulesEngine::new();
963        let ctx = test_context("My name is John Smith.");
964        let result = engine.enrich(&ctx);
965
966        assert!(result.success);
967        assert_eq!(result.cards.len(), 1);
968        assert_eq!(result.cards[0].slot, "name");
969        assert_eq!(result.cards[0].value, "John Smith");
970    }
971
972    #[test]
973    fn test_extract_pet() {
974        let engine = RulesEngine::new();
975        let ctx = test_context("I have a golden retriever named Max.");
976        let result = engine.enrich(&ctx);
977
978        assert!(result.success);
979        // Should extract both "pet" and "pet_name"
980        let pet_card = result.cards.iter().find(|c| c.slot == "pet");
981        let name_card = result.cards.iter().find(|c| c.slot == "pet_name");
982        assert!(pet_card.is_some());
983        assert!(name_card.is_some());
984        assert_eq!(name_card.unwrap().value, "Max");
985    }
986
987    #[test]
988    fn test_custom_rule() {
989        let mut engine = RulesEngine::empty();
990        let rule = ExtractionRule::new(
991            "custom",
992            r"(?i)favorite color is\s+(\w+)",
993            MemoryKind::Preference,
994            "user",
995            "favorite_color",
996            "$1",
997        )
998        .unwrap();
999        engine.add_rule(rule);
1000
1001        let ctx = test_context("My favorite color is blue.");
1002        let result = engine.enrich(&ctx);
1003
1004        assert!(result.success);
1005        assert_eq!(result.cards.len(), 1);
1006        assert_eq!(result.cards[0].slot, "favorite_color");
1007        assert_eq!(result.cards[0].value, "blue");
1008    }
1009
1010    // ========================================================
1011    // THIRD-PERSON PATTERN TESTS
1012    // ========================================================
1013
1014    #[test]
1015    fn test_3p_employer() {
1016        let engine = RulesEngine::new();
1017        let ctx = test_context("Alice works at Acme Corp.");
1018        let result = engine.enrich(&ctx);
1019
1020        assert!(result.success);
1021        let card = result.cards.iter().find(|c| c.slot == "employer").unwrap();
1022        assert_eq!(card.entity, "alice");
1023        assert_eq!(card.value, "Acme Corp");
1024    }
1025
1026    #[test]
1027    fn test_3p_employer_variations() {
1028        let engine = RulesEngine::new();
1029
1030        // "is employed at"
1031        let ctx = test_context("John Smith is employed at Google.");
1032        let result = engine.enrich(&ctx);
1033        let card = result.cards.iter().find(|c| c.slot == "employer").unwrap();
1034        assert_eq!(card.entity, "john smith");
1035        assert_eq!(card.value, "Google");
1036
1037        // "joined"
1038        let ctx = test_context("Mary joined Microsoft.");
1039        let result = engine.enrich(&ctx);
1040        let card = result.cards.iter().find(|c| c.slot == "employer").unwrap();
1041        assert_eq!(card.entity, "mary");
1042        assert_eq!(card.value, "Microsoft");
1043    }
1044
1045    #[test]
1046    fn test_3p_location() {
1047        let engine = RulesEngine::new();
1048
1049        // "lives in"
1050        let ctx = test_context("Alice lives in San Francisco.");
1051        let result = engine.enrich(&ctx);
1052        let card = result.cards.iter().find(|c| c.slot == "location").unwrap();
1053        assert_eq!(card.entity, "alice");
1054        assert_eq!(card.value, "San Francisco");
1055
1056        // "is based in"
1057        let ctx = test_context("Bob is based in New York City.");
1058        let result = engine.enrich(&ctx);
1059        let card = result.cards.iter().find(|c| c.slot == "location").unwrap();
1060        assert_eq!(card.entity, "bob");
1061        assert!(card.value.contains("New York"));
1062    }
1063
1064    #[test]
1065    fn test_3p_job_title() {
1066        let engine = RulesEngine::new();
1067
1068        // "is a"
1069        let ctx = test_context("Alice is a software engineer.");
1070        let result = engine.enrich(&ctx);
1071        let card = result.cards.iter().find(|c| c.slot == "job_title").unwrap();
1072        assert_eq!(card.entity, "alice");
1073        assert_eq!(card.value, "software engineer");
1074
1075        // "works as"
1076        let ctx = test_context("John works as a product manager.");
1077        let result = engine.enrich(&ctx);
1078        let card = result.cards.iter().find(|c| c.slot == "job_title").unwrap();
1079        assert_eq!(card.entity, "john");
1080        assert_eq!(card.value, "product manager");
1081    }
1082
1083    #[test]
1084    fn test_3p_relationship_married() {
1085        let engine = RulesEngine::new();
1086
1087        // "is married to"
1088        let ctx = test_context("Alice is married to Bob.");
1089        let result = engine.enrich(&ctx);
1090        let card = result.cards.iter().find(|c| c.slot == "spouse").unwrap();
1091        assert_eq!(card.entity, "alice");
1092        assert_eq!(card.value, "Bob");
1093
1094        // "and are married"
1095        let ctx = test_context("John and Mary are married.");
1096        let result = engine.enrich(&ctx);
1097        let card = result.cards.iter().find(|c| c.slot == "spouse").unwrap();
1098        assert_eq!(card.entity, "john");
1099        assert_eq!(card.value, "Mary");
1100    }
1101
1102    #[test]
1103    fn test_3p_preference_positive() {
1104        let engine = RulesEngine::new();
1105        let ctx = test_context("Alice loves sushi.");
1106        let result = engine.enrich(&ctx);
1107
1108        let card = result
1109            .cards
1110            .iter()
1111            .find(|c| c.slot == "preference")
1112            .unwrap();
1113        assert_eq!(card.entity, "alice");
1114        assert_eq!(card.value, "sushi");
1115        assert_eq!(card.polarity, Some(Polarity::Positive));
1116    }
1117
1118    #[test]
1119    fn test_3p_preference_negative() {
1120        let engine = RulesEngine::new();
1121        let ctx = test_context("Bob hates spiders.");
1122        let result = engine.enrich(&ctx);
1123
1124        let card = result
1125            .cards
1126            .iter()
1127            .find(|c| c.slot == "preference")
1128            .unwrap();
1129        assert_eq!(card.entity, "bob");
1130        assert_eq!(card.value, "spiders");
1131        assert_eq!(card.polarity, Some(Polarity::Negative));
1132    }
1133
1134    #[test]
1135    fn test_3p_education() {
1136        let engine = RulesEngine::new();
1137
1138        // "graduated from"
1139        let ctx = test_context("Alice graduated from MIT.");
1140        let result = engine.enrich(&ctx);
1141        let card = result.cards.iter().find(|c| c.slot == "education").unwrap();
1142        assert_eq!(card.entity, "alice");
1143        assert_eq!(card.value, "MIT");
1144
1145        // "studied at"
1146        let ctx = test_context("John studied at Stanford University.");
1147        let result = engine.enrich(&ctx);
1148        let card = result.cards.iter().find(|c| c.slot == "education").unwrap();
1149        assert_eq!(card.entity, "john");
1150        assert!(card.value.contains("Stanford"));
1151    }
1152
1153    #[test]
1154    fn test_3p_age() {
1155        let engine = RulesEngine::new();
1156        let ctx = test_context("Alice is 28 years old.");
1157        let result = engine.enrich(&ctx);
1158
1159        let card = result.cards.iter().find(|c| c.slot == "age").unwrap();
1160        assert_eq!(card.entity, "alice");
1161        assert_eq!(card.value, "28");
1162    }
1163
1164    #[test]
1165    fn test_3p_travel() {
1166        let engine = RulesEngine::new();
1167        let ctx = test_context("Alice visited Paris.");
1168        let result = engine.enrich(&ctx);
1169
1170        let card = result.cards.iter().find(|c| c.slot == "travel").unwrap();
1171        assert_eq!(card.entity, "alice");
1172        assert_eq!(card.value, "Paris");
1173    }
1174
1175    #[test]
1176    fn test_3p_hobby() {
1177        let engine = RulesEngine::new();
1178        let ctx = test_context("Bob plays tennis.");
1179        let result = engine.enrich(&ctx);
1180
1181        let card = result.cards.iter().find(|c| c.slot == "hobby").unwrap();
1182        assert_eq!(card.entity, "bob");
1183        assert_eq!(card.value, "tennis");
1184    }
1185
1186    #[test]
1187    fn test_3p_multiple_extractions() {
1188        let engine = RulesEngine::new();
1189        let ctx = test_context(
1190            "Alice works at Google. She lives in Mountain View. Bob is a doctor in Seattle.",
1191        );
1192        let result = engine.enrich(&ctx);
1193
1194        assert!(result.success);
1195        // Should extract multiple facts about Alice and Bob
1196        let alice_employer = result
1197            .cards
1198            .iter()
1199            .find(|c| c.entity == "alice" && c.slot == "employer");
1200        let bob_job = result
1201            .cards
1202            .iter()
1203            .find(|c| c.entity == "bob" && c.slot == "job_title");
1204
1205        assert!(alice_employer.is_some());
1206        assert!(bob_job.is_some());
1207    }
1208
1209    #[test]
1210    fn test_entity_normalization() {
1211        let engine = RulesEngine::new();
1212
1213        // Entities should be normalized to lowercase for consistent O(1) lookups
1214        let ctx = test_context("ALICE SMITH works at Acme.");
1215        let result = engine.enrich(&ctx);
1216
1217        let card = result.cards.iter().find(|c| c.slot == "employer");
1218        assert!(card.is_some());
1219        // Entity should be lowercase
1220        assert_eq!(card.unwrap().entity, "alice smith");
1221    }
1222
1223    #[test]
1224    fn test_3p_pet() {
1225        let engine = RulesEngine::new();
1226        let ctx = test_context("Alice has a cat named Whiskers.");
1227        let result = engine.enrich(&ctx);
1228
1229        let card = result.cards.iter().find(|c| c.slot == "pet_name").unwrap();
1230        assert_eq!(card.entity, "alice");
1231        assert_eq!(card.value, "Whiskers");
1232    }
1233
1234    #[test]
1235    fn test_3p_family() {
1236        let engine = RulesEngine::new();
1237        let ctx = test_context("Alice has a brother named Bob.");
1238        let result = engine.enrich(&ctx);
1239
1240        let card = result.cards.iter().find(|c| c.slot == "brother").unwrap();
1241        assert_eq!(card.entity, "alice");
1242        assert_eq!(card.value, "Bob");
1243    }
1244}