Skip to main content

cortexai_data/
crossref.rs

1//! Cross-reference narrative generation
2//!
3//! Generates human-readable PT-BR narratives for entity cross-referencing
4//! across multiple data sources.
5
6use crate::types::{DataRecord, DataSource, FieldValue, MatchResult};
7use crate::DataMatcher;
8use std::collections::HashMap;
9
10/// Cross-reference result with narrative
11#[derive(Debug, Clone)]
12pub struct CrossReferenceResult {
13    /// The entity being cross-referenced
14    pub entity_id: String,
15    /// Query used for matching
16    pub query_name: String,
17    /// Query CPF if provided
18    pub query_cpf: Option<String>,
19    /// Narrative description in PT-BR
20    pub narrative: String,
21    /// Summary of each source match
22    pub source_summaries: Vec<SourceSummary>,
23    /// Total sources matched
24    pub total_sources: usize,
25    /// Overall confidence
26    pub confidence: f64,
27    /// Underlying match result
28    pub match_result: Option<MatchResult>,
29}
30
/// Summary of how a single source matched the queried entity.
///
/// Derives `PartialEq` so summaries can be compared directly (for example
/// with `assert_eq!` in tests). `Eq` is not derived because `confidence` is
/// an `f64`.
#[derive(Debug, Clone, PartialEq)]
pub struct SourceSummary {
    /// Identifier of the source the record came from.
    pub source_id: String,
    /// Display name of the source.
    pub source_name: String,
    /// Human-readable, single-line summary of the matched record.
    pub summary: String,
    /// Confidence of the match within this source.
    pub confidence: f64,
    /// Non-empty record fields rendered as strings, keyed by field name.
    pub key_fields: HashMap<String, String>,
}
45
46/// Cross-reference engine for narrative generation
47#[derive(Debug, Clone)]
48pub struct CrossReferencer {
49    matcher: DataMatcher,
50}
51
52impl Default for CrossReferencer {
53    fn default() -> Self {
54        Self::new()
55    }
56}
57
58impl CrossReferencer {
59    /// Create a new cross-referencer
60    pub fn new() -> Self {
61        Self {
62            matcher: DataMatcher::new(),
63        }
64    }
65
66    /// Create with custom matcher
67    pub fn with_matcher(matcher: DataMatcher) -> Self {
68        Self { matcher }
69    }
70
71    /// Cross-reference an entity across sources and generate narrative
72    pub fn cross_reference(
73        &self,
74        sources: &[DataSource],
75        query_name: &str,
76        query_cpf: Option<&str>,
77    ) -> CrossReferenceResult {
78        let results = self
79            .matcher
80            .match_across_sources(sources, query_name, query_cpf);
81
82        if results.is_empty() {
83            return CrossReferenceResult {
84                entity_id: String::new(),
85                query_name: query_name.to_string(),
86                query_cpf: query_cpf.map(String::from),
87                narrative: format!(
88                    "Nenhum registro encontrado para '{}' nas fontes consultadas.",
89                    query_name
90                ),
91                source_summaries: Vec::new(),
92                total_sources: 0,
93                confidence: 0.0,
94                match_result: None,
95            };
96        }
97
98        // Take best match
99        let best = &results[0];
100        let source_summaries = self.build_source_summaries(best);
101        let narrative = self.build_narrative(query_name, query_cpf, best, &source_summaries);
102
103        CrossReferenceResult {
104            entity_id: best.entity_id.clone(),
105            query_name: query_name.to_string(),
106            query_cpf: query_cpf.map(String::from),
107            narrative,
108            source_summaries,
109            total_sources: best.sources.len(),
110            confidence: best.confidence,
111            match_result: Some(best.clone()),
112        }
113    }
114
115    /// Build summaries for each source match
116    fn build_source_summaries(&self, result: &MatchResult) -> Vec<SourceSummary> {
117        result
118            .sources
119            .iter()
120            .map(|source_match| {
121                let key_fields = self.extract_key_fields(&source_match.record);
122                let summary =
123                    self.summarize_record(&source_match.record, &source_match.source_name);
124
125                SourceSummary {
126                    source_id: source_match.source_id.clone(),
127                    source_name: source_match.source_name.clone(),
128                    summary,
129                    confidence: source_match.score,
130                    key_fields,
131                }
132            })
133            .collect()
134    }
135
136    /// Extract key fields from a record as strings
137    fn extract_key_fields(&self, record: &DataRecord) -> HashMap<String, String> {
138        let mut key_fields = HashMap::new();
139
140        for (key, value) in &record.fields {
141            let str_value = match value {
142                FieldValue::Text(s) => s.clone(),
143                FieldValue::Integer(n) => n.to_string(),
144                FieldValue::Float(f) => format!("{:.2}", f),
145                FieldValue::Boolean(b) => if *b { "sim" } else { "não" }.to_string(),
146                FieldValue::Date(d) => d.clone(),
147                FieldValue::Null => continue,
148            };
149
150            if !str_value.is_empty() {
151                key_fields.insert(key.clone(), str_value);
152            }
153        }
154
155        key_fields
156    }
157
158    /// Summarize a single record
159    fn summarize_record(&self, record: &DataRecord, source_name: &str) -> String {
160        let mut parts = Vec::new();
161
162        // Name
163        if let Some(name) = record.get_name_field() {
164            parts.push(format!("nome: {}", name));
165        }
166
167        // CPF
168        if let Some(cpf) = record.get_cpf_field() {
169            parts.push(format!("CPF: {}", cpf));
170        }
171
172        // Other interesting fields
173        let interesting_fields = [
174            "email",
175            "telefone",
176            "phone",
177            "endereco",
178            "address",
179            "cidade",
180            "city",
181            "estado",
182            "state",
183            "valor",
184            "value",
185            "status",
186            "tipo",
187            "type",
188            "data",
189            "date",
190            "created_at",
191            "updated_at",
192        ];
193
194        for field_name in &interesting_fields {
195            if let Some(FieldValue::Text(value)) = record.fields.get(*field_name) {
196                if !value.is_empty() && parts.len() < 6 {
197                    parts.push(format!("{}: {}", field_name, value));
198                }
199            }
200        }
201
202        if parts.is_empty() {
203            format!("registro encontrado em {}", source_name)
204        } else {
205            parts.join(", ")
206        }
207    }
208
209    /// Build the PT-BR narrative
210    fn build_narrative(
211        &self,
212        query_name: &str,
213        query_cpf: Option<&str>,
214        result: &MatchResult,
215        summaries: &[SourceSummary],
216    ) -> String {
217        let mut narrative = String::new();
218
219        // Opening
220        let cpf_info = query_cpf
221            .map(|cpf| format!(" (CPF: {})", cpf))
222            .unwrap_or_default();
223
224        narrative.push_str(&format!(
225            "**{}**{} foi encontrado em {} fonte(s) com {:.0}% de confiança.\n\n",
226            query_name,
227            cpf_info,
228            result.sources.len(),
229            result.confidence * 100.0
230        ));
231
232        // Source details
233        for (i, summary) in summaries.iter().enumerate() {
234            let confidence_str = match summary.confidence {
235                c if c >= 0.95 => "correspondência exata",
236                c if c >= 0.85 => "alta correspondência",
237                c if c >= 0.70 => "correspondência moderada",
238                _ => "baixa correspondência",
239            };
240
241            narrative.push_str(&format!(
242                "{}. **{}** ({}):\n   {}\n\n",
243                i + 1,
244                summary.source_name,
245                confidence_str,
246                summary.summary
247            ));
248        }
249
250        // Summary line
251        let source_names: Vec<&str> = summaries.iter().map(|s| s.source_name.as_str()).collect();
252
253        if source_names.len() == 1 {
254            narrative.push_str(&format!("Aparece somente em **{}**.", source_names[0]));
255        } else if source_names.len() == 2 {
256            narrative.push_str(&format!(
257                "Aparece em **{}** e **{}**.",
258                source_names[0], source_names[1]
259            ));
260        } else {
261            let last = source_names.last().unwrap();
262            let rest = &source_names[..source_names.len() - 1];
263            narrative.push_str(&format!(
264                "Aparece em **{}** e **{}**.",
265                rest.join("**, **"),
266                last
267            ));
268        }
269
270        narrative
271    }
272
273    /// Generate a compact one-line narrative
274    pub fn compact_narrative(
275        &self,
276        sources: &[DataSource],
277        query_name: &str,
278        query_cpf: Option<&str>,
279    ) -> String {
280        let result = self.cross_reference(sources, query_name, query_cpf);
281
282        if result.total_sources == 0 {
283            return format!("'{}': não encontrado", query_name);
284        }
285
286        let source_names: Vec<&str> = result
287            .source_summaries
288            .iter()
289            .map(|s| s.source_name.as_str())
290            .collect();
291
292        let source_list = if source_names.len() == 1 {
293            source_names[0].to_string()
294        } else if source_names.len() == 2 {
295            format!("{} e {}", source_names[0], source_names[1])
296        } else {
297            let last = source_names.last().unwrap();
298            let rest = &source_names[..source_names.len() - 1];
299            format!("{} e {}", rest.join(", "), last)
300        };
301
302        format!(
303            "'{}': aparece em {} ({:.0}% confiança)",
304            query_name,
305            source_list,
306            result.confidence * 100.0
307        )
308    }
309}
310
311/// Build a cross-reference narrative from match results (standalone function)
312pub fn build_cross_reference_narrative(
313    query_name: &str,
314    query_cpf: Option<&str>,
315    match_result: &MatchResult,
316) -> String {
317    let crossref = CrossReferencer::new();
318    let summaries = crossref.build_source_summaries(match_result);
319    crossref.build_narrative(query_name, query_cpf, match_result, &summaries)
320}
321
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{DataSchema, FieldValue};

    /// Three sources describing the same person with differing name spellings
    /// and differing field sets.
    fn create_test_sources() -> Vec<DataSource> {
        vec![
            DataSource {
                id: "parties".to_string(),
                name: "Parties".to_string(),
                schema: DataSchema::default(),
                records: vec![DataRecord::new("parties")
                    .with_field(
                        "nome",
                        FieldValue::Text("Lucas Melo de Oliveira".to_string()),
                    )
                    .with_field("cpf", FieldValue::Text("123.456.789-00".to_string()))
                    .with_field("email", FieldValue::Text("lucas@email.com".to_string()))
                    .with_confidence(1.0)],
            },
            DataSource {
                id: "iptu".to_string(),
                name: "IPTU".to_string(),
                schema: DataSchema::default(),
                records: vec![DataRecord::new("iptu")
                    .with_field("nome", FieldValue::Text("LUCAS M OLIVEIRA".to_string()))
                    .with_field("documento", FieldValue::Text("12345678900".to_string()))
                    .with_field(
                        "endereco",
                        FieldValue::Text("Rua das Flores, 123".to_string()),
                    )
                    .with_confidence(1.0)],
            },
            DataSource {
                id: "transactions".to_string(),
                name: "Transações".to_string(),
                schema: DataSchema::default(),
                records: vec![DataRecord::new("transactions")
                    .with_field("nome", FieldValue::Text("Lucas Oliveira".to_string()))
                    .with_field("cpf", FieldValue::Text("123.456.789-00".to_string()))
                    .with_field("valor", FieldValue::Text("R$ 500.000,00".to_string()))
                    .with_field("tipo", FieldValue::Text("Compra".to_string()))
                    .with_confidence(1.0)],
            },
        ]
    }

    #[test]
    fn test_cross_reference_with_cpf() {
        let crossref = CrossReferencer::new();
        let sources = create_test_sources();

        let result = crossref.cross_reference(&sources, "Lucas Oliveira", Some("123.456.789-00"));

        assert!(
            result.total_sources >= 2,
            "Should match in multiple sources"
        );
        assert!(result.confidence > 0.85);
        assert!(result.narrative.contains("Lucas Oliveira"));
        assert!(result.narrative.contains("encontrado"));
    }

    #[test]
    fn test_cross_reference_by_name() {
        let crossref = CrossReferencer::new();
        let sources = create_test_sources();

        let result = crossref.cross_reference(&sources, "Lucas Melo Oliveira", None);

        assert!(result.total_sources >= 1, "Should find by name");
        assert!(!result.narrative.is_empty());
    }

    #[test]
    fn test_cross_reference_not_found() {
        let crossref = CrossReferencer::new();
        let sources = create_test_sources();

        let result = crossref.cross_reference(&sources, "Pessoa Inexistente", None);

        assert_eq!(result.total_sources, 0);
        assert!(result.narrative.contains("Nenhum registro"));
    }

    #[test]
    fn test_compact_narrative() {
        let crossref = CrossReferencer::new();
        let sources = create_test_sources();

        let narrative = crossref.compact_narrative(&sources, "Lucas", Some("123.456.789-00"));

        assert!(narrative.contains("aparece em"));
        assert!(narrative.contains("confiança"));
    }

    #[test]
    fn test_narrative_contains_source_names() {
        let crossref = CrossReferencer::new();
        let sources = create_test_sources();

        let result = crossref.cross_reference(&sources, "Lucas", Some("123.456.789-00"));

        // Should mention at least one source name
        let contains_source = ["Parties", "IPTU", "Transações"]
            .iter()
            .any(|name| result.narrative.contains(name));

        assert!(contains_source, "Narrative should mention source names");
    }

    #[test]
    fn test_source_summaries() {
        let crossref = CrossReferencer::new();
        let sources = create_test_sources();

        let result = crossref.cross_reference(&sources, "Lucas", Some("123.456.789-00"));

        // Without this check the loop below passes vacuously when no
        // summaries are produced.
        assert!(
            !result.source_summaries.is_empty(),
            "Should produce at least one source summary"
        );

        for summary in &result.source_summaries {
            assert!(!summary.source_name.is_empty());
            assert!(!summary.summary.is_empty());
            assert!(summary.confidence > 0.0);
        }
    }
}
446}