1use crate::types::{DataRecord, DataSource, FieldValue, MatchResult};
7use crate::DataMatcher;
8use std::collections::HashMap;
9
10#[derive(Debug, Clone)]
12pub struct CrossReferenceResult {
13 pub entity_id: String,
15 pub query_name: String,
17 pub query_cpf: Option<String>,
19 pub narrative: String,
21 pub source_summaries: Vec<SourceSummary>,
23 pub total_sources: usize,
25 pub confidence: f64,
27 pub match_result: Option<MatchResult>,
29}
30
/// Condensed view of what a single data source contributed to a match.
#[derive(Clone, Debug)]
pub struct SourceSummary {
    /// Identifier of the source the record came from.
    pub source_id: String,
    /// Display name of the source; this is what narratives mention.
    pub source_name: String,
    /// One-line, comma-separated description of the matched record.
    pub summary: String,
    /// Match score reported for this particular source.
    pub confidence: f64,
    /// Record fields rendered as strings, keyed by field name.
    pub key_fields: HashMap<String, String>,
}
45
46#[derive(Debug, Clone)]
48pub struct CrossReferencer {
49 matcher: DataMatcher,
50}
51
52impl Default for CrossReferencer {
53 fn default() -> Self {
54 Self::new()
55 }
56}
57
58impl CrossReferencer {
59 pub fn new() -> Self {
61 Self {
62 matcher: DataMatcher::new(),
63 }
64 }
65
66 pub fn with_matcher(matcher: DataMatcher) -> Self {
68 Self { matcher }
69 }
70
71 pub fn cross_reference(
73 &self,
74 sources: &[DataSource],
75 query_name: &str,
76 query_cpf: Option<&str>,
77 ) -> CrossReferenceResult {
78 let results = self
79 .matcher
80 .match_across_sources(sources, query_name, query_cpf);
81
82 if results.is_empty() {
83 return CrossReferenceResult {
84 entity_id: String::new(),
85 query_name: query_name.to_string(),
86 query_cpf: query_cpf.map(String::from),
87 narrative: format!(
88 "Nenhum registro encontrado para '{}' nas fontes consultadas.",
89 query_name
90 ),
91 source_summaries: Vec::new(),
92 total_sources: 0,
93 confidence: 0.0,
94 match_result: None,
95 };
96 }
97
98 let best = &results[0];
100 let source_summaries = self.build_source_summaries(best);
101 let narrative = self.build_narrative(query_name, query_cpf, best, &source_summaries);
102
103 CrossReferenceResult {
104 entity_id: best.entity_id.clone(),
105 query_name: query_name.to_string(),
106 query_cpf: query_cpf.map(String::from),
107 narrative,
108 source_summaries,
109 total_sources: best.sources.len(),
110 confidence: best.confidence,
111 match_result: Some(best.clone()),
112 }
113 }
114
115 fn build_source_summaries(&self, result: &MatchResult) -> Vec<SourceSummary> {
117 result
118 .sources
119 .iter()
120 .map(|source_match| {
121 let key_fields = self.extract_key_fields(&source_match.record);
122 let summary =
123 self.summarize_record(&source_match.record, &source_match.source_name);
124
125 SourceSummary {
126 source_id: source_match.source_id.clone(),
127 source_name: source_match.source_name.clone(),
128 summary,
129 confidence: source_match.score,
130 key_fields,
131 }
132 })
133 .collect()
134 }
135
136 fn extract_key_fields(&self, record: &DataRecord) -> HashMap<String, String> {
138 let mut key_fields = HashMap::new();
139
140 for (key, value) in &record.fields {
141 let str_value = match value {
142 FieldValue::Text(s) => s.clone(),
143 FieldValue::Integer(n) => n.to_string(),
144 FieldValue::Float(f) => format!("{:.2}", f),
145 FieldValue::Boolean(b) => if *b { "sim" } else { "não" }.to_string(),
146 FieldValue::Date(d) => d.clone(),
147 FieldValue::Null => continue,
148 };
149
150 if !str_value.is_empty() {
151 key_fields.insert(key.clone(), str_value);
152 }
153 }
154
155 key_fields
156 }
157
158 fn summarize_record(&self, record: &DataRecord, source_name: &str) -> String {
160 let mut parts = Vec::new();
161
162 if let Some(name) = record.get_name_field() {
164 parts.push(format!("nome: {}", name));
165 }
166
167 if let Some(cpf) = record.get_cpf_field() {
169 parts.push(format!("CPF: {}", cpf));
170 }
171
172 let interesting_fields = [
174 "email",
175 "telefone",
176 "phone",
177 "endereco",
178 "address",
179 "cidade",
180 "city",
181 "estado",
182 "state",
183 "valor",
184 "value",
185 "status",
186 "tipo",
187 "type",
188 "data",
189 "date",
190 "created_at",
191 "updated_at",
192 ];
193
194 for field_name in &interesting_fields {
195 if let Some(FieldValue::Text(value)) = record.fields.get(*field_name) {
196 if !value.is_empty() && parts.len() < 6 {
197 parts.push(format!("{}: {}", field_name, value));
198 }
199 }
200 }
201
202 if parts.is_empty() {
203 format!("registro encontrado em {}", source_name)
204 } else {
205 parts.join(", ")
206 }
207 }
208
209 fn build_narrative(
211 &self,
212 query_name: &str,
213 query_cpf: Option<&str>,
214 result: &MatchResult,
215 summaries: &[SourceSummary],
216 ) -> String {
217 let mut narrative = String::new();
218
219 let cpf_info = query_cpf
221 .map(|cpf| format!(" (CPF: {})", cpf))
222 .unwrap_or_default();
223
224 narrative.push_str(&format!(
225 "**{}**{} foi encontrado em {} fonte(s) com {:.0}% de confiança.\n\n",
226 query_name,
227 cpf_info,
228 result.sources.len(),
229 result.confidence * 100.0
230 ));
231
232 for (i, summary) in summaries.iter().enumerate() {
234 let confidence_str = match summary.confidence {
235 c if c >= 0.95 => "correspondência exata",
236 c if c >= 0.85 => "alta correspondência",
237 c if c >= 0.70 => "correspondência moderada",
238 _ => "baixa correspondência",
239 };
240
241 narrative.push_str(&format!(
242 "{}. **{}** ({}):\n {}\n\n",
243 i + 1,
244 summary.source_name,
245 confidence_str,
246 summary.summary
247 ));
248 }
249
250 let source_names: Vec<&str> = summaries.iter().map(|s| s.source_name.as_str()).collect();
252
253 if source_names.len() == 1 {
254 narrative.push_str(&format!("Aparece somente em **{}**.", source_names[0]));
255 } else if source_names.len() == 2 {
256 narrative.push_str(&format!(
257 "Aparece em **{}** e **{}**.",
258 source_names[0], source_names[1]
259 ));
260 } else {
261 let last = source_names.last().unwrap();
262 let rest = &source_names[..source_names.len() - 1];
263 narrative.push_str(&format!(
264 "Aparece em **{}** e **{}**.",
265 rest.join("**, **"),
266 last
267 ));
268 }
269
270 narrative
271 }
272
273 pub fn compact_narrative(
275 &self,
276 sources: &[DataSource],
277 query_name: &str,
278 query_cpf: Option<&str>,
279 ) -> String {
280 let result = self.cross_reference(sources, query_name, query_cpf);
281
282 if result.total_sources == 0 {
283 return format!("'{}': não encontrado", query_name);
284 }
285
286 let source_names: Vec<&str> = result
287 .source_summaries
288 .iter()
289 .map(|s| s.source_name.as_str())
290 .collect();
291
292 let source_list = if source_names.len() == 1 {
293 source_names[0].to_string()
294 } else if source_names.len() == 2 {
295 format!("{} e {}", source_names[0], source_names[1])
296 } else {
297 let last = source_names.last().unwrap();
298 let rest = &source_names[..source_names.len() - 1];
299 format!("{} e {}", rest.join(", "), last)
300 };
301
302 format!(
303 "'{}': aparece em {} ({:.0}% confiança)",
304 query_name,
305 source_list,
306 result.confidence * 100.0
307 )
308 }
309}
310
311pub fn build_cross_reference_narrative(
313 query_name: &str,
314 query_cpf: Option<&str>,
315 match_result: &MatchResult,
316) -> String {
317 let crossref = CrossReferencer::new();
318 let summaries = crossref.build_source_summaries(match_result);
319 crossref.build_narrative(query_name, query_cpf, match_result, &summaries)
320}
321
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{DataSchema, FieldValue};

    /// CPF shared by the fixture records and by the CPF-based queries.
    const FIXTURE_CPF: &str = "123.456.789-00";

    /// Shorthand for a text field value.
    fn text(value: &str) -> FieldValue {
        FieldValue::Text(value.to_string())
    }

    /// Wraps a single record into a source with a default schema.
    fn single_record_source(id: &str, name: &str, record: DataRecord) -> DataSource {
        DataSource {
            id: id.to_string(),
            name: name.to_string(),
            schema: DataSchema::default(),
            records: vec![record],
        }
    }

    /// Three sources describing the same person with differently spelled names.
    fn create_test_sources() -> Vec<DataSource> {
        let parties = DataRecord::new("parties")
            .with_field("nome", text("Lucas Melo de Oliveira"))
            .with_field("cpf", text(FIXTURE_CPF))
            .with_field("email", text("lucas@email.com"))
            .with_confidence(1.0);

        let iptu = DataRecord::new("iptu")
            .with_field("nome", text("LUCAS M OLIVEIRA"))
            .with_field("documento", text("12345678900"))
            .with_field("endereco", text("Rua das Flores, 123"))
            .with_confidence(1.0);

        let transactions = DataRecord::new("transactions")
            .with_field("nome", text("Lucas Oliveira"))
            .with_field("cpf", text(FIXTURE_CPF))
            .with_field("valor", text("R$ 500.000,00"))
            .with_field("tipo", text("Compra"))
            .with_confidence(1.0);

        vec![
            single_record_source("parties", "Parties", parties),
            single_record_source("iptu", "IPTU", iptu),
            single_record_source("transactions", "Transações", transactions),
        ]
    }

    /// Runs a query against the fixture sources with a default cross-referencer.
    fn run_query(query_name: &str, query_cpf: Option<&str>) -> CrossReferenceResult {
        CrossReferencer::new().cross_reference(&create_test_sources(), query_name, query_cpf)
    }

    #[test]
    fn test_cross_reference_with_cpf() {
        let result = run_query("Lucas Oliveira", Some(FIXTURE_CPF));

        assert!(
            result.total_sources >= 2,
            "Should match in multiple sources"
        );
        assert!(result.confidence > 0.85);
        assert!(result.narrative.contains("Lucas Oliveira"));
        assert!(result.narrative.contains("encontrado"));
    }

    #[test]
    fn test_cross_reference_by_name() {
        let result = run_query("Lucas Melo Oliveira", None);

        assert!(result.total_sources >= 1, "Should find by name");
        assert!(!result.narrative.is_empty());
    }

    #[test]
    fn test_cross_reference_not_found() {
        let result = run_query("Pessoa Inexistente", None);

        assert_eq!(result.total_sources, 0);
        assert!(result.narrative.contains("Nenhum registro"));
    }

    #[test]
    fn test_compact_narrative() {
        let narrative = CrossReferencer::new().compact_narrative(
            &create_test_sources(),
            "Lucas",
            Some(FIXTURE_CPF),
        );

        assert!(narrative.contains("aparece em"));
        assert!(narrative.contains("confiança"));
    }

    #[test]
    fn test_narrative_contains_source_names() {
        let result = run_query("Lucas", Some(FIXTURE_CPF));

        let contains_source = ["Parties", "IPTU", "Transações"]
            .iter()
            .any(|name| result.narrative.contains(*name));

        assert!(contains_source, "Narrative should mention source names");
    }

    #[test]
    fn test_source_summaries() {
        let result = run_query("Lucas", Some(FIXTURE_CPF));

        // Every reported summary must be fully populated.
        for summary in &result.source_summaries {
            assert!(!summary.source_name.is_empty());
            assert!(!summary.summary.is_empty());
            assert!(summary.confidence > 0.0);
        }
    }
}