Skip to main content

cortexai_data/
lib.rs

1//! # Data Matching Module
2//!
3//! High-performance data matching, CPF/CNPJ validation, and cross-source
4//! consolidation for Brazilian data sources.
5//!
6//! ## Features
7//!
8//! - **CPF Matcher**: Normalize and validate Brazilian CPF numbers
9//! - **CNPJ Matcher**: Normalize and validate Brazilian CNPJ numbers
10//! - **Name Matcher**: Fuzzy matching with Brazilian name conventions
11//! - **Data Matcher**: Cross-source entity resolution and consolidation
12//! - **Data Pipeline**: Async processing with LRU caching
13//! - **Parallel Pipeline**: High-throughput concurrent processing with DashMap
14//! - **Metrics**: Comprehensive observability with exponential moving average (EMA) processing times
15//! - **SQL Extractor**: PostgreSQL data extraction with dynamic schema (requires `postgres` feature)
16//!
17//! ## Example
18//!
19//! ```rust,ignore
20//! use cortexai_data::{DataMatcher, CpfMatcher, CnpjMatcher, NameMatcher};
21//!
22//! let matcher = DataMatcher::new();
23//! let results = matcher.match_across_sources(&sources, "Lucas Oliveira", Some("123.456.789-00"));
24//! ```
25//!
26//! ## SQL Extraction (with `postgres` feature)
27//!
28//! ```rust,ignore
29//! use cortexai_data::sql::{PostgresExtractor, PostgresConfig};
30//!
31//! let config = PostgresConfig::new("postgres://user:pass@localhost/db");
32//! let pool = config.create_pool().await?;
33//!
34//! let extractor = PostgresExtractor::new("sales", "Recent Sales", pool)
35//!     .with_query("SELECT * FROM sales WHERE created_at > NOW() - INTERVAL '30 days'");
36//!
37//! let data_source = extractor.extract().await?;
38//! ```
39
// Public modules of the crate. Each one is declared here and its main items
// are re-exported further down so callers can import them from the crate root.

/// Home of `CnpjMatcher` (CNPJ normalization and validation, per the crate docs).
40pub mod cnpj;
/// Home of `CpfMatcher` (CPF normalization and validation, per the crate docs).
41pub mod cpf;
/// Cross-referencing items: `CrossReferencer`, `CrossReferenceResult`,
/// `SourceSummary` and `build_cross_reference_narrative`.
42pub mod crossref;
/// Home of `DataMatcher` (cross-source entity resolution and consolidation,
/// per the crate docs).
43pub mod matcher;
/// Home of `DataMatchingMetrics` and `MetricsSnapshot`.
44pub mod metrics;
/// Home of `NameMatcher` (fuzzy name matching, per the crate docs).
45pub mod name;
/// Home of `DataPipeline`, `ParallelPipeline` and the cache types
/// (`DataCache`, `ConcurrentCache`, `CacheResult`).
46pub mod pipeline;
/// PostgreSQL extraction support; only compiled when the `postgres` Cargo
/// feature is enabled.
47#[cfg(feature = "postgres")]
48pub mod sql;
/// Shared types; everything public in this module is glob re-exported below.
49pub mod types;
50
// Crate-root re-exports: these let callers write `cortexai_data::CpfMatcher`
// instead of `cortexai_data::cpf::CpfMatcher`, and so on for each module.
51pub use cnpj::CnpjMatcher;
52pub use cpf::CpfMatcher;
53pub use crossref::{
54    build_cross_reference_narrative, CrossReferenceResult, CrossReferencer, SourceSummary,
55};
56pub use matcher::DataMatcher;
57pub use metrics::{DataMatchingMetrics, MetricsSnapshot};
58pub use name::NameMatcher;
59pub use pipeline::{CacheResult, ConcurrentCache, DataCache, DataPipeline, ParallelPipeline};
// The SQL re-exports carry the same feature gate as the `sql` module itself,
// so they are absent from the crate root when `postgres` is disabled.
60#[cfg(feature = "postgres")]
61pub use sql::{DataExtractor, PostgresConfig, PostgresExtractor, QueryBuilder};
// Glob re-export: every public item of `types` is available at the crate root.
62pub use types::*;