zer_lib/lib.rs
1//! `zer-lib`, unified entity resolution library.
2//!
//! Provides [`Comparator`], [`Scorer`], and a [`Backend`] abstraction that can
//! dispatch to an accelerated `zer-compute` device when the crate is compiled
//! with the `cuda`, `vulkan`, or `avx2` feature and the requested device
//! initialises. Without those features the crate compiles and runs entirely on
//! CPU via `zer-compare`.
7//!
8//! # Quick start
9//!
10//! ```rust,no_run
11//! use zer_lib::prelude::*;
12//!
13//! let schema = SchemaBuilder::new()
14//! .field("naam", FieldKind::Name)
15//! .field("datum", FieldKind::Date)
16//! .build().unwrap();
17//!
//! let backend = Backend::auto_detect(); // honours `--target=<name>`; CPU when the flag is absent
19//! let comparator = Comparator::new(&schema, &backend);
20//! let scorer = Scorer::new(&backend);
21//! ```
22//!
23//! # Feature flags
24//!
25//! **Compute backends** (mutually exclusive in practice; pick one):
26//!
27//! | Flag | Description |
28//! |------------------|--------------------------------------------------------------------------|
29//! | `cuda` | NVIDIA CUDA via `zer-compute`, requires CUDA Toolkit 13.1+ and `nvcc` |
30//! | `vulkan` | Vulkan 1.3 compute via `zer-compute`, requires `slangc` on `PATH` |
31//! | `avx2` | x86_64 AVX2 SIMD via `zer-compute`, no external toolchain required |
32//! | `cpu` | Explicit scalar CPU path via `zer-compute` (Rayon parallel) |
33//! | `debug-shaders` | Embed debug info in CUDA kernels for `cuda-gdb` / Nsight (needs `cuda`) |
34//!
35//! **Pipeline integration:**
36//!
37//! | Flag | Description |
38//! |------------|--------------------------------------------------------------------------|
39//! | `pipeline` | Enable `Pipeline`, `Ingester`, and related types from `zer-pipeline` |
40//!
41//! **Neural judge ORT execution providers** (independent of compute backend):
42//!
43//! | Flag | Description |
44//! |------------------|--------------------------------------------------------------------------|
45//! | `judge_cpu` | Scalar CPU execution provider for ORT (no extra dependencies) |
46//! | `judge_cuda` | NVIDIA CUDA execution provider for ORT |
47//! | `judge_rocm` | AMD ROCm execution provider for ORT |
48//! | `judge_directml` | Windows DirectML execution provider for ORT |
49//! | `judge_openvino` | Intel OpenVINO execution provider for ORT |
50//!
51//! # CPU-only usage
52//!
53//! Users who never need GPU can depend on `zer-compare` directly and never
54//! import this crate. `zer_compare::FieldComparator` and
55//! `zer_compare::FellegiSunterScorer` are the raw CPU implementations.
56
57#[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
58use std::sync::Arc;
59
60use zer_core::{
61 comparison::{ComparisonBatch, ComparisonVector},
62 record::Record,
63 record_pool::RecordPool,
64 schema::Schema,
65 scoring::{ModelParams, ScoredPair},
66 traits::{Comparator as ComparatorTrait, Result as ZerResult, Scorer as ScorerTrait},
67};
68
69// ── Backend ───────────────────────────────────────────────────────────────────
70
/// Internal representation of the compute path chosen for a [`Backend`].
enum BackendInner {
    /// Plain CPU path; no `zer-compute` device is held.
    Cpu,
    /// A `zer-compute` device, reference-counted so that several consumers can
    /// each keep a handle to the same device.
    #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
    Gpu(Arc<zer_compute::DeviceBackend>),
}
76
77/// Opaque compute backend handle.
78///
79/// Create once and share between [`Comparator`] and [`Scorer`] so both use the
80/// same underlying GPU device.
81///
82/// ```rust,no_run
83/// use zer_lib::prelude::*;
84///
85/// let schema = SchemaBuilder::new().field("naam", FieldKind::Name).build().unwrap();
86/// let backend = Backend::auto_detect();
87/// let comparator = Comparator::new(&schema, &backend);
88/// let scorer = Scorer::new(&backend);
89/// ```
90pub struct Backend {
91 inner: BackendInner,
92 name: &'static str,
93}
94
95impl Backend {
96 /// Read `--target=<name>` from process args and return the matching backend.
97 ///
98 /// Falls back to CPU when the flag is absent, no hardware probing.
99 /// Pass `--target=auto` to restore the hardware-detection order
100 /// (CUDA → Vulkan → AVX2 → CPU).
101 pub fn auto_detect() -> Self {
102 match std::env::args()
103 .find_map(|a| a.strip_prefix("--target=").map(str::to_owned))
104 .as_deref()
105 {
106 Some(t) => Self::from_target(t),
107 None => Self::cpu(),
108 }
109 }
110
111 /// Force the CPU backend regardless of available hardware.
112 pub fn cpu() -> Self {
113 Self { inner: BackendInner::Cpu, name: "cpu" }
114 }
115
116 /// Select a backend by name, called by `auto_detect()` to resolve `--target=<name>`.
117 ///
118 /// Accepted values: `"auto"` (hardware-detect), `"cpu"`, `"cuda"`, `"avx2"`, `"vulkan"`.
119 ///
120 /// Exits with a diagnostic if the target is unknown, not compiled in, or hardware init fails.
121 pub fn from_target(target: &str) -> Self {
122 if target == "cpu" {
123 return Self::cpu();
124 }
125
126 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
127 {
128 let pref = match target {
129 "auto" => zer_compute::BackendPreference::Auto,
130 "cuda" => zer_compute::BackendPreference::Cuda,
131 "vulkan" => zer_compute::BackendPreference::Vulkan,
132 "avx2" => zer_compute::BackendPreference::Avx2,
133 other => {
134 tracing::error!(target = other, "unknown --target; valid: auto, cpu, avx2, cuda, vulkan");
135 std::process::exit(1);
136 }
137 };
138 return match zer_compute::DeviceBackend::from_preference(pref) {
139 Ok(dev) => {
140 let name = dev.name();
141 if dev.is_accelerated() {
142 Self { inner: BackendInner::Gpu(Arc::new(dev)), name }
143 } else {
144 Self { inner: BackendInner::Cpu, name: "cpu" }
145 }
146 }
147 Err(e) => {
148 tracing::error!(target, error = %e, "--target unavailable");
149 std::process::exit(1);
150 }
151 };
152 }
153
154 #[allow(unreachable_code)]
155 {
156 if target == "auto" {
157 return Self::cpu();
158 }
159 tracing::error!(target, "unknown --target; valid values when built without GPU features: auto, cpu");
160 std::process::exit(1);
161 }
162 }
163
164 /// Human-readable name of the active backend: `"cpu"`, `"cuda"`, or `"avx2"`.
165 pub fn name(&self) -> &'static str {
166 self.name
167 }
168
169 /// `true` when a GPU backend is active.
170 pub fn is_gpu(&self) -> bool {
171 !matches!(self.inner, BackendInner::Cpu)
172 }
173}
174
175// ── Comparator ────────────────────────────────────────────────────────────────
176
177enum ComparatorInner {
178 Cpu(zer_compare::FieldComparator),
179 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
180 Gpu(zer_compute::DeviceComparator),
181}
182
183/// Pairwise record comparator with automatic GPU/CPU selection.
184///
185/// Wraps `FieldComparator` (CPU) or `DeviceComparator` (GPU) depending on the
186/// [`Backend`]. Implements [`ComparatorTrait`] identically in both cases.
187pub struct Comparator {
188 inner: ComparatorInner,
189}
190
191impl Comparator {
192 /// Wrap an already-constructed [`zer_compare::FieldComparator`] directly.
193 ///
194 /// Use this when you want to override default similarity functions via
195 /// [`zer_compare::FieldComparator::with_fns`] before creating the comparator.
196 /// Always uses the CPU path; GPU acceleration is not available this way.
197 pub fn from_cpu(fc: zer_compare::FieldComparator) -> Self {
198 Self { inner: ComparatorInner::Cpu(fc) }
199 }
200
201 /// Build a comparator from a schema and backend.
202 pub fn new(schema: &Schema, backend: &Backend) -> Self {
203 match &backend.inner {
204 BackendInner::Cpu => Self {
205 inner: ComparatorInner::Cpu(
206 zer_compare::FieldComparator::from_schema(schema),
207 ),
208 },
209 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
210 BackendInner::Gpu(dev) => Self {
211 inner: ComparatorInner::Gpu(
212 zer_compute::DeviceComparator::new(Arc::clone(dev), schema).unwrap(),
213 ),
214 },
215 }
216 }
217
218 /// Name of the active backend, for diagnostics.
219 pub fn backend_name(&self) -> &'static str {
220 match &self.inner {
221 ComparatorInner::Cpu(_) => "cpu",
222 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
223 ComparatorInner::Gpu(c) => c.backend_name(),
224 }
225 }
226
227 /// Primary hot-path: pool-native batch comparison.
228 ///
229 /// `pool` is a `RecordPool` built from the candidate records; `pair_indices`
230 /// holds `(i, j)` pairs where `i` and `j` are indices into the pool.
231 /// Avoids all `Record::clone()` and `HashMap` lookups, the fastest path for
232 /// large BRP-style jobs where records are already loaded into a pool.
233 pub fn compare_batch_from_pool(
234 &self,
235 pool: &RecordPool,
236 pair_indices: &[(usize, usize)],
237 schema: &Schema,
238 ) -> ComparisonBatch {
239 match &self.inner {
240 ComparatorInner::Cpu(c) => c.compare_batch_from_pool(pool, pair_indices, schema),
241 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
242 ComparatorInner::Gpu(c) => c.compare_batch_from_pool(pool, pair_indices, schema),
243 }
244 }
245
246 /// Convenience wrapper: builds a pool from a flat `records` slice and compares
247 /// the `pair_indices` pairs. No `Record::clone()`.
248 pub fn compare_batch_indexed(
249 &self,
250 records: &[Record],
251 pair_indices: &[(usize, usize)],
252 schema: &Schema,
253 ) -> ComparisonBatch {
254 let pool = RecordPool::from_records(records, schema);
255 self.compare_batch_from_pool(&pool, pair_indices, schema)
256 }
257}
258
259impl ComparatorTrait for Comparator {
260 fn compare(&self, a: &Record, b: &Record, schema: &Schema) -> ComparisonVector {
261 match &self.inner {
262 ComparatorInner::Cpu(c) => c.compare(a, b, schema),
263 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
264 ComparatorInner::Gpu(c) => c.compare(a, b, schema),
265 }
266 }
267
268 fn compare_batch_from_pool(
269 &self,
270 pool: &RecordPool,
271 indices: &[(usize, usize)],
272 schema: &Schema,
273 ) -> ComparisonBatch {
274 self.compare_batch_from_pool(pool, indices, schema)
275 }
276}
277
278// ── Scorer ────────────────────────────────────────────────────────────────────
279
280enum ScorerInner {
281 Cpu(zer_compare::FellegiSunterScorer),
282 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
283 Gpu(zer_compute::DeviceScorer),
284}
285
286/// Fellegi-Sunter scorer with automatic GPU/CPU EM acceleration.
287///
288/// `score` / `score_batch` always run on CPU, no kernel overhead for small
289/// operations. `estimate_params` uses the GPU EM kernel when the backend is
290/// GPU and the batch exceeds the transfer break-even threshold; otherwise it
291/// falls back to `zer_compare::run_em` on the CPU.
292pub struct Scorer {
293 inner: ScorerInner,
294}
295
296impl Scorer {
297 /// Build a scorer using the given backend.
298 pub fn new(backend: &Backend) -> Self {
299 match &backend.inner {
300 BackendInner::Cpu => Self {
301 inner: ScorerInner::Cpu(zer_compare::FellegiSunterScorer),
302 },
303 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
304 BackendInner::Gpu(dev) => Self {
305 inner: ScorerInner::Gpu(zer_compute::DeviceScorer::new(Arc::clone(dev))),
306 },
307 }
308 }
309
310 /// Name of the active backend, for diagnostics.
311 pub fn backend_name(&self) -> &'static str {
312 match &self.inner {
313 ScorerInner::Cpu(_) => "cpu",
314 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
315 ScorerInner::Gpu(s) => s.backend_name(),
316 }
317 }
318}
319
320impl ScorerTrait for Scorer {
321 fn score(&self, vector: &ComparisonVector, params: &ModelParams) -> ScoredPair {
322 match &self.inner {
323 ScorerInner::Cpu(s) => s.score(vector, params),
324 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
325 ScorerInner::Gpu(s) => s.score(vector, params),
326 }
327 }
328
329 fn score_batch(
330 &self,
331 batch: &ComparisonBatch,
332 params: &ModelParams,
333 ) -> Vec<ScoredPair> {
334 match &self.inner {
335 ScorerInner::Cpu(s) => s.score_batch(batch, params),
336 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
337 ScorerInner::Gpu(s) => s.score_batch(batch, params),
338 }
339 }
340
341 fn estimate_params(
342 &self,
343 batch: &ComparisonBatch,
344 init: Option<ModelParams>,
345 max_iter: usize,
346 ) -> ZerResult<ModelParams> {
347 match &self.inner {
348 ScorerInner::Cpu(s) => s.estimate_params(batch, init, max_iter),
349 #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
350 ScorerInner::Gpu(s) => s.estimate_params(batch, init, max_iter),
351 }
352 }
353}
354
355// ── Low-level kernel access for power users ───────────────────────────────────
356
/// Raw kernel dispatch, for users writing custom kernels.
///
/// Only available with the `cuda`, `avx2`, or `vulkan` feature. Most users
/// should use [`Comparator`] and [`Scorer`] instead.
///
/// # Writing a custom kernel
///
/// 1. Define a zero-sized marker struct and `impl Kernel for It`.
/// 2. `impl KernelDispatch<It> for zer_compute::backend::cpu::CpuDevice`, CPU fallback.
/// 3. `impl KernelDispatch<It> for zer_compute::backend::cuda::CudaDevice`, CUDA path.
/// 4. Add the `impl KernelDispatch<It> for DeviceBackend` match in
///    `zer_compute::backend::mod`.
/// 5. Access the raw device via `zer_lib::kernel::DeviceBackend` (also
///    reachable as `zer_lib::compute::DeviceBackend`).
#[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
pub mod kernel {
    pub use zer_compute::{
        backend::DeviceBackend,
        error::GpuError,
        kernel::{Kernel, KernelDispatch},
    };
}
378
379// ── Crate re-exports ──────────────────────────────────────────────────────────
380
381pub use zer_blocking as blocking;
382pub use zer_compare as compare;
383pub use zer_core as core;
384pub use zer_schema as schema;
385pub use zer_cluster as cluster;
386
387#[cfg(feature = "pipeline")]
388pub use zer_pipeline as pipeline;
389
390#[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
391pub use zer_compute as compute;
392
393// ── Prelude ───────────────────────────────────────────────────────────────────
394
395pub mod prelude {
396 // Concrete auto-detecting types, primary user-facing API
397 pub use crate::{Backend, Comparator, Scorer};
398
399 // Core data types
400 pub use zer_core::{
401 comparison::{ComparisonBatch, ComparisonLevel, ComparisonVector},
402 entity::{Entity, EntityId, EntityMember, ResolutionMethod},
403 error::ZerError,
404 record::{FieldValue, Record, RecordId},
405 record_pool::RecordPool,
406 schema::{FieldKind, Schema, SchemaBuilder},
407 scoring::{MatchBand, ModelParams, ScoredPair},
408 traits::{
409 BlockIndex, Blocker, Clusterer, EntityStore, Judge, JudgeVerdict, RecordStore,
410 // Renamed to avoid shadowing the concrete Comparator / Scorer structs above
411 Comparator as ComparatorTrait,
412 Scorer as ScorerTrait,
413 },
414 VecRecordStore,
415 };
416
417 // Blocking
418 pub use zer_blocking::{
419 BlockerFactory, CompositeBlocker, InvertedIndex, SchemaCategory,
420 keys::{
421 AddressInitialKey, AliasPhoneticKey, CameraTimeWindowKey, DateFragmentKey,
422 DateGranularity, DocumentDigitSuffixKey, DocumentSuffixKey, ExactFieldKey,
423 FuzzyYearKey, GeoGridKey, LicensePlateNormKey, PhoneticAlgo, PhoneticNameDobKey,
424 PlateOCRFuzzyKey, SuffixKey, TransliteratedPhoneticKey,
425 },
426 };
427
428 // CPU implementations, available directly for users who want the raw types
429 pub use zer_compare::{
430 FellegiSunterScorer, FieldComparator, LevelThresholds, SimilarityFn,
431 JaroWinklerSimilarity, PhoneticEqualitySimilarity, TokenOverlapSimilarity,
432 AddressTokenOverlap, StreetNumberEditDistance,
433 };
434
435 // Schema registry and artifact management (Phase 6)
436 pub use zer_schema::{ModelArtifact, SchemaFingerprint, SchemaInferrer, SchemaRegistry, StartupMode};
437
438 // Clustering and entity store (Phase 6)
439 pub use zer_cluster::{ClusterConfig, ConnectedComponentsClusterer, ZalEntityStore};
440
441 // Pipeline types, available with the `pipeline` feature (no polars required)
442 #[cfg(feature = "pipeline")]
443 pub use zer_pipeline::{
444 BatchReport, ClusterIter, ClusterView, IngestResult, Ingester,
445 Pipeline, PipelineBuilder, PipelineConfig, RateConfig,
446 };
447
448}