Skip to main content

zer_lib/
lib.rs

1//! `zer-lib`, unified entity resolution library.
2//!
3//! Provides [`Comparator`], [`Scorer`], and a [`Backend`] abstraction that
4//! selects GPU acceleration automatically when compiled with the `cuda` or
5//! `vulkan` features and suitable hardware is present.  Without those features
6//! the crate compiles and runs entirely on CPU via `zer-compare`.
7//!
8//! # Quick start
9//!
10//! ```rust,no_run
11//! use zer_lib::prelude::*;
12//!
13//! let schema = SchemaBuilder::new()
14//!     .field("naam",  FieldKind::Name)
15//!     .field("datum", FieldKind::Date)
16//!     .build().unwrap();
17//!
//! let backend    = Backend::auto_detect();        // reads --target=<name>; CPU when the flag is absent
19//! let comparator = Comparator::new(&schema, &backend);
20//! let scorer     = Scorer::new(&backend);
21//! ```
22//!
23//! # Feature flags
24//!
25//! **Compute backends** (mutually exclusive in practice; pick one):
26//!
27//! | Flag             | Description                                                              |
28//! |------------------|--------------------------------------------------------------------------|
29//! | `cuda`           | NVIDIA CUDA via `zer-compute`, requires CUDA Toolkit 13.1+ and `nvcc`   |
30//! | `vulkan`         | Vulkan 1.3 compute via `zer-compute`, requires `slangc` on `PATH`        |
31//! | `avx2`           | x86_64 AVX2 SIMD via `zer-compute`, no external toolchain required       |
32//! | `cpu`            | Explicit scalar CPU path via `zer-compute` (Rayon parallel)              |
33//! | `debug-shaders`  | Embed debug info in CUDA kernels for `cuda-gdb` / Nsight (needs `cuda`) |
34//!
35//! **Pipeline integration:**
36//!
37//! | Flag       | Description                                                              |
38//! |------------|--------------------------------------------------------------------------|
39//! | `pipeline` | Enable `Pipeline`, `Ingester`, and related types from `zer-pipeline`     |
40//!
41//! **Neural judge ORT execution providers** (independent of compute backend):
42//!
43//! | Flag             | Description                                                              |
44//! |------------------|--------------------------------------------------------------------------|
45//! | `judge_cpu`      | Scalar CPU execution provider for ORT (no extra dependencies)            |
46//! | `judge_cuda`     | NVIDIA CUDA execution provider for ORT                                   |
47//! | `judge_rocm`     | AMD ROCm execution provider for ORT                                      |
48//! | `judge_directml` | Windows DirectML execution provider for ORT                              |
49//! | `judge_openvino` | Intel OpenVINO execution provider for ORT                                |
50//!
51//! # CPU-only usage
52//!
53//! Users who never need GPU can depend on `zer-compare` directly and never
54//! import this crate.  `zer_compare::FieldComparator` and
55//! `zer_compare::FellegiSunterScorer` are the raw CPU implementations.
56
57#[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
58use std::sync::Arc;
59
60use zer_core::{
61    comparison::{ComparisonBatch, ComparisonVector},
62    record::Record,
63    record_pool::RecordPool,
64    schema::Schema,
65    scoring::{ModelParams, ScoredPair},
66    traits::{Comparator as ComparatorTrait, Result as ZerResult, Scorer as ScorerTrait},
67};
68
69// ── Backend ───────────────────────────────────────────────────────────────────
70
/// Private storage behind [`Backend`]: either the plain CPU path or a shared
/// handle to a `zer_compute` device.
enum BackendInner {
    /// No device handle is held; [`Comparator`] and [`Scorer`] built from this
    /// variant wrap the `zer_compare` CPU types.
    Cpu,
    /// Reference-counted `zer_compute` device.  The variant only exists when at
    /// least one of the `cuda`, `avx2` or `vulkan` features is compiled in.
    #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
    Gpu(Arc<zer_compute::DeviceBackend>),
}
76
77/// Opaque compute backend handle.
78///
79/// Create once and share between [`Comparator`] and [`Scorer`] so both use the
80/// same underlying GPU device.
81///
82/// ```rust,no_run
83/// use zer_lib::prelude::*;
84///
85/// let schema     = SchemaBuilder::new().field("naam", FieldKind::Name).build().unwrap();
86/// let backend    = Backend::auto_detect();
87/// let comparator = Comparator::new(&schema, &backend);
88/// let scorer     = Scorer::new(&backend);
89/// ```
90pub struct Backend {
91    inner: BackendInner,
92    name:  &'static str,
93}
94
95impl Backend {
96    /// Read `--target=<name>` from process args and return the matching backend.
97    ///
98    /// Falls back to CPU when the flag is absent, no hardware probing.
99    /// Pass `--target=auto` to restore the hardware-detection order
100    /// (CUDA → Vulkan → AVX2 → CPU).
101    pub fn auto_detect() -> Self {
102        match std::env::args()
103            .find_map(|a| a.strip_prefix("--target=").map(str::to_owned))
104            .as_deref()
105        {
106            Some(t) => Self::from_target(t),
107            None    => Self::cpu(),
108        }
109    }
110
111    /// Force the CPU backend regardless of available hardware.
112    pub fn cpu() -> Self {
113        Self { inner: BackendInner::Cpu, name: "cpu" }
114    }
115
116    /// Select a backend by name, called by `auto_detect()` to resolve `--target=<name>`.
117    ///
118    /// Accepted values: `"auto"` (hardware-detect), `"cpu"`, `"cuda"`, `"avx2"`, `"vulkan"`.
119    ///
120    /// Exits with a diagnostic if the target is unknown, not compiled in, or hardware init fails.
121    pub fn from_target(target: &str) -> Self {
122        if target == "cpu" {
123            return Self::cpu();
124        }
125
126        #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
127        {
128            let pref = match target {
129                "auto"   => zer_compute::BackendPreference::Auto,
130                "cuda"   => zer_compute::BackendPreference::Cuda,
131                "vulkan" => zer_compute::BackendPreference::Vulkan,
132                "avx2"   => zer_compute::BackendPreference::Avx2,
133                other => {
134                    tracing::error!(target = other, "unknown --target; valid: auto, cpu, avx2, cuda, vulkan");
135                    std::process::exit(1);
136                }
137            };
138            return match zer_compute::DeviceBackend::from_preference(pref) {
139                Ok(dev) => {
140                    let name = dev.name();
141                    if dev.is_accelerated() {
142                        Self { inner: BackendInner::Gpu(Arc::new(dev)), name }
143                    } else {
144                        Self { inner: BackendInner::Cpu, name: "cpu" }
145                    }
146                }
147                Err(e) => {
148                    tracing::error!(target, error = %e, "--target unavailable");
149                    std::process::exit(1);
150                }
151            };
152        }
153
154        #[allow(unreachable_code)]
155        {
156            if target == "auto" {
157                return Self::cpu();
158            }
159            tracing::error!(target, "unknown --target; valid values when built without GPU features: auto, cpu");
160            std::process::exit(1);
161        }
162    }
163
164    /// Human-readable name of the active backend: `"cpu"`, `"cuda"`, or `"avx2"`.
165    pub fn name(&self) -> &'static str {
166        self.name
167    }
168
169    /// `true` when a GPU backend is active.
170    pub fn is_gpu(&self) -> bool {
171        !matches!(self.inner, BackendInner::Cpu)
172    }
173}
174
175// ── Comparator ────────────────────────────────────────────────────────────────
176
177enum ComparatorInner {
178    Cpu(zer_compare::FieldComparator),
179    #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
180    Gpu(zer_compute::DeviceComparator),
181}
182
183/// Pairwise record comparator with automatic GPU/CPU selection.
184///
185/// Wraps `FieldComparator` (CPU) or `DeviceComparator` (GPU) depending on the
186/// [`Backend`].  Implements [`ComparatorTrait`] identically in both cases.
187pub struct Comparator {
188    inner: ComparatorInner,
189}
190
191impl Comparator {
192    /// Wrap an already-constructed [`zer_compare::FieldComparator`] directly.
193    ///
194    /// Use this when you want to override default similarity functions via
195    /// [`zer_compare::FieldComparator::with_fns`] before creating the comparator.
196    /// Always uses the CPU path; GPU acceleration is not available this way.
197    pub fn from_cpu(fc: zer_compare::FieldComparator) -> Self {
198        Self { inner: ComparatorInner::Cpu(fc) }
199    }
200
201    /// Build a comparator from a schema and backend.
202    pub fn new(schema: &Schema, backend: &Backend) -> Self {
203        match &backend.inner {
204            BackendInner::Cpu => Self {
205                inner: ComparatorInner::Cpu(
206                    zer_compare::FieldComparator::from_schema(schema),
207                ),
208            },
209            #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
210            BackendInner::Gpu(dev) => Self {
211                inner: ComparatorInner::Gpu(
212                    zer_compute::DeviceComparator::new(Arc::clone(dev), schema).unwrap(),
213                ),
214            },
215        }
216    }
217
218    /// Name of the active backend, for diagnostics.
219    pub fn backend_name(&self) -> &'static str {
220        match &self.inner {
221            ComparatorInner::Cpu(_) => "cpu",
222            #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
223            ComparatorInner::Gpu(c) => c.backend_name(),
224        }
225    }
226
227    /// Primary hot-path: pool-native batch comparison.
228    ///
229    /// `pool` is a `RecordPool` built from the candidate records; `pair_indices`
230    /// holds `(i, j)` pairs where `i` and `j` are indices into the pool.
231    /// Avoids all `Record::clone()` and `HashMap` lookups, the fastest path for
232    /// large BRP-style jobs where records are already loaded into a pool.
233    pub fn compare_batch_from_pool(
234        &self,
235        pool:         &RecordPool,
236        pair_indices: &[(usize, usize)],
237        schema:       &Schema,
238    ) -> ComparisonBatch {
239        match &self.inner {
240            ComparatorInner::Cpu(c) => c.compare_batch_from_pool(pool, pair_indices, schema),
241            #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
242            ComparatorInner::Gpu(c) => c.compare_batch_from_pool(pool, pair_indices, schema),
243        }
244    }
245
246    /// Convenience wrapper: builds a pool from a flat `records` slice and compares
247    /// the `pair_indices` pairs.  No `Record::clone()`.
248    pub fn compare_batch_indexed(
249        &self,
250        records:      &[Record],
251        pair_indices: &[(usize, usize)],
252        schema:       &Schema,
253    ) -> ComparisonBatch {
254        let pool = RecordPool::from_records(records, schema);
255        self.compare_batch_from_pool(&pool, pair_indices, schema)
256    }
257}
258
259impl ComparatorTrait for Comparator {
260    fn compare(&self, a: &Record, b: &Record, schema: &Schema) -> ComparisonVector {
261        match &self.inner {
262            ComparatorInner::Cpu(c) => c.compare(a, b, schema),
263            #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
264            ComparatorInner::Gpu(c) => c.compare(a, b, schema),
265        }
266    }
267
268    fn compare_batch_from_pool(
269        &self,
270        pool:    &RecordPool,
271        indices: &[(usize, usize)],
272        schema:  &Schema,
273    ) -> ComparisonBatch {
274        self.compare_batch_from_pool(pool, indices, schema)
275    }
276}
277
278// ── Scorer ────────────────────────────────────────────────────────────────────
279
280enum ScorerInner {
281    Cpu(zer_compare::FellegiSunterScorer),
282    #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
283    Gpu(zer_compute::DeviceScorer),
284}
285
286/// Fellegi-Sunter scorer with automatic GPU/CPU EM acceleration.
287///
288/// `score` / `score_batch` always run on CPU, no kernel overhead for small
289/// operations.  `estimate_params` uses the GPU EM kernel when the backend is
290/// GPU and the batch exceeds the transfer break-even threshold; otherwise it
291/// falls back to `zer_compare::run_em` on the CPU.
292pub struct Scorer {
293    inner: ScorerInner,
294}
295
296impl Scorer {
297    /// Build a scorer using the given backend.
298    pub fn new(backend: &Backend) -> Self {
299        match &backend.inner {
300            BackendInner::Cpu => Self {
301                inner: ScorerInner::Cpu(zer_compare::FellegiSunterScorer),
302            },
303            #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
304            BackendInner::Gpu(dev) => Self {
305                inner: ScorerInner::Gpu(zer_compute::DeviceScorer::new(Arc::clone(dev))),
306            },
307        }
308    }
309
310    /// Name of the active backend, for diagnostics.
311    pub fn backend_name(&self) -> &'static str {
312        match &self.inner {
313            ScorerInner::Cpu(_) => "cpu",
314            #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
315            ScorerInner::Gpu(s) => s.backend_name(),
316        }
317    }
318}
319
320impl ScorerTrait for Scorer {
321    fn score(&self, vector: &ComparisonVector, params: &ModelParams) -> ScoredPair {
322        match &self.inner {
323            ScorerInner::Cpu(s) => s.score(vector, params),
324            #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
325            ScorerInner::Gpu(s) => s.score(vector, params),
326        }
327    }
328
329    fn score_batch(
330        &self,
331        batch:  &ComparisonBatch,
332        params: &ModelParams,
333    ) -> Vec<ScoredPair> {
334        match &self.inner {
335            ScorerInner::Cpu(s) => s.score_batch(batch, params),
336            #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
337            ScorerInner::Gpu(s) => s.score_batch(batch, params),
338        }
339    }
340
341    fn estimate_params(
342        &self,
343        batch:    &ComparisonBatch,
344        init:     Option<ModelParams>,
345        max_iter: usize,
346    ) -> ZerResult<ModelParams> {
347        match &self.inner {
348            ScorerInner::Cpu(s) => s.estimate_params(batch, init, max_iter),
349            #[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
350            ScorerInner::Gpu(s) => s.estimate_params(batch, init, max_iter),
351        }
352    }
353}
354
355// ── Low-level kernel access for power users ───────────────────────────────────
356
/// Raw kernel dispatch, for users writing custom kernels.
///
/// Only compiled when at least one of the `cuda`, `avx2` or `vulkan` features
/// is enabled.  Most users should use [`Comparator`] and [`Scorer`] instead.
///
/// # Writing a custom kernel
///
/// 1. Define a zero-sized marker struct and `impl Kernel for It`.
/// 2. `impl KernelDispatch<It> for zer_compute::backend::cpu::CpuDevice`, CPU fallback.
/// 3. `impl KernelDispatch<It> for zer_compute::backend::cuda::CudaDevice`, CUDA path.
/// 4. Add the `impl KernelDispatch<It> for DeviceBackend` match in
///    `zer_compute::backend::mod`.
/// 5. Access the raw device via `zer_lib::compute::DeviceBackend` (also
///    re-exported here as `zer_lib::kernel::DeviceBackend`).
#[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
pub mod kernel {
    pub use zer_compute::backend::DeviceBackend;
    pub use zer_compute::error::GpuError;
    pub use zer_compute::kernel::{Kernel, KernelDispatch};
}
378
379// ── Crate re-exports ──────────────────────────────────────────────────────────
380
381pub use zer_blocking as blocking;
382pub use zer_compare  as compare;
383pub use zer_core     as core;
384pub use zer_schema   as schema;
385pub use zer_cluster  as cluster;
386
387#[cfg(feature = "pipeline")]
388pub use zer_pipeline as pipeline;
389
390#[cfg(any(feature = "cuda", feature = "avx2", feature = "vulkan"))]
391pub use zer_compute as compute;
392
393// ── Prelude ───────────────────────────────────────────────────────────────────
394
395pub mod prelude {
396    // Concrete auto-detecting types, primary user-facing API
397    pub use crate::{Backend, Comparator, Scorer};
398
399    // Core data types
400    pub use zer_core::{
401        comparison::{ComparisonBatch, ComparisonLevel, ComparisonVector},
402        entity::{Entity, EntityId, EntityMember, ResolutionMethod},
403        error::ZerError,
404        record::{FieldValue, Record, RecordId},
405        record_pool::RecordPool,
406        schema::{FieldKind, Schema, SchemaBuilder},
407        scoring::{MatchBand, ModelParams, ScoredPair},
408        traits::{
409            BlockIndex, Blocker, Clusterer, EntityStore, Judge, JudgeVerdict, RecordStore,
410            // Renamed to avoid shadowing the concrete Comparator / Scorer structs above
411            Comparator as ComparatorTrait,
412            Scorer as ScorerTrait,
413        },
414        VecRecordStore,
415    };
416
417    // Blocking
418    pub use zer_blocking::{
419        BlockerFactory, CompositeBlocker, InvertedIndex, SchemaCategory,
420        keys::{
421            AddressInitialKey, AliasPhoneticKey, CameraTimeWindowKey, DateFragmentKey,
422            DateGranularity, DocumentDigitSuffixKey, DocumentSuffixKey, ExactFieldKey,
423            FuzzyYearKey, GeoGridKey, LicensePlateNormKey, PhoneticAlgo, PhoneticNameDobKey,
424            PlateOCRFuzzyKey, SuffixKey, TransliteratedPhoneticKey,
425        },
426    };
427
428    // CPU implementations, available directly for users who want the raw types
429    pub use zer_compare::{
430        FellegiSunterScorer, FieldComparator, LevelThresholds, SimilarityFn,
431        JaroWinklerSimilarity, PhoneticEqualitySimilarity, TokenOverlapSimilarity,
432        AddressTokenOverlap, StreetNumberEditDistance,
433    };
434
435    // Schema registry and artifact management (Phase 6)
436    pub use zer_schema::{ModelArtifact, SchemaFingerprint, SchemaInferrer, SchemaRegistry, StartupMode};
437
438    // Clustering and entity store (Phase 6)
439    pub use zer_cluster::{ClusterConfig, ConnectedComponentsClusterer, ZalEntityStore};
440
441    // Pipeline types, available with the `pipeline` feature (no polars required)
442    #[cfg(feature = "pipeline")]
443    pub use zer_pipeline::{
444        BatchReport, ClusterIter, ClusterView, IngestResult, Ingester,
445        Pipeline, PipelineBuilder, PipelineConfig, RateConfig,
446    };
447
448}