Skip to main content

zer_schema/
lib.rs

1//! Schema registry and model persistence for zer.
2//!
3//! This crate provides three cooperating components:
4//!
5//! 1. **[`SchemaInferrer`]**: automatic `FieldKind` detection from column names
6//!    and value patterns; produces a [`Schema`] without requiring the caller to
7//!    know the dataset structure up front.
8//!
9//! 2. **[`SchemaFingerprint`]**: a compact identity for a schema plus its data
10//!    distribution (SHA-256 hash of field names/kinds, per-field null rates,
11//!    and cardinalities).
12//!
13//! 3. **[`SchemaRegistry`]**: a `sled`-backed persistent store for
14//!    [`ModelArtifact`]s (trained Fellegi-Sunter parameters). On startup the
15//!    pipeline calls [`SchemaRegistry::lookup_startup_mode`] to decide whether
16//!    to load params directly (exact match), warm-start EM (similar schema), or
17//!    run full EM from priors (new/incompatible schema).
18//!
19//! [`Schema`]: zer_core::schema::Schema
20
21pub mod artifact;
22pub mod config;
23pub mod fingerprint;
24pub mod infer;
25pub mod registry;
26pub mod similarity;
27
28pub use artifact::ModelArtifact;
29pub use config::{NameHeuristics, ValuePatterns};
30pub use fingerprint::{FieldStats, SchemaFingerprint};
31pub use infer::SchemaInferrer;
32pub use registry::{SchemaRegistry, StartupMode};
33pub use similarity::{fingerprint_distance, EXACT_MATCH_THRESHOLD, WARM_START_THRESHOLD};