Skip to main content

zer_core/
record.rs

1use std::borrow::Cow;
2
3use ahash::AHashMap;
4
/// Internal numeric identifier of a record.
pub type RecordId = u64;
/// Name under which a value is stored in a record's field map.
pub type FieldName = String;
7
8/// Derive a stable `RecordId` from a `(source, key)` pair using FNV-1a.
9///
10/// The hash is deterministic across runs. same source and key always produce
11/// the same u64.  Use this when loading records from external datasets so that
12/// each record's identity is anchored to its natural key rather than a
13/// caller-managed sequential integer.
14pub fn derive_record_id(source: &str, key: &str) -> RecordId {
15    const OFFSET: u64 = 14695981039346656037;
16    const PRIME: u64 = 1099511628211;
17    let mut h = OFFSET;
18    for &b in source
19        .as_bytes()
20        .iter()
21        .chain(b":".iter())
22        .chain(key.as_bytes())
23    {
24        h ^= b as u64;
25        h = h.wrapping_mul(PRIME);
26    }
27    h
28}
29
30/// Typed value stored in a record field.
31#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
32pub enum FieldValue {
33    Text(String),
34    Int(i64),
35    UInt(u64),
36    Float(f64),
37    Bool(bool),
38    Bytes(Vec<u8>),
39    Null,
40}
41
42impl From<String> for FieldValue {
43    fn from(s: String) -> Self {
44        FieldValue::Text(s)
45    }
46}
47impl From<&str> for FieldValue {
48    fn from(s: &str) -> Self {
49        FieldValue::Text(s.to_owned())
50    }
51}
52impl From<i64> for FieldValue {
53    fn from(i: i64) -> Self {
54        FieldValue::Int(i)
55    }
56}
57impl From<i32> for FieldValue {
58    fn from(i: i32) -> Self {
59        FieldValue::Int(i as i64)
60    }
61}
62impl From<u64> for FieldValue {
63    fn from(u: u64) -> Self {
64        FieldValue::UInt(u)
65    }
66}
67impl From<Vec<u8>> for FieldValue {
68    fn from(b: Vec<u8>) -> Self {
69        FieldValue::Bytes(b)
70    }
71}
72impl From<u32> for FieldValue {
73    fn from(u: u32) -> Self {
74        FieldValue::UInt(u as u64)
75    }
76}
77impl From<f64> for FieldValue {
78    fn from(f: f64) -> Self {
79        FieldValue::Float(f)
80    }
81}
82impl From<f32> for FieldValue {
83    fn from(f: f32) -> Self {
84        FieldValue::Float(f as f64)
85    }
86}
87impl From<bool> for FieldValue {
88    fn from(b: bool) -> Self {
89        FieldValue::Bool(b)
90    }
91}
92impl<T: Into<FieldValue>> From<Option<T>> for FieldValue {
93    fn from(opt: Option<T>) -> Self {
94        match opt {
95            Some(v) => v.into(),
96            None => FieldValue::Null,
97        }
98    }
99}
100
101/// A single data record with a unique ID and a map of field values.
102///
103/// `id` is an internal u64 used for fast indexing and joins. treat it as
104/// opaque.  `key` is the user-visible natural key: the value of whichever
105/// column was nominated as the identity column when loading the dataset (e.g.
106/// BSN, UUID, or primary-key value).  The `.zes` output references records by
107/// `(source, key)`, not by `id`.
108///
109/// # Construction
110///
111/// * [`Record::from_key`]. preferred when loading real data via a
112///   `zer_adapters::DatasetConfig`.  Derives `id` from `hash(source:key)`.
113/// * [`Record::new`]. for synthetic/test records; sets `key = id.to_string()`.
114#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
115pub struct Record {
116    pub id: RecordId,
117    pub key: String,
118    pub fields: AHashMap<FieldName, FieldValue>,
119    pub source: Option<String>,
120}
121
122impl Record {
123    /// Create a record with an explicit numeric ID.
124    ///
125    /// `key` is set to `id.to_string()`.  Use this only for synthetic or test
126    /// records.  For real data use [`Record::from_key`] so that the natural
127    /// key is preserved in the `.zes` output.
128    pub fn new(id: RecordId) -> Self {
129        Self {
130            id,
131            key: id.to_string(),
132            fields: AHashMap::new(),
133            source: None,
134        }
135    }
136
137    /// Create a record whose identity comes from a natural key column.
138    ///
139    /// `id` is derived deterministically via `FNV-1a(source:key)` so that the
140    /// same `(source, key)` pair always produces the same internal ID.  The
141    /// `source` label is stored on the record, so calling [`Record::with_source`]
142    /// afterwards is not required (but is a no-op).
143    pub fn from_key(source: impl Into<String>, key: impl Into<String>) -> Self {
144        let source = source.into();
145        let key = key.into();
146        let id = derive_record_id(&source, &key);
147        Self {
148            id,
149            key,
150            fields: AHashMap::new(),
151            source: Some(source),
152        }
153    }
154
155    pub fn with_source(mut self, source: impl Into<String>) -> Self {
156        self.source = Some(source.into());
157        self
158    }
159
160    pub fn insert(mut self, name: impl Into<String>, value: impl Into<FieldValue>) -> Self {
161        self.fields.insert(name.into(), value.into());
162        self
163    }
164
165    pub fn get(&self, name: &str) -> Option<&FieldValue> {
166        self.fields.get(name)
167    }
168
169    pub fn text(&self, name: &str) -> Option<&str> {
170        match self.fields.get(name) {
171            Some(FieldValue::Text(s)) => Some(s.as_str()),
172            _ => None,
173        }
174    }
175
176    /// Returns the field value as a string, coercing non-text scalars to their string representation.
177    pub fn field_as_str(&self, name: &str) -> Option<Cow<'_, str>> {
178        match self.fields.get(name)? {
179            FieldValue::Text(s) => Some(Cow::Borrowed(s.as_str())),
180            FieldValue::Int(i) => Some(Cow::Owned(i.to_string())),
181            FieldValue::UInt(u) => Some(Cow::Owned(u.to_string())),
182            FieldValue::Float(f) => Some(Cow::Owned(f.to_string())),
183            FieldValue::Bool(b) => Some(Cow::Owned(b.to_string())),
184            FieldValue::Bytes(_) => None,
185            FieldValue::Null => None,
186        }
187    }
188
189    /// Extract a typed value from a named field using the [`FromFieldValue`] trait.
190    ///
191    /// ```rust
192    /// use zer_core::record::{Record, FieldValue};
193    /// let r = Record::new(1).insert("lat", 52.37f64);
194    /// let lat: Option<f64> = r.field_as::<f64>("lat");
195    /// assert_eq!(lat, Some(52.37f64));
196    /// ```
197    pub fn field_as<T: FromFieldValue>(&self, name: &str) -> Option<T> {
198        self.fields.get(name).and_then(T::from_field_value)
199    }
200}
201
202/// Typed extraction from a [`FieldValue`].
203pub trait FromFieldValue: Sized {
204    fn from_field_value(v: &FieldValue) -> Option<Self>;
205}
206
207impl FromFieldValue for f64 {
208    fn from_field_value(v: &FieldValue) -> Option<Self> {
209        match v {
210            FieldValue::Float(f) => Some(*f),
211            FieldValue::Int(i) => Some(*i as f64),
212            FieldValue::UInt(u) => Some(*u as f64),
213            // Text fallback: typed data avoids the parse; string data still works.
214            FieldValue::Text(s) => s.parse::<f64>().ok(),
215            _ => None,
216        }
217    }
218}
219
220impl FromFieldValue for f32 {
221    fn from_field_value(v: &FieldValue) -> Option<Self> {
222        match v {
223            FieldValue::Float(f) => Some(*f as f32),
224            FieldValue::Int(i) => Some(*i as f32),
225            FieldValue::UInt(u) => Some(*u as f32),
226            FieldValue::Text(s) => s.parse::<f32>().ok(),
227            _ => None,
228        }
229    }
230}
231
232impl FromFieldValue for i64 {
233    fn from_field_value(v: &FieldValue) -> Option<Self> {
234        match v {
235            FieldValue::Int(i) => Some(*i),
236            FieldValue::UInt(u) => i64::try_from(*u).ok(),
237            FieldValue::Text(s) => s.parse::<i64>().ok(),
238            _ => None,
239        }
240    }
241}
242
243impl FromFieldValue for i32 {
244    fn from_field_value(v: &FieldValue) -> Option<Self> {
245        match v {
246            FieldValue::Int(i) => i32::try_from(*i).ok(),
247            FieldValue::UInt(u) => i32::try_from(*u).ok(),
248            FieldValue::Text(s) => s.parse::<i32>().ok(),
249            _ => None,
250        }
251    }
252}
253
254impl FromFieldValue for u64 {
255    fn from_field_value(v: &FieldValue) -> Option<Self> {
256        match v {
257            FieldValue::UInt(u) => Some(*u),
258            FieldValue::Int(i) => u64::try_from(*i).ok(),
259            FieldValue::Text(s) => s.parse::<u64>().ok(),
260            _ => None,
261        }
262    }
263}
264
265impl FromFieldValue for u32 {
266    fn from_field_value(v: &FieldValue) -> Option<Self> {
267        match v {
268            FieldValue::UInt(u) => u32::try_from(*u).ok(),
269            FieldValue::Int(i) => u32::try_from(*i).ok(),
270            FieldValue::Text(s) => s.parse::<u32>().ok(),
271            _ => None,
272        }
273    }
274}
275
276impl FromFieldValue for bool {
277    fn from_field_value(v: &FieldValue) -> Option<Self> {
278        match v {
279            FieldValue::Bool(b) => Some(*b),
280            _ => None,
281        }
282    }
283}
284
285impl FromFieldValue for String {
286    fn from_field_value(v: &FieldValue) -> Option<Self> {
287        match v {
288            FieldValue::Text(s) => Some(s.clone()),
289            FieldValue::Int(i) => Some(i.to_string()),
290            FieldValue::UInt(u) => Some(u.to_string()),
291            FieldValue::Float(f) => Some(f.to_string()),
292            FieldValue::Bool(b) => Some(b.to_string()),
293            FieldValue::Bytes(_) | FieldValue::Null => None,
294        }
295    }
296}
297
298impl FromFieldValue for Vec<u8> {
299    fn from_field_value(v: &FieldValue) -> Option<Self> {
300        match v {
301            FieldValue::Bytes(b) => Some(b.clone()),
302            FieldValue::Text(s) => Some(s.as_bytes().to_vec()),
303            _ => None,
304        }
305    }
306}
307
/// Unit tests for record construction, `FieldValue` conversions and typed
/// field extraction.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn field_value_equality() {
        assert_eq!(
            FieldValue::Text("hello".into()),
            FieldValue::Text("hello".into())
        );
        assert_ne!(FieldValue::Int(1), FieldValue::Int(2));
        assert_eq!(FieldValue::Null, FieldValue::Null);
    }

    #[test]
    fn record_builder_chain() {
        let r = Record::new(42)
            .with_source("kvk")
            .insert("name", "Alice")
            .insert("age", 30i64);

        assert_eq!(r.id, 42);
        assert_eq!(r.key, "42");
        assert_eq!(r.source.as_deref(), Some("kvk"));
        assert_eq!(r.text("name"), Some("Alice"));
        assert_eq!(r.get("age"), Some(&FieldValue::Int(30)));
        assert_eq!(r.get("missing"), None);
    }

    #[test]
    fn from_key_derives_id_deterministically() {
        let r = Record::from_key("brp", "893479421");
        assert_eq!(r.key, "893479421");
        assert_eq!(r.source.as_deref(), Some("brp"));
        assert_eq!(r.id, derive_record_id("brp", "893479421"));

        // Same source+key always gives the same id.
        let r2 = Record::from_key("brp", "893479421");
        assert_eq!(r.id, r2.id);

        // Different key gives different id.
        let r3 = Record::from_key("brp", "999999999");
        assert_ne!(r.id, r3.id);

        // Different source gives different id even for the same key.
        let r4 = Record::from_key("kvk", "893479421");
        assert_ne!(r.id, r4.id);
    }

    #[test]
    fn field_as_str_coerces_scalars() {
        let r = Record::new(1)
            .insert("phone", 5551234567i64)
            .insert("lat", 52.345f64)
            .insert("active", true)
            .insert("name", "Alice")
            .insert("empty", FieldValue::Null);

        assert_eq!(r.field_as_str("phone").as_deref(), Some("5551234567"));
        assert_eq!(r.field_as_str("lat").as_deref(), Some("52.345"));
        assert_eq!(r.field_as_str("active").as_deref(), Some("true"));
        assert_eq!(r.field_as_str("name").as_deref(), Some("Alice"));
        assert_eq!(r.field_as_str("empty"), None);
        assert_eq!(r.field_as_str("missing"), None);
    }

    #[test]
    fn from_impls_roundtrip() {
        assert_eq!(FieldValue::from("hello"), FieldValue::Text("hello".into()));
        assert_eq!(FieldValue::from(42i64), FieldValue::Int(42));
        // 2.5 rather than 3.14: the latter trips clippy::approx_constant.
        assert_eq!(FieldValue::from(2.5f64), FieldValue::Float(2.5));
        assert_eq!(FieldValue::from(true), FieldValue::Bool(true));
        assert_eq!(FieldValue::from(Some("hi")), FieldValue::Text("hi".into()));
        assert_eq!(FieldValue::from(None::<&str>), FieldValue::Null);
        // u64 maps to UInt, not Int, so values above i64::MAX survive.
        assert_eq!(FieldValue::from(u64::MAX), FieldValue::UInt(u64::MAX));
        // Owned bytes are wrapped unchanged.
        assert_eq!(
            FieldValue::from(vec![1u8, 2, 3]),
            FieldValue::Bytes(vec![1, 2, 3])
        );
    }

    #[test]
    fn field_as_str_new_variants() {
        let r = Record::new(1)
            .insert("count", 42u64)
            .insert("data", FieldValue::Bytes(vec![0xff]));
        assert_eq!(r.field_as_str("count").as_deref(), Some("42"));
        assert_eq!(r.field_as_str("data"), None);
    }

    #[test]
    fn field_as_typed() {
        let r = Record::new(1)
            .insert("lat", 52.37f64)
            .insert("count", 10u64)
            .insert("age", 30i64)
            .insert("active", true)
            .insert("name", "Alice")
            .insert("blob", FieldValue::Bytes(vec![1, 2, 3]));

        assert_eq!(r.field_as::<f64>("lat"), Some(52.37));
        assert_eq!(r.field_as::<f32>("lat"), Some(52.37f32));
        assert_eq!(r.field_as::<u64>("count"), Some(10u64));
        assert_eq!(r.field_as::<i64>("count"), Some(10i64));
        assert_eq!(r.field_as::<i64>("age"), Some(30i64));
        assert_eq!(r.field_as::<bool>("active"), Some(true));
        assert_eq!(r.field_as::<String>("name"), Some("Alice".to_string()));
        assert_eq!(r.field_as::<Vec<u8>>("blob"), Some(vec![1u8, 2, 3]));
        assert_eq!(r.field_as::<f64>("missing"), None);
    }

    #[test]
    fn field_as_cross_variant_coercions() {
        let r = Record::new(1)
            .insert("int_val", 100i64)
            .insert("uint_val", 200u64);

        // Int → f64
        assert_eq!(r.field_as::<f64>("int_val"), Some(100.0));
        // UInt → f64
        assert_eq!(r.field_as::<f64>("uint_val"), Some(200.0));
        // UInt → i64 (in range)
        assert_eq!(r.field_as::<i64>("uint_val"), Some(200i64));
        // Int → u64 (non-negative)
        assert_eq!(r.field_as::<u64>("int_val"), Some(100u64));

        // negative Int → u64 fails
        let r2 = Record::new(2).insert("neg", -1i64);
        assert_eq!(r2.field_as::<u64>("neg"), None);
    }

    #[test]
    fn field_as_narrowing_out_of_range_is_none() {
        let r = Record::new(3)
            .insert("big_int", i64::MAX)
            .insert("big_uint", u64::MAX)
            .insert("small", 7i64);

        assert_eq!(r.field_as::<i32>("big_int"), None);
        assert_eq!(r.field_as::<u32>("big_int"), None);
        assert_eq!(r.field_as::<i64>("big_uint"), None);
        assert_eq!(r.field_as::<u32>("big_uint"), None);
        assert_eq!(r.field_as::<i32>("small"), Some(7i32));
        assert_eq!(r.field_as::<u32>("small"), Some(7u32));
    }

    #[test]
    fn field_as_parses_numeric_text() {
        let r = Record::new(4).insert("n", "42").insert("junk", "abc");

        assert_eq!(r.field_as::<i64>("n"), Some(42i64));
        assert_eq!(r.field_as::<u64>("n"), Some(42u64));
        assert_eq!(r.field_as::<f64>("n"), Some(42.0));
        assert_eq!(r.field_as::<i64>("junk"), None);
        assert_eq!(r.field_as::<f64>("junk"), None);
        // Text converts to its UTF-8 bytes.
        assert_eq!(r.field_as::<Vec<u8>>("n"), Some(b"42".to_vec()));
    }
}