// zer_core/schema.rs

1use crate::{error::ZerError, record::FieldName};
2
3#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
4pub enum FieldKind {
5    Name,
6    /// ISO 8601 date (YYYY-MM-DD), no time component.
7    Date,
8    Address,
9    Phone,
10    Id,
11    FreeText,
12    Numeric,
13    Categorical,
14    /// Pipe-delimited list of name aliases (e.g. SIS II `alias_namen` field).
15    Alias,
16    /// Vehicle registration plate (e.g. Dutch kenteken). Enables OCR-fuzzy blocking.
17    LicensePlate,
18    /// Geographic coordinate stored as a decimal float string (lat or lon).
19    GpsCoordinate,
20    /// ISO 8601 datetime including time component (YYYY-MM-DDTHH:MM:SS).
21    Timestamp,
22}
23
24/// Name and kind for a single schema field.
25#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
26pub struct FieldDef {
27    pub name: FieldName,
28    pub kind: FieldKind,
29}
30
31/// Ordered list of field definitions for a dataset.
32#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
33pub struct Schema {
34    pub fields: Vec<FieldDef>,
35}
36
37impl Schema {
38    /// Iterate over field names that match a given kind.
39    pub fn fields_of_kind(&self, kind: FieldKind) -> impl Iterator<Item = &str> {
40        self.fields.iter()
41            .filter(move |f| f.kind == kind)
42            .map(|f| f.name.as_str())
43    }
44
45    /// Return the position of a field by name, or `None` if absent.
46    pub fn field_index(&self, name: &str) -> Option<usize> {
47        self.fields.iter().position(|f| f.name == name)
48    }
49
50    pub fn len(&self) -> usize {
51        self.fields.len()
52    }
53
54    pub fn is_empty(&self) -> bool {
55        self.fields.is_empty()
56    }
57}
58
59/// Fluent builder for constructing a `Schema`.
60#[derive(Default)]
61pub struct SchemaBuilder {
62    fields: Vec<FieldDef>,
63}
64
65impl SchemaBuilder {
66    pub fn new() -> Self {
67        Self::default()
68    }
69
70    pub fn field(mut self, name: &str, kind: FieldKind) -> Self {
71        self.fields.push(FieldDef { name: name.into(), kind });
72        self
73    }
74
75    pub fn build(self) -> Result<Schema, ZerError> {
76        if self.fields.is_empty() {
77            return Err(ZerError::EmptySchema);
78        }
79        Ok(Schema { fields: self.fields })
80    }
81}
82
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-field schema (two names and a date) shared by several tests.
    fn person_schema() -> Schema {
        SchemaBuilder::new()
            .field("first_name", FieldKind::Name)
            .field("last_name", FieldKind::Name)
            .field("dob", FieldKind::Date)
            .build()
            .unwrap()
    }

    #[test]
    fn schema_builder_rejects_empty() {
        // Pin the exact variant: a bare `is_err()` would also pass for an
        // unrelated failure.
        let result = SchemaBuilder::new().build();
        assert!(matches!(result, Err(ZerError::EmptySchema)));
    }

    #[test]
    fn schema_builder_produces_correct_field_count() {
        let s = person_schema();
        assert_eq!(s.len(), 3);
        assert!(!s.is_empty());
    }

    #[test]
    fn fields_of_kind_filters_correctly() {
        let s = person_schema();

        let names: Vec<&str> = s.fields_of_kind(FieldKind::Name).collect();
        assert_eq!(names, vec!["first_name", "last_name"]);

        let dates: Vec<&str> = s.fields_of_kind(FieldKind::Date).collect();
        assert_eq!(dates, vec!["dob"]);

        let phones: Vec<&str> = s.fields_of_kind(FieldKind::Phone).collect();
        assert!(phones.is_empty());
    }

    #[test]
    fn field_index_lookup() {
        let s = SchemaBuilder::new()
            .field("name", FieldKind::Name)
            .field("dob", FieldKind::Date)
            .build()
            .unwrap();
        assert_eq!(s.field_index("name"), Some(0));
        assert_eq!(s.field_index("dob"), Some(1));
        assert_eq!(s.field_index("missing"), None);
    }
}