// zer_core/schema.rs

use crate::{error::ZerError, record::FieldName};

3#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
4pub enum FieldKind {
5    Name,
6    /// ISO 8601 date (YYYY-MM-DD), no time component.
7    Date,
8    Address,
9    Phone,
10    Id,
11    FreeText,
12    Numeric,
13    Categorical,
14    /// Pipe-delimited list of name aliases (e.g. SIS II `alias_namen` field).
15    Alias,
16    /// Vehicle registration plate (e.g. Dutch kenteken). Enables OCR-fuzzy blocking.
17    LicensePlate,
18    /// Geographic coordinate stored as a decimal float string (lat or lon).
19    GpsCoordinate,
20    /// ISO 8601 datetime including time component (YYYY-MM-DDTHH:MM:SS).
21    Timestamp,
22}
23
24/// Name and kind for a single schema field.
25#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
26pub struct FieldDef {
27    pub name: FieldName,
28    pub kind: FieldKind,
29}
30
31/// Ordered list of field definitions for a dataset.
32#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
33pub struct Schema {
34    pub fields: Vec<FieldDef>,
35}
36
37impl Schema {
38    /// Iterate over field names that match a given kind.
39    pub fn fields_of_kind(&self, kind: FieldKind) -> impl Iterator<Item = &str> {
40        self.fields
41            .iter()
42            .filter(move |f| f.kind == kind)
43            .map(|f| f.name.as_str())
44    }
45
46    /// Return the position of a field by name, or `None` if absent.
47    pub fn field_index(&self, name: &str) -> Option<usize> {
48        self.fields.iter().position(|f| f.name == name)
49    }
50
51    pub fn len(&self) -> usize {
52        self.fields.len()
53    }
54
55    pub fn is_empty(&self) -> bool {
56        self.fields.is_empty()
57    }
58}
59
60/// Fluent builder for constructing a `Schema`.
61#[derive(Default)]
62pub struct SchemaBuilder {
63    fields: Vec<FieldDef>,
64}
65
66impl SchemaBuilder {
67    pub fn new() -> Self {
68        Self::default()
69    }
70
71    pub fn field(mut self, name: &str, kind: FieldKind) -> Self {
72        self.fields.push(FieldDef {
73            name: name.into(),
74            kind,
75        });
76        self
77    }
78
79    pub fn build(self) -> Result<Schema, ZerError> {
80        if self.fields.is_empty() {
81            return Err(ZerError::EmptySchema);
82        }
83        Ok(Schema {
84            fields: self.fields,
85        })
86    }
87}
88
#[cfg(test)]
mod tests {
    use super::*;

    // An empty builder must not yield a schema.
    #[test]
    fn schema_builder_rejects_empty() {
        assert!(SchemaBuilder::new().build().is_err());
    }

    // Every `field` call contributes exactly one entry.
    #[test]
    fn schema_builder_produces_correct_field_count() {
        let s = SchemaBuilder::new()
            .field("first_name", FieldKind::Name)
            .field("last_name", FieldKind::Name)
            .field("dob", FieldKind::Date)
            .build()
            .unwrap();
        assert_eq!(s.len(), 3);
    }

    // Filtering by kind keeps schema order and yields nothing for absent kinds.
    #[test]
    fn fields_of_kind_filters_correctly() {
        let s = SchemaBuilder::new()
            .field("first_name", FieldKind::Name)
            .field("last_name", FieldKind::Name)
            .field("dob", FieldKind::Date)
            .build()
            .unwrap();

        let names: Vec<&str> = s.fields_of_kind(FieldKind::Name).collect();
        assert_eq!(names, vec!["first_name", "last_name"]);

        let dates: Vec<&str> = s.fields_of_kind(FieldKind::Date).collect();
        assert_eq!(dates, vec!["dob"]);

        let phones: Vec<&str> = s.fields_of_kind(FieldKind::Phone).collect();
        assert!(phones.is_empty());
    }

    // Lookup by name returns the insertion position, or `None` when missing.
    #[test]
    fn field_index_lookup() {
        let s = SchemaBuilder::new()
            .field("name", FieldKind::Name)
            .field("dob", FieldKind::Date)
            .build()
            .unwrap();
        assert_eq!(s.field_index("name"), Some(0));
        assert_eq!(s.field_index("dob"), Some(1));
        assert_eq!(s.field_index("missing"), None);
    }
}