// zer_blocking/keys/token.rs

1use zer_core::{record::Record, schema::Schema};
2
3use crate::normalize::normalize_text;
4use super::BlockingKey;
5
/// Blocking key of the form `<first address token>:<first-name initial>`,
/// for example `"123:J"`.
///
/// The key is built from the first whitespace-delimited token of the normalized
/// address field and the first character of the normalized first-name field.
/// Because the surname is not part of the key, two records at the same address
/// with the same initial end up in the same bucket even if the surname differs
/// (for example after a surname transposition).
#[derive(Debug, Clone)]
pub struct AddressInitialKey {
    /// Name of the record field that holds the address.
    address_field: String,
    /// Name of the record field that holds the first name.
    first_name_field: String,
}
13
14impl AddressInitialKey {
15    pub fn new(address_field: &str, first_name_field: &str) -> Self {
16        Self {
17            address_field:    address_field.into(),
18            first_name_field: first_name_field.into(),
19        }
20    }
21}
22
23impl BlockingKey for AddressInitialKey {
24    fn name(&self) -> &str {
25        "addr_initial"
26    }
27
28    fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
29        let addr_cow = record.field_as_str(&self.address_field);
30        let addr_raw = match addr_cow.as_deref() {
31            Some(s) => s,
32            None    => return vec![],
33        };
34        let name_cow = record.field_as_str(&self.first_name_field);
35        let name_raw = match name_cow.as_deref() {
36            Some(s) => s,
37            None    => return vec![],
38        };
39
40        let addr_norm = normalize_text(addr_raw);
41        let name_norm = normalize_text(name_raw);
42
43        let addr_token = addr_norm.split_whitespace().next().unwrap_or("").to_string();
44        let initial    = name_norm.chars().next().unwrap_or(' ');
45
46        if addr_token.is_empty() || !initial.is_ascii_alphabetic() {
47            return vec![];
48        }
49
50        vec![format!("{}:{}", addr_token, initial)]
51    }
52}
53
#[cfg(test)]
mod tests {
    use super::*;
    use zer_core::{record::FieldValue, schema::{SchemaBuilder, FieldKind}};

    /// Two-field schema (address + first name) shared by every test.
    fn schema() -> Schema {
        SchemaBuilder::new()
            .field("address", FieldKind::Address)
            .field("first_name", FieldKind::Name)
            .build()
            .unwrap()
    }

    /// The key is the first address token joined to the first-name initial.
    #[test]
    fn extracts_first_token_and_initial() {
        let k = AddressInitialKey::new("address", "first_name");
        let r = Record::new(1)
            .insert("address", FieldValue::Text("123 Main Street".into()))
            .insert("first_name", FieldValue::Text("John".into()));
        assert_eq!(k.extract(&r, &schema()), vec!["123:J"]);
    }

    /// Same address but different initials must produce different keys.
    #[test]
    fn same_address_different_first_name_no_collision() {
        let k = AddressInitialKey::new("address", "first_name");
        let s = schema();
        let r1 = Record::new(1)
            .insert("address", FieldValue::Text("Singel 191".into()))
            .insert("first_name", FieldValue::Text("Alice".into()));
        let r2 = Record::new(2)
            .insert("address", FieldValue::Text("Singel 191".into()))
            .insert("first_name", FieldValue::Text("Bob".into()));
        assert_ne!(k.extract(&r1, &s), k.extract(&r2, &s));
    }

    /// A record without the first-name field yields no key.
    #[test]
    fn missing_field_returns_empty() {
        let k = AddressInitialKey::new("address", "first_name");
        let r = Record::new(1).insert("address", FieldValue::Text("Singel 191".into()));
        assert!(k.extract(&r, &schema()).is_empty());
    }

    /// A record without the address field yields no key either; this covers
    /// the first early return in `extract`.
    #[test]
    fn missing_address_returns_empty() {
        let k = AddressInitialKey::new("address", "first_name");
        let r = Record::new(1).insert("first_name", FieldValue::Text("Alice".into()));
        assert!(k.extract(&r, &schema()).is_empty());
    }
}