zer_blocking/keys/
document.rs1use zer_core::{record::Record, schema::Schema};
2
3use crate::normalize::normalize_digits_only;
4use super::BlockingKey;
5
/// Blocking key built from the tail of a document-identifier field.
///
/// When used as a `BlockingKey`, the field value is reduced to its ASCII
/// alphanumeric characters, uppercased, and the last `suffix_len` characters
/// become the key.
#[derive(Debug, Clone)]
pub struct DocumentSuffixKey {
    /// Name of the record field the document identifier is read from.
    field: String,
    /// Number of trailing normalised characters that make up the key.
    suffix_len: usize,
}
20
21impl DocumentSuffixKey {
22 pub fn new(field: &str, suffix_len: usize) -> Self {
24 Self { field: field.into(), suffix_len }
25 }
26}
27
28impl BlockingKey for DocumentSuffixKey {
29 fn name(&self) -> &str { "document_suffix" }
30
31 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
32 let cow = record.field_as_str(&self.field);
33 let raw = match cow.as_deref() {
34 Some(s) => s,
35 None => return vec![],
36 };
37 let clean: String = raw
38 .chars()
39 .filter(|c| c.is_ascii_alphanumeric())
40 .collect::<String>()
41 .to_ascii_uppercase();
42 if clean.len() < self.suffix_len {
43 return vec![];
44 }
45 let suffix = &clean[clean.len() - self.suffix_len..];
46 vec![suffix.to_string()]
47 }
48}
49
/// Blocking key built from the tail of a document-identifier field after it
/// has been passed through `normalize_digits_only`.
///
/// When used as a `BlockingKey`, the last `suffix_len` bytes of the normalised
/// value become the key.
#[derive(Debug, Clone)]
pub struct DocumentDigitSuffixKey {
    /// Name of the record field the document identifier is read from.
    field: String,
    /// Length of the trailing slice of the normalised value used as the key.
    suffix_len: usize,
}
60
61impl DocumentDigitSuffixKey {
62 pub fn new(field: &str, suffix_len: usize) -> Self {
63 Self { field: field.into(), suffix_len }
64 }
65}
66
67impl BlockingKey for DocumentDigitSuffixKey {
68 fn name(&self) -> &str { "document_digit_suffix" }
69
70 fn extract(&self, record: &Record, _schema: &Schema) -> Vec<String> {
71 let cow = record.field_as_str(&self.field);
72 let raw = match cow.as_deref() {
73 Some(s) => s,
74 None => return vec![],
75 };
76 let digits = normalize_digits_only(raw);
77 if digits.len() < self.suffix_len {
78 return vec![];
79 }
80 let suffix = &digits[digits.len() - self.suffix_len..];
81 vec![suffix.to_string()]
82 }
83}
84
#[cfg(test)]
mod tests {
    use super::*;
    use zer_core::{
        record::FieldValue,
        schema::{FieldKind, SchemaBuilder},
    };

    /// Schema containing the single `document_nummer` id field used by every test.
    fn schema() -> Schema {
        SchemaBuilder::new()
            .field("document_nummer", FieldKind::Id)
            .build()
            .unwrap()
    }

    /// Builds a record whose `document_nummer` field holds `doc` as text.
    fn rec(id: u64, doc: &str) -> Record {
        Record::new(id).insert("document_nummer", FieldValue::Text(doc.into()))
    }

    #[test]
    fn suffix_key_strips_non_alphanum_and_uppercases() {
        let schema = schema();
        let key = DocumentSuffixKey::new("document_nummer", 6);
        let r = rec(1, "P-NL-AB123456");
        let keys = key.extract(&r, &schema);
        assert_eq!(keys, vec!["123456"]);
    }

    // The case above never reaches a letter, so it cannot detect a missing
    // uppercase step; this one feeds lowercase letters into the suffix.
    #[test]
    fn suffix_key_uppercases_lowercase_input() {
        let schema = schema();
        let key = DocumentSuffixKey::new("document_nummer", 8);
        let r = rec(1, "p-nl-ab123456");
        assert_eq!(key.extract(&r, &schema), vec!["AB123456"]);
    }

    #[test]
    fn suffix_key_same_serial_different_prefix_collide() {
        let schema = schema();
        let key = DocumentSuffixKey::new("document_nummer", 6);

        let r1 = rec(1, "P-NL-AB123456");
        let r2 = rec(2, "AB123456");
        assert_eq!(key.extract(&r1, &schema), key.extract(&r2, &schema));
    }

    #[test]
    fn suffix_key_exact_length_returns_whole_value() {
        let schema = schema();
        let key = DocumentSuffixKey::new("document_nummer", 6);
        let r = rec(1, "AB-1234");
        assert_eq!(key.extract(&r, &schema), vec!["AB1234"]);
    }

    #[test]
    fn suffix_key_too_short_returns_empty() {
        let schema = schema();
        let key = DocumentSuffixKey::new("document_nummer", 6);
        let r = rec(1, "AB12");
        assert!(key.extract(&r, &schema).is_empty());
    }

    #[test]
    fn suffix_key_missing_field_returns_empty() {
        let schema = schema();
        let key = DocumentSuffixKey::new("document_nummer", 6);
        assert!(key.extract(&Record::new(1), &schema).is_empty());
    }

    #[test]
    fn digit_suffix_strips_all_letters() {
        let schema = schema();
        let key = DocumentDigitSuffixKey::new("document_nummer", 4);
        let r = rec(1, "BSN-12345678");
        let keys = key.extract(&r, &schema);
        assert_eq!(keys, vec!["5678"]);
    }

    #[test]
    fn digit_suffix_same_number_different_format_collide() {
        let schema = schema();
        let key = DocumentDigitSuffixKey::new("document_nummer", 6);

        let r1 = rec(1, "123-45-6789");
        let r2 = rec(2, "123456789");
        assert_eq!(key.extract(&r1, &schema), key.extract(&r2, &schema));
    }

    #[test]
    fn digit_suffix_missing_field_returns_empty() {
        let schema = schema();
        let key = DocumentDigitSuffixKey::new("document_nummer", 4);
        assert!(key.extract(&Record::new(1), &schema).is_empty());
    }
}