destination/compare/
eponym.rs

1//! The `eponym` module is the eponymous module for `compare`.  Contains types and methods for
2//! comparing addresses.
3use crate::{
4    Address, AddressErrorKind, AddressStatus, Geographic, IntoCsv, Io, PartialAddress,
5    PartialAddresses, SubaddressType, from_csv, to_csv,
6};
7use derive_more::{Deref, DerefMut};
8use indicatif::ParallelProgressIterator;
9use rayon::prelude::*;
10use serde::{Deserialize, Serialize};
11use tracing::info;
12
13/// The `Mismatch` enum tracks the fields of an address that can diverge while still potentially
14/// referring to the same location.
15#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
16pub enum Mismatch {
17    /// Represents a mismatch in the subaddress type.
18    SubaddressType(String),
19    /// Represents a mismatch in the floor number.
20    Floor(String),
21    /// Represents a mismatch in the building identifier.
22    Building(String),
23    /// Represents a mismatch in the address status.
24    Status(String),
25}
26
27impl Mismatch {
28    /// The `subaddress_type` method captures information about the mismatch between subaddress
29    /// type fields as a message contained in the enum variant.
30    pub fn subaddress_type(from: Option<SubaddressType>, to: Option<SubaddressType>) -> Self {
31        let message = format!("{:?} not equal to {:?}", from, to);
32        Self::SubaddressType(message)
33    }
34
35    /// The `floor` method captures information about the mismatch between the `floor` fields as a message contained in the enum variant.
36    pub fn floor(from: Option<i64>, to: Option<i64>) -> Self {
37        let message = format!("{:?} not equal to {:?}", from, to);
38        Self::Floor(message)
39    }
40
41    /// The `building` method captures information about the mismatch between the `building` fields as a message contained in the enum variant.
42    pub fn building(from: Option<String>, to: Option<String>) -> Self {
43        let message = format!("{:?} not equal to {:?}", from, to);
44        Self::Building(message)
45    }
46
47    /// The `status` method captures information about the mismatch between the `status` fields as a message contained in the enum variant.
48    pub fn status(from: AddressStatus, to: AddressStatus) -> Self {
49        let message = format!("{} not equal to {}", from, to);
50        Self::Status(message)
51    }
52}
53
54/// The `Mismatches` struct holds a vector of type [`Mismatch`].
55#[derive(
56    Debug,
57    Default,
58    Clone,
59    PartialEq,
60    PartialOrd,
61    Eq,
62    Ord,
63    Hash,
64    Serialize,
65    Deserialize,
66    Deref,
67    DerefMut,
68)]
69pub struct Mismatches(Vec<Mismatch>);
70
71impl Mismatches {
72    /// Creates a new 'Mismatches' from a vector of type ['Mismatch'].
73    pub fn new(fields: Vec<Mismatch>) -> Self {
74        Mismatches(fields)
75    }
76}
77
78/// The `AddressMatch` is an intermediary data structure used internally to aggregate match information from
79/// comparing types that implement [`crate::Addresses`], for the purpose of producing [`MatchRecords`].
80#[derive(Debug, Default, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
81pub struct AddressMatch {
82    /// The `coincident` field indicates the compared addresses refer to the same location, or are
83    /// coincidental.
84    pub coincident: bool,
85    /// The `mismatches` field holds [`Mismatch`] information for each field that differs between
86    /// the compared addresses.  If no coincident address is present, this field is `None`.
87    pub mismatches: Option<Mismatches>,
88}
89
90impl AddressMatch {
91    /// Constructor for creating an `AddressMatch` from its constituent fields.
92    pub fn new(coincident: bool, fields: Vec<Mismatch>) -> Self {
93        let mismatches = if fields.is_empty() {
94            None
95        } else {
96            Some(Mismatches::new(fields))
97        };
98        AddressMatch {
99            coincident,
100            mismatches,
101        }
102    }
103}
104
105/// The `MatchStatus` enum delineates whether a given address has a match (the `Matching` variant),
106/// has a match but differs in some descriptive fields (the `Divergent` variant), or does not have
107/// a match in the comparison set (the `Missing` variant).
108///
109/// We have derived Default using the Missing variant, mostly so structs that take a `MatchStatus`
110/// as a field can also derive default.  Properly speaking, there is no meaningful default for this
111/// struct, but if you need to create one first and fill it in later, you can.
112#[derive(Debug, Default, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Serialize, Deserialize)]
113pub enum MatchStatus {
114    /// The `Matching` variant indicates an address has an exact match in the comparison set.
115    Matching,
116    /// The `Divergent` variant indicates an address has a match in the comparison set, but
117    /// the address contains fields with different values than in the comparison (e.g. the
118    /// address has status 'Retired' compared to 'Current').
119    Divergent,
120    #[default]
121    /// The `Missing` variant indicates the address does not have a match in the comparison set.
122    Missing,
123}
124
125/// A `MatchRecord` reports the match results for a single address compared against a set of
126/// addresses.  Designed to plot and diagnose missing and divergent addresses.
127#[derive(Debug, Default, Clone, PartialEq, PartialOrd, Serialize, Deserialize)]
128pub struct MatchRecord {
129    /// The `match_status` field represents the match status of the address.
130    pub match_status: MatchStatus,
131    /// The `address_label` field is the text representation of the subject address.
132    pub address_label: String,
133    /// The `subaddress_type` field indicates a difference in subaddress type between a subject
134    /// address and its match, if present.  E.g. "SUITE" does not match "APARTMENT".
135    pub subaddress_type: Option<String>,
136    /// The `floor` field indicates the subject address and its match, if present, have different floor numbers.
137    pub floor: Option<String>,
138    /// The `building` field indicates the subject address and its match, if present, have
139    /// different building identifiers.
140    pub building: Option<String>,
141    /// The `status` field indicates the subject address and its match, if present, have different
142    /// values for the address status. E.g. "Current" does not match "Other".
143    pub status: Option<String>,
144    /// The `longitude` field represents the 'x' value of the address point.  Depending on the
145    /// input from the caller, the value may be in decimal degrees, meters or feet.
146    pub longitude: f64,
147    /// The `latitude` field represents the 'y' value of the address point.  Depending on the
148    /// input from the caller, the value may be in decimal degrees, meters or feet.
149    pub latitude: f64,
150    /// The `id` field is an internal unique id.
151    pub id: uuid::Uuid,
152}
153
154impl Geographic for MatchRecord {
155    fn latitude(&self) -> f64 {
156        self.latitude
157    }
158
159    fn longitude(&self) -> f64 {
160        self.longitude
161    }
162}
163
164/// The `MatchRecords` struct holds a vector of type [`MatchRecord`].
165#[derive(Debug, Default, Clone, PartialEq, PartialOrd, Serialize, Deserialize, Deref, DerefMut)]
166pub struct MatchRecords(Vec<MatchRecord>);
167
168impl MatchRecords {
169    /// The constructor for `MatchRecords` compares a single subject address against a set of
170    /// addresses, and returns the `MatchRecords` for the subject address.  A subject address can
171    /// match against multiple candidates (e.g. a parent address will match against all
172    /// subaddresses associated with the parent), so the result type must potentially accommodate
173    /// multiple records.
174    pub fn new<T: Address + Geographic, U: Address + Geographic>(
175        self_address: &T,
176        other_addresses: &[U],
177    ) -> Self {
178        let address_label = self_address.label();
179        let latitude = self_address.latitude();
180        let longitude = self_address.longitude();
181        let id = uuid::Uuid::new_v4();
182
183        let mut match_record = Vec::new();
184
185        for address in other_addresses {
186            let address_match = self_address.coincident(address);
187            if address_match.coincident {
188                let mut subaddress_type = None;
189                let mut floor = None;
190                let mut building = None;
191                let mut status = None;
192                match address_match.mismatches {
193                    None => match_record.push(MatchRecord {
194                        match_status: MatchStatus::Matching,
195                        address_label: address_label.clone(),
196                        subaddress_type,
197                        floor,
198                        building,
199                        status,
200                        longitude,
201                        latitude,
202                        id,
203                    }),
204                    Some(mismatches) => {
205                        for mismatch in mismatches.iter() {
206                            match mismatch {
207                                Mismatch::SubaddressType(message) => {
208                                    subaddress_type = Some(message.to_owned())
209                                }
210                                Mismatch::Floor(message) => floor = Some(message.to_owned()),
211                                Mismatch::Building(message) => building = Some(message.to_owned()),
212                                Mismatch::Status(message) => status = Some(message.to_owned()),
213                            }
214                        }
215                        match_record.push(MatchRecord {
216                            match_status: MatchStatus::Divergent,
217                            address_label: address_label.clone(),
218                            subaddress_type,
219                            floor,
220                            building,
221                            status,
222                            longitude,
223                            latitude,
224                            id,
225                        })
226                    }
227                }
228            }
229        }
230        if match_record.is_empty() {
231            match_record.push(MatchRecord {
232                match_status: MatchStatus::Missing,
233                address_label,
234                subaddress_type: None,
235                floor: None,
236                building: None,
237                status: None,
238                longitude,
239                latitude,
240                id,
241            })
242        }
243        MatchRecords(match_record)
244    }
245
246    /// For each address in `self_addresses`, the `compare` method calculates the match record for
247    /// the subject address compared against the addresses in `other_addresses`, and returns the
248    /// results in a [`MatchRecords`] struct.
249    pub fn compare<T: Address + Geographic + Send + Sync, U: Address + Geographic + Send + Sync>(
250        self_addresses: &[T],
251        other_addresses: &[U],
252    ) -> Self {
253        let style = indicatif::ProgressStyle::with_template(
254            "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {'Comparing addresses.'}",
255        )
256        .unwrap();
257        let record = self_addresses
258            .par_iter()
259            .map(|address| MatchRecords::new(address, other_addresses))
260            .progress_with_style(style)
261            .collect::<Vec<MatchRecords>>();
262        let mut records = Vec::new();
263        for mut item in record {
264            records.append(&mut item);
265        }
266        MatchRecords(records)
267    }
268
269    /// The `filter` method returns the subset of `MatchRecords` that meet the filter requirement.
270    /// The `filter` parameter takes a string reference that can take the values "matching",
271    /// "missing", "divergent", "subaddress", "floor", "building" and "status".  When filtering by
272    /// match status, the return records contain those records where the match status equals the
273    /// filter value.  For the mismatch fields, the return records contain values where a mismatch
274    /// is present in the provided field.
275    pub fn filter(mut self, filter: &str) -> Self {
276        match filter {
277            "matching" => self.retain(|r| r.match_status == MatchStatus::Matching),
278            "missing" => self.retain(|r| r.match_status == MatchStatus::Missing),
279            "divergent" => self.retain(|r| r.match_status == MatchStatus::Divergent),
280            "subaddress" => self.retain(|r| {
281                r.match_status == MatchStatus::Divergent && r.subaddress_type.is_some()
282            }),
283            "floor" => {
284                self.retain(|r| r.match_status == MatchStatus::Divergent && r.floor.is_some())
285            }
286            "building" => {
287                self.retain(|r| r.match_status == MatchStatus::Divergent && r.building.is_some())
288            }
289            "status" => {
290                self.retain(|r| r.match_status == MatchStatus::Divergent && r.status.is_some())
291            }
292            _ => info!("Invalid filter provided."),
293        }
294        self
295    }
296}
297
298impl IntoCsv<MatchRecords> for MatchRecords {
299    fn from_csv<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Io> {
300        let records = from_csv(path)?;
301        Ok(Self(records))
302    }
303
304    fn to_csv<P: AsRef<std::path::Path>>(&mut self, path: P) -> Result<(), AddressErrorKind> {
305        to_csv(&mut self.0, path.as_ref().into())
306    }
307}
308
309/// The `MatchPartialRecord` struct contains match data for a [`PartialAddress`].
310#[derive(Debug, Clone, PartialEq, PartialOrd, Serialize, Deserialize)]
311pub struct MatchPartialRecord {
312    /// The `match_status` field represents the match status of the partial address.
313    match_status: MatchStatus,
314    /// The `address_label` field is the text representation of the partial address.
315    address_label: String,
316    /// The `other_label` field is the text representation of the matching address.
317    other_label: Option<String>,
318    /// The `longitude` field represents the 'x' value of the matching address, if present.
319    longitude: Option<f64>,
320    /// The `latitude` field represents the 'y' value of the matching address, if present.
321    latitude: Option<f64>,
322}
323
324impl MatchPartialRecord {
325    /// The `coincident` method attempts to match fields present in the partial address against the
326    /// comparison address, returning a `MatchPartialRecord` if successful.  Returns `None` if
327    /// the match status is "missing".
328    pub fn coincident<T: Address + Geographic>(
329        partial: &PartialAddress,
330        address: &T,
331    ) -> Option<MatchPartialRecord> {
332        let mut match_status = MatchStatus::Missing;
333
334        if let Some(value) = partial.address_number {
335            if value == address.number() {
336                match_status = MatchStatus::Matching;
337            }
338        }
339
340        if &partial.street_name_pre_directional != address.directional()
341            && match_status == MatchStatus::Matching
342        {
343            match_status = MatchStatus::Missing;
344        }
345
346        if let Some(value) = &partial.street_name {
347            if value != address.street_name() && match_status == MatchStatus::Matching {
348                match_status = MatchStatus::Missing;
349            }
350        }
351
352        if let Some(value) = partial.street_name_post_type() {
353            if let &Some(street_type) = address.street_type() {
354                if value != street_type && match_status == MatchStatus::Matching {
355                    match_status = MatchStatus::Missing;
356                }
357            }
358        }
359
360        if &partial.subaddress_identifier() != address.subaddress_id()
361            && match_status == MatchStatus::Matching
362        {
363            match_status = MatchStatus::Divergent;
364        }
365
366        if address.subaddress_id().is_none()
367            && &partial.building() != address.building()
368            && match_status == MatchStatus::Matching
369        {
370            match_status = MatchStatus::Divergent;
371        }
372
373        if address.subaddress_id().is_none()
374            && address.building().is_none()
375            && &partial.floor() != address.floor()
376            && match_status == MatchStatus::Matching
377        {
378            match_status = MatchStatus::Divergent;
379        }
380
381        if match_status != MatchStatus::Missing {
382            Some(MatchPartialRecord {
383                match_status,
384                address_label: partial.label(),
385                other_label: Some(address.label()),
386                longitude: Some(address.longitude()),
387                latitude: Some(address.latitude()),
388            })
389        } else {
390            None
391        }
392    }
393
394    /// The `compare` method attempts to match fields present in the partial address against a set
395    /// of comparison addresses, returning a [`MatchPartialRecords`].
396    pub fn compare<T: Address + Geographic>(
397        partial: &PartialAddress,
398        addresses: &[T],
399    ) -> MatchPartialRecords {
400        let mut records = Vec::new();
401        for address in addresses {
402            let coincident = MatchPartialRecord::coincident(partial, address);
403            if let Some(record) = coincident {
404                records.push(record);
405            }
406        }
407        if records.is_empty() {
408            records.push(MatchPartialRecord {
409                match_status: MatchStatus::Missing,
410                address_label: partial.label(),
411                other_label: None,
412                longitude: None,
413                latitude: None,
414            })
415        }
416        let compared = MatchPartialRecords(records);
417        let matching = compared.clone().filter("matching");
418        if matching.is_empty() {
419            compared
420        } else {
421            matching
422        }
423    }
424
425    /// The `match_status` method returns the cloned value of the `match_status` field.
426    pub fn match_status(&self) -> MatchStatus {
427        self.match_status.to_owned()
428    }
429
430    /// The `address_label` method returns the cloned value of the `address_label` field.
431    pub fn address_label(&self) -> String {
432        self.address_label.to_owned()
433    }
434
435    /// The `other_label` method returns the cloned value of the `other_label` field.
436    pub fn other_label(&self) -> Option<String> {
437        self.other_label.clone()
438    }
439
440    /// The `longitude` method returns the value of the `longitude` field.
441    pub fn longitude(&self) -> Option<f64> {
442        self.longitude
443    }
444
445    /// The `latitude` method returns the value of the `latitude` field.
446    pub fn latitude(&self) -> Option<f64> {
447        self.latitude
448    }
449}
450
451/// The `MatchPartialRecords` struct holds a vector of type [`MatchPartialRecord`].
452#[derive(
453    Debug, Clone, PartialEq, PartialOrd, Serialize, Deserialize, Deref, DerefMut, derive_new::new,
454)]
455pub struct MatchPartialRecords(Vec<MatchPartialRecord>);
456
457impl MatchPartialRecords {
458    /// For each partial address in `self_addresses`, the `compare` method attempts to match the
459    /// fields present in the partial address against the addresses in `other_addresses`, returning
460    /// a `MatchPartialRecords`.
461    pub fn compare<T: Address + Geographic + Send + Sync>(
462        self_addresses: &PartialAddresses,
463        other_addresses: &[T],
464    ) -> Self {
465        let style = indicatif::ProgressStyle::with_template(
466            "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {'Comparing addresses.'}",
467        )
468        .unwrap();
469        let record = self_addresses
470            .par_iter()
471            .map(|address| MatchPartialRecord::compare(address, other_addresses))
472            .progress_with_style(style)
473            .collect::<Vec<MatchPartialRecords>>();
474        let mut records = Vec::new();
475        for mut item in record {
476            records.append(&mut item);
477        }
478        MatchPartialRecords(records)
479    }
480
481    /// The `filter` method returns the subset of `PartialMatchRecords` that meet the filter requirement.
482    /// The `filter` parameter takes a string reference that can take the values "matching",
483    /// "missing", or "divergent".  The return records contain those records where the match status equals the
484    /// filter value.
485    pub fn filter(mut self, filter: &str) -> Self {
486        match filter {
487            "missing" => self.retain(|r| r.match_status == MatchStatus::Missing),
488            "divergent" => self.retain(|r| r.match_status == MatchStatus::Divergent),
489            "matching" => self.retain(|r| r.match_status == MatchStatus::Matching),
490            _ => info!("Invalid filter provided."),
491        }
492        self
493    }
494}
495
496impl IntoCsv<MatchPartialRecords> for MatchPartialRecords {
497    fn from_csv<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Io> {
498        let records = from_csv(path)?;
499        Ok(Self(records))
500    }
501
502    fn to_csv<P: AsRef<std::path::Path>>(&mut self, path: P) -> Result<(), AddressErrorKind> {
503        to_csv(&mut self.0, path.as_ref().into())
504    }
505}