human_name/
lib.rs

1//! A library for parsing and comparing human names.
2//!
3//! See the documentation of the `Name` struct for details.
4
5#![doc(html_root_url = "https://djudd.github.io/human-name/")]
6#![cfg_attr(feature = "bench", feature(test))]
7
8extern crate crossbeam_utils;
9extern crate smallvec;
10extern crate unicode_normalization;
11extern crate unicode_segmentation;
12extern crate unidecode;
13
14#[cfg(test)]
15#[cfg(feature = "bench")]
16extern crate test;
17
18#[cfg(test)]
19extern crate alloc_counter;
20
21mod case;
22mod comparison;
23mod decomposition;
24mod features;
25mod namecase;
26mod namepart;
27mod nickname;
28mod parse;
29mod segment;
30mod suffix;
31mod surname;
32mod title;
33mod transliterate;
34mod word;
35
36#[cfg(feature = "ffi")]
37pub mod external;
38
39#[cfg(feature = "name_eq_hash")]
40mod eq_hash;
41
42#[cfg(feature = "serialization")]
43mod serialization;
44
45use crate::decomposition::normalize_nfkd_whitespace;
46use crate::word::{Location, Words};
47use compact_str::CompactString;
48use crossbeam_utils::atomic::AtomicCell;
49use smallvec::SmallVec;
50use std::borrow::Cow;
51use std::collections::hash_map::DefaultHasher;
52use std::convert::TryInto;
53use std::hash::{Hash, Hasher};
54use std::num::NonZeroU8;
55
56#[cfg(test)]
57use alloc_counter::AllocCounterSystem;
58
// Test builds install alloc_counter's allocator as the global allocator; the
// tests in this file wrap code in `alloc_counter::deny_alloc` to check that
// fast paths do not allocate.
#[cfg(test)]
#[global_allocator]
static A: AllocCounterSystem = AllocCounterSystem;
62
/// Input size limit: `Name::parse` returns `None` for any input whose UTF-8
/// byte length is greater than or equal to this value.
pub const MAX_NAME_LEN: usize = 1024;
/// Public alias for the `segment` module's `MAX_LEN` limit.
pub const MAX_SEGMENT_LEN: usize = segment::MAX_LEN;
/// Public alias for the `parse` module's `MAX_WORDS` limit.
pub const MAX_SEGMENTS: usize = parse::MAX_WORDS;
66
/// Represents a parsed human name.
///
/// Guaranteed to contain (what we think is) a surname, a first initial, and
/// nothing more. May also contain given & middle names, middle initials, and/or
/// a generational suffix.
///
/// Construct a Name using `parse`:
///
/// ```
/// use human_name::Name;
///
/// let name = Name::parse("Jane Doe").unwrap();
/// ```
///
/// Once you have a Name, you may extract its components, convert it to JSON,
/// or compare it with another Name to see if they are consistent with representing
/// the same person (see docs on `consistent_with` for details).
#[derive(Debug)]
pub struct Name {
    // The name text (given names/initials and surname, without generational
    // suffix or honorifics) immediately followed by the initials string.
    text: CompactString,
    // In order: locations of given-name words in `text`, locations of surname
    // words in `text`, then locations of the given names within the initials.
    locations: SmallVec<[Location; 6]>,
    given_name_words: u8,               // stored as u8, so at most 255
    surname_words: u8,                  // stored as u8, so at most 255
    initials_len: u8,                   // byte length of the initials; at most 255
    generation: Option<NonZeroU8>,
    // Only allocated when a prefix or suffix honorific was found.
    honorifics: Option<Box<Honorifics>>,
    // Lazily filled cache for `surname_hash()`; starts empty on parse and clone.
    surname_hash: AtomicCell<Option<u32>>,
}
95
/// Honorific text kept alongside a `Name`.
///
/// `Name` only allocates one of these when at least one of the two fields is
/// present.
#[derive(Clone, Debug)]
struct Honorifics {
    // Exposed through `Name::honorific_prefix`.
    prefix: Option<Box<str>>,
    // Exposed through `Name::honorific_suffix`.
    suffix: Option<Box<str>>,
}
101
102impl Clone for Name {
103    fn clone(&self) -> Self {
104        Name {
105            text: self.text.clone(),
106            locations: self.locations.clone(),
107            given_name_words: self.given_name_words,
108            surname_words: self.surname_words,
109            initials_len: self.initials_len,
110            generation: self.generation,
111            honorifics: self.honorifics.clone(),
112            surname_hash: Default::default(),
113        }
114    }
115}
116
117impl Name {
    /// Parses a string representing a single person's full name into a
    /// canonical representation.
120    ///
121    /// # Examples
122    /// ```
123    /// use human_name::Name;
124    ///
125    /// let name = Name::parse("Jane Doe").unwrap();
126    /// assert_eq!("Doe", name.surname());
127    /// assert_eq!(Some("Jane"), name.given_name());
128    ///
129    /// let name = Name::parse("Doe, J").unwrap();
130    /// assert_eq!("Doe", name.surname());
131    /// assert_eq!(None, name.given_name());
132    /// assert_eq!('J', name.first_initial());
133    ///
134    /// let name = Name::parse("Dr. Juan Alberto T. Velasquez y Garcia III").unwrap();
135    /// assert_eq!("Velasquez y Garcia", name.surname());
136    /// assert_eq!(Some("Juan"), name.given_name());
137    /// assert_eq!(Some("AT"), name.middle_initials());
138    /// assert_eq!(Some("III"), name.generational_suffix());
139    /// assert_eq!(Some("Dr."), name.honorific_prefix());
140    /// ```
141    ///
142    /// # Supported formats
143    ///
144    /// Supports a variety of formats, including prefix and postfix titles,
145    /// parenthesized nicknames, initials with and without periods, and sort
146    /// order ("Doe, Jane"). Makes use of heuristics based on case when
147    /// applicable (e.g., "AL Doe" is parsed as "A. L. Doe", while "Al Doe" is
148    /// parsed as a given name and surname), as well as _small_ sets of known
149    /// particles, conjunctions, titles, etc.
150    ///
151    /// # Limitations
152    ///
153    /// Errs on the side of producing parse output rather than giving up, so
154    /// this function is _not_ suitable as a way of guessing whether a given
155    /// string actually represents a name.
156    ///
157    /// However, success requires at least an apparent surname and first initial.
158    /// Single-word names cannot be parsed (you may or may not wish to assume
159    /// they are given names).
160    ///
    /// Does not preserve nicknames. Titles are not part of the canonical name
    /// either: apart from generational suffixes such as "III", they survive
    /// only as honorifics (see `honorific_prefix` and `honorific_suffix`).
    /// Does not handle plural forms specially: "Mr. & Mrs. John
    /// Doe" will be parsed as "John Doe", and "Jane Doe, et al" will be parsed
    /// as "Jane Doe".
165    ///
166    /// Works best on Latin names - i.e., data from North or South America or
167    /// Europe. Does not understand surname-first formats without commas: "Kim
168    /// Il-sung" will be parsed as having the first name "Kim".
169    ///
170    /// Handles non-Latin unicode strings, but without any particular intelligence.
171    /// Attempts at least to fail nicely, such that either `parse` returns `None`,
172    /// or calling `display_full()` on the parsed result returns the input,
173    /// plus or minus whitespace.
174    ///
175    /// Of course, [there is no perfect algorithm](http://www.kalzumeus.com/2010/06/17/falsehoods-programmers-believe-about-names/)
176    /// for canonicalizing names. The goal here is to do the best we can without
177    /// large statistical models.
178    pub fn parse(name: &str) -> Option<Name> {
179        if name.len() >= MAX_NAME_LEN {
180            return None;
181        }
182
183        let name = normalize_nfkd_whitespace(name);
184        let name = nickname::strip_nickname(&name);
185        let parsed = parse::parse(&name)?;
186
187        Name::initialize_struct(&parsed, name.len())
188    }
189
190    fn initialize_struct(parsed: &parse::Name, name_len: usize) -> Option<Name> {
191        let words = parsed.words();
192        let surname_index = parsed.surname_index;
193
194        let mut text = CompactString::with_capacity(name_len + surname_index);
195        let mut initials = CompactString::with_capacity(surname_index);
196
197        let mut locations = SmallVec::with_capacity(words.len() + surname_index);
198        let mut locations_in_initials: SmallVec<[Location; 4]> =
199            SmallVec::with_capacity(surname_index);
200
201        for word in &words[..surname_index] {
202            if word.is_initials() {
203                word.with_initials(|c| {
204                    text.push(c);
205                    text.push_str(". ");
206
207                    initials.push(c);
208                });
209            } else {
210                let prior_len = text.len();
211                word.with_namecased(|s| text.push_str(s));
212                locations.push(Location::new(prior_len..text.len())?);
213
214                let prior_len = initials.len();
215                word.with_initials(|c| initials.push(c));
216                locations_in_initials.push(Location::new(prior_len..initials.len())?);
217
218                text.push(' ');
219            }
220        }
221
222        let surname_words = &words[surname_index..];
223        for (i, word) in surname_words.iter().enumerate() {
224            let prior_len = text.len();
225            word.with_namecased(|s| text.push_str(s));
226            locations.push(Location::new(prior_len..text.len())?);
227
228            if i < surname_words.len() - 1 {
229                text.push(' ');
230            }
231        }
232
233        debug_assert!(!text.is_empty(), "Names are empty!");
234        debug_assert!(!initials.is_empty(), "Initials are empty!");
235
236        let generation = parsed.generation;
237        let honorifics = {
238            let prefix = parsed
239                .honorific_prefix()
240                .map(|s| s.into_owned().into_boxed_str());
241            let suffix = parsed
242                .honorific_suffix()
243                .map(|s| s.into_owned().into_boxed_str());
244
245            if prefix.is_some() || suffix.is_some() {
246                Some(Box::new(Honorifics { prefix, suffix }))
247            } else {
248                None
249            }
250        };
251
252        let surname_words = (locations.len() - locations_in_initials.len())
253            .try_into()
254            .ok()?;
255        let given_name_words = locations_in_initials.len().try_into().ok()?;
256        let initials_len = initials.len().try_into().ok()?;
257
258        text.push_str(&initials);
259        text.shrink_to_fit();
260
261        locations.extend_from_slice(&locations_in_initials);
262        locations.shrink_to_fit();
263
264        Some(Name {
265            text,
266            locations,
267            given_name_words,
268            surname_words,
269            initials_len,
270            generation,
271            honorifics,
272            surname_hash: Default::default(),
273        })
274    }
275
276    /// First initial (always present)
277    pub fn first_initial(&self) -> char {
278        self.initials().chars().next().unwrap()
279    }
280
281    /// Given name as a string, if present
282    ///
283    /// ```
284    /// use human_name::Name;
285    ///
286    /// let name = Name::parse("Jane Doe").unwrap();
287    /// assert_eq!(Some("Jane"), name.given_name());
288    ///
289    /// let name = Name::parse("J. Doe").unwrap();
290    /// assert_eq!(None, name.given_name());
291    /// ```
292    pub fn given_name(&self) -> Option<&str> {
293        self.given_iter().next()
294    }
295
296    /// Does this person use a middle name in place of their given name?
297    ///
298    /// ```
299    /// use human_name::Name;
300    ///
301    /// let name = Name::parse("Jane Doe").unwrap();
302    /// assert!(!name.goes_by_middle_name());
303    ///
304    /// let name = Name::parse("J. Doe").unwrap();
305    /// assert!(!name.goes_by_middle_name());
306    ///
307    /// let name = Name::parse("T Boone Pickens").unwrap();
308    /// assert!(name.goes_by_middle_name());
309    /// ```
310    pub fn goes_by_middle_name(&self) -> bool {
311        if let Some(loc) = self.given_names_in_initials().first() {
312            loc.range().start > 0
313        } else {
314            false
315        }
316    }
317
318    /// First and middle initials as a string (always present)
319    ///
320    /// ```
321    /// use human_name::Name;
322    ///
323    /// let name = Name::parse("Jane Doe").unwrap();
324    /// assert_eq!("J", name.initials());
325    ///
326    /// let name = Name::parse("James T. Kirk").unwrap();
327    /// assert_eq!("JT", name.initials());
328    /// ```
329    #[inline]
330    pub fn initials(&self) -> &str {
331        &self.text[self.name_bytes()..]
332    }
333
334    /// Middle names as an array of words, if present
335    pub fn middle_names(&self) -> Option<SmallVec<[&str; 3]>> {
336        self.middle_name_iter().map(|i| i.collect())
337    }
338
339    /// Middle names as a string, if present
340    ///
341    /// ```
342    /// use human_name::Name;
343    ///
344    /// let name = Name::parse("Jane Doe").unwrap();
345    /// assert_eq!(None, name.middle_name());
346    ///
347    /// let name = Name::parse("James T. Kirk").unwrap();
348    /// assert_eq!(None, name.middle_name());
349    ///
350    /// let name = Name::parse("James Tiberius Kirk").unwrap();
351    /// assert_eq!("Tiberius", name.middle_name().unwrap());
352    ///
353    /// let name = Name::parse("Able Baker Charlie Delta").unwrap();
354    /// assert_eq!("Baker Charlie", name.middle_name().unwrap());
355    /// ```
356    pub fn middle_name(&self) -> Option<Cow<str>> {
357        self.middle_name_iter().map(|i| i.join())
358    }
359
360    /// Middle initials as a string, if present
361    ///
362    /// ```
363    /// use human_name::Name;
364    ///
365    /// let name = Name::parse("Jane Doe").unwrap();
366    /// assert_eq!(None, name.middle_initials());
367    ///
368    /// let name = Name::parse("James T. Kirk").unwrap();
369    /// assert_eq!("T", name.middle_initials().unwrap());
370    ///
371    /// let name = Name::parse("James Tiberius Kirk").unwrap();
372    /// assert_eq!("T", name.middle_initials().unwrap());
373    ///
374    /// let name = Name::parse("Able Baker Charlie Delta").unwrap();
375    /// assert_eq!("BC", name.middle_initials().unwrap());
376    /// ```
377    pub fn middle_initials(&self) -> Option<&str> {
378        self.initials()
379            .char_indices()
380            .nth(1)
381            .map(|(i, _)| &self.text[self.name_bytes() + i..])
382    }
383
384    /// Surname as a slice of words (always present)
385    pub fn surnames(&self) -> SmallVec<[&str; 3]> {
386        self.surname_iter().collect()
387    }
388
389    /// Surname as a string (always present)
390    ///
391    /// ```
392    /// use human_name::Name;
393    ///
394    /// let name = Name::parse("Jane Doe").unwrap();
395    /// assert_eq!("Doe", name.surname());
396    ///
397    /// let name = Name::parse("JOHN ALLEN Q DE LA MACDONALD JR").unwrap();
398    /// assert_eq!("de la MacDonald", name.surname());
399    /// ```
400    pub fn surname(&self) -> &str {
401        let start = self.surname_locations()[0].range().start;
402        let end = self.surname_end_in_text();
403        &self.text[start..end]
404    }
405
406    /// Generational suffix, if present
407    ///
408    /// ```
409    /// use human_name::Name;
410    ///
411    /// let name = Name::parse("Gary Payton II").unwrap();
412    /// assert_eq!(Some("Jr."), name.generational_suffix());
413    /// ```
414    pub fn generational_suffix(&self) -> Option<&str> {
415        self.generation.map(suffix::display_generational_suffix)
416    }
417
418    /// Honorific prefix(es), if present
419    ///
420    /// ```
421    /// use human_name::Name;
422    ///
423    /// let name = Name::parse("Rev. Dr. Martin Luther King, Jr.").unwrap();
424    /// assert_eq!(Some("Rev. Dr."), name.honorific_prefix());
425    /// ```
426    pub fn honorific_prefix(&self) -> Option<&str> {
427        self.honorifics
428            .as_ref()
429            .and_then(|h| h.prefix.as_ref())
430            .map(|p| p.as_ref())
431    }
432
433    /// Honorific suffix(es), if present
434    ///
435    /// ```
436    /// use human_name::Name;
437    ///
438    /// let name = Name::parse("Stephen Strange, MD").unwrap();
439    /// assert_eq!(Some("MD"), name.honorific_suffix());
440    /// ```
441    pub fn honorific_suffix(&self) -> Option<&str> {
442        self.honorifics
443            .as_ref()
444            .and_then(|h| h.suffix.as_ref())
445            .map(|s| s.as_ref())
446    }
447
448    /// First initial (with period) and surname.
449    ///
450    /// ```
451    /// use human_name::Name;
452    ///
453    /// let name = Name::parse("J. Doe").unwrap();
454    /// assert_eq!("J. Doe", name.display_initial_surname());
455    ///
456    /// let name = Name::parse("James T. Kirk").unwrap();
457    /// assert_eq!("J. Kirk", name.display_initial_surname());
458    ///
459    /// let name = Name::parse("JOHN ALLEN Q DE LA MACDONALD JR").unwrap();
460    /// assert_eq!("J. de la MacDonald", name.display_initial_surname());
461    /// ```
462    pub fn display_initial_surname(&self) -> Cow<str> {
463        if self.given_name_words == 0 && self.initials_len == 1 {
464            Cow::Borrowed(&self.text[..self.surname_end_in_text()])
465        } else {
466            Cow::Owned(format!("{}. {}", self.first_initial(), self.surname()))
467        }
468    }
469
470    /// Given name and surname, if given name is known, otherwise first initial
471    /// and surname.
472    ///
473    /// ```
474    /// use human_name::Name;
475    ///
476    /// let name = Name::parse("J. Doe").unwrap();
477    /// assert_eq!("J. Doe", name.display_first_last());
478    ///
479    /// let name = Name::parse("Jane Doe").unwrap();
480    /// assert_eq!("Jane Doe", name.display_first_last());
481    ///
482    /// let name = Name::parse("James T. Kirk").unwrap();
483    /// assert_eq!("James Kirk", name.display_first_last());
484    ///
485    /// let name = Name::parse("JOHN ALLEN Q DE LA MACDONALD JR").unwrap();
486    /// assert_eq!("John de la MacDonald", name.display_first_last());
487    /// ```
488    pub fn display_first_last(&self) -> Cow<str> {
489        if self.given_name_words <= 1 && self.initials_len == 1 {
490            Cow::Borrowed(&self.text[..self.surname_end_in_text()])
491        } else if let Some(ref name) = self.given_name() {
492            Cow::Owned(format!("{} {}", name, self.surname()))
493        } else {
494            self.display_initial_surname()
495        }
496    }
497
498    /// Number of bytes in the full name as UTF-8 in NFKD normal form, including
499    /// spaces and punctuation.
500    ///
501    /// Includes generational suffix, but does not include honorifics.
502    ///
503    /// ```
504    /// use human_name::Name;
505    ///
506    /// let name = Name::parse("JOHN ALLEN Q DE LA MACDÖNALD JR").unwrap();
507    /// assert_eq!("John Allen Q. de la MacDönald, Jr.".len(), name.byte_len());
508    /// ```
509    #[inline]
510    pub fn byte_len(&self) -> usize {
511        const SEPARATOR_LEN: usize = ", ".len();
512
513        self.name_bytes()
514            + self
515                .generational_suffix()
516                .map(|g| g.len() + SEPARATOR_LEN)
517                .unwrap_or(0)
518    }
519
520    #[inline]
521    fn name_bytes(&self) -> usize {
522        self.text.len() - usize::from(self.initials_len)
523    }
524
525    /// The full name, or as much of it as was preserved from the input,
526    /// including given name, middle names, surname and generational suffix.
527    ///
528    /// Includes generational suffix, but does not include honorifics.
529    ///
530    /// ```
531    /// use human_name::Name;
532    ///
533    /// let name = Name::parse("DR JOHN ALLEN Q DE LA MACDONALD JR").unwrap();
534    /// assert_eq!("John Allen Q. de la MacDonald, Jr.", name.display_full());
535    ///
536    /// let name = Name::parse("Air Chief Marshal Sir Harrieta ('Harry') Keōpūolani Nāhiʻenaʻena, GBE, KCB, ADC").unwrap();
537    /// assert_eq!("Harrieta Keōpūolani Nāhiʻenaʻena", name.display_full());
538    /// ```
539    #[inline]
540    pub fn display_full(&self) -> Cow<str> {
541        let name = &self.text[..self.name_bytes()];
542        if let Some(suffix) = self.generational_suffix() {
543            let mut result = name.to_string();
544            result.push_str(", ");
545            result.push_str(suffix);
546            Cow::Owned(result)
547        } else {
548            Cow::Borrowed(name)
549        }
550    }
551
552    /// The full name, or as much of it as was preserved from the input,
553    /// including given name, middle names, surname, generational suffix,
554    /// and honorifics.
555    ///
556    /// ```
557    /// use human_name::Name;
558    ///
559    /// let name = Name::parse("DR JOHN ALLEN Q DE LA MACDONALD JR").unwrap();
560    /// assert_eq!("Dr. John Allen Q. de la MacDonald, Jr.", name.display_full_with_honorifics());
561    ///
562    /// let name = Name::parse("Air Chief Marshal Sir Harrieta ('Harry') Keōpūolani Nāhiʻenaʻena, GBE, KCB, ADC").unwrap();
563    /// assert_eq!("Air Chief Marshal Sir Harrieta Keōpūolani Nāhiʻenaʻena GBE KCB ADC", name.display_full_with_honorifics());
564    /// ```
565    pub fn display_full_with_honorifics(&self) -> Cow<str> {
566        if let Some(honorifics) = self.honorifics.as_ref() {
567            let mut result = String::with_capacity(
568                honorifics.prefix.as_ref().map(|t| t.len() + 1).unwrap_or(0)
569                    + self.byte_len()
570                    + honorifics.suffix.as_ref().map(|t| t.len() + 1).unwrap_or(0),
571            );
572            if let Some(prefix) = &honorifics.prefix {
573                result.push_str(prefix);
574                result.push(' ');
575            }
576            result.push_str(&self.display_full());
577            if let Some(suffix) = &honorifics.suffix {
578                result.push(' ');
579                result.push_str(suffix);
580            }
581            Cow::Owned(result)
582        } else {
583            self.display_full()
584        }
585    }
586
587    /// Implements a hash for a name that is always identical for two names that
588    /// may be consistent according to our matching algorithm.
589    ///
590    /// ### WARNING
591    ///
592    /// This hash function is prone to collisions!
593    ///
    /// We can only use the last four alphabetical characters of the surname,
    /// because that's all we're guaranteed to use in the consistency test,
    /// and we attempt to convert to lowercase ASCII, giving us only 19
    /// bits of variability.
598    ///
599    /// That means if you are working with a lot of names and you expect surnames
600    /// to be similar or identical, you might be better off avoiding hash-based
601    /// datastructures (or using a custom hash and matching algorithm).
602    ///
603    /// We can't use more characters of the surname because we treat names as equal
604    /// when one surname ends with the other and the smaller is at least four
605    /// characters, to catch cases like "Iria Gayo" == "Iria del Río Gayo".
606    ///
607    /// We can't use the first initial because we might ignore it if someone goes
608    /// by a middle name or nickname, or due to transliteration.
609    pub fn surname_hash(&self) -> u64 {
610        if let Some(hash) = self.surname_hash.load() {
611            return hash.into();
612        }
613
614        let mut s = DefaultHasher::new();
615        self.hash_surname(&mut s);
616
617        // Since we only have ~19 bits of input (per above),
618        // there's no point keeping a longer hash.
619        let hash = s.finish() as u32;
620        self.surname_hash.store(Some(hash));
621        hash.into()
622    }
623
624    fn hash_surname<H: Hasher>(&self, state: &mut H) {
625        for c in self
626            .surname_iter()
627            .rev()
628            .flat_map(|word| {
629                transliterate::to_ascii_casefolded_reversed(word)
630                    .into_iter()
631                    .flatten()
632            })
633            .take(comparison::MIN_SURNAME_CHAR_MATCH)
634        {
635            c.hash(state);
636        }
637    }
638
639    #[inline]
640    fn surname_end_in_text(&self) -> usize {
641        self.surname_locations()[usize::from(self.surname_words) - 1]
642            .range()
643            .end
644    }
645
646    #[inline]
647    fn surname_iter(
648        &self,
649    ) -> Words<impl Iterator<Item = Location> + DoubleEndedIterator + ExactSizeIterator + '_> {
650        self.word_iter(self.surname_locations())
651    }
652
653    #[inline]
654    fn middle_name_iter(
655        &self,
656    ) -> Option<Words<impl Iterator<Item = Location> + DoubleEndedIterator + ExactSizeIterator + '_>>
657    {
658        if self.given_name_words > 1 {
659            Some(self.word_iter(&self.given_name_locations()[1..]))
660        } else {
661            None
662        }
663    }
664
665    #[inline]
666    fn given_iter(
667        &self,
668    ) -> Words<impl Iterator<Item = Location> + DoubleEndedIterator + ExactSizeIterator + '_> {
669        self.word_iter(self.given_name_locations())
670    }
671
672    #[inline]
673    fn word_iter<'a>(
674        &'a self,
675        locations: &'a [Location],
676    ) -> Words<'_, impl Iterator<Item = Location> + DoubleEndedIterator + ExactSizeIterator + '_>
677    {
678        Words::new(&self.text, locations.iter().copied())
679    }
680
681    #[inline]
682    fn given_name_locations(&self) -> &[Location] {
683        &self.locations[..self.given_name_words.into()]
684    }
685
686    #[inline]
687    fn surname_locations(&self) -> &[Location] {
688        &self.locations
689            [self.given_name_words.into()..(self.given_name_words + self.surname_words).into()]
690    }
691
692    #[inline]
693    fn given_names_in_initials(&self) -> &[Location] {
694        &self.locations[(self.given_name_words + self.surname_words).into()..]
695    }
696}
697
698#[cfg(test)]
699mod tests {
700    use super::*;
701    use alloc_counter::deny_alloc;
702
703    #[cfg(feature = "bench")]
704    use test::{black_box, Bencher};
705
706    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
707    #[test]
708    fn struct_size() {
709        assert_eq!(80, std::mem::size_of::<Name>());
710        assert_eq!(32, std::mem::size_of::<Honorifics>());
711    }
712
713    #[test]
714    fn fast_path_parse_does_not_allocate() {
715        deny_alloc(|| Name::parse("Jane Doe").unwrap());
716        deny_alloc(|| Name::parse("J. Doe").unwrap());
717    }
718
719    #[test]
720    fn fast_path_eq_does_not_allocate() {
721        let n1 = Name::parse("Jane Doe").unwrap();
722        let n2 = Name::parse("John Doe").unwrap();
723        let n3 = Name::parse("J. Doe").unwrap();
724        deny_alloc(|| {
725            assert!(!n1.consistent_with(&n2));
726            assert!(n1.consistent_with(&n3));
727        });
728    }
729
730    #[test]
731    fn parse_high_proportion_of_combining_chars() {
732        let name = Name::parse(".ΰ\u{330}\u{610}`");
733        assert!(name.is_none());
734    }
735
    // Regression test: a very long run of letters ahead of the name must not
    // cause a panic; the expected output below documents what is produced.
    #[test]
    fn parse_very_long_honorific_prefix() {
        // It would probably also be fine to fail to parse this, but we shouldn't panic
        let name = Name::parse("%%%%%hLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLe pl Puc");
        assert_eq!("H. Lllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllll. E. P. L. Puc", name.unwrap().display_full_with_honorifics());
    }
742
743    #[test]
744    fn eq_non_alphanumeric_initials() {
745        // It would probably also be fine to fail to parse one of these, but we shouldn't panic
746        let a = Name::parse("\u{3}\n\u{4}\u{19}Joo\n'lA").unwrap();
747        let b = Name::parse("H8\n'lA/").unwrap();
748        assert!(!a.consistent_with(&b));
749    }
750
751    #[test]
752    fn eq_empty_transliterated_initials() {
753        // It would probably also be fine to fail to parse `b` or find consistency, but we shouldn't panic
754        let a = Name::parse("Ng\nmac").unwrap();
755        let b = Name::parse("\u{65c}\nmac\n").unwrap();
756        assert!(!a.consistent_with(&b));
757    }
758
759    #[test]
760    fn digits() {
761        let a = Name::parse("111 222");
762        assert!(a.is_none());
763
764        let a = Name::parse("One-1 Ones").unwrap();
765        let b = Name::parse("One-2 Ones").unwrap();
766        assert!(a.consistent_with(&b));
767
768        let a = Name::parse("One Ones-1").unwrap();
769        let b = Name::parse("One Ones-2").unwrap();
770        assert!(!a.consistent_with(&b));
771
772        let a = Name::parse("One Ones1").unwrap();
773        let b = Name::parse("One Ones2").unwrap();
774        assert!(!a.consistent_with(&b));
775
776        let a = Name::parse("One1 Ones").unwrap();
777        let b = Name::parse("One2 Ones").unwrap();
778        assert!(a.consistent_with(&b));
779
780        let a = Name::parse("One 1 Ones").unwrap();
781        let b = Name::parse("One 2 Ones").unwrap();
782        assert!(a.consistent_with(&b));
783    }
784
785    #[test]
786    fn non_bmp_alphas() {
787        let a = Name::parse("𐒴𐓘 𐓊𐓙").unwrap();
788        let b = Name::parse("𐒴𐓘 𐒵 𐓊𐓙").unwrap();
789        assert_eq!("𐒴𐓘 𐓊𐓙", a.display_first_last());
790        assert_eq!("𐒴𐓘 𐓊𐓙", b.display_first_last());
791        assert!(a.consistent_with(&b));
792
793        let c = Name::parse("𐒴𐓘 𐒵𐓙").unwrap();
794        assert_eq!("𐒴𐓘 𐒵𐓙", c.display_first_last());
795        assert!(!a.consistent_with(&c));
796
797        let d = Name::parse("𐒴𐓘 𐓍 𐓊𐓙").unwrap();
798        assert_eq!("𐒴𐓘 𐓊𐓙", d.display_first_last());
799        assert!(a.consistent_with(&d));
800        assert!(!b.consistent_with(&d));
801
802        let a = Name::parse("𐒴𐓘-𐓊𐓙 𐓍𐓙").unwrap();
803        assert_eq!("𐒴𐓘 𐓍𐓙", a.display_first_last()); // Preserving the original would probably be better but this documents current behavior
804        assert!(a.consistent_with(&a));
805        let b = Name::parse("𐒴𐓘 𐓊𐓙-𐓍𐓙").unwrap();
806        assert_eq!("𐒴𐓘 𐓍𐓙", b.display_first_last()); // Preserving the original would probably be better but this documents current behavior
807        assert!(b.consistent_with(&b));
808        let c = Name::parse("𐒴𐓘 𐓊𐓙 𐓍𐓙").unwrap();
809        assert_eq!("𐒴𐓘 𐓍𐓙", c.display_first_last());
810        assert!(c.consistent_with(&c));
811
812        assert!(a.consistent_with(&b));
813        assert!(a.consistent_with(&c));
814        assert!(b.consistent_with(&c));
815    }
816
817    #[test]
818    fn stops_being_nfkd() {
819        // Some string split stops this from being NFKD after it's normalized, which is ~fine
820        // but at one point produced a panic on a debug assertion.
821        let input = "\u{5c4}((\0)\u{64f}()()\u{5c4}\u{64f}\u{612}";
822        assert!(Name::parse(input).is_none());
823    }
824
825    #[test]
826    fn emojis() {
827        let a = Name::parse("😃 😃");
828        assert!(a.is_none());
829
830        let a = Name::parse("smile-😃 smiley").unwrap();
831        let b = Name::parse("smile-😰 smiley").unwrap();
832        assert!(a.consistent_with(&b));
833
834        let a = Name::parse("smile smiley-😃").unwrap();
835        let b = Name::parse("smile smiley-😰").unwrap();
836        assert!(a.consistent_with(&b));
837
838        let a = Name::parse("smile 😃 smiley").unwrap();
839        let b = Name::parse("smile 😰 smiley").unwrap();
840        assert!(a.consistent_with(&b));
841
842        let a = Name::parse("smile-😃 smiley").unwrap();
843        let b = Name::parse("smile-😰 smiley").unwrap();
844        assert!(a.consistent_with(&b));
845
846        let a = Name::parse("smile😃 smiley").unwrap();
847        let b = Name::parse("smile😰 smiley").unwrap();
848        assert!(a.consistent_with(&b));
849
850        let a = Name::parse("smile smiley😃").unwrap();
851        let b = Name::parse("smile smiley😰").unwrap();
852        assert!(a.consistent_with(&b));
853    }
854
855    #[cfg(feature = "bench")]
856    #[bench]
857    fn initialize_struct_initial_surname(b: &mut Bencher) {
858        let name = "J. Doe";
859        let parsed = parse::parse(&*name).unwrap();
860        b.iter(|| {
861            black_box(
862                Name::initialize_struct(&parsed, name.len())
863                    .unwrap()
864                    .byte_len(),
865            )
866        })
867    }
868
869    #[cfg(feature = "bench")]
870    #[bench]
871    fn initialize_struct_first_last(b: &mut Bencher) {
872        let name = "John Doe";
873        let parsed = parse::parse(&*name).unwrap();
874        b.iter(|| {
875            black_box(
876                Name::initialize_struct(&parsed, name.len())
877                    .unwrap()
878                    .byte_len(),
879            )
880        })
881    }
882
883    #[cfg(feature = "bench")]
884    #[bench]
885    fn initialize_struct_complex(b: &mut Bencher) {
886        let name = "John Allen Q.R. de la MacDonald Jr.";
887        let parsed = parse::parse(&*name).unwrap();
888        b.iter(|| {
889            black_box(
890                Name::initialize_struct(&parsed, name.len())
891                    .unwrap()
892                    .byte_len(),
893            )
894        })
895    }
896}
897
/// Benchmarks for `Name::parse` and `Name::consistent_with`.
///
/// Compiled only for test builds with the `bench` feature enabled.
#[cfg(feature = "bench")]
#[cfg(test)]
mod bench {
    use super::Name;
    use std::fs::File;
    use std::io::prelude::*;
    use std::io::BufReader;

    use test::{black_box, Bencher};

    /// Relative path of the fixture read by the `*_many` benchmarks, one name per line.
    /// It is resolved against the working directory the benchmarks run in.
    const BENCHMARK_NAMES_PATH: &str = "tests/benchmark-names.txt";

    /// Upper bound on how many parsed names `bench_equality_many` compares against the full set.
    const EQUALITY_SAMPLE_SIZE: usize = 64;

    /// Reads every line of the benchmark fixture into memory.
    ///
    /// # Panics
    ///
    /// Panics, reporting the underlying I/O error, if the fixture cannot be opened or read.
    fn benchmark_lines() -> Vec<String> {
        let file = File::open(BENCHMARK_NAMES_PATH)
            .unwrap_or_else(|err| panic!("failed to open {}: {}", BENCHMARK_NAMES_PATH, err));

        BufReader::new(file)
            .lines()
            .map(|line| {
                line.unwrap_or_else(|err| {
                    panic!("failed to read {}: {}", BENCHMARK_NAMES_PATH, err)
                })
            })
            .collect()
    }

    /// Parses a plain "first last" name.
    #[bench]
    fn bench_parsing_first_last(b: &mut Bencher) {
        b.iter(|| {
            let parsed = Name::parse("Juan Garcia");
            black_box(parsed.is_none())
        })
    }

    /// Parses a name written surname-first with initials ("Garcia, J.Q.").
    #[bench]
    fn bench_parsing_sort_order(b: &mut Bencher) {
        b.iter(|| {
            let parsed = Name::parse("Garcia, J.Q.");
            black_box(parsed.is_none())
        })
    }

    /// Parses an all-uppercase name.
    #[bench]
    fn bench_parsing_needs_namecase(b: &mut Bencher) {
        b.iter(|| {
            let parsed = Name::parse("JAIME GARCIA");
            black_box(parsed.is_none())
        })
    }

    /// Parses an email address, i.e. input that is not a personal name.
    #[bench]
    fn bench_parsing_unparseable(b: &mut Bencher) {
        b.iter(|| {
            let parsed = Name::parse("foo@bar.com");
            black_box(parsed.is_none())
        })
    }

    /// Parses a name mixing scripts, a title, a quoted nickname and a generational suffix.
    #[bench]
    fn bench_parsing_complex(b: &mut Bencher) {
        let name = "鈴木 Velasquez y Garcia, Dr. Juan Q. 'Don Juan' Xavier III";
        b.iter(|| {
            let parsed = Name::parse(name);
            black_box(parsed.is_none())
        })
    }

    /// Compares "Jane Doe" with "Jane H. Doe".
    #[bench]
    fn bench_equality_equal(b: &mut Bencher) {
        let x = Name::parse("Jane Doe").unwrap();
        let y = Name::parse("Jane H. Doe").unwrap();

        b.iter(|| black_box(x.consistent_with(&y)))
    }

    /// Compares two names that share no component.
    #[bench]
    fn bench_equality_not_equal(b: &mut Bencher) {
        let x = Name::parse("Jane Doe").unwrap();
        let y = Name::parse("Foo Bar").unwrap();

        b.iter(|| black_box(x.consistent_with(&y)))
    }

    /// Compares two names that share a surname and first initial but differ in given name.
    #[bench]
    fn bench_equality_close_to_equal(b: &mut Bencher) {
        let x = Name::parse("Jane Doe").unwrap();
        let y = Name::parse("John Doe").unwrap();

        b.iter(|| black_box(x.consistent_with(&y)))
    }

    /// Parses every line of the benchmark fixture, counting successes and failures.
    #[bench]
    fn bench_parsing_many(b: &mut Bencher) {
        // Load the fixture before timing starts so that file I/O is not measured.
        let names = benchmark_lines();

        b.iter(move || {
            let mut valid = 0;
            let mut invalid = 0;

            for name in &names {
                if Name::parse(name).is_some() {
                    valid += 1;
                } else {
                    invalid += 1;
                }
            }

            // Feed both counters to `black_box` so neither tally can be optimized away.
            black_box(valid);
            black_box(invalid);
        })
    }

    /// Compares the first `EQUALITY_SAMPLE_SIZE` parseable fixture names against every
    /// parseable fixture name.
    #[bench]
    fn bench_equality_many(b: &mut Bencher) {
        // Lines that do not parse as names are dropped during setup.
        let names: Vec<Name> = benchmark_lines()
            .iter()
            .filter_map(|line| Name::parse(line))
            .collect();

        b.iter(|| {
            let mut matches = 0;

            // `take` rather than slicing: a fixture yielding fewer names than the sample
            // size shrinks the sample instead of panicking on an out-of-range slice.
            for left in names.iter().take(EQUALITY_SAMPLE_SIZE) {
                for right in &names {
                    if left.consistent_with(right) {
                        matches += 1;
                    }
                }
            }

            black_box(matches);
        })
    }
}