1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
use super::utils::*;
use super::Name;
use smallvec::SmallVec;
use std::borrow::Cow;

impl Name {
    /// Checks whether a whitespace-free, "munged" string (for example an email
    /// localpart or a URL slug) plausibly refers to this name.
    ///
    /// The following strategies are tried in order, and the first success wins:
    ///
    /// 1. If the input contains non-alphanumeric separators, the pieces are
    ///    re-joined with spaces, parsed as a name, and tested for consistency
    ///    with `self`.
    /// 2. The normalized input is compared with this name's full initials.
    /// 3. The normalized input is compared with the given name followed by the
    ///    surname initial(s).
    /// 4. As much of the surname as possible is located in the input, and the
    ///    text before and after it is matched against the remaining name parts.
    ///
    /// Returns `false` for an empty string, or when nothing is left after
    /// normalization.
    ///
    /// # Examples
    ///
    /// ```
    /// use human_name::Name;
    /// let name = Name::parse("Jane A. Doe").unwrap();
    ///
    /// assert!(name.matches_slug_or_localpart("jane.doe"));
    /// assert!(!name.matches_slug_or_localpart("john.doe"));
    ///
    /// assert!(name.matches_slug_or_localpart("janedoe"));
    /// assert!(!name.matches_slug_or_localpart("johndoe"));
    ///
    /// assert!(name.matches_slug_or_localpart("jad"));
    /// assert!(!name.matches_slug_or_localpart("jd"));
    ///
    /// assert!(name.matches_slug_or_localpart("janed"));
    /// assert!(!name.matches_slug_or_localpart("jane"));
    /// assert!(!name.matches_slug_or_localpart("johnd"));
    ///
    /// ```
    pub fn matches_slug_or_localpart(&self, string: &str) -> bool {
        if string.is_empty() {
            return false;
        }

        // Strategy 1: separators give us word boundaries, so rebuild a
        // space-delimited string and try to parse it as a name outright.
        if string.chars().any(is_nonalphanumeric) {
            let spaced = string
                .split(is_nonalphanumeric)
                .filter(|piece| !piece.is_empty())
                .collect::<SmallVec<[&str; 5]>>()
                .join(" ");

            let parsed_is_consistent =
                Name::parse(&spaced).map_or(false, |parsed| parsed.consistent_with(self));
            if parsed_is_consistent {
                return true;
            }
        }

        // Normalize the input, borrowing when it is already all-lowercase
        // letters so that the common case does not allocate.
        let already_normalized = string
            .chars()
            .all(|c| c.is_alphabetic() && c.is_lowercase());
        let normed: Cow<str> = if already_normalized {
            Cow::Borrowed(string)
        } else {
            Cow::Owned(
                string
                    .chars()
                    .filter_map(lowercase_if_alpha)
                    .collect::<String>(),
            )
        };

        if normed.is_empty() {
            return false;
        }

        // Appends the lowercased first character of every surname word.
        let push_surname_initials = |buf: &mut String| {
            buf.extend(
                self.surname_iter()
                    .filter_map(|word| word.chars().next())
                    .flat_map(char::to_lowercase),
            );
        };

        // Strategy 2: the input is exactly the full set of initials. Only
        // attempted when there are more than two of them.
        let full_initials_len = self.initials().len() + self.surname_words();
        if full_initials_len > 2 && normed.len() == full_initials_len {
            let mut candidate = String::with_capacity(full_initials_len);
            candidate.extend(self.initials().chars().flat_map(char::to_lowercase));
            push_surname_initials(&mut candidate);

            if candidate == *normed {
                return true;
            }
        }

        // Strategy 3: the input is the given name followed by the surname
        // initial(s).
        if let Some(given) = self.given_name() {
            let expected_len = given.len() + self.surname_words();
            if normed.len() == expected_len {
                let mut candidate = String::with_capacity(expected_len);
                candidate.extend(given.chars().flat_map(char::to_lowercase));
                push_surname_initials(&mut candidate);

                if candidate == *normed {
                    return true;
                }
            }
        }

        // Strategy 4 (general case): locate as much of the surname as we can,
        // then judge whatever sits before and after it.
        let (match_begin, match_len, found_exact_surname) = match self.find_surname_in(&normed) {
            Some(found) => found,
            None => return false,
        };
        let match_end = match_begin + match_len;

        let prefix = if match_begin == 0 {
            None
        } else {
            Some(&normed[..match_begin])
        };

        let suffix = if match_end >= normed.len() {
            None
        } else {
            Some(&normed[match_end..])
        };

        let prefix_len = prefix.map_or(0, str::len);
        let suffix_len = suffix.map_or(0, str::len);
        if prefix_len < 2 && suffix_len < 2 {
            // With almost no surrounding context, a surname match of fewer
            // than three bytes, or a partial surname match of fewer than five
            // bytes, is not enough evidence on its own.
            let too_weak = match_len < 3 || (match_len < 5 && !found_exact_surname);
            if too_weak {
                return false;
            }
        }

        // Unknown name parts are only tolerated when the whole surname was
        // found and it sits at one end of the input.
        let allow_unknowns = found_exact_surname && (prefix.is_none() || suffix.is_none());

        let side_matches = |side: Option<&str>| {
            side.map_or(true, |part| {
                self.matches_remaining_name_parts(part, allow_unknowns)
            })
        };

        side_matches(prefix) && side_matches(suffix)
    }

    /// Locates the surname, or the longest leading portion of it, in `haystack`.
    ///
    /// The needle is built by concatenating the surname words after passing
    /// every character through `lowercase_if_alpha`. The last occurrence of the
    /// whole needle is preferred. If it is absent, the needle is shortened one
    /// character at a time from its end and searched again, for as long as more
    /// than two bytes remain.
    ///
    /// Returns `(byte offset of the match, byte length of the match, whether
    /// the entire surname matched)`, or `None` when the needle is shorter than
    /// two bytes or no acceptable portion occurs in `haystack`.
    fn find_surname_in(&self, haystack: &str) -> Option<(usize, usize, bool)> {
        let needle: String = self
            .surname_iter()
            .flat_map(|word| word.chars().filter_map(lowercase_if_alpha))
            .collect();
        if needle.len() < 2 {
            return None;
        }

        // Best case: the complete surname is present.
        if let Some(start) = haystack.rfind(needle.as_str()) {
            return Some((start, needle.len(), true));
        }

        // Otherwise try successively shorter leading portions. Walking
        // `char_indices` in reverse yields each cut point on a character
        // boundary, starting with "everything but the last character".
        needle
            .char_indices()
            .rev()
            .map(|(cut, _)| cut)
            .take_while(|&cut| cut > 2)
            .find_map(|cut| {
                haystack
                    .rfind(&needle[..cut])
                    .map(|start| (start, cut, false))
            })
    }

    /// Decides whether `part`, the text left over before or after the located
    /// surname, is compatible with the non-surname parts of this name.
    ///
    /// The following forms are accepted, tried in this order:
    ///
    /// 1. The given name(s), or a leading portion of them. When no given name
    ///    is available and `allow_unknowns` is set, anything that starts with
    ///    the lowercased first initial.
    /// 2. The initials, or a leading portion of them. When there are no middle
    ///    initials and `allow_unknowns` is set, fewer than four bytes starting
    ///    with the lowercased first initial.
    /// 3. The given name(s) followed by the middle initials or a leading
    ///    portion of them. When middle initials are unknown and
    ///    `allow_unknowns` is set, fewer than three extra bytes.
    /// 4. A leading portion of the given name followed by the known middle
    ///    initials.
    /// 5. Just the first initial, when the name goes by its middle name.
    ///
    /// Text comparisons are delegated to the `eq_or_starts_with!` and
    /// `eq_or_ends_with!` macros, which are defined outside this block.
    fn matches_remaining_name_parts(&self, part: &str, allow_unknowns: bool) -> bool {
        let first_initial_lc = self.first_initial().to_lowercase().next().unwrap();
        let given: Option<Cow<str>> = if self.surname_index == 1 {
            self.given_name().map(Cow::Borrowed)
        } else if self.surname_index > 0 {
            Some(self.given_iter().join())
        } else {
            None
        };

        // 1. Given name (possibly truncated), or a plausible unknown given name.
        match given {
            Some(ref name) => {
                if name.len() >= part.len() && eq_or_starts_with!(part, name) {
                    return true;
                }
            }
            None => {
                if allow_unknowns && part.starts_with(first_initial_lc) {
                    return true;
                }
            }
        }

        // 2. Initials (possibly truncated), or plausible unknown initials,
        //    assuming at most three first and middle initials in total.
        if self.middle_initials().is_none() {
            if allow_unknowns && part.len() < 4 && part.starts_with(first_initial_lc) {
                return true;
            }
        } else if self.initials().len() >= part.len()
            && eq_or_starts_with!(part, self.initials())
        {
            return true;
        }

        // 3. Given name followed by middle initials. When the middle initials
        //    are unknown, allow up to two unexplained trailing bytes.
        if let Some(ref name) = given {
            if part.len() > name.len() && eq_or_starts_with!(part, name) {
                let rest = &part[name.len()..];

                let rest_is_acceptable = match self.middle_initials() {
                    Some(initials) => {
                        initials.len() >= rest.len() && eq_or_starts_with!(rest, initials)
                    }
                    None => allow_unknowns && rest.len() < 3,
                };
                if rest_is_acceptable {
                    return true;
                }
            }
        }

        // 4. Truncated given name followed by the known middle initials.
        if let Some(initials) = self.middle_initials() {
            if part.len() > initials.len() && eq_or_ends_with!(initials, part) {
                let head = &part[..part.len() - initials.len()];

                if let Some(name) = self.given_name() {
                    if eq_or_starts_with!(head, name) {
                        return true;
                    }
                }
            }
        }

        // 5. A lone first initial is enough when the person goes by their
        //    middle name.
        self.goes_by_middle_name()
            && part.len() == first_initial_lc.len_utf8()
            && part.chars().next() == Some(first_initial_lc)
    }
}