Skip to main content

password_strength/analyzer/
pattern.rs

1use crate::analyzer::Analyzer;
2use std::collections::{HashMap, HashSet};
3use unicode_segmentation::UnicodeSegmentation;
4
5/// Represents a pattern string, typically part of a keyboard layout or sequence.
6type PatternString = &'static str;
7/// Represents a row on a standard keyboard layout.
8type KeyboardRow = PatternString;
9/// Represents a column (or diagonal) on a standard keyboard layout.
10type KeyboardColumn = PatternString;
11/// Represents a common character sequence (e.g., alphabet, numbers).
12type CharSequence = PatternString;
13/// Represents a common character substitution (original character, substituted character).
14type Substitution = (&'static str, &'static str);
15
16/// Configuration constants for pattern detection penalties.
17const MIN_PATTERN_LENGTH: usize = 3;
18const DEFAULT_MAX_KEYBOARD_PENALTY: f32 = 0.30;
19const DEFAULT_MAX_REPEAT_PENALTY: f32 = 0.30;
20const DEFAULT_MAX_SEQUENCE_PENALTY: f32 = 0.25;
21const DEFAULT_MAX_SUBSTITUTION_PENALTY: f32 = 0.15;
22const DEFAULT_HIGH_FREQUENCY_THRESHOLD: f32 = 0.35;
23
24/// `PatternAnalyzer` analyzes passwords for common weak patterns.
25///
26/// It checks for:
27/// - Keyboard patterns (e.g., "qwerty", "asdf").
28/// - Repeated characters (consecutive like "aaa" or high frequency like "banana").
29/// - Sequential characters (e.g., "abc", "123").
30/// - Common character substitutions (e.g., "@" for "a", "3" for "e").
31/// - Repeated sequences (e.g., "abcabc", "passpass").
32///
33/// Penalties are applied for each detected weakness, reducing the overall score from 1.0.
34///
35/// # Examples
36///
37/// ```
38/// use password_strength::analyzer::{Analyzer, pattern::PatternAnalyzer};
39///
40/// let analyzer = PatternAnalyzer::default();
41/// println!("Score for 'qwerty12345': {}", analyzer.analyze("qwerty12345"));
42/// println!("Score for 'P@sswOrd!': {}", analyzer.analyze("P@sswOrd!"));
43/// ```
44#[derive(Debug, Clone)]
45pub struct PatternAnalyzer {
46    keyboard_rows: Vec<KeyboardRow>,
47    keyboard_columns: Vec<KeyboardColumn>,
48    char_sequences: Vec<CharSequence>,
49    substitution_map: HashMap<char, Vec<char>>,
50    substitution_chars: HashSet<char>,
51    min_pattern_length: usize,
52    max_keyboard_penalty: f32,
53    max_repeat_penalty: f32,
54    max_sequence_penalty: f32,
55    max_substitution_penalty: f32,
56    high_frequency_threshold: f32,
57}
58
59impl Default for PatternAnalyzer {
60    fn default() -> Self {
61        let substitution_patterns: Vec<Substitution> = vec![
62            ("a", "@"),
63            ("a", "4"),
64            ("e", "3"),
65            ("i", "1"),
66            ("i", "!"),
67            ("o", "0"),
68            ("s", "$"),
69            ("s", "5"),
70            ("t", "7"),
71            ("l", "1"),
72        ];
73
74        let mut substitution_map = HashMap::new();
75        let mut substitution_chars = HashSet::new();
76        for (original, substitution) in substitution_patterns {
77            if let (Some(orig_char), Some(sub_char)) =
78                (original.chars().next(), substitution.chars().next())
79            {
80                substitution_map
81                    .entry(sub_char)
82                    .or_insert_with(Vec::new)
83                    .push(orig_char);
84                substitution_chars.insert(sub_char);
85            }
86        }
87
88        PatternAnalyzer {
89            keyboard_rows: vec!["qwertyuiop", "asdfghjkl", "zxcvbnm", "`1234567890-="],
90            keyboard_columns: vec![
91                "`1qaz", "2wsx", "3edc", "4rfv", "5tgb", "6yhn", "7ujm", "8ik,", "9ol.", "0p;/",
92                "-[", "='",
93            ],
94            char_sequences: vec!["abcdefghijklmnopqrstuvwxyz", "0123456789", "~!@#$%^&*()_+"],
95            substitution_map,
96            substitution_chars,
97            min_pattern_length: MIN_PATTERN_LENGTH,
98            max_keyboard_penalty: DEFAULT_MAX_KEYBOARD_PENALTY,
99            max_repeat_penalty: DEFAULT_MAX_REPEAT_PENALTY,
100            max_sequence_penalty: DEFAULT_MAX_SEQUENCE_PENALTY,
101            max_substitution_penalty: DEFAULT_MAX_SUBSTITUTION_PENALTY,
102            high_frequency_threshold: DEFAULT_HIGH_FREQUENCY_THRESHOLD,
103        }
104    }
105}
106
107impl PatternAnalyzer {
108    /// Detects keyboard patterns (rows, columns, diagonals) in the password.
109    /// Operates on the lowercase version of the password.
110    fn detect_keyboard_patterns(&self, password_lowercase: &str) -> f32 {
111        let mut penalty = 0.0;
112
113        let all_keyboard_patterns = self
114            .keyboard_rows
115            .iter()
116            .chain(self.keyboard_columns.iter());
117
118        for pattern_set in all_keyboard_patterns {
119            penalty += self.find_patterns_in_string(pattern_set, password_lowercase);
120        }
121
122        penalty.min(self.max_keyboard_penalty)
123    }
124
125    /// Helper function to find occurrences of patterns (and their reverses) from a source string within the password.
126    fn find_patterns_in_string(&self, pattern_source: &str, password_lowercase: &str) -> f32 {
127        let mut current_penalty = 0.0;
128        let password_len = password_lowercase.graphemes(true).count();
129        if password_len == 0 {
130            return 0.0;
131        }
132
133        for len in self.min_pattern_length..=password_len {
134            for password_subsequence in password_lowercase
135                .graphemes(true)
136                .collect::<Vec<&str>>()
137                .windows(len)
138            {
139                let sub: String = password_subsequence.join("");
140                let sub_rev: String = sub.chars().rev().collect();
141
142                if pattern_source.contains(&sub) || pattern_source.contains(&sub_rev) {
143                    current_penalty += 0.03 + (len as f32 - self.min_pattern_length as f32) * 0.015;
144                }
145            }
146        }
147        current_penalty
148    }
149
150    /// Detects repeated characters:
151    /// 1. Consecutive identical characters (e.g., "aaa", "bbb"). Penalizes *each* run >= 3.
152    /// 2. High frequency of any single character.
153    fn detect_repeated_characters(&self, password: &str) -> f32 {
154        let mut consecutive_penalty = 0.0;
155        let mut frequency_penalty = 0.0;
156        let graphemes: Vec<&str> = password.graphemes(true).collect();
157        let password_len = graphemes.len();
158
159        if password_len < 2 {
160            return 0.0;
161        }
162
163        let mut current_consecutive_repeat = 1;
164        for i in 1..password_len {
165            if graphemes[i].eq_ignore_ascii_case(graphemes[i - 1]) {
166                current_consecutive_repeat += 1;
167            } else {
168                if current_consecutive_repeat >= 3 {
169                    consecutive_penalty +=
170                        0.05 * (current_consecutive_repeat - (self.min_pattern_length - 1)) as f32;
171                }
172                current_consecutive_repeat = 1;
173            }
174        }
175        if current_consecutive_repeat >= 3 {
176            consecutive_penalty +=
177                0.05 * (current_consecutive_repeat - (self.min_pattern_length - 1)) as f32;
178        }
179
180        let mut char_counts = HashMap::new();
181        for g in &graphemes {
182            for c in g.chars() {
183                for lower_c in c.to_lowercase() {
184                    *char_counts.entry(lower_c).or_insert(0) += 1;
185                }
186            }
187        }
188
189        let total_chars = password.chars().count() as f32;
190        if total_chars > 0.0 {
191            for count in char_counts.values() {
192                let ratio = *count as f32 / total_chars;
193                if ratio > self.high_frequency_threshold {
194                    frequency_penalty += (ratio - self.high_frequency_threshold) * 0.5;
195                }
196            }
197        }
198
199        (consecutive_penalty + frequency_penalty).min(self.max_repeat_penalty)
200    }
201
202    /// Detects sequential characters (alphabetic, numeric, common symbols).
203    /// Finds contiguous sequences and penalizes based on their length.
204    /// Operates on the lowercase version of the password.
205    fn detect_sequential_characters(&self, password_lowercase: &str) -> f32 {
206        let mut penalty = 0.0;
207        let pass_chars: Vec<char> = password_lowercase.chars().collect();
208        let n = pass_chars.len();
209
210        if n < self.min_pattern_length {
211            return 0.0;
212        }
213
214        let mut i = 0;
215        while i < n {
216            let mut longest_sequence_len = 0;
217            for sequence_source in &self.char_sequences {
218                let source_chars: Vec<char> = sequence_source.chars().collect();
219                let source_n = source_chars.len();
220
221                for j in 0..source_n {
222                    let mut current_match_len = 0;
223                    while i + current_match_len < n && j + current_match_len < source_n {
224                        if pass_chars[i + current_match_len] == source_chars[j + current_match_len]
225                        {
226                            current_match_len += 1;
227                        } else {
228                            break;
229                        }
230                    }
231
232                    let mut current_rev_match_len = 0;
233                    while i + current_rev_match_len < n && j + current_rev_match_len < source_n {
234                        if pass_chars[i + current_rev_match_len]
235                            == source_chars[source_n - 1 - (j + current_rev_match_len)]
236                        {
237                            current_rev_match_len += 1;
238                        } else {
239                            break;
240                        }
241                    }
242
243                    longest_sequence_len = longest_sequence_len
244                        .max(current_match_len)
245                        .max(current_rev_match_len);
246                }
247            }
248
249            if longest_sequence_len >= self.min_pattern_length {
250                penalty +=
251                    0.03 + (longest_sequence_len as f32 - self.min_pattern_length as f32) * 0.02;
252                i += longest_sequence_len;
253            } else {
254                i += 1;
255            }
256        }
257
258        penalty.min(self.max_sequence_penalty)
259    }
260
261    /// Detects common character substitutions (e.g., "@" for "a", "3" for "e").
262    /// Penalizes based on the *presence* of substitution characters from the predefined list.
263    fn detect_common_substitutions(&self, password: &str) -> f32 {
264        let mut penalty: f32 = 0.0;
265        let mut found_substitution_types = HashSet::new();
266
267        for grapheme in password.graphemes(true) {
268            if let Some(c) = grapheme.chars().next() {
269                if self.substitution_chars.contains(&c) && found_substitution_types.insert(c) {
270                    penalty += 0.04;
271                }
272            }
273        }
274
275        penalty.min(self.max_substitution_penalty)
276    }
277
278    /// Detects repeated patterns or sequences within the password (e.g., "abcabc", "testtest").
279    fn detect_repeated_patterns(&self, password: &str) -> f32 {
280        let mut penalty = 0.0;
281        let graphemes: Vec<&str> = password.graphemes(true).collect();
282        let n = graphemes.len();
283
284        if n < self.min_pattern_length * 2 {
285            return 0.0;
286        }
287
288        for len in self.min_pattern_length..=(n / 2) {
289            let mut occurrences = HashMap::new();
290
291            for i in 0..=(n - len) {
292                let pattern_slice = &graphemes[i..(i + len)];
293                let pattern_key: String = pattern_slice.join("").to_lowercase();
294
295                for j in 0..=(n - len) {
296                    if i == j {
297                        continue;
298                    }
299                    let compare_slice = &graphemes[j..(j + len)];
300                    let compare_key: String = compare_slice.join("").to_lowercase();
301
302                    if pattern_key == compare_key {
303                        *occurrences.entry(pattern_key.clone()).or_insert(0) += 1;
304                        break;
305                    }
306                }
307            }
308
309            for count in occurrences.values() {
310                if *count > 1 {
311                    penalty += 0.02 + (len as f32 * 0.005) + (*count as f32 * 0.005);
312                }
313            }
314        }
315
316        penalty.min(self.max_repeat_penalty)
317    }
318}
319
320impl Analyzer for PatternAnalyzer {
321    /// Analyzes the password strength based on detected patterns.
322    /// Returns a score between 0.0 (weakest) and 1.0 (strongest relative to patterns).
323    fn analyze(&self, password: &str) -> f32 {
324        let password_len = password.graphemes(true).count();
325
326        if password_len < self.min_pattern_length {
327            return 1.0;
328        }
329
330        let password_lowercase = password.to_lowercase();
331
332        let mut score = 1.0;
333
334        let keyboard_penalty = self.detect_keyboard_patterns(&password_lowercase);
335        let repeat_penalty = self.detect_repeated_characters(password);
336        let sequence_penalty = self.detect_sequential_characters(&password_lowercase);
337        let substitution_penalty = self.detect_common_substitutions(password);
338        let repeated_pattern_penalty = self.detect_repeated_patterns(password);
339
340        score -= keyboard_penalty;
341        score -= repeat_penalty;
342        score -= sequence_penalty;
343        score -= substitution_penalty;
344        score -= repeated_pattern_penalty;
345
346        score.clamp(0.0, 1.0)
347    }
348}
349
350#[cfg(test)]
351mod tests {
352    use super::*;
353
354    fn default_analyzer() -> PatternAnalyzer {
355        PatternAnalyzer::default()
356    }
357
358    macro_rules! assert_score_between {
359        ($score:expr, $min:expr, $max:expr) => {
360            assert!(
361                $score >= $min && $score <= $max,
362                "Score {} is not between {} and {}",
363                $score,
364                $min,
365                $max
366            );
367        };
368    }
369
370    #[test]
371    fn test_empty_password() {
372        let analyzer = default_analyzer();
373        assert_eq!(analyzer.analyze(""), 1.0, "Empty password score");
374    }
375
376    #[test]
377    fn test_short_password() {
378        let analyzer = default_analyzer();
379        assert_eq!(analyzer.analyze("a"), 1.0, "Single char password score");
380        assert_eq!(analyzer.analyze("ab"), 1.0, "Two char password score");
381    }
382
383    #[test]
384    fn test_no_patterns() {
385        let analyzer = default_analyzer();
386        let score = analyzer.analyze("R#tG$yH7kL*p");
387        assert_score_between!(score, 0.9, 1.0);
388    }
389
390    #[test]
391    fn test_keyboard_row_pattern() {
392        let analyzer = default_analyzer();
393        let score_qwerty = analyzer.analyze("qwertyuiop");
394        let score_asdf = analyzer.analyze("asdfghjkl");
395        let score_zxcv = analyzer.analyze("zxcvbnm");
396        let score_123 = analyzer.analyze("1234567890");
397        let score_mixed = analyzer.analyze("myasdfpass");
398
399        assert!(score_qwerty < 0.8, "qwerty penalty");
400        assert!(score_asdf < 0.8, "asdf penalty");
401        assert!(score_zxcv < 0.8, "zxcv penalty");
402        assert!(score_123 < 0.8, "123 penalty");
403        assert!(score_mixed < 1.0, "mixed asdf penalty");
404    }
405
406    #[test]
407    fn test_keyboard_column_pattern() {
408        let analyzer = default_analyzer();
409        let score_qaz = analyzer.analyze("qazwsx");
410        assert!(score_qaz < 1.0, "qaz column penalty expected");
411    }
412
413    #[test]
414    fn test_keyboard_reversed_pattern() {
415        let analyzer = default_analyzer();
416        let score_rev_qwerty = analyzer.analyze("poiuytrewq");
417        let score_rev_asdf = analyzer.analyze("lkjhgfdsa");
418        assert!(score_rev_qwerty < 0.8, "reversed qwerty penalty");
419        assert!(score_rev_asdf < 0.8, "reversed asdf penalty");
420    }
421
422    #[test]
423    fn test_keyboard_case_insensitive() {
424        let analyzer = default_analyzer();
425        let score_upper = analyzer.analyze("QWERTY");
426        let score_mixed = analyzer.analyze("qWeRtY");
427        assert!(score_upper < 0.8, "Uppercase QWERTY penalty");
428        assert!(score_mixed < 0.8, "Mixed case qwerty penalty");
429    }
430
431    #[test]
432    fn test_repeated_consecutive_chars() {
433        let analyzer = default_analyzer();
434        let score_aaa = analyzer.analyze("testaaa");
435        let score_aaaaa = analyzer.analyze("testaaaaa");
436        let score_end = analyzer.analyze("testingggg");
437        let score_multi = analyzer.analyze("aaabbbccc");
438
439        assert!(score_aaa < 1.0, "aaa penalty");
440        assert!(score_aaaaa < score_aaa, "aaaaa more penalized than aaa");
441        assert!(score_end < 1.0, "end gggg penalty");
442        assert!(
443            score_multi < 0.9,
444            "a multi-repeat penalty should be significant"
445        );
446    }
447
448    #[test]
449    fn test_repeated_chars_case_insensitive() {
450        let analyzer = default_analyzer();
451        let score_a_aaa = analyzer.analyze("testaAaa");
452        assert!(score_a_aaa < 1.0, "aAaa penalty");
453    }
454
455    #[test]
456    fn test_repeated_high_frequency() {
457        let analyzer = default_analyzer();
458        let score_banana = analyzer.analyze("banana");
459        let score_papapa = analyzer.analyze("papapapapapapapa");
460        let score_distinct = analyzer.analyze("abcdefghijkl");
461
462        assert!(score_banana < 1.0, "banana frequency penalty");
463        assert!(score_papapa < 0.7, "papapa high-frequency penalty");
464        assert_score_between!(score_distinct, 0.70, 0.85);
465    }
466
467    #[test]
468    fn test_sequential_alpha() {
469        let analyzer = default_analyzer();
470        let score_abc = analyzer.analyze("abcdef");
471        let score_xyz = analyzer.analyze("uvwxyz");
472        let score_rev = analyzer.analyze("fedcba");
473        let score_mixed = analyzer.analyze("passabcword");
474
475        assert!(score_abc < 1.0, "abc penalty expected");
476        assert!(score_xyz < 1.0, "xyz penalty expected");
477        assert!(score_rev < 1.0, "reversed abc penalty expected");
478        assert!(score_mixed < 1.0, "mixed abc penalty expected");
479    }
480
481    #[test]
482    fn test_sequential_numeric() {
483        let analyzer = default_analyzer();
484        let score_123 = analyzer.analyze("12345");
485        let score_789 = analyzer.analyze("7890");
486        let score_rev = analyzer.analyze("54321");
487        let score_mixed = analyzer.analyze("pass123word");
488
489        assert!(score_123 < 0.9, "123 penalty");
490        assert!(score_789 < 0.9, "789 penalty");
491        assert!(score_rev < 0.9, "reversed 123 penalty");
492        assert!(score_mixed < 1.0, "mixed 123 penalty");
493    }
494
495    #[test]
496    fn test_sequential_symbols() {
497        let analyzer = default_analyzer();
498        let score_sym = analyzer.analyze("!@#$%");
499        if PatternAnalyzer::default()
500            .char_sequences
501            .iter()
502            .any(|s| s.contains("!@#$"))
503        {
504            assert!(score_sym < 0.95, "Symbol sequence penalty");
505        } else {
506            println!("Symbol sequence is not explicitly tested if not in defaults.");
507            assert!(score_sym <= 1.0);
508        }
509    }
510
511    #[test]
512    fn test_sequential_case_insensitive() {
513        let analyzer = default_analyzer();
514        let score_abc = analyzer.analyze("aBcDeF");
515        assert!(score_abc < 0.9, "Mixed case sequence penalty");
516    }
517
518    #[test]
519    fn test_substitutions() {
520        let analyzer = default_analyzer();
521        let score_p4ss = analyzer.analyze("p4ssw0rd");
522        let score_l33t = analyzer.analyze("l33t$p34k!");
523        let score_one = analyzer.analyze("password@");
524        let score_none = analyzer.analyze("password");
525
526        assert!(score_p4ss < 1.0, "p4ssw0rd sub penalty");
527        assert!(
528            score_l33t < score_p4ss,
529            "l33t should have higher sub penalty"
530        );
531        assert!(score_one < 1.0, "Single sub penalty");
532        assert!(
533            score_none > score_p4ss,
534            "No subs should score higher than subs"
535        );
536    }
537
538    #[test]
539    fn test_repeated_patterns() {
540        let analyzer = default_analyzer();
541        let score_abcabc = analyzer.analyze("abcabcabc");
542        let score_testtest = analyzer.analyze("testtesttest");
543        let score_abab = analyzer.analyze("ababababab");
544        let score_complex = analyzer.analyze("abc123abc123");
545        let score_case = analyzer.analyze("TestTest");
546
547        assert!(score_abcabc < 0.9, "abcabc penalty");
548        assert!(
549            score_abcabc < score_testtest,
550            "abcabcabc expected weaker than testtesttest"
551        );
552        assert!(score_abab < 0.9, "abab penalty");
553        assert!(score_complex < 0.9, "complex repeat penalty");
554        assert!(score_case < 0.9, "Case insensitive repeat penalty");
555    }
556}