keepass_dump_extractor/
lib.rs

1use itertools::Itertools;
2use once_cell::sync::Lazy;
3use std::collections::{HashMap, HashSet};
4
5use cli::Format;
6
7pub mod cli;
8
/// The password-masking character '●' (U+25CF) as it appears in memory: one UTF-16 code unit
/// in little-endian byte order, i.e. `[0xCF, 0x25]`.
const PREFIX: [u8; 2] = 0x25CF_u16.to_le_bytes();
11static VALID_CHARS: Lazy<HashSet<char>> = Lazy::new(|| {
12    let mut set = HashSet::new();
13    for range in [0x20..=0x7E, 0x0A0..=0x2AF, 0x1E00..=0x1EFF] {
14        set.extend(range.filter_map(char::from_u32));
15    }
16    set
17});
18static COMMON_CHARS: Lazy<Vec<char>> =
19    Lazy::new(|| (0x20..=0x7E).filter_map(char::from_u32).collect());
20
21pub fn find_leaks(bytes: &[u8]) -> Vec<(usize, char)> {
22    let mut results = Vec::new();
23    let mut length = 0;
24    let mut i = 0;
25    while i < bytes.len() - 1 {
26        // Leaks must begin with a series of '●'
27        if bytes[i..i + 2] == PREFIX {
28            length += 1;
29            i += 2;
30            continue;
31        } else if length > 0 {
32            // Leaks are encoded in UTF-16-LE
33            if let Some(Ok(c)) =
34                char::decode_utf16([u16::from_le_bytes([bytes[i], bytes[i + 1]])]).next()
35            {
36                // Filter some uncommon characters, and check for null bytes
37                if VALID_CHARS.contains(&c) && bytes[i + 2..i + 4] == [0x00, 0x00] {
38                    results.push((length, c));
39                    i += 4;
40                    continue;
41                }
42            }
43        }
44        length = 0;
45        i += 1;
46    }
47
48    results
49}
50
/// Buckets leaks by their first tuple field (the number of preceding '●'), keeping the leaks
/// of each bucket in the order they appeared in `leaks`.
fn group_by_length(leaks: Vec<(usize, char)>) -> HashMap<usize, Vec<(usize, char)>> {
    leaks.into_iter().fold(
        HashMap::new(),
        |mut groups: HashMap<usize, Vec<(usize, char)>>, leak| {
            groups.entry(leak.0).or_default().push(leak);
            groups
        },
    )
}
59
/// Counts how many times each distinct `(length, character)` leak occurs in `leaks`.
///
/// Returns a map from leak to its number of occurrences; empty input gives an empty map.
fn count_duplicates(leaks: &[(usize, char)]) -> HashMap<(usize, char), usize> {
    let mut counts = HashMap::new();
    for &leak in leaks {
        // Single lookup through the entry API: insert 0 on first sight, then increment.
        *counts.entry(leak).or_insert(0) += 1;
    }

    counts
}
69
70fn order_by_duplicates(leaks: &[(usize, char)]) -> Vec<(usize, char)> {
71    let map = count_duplicates(leaks);
72
73    let mut leaks = map.into_iter().collect::<Vec<_>>();
74    leaks.sort_by(|((a1, _), a2), ((b1, _), b2)| match a1.cmp(b1) {
75        std::cmp::Ordering::Equal => a2.cmp(b2).reverse(),
76        other => other,
77    });
78    leaks.into_iter().map(|(leak, _)| leak).collect()
79}
80
81fn get_unknowns_and_knowns(
82    leaks: Vec<(usize, char)>,
83    insert_common: bool,
84) -> (Vec<Vec<(usize, char)>>, Vec<char>) {
85    let leaks = order_by_duplicates(&leaks);
86    let leaks = group_by_length(leaks);
87    let max_length = *leaks.keys().max().unwrap() + 1;
88
89    let unknowns = (0..max_length)
90        .filter_map(|length| {
91            let chars = leaks.get(&length).cloned().unwrap_or_else(|| {
92                if insert_common {
93                    // Insert all common characters if there are no leaks of this length
94                    COMMON_CHARS.iter().map(|&c| (length, c)).collect()
95                } else {
96                    vec![]
97                }
98            });
99            (chars.len() > 1).then_some(chars)
100        })
101        .collect::<Vec<_>>();
102    let mut password = vec!['●'; max_length];
103    leaks.iter().for_each(|(length, chars)| {
104        if chars.len() == 1 {
105            password[*length] = chars.iter().next().unwrap().1;
106        }
107    });
108    (unknowns, password)
109}
110
111pub fn print_formatted_leaks(leaks: &[(usize, char)], format: cli::Format) {
112    match format {
113        // Directly print all hints about the password
114        Format::Found => {
115            let leaks = order_by_duplicates(leaks);
116
117            for (length, c) in leaks {
118                println!("{}{}", "●".repeat(length), c);
119            }
120        }
121        // Summarize the hints into the full size, leaving gaps for unknown characters
122        Format::Gaps => {
123            let (unknowns, password) = get_unknowns_and_knowns(leaks.to_vec(), false);
124
125            for unknown in unknowns {
126                for (length, c) in unknown {
127                    let mut password = password.clone();
128                    password[length] = c;
129                    println!("{}", password.iter().collect::<String>());
130                }
131            }
132        }
133        // Print all possible permutations of the password
134        Format::All => {
135            let (unknowns, mut password) = get_unknowns_and_knowns(leaks.to_vec(), true);
136
137            for perm in unknowns.iter().multi_cartesian_product() {
138                for (length, c) in perm {
139                    // No need to clone because next iteration will overwrite everything
140                    password[*length] = *c;
141                }
142                println!("{}", password.iter().collect::<String>());
143            }
144        }
145        // Write the raw results with all found information, not intended for human consumption
146        Format::Raw => {
147            let map = count_duplicates(leaks);
148            let mut leaks = map.into_iter().collect::<Vec<_>>();
149            leaks.sort_by(|((a1, _), _), ((b1, _), _)| a1.cmp(b1));
150
151            for ((length, c), count) in leaks {
152                println!("{count}\t{length}\t{c}");
153            }
154        }
155    }
156}
157
#[cfg(test)]
mod tests {
    use super::*;
    use hex_literal::hex;

    /// ASCII characters after a run of '●' are reported with the run length.
    #[test]
    fn simple_character() {
        // The lone '●' at the start is followed by a null code unit and filler bytes, so only
        // the run of three '●' before 'g' counts as a leak.
        let noisy_dump = hex!("cf2500004141414141414141cf25cf25cf25670000004242424242424242");
        let found = find_leaks(&noisy_dump);
        assert_eq!(found, vec![(3, 'g')]);

        // Twelve '●' followed by 'A' and the two terminating null bytes
        let long_run = hex!("cf25cf25cf25cf25cf25cf25cf25cf25cf25cf25cf25cf2541000000");
        let found = find_leaks(&long_run);
        assert_eq!(found, vec![(12, 'A')]);
    }

    /// Characters outside ASCII are decoded from their UTF-16-LE code unit.
    #[test]
    fn non_ascii_character() {
        // 0x0153 ('œ') is stored little-endian as 53 01
        let dump = hex!("cf25cf25cf25cf2553010000");
        let found = find_leaks(&dump);

        assert_eq!(found, vec![(4, 'œ')]);
    }
}