1use std::fs::File;
101use std::io::{self, Read};
102use std::path::{Path, PathBuf};
103
104use sha1::{Digest, Sha1};
105
/// Name of the environment variable that overrides the dataset directory.
///
/// It is consulted by [`dataset_path_from_env`].
pub const HIBP_DATA_DIR_ENV: &str = "HIBP_DATA_DIR";
108
109pub fn dataset_path_from_env() -> PathBuf {
112 std::env::var(HIBP_DATA_DIR_ENV).map(PathBuf::from).unwrap_or_else(|_| {
113 PathBuf::from(env!("CARGO_MANIFEST_DIR"))
114 .parent()
115 .unwrap()
116 .join("pwndpasswords-bin")
117 })
118}
119
/// Size in bytes of one record inside a bucket file.
///
/// Bucket contents are viewed as consecutive chunks of this size when they
/// are searched.
pub const RECORD_SIZE: usize = 6;

/// Number of hexadecimal characters of the hash that name a bucket file.
pub const PREFIX_LEN: usize = 5;

/// Upper-case hexadecimal digits, indexed by nibble value (`0..=15`).
pub const HEX_CHARS: &[u8; 16] = b"0123456789ABCDEF";
128
/// Checks passwords against an on-disk breach dataset.
///
/// The checker borrows the dataset directory for `'a` and owns no other
/// state.
pub struct BreachChecker<'a> {
    /// Directory in which the `<prefix>.bin` bucket files are looked up.
    dataset_path: &'a Path,
}
135
136impl<'a> BreachChecker<'a> {
137 pub fn new(dataset_path: &'a Path) -> Self {
142 Self { dataset_path }
143 }
144
145 pub fn is_breached(&self, password: &str) -> io::Result<bool> {
150 let mut hasher = Sha1::new();
152 hasher.update(password.as_bytes());
153 let hash: [u8; 20] = hasher.finalize().into();
154
155 let prefix_hex = Self::prefix_hex(&hash);
156 let mut file = self.open_file(prefix_hex)?;
157
158 let mut buf = [0u8; 16384];
162
163 let mut total = 0usize;
171 loop {
172 match file.read(&mut buf[total..]) {
173 Ok(0) => break,
174 Ok(n) => {
175 total += n;
176 }
177 Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
178 Err(e) => return Err(e),
179 }
180 }
181
182 let search_key: [u8; 6] = unsafe { hash[2..8].try_into().unwrap_unchecked() };
183
184 Ok(buf[..total].as_chunks::<RECORD_SIZE>().0.binary_search(&search_key).is_ok())
185 }
186
187 #[doc(hidden)]
190 #[inline(always)]
191 pub fn prefix_hex(hash: &[u8; 20]) -> [u8; PREFIX_LEN] {
192 let mut prefix_hex = [0u8; PREFIX_LEN];
193
194 prefix_hex[0] = HEX_CHARS[(hash[0] >> 4) as usize];
195 prefix_hex[1] = HEX_CHARS[(hash[0] & 0x0f) as usize];
196 prefix_hex[2] = HEX_CHARS[(hash[1] >> 4) as usize];
197 prefix_hex[3] = HEX_CHARS[(hash[1] & 0x0f) as usize];
198 prefix_hex[4] = HEX_CHARS[(hash[2] >> 4) as usize];
199
200 prefix_hex
201 }
202
203 #[inline(always)]
205 fn build_path(&self, prefix_hex: [u8; PREFIX_LEN]) -> ([u8; 512], usize) {
206 let base = self.dataset_path.as_os_str().as_encoded_bytes();
207 let mut path_buf = [0u8; 512];
208 let path_len = base.len() + 1 + PREFIX_LEN + 4; path_buf[..base.len()].copy_from_slice(base);
210 path_buf[base.len()] = b'/';
211 path_buf[base.len() + 1..base.len() + 1 + PREFIX_LEN].copy_from_slice(&prefix_hex);
212 path_buf[base.len() + 1 + PREFIX_LEN..path_len].copy_from_slice(b".bin");
213
214 (path_buf, path_len)
215 }
216
217 #[doc(hidden)]
219 #[inline(always)]
220 pub fn open_file(&self, prefix_hex: [u8; PREFIX_LEN]) -> io::Result<File> {
221 let (path_buf, path_len) = self.build_path(prefix_hex);
222
223 let file_path = unsafe { std::str::from_utf8_unchecked(&path_buf[..path_len]) };
225
226 File::open(file_path)
227 }
228
229 #[cfg(feature = "tokio")]
252 pub async fn is_breached_async(&self, password: &str) -> io::Result<bool> {
253 let mut hasher = Sha1::new();
254 hasher.update(password.as_bytes());
255 let hash: [u8; 20] = hasher.finalize().into();
256
257 let search_key: [u8; 6] = unsafe { hash[2..8].try_into().unwrap_unchecked() };
258
259 let prefix_hex = Self::prefix_hex(&hash);
260 let (path_buf, path_len) = self.build_path(prefix_hex);
261
262 tokio::task::spawn_blocking(move || {
264 let file_path = unsafe { std::str::from_utf8_unchecked(&path_buf[..path_len]) };
265 let mut file = File::open(file_path)?;
266
267 let mut buf = [0u8; 16384];
268 let mut total = 0usize;
269 loop {
270 match file.read(&mut buf[total..]) {
271 Ok(0) => break,
272 Ok(n) => total += n,
273 Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
274 Err(e) => return Err(e),
275 }
276 }
277
278 Ok(buf[..total].as_chunks::<RECORD_SIZE>().0.binary_search(&search_key).is_ok())
279 })
280 .await
281 .expect("spawn_blocking task panicked")
282 }
283
284 #[cfg(feature = "compio")]
292 pub async fn is_breached_compio(&self, password: &str) -> io::Result<bool> {
293 use compio::fs::File;
294 use compio::io::AsyncReadAt;
295
296 let mut hasher = Sha1::new();
297 hasher.update(password.as_bytes());
298 let hash: [u8; 20] = hasher.finalize().into();
299
300 let search_key: [u8; 6] = unsafe { hash[2..8].try_into().unwrap_unchecked() };
301
302 let prefix_hex = Self::prefix_hex(&hash);
303 let (path_buf, path_len) = self.build_path(prefix_hex);
304 let file_path = unsafe { std::str::from_utf8_unchecked(&path_buf[..path_len]) };
305
306 let file = File::open(file_path).await?;
307
308 let mut buf = [0u8; 16384];
310 let mut total = 0usize;
311
312 loop {
313 let buf_result = file.read_at(buf, total as u64).await;
314 buf = buf_result.1;
315 match buf_result.0 {
316 Ok(0) => break,
317 Ok(n) => total += n,
318 Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
319 Err(e) => return Err(e),
320 }
321 }
322
323 Ok(buf[..total].as_chunks::<RECORD_SIZE>().0.binary_search(&search_key).is_ok())
324 }
325}
326
#[cfg(test)]
mod tests {
    use super::*;

    /// Binary-searches `bytes`, viewed as consecutive `RECORD_SIZE`-byte
    /// records, for `key`.
    fn has_record(bytes: &[u8], key: [u8; RECORD_SIZE]) -> bool {
        let (records, _rest) = bytes.as_chunks::<RECORD_SIZE>();
        records.binary_search(&key).is_ok()
    }

    /// The first eight digest bytes of a known password match the reference
    /// SHA-1 value.
    #[test]
    fn test_sha1t64_conversion() {
        let digest: [u8; 20] = Sha1::digest(b"password123").into();
        let expected: [u8; 8] = [0xCB, 0xFD, 0xAC, 0x60, 0x08, 0xF9, 0xCA, 0xB4];

        assert_eq!(&digest[..8], &expected[..]);
    }

    #[test]
    #[ignore = "requires HIBP dataset"]
    fn test_breached_password() {
        let dataset = dataset_path_from_env();
        let found = BreachChecker::new(&dataset)
            .is_breached("password123")
            .unwrap();

        assert!(found, "password123 should be found in the breach database");
    }

    #[test]
    #[ignore = "requires HIBP dataset"]
    fn test_non_breached_password() {
        let dataset = dataset_path_from_env();
        let found = BreachChecker::new(&dataset)
            .is_breached("hAwT?}cuC:r#kW5")
            .unwrap();

        assert!(
            !found,
            "random complex password should not be in the breach database"
        );
    }

    /// Every stored record is found and nearby absent keys are not.
    #[test]
    fn test_binary_search_sha1t48() {
        let present: [[u8; RECORD_SIZE]; 4] = [
            [0x00, 0x00, 0x00, 0x00, 0x00, 0x01],
            [0x00, 0x00, 0x00, 0x00, 0x00, 0x05],
            [0x00, 0x00, 0x00, 0x00, 0x00, 0x10],
            [0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF],
        ];
        let absent: [[u8; RECORD_SIZE]; 4] = [
            [0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
            [0x00, 0x00, 0x00, 0x00, 0x00, 0x02],
            [0x00, 0x00, 0x00, 0x00, 0x00, 0xFF],
            // Repeated probe of the all-zero key.
            [0x00, 0x00, 0x00, 0x00, 0x00, 0x00],
        ];

        // The stored data is exactly the `present` records, back to back.
        let data: Vec<u8> = present.iter().flatten().copied().collect();

        for key in present {
            assert!(has_record(&data, key));
        }
        for key in absent {
            assert!(!has_record(&data, key));
        }
    }

    #[test]
    fn test_empty_data() {
        let data: Vec<u8> = Vec::new();

        assert!(!has_record(&data, [0x00, 0x00, 0x00, 0x00, 0x00, 0x01]));
    }

    #[test]
    fn test_single_record() {
        let record: [u8; RECORD_SIZE] = [0x56, 0x78, 0x9A, 0xBC, 0xDE, 0xF0];
        let data: Vec<u8> = record.to_vec();

        assert!(has_record(&data, record));
        assert!(!has_record(&data, [0x00; RECORD_SIZE]));
        assert!(!has_record(&data, [0xFF; RECORD_SIZE]));
    }
}
472
#[cfg(all(test, feature = "tokio"))]
mod tokio_tests {
    use super::*;

    /// Passwords used to compare the blocking and the tokio code paths.
    const SAMPLE_PASSWORDS: [&str; 6] = [
        "password123",
        "123456",
        "qwerty",
        "hAwT?}cuC:r#kW5",
        "letmein",
        "xK9#mP2$vL7@nQ4",
    ];

    #[tokio::test]
    #[ignore = "requires HIBP dataset"]
    async fn test_async_breached_password() {
        let dataset = dataset_path_from_env();
        let found = BreachChecker::new(&dataset)
            .is_breached_async("password123")
            .await
            .unwrap();

        assert!(found, "password123 should be found in breach database");
    }

    #[tokio::test]
    #[ignore = "requires HIBP dataset"]
    async fn test_async_non_breached_password() {
        let dataset = dataset_path_from_env();
        let found = BreachChecker::new(&dataset)
            .is_breached_async("hAwT?}cuC:r#kW5")
            .await
            .unwrap();

        assert!(!found, "random password should not be in breach database");
    }

    /// The tokio variant must agree with the blocking one for every sample.
    #[tokio::test]
    #[ignore = "requires HIBP dataset"]
    async fn test_async_matches_sync() {
        let dataset = dataset_path_from_env();
        let checker = BreachChecker::new(&dataset);

        for password in SAMPLE_PASSWORDS {
            let sync_result = checker.is_breached(password).unwrap();
            let async_result = checker.is_breached_async(password).await.unwrap();

            assert_eq!(
                sync_result, async_result,
                "sync and async results should match for '{}'",
                password
            );
        }
    }
}
523
#[cfg(all(test, feature = "compio"))]
mod compio_tests {
    use compio::runtime as compio_runtime;

    use super::*;

    /// Passwords used to compare the blocking and the compio code paths.
    const SAMPLE_PASSWORDS: [&str; 6] = [
        "password123",
        "123456",
        "qwerty",
        "hAwT?}cuC:r#kW5",
        "letmein",
        "xK9#mP2$vL7@nQ4",
    ];

    /// Creates a fresh compio runtime for a single test.
    fn new_runtime() -> compio_runtime::Runtime {
        compio_runtime::Runtime::new().unwrap()
    }

    #[test]
    #[ignore = "requires HIBP dataset"]
    fn test_compio_breached_password() {
        let dataset = dataset_path_from_env();

        new_runtime().block_on(async {
            let found = BreachChecker::new(&dataset)
                .is_breached_compio("password123")
                .await
                .unwrap();

            assert!(found, "password123 should be found in breach database");
        });
    }

    #[test]
    #[ignore = "requires HIBP dataset"]
    fn test_compio_non_breached_password() {
        let dataset = dataset_path_from_env();

        new_runtime().block_on(async {
            let found = BreachChecker::new(&dataset)
                .is_breached_compio("hAwT?}cuC:r#kW5")
                .await
                .unwrap();

            assert!(!found, "random password should not be in breach database");
        });
    }

    /// The compio variant must agree with the blocking one for every sample.
    #[test]
    #[ignore = "requires HIBP dataset"]
    fn test_compio_matches_sync() {
        let dataset = dataset_path_from_env();

        new_runtime().block_on(async {
            let checker = BreachChecker::new(&dataset);

            for password in SAMPLE_PASSWORDS {
                let sync_result = checker.is_breached(password).unwrap();
                let compio_result = checker.is_breached_compio(password).await.unwrap();

                assert_eq!(
                    sync_result, compio_result,
                    "sync and compio results should match for '{}'",
                    password
                );
            }
        });
    }
}