use std::fs;
use std::io::Write;
use shaha::hasher;
use shaha::source::{FileSource, Source};
use shaha::storage::{HashRecord, ParquetStorage, Storage};
/// The `sha256` hasher must map `hello` to the expected hex digest.
#[test]
fn test_sha256_known_vector() {
    const EXPECTED_HEX: &str = "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824";

    let digest = hasher::get_hasher("sha256").unwrap().hash(b"hello");

    assert_eq!(hex::encode(&digest), EXPECTED_HEX);
}
/// The `md5` hasher must map `hello` to the expected hex digest.
#[test]
fn test_md5_known_vector() {
    const EXPECTED_HEX: &str = "5d41402abc4b2a76b9719d911017c592";

    let digest = hasher::get_hasher("md5").unwrap().hash(b"hello");

    assert_eq!(hex::encode(&digest), EXPECTED_HEX);
}
/// The `sha1` hasher must map `hello` to the expected hex digest.
#[test]
fn test_sha1_known_vector() {
    const EXPECTED_HEX: &str = "aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d";

    let digest = hasher::get_hasher("sha1").unwrap().hash(b"hello");

    assert_eq!(hex::encode(&digest), EXPECTED_HEX);
}
/// The `keccak256` hasher must map `hello` to the expected hex digest.
#[test]
fn test_keccak256_known_vector() {
    const EXPECTED_HEX: &str = "1c8aff950685c2ed4bc3174f3472287b56d9517b9c948127319a09a7a36deac8";

    let digest = hasher::get_hasher("keccak256").unwrap().hash(b"hello");

    assert_eq!(hex::encode(&digest), EXPECTED_HEX);
}
/// The `hash160` hasher must map `hello` to the expected 20-byte digest (hex-encoded).
#[test]
fn test_hash160_known_vector() {
    const EXPECTED_HEX: &str = "b6a9c8c230722b7c748331a8b450f05566dc7d0f";

    let digest = hasher::get_hasher("hash160").unwrap().hash(b"hello");

    assert_eq!(hex::encode(&digest), EXPECTED_HEX);
}
/// The `hash256` hasher must map `hello` to the expected hex digest.
#[test]
fn test_hash256_known_vector() {
    const EXPECTED_HEX: &str = "9595c9df90075148eb06860365df33584b75bff782a510c6cd4883a419833d50";

    let digest = hasher::get_hasher("hash256").unwrap().hash(b"hello");

    assert_eq!(hex::encode(&digest), EXPECTED_HEX);
}
/// The `sha512` hasher must map `hello` to the expected hex digest.
#[test]
fn test_sha512_known_vector() {
    const EXPECTED_HEX: &str = "9b71d224bd62f3785d96d46ad3ea3d73319bfbc2890caadae2dff72519673ca72323c3d99ba5c11d7c7acc6e14b8c5da0c4663475c2e5c3adef46f73bcdec043";

    let digest = hasher::get_hasher("sha512").unwrap().hash(b"hello");

    assert_eq!(hex::encode(&digest), EXPECTED_HEX);
}
/// The `blake3` hasher must map `hello` to the expected hex digest.
#[test]
fn test_blake3_known_vector() {
    const EXPECTED_HEX: &str = "ea8f163db38682925e4491c5e58d4bb3506ef8c14eb78a86e908c5624a67200f";

    let digest = hasher::get_hasher("blake3").unwrap().hash(b"hello");

    assert_eq!(hex::encode(&digest), EXPECTED_HEX);
}
/// The `ripemd160` hasher must map `hello` to the expected hex digest.
#[test]
fn test_ripemd160_known_vector() {
    const EXPECTED_HEX: &str = "108f07b8382412612c048d07d13f814118445acd";

    let digest = hasher::get_hasher("ripemd160").unwrap().hash(b"hello");

    assert_eq!(hex::encode(&digest), EXPECTED_HEX);
}
/// `available_algorithms` must advertise at least the algorithm names checked here.
#[test]
fn test_available_algorithms() {
    let advertised = hasher::available_algorithms();

    for name in ["sha256", "md5", "keccak256", "hash160", "hash256"] {
        assert!(
            advertised.contains(&name),
            "algorithm {:?} is not advertised",
            name
        );
    }
}
/// Looking up an algorithm name that is not registered must yield `None`.
#[test]
fn test_unknown_algorithm_returns_none() {
    for name in ["unknown", "sha999"] {
        assert!(
            hasher::get_hasher(name).is_none(),
            "unexpectedly found a hasher for {:?}",
            name
        );
    }
}
/// `FileSource::words` is expected to yield the words of the file in order,
/// without an entry for the blank line.
#[test]
fn test_file_source() {
    let tmp = tempfile::tempdir().unwrap();
    let wordlist = tmp.path().join("words.txt");

    // Three words with one blank line before the last; the temporary file
    // handle is dropped (closed) at the end of this statement.
    fs::File::create(&wordlist)
        .unwrap()
        .write_all(b"hello\nworld\n\ntest\n")
        .unwrap();

    let source = FileSource::new(&wordlist);
    let words = source.words().unwrap().collect::<Vec<String>>();

    assert_eq!(words, ["hello", "world", "test"]);
}
/// A record written through `ParquetStorage` can be read back both by its full
/// hash and by the first four bytes of that hash.
#[test]
fn test_roundtrip_write_query() {
    let tmp = tempfile::tempdir().unwrap();
    let db_file = tmp.path().join("test.parquet");
    let digest = hasher::get_hasher("sha256").unwrap().hash(b"password");

    let batch = vec![HashRecord {
        hash: digest.clone(),
        preimage: "password".to_string(),
        algorithm: "sha256".to_string(),
        sources: vec!["test".to_string()],
    }];
    let mut store = ParquetStorage::new(&db_file);
    store.write_batch(batch).unwrap();
    store.finish().unwrap();

    // Lookup by the complete hash.
    let by_full_hash = store.query(&digest, None, None).unwrap();
    assert_eq!(by_full_hash.len(), 1);
    let hit = &by_full_hash[0];
    assert_eq!(hit.preimage, "password");
    assert_eq!(hit.algorithm, "sha256");

    // Lookup by a 4-byte prefix of the hash.
    let by_prefix = store.query(&digest[..4], None, None).unwrap();
    assert_eq!(by_prefix.len(), 1);
    assert_eq!(by_prefix[0].preimage, "password");
}
/// The optional algorithm argument of `query` restricts matches to records
/// stored under that algorithm name.
#[test]
fn test_query_with_algorithm_filter() {
    let tmp = tempfile::tempdir().unwrap();
    let db_file = tmp.path().join("test.parquet");
    let sha256 = hasher::get_hasher("sha256").unwrap();
    let md5 = hasher::get_hasher("md5").unwrap();

    // One record per algorithm for the same preimage, neither with any sources.
    let batch: Vec<HashRecord> = vec![
        ("sha256", sha256.hash(b"hello")),
        ("md5", md5.hash(b"hello")),
    ]
    .into_iter()
    .map(|(algorithm, hash)| HashRecord {
        hash,
        preimage: "hello".to_string(),
        algorithm: algorithm.to_string(),
        sources: vec![],
    })
    .collect();

    let mut store = ParquetStorage::new(&db_file);
    store.write_batch(batch).unwrap();
    store.finish().unwrap();

    let sha256_digest = sha256.hash(b"hello");
    let prefix = &sha256_digest[..4];

    // No filter: only the sha256 record is expected to share this prefix.
    let unfiltered = store.query(prefix, None, None).unwrap();
    assert_eq!(unfiltered.len(), 1);

    // Filter on the algorithm the prefix belongs to.
    let matching = store.query(prefix, Some("sha256"), None).unwrap();
    assert_eq!(matching.len(), 1);
    assert_eq!(matching[0].algorithm, "sha256");

    // Filter on the other algorithm: nothing may match.
    let excluded = store.query(prefix, Some("md5"), None).unwrap();
    assert_eq!(excluded.len(), 0);
}
/// `stats` must report the record count, the distinct algorithms, the distinct
/// sources and a non-zero file size for a freshly written database.
#[test]
fn test_stats_from_metadata() {
    /// Builds an owned `HashRecord` from borrowed pieces.
    fn record(hash: Vec<u8>, preimage: &str, algorithm: &str, sources: &[&str]) -> HashRecord {
        HashRecord {
            hash,
            preimage: preimage.to_string(),
            algorithm: algorithm.to_string(),
            sources: sources.iter().map(|s| (*s).to_string()).collect(),
        }
    }

    let tmp = tempfile::tempdir().unwrap();
    let db_file = tmp.path().join("test.parquet");
    let sha256 = hasher::get_hasher("sha256").unwrap();
    let md5 = hasher::get_hasher("md5").unwrap();

    // Three records, two algorithms, two distinct source names overall.
    let batch = vec![
        record(sha256.hash(b"hello"), "hello", "sha256", &["test"]),
        record(md5.hash(b"hello"), "hello", "md5", &["test", "other"]),
        record(sha256.hash(b"world"), "world", "sha256", &["other"]),
    ];
    let mut store = ParquetStorage::new(&db_file);
    store.write_batch(batch).unwrap();
    store.finish().unwrap();

    let stats = store.stats().unwrap();
    assert_eq!(stats.total_records, 3);

    for algorithm in ["sha256", "md5"] {
        assert!(
            stats.algorithms.contains(&algorithm.to_string()),
            "stats are missing algorithm {:?}",
            algorithm
        );
    }
    assert_eq!(stats.algorithms.len(), 2);

    for source in ["test", "other"] {
        assert!(
            stats.sources.contains(&source.to_string()),
            "stats are missing source {:?}",
            source
        );
    }
    assert_eq!(stats.sources.len(), 2);

    assert!(stats.file_size_bytes > 0);
}
/// Simulates an append: existing records are read back, merged in memory with a
/// second word list keyed by `(hash, algorithm)`, and rewritten. Records present
/// in both lists must end up with both source names and no duplicates.
#[test]
fn test_append_mode_merges_sources() {
    use std::collections::HashMap;

    let tmp = tempfile::tempdir().unwrap();
    let db_file = tmp.path().join("test.parquet");
    let sha256 = hasher::get_hasher("sha256").unwrap();

    // Seed the database with two words attributed to "wordlist1".
    let seed: Vec<HashRecord> = ["hello", "world"]
        .iter()
        .map(|word| HashRecord {
            hash: sha256.hash(word.as_bytes()),
            preimage: word.to_string(),
            algorithm: "sha256".to_string(),
            sources: vec!["wordlist1".to_string()],
        })
        .collect();
    let mut first_writer = ParquetStorage::new(&db_file);
    first_writer.write_batch(seed).unwrap();
    first_writer.finish().unwrap();

    // Load every stored record (empty prefix, no filters) into a map keyed by
    // (hash, algorithm).
    let first_reader = ParquetStorage::new(&db_file);
    let mut merged: HashMap<(Vec<u8>, String), HashRecord> = first_reader
        .query(&[], None, None)
        .unwrap()
        .into_iter()
        .map(|record| ((record.hash.clone(), record.algorithm.clone()), record))
        .collect();

    // Fold in the second word list: tag known records with "wordlist2" (once),
    // create fresh records for unknown words.
    for word in ["hello", "test"] {
        let hash = sha256.hash(word.as_bytes());
        merged
            .entry((hash.clone(), "sha256".to_string()))
            .and_modify(|known| {
                if !known.sources.iter().any(|s| s.as_str() == "wordlist2") {
                    known.sources.push("wordlist2".to_string());
                }
            })
            .or_insert_with(|| HashRecord {
                hash,
                preimage: word.to_string(),
                algorithm: "sha256".to_string(),
                sources: vec!["wordlist2".to_string()],
            });
    }

    // Write the merged set to the same path.
    let mut second_writer = ParquetStorage::new(&db_file);
    let merged_records: Vec<HashRecord> = merged.into_values().collect();
    second_writer.write_batch(merged_records).unwrap();
    second_writer.finish().unwrap();

    let reader = ParquetStorage::new(&db_file);

    // "hello" was in both lists, so it carries both sources exactly once each.
    let hello_hits = reader
        .query(&sha256.hash(b"hello"), Some("sha256"), None)
        .unwrap();
    assert_eq!(hello_hits.len(), 1);
    let hello = &hello_hits[0];
    assert_eq!(hello.preimage, "hello");
    assert!(hello.sources.contains(&"wordlist1".to_string()));
    assert!(hello.sources.contains(&"wordlist2".to_string()));
    assert_eq!(hello.sources.len(), 2);

    // "world" only came from the first list.
    let world_hits = reader
        .query(&sha256.hash(b"world"), Some("sha256"), None)
        .unwrap();
    assert_eq!(world_hits.len(), 1);
    assert_eq!(world_hits[0].sources, vec!["wordlist1".to_string()]);

    // "test" only came from the second list.
    let test_hits = reader
        .query(&sha256.hash(b"test"), Some("sha256"), None)
        .unwrap();
    assert_eq!(test_hits.len(), 1);
    assert_eq!(test_hits[0].sources, vec!["wordlist2".to_string()]);

    let stats = reader.stats().unwrap();
    assert_eq!(stats.total_records, 3);
}
/// A full hash that was never stored must produce no results, while stored
/// hashes stay reachable by full hash and by prefix. Only query results are
/// observed here; the bloom filter in the test name is not inspected directly.
#[test]
fn test_bloom_filter_rejects_nonexistent_hash() {
    let tmp = tempfile::tempdir().unwrap();
    let db_file = tmp.path().join("test.parquet");
    let sha256 = hasher::get_hasher("sha256").unwrap();

    let batch: Vec<HashRecord> = ["hello", "world"]
        .iter()
        .map(|word| HashRecord {
            hash: sha256.hash(word.as_bytes()),
            preimage: word.to_string(),
            algorithm: "sha256".to_string(),
            sources: vec!["test".to_string()],
        })
        .collect();
    let mut writer = ParquetStorage::new(&db_file);
    writer.write_batch(batch).unwrap();
    writer.finish().unwrap();

    let reader = ParquetStorage::new(&db_file);

    // Stored hash, exact lookup.
    let stored = sha256.hash(b"hello");
    let exact_hits = reader.query(&stored, None, None).unwrap();
    assert_eq!(exact_hits.len(), 1);
    assert_eq!(exact_hits[0].preimage, "hello");

    // Hash of a word that was never written.
    let missing = sha256.hash(b"notindb");
    let missing_hits = reader.query(&missing, None, None).unwrap();
    assert_eq!(missing_hits.len(), 0);

    // Stored hash, 4-byte prefix lookup.
    let prefix_hits = reader.query(&stored[..4], None, None).unwrap();
    assert_eq!(prefix_hits.len(), 1);
    assert_eq!(prefix_hits[0].preimage, "hello");
}
/// The limit argument of `query` caps the number of returned records; a limit
/// above the record count, or no limit, returns everything.
#[test]
fn test_query_with_limit() {
    let tmp = tempfile::tempdir().unwrap();
    let db_file = tmp.path().join("test.parquet");
    let sha256 = hasher::get_hasher("sha256").unwrap();

    // 100 distinct records: "word0" through "word99".
    let mut batch: Vec<HashRecord> = Vec::with_capacity(100);
    for i in 0..100 {
        let word = format!("word{}", i);
        batch.push(HashRecord {
            hash: sha256.hash(word.as_bytes()),
            preimage: word,
            algorithm: "sha256".to_string(),
            sources: vec!["test".to_string()],
        });
    }
    let mut writer = ParquetStorage::new(&db_file);
    writer.write_batch(batch).unwrap();
    writer.finish().unwrap();

    let reader = ParquetStorage::new(&db_file);

    // (limit, expected number of results) for an empty-prefix query.
    for (limit, expected) in [(None, 100), (Some(10), 10), (Some(1), 1), (Some(1000), 100)] {
        let hits = reader.query(&[], None, limit).unwrap();
        assert_eq!(
            hits.len(),
            expected,
            "unexpected result count for limit {:?}",
            limit
        );
    }
}
/// A `FileSource` over a zero-length file must produce no words.
#[test]
fn test_empty_file_source() {
    let tmp = tempfile::tempdir().unwrap();
    let empty_file = tmp.path().join("empty.txt");
    // Create the file and close the handle right away, leaving it empty.
    drop(fs::File::create(&empty_file).unwrap());

    let source = FileSource::new(&empty_file);
    let words = source.words().unwrap().collect::<Vec<String>>();

    assert!(words.is_empty());
}
/// A 10,000-character line must come through `FileSource::words` intact,
/// followed by the short line after it.
#[test]
fn test_file_source_with_long_lines() {
    let tmp = tempfile::tempdir().unwrap();
    let wordlist = tmp.path().join("long.txt");

    // One very long line followed by a short one; the temporary file handle
    // is dropped (closed) at the end of the write statement.
    let contents = format!("{}\nshort\n", "a".repeat(10_000));
    fs::File::create(&wordlist)
        .unwrap()
        .write_all(contents.as_bytes())
        .unwrap();

    let source = FileSource::new(&wordlist);
    let words = source.words().unwrap().collect::<Vec<String>>();

    assert_eq!(words.len(), 2);
    assert_eq!(words[0].len(), 10_000);
    assert_eq!(words[1], "short");
}
/// Two independent `FileSource`s over the same unchanged file must report the
/// same content hash.
#[test]
fn test_file_source_content_hash_deterministic() {
    let tmp = tempfile::tempdir().unwrap();
    let wordlist = tmp.path().join("words.txt");

    // The temporary file handle is dropped (closed) at the end of this statement.
    fs::File::create(&wordlist)
        .unwrap()
        .write_all(b"hello\nworld\n")
        .unwrap();

    let first_source = FileSource::new(&wordlist);
    let first_hash = first_source.content_hash().unwrap().unwrap();

    let second_source = FileSource::new(&wordlist);
    let second_hash = second_source.content_hash().unwrap().unwrap();

    assert_eq!(first_hash, second_hash);
}
/// Querying or reading stats from a database path that is not expected to
/// exist must succeed with empty results and a zero record count.
#[test]
fn test_query_nonexistent_database() {
    let missing_db = ParquetStorage::new("/nonexistent/path.parquet");

    let hits = missing_db.query(&[], None, None).unwrap();
    assert!(hits.is_empty());

    let summary = missing_db.stats().unwrap();
    assert_eq!(summary.total_records, 0);
}
/// Writing an empty batch and finishing must not create the database file.
#[test]
fn test_write_empty_batch() {
    let tmp = tempfile::tempdir().unwrap();
    let db_file = tmp.path().join("empty.parquet");

    let mut store = ParquetStorage::new(&db_file);
    store.write_batch(Vec::new()).unwrap();
    store.finish().unwrap();

    assert!(!db_file.exists());
}