#![allow(clippy::unwrap_used, clippy::expect_used)]
use kmerust::run::count_kmers;
use kmerust::streaming::count_kmers_from_reader;
use std::io::{BufReader, Write};
use tempfile::NamedTempFile;
/// Writes `content` to a fresh named temporary file and returns its handle.
///
/// The data is flushed before returning so it can be read back through the
/// file's path. Keep the returned handle alive for as long as the path is in
/// use: per the `tempfile` crate, the file is removed when the handle drops.
///
/// # Panics
///
/// Panics if the file cannot be created, written, or flushed.
fn temp_fasta(content: &str) -> NamedTempFile {
    let mut tmp = NamedTempFile::new().expect("Failed to create temp file");
    let bytes = content.as_bytes();
    tmp.write_all(bytes).expect("Failed to write temp file");
    tmp.flush().expect("Failed to flush temp file");
    tmp
}
/// `ACGT` at k = 3 must produce exactly one entry: `ACG` with a count of 2.
#[test]
fn count_kmers_basic() {
    let input = temp_fasta(">seq\nACGT\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    // Presumably `ACG` and `CGT` are reverse complements that share one
    // canonical key, which is why a single entry with count 2 is expected.
    let acg = counts.get("ACG");
    assert_eq!(acg, Some(&2));

    let distinct = counts.len();
    assert_eq!(distinct, 1);
}
/// Counting the `simple.fa` fixture at k = 3 yields a non-empty table in
/// which every key has length 3 and every count is strictly positive.
#[test]
fn count_kmers_simple_fixture() {
    // Relative path: the test expects to run from the directory that
    // contains `tests/`.
    let fixture = "tests/fixtures/simple.fa";
    let counts = count_kmers(fixture, 3).unwrap();
    assert!(!counts.is_empty());

    for (kmer, count) in &counts {
        let is_positive = *count > 0;
        assert!(is_positive, "k-mer {kmer} has non-positive count {count}");

        let kmer_len = kmer.len();
        assert_eq!(kmer_len, 3, "k-mer {kmer} is not length 3");
    }
}
/// A lone `TTT` must be reported under the key `AAA`, never under `TTT`.
#[test]
fn count_kmers_returns_canonical_forms() {
    let input = temp_fasta(">seq\nTTT\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    // Presumably `AAA` is the canonical representative of the `TTT`/`AAA`
    // reverse-complement pair.
    let canonical = counts.get("AAA");
    assert_eq!(canonical, Some(&1));

    let non_canonical = counts.get("TTT");
    assert_eq!(non_canonical, None);
}
/// An `N` in the sequence must not leak into any reported k-mer; the two
/// `ACG` windows on either side of it are still counted.
#[test]
fn count_kmers_handles_n_bases() {
    let input = temp_fasta(">seq\nACGNACG\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    assert_eq!(counts.get("ACG"), Some(&2));
    counts.keys().for_each(|kmer| {
        assert!(!kmer.contains('N'), "k-mer {kmer} should not contain N");
    });
}
/// Lower-case (soft-masked) input `acgt` is counted under the upper-case
/// key `ACG`, with the same count of 2 that `ACGT` gives.
#[test]
fn count_kmers_soft_masked_bases() {
    let input = temp_fasta(">seq\nacgt\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    let acg = counts.get("ACG");
    assert_eq!(acg, Some(&2));
}
/// Mixed-case input `AcGt` is counted under the upper-case key `ACG` with
/// a count of 2.
#[test]
fn count_kmers_mixed_case() {
    let input = temp_fasta(">seq\nAcGt\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    let acg = counts.get("ACG");
    assert_eq!(acg, Some(&2));
}
/// A sequence shorter than k (`AC` with k = 3) succeeds and yields no k-mers.
#[test]
fn count_kmers_empty_result_for_short_sequence() {
    let input = temp_fasta(">seq\nAC\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    assert!(counts.is_empty());
}
/// A sequence of exactly k bases (`ACG`, k = 3) yields one k-mer seen once.
#[test]
fn count_kmers_exact_length_sequence() {
    let input = temp_fasta(">seq\nACG\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    let distinct = counts.len();
    assert_eq!(distinct, 1);

    let acg = counts.get("ACG");
    assert_eq!(acg, Some(&1));
}
/// Counts accumulate across records: two records that each contain `ACG`
/// give `ACG` a total of 2.
#[test]
fn count_kmers_multiple_sequences() {
    let input = temp_fasta(">seq1\nACG\n>seq2\nACG\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    let acg = counts.get("ACG");
    assert_eq!(acg, Some(&2));
}
/// With k = 1 on `ACGT`, both `A` and `C` are expected to have a count of 2.
#[test]
fn count_kmers_k_equals_1() {
    let input = temp_fasta(">seq\nACGT\n");
    let counts = count_kmers(input.path(), 1).unwrap();

    // Presumably `T` folds into `A` and `G` into `C` via canonicalisation,
    // which is why each of the two keys is expected to be seen twice.
    for base in ["A", "C"] {
        assert_eq!(counts.get(base), Some(&2));
    }
}
/// k = 32 is accepted: a run of 32 `A`s yields exactly one k-mer, the run
/// itself.
#[test]
fn count_kmers_k_equals_32() {
    let seq: String = std::iter::repeat('A').take(32).collect();
    let record = format!(">seq\n{seq}\n");
    let input = temp_fasta(&record);

    let counts = count_kmers(input.path(), 32).unwrap();

    assert_eq!(counts.len(), 1);
    assert!(counts.contains_key(&seq));
}
/// k = 0 must be rejected with an error rather than producing counts.
#[test]
fn count_kmers_rejects_k_zero() {
    let input = temp_fasta(">seq\nACGT\n");
    let outcome = count_kmers(input.path(), 0);

    assert!(outcome.is_err());
}
/// k = 33 must be rejected with an error (32 is accepted by
/// `count_kmers_k_equals_32`).
#[test]
fn count_kmers_rejects_k_too_large() {
    let input = temp_fasta(">seq\nACGT\n");
    let outcome = count_kmers(input.path(), 33);

    assert!(outcome.is_err());
}
/// A path that does not exist must surface as an `Err`, not a panic.
#[test]
fn count_kmers_nonexistent_file() {
    let missing = "/nonexistent/path/to/file.fa";
    let outcome = count_kmers(missing, 3);

    assert!(outcome.is_err());
}
/// A completely empty file succeeds and yields no k-mers.
///
/// Only compiled when the `needletail` feature is disabled; presumably that
/// backend treats empty input differently (TODO confirm).
#[test]
#[cfg(not(feature = "needletail"))]
fn count_kmers_empty_file() {
    let input = temp_fasta("");
    let counts = count_kmers(input.path(), 3).unwrap();

    assert!(counts.is_empty());
}
/// A record with a header but no sequence succeeds and yields no k-mers.
///
/// Only compiled when the `needletail` feature is disabled; presumably that
/// backend treats a header-only record differently (TODO confirm).
#[test]
#[cfg(not(feature = "needletail"))]
fn count_kmers_header_only() {
    let input = temp_fasta(">seq\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    assert!(counts.is_empty());
}
/// `ACGT` at k = 4 is reported once under its own key.
#[test]
fn count_kmers_palindrome() {
    let input = temp_fasta(">seq\nACGT\n");
    let counts = count_kmers(input.path(), 4).unwrap();

    // `ACGT` equals its own reverse complement, so it must be counted once,
    // not twice.
    let palindrome = counts.get("ACGT");
    assert_eq!(palindrome, Some(&1));
}
/// Overlapping windows are all counted: `AAAAA` holds three `AAA` 3-mers.
#[test]
fn count_kmers_all_same_base() {
    let input = temp_fasta(">seq\nAAAAA\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    let aaa = counts.get("AAA");
    assert_eq!(aaa, Some(&3));
}
/// A sequence and its reverse complement (`GATTACA` / `TGTAATC`) must give
/// identical count tables.
#[test]
fn count_kmers_complementary_sequences() {
    let forward_file = temp_fasta(">seq\nGATTACA\n");
    let reverse_file = temp_fasta(">seq\nTGTAATC\n");

    let forward_counts = count_kmers(forward_file.path(), 3).unwrap();
    let reverse_counts = count_kmers(reverse_file.path(), 3).unwrap();

    assert_eq!(forward_counts, reverse_counts);
}
/// A sequence wrapped across two lines (`ACG` / `TAC`) is accepted and
/// yields at least one k-mer.
#[test]
fn count_kmers_multiline_sequence() {
    let input = temp_fasta(">seq\nACG\nTAC\n");
    let counts = count_kmers(input.path(), 3).unwrap();

    // NOTE(review): this only checks for a non-empty result; it does not
    // verify that the two lines are joined into `ACGTAC` before counting.
    assert!(!counts.is_empty());
}
/// Counting the `with_n.fa` fixture at k = 3 yields a non-empty table in
/// which no key contains an `N`.
#[test]
fn count_kmers_with_n_fixture() {
    // Relative path: the test expects to run from the directory that
    // contains `tests/`.
    let result = count_kmers("tests/fixtures/with_n.fa", 3).unwrap();
    assert!(!result.is_empty());

    for kmer in result.keys() {
        // Name the offending k-mer in the failure message, matching the
        // other N-handling tests in this file.
        assert!(!kmer.contains('N'), "k-mer {kmer} should not contain N");
    }
}
/// The `soft_masked.fa` fixture at k = 3 must report `AAA` with a count of 2.
#[test]
fn count_kmers_soft_masked_fixture() {
    // Relative path: the test expects to run from the directory that
    // contains `tests/`.
    let fixture = "tests/fixtures/soft_masked.fa";
    let counts = count_kmers(fixture, 3).unwrap();

    let aaa = counts.get("AAA");
    assert_eq!(aaa, Some(&2));
}
/// Streaming counterpart of `count_kmers_basic`: `ACGT` at k = 3, read from
/// an in-memory buffer, yields the single entry `ACG` with count 2.
#[test]
fn count_kmers_from_reader_basic() {
    let fasta_bytes: &[u8] = b">seq\nACGT\n";
    let reader = BufReader::new(fasta_bytes);

    let counts = count_kmers_from_reader(reader, 3).unwrap();

    let acg = counts.get("ACG");
    assert_eq!(acg, Some(&2));
    let distinct = counts.len();
    assert_eq!(distinct, 1);
}
/// A streamed sequence shorter than k (`AC` with k = 3) succeeds and yields
/// no k-mers.
#[test]
fn count_kmers_from_reader_empty_sequence() {
    // Despite the test name, the record is not empty; it is merely too short
    // to contain a single 3-mer.
    let fasta_bytes: &[u8] = b">seq\nAC\n";
    let reader = BufReader::new(fasta_bytes);

    let counts = count_kmers_from_reader(reader, 3).unwrap();

    assert!(counts.is_empty());
}
/// Two streamed records (`ACGT`, `TGCA`) at k = 2 are accepted and yield at
/// least one k-mer.
#[test]
fn count_kmers_from_reader_multiple_sequences() {
    let fasta_bytes: &[u8] = b">seq1\nACGT\n>seq2\nTGCA\n";
    let reader = BufReader::new(fasta_bytes);

    let counts = count_kmers_from_reader(reader, 2).unwrap();

    // NOTE(review): only non-emptiness is checked; individual counts are not.
    assert!(!counts.is_empty());
}
/// Streaming counterpart of `count_kmers_handles_n_bases`: no reported k-mer
/// contains `N`, and the two `ACG` windows around it are still counted.
#[test]
fn count_kmers_from_reader_handles_n_bases() {
    let fasta_bytes: &[u8] = b">seq\nACGNACG\n";
    let reader = BufReader::new(fasta_bytes);

    let counts = count_kmers_from_reader(reader, 3).unwrap();

    assert_eq!(counts.get("ACG"), Some(&2));
    counts.keys().for_each(|kmer| {
        assert!(!kmer.contains('N'), "k-mer {kmer} should not contain N");
    });
}
/// Streamed lower-case input `acgt` is counted under the upper-case key
/// `ACG` with a count of 2.
#[test]
fn count_kmers_from_reader_soft_masked() {
    let fasta_bytes: &[u8] = b">seq\nacgt\n";
    let reader = BufReader::new(fasta_bytes);

    let counts = count_kmers_from_reader(reader, 3).unwrap();

    let acg = counts.get("ACG");
    assert_eq!(acg, Some(&2));
}
/// The file-based and reader-based entry points must agree: the same FASTA
/// text, counted at k = 3 through each, gives equal results.
#[test]
fn count_kmers_from_reader_matches_file_based() {
    let fasta_text = ">seq1\nACGTACGT\n>seq2\nGATTACA\n";

    // Path 1: write the text to disk and count through the file API.
    let on_disk = temp_fasta(fasta_text);
    let from_file = count_kmers(on_disk.path(), 3).unwrap();

    // Path 2: count the very same bytes straight from memory.
    let in_memory = BufReader::new(fasta_text.as_bytes());
    let from_reader = count_kmers_from_reader(in_memory, 3).unwrap();

    assert_eq!(from_file, from_reader);
}
/// The streaming API must reject both out-of-range values of k exercised
/// here: 0 and 33.
#[test]
fn count_kmers_from_reader_rejects_invalid_k() {
    let fasta_bytes: &[u8] = b">seq\nACGT\n";

    // A fresh reader is built per attempt because each call consumes it.
    for bad_k in [0, 33] {
        let reader = BufReader::new(fasta_bytes);
        assert!(count_kmers_from_reader(reader, bad_k).is_err());
    }
}