use std::io::Write;
use std::path::Path;
use crate::cli::args::Args;
use crate::database::format::RKDatabase;
use crate::database::prefix_query_optimized::MemoryBlockInfo;
use crate::error::{KmerError, ProcessingResult};
/// Fully resolved arguments for a single prefix-query invocation.
///
/// Built by [`execute_prefix_query`] from the `PrefixQuery` CLI sub-command so the rest of
/// this module does not have to destructure the CLI argument enum itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PrefixQueryArgs {
    /// Path of the k-mer database file to load.
    pub database: String,
    /// Search pattern; used as the query when `prefix` is `None`.
    pub pattern: String,
    /// Explicit prefix; when present it takes precedence over `pattern`.
    pub prefix: Option<String>,
    /// Force the hybrid search path (it is also selected when the query contains `{`).
    pub hybrid: bool,
    /// Output format name: `"table"`, `"json"`, `"csv"` or `"tsv"`.
    pub format: String,
    /// Destination file for the results; `None` writes to standard output.
    pub output: Option<String>,
    /// Request the performance summary after the results.
    pub verbose: bool,
    /// Suppress progress messages and the performance summary on standard error.
    pub quiet: bool,
    /// Request the performance summary after the results.
    pub profile: bool,
    /// Inclusive lower bound on the counts that are reported.
    pub min_count: Option<u64>,
    /// Inclusive upper bound on the counts that are reported.
    pub max_count: Option<u64>,
}
/// Entry point for the `PrefixQuery` sub-command.
///
/// Copies the sub-command's fields out of `args.command` into a [`PrefixQueryArgs`] and hands
/// them to `execute_prefix_query_impl`.
///
/// # Errors
///
/// Returns a processing error when `args.command` is not the `PrefixQuery` variant, and
/// otherwise forwards whatever error the query itself produces.
pub fn execute_prefix_query(args: &Args) -> ProcessingResult<()> {
    if let crate::cli::args::Commands::PrefixQuery {
        database,
        pattern,
        prefix,
        hybrid,
        format,
        output,
        verbose,
        quiet,
        profile,
        min_count,
        max_count,
    } = &args.command
    {
        // The bindings are references into `args`; take owned copies for the query arguments.
        let query_args = PrefixQueryArgs {
            database: database.clone(),
            pattern: pattern.clone(),
            prefix: prefix.clone(),
            hybrid: *hybrid,
            format: format.clone(),
            output: output.clone(),
            verbose: *verbose,
            quiet: *quiet,
            profile: *profile,
            min_count: *min_count,
            max_count: *max_count,
        };
        return execute_prefix_query_impl(&query_args);
    }

    // Any other sub-command was routed here by mistake.
    let wrong_command =
        KmerError::ProcessingError("Invalid command for prefix query execution".to_string());
    Err(wrong_command.into())
}
fn execute_prefix_query_impl(args: &PrefixQueryArgs) -> ProcessingResult<()> {
let search_pattern = if let Some(ref explicit_prefix) = args.prefix {
explicit_prefix.clone()
} else if !args.pattern.is_empty() {
args.pattern.clone()
} else {
return Err(KmerError::ProcessingError(
"Either --pattern or --prefix must be specified".to_string(),
)
.into());
};
if search_pattern.is_empty() {
return Err(KmerError::ProcessingError("Pattern cannot be empty".to_string()).into());
}
if !Path::new(&args.database).exists() {
return Err(KmerError::FileNotFound(args.database.clone()).into());
}
if !args.quiet {
eprintln!("Loading database: {}", args.database);
}
let database = RKDatabase::from_file_path(Path::new(&args.database))
.map_err(|e| KmerError::ProcessingError(format!("Failed to load database: {}", e)))?;
let kmer_size = database.kmer_size();
let start_time = std::time::Instant::now();
let result = if args.hybrid || search_pattern.contains('{') {
if !args.quiet {
eprintln!("Executing hybrid search for pattern: {}", search_pattern);
}
use crate::database::prefix_query_optimized::extract_hybrid_by_pattern;
extract_hybrid_by_pattern(&database, &search_pattern.to_uppercase())
.map_err(|e| KmerError::ProcessingError(format!("Hybrid query failed: {}", e)))?
} else {
let prefix_len = search_pattern.len();
if prefix_len >= kmer_size {
return Err(KmerError::ProcessingError(format!(
"Prefix length ({}) must be less than k-mer size ({})",
prefix_len, kmer_size
))
.into());
}
if !search_pattern
.chars()
.all(|c| matches!(c.to_ascii_uppercase(), 'A' | 'T' | 'C' | 'G'))
{
return Err(KmerError::ProcessingError(format!(
"Pattern contains invalid characters: {}. Only A, T, C, G are allowed.",
search_pattern
))
.into());
}
if !args.quiet {
eprintln!("Executing optimized prefix query for: {}", search_pattern);
}
use crate::database::prefix_query_optimized::extract_prefix_optimized;
extract_prefix_optimized(&database, &search_pattern.to_uppercase())
.map_err(|e| KmerError::ProcessingError(format!("Prefix query failed: {}", e)))?
};
let query_time = start_time.elapsed();
let filtered_matches = if args.min_count.is_some() || args.max_count.is_some() {
let min_count = args.min_count.unwrap_or(0);
let max_count = args.max_count.unwrap_or(u64::MAX);
result
.matches
.into_iter()
.filter(|(_, count)| *count >= min_count && *count <= max_count)
.collect()
} else {
result.matches
};
let mut writer: Box<dyn Write> = if let Some(output_file) = &args.output {
let file = std::fs::File::create(output_file).map_err(|e| {
KmerError::FileWriteError(format!("Failed to create output file: {}", e))
})?;
Box::new(std::io::BufWriter::new(file))
} else {
Box::new(std::io::stdout())
};
match args.format.as_str() {
"table" => output_table(&mut writer, &filtered_matches, &result.memory_block, args)?,
"json" => output_json(&mut writer, &filtered_matches, &result.memory_block, args)?,
"csv" => output_csv(&mut writer, &filtered_matches, &result.memory_block, args)?,
"tsv" => output_tsv(&mut writer, &filtered_matches, &result.memory_block, args)?,
_ => {
return Err(KmerError::ProcessingError(format!(
"Unsupported output format: {}",
args.format
))
.into())
}
}
if args.profile || args.verbose {
output_performance_info(
&result.memory_block,
query_time,
filtered_matches.len(),
args,
)?;
}
Ok(())
}
/// Writes the matches as tab-separated rows under a `K-mer`/`Count` header line.
///
/// `_memory_info` and `_args` are not read here; the signature matches the other output
/// functions in this file.
///
/// # Errors
///
/// Fails when writing to `writer` fails.
fn output_table(
    writer: &mut Box<dyn Write>,
    matches: &[(String, u64)],
    _memory_info: &MemoryBlockInfo,
    _args: &PrefixQueryArgs,
) -> ProcessingResult<()> {
    writeln!(writer, "K-mer\tCount")?;
    // Stops at the first row that cannot be written.
    matches
        .iter()
        .try_for_each(|(sequence, occurrences)| writeln!(writer, "{}\t{}", sequence, occurrences))?;
    Ok(())
}
/// Escapes `raw` so it can be placed between double quotes in a JSON document.
///
/// Quotes, backslashes and control characters below U+0020 are escaped. The input is
/// returned borrowed, without allocating, when it contains none of them.
fn escape_json_string(raw: &str) -> std::borrow::Cow<'_, str> {
    let needs_escaping = |c: char| c == '"' || c == '\\' || (c as u32) < 0x20;
    if !raw.chars().any(needs_escaping) {
        return std::borrow::Cow::Borrowed(raw);
    }

    let mut escaped = String::with_capacity(raw.len() + 8);
    for c in raw.chars() {
        match c {
            '"' => escaped.push_str("\\\""),
            '\\' => escaped.push_str("\\\\"),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            c if (c as u32) < 0x20 => escaped.push_str(&format!("\\u{:04x}", c as u32)),
            c => escaped.push(c),
        }
    }
    std::borrow::Cow::Owned(escaped)
}

/// Writes the matches, the query description and the memory-block details as one JSON
/// document followed by a newline.
///
/// The `"prefix"` field holds the query that was actually run: `args.prefix` when it is set,
/// otherwise `args.pattern`. Every string value is JSON-escaped so that paths containing
/// backslashes or quotes still yield a valid document.
///
/// # Errors
///
/// Fails when writing to `writer` fails.
fn output_json(
    writer: &mut Box<dyn Write>,
    matches: &[(String, u64)],
    memory_info: &MemoryBlockInfo,
    args: &PrefixQueryArgs,
) -> ProcessingResult<()> {
    // Mirror the precedence used when the query is executed: explicit prefix first.
    let effective_query = args.prefix.as_deref().unwrap_or(args.pattern.as_str());

    let matches_json: Vec<String> = matches
        .iter()
        .map(|(kmer, count)| {
            format!(
                r#"{{"kmer": "{}", "count": {}}}"#,
                escape_json_string(kmer),
                count
            )
        })
        .collect();

    let json_output = format!(
        r#"{{
"query": {{
"prefix": "{}",
"database": "{}"
}},
"results": {{
"matches": [{}],
"total_matches": {}
}},
"performance": {{
"memory_block": {{
"start_index": {},
"end_index": {},
"block_size": {},
"is_sorted": {}
}}
}}
}}"#,
        escape_json_string(effective_query),
        escape_json_string(&args.database),
        matches_json.join(", "),
        matches.len(),
        memory_info.start_index,
        memory_info.end_index,
        memory_info.block_size,
        memory_info.is_sorted
    );
    writeln!(writer, "{}", json_output)?;
    Ok(())
}
/// Writes the matches as comma-separated rows under a `kmer,count` header line.
///
/// `_memory_info` and `_args` are not read here; the signature matches the other output
/// functions in this file. Values are written as-is, with no CSV quoting.
///
/// # Errors
///
/// Fails when writing to `writer` fails.
fn output_csv(
    writer: &mut Box<dyn Write>,
    matches: &[(String, u64)],
    _memory_info: &MemoryBlockInfo,
    _args: &PrefixQueryArgs,
) -> ProcessingResult<()> {
    writeln!(writer, "kmer,count")?;
    // Stops at the first record that cannot be written.
    matches
        .iter()
        .try_for_each(|(sequence, occurrences)| writeln!(writer, "{},{}", sequence, occurrences))?;
    Ok(())
}
/// Writes the matches as tab-separated rows under a `kmer`/`count` header line.
///
/// `_memory_info` and `_args` are not read here; the signature matches the other output
/// functions in this file.
///
/// # Errors
///
/// Fails when writing to `writer` fails.
fn output_tsv(
    writer: &mut Box<dyn Write>,
    matches: &[(String, u64)],
    _memory_info: &MemoryBlockInfo,
    _args: &PrefixQueryArgs,
) -> ProcessingResult<()> {
    writeln!(writer, "kmer\tcount")?;
    // Stops at the first record that cannot be written.
    matches
        .iter()
        .try_for_each(|(sequence, occurrences)| writeln!(writer, "{}\t{}", sequence, occurrences))?;
    Ok(())
}
/// Prints a performance summary for the finished query to standard error.
///
/// Nothing is printed when `args.quiet` is set. The summary lists the query time, the number
/// of matches, the memory-block index range and size, and whether the database is sorted.
/// The function always returns `Ok(())`.
fn output_performance_info(
    memory_info: &MemoryBlockInfo,
    query_time: std::time::Duration,
    match_count: usize,
    args: &PrefixQueryArgs,
) -> ProcessingResult<()> {
    // Quiet mode silences the whole report.
    if args.quiet {
        return Ok(());
    }

    let database_sorted = memory_info.is_sorted;
    eprintln!("\n=== Performance Profile ===");
    eprintln!("Query time: {:?}", query_time);
    eprintln!("Matches found: {}", match_count);
    eprintln!(
        "Memory block: [{}, {}) - {} k-mers",
        memory_info.start_index, memory_info.end_index, memory_info.block_size
    );
    eprintln!("Database sorted: {}", database_sorted);
    eprintln!("Optimization enabled: Yes");
    if database_sorted {
        eprintln!("Performance gain: ~10-100x vs fuzzy-query for prefix patterns");
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Test-local prefix validator: rejects an empty prefix and any character other than
    /// A/T/C/G (case-insensitive).
    ///
    /// NOTE(review): this helper lives only in the test module. The production check in
    /// `execute_prefix_query_impl` is written inline, so the test below exercises this copy
    /// rather than the production code path.
    fn validate_prefix(prefix: &str) -> ProcessingResult<()> {
        if prefix.is_empty() {
            return Err(crate::error::ProcessingError::new("Prefix cannot be empty"));
        }

        let has_invalid_base = prefix
            .chars()
            .any(|c| !matches!(c.to_ascii_uppercase(), 'A' | 'T' | 'C' | 'G'));
        if has_invalid_base {
            return Err(crate::error::ProcessingError::new(format!(
                "Prefix contains invalid characters: {}",
                prefix
            )));
        }

        Ok(())
    }

    #[test]
    fn test_prefix_validation() {
        // A plain nucleotide prefix is accepted.
        assert!(validate_prefix("ATCG").is_ok());
        // The empty prefix is rejected.
        assert!(validate_prefix("").is_err());
        // `N` is not one of the four accepted bases.
        assert!(validate_prefix("ATCGNN").is_err());
    }
}