use crate::database::format::RKDatabase;
use crate::fuzzy::{query::PositionMutationConfig, FuzzyQuery, FuzzyQueryEngine};
use anyhow::Result;
use clap::Args;
use serde_json;
use std::io::{self, Write};
use std::path::PathBuf;
use std::time::Instant;
// Command-line arguments for running a single fuzzy query (see `execute_fuzzy_query`).
//
// Plain `//` comments are used here on purpose: clap's derive can pick up `///` doc
// comments as help/about text, which would alter the CLI's help output.
#[derive(Args, Debug)]
pub struct FuzzyQueryArgs {
// Database file; opened with `RKDatabase::from_file_path`.
#[arg(help = "Path to RKDB database file")]
pub database: PathBuf,
// Query text handed to `FuzzyQuery::with_position_mutations`.
#[arg(help = "Query string (may contain 'N' wildcards)")]
pub query: String,
// Global mutation tolerance. Not used when `position_mutations` is supplied:
// in that case the database k-mer size is passed as the tolerance instead.
#[arg(
short = 'm',
long = "mutations",
default_value = "0",
help = "Maximum Hamming distance for mutations"
)]
pub mutations: usize,
// Passed to the query as `Some(max_variants)`.
#[arg(
short = 'M',
long = "max-variants",
default_value = "10000",
help = "Maximum number of variants to generate"
)]
pub max_variants: usize,
// Forwarded unchanged to the query constructor.
#[arg(
short = 'b',
long = "batch-size",
default_value = "1000",
help = "Batch size for processing variants"
)]
pub batch_size: usize,
// Selects the writer in `output_fuzzy_result`; restricted to the four listed values.
#[arg(
short = 'f',
long = "format",
default_value = "table",
value_parser = ["table", "json", "tsv", "csv"],
help = "Output format"
)]
pub format: String,
// When set, results are written to this file; otherwise they go to stdout.
#[arg(short = 'o', long = "output", help = "Output file path")]
pub output: Option<PathBuf>,
// Prints the query parameters to stderr before execution. This is independent of `quiet`.
#[arg(short = 'v', long = "verbose", help = "Enable verbose output")]
pub verbose: bool,
// Suppresses the "Loading database" status line on stderr.
#[arg(short = 'q', long = "quiet", help = "Suppress non-error output")]
pub quiet: bool,
// Prints a performance summary to stderr after the results have been written.
#[arg(long = "profile", help = "Show performance profiling")]
pub profile: bool,
// Raw specification string; parsed by `PositionMutationConfig::parse`.
#[arg(
long = "position-mutations",
help = "Position-specific mutations (e.g., \"3,4,5:2\" or \"3,4,5:2;6,7:1\")"
)]
pub position_mutations: Option<String>,
}
// Command-line arguments for running many fuzzy queries from a file
// (see `execute_fuzzy_query_batch`).
//
// Plain `//` comments are used here on purpose: clap's derive can pick up `///` doc
// comments as help/about text, which would alter the CLI's help output.
#[derive(Args, Debug)]
pub struct FuzzyQueryBatchArgs {
// Database file; opened with `RKDatabase::from_file_path`.
#[arg(help = "Path to RKDB database file")]
pub database: PathBuf,
// Text file read by `load_queries_from_file`: blank lines and lines starting with
// `#` are skipped, and only the text before the first `|` on a line is used.
#[arg(help = "File containing queries (one per line)")]
pub query_file: PathBuf,
// Mutation tolerance applied to every query in the file.
#[arg(
long = "default-mutations",
default_value = "0",
help = "Default maximum Hamming distance for mutations"
)]
pub default_mutations: usize,
// Variant cap applied to every query in the file (passed as `Some(..)`).
#[arg(
long = "default-max-variants",
default_value = "10000",
help = "Default maximum number of variants to generate"
)]
pub default_max_variants: usize,
// Forwarded to `FuzzyQuery::with_params` for each query.
#[arg(
short = 'b',
long = "batch-size",
default_value = "100",
help = "Batch size for processing queries"
)]
pub batch_size: usize,
// Selects the writer in `output_batch_results`; restricted to the four listed values.
#[arg(
short = 'f',
long = "format",
default_value = "table",
value_parser = ["table", "json", "tsv", "csv"],
help = "Output format"
)]
pub format: String,
// When set, the report is written to this file; otherwise it goes to stdout.
#[arg(short = 'o', long = "output", help = "Output file path")]
pub output: Option<PathBuf>,
// Prints the error message for each query that fails.
#[arg(short = 'v', long = "verbose", help = "Enable verbose output")]
pub verbose: bool,
// Suppresses status lines, the progress line and the final summary on stderr.
#[arg(short = 'q', long = "quiet", help = "Suppress non-error output")]
pub quiet: bool,
// Progress is shown only when this is true and `quiet` is false.
// NOTE(review): a bool flag with `default_value = "true"` looks like it can never be
// turned off from the command line — confirm against clap's handling of bool flags.
#[arg(long = "progress", default_value = "true", help = "Show progress bar")]
pub progress: bool,
// When set, the first failing query aborts the batch and its error is returned.
#[arg(long = "fail-fast", help = "Stop on first error")]
pub fail_fast: bool,
// Only consulted by the TSV and CSV batch writers.
#[arg(
long = "include-headers",
help = "Include header row in CSV/TSV output"
)]
pub include_headers: bool,
}
/// Runs the single fuzzy query described by `args` and writes its result.
///
/// The steps happen in this order: load the database, parse the optional
/// position-specific mutation spec, build the query, execute it, emit the result in
/// the requested format and, if `args.profile` is set, print a performance profile.
///
/// When a position-mutation spec is given, the database k-mer size is passed as the
/// mutation tolerance and `args.mutations` is not used.
///
/// # Errors
/// Propagates any failure from loading the database, parsing the position-mutation
/// spec, executing the query, or writing the output.
pub fn execute_fuzzy_query(args: &FuzzyQueryArgs) -> Result<()> {
    let started = Instant::now();

    if !args.quiet {
        eprintln!("Loading database: {}", args.database.display());
    }
    let db = RKDatabase::from_file_path(&args.database)?;
    // Read the k-mer size now: the database is moved into the engine further down.
    let kmer_size = db.kmer_size();

    let position_mutations = match &args.position_mutations {
        Some(spec) => Some(PositionMutationConfig::parse(spec)?),
        None => None,
    };
    let tolerance = match &position_mutations {
        Some(_) => kmer_size,
        None => args.mutations,
    };

    let query = FuzzyQuery::with_position_mutations(
        &args.query,
        kmer_size,
        tolerance,
        Some(args.max_variants),
        false,
        args.batch_size,
        position_mutations,
    );
    let engine = FuzzyQueryEngine::new(db);

    if args.verbose {
        eprintln!("Executing fuzzy query: {}", query.query_string);
        eprintln!("K-mer size: {}", query.kmer_size);
        eprintln!("Mutation tolerance: {}", query.mutation_tolerance);
        eprintln!("Max variants: {}", query.max_variants.unwrap_or(0));
    }

    let outcome = engine.execute_query(&query)?;
    output_fuzzy_result(&outcome, args)?;

    if !args.profile {
        return Ok(());
    }
    output_performance_profile(&outcome, &started, args)?;
    Ok(())
}
/// Runs every query listed in `args.query_file` against the database and writes one
/// combined report in the requested format.
///
/// Each query is built with the same default mutation tolerance, variant cap and
/// batch size taken from `args`. Queries that fail are counted (and printed when
/// `args.verbose` is set) but are not included in the report, which only receives
/// the successful results. Status lines, the progress line and the closing summary
/// are written to stderr and are suppressed by `args.quiet`.
///
/// # Errors
/// Propagates failures from loading the database, reading the query file and writing
/// the report. When `args.fail_fast` is set, the first query error is returned as well.
pub fn execute_fuzzy_query_batch(args: &FuzzyQueryBatchArgs) -> Result<()> {
    let start_time = Instant::now();
    if !args.quiet {
        eprintln!("Loading database: {}", args.database.display());
    }
    let database = RKDatabase::from_file_path(&args.database)?;
    if !args.quiet {
        eprintln!("Loading queries from: {}", args.query_file.display());
    }
    let queries = load_queries_from_file(&args.query_file)?;
    if !args.quiet {
        eprintln!("Loaded {} queries", queries.len());
    }
    // Read the k-mer size before the database is moved into the engine.
    let kmer_size = database.kmer_size();
    let engine = FuzzyQueryEngine::new(database);

    let show_progress = args.progress && !args.quiet;
    let mut results = Vec::new();
    let mut successful_queries = 0;
    let mut failed_queries = 0;
    for (i, query_str) in queries.iter().enumerate() {
        if show_progress {
            eprint!("Processing query {}/{}...\r", i + 1, queries.len());
            // The progress line is written to stderr, so stderr is the stream to
            // flush; flushing stdout has no effect on it.
            io::stderr().flush()?;
        }
        let query = FuzzyQuery::with_params(
            query_str,
            kmer_size,
            args.default_mutations,
            Some(args.default_max_variants),
            true,
            args.batch_size,
        );
        match engine.execute_query(&query) {
            Ok(result) => {
                results.push((query_str.clone(), result));
                successful_queries += 1;
            }
            Err(e) => {
                if args.verbose {
                    eprintln!("Error processing query '{}': {}", query_str, e);
                }
                failed_queries += 1;
                if args.fail_fast {
                    return Err(e.into());
                }
            }
        }
    }
    if show_progress {
        // Finish the carriage-return progress line so later output starts on a new line.
        eprintln!();
    }

    output_batch_results(&results, args)?;

    if !args.quiet {
        eprintln!("Batch processing complete:");
        eprintln!(" Successful queries: {}", successful_queries);
        eprintln!(" Failed queries: {}", failed_queries);
        eprintln!(" Total time: {:.2}s", start_time.elapsed().as_secs_f64());
    }
    Ok(())
}
fn load_queries_from_file(file_path: &PathBuf) -> Result<Vec<String>> {
use std::fs::File;
use std::io::{BufRead, BufReader};
let file = File::open(file_path)?;
let reader = BufReader::new(file);
let mut queries = Vec::new();
for line in reader.lines() {
let line = line?;
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
if line.contains('|') {
let parts: Vec<&str> = line.split('|').collect();
if !parts.is_empty() {
queries.push(parts[0].trim().to_string());
}
} else {
queries.push(line.to_string());
}
}
Ok(queries)
}
fn output_fuzzy_result(
result: &crate::fuzzy::FuzzyQueryResultData,
args: &FuzzyQueryArgs,
) -> Result<()> {
match args.format.as_str() {
"table" => output_table_format(result, args),
"json" => output_json_format(result, args),
"tsv" => output_tsv_format(result, args),
"csv" => output_csv_format(result, args),
_ => Err(anyhow::anyhow!(
"Unsupported output format: {}",
args.format
)),
}
}
/// Writes a single query result as a human-readable summary followed by a boxed table
/// of the individual matches.
///
/// Output goes to `args.output` when set, otherwise to stdout. The summary lists the
/// query string, mutation tolerance, number of generated variants, total match count
/// and query time. When there are no individual matches, "No matches found" is
/// printed instead of the table.
///
/// # Errors
/// Returns an error if the output file cannot be created or a write or flush fails.
fn output_table_format(
    result: &crate::fuzzy::FuzzyQueryResultData,
    args: &FuzzyQueryArgs,
) -> Result<()> {
    let mut writer: Box<dyn Write> = match &args.output {
        // Buffer file output so each formatted fragment is not its own write call.
        Some(output_path) => Box::new(io::BufWriter::new(std::fs::File::create(output_path)?)),
        None => Box::new(io::stdout()),
    };

    let metadata = &result.query_metadata;
    writeln!(writer, "Query: {}", metadata.query_params.query_string)?;
    writeln!(
        writer,
        "Mutations: {}",
        metadata.query_params.mutation_tolerance
    )?;
    writeln!(writer, "Variants Generated: {}", metadata.variants_generated)?;
    writeln!(writer, "Total Matches: {}", result.total_count)?;
    writeln!(writer, "Query Time: {}ms", metadata.query_time_ms)?;
    writeln!(writer)?;

    if result.individual_matches.is_empty() {
        writeln!(writer, "No matches found")?;
    } else {
        writeln!(writer, "┌─────────────────────┬───────┬─────────┐")?;
        // The header uses the same column widths as the data rows so that it lines
        // up with the borders.
        writeln!(
            writer,
            "│ {:<19} │ {:<5} │ {:<7} │",
            "Sequence", "Count", "Type"
        )?;
        writeln!(writer, "├─────────────────────┼───────┼─────────┤")?;
        for kmer_match in &result.individual_matches {
            writeln!(
                writer,
                "│ {:<19} │ {:<5} │ {:<7} │",
                kmer_match.sequence,
                kmer_match.count,
                // Render to a String first so the column padding is applied to the
                // Debug text.
                format!("{:?}", kmer_match.match_type)
            )?;
        }
        writeln!(writer, "└─────────────────────┴───────┴─────────┘")?;
    }

    // Flush explicitly so a failed write is reported instead of being dropped silently.
    writer.flush()?;
    Ok(())
}
/// Serializes a single query result as pretty-printed JSON.
///
/// The JSON text is written to `args.output` when set (exactly as produced, with no
/// trailing newline), otherwise printed to stdout followed by a newline.
///
/// # Errors
/// Returns an error if serialization fails or the output file cannot be written.
fn output_json_format(
    result: &crate::fuzzy::FuzzyQueryResultData,
    args: &FuzzyQueryArgs,
) -> Result<()> {
    let rendered = serde_json::to_string_pretty(result)?;
    match args.output.as_ref() {
        Some(destination) => std::fs::write(destination, rendered)?,
        None => println!("{}", rendered),
    }
    Ok(())
}
/// Writes the individual matches of a single query result as tab-separated rows.
///
/// Each row contains the query string, the matched sequence, its count and the Debug
/// rendering of the match type. No header row is written. Output goes to
/// `args.output` when set, otherwise to stdout.
///
/// # Errors
/// Returns an error if the output file cannot be created or a write or flush fails.
fn output_tsv_format(
    result: &crate::fuzzy::FuzzyQueryResultData,
    args: &FuzzyQueryArgs,
) -> Result<()> {
    let mut writer: Box<dyn Write> = match &args.output {
        // Buffer file output so each formatted fragment is not its own write call.
        Some(output_path) => Box::new(io::BufWriter::new(std::fs::File::create(output_path)?)),
        None => Box::new(io::stdout()),
    };

    let query_string = &result.query_metadata.query_params.query_string;
    for kmer_match in &result.individual_matches {
        writeln!(
            writer,
            "{}\t{}\t{}\t{:?}",
            query_string, kmer_match.sequence, kmer_match.count, kmer_match.match_type
        )?;
    }

    // Flush explicitly so a failed write is reported instead of being dropped silently.
    writer.flush()?;
    Ok(())
}
/// Writes the individual matches of a single query result as CSV.
///
/// A header row (`query,sequence,count,match_type`) is always written, followed by
/// one row per match. Text fields containing a comma, double quote, carriage return
/// or line feed are wrapped in double quotes with embedded quotes doubled, so such
/// values cannot break the column layout. Output goes to `args.output` when set,
/// otherwise to stdout.
///
/// # Errors
/// Returns an error if the output file cannot be created or a write or flush fails.
fn output_csv_format(
    result: &crate::fuzzy::FuzzyQueryResultData,
    args: &FuzzyQueryArgs,
) -> Result<()> {
    /// Quotes `raw` for use as one CSV field when it contains a special character.
    fn csv_field(raw: &str) -> std::borrow::Cow<'_, str> {
        if raw.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r')) {
            std::borrow::Cow::Owned(format!("\"{}\"", raw.replace('"', "\"\"")))
        } else {
            std::borrow::Cow::Borrowed(raw)
        }
    }

    let mut writer: Box<dyn Write> = match &args.output {
        // Buffer file output so each formatted fragment is not its own write call.
        Some(output_path) => Box::new(io::BufWriter::new(std::fs::File::create(output_path)?)),
        None => Box::new(io::stdout()),
    };

    writeln!(writer, "query,sequence,count,match_type")?;
    // The query text is the same for every row, so render it once.
    let query_text = result.query_metadata.query_params.query_string.to_string();
    for kmer_match in &result.individual_matches {
        let sequence = kmer_match.sequence.to_string();
        let match_type = format!("{:?}", kmer_match.match_type);
        writeln!(
            writer,
            "{},{},{},{}",
            csv_field(&query_text),
            csv_field(&sequence),
            kmer_match.count,
            csv_field(&match_type)
        )?;
    }

    // Flush explicitly so a failed write is reported instead of being dropped silently.
    writer.flush()?;
    Ok(())
}
fn output_batch_results(
results: &[(String, crate::fuzzy::FuzzyQueryResultData)],
args: &FuzzyQueryBatchArgs,
) -> Result<()> {
match args.format.as_str() {
"json" => output_batch_json(results, args),
"table" => output_batch_table(results, args),
"tsv" => output_batch_tsv(results, args),
"csv" => output_batch_csv(results, args),
_ => Err(anyhow::anyhow!(
"Unsupported output format: {}",
args.format
)),
}
}
/// Serializes the batch results as one pretty-printed JSON document.
///
/// The document has a `batch_metadata` object and a `query_results` array with one
/// `{ "query", "result" }` entry per item in `results`. It is written to
/// `args.output` when set (with no trailing newline), otherwise printed to stdout.
///
/// NOTE(review): `total_queries` and `processed_queries` are both `results.len()` and
/// `failed_queries` is always 0, because only the successful results are passed in.
///
/// # Errors
/// Returns an error if serialization fails or the output file cannot be written.
fn output_batch_json(
    results: &[(String, crate::fuzzy::FuzzyQueryResultData)],
    args: &FuzzyQueryBatchArgs,
) -> Result<()> {
    let processed = results.len();
    let document = serde_json::json!({
        "batch_metadata": {
            "total_queries": processed,
            "processed_queries": processed,
            "failed_queries": 0,
            "output_format": args.format
        },
        "query_results": results.iter().map(|(query, result)| {
            serde_json::json!({
                "query": query,
                "result": result
            })
        }).collect::<Vec<_>>()
    });

    let rendered = serde_json::to_string_pretty(&document)?;
    match args.output.as_ref() {
        Some(destination) => std::fs::write(destination, rendered)?,
        None => println!("{}", rendered),
    }
    Ok(())
}
/// Writes the batch results as a boxed table with one row per query.
///
/// Each row shows the 1-based row number, the query text, the number of generated
/// variants, the mutation tolerance, the total match count, the query time in
/// milliseconds and the Debug rendering of the status. Query text longer than 19
/// bytes is cut to at most 16 bytes, always at a character boundary. Output goes to
/// `args.output` when set, otherwise to stdout.
///
/// # Errors
/// Returns an error if the output file cannot be created or a write or flush fails.
fn output_batch_table(
    results: &[(String, crate::fuzzy::FuzzyQueryResultData)],
    args: &FuzzyQueryBatchArgs,
) -> Result<()> {
    // Queries longer than the column are shortened to this many bytes.
    const QUERY_COLUMN_WIDTH: usize = 19;
    const TRUNCATED_QUERY_LEN: usize = 16;

    let mut writer: Box<dyn Write> = match &args.output {
        // Buffer file output so each formatted fragment is not its own write call.
        Some(output_path) => Box::new(io::BufWriter::new(std::fs::File::create(output_path)?)),
        None => Box::new(io::stdout()),
    };

    writeln!(writer, "Batch Query Results")?;
    writeln!(writer, "Total Queries: {}", results.len())?;
    writeln!(writer, "Processed: {}", results.len())?;
    writeln!(writer)?;
    writeln!(
        writer,
        "┌─────┬─────────────────────┬───────┬───────┬──────────┬──────────┬─────────┐"
    )?;
    // The header uses the same column widths as the data rows so that it lines up
    // with the borders.
    writeln!(
        writer,
        "│ {:<3} │ {:<19} │ {:<5} │ {:<5} │ {:<8} │ {:<8} │ {:<7} │",
        "#", "Query", "Vars", "Mut", "Matches", "Time(ms)", "Status"
    )?;
    writeln!(
        writer,
        "├─────┼─────────────────────┼───────┼───────┼──────────┼──────────┼─────────┤"
    )?;
    for (i, (query, result)) in results.iter().enumerate() {
        let shown_query: &str = if query.len() > QUERY_COLUMN_WIDTH {
            // Slicing in the middle of a multi-byte character would panic, so back
            // up to the nearest character boundary (index 0 always is one).
            let mut end = TRUNCATED_QUERY_LEN;
            while !query.is_char_boundary(end) {
                end -= 1;
            }
            &query[..end]
        } else {
            query
        };
        writeln!(
            writer,
            "│ {:<3} │ {:<19} │ {:<5} │ {:<5} │ {:<8} │ {:<8} │ {:<7} │",
            i + 1,
            shown_query,
            result.query_metadata.variants_generated,
            result.query_metadata.query_params.mutation_tolerance,
            result.total_count,
            result.query_metadata.query_time_ms,
            // Render to a String first so the column padding is applied to the
            // Debug text.
            format!("{:?}", result.status)
        )?;
    }
    writeln!(
        writer,
        "└─────┴─────────────────────┴───────┴───────┴──────────┴──────────┴─────────┘"
    )?;

    // Flush explicitly so a failed write is reported instead of being dropped silently.
    writer.flush()?;
    Ok(())
}
/// Writes the batch results as tab-separated rows, one per query.
///
/// Columns are the 1-based row number, the query text, the number of generated
/// variants, the mutation tolerance, the total match count, the query time in
/// milliseconds and the Debug rendering of the status. A header row is written only
/// when `args.include_headers` is set. Output goes to `args.output` when set,
/// otherwise to stdout.
///
/// # Errors
/// Returns an error if the output file cannot be created or a write or flush fails.
fn output_batch_tsv(
    results: &[(String, crate::fuzzy::FuzzyQueryResultData)],
    args: &FuzzyQueryBatchArgs,
) -> Result<()> {
    let mut writer: Box<dyn Write> = match &args.output {
        // Buffer file output so each formatted fragment is not its own write call.
        Some(output_path) => Box::new(io::BufWriter::new(std::fs::File::create(output_path)?)),
        None => Box::new(io::stdout()),
    };

    if args.include_headers {
        writeln!(writer, "query_id\tquery_string\tvariants_generated\tmutations\ttotal_matches\tquery_time_ms\tstatus")?;
    }
    for (i, (query, result)) in results.iter().enumerate() {
        let metadata = &result.query_metadata;
        writeln!(
            writer,
            "{}\t{}\t{}\t{}\t{}\t{}\t{:?}",
            i + 1,
            query,
            metadata.variants_generated,
            metadata.query_params.mutation_tolerance,
            result.total_count,
            metadata.query_time_ms,
            result.status
        )?;
    }

    // Flush explicitly so a failed write is reported instead of being dropped silently.
    writer.flush()?;
    Ok(())
}
/// Writes the batch results as CSV, one row per query.
///
/// Columns are the 1-based row number, the query text, the number of generated
/// variants, the mutation tolerance, the total match count, the query time in
/// milliseconds and the Debug rendering of the status. A header row is written only
/// when `args.include_headers` is set. The query text and status are wrapped in
/// double quotes (with embedded quotes doubled) when they contain a comma, double
/// quote, carriage return or line feed, so such values cannot break the column
/// layout. Output goes to `args.output` when set, otherwise to stdout.
///
/// # Errors
/// Returns an error if the output file cannot be created or a write or flush fails.
fn output_batch_csv(
    results: &[(String, crate::fuzzy::FuzzyQueryResultData)],
    args: &FuzzyQueryBatchArgs,
) -> Result<()> {
    /// Quotes `raw` for use as one CSV field when it contains a special character.
    fn csv_field(raw: &str) -> std::borrow::Cow<'_, str> {
        if raw.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r')) {
            std::borrow::Cow::Owned(format!("\"{}\"", raw.replace('"', "\"\"")))
        } else {
            std::borrow::Cow::Borrowed(raw)
        }
    }

    let mut writer: Box<dyn Write> = match &args.output {
        // Buffer file output so each formatted fragment is not its own write call.
        Some(output_path) => Box::new(io::BufWriter::new(std::fs::File::create(output_path)?)),
        None => Box::new(io::stdout()),
    };

    if args.include_headers {
        writeln!(
            writer,
            "query_id,query_string,variants_generated,mutations,total_matches,query_time_ms,status"
        )?;
    }
    for (i, (query, result)) in results.iter().enumerate() {
        let metadata = &result.query_metadata;
        let status = format!("{:?}", result.status);
        writeln!(
            writer,
            "{},{},{},{},{},{},{}",
            i + 1,
            csv_field(query),
            metadata.variants_generated,
            metadata.query_params.mutation_tolerance,
            result.total_count,
            metadata.query_time_ms,
            csv_field(&status)
        )?;
    }

    // Flush explicitly so a failed write is reported instead of being dropped silently.
    writer.flush()?;
    Ok(())
}
/// Prints a performance summary for a finished query to stderr.
///
/// `start_time` is the instant the caller started timing; everything that has
/// elapsed since then counts as the total. The "Variant Generation" figure is that
/// total minus the query time reported in `result`, so it includes whatever else the
/// caller did after `start_time`. The cache-hit line, the result-aggregation time
/// and the processing mode are fixed text, and the memory breakdown uses fixed
/// fractions (0.3 and 0.2) of the reported usage rather than measurements.
///
/// # Errors
/// Never fails at present; the `Result` return type is kept for the caller's `?`.
fn output_performance_profile(
    result: &crate::fuzzy::FuzzyQueryResultData,
    start_time: &Instant,
    _args: &FuzzyQueryArgs,
) -> Result<()> {
    let total_time = start_time.elapsed().as_millis() as u64;
    let query_time = result.query_metadata.query_time_ms;
    // Saturate instead of underflowing if the reported query time ever exceeds the
    // total measured here (for example because of rounding on the other side).
    let other_time = total_time.saturating_sub(query_time);
    // Avoid printing NaN/inf when no variants were generated.
    let variants = result.query_metadata.variants_generated as f64;
    let avg_per_variant = if variants > 0.0 {
        query_time as f64 / variants
    } else {
        0.0
    };

    eprintln!("\n=== Performance Profile ===");
    eprintln!("Total Query Time: {}ms", total_time);
    eprintln!("├─ Variant Generation: {}ms", other_time);
    eprintln!("├─ Database Queries: {}ms", query_time);
    eprintln!("│ ├─ Avg per query: {:.1}ms", avg_per_variant);
    eprintln!("│ └─ Cache hits: N/A");
    eprintln!("└─ Result Aggregation: {}ms", 0);

    if let Some(memory_usage) = result.query_metadata.memory_usage_mb {
        eprintln!("\nMemory Usage:");
        eprintln!("├─ Peak: {:.1}MB", memory_usage);
        eprintln!("├─ Variants: {:.1}MB", memory_usage * 0.3);
        eprintln!("└─ Results: {:.1}MB", memory_usage * 0.2);
    }

    eprintln!("\nProcessing Mode: Sequential");
    Ok(())
}