use std::time::{Duration, Instant};
use crate::error::FerroError;
use crate::hgvs::parser::parse_hgvs;
use crate::hgvs::variant::HgvsVariant;
use crate::normalize::Normalizer;
use crate::reference::ReferenceProvider;
/// Settings that control how a batch run behaves.
///
/// Build one with [`BatchConfig::new`] (or [`Default::default`]) and adjust it
/// with the chainable setters.
#[derive(Debug, Clone)]
pub struct BatchConfig {
    /// Whether processing should keep going after an item fails.
    ///
    /// NOTE(review): the batch processor in this file does not currently read
    /// this flag; confirm the intended behaviour before relying on it.
    pub continue_on_error: bool,
    /// Number of processed items between progress callbacks. The batch
    /// processor reports whenever the processed count is a multiple of this
    /// value, and once more for the final item.
    pub progress_interval: usize,
}

impl Default for BatchConfig {
    /// Defaults: continue on error, report progress every 100 items.
    fn default() -> Self {
        BatchConfig {
            progress_interval: 100,
            continue_on_error: true,
        }
    }
}

impl BatchConfig {
    /// Creates a configuration holding the default values.
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Returns the configuration with `continue_on_error` replaced.
    pub fn continue_on_error(self, continue_on_error: bool) -> Self {
        Self {
            continue_on_error,
            ..self
        }
    }

    /// Returns the configuration with `progress_interval` replaced by `interval`.
    pub fn progress_interval(self, interval: usize) -> Self {
        Self {
            progress_interval: interval,
            ..self
        }
    }
}
/// Snapshot of how far a batch run has progressed, as handed to progress
/// callbacks.
#[derive(Debug, Clone)]
pub struct BatchProgress {
    /// Number of items in the whole batch.
    pub total: usize,
    /// Number of items handled so far (successes plus errors).
    pub processed: usize,
    /// Number of items handled successfully so far.
    pub success: usize,
    /// Number of items that failed so far.
    pub errors: usize,
    /// Time elapsed since the batch started.
    pub elapsed: Duration,
}

impl BatchProgress {
    /// Completion percentage (`processed / total * 100`).
    ///
    /// An empty batch (`total == 0`) is reported as `100.0`.
    pub fn percent(&self) -> f64 {
        if self.total == 0 {
            100.0
        } else {
            (self.processed as f64 / self.total as f64) * 100.0
        }
    }

    /// Average throughput so far, in items per second.
    ///
    /// Returns `0.0` when effectively no time has elapsed, so the division
    /// never produces an infinite or NaN rate.
    pub fn items_per_second(&self) -> f64 {
        let secs = self.elapsed.as_secs_f64();
        if secs < f64::EPSILON {
            0.0
        } else {
            self.processed as f64 / secs
        }
    }

    /// Estimated time needed to finish the remaining items at the current rate.
    ///
    /// Returns `None` when no rate is available yet (nothing processed or no
    /// time elapsed), and also when the projection is too large to be
    /// represented as a [`Duration`].
    pub fn estimated_remaining(&self) -> Option<Duration> {
        let rate = self.items_per_second();
        if rate == 0.0 {
            return None;
        }
        let remaining_items = self.total.saturating_sub(self.processed);
        let remaining_secs = remaining_items as f64 / rate;
        // `Duration::from_secs_f64` panics on values that overflow `Duration`;
        // the fallible constructor turns that case into "no estimate".
        Duration::try_from_secs_f64(remaining_secs).ok()
    }
}
/// Outcome of processing a single batch item.
#[derive(Debug, Clone)]
pub enum ItemResult<T> {
    /// The item was processed successfully and produced a value.
    Ok(T),
    /// The item failed.
    Err {
        /// Textual form of the input that failed, when available.
        input: Option<String>,
        /// The error reported for this item.
        error: FerroError,
    },
}

impl<T> ItemResult<T> {
    /// Returns `true` for the [`ItemResult::Ok`] variant.
    pub fn is_ok(&self) -> bool {
        match self {
            Self::Ok(_) => true,
            Self::Err { .. } => false,
        }
    }

    /// Returns `true` for the [`ItemResult::Err`] variant.
    pub fn is_err(&self) -> bool {
        match self {
            Self::Ok(_) => false,
            Self::Err { .. } => true,
        }
    }

    /// Consumes the result, yielding the success value if there is one.
    pub fn ok(self) -> Option<T> {
        if let Self::Ok(value) = self {
            Some(value)
        } else {
            None
        }
    }

    /// Consumes the result, yielding the error if there is one.
    ///
    /// The recorded `input` string is dropped.
    pub fn err(self) -> Option<FerroError> {
        if let Self::Err { error, .. } = self {
            Some(error)
        } else {
            None
        }
    }
}
#[derive(Debug)]
pub struct BatchResult<T> {
pub results: Vec<ItemResult<T>>,
pub duration: Duration,
}
impl<T> BatchResult<T> {
pub fn new(results: Vec<ItemResult<T>>, duration: Duration) -> Self {
Self { results, duration }
}
pub fn total(&self) -> usize {
self.results.len()
}
pub fn success_count(&self) -> usize {
self.results.iter().filter(|r| r.is_ok()).count()
}
pub fn error_count(&self) -> usize {
self.results.iter().filter(|r| r.is_err()).count()
}
pub fn success_rate(&self) -> f64 {
if self.results.is_empty() {
100.0
} else {
(self.success_count() as f64 / self.results.len() as f64) * 100.0
}
}
pub fn items_per_second(&self) -> f64 {
let secs = self.duration.as_secs_f64();
if secs < f64::EPSILON {
0.0
} else {
self.results.len() as f64 / secs
}
}
pub fn successes(self) -> Vec<T> {
self.results.into_iter().filter_map(|r| r.ok()).collect()
}
pub fn errors(self) -> Vec<FerroError> {
self.results.into_iter().filter_map(|r| r.err()).collect()
}
pub fn all_ok(&self) -> bool {
self.results.iter().all(|r| r.is_ok())
}
pub fn has_errors(&self) -> bool {
self.results.iter().any(|r| r.is_err())
}
}
// No `Clone` bound: the method only hands out references, so it is usable for
// any payload type.
impl<T> BatchResult<T> {
    /// Borrows every success value, in input order, without consuming the batch.
    pub fn success_refs(&self) -> Vec<&T> {
        self.results
            .iter()
            .filter_map(|item| match item {
                ItemResult::Ok(value) => Some(value),
                ItemResult::Err { .. } => None,
            })
            .collect()
    }
}
pub struct BatchProcessor<P: ReferenceProvider> {
normalizer: Normalizer<P>,
config: BatchConfig,
}
impl<P: ReferenceProvider> BatchProcessor<P> {
pub fn new(provider: P) -> Self {
Self {
normalizer: Normalizer::new(provider),
config: BatchConfig::default(),
}
}
pub fn with_config(provider: P, config: BatchConfig) -> Self {
Self {
normalizer: Normalizer::new(provider),
config,
}
}
pub fn config(&self) -> &BatchConfig {
&self.config
}
pub fn parse<S: AsRef<str>>(&self, variants: &[S]) -> BatchResult<HgvsVariant> {
self.parse_with_progress(variants, |_| {})
}
pub fn parse_with_progress<S, F>(
&self,
variants: &[S],
mut progress_fn: F,
) -> BatchResult<HgvsVariant>
where
S: AsRef<str>,
F: FnMut(BatchProgress),
{
let start = Instant::now();
let total = variants.len();
let mut results = Vec::with_capacity(total);
let mut success = 0;
let mut errors = 0;
for (i, input) in variants.iter().enumerate() {
match parse_hgvs(input.as_ref()) {
Ok(variant) => {
results.push(ItemResult::Ok(variant));
success += 1;
}
Err(error) => {
results.push(ItemResult::Err {
input: Some(input.as_ref().to_string()),
error,
});
errors += 1;
}
}
if (i + 1) % self.config.progress_interval == 0 || i + 1 == total {
progress_fn(BatchProgress {
total,
processed: i + 1,
success,
errors,
elapsed: start.elapsed(),
});
}
}
BatchResult::new(results, start.elapsed())
}
pub fn normalize(&self, variants: &[HgvsVariant]) -> BatchResult<HgvsVariant> {
self.normalize_with_progress(variants, |_| {})
}
pub fn normalize_with_progress<F>(
&self,
variants: &[HgvsVariant],
mut progress_fn: F,
) -> BatchResult<HgvsVariant>
where
F: FnMut(BatchProgress),
{
let start = Instant::now();
let total = variants.len();
let mut results = Vec::with_capacity(total);
let mut success = 0;
let mut errors = 0;
for (i, variant) in variants.iter().enumerate() {
match self.normalizer.normalize(variant) {
Ok(normalized) => {
results.push(ItemResult::Ok(normalized));
success += 1;
}
Err(error) => {
results.push(ItemResult::Err {
input: Some(variant.to_string()),
error,
});
errors += 1;
}
}
if (i + 1) % self.config.progress_interval == 0 || i + 1 == total {
progress_fn(BatchProgress {
total,
processed: i + 1,
success,
errors,
elapsed: start.elapsed(),
});
}
}
BatchResult::new(results, start.elapsed())
}
pub fn parse_and_normalize<S: AsRef<str>>(&self, variants: &[S]) -> BatchResult<HgvsVariant> {
self.parse_and_normalize_with_progress(variants, |_| {})
}
pub fn parse_and_normalize_with_progress<S, F>(
&self,
variants: &[S],
mut progress_fn: F,
) -> BatchResult<HgvsVariant>
where
S: AsRef<str>,
F: FnMut(BatchProgress),
{
let start = Instant::now();
let total = variants.len();
let mut results = Vec::with_capacity(total);
let mut success = 0;
let mut errors = 0;
for (i, input) in variants.iter().enumerate() {
let result = parse_hgvs(input.as_ref()).and_then(|v| self.normalizer.normalize(&v));
match result {
Ok(normalized) => {
results.push(ItemResult::Ok(normalized));
success += 1;
}
Err(error) => {
results.push(ItemResult::Err {
input: Some(input.as_ref().to_string()),
error,
});
errors += 1;
}
}
if (i + 1) % self.config.progress_interval == 0 || i + 1 == total {
progress_fn(BatchProgress {
total,
processed: i + 1,
success,
errors,
elapsed: start.elapsed(),
});
}
}
BatchResult::new(results, start.elapsed())
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::MockProvider;

    // Inputs shared by the tests below.
    const CODING_SNV: &str = "NM_000088.3:c.10A>G";
    const GENOMIC_SNV: &str = "NC_000001.11:g.12345A>G";

    /// Processor backed by the mock provider's test data.
    fn processor() -> BatchProcessor<MockProvider> {
        BatchProcessor::new(MockProvider::with_test_data())
    }

    /// Progress snapshot: 50 of 100 items done after one second.
    fn halfway_progress() -> BatchProgress {
        BatchProgress {
            total: 100,
            processed: 50,
            success: 45,
            errors: 5,
            elapsed: Duration::from_secs(1),
        }
    }

    #[test]
    fn test_batch_config_default() {
        let defaults = BatchConfig::default();
        assert!(defaults.continue_on_error);
        assert_eq!(defaults.progress_interval, 100);
    }

    #[test]
    fn test_batch_config_builder() {
        let custom = BatchConfig::new()
            .continue_on_error(false)
            .progress_interval(50);
        assert!(!custom.continue_on_error);
        assert_eq!(custom.progress_interval, 50);
    }

    #[test]
    fn test_batch_progress_percent() {
        let percent = halfway_progress().percent();
        assert!((percent - 50.0).abs() < 0.01);
    }

    #[test]
    fn test_batch_progress_items_per_second() {
        let rate = halfway_progress().items_per_second();
        assert!((rate - 50.0).abs() < 0.01);
    }

    #[test]
    fn test_batch_progress_estimated_remaining() {
        let eta = halfway_progress().estimated_remaining().unwrap();
        assert!((eta.as_secs_f64() - 1.0).abs() < 0.1);
    }

    #[test]
    fn test_item_result_ok() {
        let parsed = parse_hgvs(CODING_SNV).unwrap();
        let outcome: ItemResult<HgvsVariant> = ItemResult::Ok(parsed);
        assert!(outcome.is_ok());
        assert!(!outcome.is_err());
    }

    #[test]
    fn test_item_result_err() {
        let outcome: ItemResult<HgvsVariant> = ItemResult::Err {
            input: Some("invalid".to_string()),
            error: FerroError::parse(0, "test error"),
        };
        assert!(!outcome.is_ok());
        assert!(outcome.is_err());
    }

    #[test]
    fn test_parse_batch() {
        let inputs = vec![CODING_SNV, GENOMIC_SNV];
        let batch = processor().parse(&inputs);
        assert_eq!(batch.total(), 2);
        assert_eq!(batch.success_count(), 2);
        assert_eq!(batch.error_count(), 0);
        assert!(batch.all_ok());
    }

    #[test]
    fn test_parse_batch_with_errors() {
        let inputs = vec![CODING_SNV, "invalid variant", GENOMIC_SNV];
        let batch = processor().parse(&inputs);
        assert_eq!(batch.total(), 3);
        assert_eq!(batch.success_count(), 2);
        assert_eq!(batch.error_count(), 1);
        assert!(batch.has_errors());
    }

    #[test]
    fn test_parse_batch_with_progress() {
        let inputs = vec![CODING_SNV, GENOMIC_SNV];
        let mut callback_calls = 0;
        let batch = processor().parse_with_progress(&inputs, |_| callback_calls += 1);
        assert_eq!(batch.total(), 2);
        assert!(callback_calls > 0);
    }

    #[test]
    fn test_normalize_batch() {
        let parsed: Vec<_> = [CODING_SNV, GENOMIC_SNV]
            .iter()
            .copied()
            .map(|text| parse_hgvs(text).unwrap())
            .collect();
        let batch = processor().normalize(&parsed);
        assert_eq!(batch.total(), 2);
    }

    #[test]
    fn test_parse_and_normalize_batch() {
        let inputs = vec![CODING_SNV, GENOMIC_SNV];
        let batch = processor().parse_and_normalize(&inputs);
        assert_eq!(batch.total(), 2);
    }

    #[test]
    fn test_batch_result_successes() {
        let inputs = vec![CODING_SNV, "invalid", GENOMIC_SNV];
        let parsed_ok = processor().parse(&inputs).successes();
        assert_eq!(parsed_ok.len(), 2);
    }

    #[test]
    fn test_batch_result_errors() {
        let inputs = vec![CODING_SNV, "invalid", GENOMIC_SNV];
        let failures = processor().parse(&inputs).errors();
        assert_eq!(failures.len(), 1);
    }

    #[test]
    fn test_batch_result_success_rate() {
        let inputs = vec![CODING_SNV, "invalid"];
        let batch = processor().parse(&inputs);
        assert!((batch.success_rate() - 50.0).abs() < 0.01);
    }

    #[test]
    fn test_empty_batch() {
        let inputs = Vec::<&str>::new();
        let batch = processor().parse(&inputs);
        assert_eq!(batch.total(), 0);
        assert!(batch.all_ok());
        assert!((batch.success_rate() - 100.0).abs() < 0.01);
    }
}