Skip to main content

fgumi_lib/
validation.rs

//! Input validation utilities.
//!
//! This module provides common validation functions for command-line parameters,
//! file paths, and SAM tags with consistent error messages.
//!
//! All validation functions use the structured error types from [`crate::errors`] to provide
//! rich contextual information when validation fails.
8
9use crate::errors::{FgumiError, Result};
10use noodles::sam::alignment::record::data::field::Tag;
11use std::fmt::Display;
12use std::path::Path;
13
14/// Validate that a file exists
15///
16/// # Arguments
17/// * `path` - Path to validate
18/// * `description` - Human-readable description of the file (e.g., "Input file", "Reference")
19///
20/// # Errors
21/// Returns an error if the file does not exist
22///
23/// # Example
24/// ```
25/// use fgumi_lib::validation::validate_file_exists;
26/// use std::path::Path;
27///
28/// let result = validate_file_exists("/nonexistent/file.bam", "Input file");
29/// assert!(result.is_err());
30/// ```
31pub fn validate_file_exists<P: AsRef<Path>>(path: P, description: &str) -> Result<()> {
32    let path_ref = path.as_ref();
33    if !path_ref.exists() {
34        return Err(FgumiError::InvalidFileFormat {
35            file_type: description.to_string(),
36            path: path_ref.display().to_string(),
37            reason: "File does not exist".to_string(),
38        });
39    }
40    Ok(())
41}
42
43/// Validate that multiple files exist
44///
45/// # Arguments
46/// * `files` - Slice of (path, description) tuples
47///
48/// # Errors
49/// Returns an error for the first file that doesn't exist
50///
51/// # Example
52/// ```no_run
53/// use fgumi_lib::validation::validate_files_exist;
54/// use std::path::PathBuf;
55///
56/// let files = vec![
57///     (PathBuf::from("input.bam"), "Input BAM"),
58///     (PathBuf::from("ref.fa"), "Reference"),
59/// ];
60/// validate_files_exist(&files).unwrap();
61/// ```
62pub fn validate_files_exist<P: AsRef<Path>>(files: &[(P, &str)]) -> Result<()> {
63    for (path, desc) in files {
64        validate_file_exists(path, desc)?;
65    }
66    Ok(())
67}
68
69/// Validate that a SAM tag is exactly 2 characters
70///
71/// # Arguments
72/// * `tag` - Tag string to validate
73/// * `name` - Name of the parameter for error messages
74///
75/// # Returns
76/// A 2-byte array representing the tag
77///
78/// # Errors
79/// Returns an error if the tag is not exactly 2 characters
80///
81/// # Example
82/// ```
83/// use fgumi_lib::validation::validate_tag;
84///
85/// let tag = validate_tag("MI", "UMI tag").unwrap();
86/// assert_eq!(tag, [b'M', b'I']);
87///
88/// let result = validate_tag("ABC", "UMI tag");
89/// assert!(result.is_err());
90/// ```
91pub fn validate_tag(tag: &str, name: &str) -> Result<[u8; 2]> {
92    if tag.len() != 2 {
93        return Err(FgumiError::InvalidParameter {
94            parameter: name.to_string(),
95            reason: format!("Tag must be exactly 2 characters, got: '{tag}'"),
96        });
97    }
98    let bytes = tag.as_bytes();
99    Ok([bytes[0], bytes[1]])
100}
101
102/// Convert a validated string tag to noodles Tag type
103///
104/// This combines validation and conversion for convenience.
105///
106/// # Arguments
107/// * `tag` - Tag string to validate and convert
108/// * `name` - Name of the parameter for error messages
109///
110/// # Returns
111/// A noodles `Tag` object
112///
113/// # Errors
114/// Returns an error if the tag is not exactly 2 characters
115///
116/// # Example
117/// ```
118/// use fgumi_lib::validation::string_to_tag;
119///
120/// let tag = string_to_tag("MI", "UMI tag").unwrap();
121/// ```
122pub fn string_to_tag(tag: &str, name: &str) -> Result<Tag> {
123    let tag_array = validate_tag(tag, name)?;
124    Ok(Tag::from(tag_array))
125}
126
127/// Convert an optional string tag to an optional noodles Tag type
128///
129/// This is a convenience function for optional CLI arguments like cell barcode tags.
130///
131/// # Arguments
132/// * `tag` - Optional tag string to validate and convert
133/// * `name` - Name of the parameter for error messages
134///
135/// # Returns
136/// `Some(Tag)` if the input is `Some` and valid, `None` if the input is `None`
137///
138/// # Errors
139/// Returns an error if the tag is `Some` but not exactly 2 characters
140///
141/// # Example
142/// ```
143/// use fgumi_lib::validation::optional_string_to_tag;
144///
145/// // Some tag - validates and converts
146/// let tag = optional_string_to_tag(Some("CB"), "cell tag").unwrap();
147/// assert!(tag.is_some());
148///
149/// // None - returns None
150/// let tag = optional_string_to_tag(None, "cell tag").unwrap();
151/// assert!(tag.is_none());
152///
153/// // Invalid tag - returns error
154/// let result = optional_string_to_tag(Some("ABC"), "cell tag");
155/// assert!(result.is_err());
156/// ```
157pub fn optional_string_to_tag(tag: Option<&str>, name: &str) -> Result<Option<Tag>> {
158    tag.map(|t| string_to_tag(t, name)).transpose()
159}
160
161/// Validate that max >= min for optional max values
162///
163/// # Arguments
164/// * `min_val` - Minimum value
165/// * `max_val` - Optional maximum value
166/// * `min_name` - Name of minimum parameter for error messages
167/// * `max_name` - Name of maximum parameter for error messages
168///
169/// # Errors
170/// Returns an error if max < min
171///
172/// # Example
173/// ```
174/// use fgumi_lib::validation::validate_min_max;
175///
176/// // Valid: max >= min
177/// validate_min_max(1, Some(10), "min-reads", "max-reads").unwrap();
178///
179/// // Valid: max is None
180/// validate_min_max(1, None, "min-reads", "max-reads").unwrap();
181///
182/// // Invalid: max < min
183/// let result = validate_min_max(10, Some(5), "min-reads", "max-reads");
184/// assert!(result.is_err());
185/// ```
186#[allow(clippy::needless_pass_by_value)]
187pub fn validate_min_max<T: Ord + Display>(
188    min_val: T,
189    max_val: Option<T>,
190    min_name: &str,
191    max_name: &str,
192) -> Result<()> {
193    if let Some(max) = max_val {
194        if max < min_val {
195            return Err(FgumiError::InvalidParameter {
196                parameter: max_name.to_string(),
197                reason: format!("{max_name} ({max}) must be >= {min_name} ({min_val})"),
198            });
199        }
200    }
201    Ok(())
202}
203
204/// Validate that an error rate is in the valid range [0.0, 1.0]
205///
206/// # Arguments
207/// * `rate` - Error rate to validate
208/// * `name` - Name of the parameter for error messages
209///
210/// # Errors
211/// Returns an error if the rate is not in [0.0, 1.0]
212///
213/// # Example
214/// ```
215/// use fgumi_lib::validation::validate_error_rate;
216///
217/// validate_error_rate(0.01, "error-rate-pre-umi").unwrap();
218/// validate_error_rate(1.0, "error-rate-post-umi").unwrap();
219///
220/// let result = validate_error_rate(1.5, "error-rate");
221/// assert!(result.is_err());
222/// ```
223pub fn validate_error_rate(rate: f64, _name: &str) -> Result<()> {
224    if !(0.0..=1.0).contains(&rate) {
225        return Err(FgumiError::InvalidFrequency { value: rate, min: 0.0, max: 1.0 });
226    }
227    Ok(())
228}
229
230/// Validate that a quality score is in the valid Phred range [0, 93]
231///
232/// # Arguments
233/// * `quality` - Quality score to validate
234/// * `name` - Name of the parameter for error messages
235///
236/// # Errors
237/// Returns an error if the quality is not in [0, 93]
238///
239/// # Example
240/// ```
241/// use fgumi_lib::validation::validate_quality_score;
242///
243/// validate_quality_score(30, "min-base-quality").unwrap();
244///
245/// let result = validate_quality_score(100, "min-base-quality");
246/// assert!(result.is_err());
247/// ```
248pub fn validate_quality_score(quality: u8, _name: &str) -> Result<()> {
249    if quality > 93 {
250        return Err(FgumiError::InvalidQuality { value: quality, max: 93 });
251    }
252    Ok(())
253}
254
255/// Validate that a value is positive (> 0)
256///
257/// # Arguments
258/// * `value` - Value to validate
259/// * `name` - Name of the parameter for error messages
260///
261/// # Errors
262/// Returns an error if the value is not positive
263///
264/// # Example
265/// ```
266/// use fgumi_lib::validation::validate_positive;
267///
268/// validate_positive(10, "min-reads").unwrap();
269///
270/// let result = validate_positive(0, "min-reads");
271/// assert!(result.is_err());
272/// ```
273#[allow(clippy::needless_pass_by_value)]
274pub fn validate_positive<T: Ord + Display + Default>(value: T, name: &str) -> Result<()> {
275    if value <= T::default() {
276        return Err(FgumiError::InvalidParameter {
277            parameter: name.to_string(),
278            reason: format!("Must be positive (> 0), got: {value}"),
279        });
280    }
281    Ok(())
282}
283
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;
    use std::path::PathBuf;
    use tempfile::NamedTempFile;

    // ---- file existence ----

    /// A freshly created temporary file is accepted.
    #[test]
    fn test_validate_file_exists_valid() {
        let existing = NamedTempFile::new().unwrap();
        assert!(validate_file_exists(existing.path(), "Test file").is_ok());
    }

    /// A missing path is rejected and the message names the file and the problem.
    #[test]
    fn test_validate_file_exists_invalid() {
        let message = validate_file_exists("/nonexistent/file.bam", "Input file")
            .expect_err("a missing path must be rejected")
            .to_string();
        assert!(message.contains("Input file"));
        assert!(message.contains("does not exist"));
    }

    /// A batch in which every path exists is accepted.
    #[test]
    fn test_validate_files_exist_all_valid() {
        let temps = [NamedTempFile::new().unwrap(), NamedTempFile::new().unwrap()];
        let files: Vec<(PathBuf, &str)> =
            temps.iter().map(|temp| temp.path().to_path_buf()).zip(["File 1", "File 2"]).collect();

        assert!(validate_files_exist(&files).is_ok());
    }

    /// A batch containing a missing path is rejected with that path's description.
    #[test]
    fn test_validate_files_exist_one_invalid() {
        let present = NamedTempFile::new().unwrap();
        let files = vec![
            (present.path().to_path_buf(), "File 1"),
            (PathBuf::from("/nonexistent.bam"), "File 2"),
        ];

        let message = validate_files_exist(&files)
            .expect_err("a batch with a missing path must be rejected")
            .to_string();
        assert!(message.contains("File 2"));
    }

    // ---- SAM tags ----

    #[rstest]
    #[case("MI", true, Some([b'M', b'I']), "valid MI tag")]
    #[case("RX", true, Some([b'R', b'X']), "valid RX tag")]
    #[case("AB", true, Some([b'A', b'B']), "valid AB tag")]
    #[case("M", false, None, "too short")]
    #[case("ABC", false, None, "too long")]
    #[case("", false, None, "empty string")]
    fn test_validate_tag(
        #[case] input: &str,
        #[case] should_succeed: bool,
        #[case] expected: Option<[u8; 2]>,
        #[case] description: &str,
    ) {
        match (validate_tag(input, "test tag"), should_succeed) {
            (Ok(bytes), true) => {
                assert_eq!(Some(bytes), expected, "Failed for: {description}");
            }
            (Err(err), false) => {
                assert!(
                    err.to_string().contains("must be exactly 2 characters"),
                    "Missing expected error message for: {description}"
                );
            }
            (Ok(_), false) => panic!("Should have failed for: {description}"),
            (Err(err), true) => panic!("Failed for: {description}: {err}"),
        }
    }

    /// A valid string converts to the matching noodles tag.
    #[test]
    fn test_string_to_tag_valid() {
        let converted = string_to_tag("MI", "UMI tag").unwrap();
        assert_eq!(converted, Tag::from([b'M', b'I']));
    }

    /// A three-character string cannot be converted.
    #[test]
    fn test_string_to_tag_invalid_length() {
        assert!(string_to_tag("ABC", "UMI tag").is_err());
    }

    /// A present, valid tag is converted and wrapped in `Some`.
    #[test]
    fn test_optional_string_to_tag_some_valid() {
        let converted = optional_string_to_tag(Some("CB"), "cell tag").unwrap();
        assert_eq!(converted, Some(Tag::from([b'C', b'B'])));
    }

    /// An absent tag is passed through as `None`.
    #[test]
    fn test_optional_string_to_tag_none() {
        let converted = optional_string_to_tag(None, "cell tag").unwrap();
        assert!(converted.is_none());
    }

    /// A present but invalid tag produces the tag-length error.
    #[test]
    fn test_optional_string_to_tag_some_invalid() {
        let message = optional_string_to_tag(Some("ABC"), "cell tag")
            .expect_err("an invalid tag must be rejected")
            .to_string();
        assert!(message.contains("must be exactly 2 characters"));
    }

    // ---- min/max ----

    /// max > min, max == min and a missing max are all accepted.
    #[test]
    fn test_validate_min_max_valid() {
        for (min, max) in [(1, Some(10)), (5, Some(5)), (1, None)] {
            assert!(
                validate_min_max(min, max, "min-reads", "max-reads").is_ok(),
                "min={min}, max={max:?} should be accepted"
            );
        }
    }

    /// max < min is rejected and the message names both parameters.
    #[test]
    fn test_validate_min_max_invalid() {
        let message = validate_min_max(10, Some(5), "min-reads", "max-reads")
            .expect_err("max below min must be rejected")
            .to_string();
        for fragment in ["max-reads", "min-reads", ">="] {
            assert!(message.contains(fragment));
        }
    }

    // ---- error rate ----

    #[rstest]
    #[case(0.0, true, "minimum valid rate")]
    #[case(0.01, true, "typical low rate")]
    #[case(0.5, true, "middle rate")]
    #[case(1.0, true, "maximum valid rate")]
    #[case(-0.1, false, "negative rate")]
    #[case(1.5, false, "above maximum")]
    #[case(2.0, false, "far above maximum")]
    fn test_validate_error_rate(
        #[case] rate: f64,
        #[case] should_succeed: bool,
        #[case] description: &str,
    ) {
        match (validate_error_rate(rate, "error-rate"), should_succeed) {
            (Ok(()), true) => {}
            (Err(err), false) => {
                let message = err.to_string();
                assert!(
                    message.contains("Invalid frequency threshold"),
                    "Missing expected error for: {description}"
                );
                assert!(
                    message.contains("between 0 and 1"),
                    "Missing range info for: {description}"
                );
            }
            (Ok(()), false) => panic!("Should have failed for: {description}"),
            (Err(err), true) => panic!("Failed for: {description}: {err}"),
        }
    }

    // ---- quality score ----

    #[rstest]
    #[case(0, true, "minimum valid quality")]
    #[case(30, true, "typical quality")]
    #[case(93, true, "maximum valid quality")]
    #[case(60, true, "high quality")]
    #[case(94, false, "just above maximum")]
    #[case(100, false, "far above maximum")]
    fn test_validate_quality_score(
        #[case] score: u8,
        #[case] should_succeed: bool,
        #[case] description: &str,
    ) {
        match (validate_quality_score(score, "min-base-quality"), should_succeed) {
            (Ok(()), true) => {}
            (Err(err), false) => {
                let message = err.to_string();
                assert!(
                    message.contains("Invalid quality threshold"),
                    "Missing expected error for: {description}"
                );
                assert!(
                    message.contains("between 0 and 93"),
                    "Missing range info for: {description}"
                );
            }
            (Ok(()), false) => panic!("Should have failed for: {description}"),
            (Err(err), true) => panic!("Failed for: {description}: {err}"),
        }
    }

    // ---- positivity ----

    /// Strictly positive values of different integer types are accepted.
    #[test]
    fn test_validate_positive_valid() {
        assert!(validate_positive(1, "min-reads").is_ok());
        assert!(validate_positive(100, "min-reads").is_ok());
        assert!(validate_positive(1_usize, "threshold").is_ok());
    }

    /// Zero is rejected and echoed back in the message.
    #[test]
    fn test_validate_positive_zero() {
        let message = validate_positive(0, "min-reads")
            .expect_err("zero must be rejected")
            .to_string();
        for fragment in ["Invalid parameter 'min-reads'", "Must be positive", "got: 0"] {
            assert!(message.contains(fragment));
        }
    }

    /// A negative value is rejected and echoed back in the message.
    #[test]
    fn test_validate_positive_negative() {
        let message = validate_positive(-5, "threshold")
            .expect_err("a negative value must be rejected")
            .to_string();
        for fragment in ["Invalid parameter 'threshold'", "Must be positive", "got: -5"] {
            assert!(message.contains(fragment));
        }
    }
}