rsomics_bed_validate/
lib.rs1use rsomics_common::{Result, RsomicsError};
2use std::io::BufRead;
3
/// Outcome of running [`validate`] over a BED stream.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log a report,
/// keep a copy of it, and compare reports directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BedValidation {
    /// Number of data lines that had at least three tab-separated fields.
    /// Such lines are counted even when their coordinates fail validation.
    pub records: u64,
    /// One human-readable message per problem found, each prefixed with the
    /// 1-based number of the offending line.
    pub errors: Vec<String>,
    /// `true` exactly when `errors` is empty.
    pub is_valid: bool,
}
13
14pub fn validate<R: BufRead>(reader: R) -> Result<BedValidation> {
23 let mut records: u64 = 0;
24 let mut errors: Vec<String> = Vec::new();
25 let mut line_num: u64 = 0;
26
27 for line in reader.lines() {
28 let line = line.map_err(RsomicsError::Io)?;
29 line_num += 1;
30
31 if line.starts_with('#')
32 || line.starts_with("track")
33 || line.starts_with("browser")
34 || line.is_empty()
35 {
36 continue;
37 }
38
39 let fields: Vec<&str> = line.split('\t').collect();
40 if fields.len() < 3 {
41 errors.push(format!(
42 "line {line_num}: need >= 3 fields, got {}",
43 fields.len()
44 ));
45 continue;
46 }
47
48 let start = fields[1].parse::<u64>();
49 let end = fields[2].parse::<u64>();
50
51 if start.is_err() {
52 errors.push(format!("line {line_num}: start is not a valid integer"));
53 }
54 if end.is_err() {
55 errors.push(format!("line {line_num}: end is not a valid integer"));
56 }
57 if let (Ok(s), Ok(e)) = (start, end)
58 && s > e
59 {
60 errors.push(format!("line {line_num}: start ({s}) > end ({e})"));
61 }
62
63 records += 1;
64 }
65
66 let is_valid = errors.is_empty();
67 Ok(BedValidation {
68 records,
69 errors,
70 is_valid,
71 })
72}
73
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Runs `validate` over an in-memory BED string, panicking if it returns `Err`.
    fn check(input: &str) -> BedValidation {
        validate(Cursor::new(input)).unwrap()
    }

    /// Two well-formed three-column records produce a clean report.
    #[test]
    fn valid_bed3_passes() {
        let report = check("chr1\t0\t100\nchr2\t50\t200\n");
        assert!(report.is_valid);
        assert_eq!(report.records, 2);
        assert!(report.errors.is_empty());
    }

    /// A record whose start exceeds its end yields exactly one error.
    #[test]
    fn start_greater_than_end_fails() {
        let report = check("chr1\t200\t100\n");
        assert!(!report.is_valid);
        assert_eq!(report.errors.len(), 1);
        let message = &report.errors[0];
        assert!(message.contains("start (200) > end (100)"));
    }

    /// A line with only two columns is rejected.
    #[test]
    fn too_few_fields_fails() {
        let report = check("chr1\t100\n");
        assert!(!report.is_valid);
        let message = &report.errors[0];
        assert!(message.contains("need >= 3 fields"));
    }

    /// A non-numeric start column is rejected.
    #[test]
    fn non_integer_coordinates_fail() {
        let report = check("chr1\tabc\t100\n");
        assert!(!report.is_valid);
        let message = &report.errors[0];
        assert!(message.contains("start is not a valid integer"));
    }

    /// Comment, `track` and `browser` lines are ignored and not counted.
    #[test]
    fn headers_skipped() {
        let report =
            check("# comment\ntrack name=test\nbrowser position chr1:1-100\nchr1\t0\t100\n");
        assert!(report.is_valid);
        assert_eq!(report.records, 1);
    }
}