use rsomics_common::{Result, RsomicsError};
use std::io::BufRead;
/// Outcome of validating BED-formatted input with [`validate`].
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so callers can log the
/// report, keep a copy of it, and compare it in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BedValidation {
    /// Number of data lines that had at least three tab-separated fields.
    pub records: u64,
    /// Human-readable problem descriptions, each prefixed with its line number.
    pub errors: Vec<String>,
    /// `true` exactly when `errors` is empty.
    pub is_valid: bool,
}
/// Checks BED-formatted text read line by line from `reader`.
///
/// Lines that are empty or begin with `#`, `track` or `browser` are skipped
/// (they still advance the reported line number). Every other line is split
/// on tab characters:
///
/// * fewer than three fields records an error and the line is not counted;
/// * otherwise the second and third fields are parsed as `u64` start/end
///   coordinates, an error is recorded for each one that fails to parse, and
///   when both parse an error is recorded if `start > end`. The line is
///   counted in `records` whether or not its coordinates were acceptable.
///
/// # Errors
///
/// Returns `RsomicsError::Io` as soon as reading a line from `reader` fails.
/// Format problems never produce an `Err`; they are collected in the
/// returned [`BedValidation`].
pub fn validate<R: BufRead>(reader: R) -> Result<BedValidation> {
    let mut problems: Vec<String> = Vec::new();
    let mut counted: u64 = 0;

    // Pair each line with its 1-based position so skipped lines are numbered too.
    for (line_num, entry) in (1u64..).zip(reader.lines()) {
        let text = entry.map_err(RsomicsError::Io)?;

        let is_header_or_blank = text.is_empty()
            || ["#", "track", "browser"]
                .iter()
                .any(|prefix| text.starts_with(*prefix));
        if is_header_or_blank {
            continue;
        }

        let columns: Vec<&str> = text.split('\t').collect();
        let (start_raw, end_raw) = match columns.as_slice() {
            [_, start, end, ..] => (*start, *end),
            short => {
                problems.push(format!(
                    "line {line_num}: need >= 3 fields, got {}",
                    short.len()
                ));
                continue;
            }
        };

        match (start_raw.parse::<u64>(), end_raw.parse::<u64>()) {
            (Ok(s), Ok(e)) => {
                if s > e {
                    problems.push(format!("line {line_num}: start ({s}) > end ({e})"));
                }
            }
            (start, end) => {
                // At least one coordinate failed; report start before end.
                if start.is_err() {
                    problems.push(format!("line {line_num}: start is not a valid integer"));
                }
                if end.is_err() {
                    problems.push(format!("line {line_num}: end is not a valid integer"));
                }
            }
        }

        counted += 1;
    }

    Ok(BedValidation {
        records: counted,
        is_valid: problems.is_empty(),
        errors: problems,
    })
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Well-formed three-column input produces no errors and counts every line.
    #[test]
    fn valid_bed3_passes() {
        let bed = "chr1\t0\t100\nchr2\t50\t200\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(v.is_valid);
        assert_eq!(v.records, 2);
        assert!(v.errors.is_empty());
    }

    /// Columns beyond the third are ignored by the coordinate checks.
    #[test]
    fn extra_columns_are_accepted() {
        let bed = "chr1\t0\t100\tfeature\t0\t+\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(v.is_valid);
        assert_eq!(v.records, 1);
    }

    /// A zero-length interval (start == end) is not reported as an error.
    #[test]
    fn start_equal_to_end_passes() {
        let bed = "chr1\t100\t100\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(v.is_valid);
        assert_eq!(v.records, 1);
    }

    /// Input with no lines at all is valid and has zero records.
    #[test]
    fn empty_input_is_valid() {
        let v = validate(Cursor::new("")).unwrap();
        assert!(v.is_valid);
        assert_eq!(v.records, 0);
        assert!(v.errors.is_empty());
    }

    /// An inverted interval yields exactly one error; the line is still counted.
    #[test]
    fn start_greater_than_end_fails() {
        let bed = "chr1\t200\t100\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(!v.is_valid);
        assert_eq!(v.errors.len(), 1);
        assert!(v.errors[0].contains("start (200) > end (100)"));
        assert_eq!(v.records, 1);
    }

    /// A line with fewer than three fields is reported and not counted.
    #[test]
    fn too_few_fields_fails() {
        let bed = "chr1\t100\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(!v.is_valid);
        assert_eq!(v.errors.len(), 1);
        assert!(v.errors[0].contains("need >= 3 fields"));
        assert_eq!(v.records, 0);
    }

    /// A non-numeric start field is reported on its own.
    #[test]
    fn non_integer_coordinates_fail() {
        let bed = "chr1\tabc\t100\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(!v.is_valid);
        assert_eq!(v.errors.len(), 1);
        assert!(v.errors[0].contains("start is not a valid integer"));
    }

    /// A non-numeric end field is reported on its own.
    #[test]
    fn non_integer_end_fails() {
        let bed = "chr1\t10\txyz\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(!v.is_valid);
        assert_eq!(v.errors.len(), 1);
        assert!(v.errors[0].contains("end is not a valid integer"));
    }

    /// When both coordinates are bad, start is reported before end.
    #[test]
    fn both_coordinates_invalid_reports_two_errors() {
        let bed = "chr1\tfoo\tbar\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(!v.is_valid);
        assert_eq!(v.errors.len(), 2);
        assert!(v.errors[0].contains("start is not a valid integer"));
        assert!(v.errors[1].contains("end is not a valid integer"));
    }

    /// Coordinates are parsed as unsigned, so a leading minus sign is rejected.
    #[test]
    fn negative_coordinate_fails() {
        let bed = "chr1\t-5\t100\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(!v.is_valid);
        assert!(v.errors[0].contains("start is not a valid integer"));
    }

    /// Comment, track and browser lines are skipped and not counted.
    #[test]
    fn headers_skipped() {
        let bed = "# comment\ntrack name=test\nbrowser position chr1:1-100\nchr1\t0\t100\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(v.is_valid);
        assert_eq!(v.records, 1);
    }

    /// Blank lines are skipped but still advance the reported line number.
    #[test]
    fn blank_lines_skipped_but_numbered() {
        let bed = "chr1\t0\t10\n\nchr1\t20\n";
        let v = validate(Cursor::new(bed)).unwrap();
        assert!(!v.is_valid);
        assert_eq!(v.records, 1);
        assert_eq!(v.errors, vec!["line 3: need >= 3 fields, got 2".to_string()]);
    }
}