use std::str::FromStr;
use super::field_def::{FieldDef, FieldType};
use crate::OxbowError;
/// Names of the twelve standard BED fields.
///
/// A [`BedSchema`] with `n` standard fields uses the first `n` entries of this
/// array, in this order.
#[rustfmt::skip]
pub const STANDARD_FIELD_NAMES: [&str; 12] = [
    "chrom", "start", "end",
    "name", "score", "strand",
    "thickStart", "thickEnd", "itemRgb",
    "blockCount", "blockSizes", "blockStarts",
];
/// Describes the column layout of a BED-style record: a run of standard
/// fields followed by custom fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BedSchema {
    /// Number of standard fields (leading entries of `STANDARD_FIELD_NAMES`);
    /// `BedSchema::new` only accepts values from 3 to 12.
    n: usize,
    /// Number of custom fields, or `None` when the schema was built without
    /// explicit custom field definitions.
    m: Option<usize>,
    /// Definitions of the custom fields that follow the standard ones.
    custom: Vec<FieldDef>,
}
impl BedSchema {
    /// Builds a schema with `n` standard fields followed by `custom` fields.
    ///
    /// When `custom` is `None`, the custom field count is recorded as `None`
    /// and a single string-typed field named `"rest"` stands in for the
    /// custom fields.
    ///
    /// # Errors
    ///
    /// Returns an invalid-input error when `n` is smaller than 3 or larger
    /// than 12.
    pub fn new(n: usize, custom: Option<Vec<FieldDef>>) -> crate::Result<Self> {
        if n < 3 {
            let message = format!("Invalid BED schema: n < 3 (n={})", n);
            return Err(OxbowError::invalid_input(message));
        }
        if n > 12 {
            let message = format!("Invalid BED schema: n > 12 (n={})", n);
            return Err(OxbowError::invalid_input(message));
        }

        // Record the count before consuming the option so that "no explicit
        // definitions" stays distinguishable from "zero definitions".
        let m = custom.as_ref().map(Vec::len);
        let custom = custom
            .unwrap_or_else(|| vec![FieldDef::new("rest".to_string(), FieldType::String)]);

        Ok(Self { n, m, custom })
    }

    /// Builds a schema from field counts alone.
    ///
    /// With `m = Some(k)`, `k` string-typed custom fields named `BED{n}+1`
    /// through `BED{n}+{k}` are generated. With `m = None`, the schema is
    /// built as by `new(n, None)`.
    ///
    /// # Errors
    ///
    /// Propagates the validation error from [`BedSchema::new`].
    pub fn new_from_nm(n: usize, m: Option<usize>) -> crate::Result<Self> {
        let placeholders: Option<Vec<FieldDef>> = m.map(|count| {
            (1..=count)
                .map(|index| FieldDef::new(format!("BED{}+{}", n, index), FieldType::String))
                .collect()
        });
        Self::new(n, placeholders)
    }

    /// Builds the bedGraph schema: three standard fields plus one
    /// float-typed custom field named `"value"`.
    pub fn new_bedgraph() -> crate::Result<Self> {
        let value_field = FieldDef::new("value".to_string(), FieldType::Float);
        Self::new(3, Some(vec![value_field]))
    }

    /// Returns all field names: the standard names first, then the custom ones.
    pub fn field_names(&self) -> Vec<String> {
        let mut all_names = self.standard_field_names();
        all_names.extend(self.custom_field_names());
        all_names
    }

    /// Returns the number of standard fields.
    pub fn standard_field_count(&self) -> usize {
        self.n
    }

    /// Returns the names of the standard fields, i.e. the first `n` entries
    /// of `STANDARD_FIELD_NAMES`.
    pub fn standard_field_names(&self) -> Vec<String> {
        STANDARD_FIELD_NAMES
            .iter()
            .take(self.n)
            .map(|name| String::from(*name))
            .collect()
    }

    /// Returns the number of custom fields, or `None` when the schema was
    /// built without explicit custom field definitions.
    pub fn custom_field_count(&self) -> Option<usize> {
        self.m
    }

    /// Returns the names of the custom field definitions, in order.
    pub fn custom_field_names(&self) -> Vec<String> {
        let mut names = Vec::with_capacity(self.custom.len());
        for definition in &self.custom {
            names.push(definition.name.clone());
        }
        names
    }

    /// Returns the custom field definitions as a slice.
    pub fn custom_fields(&self) -> &[FieldDef] {
        self.custom.as_slice()
    }
}
impl std::fmt::Display for BedSchema {
    /// Writes the schema as a format specifier:
    ///
    /// * `bed{n}+` when the custom field count is unspecified,
    /// * `bed{n}` when there are zero custom fields,
    /// * `bed{n}+{m}` otherwise.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.m {
            None => write!(f, "bed{}+", self.n),
            Some(0) => write!(f, "bed{}", self.n),
            Some(count) => write!(f, "bed{}+{}", self.n, count),
        }
    }
}
impl FromStr for BedSchema {
    type Err = OxbowError;

    /// Parses a BED format specifier (ASCII case-insensitive).
    ///
    /// Accepted forms:
    ///
    /// * `bed` — same as `bed6`
    /// * `bedgraph` — the schema built by [`BedSchema::new_bedgraph`]
    /// * `bed{n}` — `n` standard fields and zero custom fields
    /// * `bed{n}+{m}` — `n` standard fields and `m` custom fields
    /// * `bed{n}+` — `n` standard fields, custom field count unspecified
    ///
    /// `n` and `m` must consist of ASCII digits only.
    ///
    /// # Errors
    ///
    /// Returns an invalid-input error when the specifier does not match one
    /// of the forms above (the message carries the lowercased specifier), and
    /// propagates the validation error from [`BedSchema::new`] when `n` is
    /// out of range.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        fn parse_error(s: &str) -> OxbowError {
            OxbowError::invalid_input(format!("Invalid BED format specifier: {}", s))
        }

        // `usize::from_str` tolerates a leading `+` sign, which would let
        // malformed specifiers such as `bed6++3` or `bed+6+` slip through.
        // Only plain runs of ASCII digits are accepted as counts.
        fn parse_count(digits: &str) -> Option<usize> {
            if digits.bytes().all(|b| b.is_ascii_digit()) {
                // Empty input and overflow are both rejected by `parse`.
                digits.parse().ok()
            } else {
                None
            }
        }

        let s = s.to_ascii_lowercase();
        let rest = s.strip_prefix("bed").ok_or_else(|| parse_error(&s))?;

        match rest {
            // A bare "bed" defaults to six standard fields.
            "" => Self::new_from_nm(6, Some(0)),
            "graph" => Self::new_bedgraph(),
            _ => {
                if let Some(n) = rest.strip_suffix('+') {
                    // "bed{n}+": custom field count left unspecified.
                    let n = parse_count(n).ok_or_else(|| parse_error(&s))?;
                    Self::new_from_nm(n, None)
                } else if let Some((n, m)) = rest.split_once('+') {
                    // "bed{n}+{m}": explicit custom field count.
                    let n = parse_count(n).ok_or_else(|| parse_error(&s))?;
                    let m = parse_count(m).ok_or_else(|| parse_error(&s))?;
                    Self::new_from_nm(n, Some(m))
                } else {
                    // "bed{n}": no custom fields.
                    let n = parse_count(rest).ok_or_else(|| parse_error(&s))?;
                    Self::new_from_nm(n, Some(0))
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `spec` into a schema, panicking when the specifier is rejected.
    fn schema(spec: &str) -> BedSchema {
        spec.parse::<BedSchema>().unwrap()
    }

    /// `bed` and `bed{n}` yield `n` standard fields and zero custom fields.
    #[test]
    fn test_bed_schema_bedn() {
        let cases = [("bed", 6), ("bed6", 6), ("bed12", 12)];
        for (spec, expected_n) in cases {
            let parsed = schema(spec);
            assert_eq!(parsed.standard_field_count(), expected_n);
            assert_eq!(parsed.custom_field_count(), Some(0));
        }
    }

    /// `bed{n}` and `bed{n}+0` describe the same schema.
    #[test]
    fn test_bed_schema_bedn_plus_zero() {
        assert_eq!(schema("bed6"), schema("bed6+0"));
    }

    /// `bed{n}+{m}` generates `m` placeholder custom fields.
    #[test]
    fn test_bed_schema_bedn_plus_m() {
        let parsed = schema("bed6+3");
        assert_eq!(parsed.standard_field_count(), 6);
        assert_eq!(parsed.custom_field_count(), Some(3));

        let expected = vec![
            "chrom", "start", "end", "name", "score", "strand", "BED6+1", "BED6+2", "BED6+3",
        ];
        assert_eq!(parsed.field_names(), expected);
    }

    /// `bed{n}+` leaves the custom field count unspecified and adds `rest`.
    #[test]
    fn test_bed_schema_bedn_plus() {
        let parsed = schema("bed6+");
        assert_eq!(parsed.standard_field_count(), 6);
        assert_eq!(parsed.custom_field_count(), None);

        let expected = vec!["chrom", "start", "end", "name", "score", "strand", "rest"];
        assert_eq!(parsed.field_names(), expected);
    }

    /// `bedgraph` is three standard fields plus a float `value` field.
    #[test]
    fn test_bed_schema_bedgraph() {
        let parsed = schema("bedgraph");
        assert_eq!(parsed.standard_field_count(), 3);
        assert_eq!(parsed.custom_field_count(), Some(1));

        let value_field = &parsed.custom_fields()[0];
        assert_eq!(value_field.name, "value");
        assert_eq!(value_field.ty, FieldType::Float);
    }

    /// Out-of-range field counts and unknown specifiers are rejected with
    /// descriptive messages.
    #[test]
    fn test_bed_schema_invalid() {
        let too_few = BedSchema::new(0, None).unwrap_err();
        assert_eq!(too_few.to_string(), "Invalid BED schema: n < 3 (n=0)");

        let too_many = BedSchema::new(13, None).unwrap_err();
        assert_eq!(too_many.to_string(), "Invalid BED schema: n > 12 (n=13)");

        let unknown = "invalid".parse::<BedSchema>().unwrap_err();
        assert_eq!(unknown.to_string(), "Invalid BED format specifier: invalid");
    }

    /// Formatting a parsed schema reproduces the specifier it came from.
    #[test]
    fn test_bed_schema_display() {
        for spec in ["bed6", "bed6+3", "bed6+", "bed12"] {
            assert_eq!(schema(spec).to_string(), spec);
        }
    }
}