1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
//! VCF record chromosome.

mod parser;

use std::{error, fmt, str::FromStr};

use super::MISSING_FIELD;

/// A VCF record chromosome (`CHROM`).
///
/// A chromosome is either a plain reference sequence name or a symbol. When
/// displayed, a name is written as is and a symbol is wrapped in angle
/// brackets (`<...>`).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Chromosome {
    /// A reference sequence name, e.g., `sq0`.
    Name(String),
    /// A symbol, stored without its surrounding angle brackets, e.g., `sq0` for `<sq0>`.
    Symbol(String),
}

impl fmt::Display for Chromosome {
    /// Writes a name verbatim and a symbol enclosed in angle brackets (`<symbol>`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Names need no decoration, so they are emitted immediately.
        let symbol = match self {
            Self::Name(name) => return f.write_str(name),
            Self::Symbol(symbol) => symbol,
        };

        f.write_str("<")?;
        f.write_str(symbol)?;
        f.write_str(">")
    }
}

/// An error returned when a raw VCF record chromosome fails to parse.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseError {
    /// The input is empty (`""`).
    Empty,
    /// The input is the missing field marker (`.`).
    Missing,
    /// The input is invalid, i.e., it could not be parsed or it is a name that contains
    /// disallowed characters.
    Invalid,
}

// No variant wraps an underlying error, so the default `Error` methods are sufficient.
impl error::Error for ParseError {}

impl fmt::Display for ParseError {
    /// Writes a short, human-readable description of the parse failure.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::Empty => "empty input",
            Self::Missing => "missing input (`.`)",
            Self::Invalid => "invalid input",
        };

        f.write_str(message)
    }
}

impl FromStr for Chromosome {
    type Err = ParseError;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "" => Err(ParseError::Empty),
            MISSING_FIELD => Err(ParseError::Missing),
            _ => {
                parser::parse(s)
                    .map_err(|_| ParseError::Invalid)
                    .and_then(|(_, value)| match value {
                        parser::Value::Name(t) => {
                            if is_valid_name(t) {
                                Ok(Self::Name(t.into()))
                            } else {
                                Err(ParseError::Invalid)
                            }
                        }
                        parser::Value::Symbol(t) => Ok(Self::Symbol(t.into())),
                    })
            }
        }
    }
}

// § 1.4.7 Contig field format
//
// Returns whether `c` may appear in a contig name: any printable, non-space ASCII character
// (`!` through `~`) other than the delimiters rejected below.
fn is_valid_name_char(c: char) -> bool {
    match c {
        '\\' | ',' | '"' | '`' | '\'' | '(' | ')' | '[' | ']' | '{' | '}' | '<' | '>' => false,
        _ => c.is_ascii_graphic(),
    }
}

/// Returns whether `s` is a valid reference sequence (contig) name.
///
/// A name is valid when it is nonempty, its first character is a valid name character other
/// than `*` or `=`, and every remaining character is a valid name character.
pub(crate) fn is_valid_name(s: &str) -> bool {
    let mut chars = s.chars();

    match chars.next() {
        // The name grammar requires at least one character, so an empty name is rejected
        // rather than vacuously accepted.
        None => false,
        // `*` and `=` are allowed inside a name but not as its first character.
        Some('*') | Some('=') => false,
        Some(first) => is_valid_name_char(first) && chars.all(is_valid_name_char),
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_fmt() {
        // A name is written as is.
        let name = Chromosome::Name(String::from("sq0"));
        assert_eq!(name.to_string(), "sq0");

        // A symbol is wrapped in angle brackets.
        let symbol = Chromosome::Symbol(String::from("sq0"));
        assert_eq!(symbol.to_string(), "<sq0>");
    }

    #[test]
    fn test_from_str() {
        // Well-formed names and symbols.
        let expected_name = Chromosome::Name(String::from("sq0"));
        assert_eq!("sq0".parse::<Chromosome>(), Ok(expected_name));

        let expected_symbol = Chromosome::Symbol(String::from("sq0"));
        assert_eq!("<sq0>".parse::<Chromosome>(), Ok(expected_symbol));

        // Empty and missing inputs have dedicated errors.
        assert_eq!("".parse::<Chromosome>(), Err(ParseError::Empty));
        assert_eq!(".".parse::<Chromosome>(), Err(ParseError::Missing));

        // Names with disallowed characters or a disallowed first character.
        let invalid_inputs = ["sq 0", "sq[0]", ">sq0", "*sq0", "=sq0"];

        for raw in invalid_inputs.iter() {
            assert_eq!(
                raw.parse::<Chromosome>(),
                Err(ParseError::Invalid),
                "input: {:?}",
                raw
            );
        }
    }
}