1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
//! SAM record reference sequence name.

use std::{error, fmt, ops::Deref, str::FromStr};

const MIN_LENGTH: usize = 1;

/// A SAM record reference sequence name.
///
/// This is a newtype over an owned [`String`]. The inner field is private, so outside of this
/// module a value is obtained by parsing a string (see the [`FromStr`] implementation), which
/// rejects empty input and names that fail validation.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ReferenceSequenceName(String);

impl Deref for ReferenceSequenceName {
    type Target = String;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl fmt::Display for ReferenceSequenceName {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

/// An error returned when a raw SAM record reference sequence name fails to parse.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseError {
    /// The input is empty.
    ///
    /// This is reported before any character validation takes place.
    Empty,
    /// The input is invalid.
    ///
    /// The input is not empty but is not a valid name, e.g., it contains a disallowed character
    /// or starts with `*` or `=`.
    Invalid,
}

// The variants carry no underlying error, so the trait's default methods are sufficient.
impl error::Error for ParseError {}

impl fmt::Display for ParseError {
    /// Writes a short, lowercase description of why parsing failed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::Empty => "empty input",
            Self::Invalid => "invalid input",
        };

        f.write_str(message)
    }
}

impl FromStr for ReferenceSequenceName {
    type Err = ParseError;

    /// Parses a raw reference sequence name.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::Empty`] when `s` is the empty string and [`ParseError::Invalid`]
    /// when `s` is nonempty but fails name validation.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            // Emptiness is checked first so that it gets its own, more specific error.
            "" => Err(ParseError::Empty),
            _ if is_valid_name(s) => Ok(Self(String::from(s))),
            _ => Err(ParseError::Invalid),
        }
    }
}

// § 1.2.1 Character set restrictions (2021-01-07)
//
// Returns whether `c` may appear in a reference sequence name: any printable ASCII character
// (`!` through `~`) other than the explicitly excluded punctuation below. Restrictions that
// apply only to the first character of a name are not checked here.
fn is_valid_name_char(c: char) -> bool {
    match c {
        // Excluded punctuation; this arm must come before the printable range.
        '\\' | ',' | '"' | '`' | '\'' | '(' | ')' | '[' | ']' | '{' | '}' | '<' | '>' => false,
        '!'..='~' => true,
        // Whitespace, control characters, and anything outside of ASCII.
        _ => false,
    }
}

/// Returns whether `s` is a valid reference sequence name.
///
/// A name must be at least `MIN_LENGTH` bytes long, must not start with `*` or `=`, and must
/// consist solely of characters accepted by `is_valid_name_char`.
pub(crate) fn is_valid_name(s: &str) -> bool {
    if s.len() < MIN_LENGTH {
        return false;
    }

    let mut remaining = s.chars();

    // The first character is stricter than the rest: `*` and `=` are only allowed later on.
    let head_ok = match remaining.next() {
        Some('*') | Some('=') => false,
        Some(head) => is_valid_name_char(head),
        None => true,
    };

    head_ok && remaining.all(is_valid_name_char)
}

#[cfg(test)]
mod tests {
    use super::*;

    // Formatting a parsed name yields the text it was parsed from.
    #[test]
    fn test_fmt() -> Result<(), ParseError> {
        let name = "sq0".parse::<ReferenceSequenceName>()?;
        assert_eq!(name.to_string(), "sq0");

        Ok(())
    }

    #[test]
    fn test_from_str() {
        // `*` and `=` are accepted anywhere but in the leading position.
        let accepted = ["sq0", "sq0*", "sq0="];

        for raw in accepted.iter() {
            assert_eq!(
                raw.parse::<ReferenceSequenceName>(),
                Ok(ReferenceSequenceName(String::from(*raw)))
            );
        }

        assert_eq!("".parse::<ReferenceSequenceName>(), Err(ParseError::Empty));

        // Whitespace, excluded punctuation, and a leading `*` or `=` are all rejected.
        let rejected = ["sq 0", "sq[0]", ">sq0", "*sq0", "=sq0"];

        for raw in rejected.iter() {
            assert_eq!(
                raw.parse::<ReferenceSequenceName>(),
                Err(ParseError::Invalid)
            );
        }
    }
}