//! Parsing of Illumina-style sequence identifier lines.
//!
//! The entry point is `parse_sequence_identifier`, which turns one identifier
//! line into a `SequenceIdentifier` or reports an `IlluminaError`.
#![crate_type="lib"]
// Every warning is a hard error, and so is any public item without documentation.
#![deny(warnings, missing_docs)]
use std::convert::From;
use std::result::Result;
use std::num;
/// Value of the sample field, the last field of a sequence identifier line.
///
/// The parser in this crate stores the field as a number whenever it fits in a
/// `u8` and otherwise keeps the raw text.
#[derive(Debug, Clone, PartialOrd, PartialEq, Eq)]
pub enum Sample {
    /// The sample field parsed as an unsigned 8-bit number.
    Number(u8),
    /// The raw text of the sample field, used when it is not a valid `u8`.
    Sequence(String),
}
/// The fields of one parsed sequence identifier line.
///
/// A line has two space-separated groups of `:`-separated fields; the field
/// positions mentioned below refer to those groups. `Debug` and `PartialEq`
/// are derived so that parse results can be printed and compared in tests.
#[derive(Debug, PartialEq)]
pub struct SequenceIdentifier {
    /// Sequencer identifier: the first field of the first group with its
    /// leading character removed.
    pub sequencer_id: String,
    /// Run count: the second field of the first group.
    pub run_count: u16,
    /// Flow cell identifier: the third field of the first group, kept as text.
    pub flow_cell_id: String,
    /// Lane: the fourth field of the first group.
    pub lane: u8,
    /// Side: the first character of the fifth field of the first group.
    pub side: u8,
    /// Swath: the second character of the fifth field of the first group.
    pub swath: u8,
    /// Tile: the remaining characters of the fifth field of the first group.
    pub tile: u8,
    /// X value: the sixth field of the first group.
    pub x: u16,
    /// Y value: the seventh field of the first group.
    pub y: u16,
    /// Read number: the first field of the second group.
    pub read: u8,
    /// Filter flag: `true` when the second field of the second group is `Y`,
    /// `false` when it is `N`.
    pub is_filtered: bool,
    /// Control number: the third field of the second group.
    pub control_number: u8,
    /// Sample: the fourth field of the second group.
    pub sample: Sample,
}
/// Reasons a sequence identifier line can be rejected.
///
/// The type implements `Display` and `std::error::Error`, so it can be boxed
/// or propagated alongside other error types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IlluminaError {
    /// A field held a value that could not be interpreted, for example a
    /// numeric field that is not a valid number.
    ParseError,
    /// The line could not be split into the expected fields.
    SplitError,
}

impl std::fmt::Display for IlluminaError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match *self {
            IlluminaError::ParseError => "invalid field value in sequence identifier",
            IlluminaError::SplitError => "sequence identifier could not be split into fields",
        };
        f.write_str(message)
    }
}

impl std::error::Error for IlluminaError {}
/// Maps every integer-parsing failure to `IlluminaError::ParseError`.
///
/// The detail carried by the `ParseIntError` is discarded. The conversion lets
/// `?` be applied directly to `str::parse` results inside functions that
/// return `IlluminaError`.
impl From<num::ParseIntError> for IlluminaError {
    fn from(_parse_failure: num::ParseIntError) -> Self {
        IlluminaError::ParseError
    }
}
/// Parses one Illumina-style sequence identifier line.
///
/// Surrounding whitespace (such as a trailing newline) is trimmed first. The
/// remaining text must consist of two groups separated by exactly one space,
/// each group being a list of `:`-separated fields:
///
/// ```text
/// @<sequencer>:<run>:<flow cell>:<lane>:<side><swath><tile>:<x>:<y> <read>:<Y|N>:<control>:<sample>
/// ```
///
/// * The first character of the sequencer field (`@` in the line above) is
///   dropped without being inspected.
/// * The fifth field is cut into a one-character side, a one-character swath
///   and the remaining characters as the tile.
/// * The sample field becomes `Sample::Number` when it fits a `u8`, and
///   `Sample::Sequence` holding the raw text otherwise.
///
/// # Errors
///
/// * `IlluminaError::SplitError` when the line does not have exactly two
///   space-separated groups, when the groups do not have seven and four fields
///   respectively, when the sequencer field is empty, or when the fifth field
///   is too short (or not cuttable on character boundaries) to yield side,
///   swath and tile.
/// * `IlluminaError::ParseError` when a numeric field is not a valid number of
///   its type, or when the filter flag is neither `Y` nor `N`.
///
/// Malformed input is always reported through the returned `Result`; this
/// function does not panic.
pub fn parse_sequence_identifier(text: &str) -> Result<SequenceIdentifier, IlluminaError> {
    let halves: Vec<&str> = text.trim().split(' ').collect();
    if halves.len() != 2 {
        return Err(IlluminaError::SplitError);
    }
    let left: Vec<&str> = halves[0].split(':').collect();
    let right: Vec<&str> = halves[1].split(':').collect();
    if left.len() != 7 || right.len() != 4 {
        return Err(IlluminaError::SplitError);
    }

    // Skip the leading marker by character rather than by byte offset: an
    // empty field is rejected and a multi-byte first character is skipped
    // whole, where `split_at(1)` would panic in both cases.
    let mut sequencer_chars = left[0].chars();
    if sequencer_chars.next().is_none() {
        return Err(IlluminaError::SplitError);
    }
    let sequencer_id = sequencer_chars.as_str().to_string();
    let run_count = left[1].parse::<u16>()?;
    let flow_cell_id = left[2].to_string();
    let lane = left[3].parse::<u8>()?;

    // `str::get` returns `None` for out-of-range or non-boundary indices, so a
    // fifth field that is too short is rejected here instead of panicking. An
    // empty tile (a two-character field) still reaches `parse` below and fails
    // there with `ParseError`.
    let location = left[4];
    let (side, swath, tile) = match (location.get(..1), location.get(1..2), location.get(2..)) {
        (Some(side), Some(swath), Some(tile)) => (side, swath, tile),
        _ => return Err(IlluminaError::SplitError),
    };
    let side = side.parse::<u8>()?;
    let swath = swath.parse::<u8>()?;
    let tile = tile.parse::<u8>()?;

    let x = left[5].parse::<u16>()?;
    let y = left[6].parse::<u16>()?;

    let read = right[0].parse::<u8>()?;
    let is_filtered = match right[1] {
        "Y" => true,
        "N" => false,
        _ => return Err(IlluminaError::ParseError),
    };
    let control_number = right[2].parse::<u8>()?;
    // A sample field that is not a small number is kept verbatim as text.
    let sample = match right[3].parse::<u8>() {
        Ok(number) => Sample::Number(number),
        Err(_) => Sample::Sequence(String::from(right[3])),
    };

    Ok(SequenceIdentifier {
        sequencer_id,
        run_count,
        flow_cell_id,
        lane,
        side,
        swath,
        tile,
        x,
        y,
        read,
        is_filtered,
        control_number,
        sample,
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `line` and compares every field with the one well-formed
    /// identifier used by these tests; only the filter flag varies per caller.
    fn check_reference_identifier(line: &str, expect_filtered: bool) {
        let parsed = parse_sequence_identifier(line).unwrap();
        assert_eq!(parsed.sequencer_id, "M03745");
        assert_eq!(parsed.run_count, 11);
        assert_eq!(parsed.flow_cell_id, "000000000-B54L5");
        assert_eq!(parsed.lane, 1);
        assert_eq!(parsed.side, 2);
        assert_eq!(parsed.swath, 1);
        assert_eq!(parsed.tile, 8);
        assert_eq!(parsed.x, 4127);
        assert_eq!(parsed.y, 8949);
        assert_eq!(parsed.read, 1);
        assert_eq!(parsed.is_filtered, expect_filtered);
        assert_eq!(parsed.control_number, 0);
        assert_eq!(parsed.sample, Sample::Number(0));
    }

    #[test]
    fn test_parse() {
        let line = "@M03745:11:000000000-B54L5:1:2108:4127:8949 1:N:0:0";
        check_reference_identifier(line, false);
    }

    #[test]
    fn test_parse_with_newline() {
        // The trailing newline must not affect the parsed fields.
        let line = "@M03745:11:000000000-B54L5:1:2108:4127:8949 1:Y:0:0\n";
        check_reference_identifier(line, true);
    }

    #[test]
    fn test_parse_error() {
        // Raw sequence data is not an identifier line and must be rejected.
        let outcome = parse_sequence_identifier("CACGACGACTAGCTACGGACGCGGCACGACGCAG");
        assert!(outcome.is_err());
    }
}