use crate::utils;
use std::fmt::Display;
use std::io::{self, BufRead, Write};
use std::ops::Range;
use std::str;
use gbz::{Metadata, Orientation, FullPathName};
use gbz::support;
#[cfg(test)]
mod tests;
/// Appends a walk to the buffer as a sequence of oriented node visits.
///
/// Every handle contributes an orientation symbol (`>` for forward, `<` for reverse)
/// followed by its node identifier as written by `utils::append_usize`.
///
/// # Arguments
///
/// * `buffer`: Output buffer that receives the bytes.
/// * `walk`: Handles to be decoded with `support::node_orientation` and `support::node_id`.
pub fn append_walk(buffer: &mut Vec<u8>, walk: &[usize]) {
    for &handle in walk {
        let symbol = match support::node_orientation(handle) {
            Orientation::Forward => b'>',
            Orientation::Reverse => b'<',
        };
        buffer.push(symbol);
        utils::append_usize(buffer, support::node_id(handle));
    }
}
/// A typed optional field of the form `TG:T:value`.
///
/// `TG` is a two-byte tag, `T` is a one-byte type code, and the rest is the value.
/// Supported type codes are `A` (single character), `Z` (string), `i` (integer),
/// `f` (float), and `b` (boolean written as `0` or `1`).
#[derive(Clone, Debug, PartialEq)]
pub enum TypedField {
    /// A single byte; type code `A`.
    Char([u8; 2], u8),
    /// An arbitrary byte string; type code `Z`.
    String([u8; 2], Vec<u8>),
    /// A signed integer; type code `i`.
    Int([u8; 2], isize),
    /// A floating point number; type code `f`.
    Float([u8; 2], f64),
    /// A boolean value serialized as `0` or `1`; type code `b`.
    Bool([u8; 2], bool),
}

impl TypedField {
    /// Parses a typed field from its serialized form `TG:T:value`.
    ///
    /// # Errors
    ///
    /// Returns an error message if the field is shorter than `TG:T:`, if the separators
    /// are missing, if the type code is not supported, or if the value is not valid for
    /// the type (char and bool values must be exactly one byte long).
    pub fn parse(field: &[u8]) -> Result<Self, String> {
        if field.len() < 5 || field[2] != b':' || field[4] != b':' {
            return Err(format!("Invalid typed field: {}", String::from_utf8_lossy(field)));
        }
        let tag = [field[0], field[1]];
        let payload = &field[5..];
        match field[3] {
            b'A' => match payload {
                [value] => Ok(TypedField::Char(tag, *value)),
                _ => Err(format!("Invalid char field {}", String::from_utf8_lossy(field))),
            },
            b'Z' => Ok(TypedField::String(tag, payload.to_vec())),
            b'i' => {
                // Invalid UTF-8 becomes replacement characters, which then fail to parse.
                let text = String::from_utf8_lossy(payload);
                text.parse::<isize>()
                    .map(|value| TypedField::Int(tag, value))
                    .map_err(|err| format!("Invalid int field {}: {}", text, err))
            },
            b'f' => {
                let text = String::from_utf8_lossy(payload);
                text.parse::<f64>()
                    .map(|value| TypedField::Float(tag, value))
                    .map_err(|err| format!("Invalid float field {}: {}", text, err))
            },
            b'b' => match payload {
                [b'0'] => Ok(TypedField::Bool(tag, false)),
                [b'1'] => Ok(TypedField::Bool(tag, true)),
                _ => Err(format!("Invalid bool field {}", String::from_utf8_lossy(field))),
            },
            other => Err(format!("Unsupported field type: {}", other as char)),
        }
    }

    /// Returns the two-byte tag of the field.
    pub fn tag(&self) -> [u8; 2] {
        match self {
            TypedField::Char(tag, _)
            | TypedField::String(tag, _)
            | TypedField::Int(tag, _)
            | TypedField::Float(tag, _)
            | TypedField::Bool(tag, _) => *tag,
        }
    }

    /// Appends the serialized field to the buffer.
    ///
    /// Tag, char, and string bytes are copied verbatim, so values that are not valid
    /// UTF-8 are preserved exactly, in agreement with [`TypedField::append_string`].
    /// For ASCII content the output is the same as the `Display` representation.
    ///
    /// # Arguments
    ///
    /// * `buffer`: Output buffer.
    /// * `as_new_field`: If `true`, a tab separator is written before the field.
    pub fn append_to(&self, buffer: &mut Vec<u8>, as_new_field: bool) {
        if as_new_field {
            buffer.push(b'\t');
        }
        buffer.extend_from_slice(&self.tag());
        match self {
            TypedField::Char(_, value) => {
                buffer.extend_from_slice(b":A:");
                buffer.push(*value);
            },
            TypedField::String(_, value) => {
                buffer.extend_from_slice(b":Z:");
                buffer.extend_from_slice(value);
            },
            TypedField::Int(_, value) => {
                // Writing into a `Vec<u8>` cannot fail, so the result can be ignored.
                let _ = write!(buffer, ":i:{}", value);
            },
            TypedField::Float(_, value) => {
                // Writing into a `Vec<u8>` cannot fail, so the result can be ignored.
                let _ = write!(buffer, ":f:{}", value);
            },
            TypedField::Bool(_, value) => {
                buffer.extend_from_slice(if *value { b":b:1" } else { b":b:0" });
            },
        }
    }

    /// Appends a string field (`TG:Z:value`) to the buffer without building a `TypedField`.
    ///
    /// # Arguments
    ///
    /// * `buffer`: Output buffer.
    /// * `tag`: Two-byte tag of the field.
    /// * `value`: Raw value bytes, copied verbatim.
    /// * `as_new_field`: If `true`, a tab separator is written before the field.
    pub fn append_string(buffer: &mut Vec<u8>, tag: [u8; 2], value: &[u8], as_new_field: bool) {
        if as_new_field {
            buffer.push(b'\t');
        }
        buffer.extend_from_slice(&tag);
        buffer.extend_from_slice(b":Z:");
        buffer.extend_from_slice(value);
    }
}

/// Human-readable form `TG:T:value`.
///
/// This is a text representation: tag and char bytes are converted with `as char`, and
/// string values go through a lossy UTF-8 conversion. Use [`TypedField::append_to`] when
/// the exact bytes matter.
impl Display for TypedField {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let [first, second] = self.tag();
        write!(f, "{}{}", first as char, second as char)?;
        match self {
            TypedField::Char(_, value) => write!(f, ":A:{}", *value as char),
            TypedField::String(_, value) => write!(f, ":Z:{}", String::from_utf8_lossy(value)),
            TypedField::Int(_, value) => write!(f, ":i:{}", value),
            TypedField::Float(_, value) => write!(f, ":f:{}", value),
            TypedField::Bool(_, value) => write!(f, ":b:{}", if *value { '1' } else { '0' }),
        }
    }
}
/// Metadata attached to a walk when it is written out.
///
/// It combines a path name with an end value and optional weight and CIGAR annotations.
/// In this file it is consumed by `write_gfa_walk` and `json_path`.
pub struct WalkMetadata {
/// Path name; its `fragment` field is written immediately before `end` in W-lines.
name: FullPathName,
/// End value; `path_interval` sets it to `fragment + interval.end`, the other constructors to the given length.
end: usize,
/// Optional weight; `write_gfa_walk` writes it with tag `WT` and `json_path` under the key `weight`.
weight: Option<usize>,
/// Optional CIGAR string; `write_gfa_walk` writes it with tag `CG` and `json_path` under the key `cigar`.
cigar: Option<String>,
}
impl WalkMetadata {
    /// Creates metadata for an interval of an existing path.
    ///
    /// The returned name is a copy of `path_name` with `interval.start` added to its
    /// fragment field, and the end value is the original fragment plus `interval.end`.
    /// Weight and CIGAR are left unset.
    pub fn path_interval(path_name: &FullPathName, interval: Range<usize>) -> Self {
        let base = path_name.fragment;
        let end = base + interval.end;
        let mut name = path_name.clone();
        name.fragment = base + interval.start;
        Self { name, end, weight: None, cigar: None }
    }

    /// Creates metadata for the path with the given identifier, using `len` as the end value.
    ///
    /// Returns `None` if `FullPathName::from_metadata` cannot produce a name for `path_id`.
    /// Weight and CIGAR are left unset.
    pub fn haplotype(metadata: &Metadata, path_id: usize, len: usize) -> Option<Self> {
        FullPathName::from_metadata(metadata, path_id).map(|name| Self {
            name,
            end: len,
            weight: None,
            cigar: None,
        })
    }

    /// Creates metadata for a haplotype of the sample `unknown` on the given contig.
    ///
    /// The name is built with `FullPathName::haplotype("unknown", contig, haplotype, 0)`
    /// and `len` becomes the end value. Weight and CIGAR are left unset.
    pub fn anonymous(haplotype: usize, contig: &str, len: usize) -> Self {
        let name = FullPathName::haplotype("unknown", contig, haplotype, 0);
        Self { name, end: len, weight: None, cigar: None }
    }

    /// Replaces the weight annotation; `None` clears it.
    pub fn add_weight(&mut self, weight: Option<usize>) {
        self.weight = weight;
    }

    /// Replaces the CIGAR annotation; `None` clears it.
    pub fn add_cigar(&mut self, cigar: Option<String>) {
        self.cigar = cigar;
    }
}
/// Returns `true` if the line starts with `@`, which marks a GAF header line.
///
/// An empty buffer is not a header line.
pub fn is_gaf_header_line(buf: &[u8]) -> bool {
    buf.starts_with(b"@")
}
/// Checks whether the next byte available from the reader is `@`, without consuming input.
///
/// Returns `Ok(false)` when the reader has no more data.
///
/// # Errors
///
/// Propagates any error from `BufRead::fill_buf`.
pub fn peek_gaf_header_line<R: BufRead>(reader: &mut R) -> io::Result<bool> {
    reader.fill_buf().map(|pending| pending.starts_with(b"@"))
}
/// Reads consecutive header lines (lines starting with `@`) from the reader.
///
/// Reading stops before the first line that does not start with `@`, or at the end of
/// the input. The trailing `\n` of each line is removed, and bytes that are not valid
/// UTF-8 are replaced during the lossy conversion to `String`.
///
/// # Errors
///
/// Propagates any I/O error from the reader.
pub fn read_gaf_header_lines<R: BufRead>(reader: &mut R) -> io::Result<Vec<String>> {
    let mut headers: Vec<String> = Vec::new();
    let mut raw: Vec<u8> = Vec::new();
    while peek_gaf_header_line(reader)? {
        raw.clear();
        if reader.read_until(b'\n', &mut raw)? == 0 {
            break;
        }
        if raw.ends_with(b"\n") {
            raw.pop();
        }
        headers.push(String::from_utf8_lossy(&raw).into_owned());
    }
    Ok(headers)
}
/// Writes the GAF file header line `@HD\tVN:Z:1.0` followed by a newline.
///
/// # Errors
///
/// Propagates any I/O error from the output.
pub fn write_gaf_file_header<T: Write>(output: &mut T) -> io::Result<()> {
    output.write_all(b"@HD\tVN:Z:1.0\n")
}
/// Writes the GFA header line with version `1.1`.
///
/// If `reference_samples` is given, it is written verbatim as the value of an `RS:Z:` field.
///
/// # Errors
///
/// Propagates any I/O error from the output.
pub fn write_gfa_header<T: Write>(reference_samples: Option<&str>, output: &mut T) -> io::Result<()> {
    let header = match reference_samples {
        Some(sample_names) => format!("H\tVN:Z:1.1\tRS:Z:{}\n", sample_names),
        None => String::from("H\tVN:Z:1.1\n"),
    };
    output.write_all(header.as_bytes())
}
/// Writes the given header lines, one per line.
///
/// A newline is added after every line that does not already end with one.
///
/// # Errors
///
/// Propagates any I/O error from the output.
pub fn write_header_lines<T: Write>(header_lines: &[String], output: &mut T) -> io::Result<()> {
    for line in header_lines {
        output.write_all(line.as_bytes())?;
        if !line.ends_with('\n') {
            output.write_all(b"\n")?;
        }
    }
    Ok(())
}
/// Writes a GFA segment line for a node, using the decimal node identifier as the name.
///
/// # Errors
///
/// Propagates any I/O error from the output.
pub fn write_gfa_node<T: Write>(node_id: usize, sequence: &[u8], output: &mut T) -> io::Result<()> {
    let name = node_id.to_string();
    write_gfa_segment(name.as_bytes(), sequence, output)
}
/// Writes a GFA segment line `S\t<name>\t<sequence>\n`.
///
/// The line is assembled in memory and passed to the output in a single `write_all` call.
///
/// # Errors
///
/// Propagates any I/O error from the output.
pub fn write_gfa_segment<T: Write>(name: &[u8], sequence: &[u8], output: &mut T) -> io::Result<()> {
    let parts: [&[u8]; 5] = [b"S\t", name, b"\t", sequence, b"\n"];
    output.write_all(&parts.concat())
}
/// Writes a GFA link line between two oriented nodes, using decimal node identifiers as names.
///
/// # Errors
///
/// Propagates any I/O error from the output.
pub fn write_gfa_edge<T: Write>(from: (usize, Orientation), to: (usize, Orientation), output: &mut T) -> io::Result<()> {
    let from_name = from.0.to_string();
    let to_name = to.0.to_string();
    write_gfa_link((from_name.as_bytes(), from.1), (to_name.as_bytes(), to.1), output)
}
/// Writes a GFA link line `L\t<from>\t<+|->\t<to>\t<+|->\t0M\n`.
///
/// Forward orientation is written as `+` and reverse orientation as `-`. The overlap is
/// always `0M`. The line is assembled in memory and passed to the output in a single
/// `write_all` call.
///
/// # Errors
///
/// Propagates any I/O error from the output.
pub fn write_gfa_link<T: Write>(from: (&[u8], Orientation), to: (&[u8], Orientation), output: &mut T) -> io::Result<()> {
    let from_sign: &[u8] = match from.1 {
        Orientation::Forward => b"+",
        Orientation::Reverse => b"-",
    };
    let to_sign: &[u8] = match to.1 {
        Orientation::Forward => b"+",
        Orientation::Reverse => b"-",
    };
    let parts: [&[u8]; 9] = [
        b"L\t", from.0, b"\t", from_sign, b"\t", to.0, b"\t", to_sign, b"\t0M\n",
    ];
    output.write_all(&parts.concat())
}
/// Writes a GFA walk line for the given path.
///
/// The line consists of the tab-separated fields `W`, sample, haplotype, contig,
/// fragment, end, and the walk itself (see `append_walk`). If the metadata has a weight,
/// it is appended as a `WT:i:` field. If it has a CIGAR string, that is appended as a
/// `CG:Z:` field.
///
/// The weight is formatted directly as an unsigned integer, so values above
/// `isize::MAX` are not wrapped into negative numbers.
///
/// # Errors
///
/// Propagates any I/O error from the output.
pub fn write_gfa_walk<T: Write>(path: &[usize], metadata: &WalkMetadata, output: &mut T) -> io::Result<()> {
    let name = &metadata.name;
    let mut buffer: Vec<u8> = Vec::new();

    buffer.extend_from_slice(b"W\t");
    buffer.extend_from_slice(name.sample.as_bytes());
    buffer.push(b'\t');
    utils::append_usize(&mut buffer, name.haplotype);
    buffer.push(b'\t');
    buffer.extend_from_slice(name.contig.as_bytes());
    buffer.push(b'\t');
    utils::append_usize(&mut buffer, name.fragment);
    buffer.push(b'\t');
    utils::append_usize(&mut buffer, metadata.end);
    buffer.push(b'\t');
    append_walk(&mut buffer, path);

    if let Some(weight) = metadata.weight {
        // Formatting the `usize` directly avoids a wrapping `as isize` cast.
        write!(buffer, "\tWT:i:{}", weight)?;
    }
    if let Some(cigar) = &metadata.cigar {
        // Borrow the CIGAR bytes instead of copying them into a temporary field.
        TypedField::append_string(&mut buffer, [b'C', b'G'], cigar.as_bytes(), true);
    }

    buffer.push(b'\n');
    output.write_all(&buffer)
}
/// A minimal JSON value that can be serialized with `Display`.
///
/// Objects are stored as a list of key-value pairs and are written in that order.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum JSONValue {
    /// A boolean, written as `true` or `false`.
    Boolean(bool),
    /// A string, written in double quotes with JSON escaping.
    String(String),
    /// An unsigned integer.
    Number(usize),
    /// An array of values.
    Array(Vec<JSONValue>),
    /// An object as an ordered list of key-value pairs.
    Object(Vec<(String, JSONValue)>),
}

/// Writes `s` as a JSON string literal, including the surrounding double quotes.
///
/// Double quotes, backslashes, and control characters below U+0020 are escaped, because
/// JSON does not allow them unescaped inside a string.
fn fmt_json_string(f: &mut std::fmt::Formatter<'_>, s: &str) -> std::fmt::Result {
    f.write_str("\"")?;
    for c in s.chars() {
        match c {
            '"' => f.write_str("\\\"")?,
            '\\' => f.write_str("\\\\")?,
            '\n' => f.write_str("\\n")?,
            '\r' => f.write_str("\\r")?,
            '\t' => f.write_str("\\t")?,
            '\u{08}' => f.write_str("\\b")?,
            '\u{0C}' => f.write_str("\\f")?,
            c if c < '\u{20}' => write!(f, "\\u{:04x}", c as u32)?,
            c => {
                let mut utf8 = [0u8; 4];
                f.write_str(c.encode_utf8(&mut utf8))?;
            },
        }
    }
    f.write_str("\"")
}

/// Serializes the value as JSON on a single line.
///
/// Array elements and object members are separated by `", "`, and keys are separated
/// from values by `": "`. String values and object keys are escaped.
impl Display for JSONValue {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            JSONValue::Boolean(b) => write!(f, "{}", b),
            JSONValue::String(s) => fmt_json_string(f, s),
            JSONValue::Number(n) => write!(f, "{}", n),
            JSONValue::Array(v) => {
                f.write_str("[")?;
                for (index, value) in v.iter().enumerate() {
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    write!(f, "{}", value)?;
                }
                f.write_str("]")
            },
            JSONValue::Object(v) => {
                f.write_str("{")?;
                for (index, (key, value)) in v.iter().enumerate() {
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    fmt_json_string(f, key)?;
                    write!(f, ": {}", value)?;
                }
                f.write_str("}")
            },
        }
    }
}
/// Builds a JSON object describing a path.
///
/// The object has the following members, in this order:
///
/// * `name`: The result of `path_fragment_name` on the path name, using the end value from the metadata.
/// * `weight`: The weight, if the metadata has one.
/// * `cigar`: The CIGAR string, if the metadata has one.
/// * `path`: An array with one object per handle, with the node identifier as a string
///   under `id` and the orientation as a boolean under `is_reverse`.
pub fn json_path(path: &[usize], metadata: &WalkMetadata) -> JSONValue {
    let display_name = metadata.name.path_fragment_name(metadata.end);
    let mut members: Vec<(String, JSONValue)> = vec![
        (String::from("name"), JSONValue::String(display_name)),
    ];

    if let Some(weight) = metadata.weight {
        members.push((String::from("weight"), JSONValue::Number(weight)));
    }
    if let Some(cigar) = metadata.cigar.as_ref() {
        members.push((String::from("cigar"), JSONValue::String(cigar.clone())));
    }

    let steps: Vec<JSONValue> = path.iter().map(|&handle| {
        let id = support::node_id(handle).to_string();
        let is_reverse = support::node_orientation(handle) == Orientation::Reverse;
        JSONValue::Object(vec![
            (String::from("id"), JSONValue::String(id)),
            (String::from("is_reverse"), JSONValue::Boolean(is_reverse)),
        ])
    }).collect();
    members.push((String::from("path"), JSONValue::Array(steps)));

    JSONValue::Object(members)
}