use std::collections::{HashMap, HashSet};
use std::io::{Seek, Write};
use crate::codec;
use crate::codec::huffman::HuffmanCodec;
use crate::error::{OneError, Result};
use crate::reader::{OneReader, VERSION_MAJOR, VERSION_MINOR};
use crate::schema::{LineInfo, SchemaEntry};
use crate::types::{FieldType, LineCounts, Provenance, Reference};
/// Number of string-list bytes a line type must feed into its Huffman histogram
/// before `write_line_binary` builds the codec (the codec is built once the running
/// total exceeds this value).
const CODEC_TRAINING_SIZE: usize = 100_000;
/// A staged value for one fixed (non-list) field of the next line to be written.
///
/// Slots are filled through `OneWriter::set_int` / `set_real` / `set_char` and are reset
/// to `Empty` after every `write_line`.
#[derive(Debug, Clone)]
enum FieldSlot {
/// Staged integer value.
Int(i64),
/// Staged floating-point value.
Real(f64),
/// Staged single-byte character value.
Char(u8),
/// No value has been staged for this position.
Empty,
}
impl FieldSlot {
    /// Returns the staged integer, or a `Usage` error if the slot holds anything else.
    fn as_int(&self) -> Result<i64> {
        if let FieldSlot::Int(value) = self {
            return Ok(*value);
        }
        Err(OneError::Usage(format!("expected Int field, got {self:?}")))
    }

    /// Returns the staged real, or a `Usage` error if the slot holds anything else.
    fn as_real(&self) -> Result<f64> {
        if let FieldSlot::Real(value) = self {
            return Ok(*value);
        }
        Err(OneError::Usage(format!("expected Real field, got {self:?}")))
    }

    /// Returns the staged character byte, or a `Usage` error if the slot holds anything else.
    fn as_char(&self) -> Result<u8> {
        if let FieldSlot::Char(value) = self {
            return Ok(*value);
        }
        Err(OneError::Usage(format!("expected Char field, got {self:?}")))
    }
}
/// Writer that emits lines for a single schema entry to `W`, in either ASCII or binary form.
///
/// The prolog is written by the constructors; binary output additionally gets a footer
/// when `close` is called.
pub struct OneWriter<W: Write + Seek> {
/// Destination stream.
inner: W,
/// Schema describing every line type this writer accepts.
schema_entry: SchemaEntry,
/// Staged fixed-field values for the next line; sized to the widest line type.
fields: Vec<FieldSlot>,
/// Per-line-type count / max list length / total list length, updated by `write_line`.
counts: HashMap<u8, LineCounts>,
/// Set once `write_prolog` has completed.
prolog_written: bool,
/// Provenance records emitted in the prolog.
provenance: Vec<Provenance>,
/// Reference records emitted in the prolog as `<` lines.
references: Vec<Reference>,
/// Deferred records emitted in the prolog as `>` lines.
deferred: Vec<Reference>,
/// Primary file type written on the `1` header line.
file_type: String,
/// Optional sub type written on the `2` header line.
sub_type: Option<String>,
/// Whether line data is written in binary form.
is_binary: bool,
/// Byte order used for binary output; taken from the compile target.
is_big_endian: bool,
/// Stream position immediately after the binary prolog.
data_start: u64,
/// Number of binary line-data bytes written since `data_start`.
bytes_written: u64,
/// For each object line type, the byte offsets recorded for the footer index.
indices: HashMap<u8, Vec<i64>>,
/// Huffman codecs for line types whose list field is a `String` (binary mode only).
codecs: HashMap<u8, HuffmanCodec>,
/// Line types whose codec has been built and is used for encoding.
codec_built: HashSet<u8>,
/// Bytes fed into each line type's codec histogram so far.
codec_tack: HashMap<u8, usize>,
}
impl<W: Write + Seek> OneWriter<W> {
pub fn new(
writer: W,
schema: &SchemaEntry,
sub_type: Option<&str>,
is_binary: bool,
) -> Result<Self> {
let n_field_max = schema
.info
.values()
.map(|li| li.field_types.len())
.max()
.unwrap_or(0);
let mut codecs = HashMap::new();
if is_binary {
for (<, li) in &schema.info {
if let Some(idx) = li.list_field
&& li.field_types[idx] == FieldType::String
{
codecs.insert(lt, HuffmanCodec::new());
}
}
}
let mut w = Self {
inner: writer,
schema_entry: schema.clone(),
fields: vec![FieldSlot::Empty; n_field_max],
counts: HashMap::new(),
prolog_written: false,
provenance: Vec::new(),
references: Vec::new(),
deferred: Vec::new(),
file_type: schema.primary.clone(),
sub_type: sub_type.map(String::from),
is_binary,
is_big_endian: cfg!(target_endian = "big"),
data_start: 0,
bytes_written: 0,
indices: HashMap::new(),
codecs,
codec_built: HashSet::new(),
codec_tack: HashMap::new(),
};
w.write_prolog()?;
Ok(w)
}
pub fn from_reader<R: std::io::Read + Seek>(
writer: W,
reader: &OneReader<R>,
is_binary: bool,
) -> Result<Self> {
Self::from_reader_with_provenance(writer, reader, is_binary, &[])
}
pub fn from_reader_with_provenance<R: std::io::Read + Seek>(
writer: W,
reader: &OneReader<R>,
is_binary: bool,
extra_provenance: &[Provenance],
) -> Result<Self> {
let schema = reader.schema().clone();
let n_field_max = schema
.info
.values()
.map(|li| li.field_types.len())
.max()
.unwrap_or(0);
let mut codecs = HashMap::new();
if is_binary {
for (<, li) in &schema.info {
if let Some(idx) = li.list_field
&& li.field_types[idx] == FieldType::String
{
codecs.insert(lt, HuffmanCodec::new());
}
}
}
let mut counts = HashMap::new();
for < in schema.info.keys() {
if let Some(given) = reader.given_counts(lt) {
counts.insert(lt, given.clone());
}
}
let mut provenance = reader.provenance().to_vec();
provenance.extend_from_slice(extra_provenance);
let mut w = Self {
inner: writer,
schema_entry: schema,
fields: vec![FieldSlot::Empty; n_field_max],
counts,
prolog_written: false,
provenance,
references: reader.references().to_vec(),
deferred: reader.deferred().to_vec(),
file_type: reader.file_type.clone(),
sub_type: reader.sub_type.clone(),
is_binary,
is_big_endian: cfg!(target_endian = "big"),
data_start: 0,
bytes_written: 0,
indices: HashMap::new(),
codecs,
codec_built: HashSet::new(),
codec_tack: HashMap::new(),
};
w.write_prolog()?;
Ok(w)
}
/// Queues a provenance record for the prolog.
///
/// # Errors
/// Returns a `Usage` error once the prolog has been written.
///
/// NOTE(review): `new` and `from_reader_with_provenance` both call `write_prolog`,
/// which sets `prolog_written`; if those are the only constructors this call can
/// never succeed — confirm the intended API.
pub fn add_provenance(&mut self, prov: Provenance) -> Result<()> {
    if !self.prolog_written {
        self.provenance.push(prov);
        return Ok(());
    }
    Err(OneError::Usage(
        "provenance must be added before writing data".to_string(),
    ))
}
/// Queues a reference record (`filename`, `count`) for the prolog.
///
/// # Errors
/// Returns a `Usage` error once the prolog has been written.
///
/// NOTE(review): `new` and `from_reader_with_provenance` both call `write_prolog`,
/// which sets `prolog_written`; if those are the only constructors this call can
/// never succeed — confirm the intended API.
pub fn add_reference(&mut self, filename: &str, count: i64) -> Result<()> {
    if !self.prolog_written {
        let filename = filename.to_string();
        self.references.push(Reference { filename, count });
        return Ok(());
    }
    Err(OneError::Usage(
        "references must be added before writing data".to_string(),
    ))
}
/// Stages an integer value for field `index` of the next line.
///
/// # Panics
/// Panics if `index` is outside the staging buffer.
pub fn set_int(&mut self, index: usize, value: i64) {
    let slot = &mut self.fields[index];
    *slot = FieldSlot::Int(value);
}
/// Stages a real value for field `index` of the next line.
///
/// # Panics
/// Panics if `index` is outside the staging buffer.
pub fn set_real(&mut self, index: usize, value: f64) {
    let slot = &mut self.fields[index];
    *slot = FieldSlot::Real(value);
}
/// Stages a character byte for field `index` of the next line.
///
/// # Panics
/// Panics if `index` is outside the staging buffer.
pub fn set_char(&mut self, index: usize, value: u8) {
    let slot = &mut self.fields[index];
    *slot = FieldSlot::Char(value);
}
/// Writes one line of type `line_type` using the staged fields and optional `list` bytes.
///
/// After a successful write the per-type counts are updated (count always; max and
/// total only for line types with a list field) and every staged field is reset.
///
/// # Errors
/// Returns a `Usage` error for a line type missing from the schema, and propagates
/// any error from the ASCII or binary line writer.
pub fn write_line(&mut self, line_type: u8, list: Option<&[u8]>) -> Result<()> {
    let Some(info) = self.schema_entry.info.get(&line_type) else {
        return Err(OneError::Usage(format!(
            "unknown line type '{}'",
            line_type as char
        )));
    };
    // Owned copy so the line writers can borrow `self` mutably.
    let li = info.clone();

    let list_len = if self.is_binary {
        self.write_line_binary(line_type, &li, list)?
    } else {
        self.write_line_ascii(line_type, &li, list)?
    };

    let accum = self.counts.entry(line_type).or_default();
    accum.count += 1;
    if li.list_field.is_some() {
        accum.max = std::cmp::max(accum.max, list_len);
        accum.total += list_len;
    }

    self.fields.fill(FieldSlot::Empty);
    Ok(())
}
/// Writes a line whose list payload is the bytes of `s`.
pub fn write_string_line(&mut self, line_type: u8, s: &str) -> Result<()> {
    let payload = s.as_bytes();
    self.write_line(line_type, Some(payload))
}
/// Writes a line whose list payload is the bytes of `dna`.
pub fn write_dna_line(&mut self, line_type: u8, dna: &str) -> Result<()> {
    let payload = dna.as_bytes();
    self.write_line(line_type, Some(payload))
}
/// Writes a line whose list payload is `data`, passed on as consecutive
/// little-endian 8-byte integers.
pub fn write_int_list_line(&mut self, line_type: u8, data: &[i64]) -> Result<()> {
    let mut bytes: Vec<u8> = Vec::with_capacity(data.len() * 8);
    for value in data {
        bytes.extend_from_slice(&value.to_le_bytes());
    }
    self.write_line(line_type, Some(bytes.as_slice()))
}
/// Writes a line whose list payload is `data`, passed on as consecutive
/// little-endian 8-byte floats.
pub fn write_real_list_line(&mut self, line_type: u8, data: &[f64]) -> Result<()> {
    let mut bytes: Vec<u8> = Vec::with_capacity(data.len() * 8);
    for value in data {
        bytes.extend_from_slice(&value.to_le_bytes());
    }
    self.write_line(line_type, Some(bytes.as_slice()))
}
/// Writes a line whose list payload is `strings` joined with NUL separators.
///
/// Note: the line writers split the payload on NUL bytes and skip empty pieces, so
/// empty strings in `strings` do not appear in the output.
pub fn write_string_list_line(
    &mut self,
    line_type: u8,
    strings: &[&str],
) -> Result<()> {
    let mut buf = Vec::new();
    let mut remaining = strings.iter();
    if let Some(first) = remaining.next() {
        buf.extend_from_slice(first.as_bytes());
        for s in remaining {
            // Separator goes between entries only, never after the last one.
            buf.push(0);
            buf.extend_from_slice(s.as_bytes());
        }
    }
    self.write_line(line_type, Some(buf.as_slice()))
}
/// Accepts a comment for the current line.
///
/// Currently a no-op: the comment is discarded, nothing is written, and the call
/// always succeeds.
pub fn write_comment(&mut self, _comment: &str) -> Result<()> {
Ok(())
}
pub fn close(mut self) -> Result<W> {
if self.is_binary {
self.write_footer()?;
}
self.inner.flush().map_err(write_err)?;
Ok(self.inner)
}
/// Returns the counts recorded for `line_type`, if any exist.
pub fn counts(&self, line_type: u8) -> Option<&LineCounts> {
    let per_type = &self.counts;
    per_type.get(&line_type)
}
fn write_prolog(&mut self) -> Result<()> {
writeln!(
self.inner,
"1 {} {} {VERSION_MAJOR} {VERSION_MINOR}",
self.file_type.len(),
self.file_type
)
.map_err(write_err)?;
if let Some(ref st) = self.sub_type {
writeln!(self.inner, "2 {} {st}", st.len()).map_err(write_err)?;
}
for prov in &self.provenance {
writeln!(
self.inner,
"! 4 {} {} {} {} {} {} {} {}",
prov.program.len(),
prov.program,
prov.version.len(),
prov.version,
prov.command.len(),
prov.command,
prov.date.len(),
prov.date,
)
.map_err(write_err)?;
}
for < in &self.schema_entry.defn_order {
if let Some(li) = self.schema_entry.info.get(<) {
let kind = if li.is_object {
'O'
} else if li.is_group && li.field_types.is_empty() {
'G'
} else {
'D'
};
write!(
self.inner,
"~ {kind} {} {}",
lt as char,
li.field_types.len()
)
.map_err(write_err)?;
for ft in &li.field_types {
let name = ft.name();
write!(self.inner, " {} {name}", name.len()).map_err(write_err)?;
}
if let Some(ref comment) = li.comment {
write!(self.inner, " {comment}").map_err(write_err)?;
}
writeln!(self.inner).map_err(write_err)?;
}
}
for r in &self.references {
writeln!(
self.inner,
"< {} {} {}",
r.filename.len(),
r.filename,
r.count
)
.map_err(write_err)?;
}
for d in &self.deferred {
writeln!(self.inner, "> {} {}", d.filename.len(), d.filename)
.map_err(write_err)?;
}
for (<, lc) in &self.counts {
if lc.count > 0 {
writeln!(self.inner, "# {} {}", lt as char, lc.count)
.map_err(write_err)?;
}
if lc.max > 0 {
writeln!(self.inner, "@ {} {}", lt as char, lc.max)
.map_err(write_err)?;
}
if lc.total > 0 {
writeln!(self.inner, "+ {} {}", lt as char, lc.total)
.map_err(write_err)?;
}
}
if self.is_binary {
let big = if self.is_big_endian { 1 } else { 0 };
writeln!(self.inner, "$ {big}").map_err(write_err)?;
self.inner.flush().map_err(write_err)?;
self.data_start = self.inner.stream_position().map_err(write_err)?;
for (<, li) in &self.schema_entry.info {
if li.is_object {
self.indices.insert(lt, vec![self.data_start as i64]);
}
}
self.counts.clear();
}
self.prolog_written = true;
Ok(())
}
/// Writes one line in ASCII form and returns the list length recorded for it.
///
/// The line starts with the line-type character, followed by each field in schema
/// order separated by spaces, and ends with a newline. List fields are written as
/// their length followed by the list contents; fixed fields are taken from the
/// staged `fields` slots.
///
/// # Errors
/// Returns a `Usage` error if a list field has no `list` data or a staged slot has
/// the wrong type, and propagates I/O failures.
fn write_line_ascii(
&mut self,
line_type: u8,
li: &LineInfo,
list: Option<&[u8]>,
) -> Result<i64> {
write!(self.inner, "{}", line_type as char).map_err(write_err)?;
let mut list_len: i64 = 0;
for (i, &ft) in li.field_types.iter().enumerate() {
if ft.is_list() {
let data = require_list(list)?;
list_len = list_len_for(ft, data);
write!(self.inner, " {list_len}").map_err(write_err)?;
match ft {
// Character lists are written verbatim after a single space.
FieldType::String | FieldType::Dna => {
write!(self.inner, " ").map_err(write_err)?;
self.inner.write_all(data).map_err(write_err)?;
}
// Numeric lists arrive as little-endian 8-byte values; trailing bytes that
// do not fill a whole value are ignored by `chunks_exact`.
FieldType::IntList => {
for chunk in data.chunks_exact(8) {
let val = i64::from_le_bytes(chunk.try_into().unwrap());
write!(self.inner, " {val}").map_err(write_err)?;
}
}
FieldType::RealList => {
for chunk in data.chunks_exact(8) {
let val = f64::from_le_bytes(chunk.try_into().unwrap());
write!(self.inner, " {val}").map_err(write_err)?;
}
}
// Each non-empty NUL-separated piece is written as "<len> <bytes>".
FieldType::StringList => {
let strings = split_string_list(data);
for s in &strings {
write!(self.inner, " {} ", s.len()).map_err(write_err)?;
self.inner.write_all(s).map_err(write_err)?;
}
}
_ => {}
}
} else {
match ft {
FieldType::Int => {
let val = self.fields[i].as_int()?;
write!(self.inner, " {val}").map_err(write_err)?;
}
FieldType::Real => {
let val = self.fields[i].as_real()?;
write!(self.inner, " {val}").map_err(write_err)?;
}
FieldType::Char => {
let val = self.fields[i].as_char()?;
write!(self.inner, " {}", val as char).map_err(write_err)?;
}
_ => {}
}
}
}
writeln!(self.inner).map_err(write_err)?;
Ok(list_len)
}
/// Writes one line in binary form and returns the list length recorded for it.
///
/// Layout: a packed type byte (low bit set when the list goes through a codec), then
/// every field in schema order (list fields contribute only their length here), then
/// the list payload if the length is non-zero. `bytes_written` is advanced for every
/// byte emitted, and object lines have their start offset appended to `indices`.
///
/// String lists are written raw, and fed into the line type's Huffman histogram,
/// until more than `CODEC_TRAINING_SIZE` bytes have been seen. The codec is then
/// built and used for subsequent lines of that type.
///
/// # Errors
/// Returns a `Usage` error if a list field has no `list` data or a staged slot has
/// the wrong type, and propagates I/O failures.
fn write_line_binary(
    &mut self,
    line_type: u8,
    li: &LineInfo,
    list: Option<&[u8]>,
) -> Result<i64> {
    let list_ft = li.list_field.map(|i| li.field_types[i]);
    // DNA lists are always packed; string lists only once their codec has been built.
    let use_codec = match list_ft {
        Some(FieldType::Dna) => true,
        Some(FieldType::String) => self.codec_built.contains(&line_type),
        _ => false,
    };
    if li.is_object {
        // Remember where this object line starts for the footer index.
        let pos = self.data_start + self.bytes_written;
        self.indices
            .entry(line_type)
            .or_default()
            .push(pos as i64);
    }
    let mut type_code = binary_type_pack(line_type);
    if use_codec {
        type_code |= 0x01;
    }
    self.inner.write_all(&[type_code]).map_err(write_err)?;
    self.bytes_written += 1;

    let mut list_len: i64 = 0;
    for (i, &ft) in li.field_types.iter().enumerate() {
        if ft.is_list() {
            let data = require_list(list)?;
            list_len = list_len_for(ft, data);
            let n = codec::ltf::write(list_len, &mut self.inner).map_err(write_err)?;
            self.bytes_written += n as u64;
        } else {
            match ft {
                FieldType::Int => {
                    let val = self.fields[i].as_int()?;
                    let n = codec::ltf::write(val, &mut self.inner).map_err(write_err)?;
                    self.bytes_written += n as u64;
                }
                FieldType::Real => {
                    let val = self.fields[i].as_real()?;
                    let bytes = if self.is_big_endian {
                        val.to_be_bytes()
                    } else {
                        val.to_le_bytes()
                    };
                    self.inner.write_all(&bytes).map_err(write_err)?;
                    self.bytes_written += 8;
                }
                FieldType::Char => {
                    let val = self.fields[i].as_char()?;
                    self.inner.write_all(&[val]).map_err(write_err)?;
                    self.bytes_written += 1;
                }
                _ => {}
            }
        }
    }

    if let Some(ft) = list_ft {
        let data = list.unwrap_or(&[]);
        if list_len > 0 {
            match ft {
                FieldType::Dna => {
                    // Two bits per base, four bases per byte.
                    let compressed_len = data.len().div_ceil(4);
                    let mut compressed = vec![0u8; compressed_len];
                    codec::dna::compress(data, &mut compressed);
                    let n_bits = (data.len() * 2) as i64;
                    let n = codec::ltf::write(n_bits, &mut self.inner).map_err(write_err)?;
                    self.inner.write_all(&compressed).map_err(write_err)?;
                    self.bytes_written += n as u64 + compressed_len as u64;
                }
                FieldType::String => {
                    if use_codec {
                        let hc = self
                            .codecs
                            .get(&line_type)
                            .expect("codec_built only holds line types present in codecs");
                        let mut compressed = vec![0u8; data.len() * 2];
                        let n_bits = hc.encode(data, &mut compressed);
                        let n_bytes = n_bits.div_ceil(8);
                        let n = codec::ltf::write(n_bits as i64, &mut self.inner)
                            .map_err(write_err)?;
                        self.inner
                            .write_all(&compressed[..n_bytes])
                            .map_err(write_err)?;
                        self.bytes_written += n as u64 + n_bytes as u64;
                    } else {
                        self.inner.write_all(data).map_err(write_err)?;
                        self.bytes_written += data.len() as u64;
                        // Keep training the codec until enough bytes have been seen.
                        if let Some(hc) = self.codecs.get_mut(&line_type) {
                            hc.add_to_histogram(data);
                            let tack = self.codec_tack.entry(line_type).or_default();
                            *tack += data.len();
                            if *tack > CODEC_TRAINING_SIZE {
                                hc.build(true);
                                self.codec_built.insert(line_type);
                            }
                        }
                    }
                }
                FieldType::IntList => {
                    let values: Vec<i64> = data
                        .chunks_exact(8)
                        .map(|c| i64::from_le_bytes(c.try_into().unwrap()))
                        .collect();
                    // `list_len > 0` guarantees at least one decoded value.
                    let n = codec::ltf::write(values[0], &mut self.inner).map_err(write_err)?;
                    self.bytes_written += n as u64;
                    if values.len() > 1 {
                        let mut data_copy = values;
                        let (compacted, byte_width) =
                            codec::int_list::compact(&mut data_copy, self.is_big_endian);
                        self.inner.write_all(&[byte_width]).map_err(write_err)?;
                        self.inner.write_all(&compacted).map_err(write_err)?;
                        self.bytes_written += 1 + compacted.len() as u64;
                    }
                }
                FieldType::RealList => {
                    // NOTE(review): `data` is little-endian by this writer's convention
                    // and is emitted unchanged even when `is_big_endian` is set —
                    // confirm the reader's expectation on big-endian targets.
                    self.inner.write_all(data).map_err(write_err)?;
                    self.bytes_written += data.len() as u64;
                }
                FieldType::StringList => {
                    // Write the raw bytes of each entry (as the ASCII writer does) so
                    // that a non-UTF-8 entry is not dropped after its length has
                    // already been emitted.
                    for s in split_string_list(data) {
                        let header = format!(" {} ", s.len());
                        self.inner.write_all(header.as_bytes()).map_err(write_err)?;
                        self.inner.write_all(s).map_err(write_err)?;
                        self.bytes_written += (header.len() + s.len()) as u64;
                    }
                }
                _ => {}
            }
        }
    }
    Ok(list_len)
}
fn write_footer(&mut self) -> Result<()> {
self.inner.write_all(b"\n").map_err(write_err)?;
self.inner.flush().map_err(write_err)?;
let footer_offset = self.inner.stream_position().map_err(write_err)?;
for (<, lc) in &self.counts {
if lc.count > 0 {
writeln!(self.inner, "# {} {}", lt as char, lc.count)
.map_err(write_err)?;
}
if lc.max > 0 {
writeln!(self.inner, "@ {} {}", lt as char, lc.max)
.map_err(write_err)?;
}
if lc.total > 0 {
writeln!(self.inner, "+ {} {}", lt as char, lc.total)
.map_err(write_err)?;
}
}
let index_entries: Vec<(u8, Vec<i64>)> = self
.indices
.iter()
.filter(|(_, idx)| idx.len() > 1)
.map(|(<, idx)| (lt, idx.clone()))
.collect();
for (lt, idx) in &index_entries {
self.write_footer_index(*lt, idx)?;
}
let codec_entries: Vec<(u8, Vec<u8>)> = self
.codecs
.iter()
.filter(|(lt, _)| self.codec_built.contains(lt))
.map(|(<, hc)| {
let mut buf = vec![0u8; HuffmanCodec::max_serial_size()];
let n = hc.serialise(&mut buf);
buf.truncate(n);
(lt, buf)
})
.collect();
for (lt, data) in &codec_entries {
self.write_footer_codec(*lt, data)?;
}
writeln!(self.inner, "^").map_err(write_err)?;
let offset_bytes = if self.is_big_endian {
(footer_offset as i64).to_be_bytes()
} else {
(footer_offset as i64).to_le_bytes()
};
self.inner.write_all(&offset_bytes).map_err(write_err)?;
Ok(())
}
/// Writes one footer index record for line type `lt`.
///
/// The record is the packed `&` type byte, the line-type byte, the number of
/// entries, the first entry, and — when there is more than one entry — a byte-width
/// marker followed by the compacted entries.
///
/// # Panics
/// Panics if `idx` is empty.
fn write_footer_index(&mut self, lt: u8, idx: &[i64]) -> Result<()> {
    let marker = binary_type_pack_special(b'&');
    self.inner.write_all(&[marker, lt]).map_err(write_err)?;

    let entry_count = idx.len() as i64;
    codec::ltf::write(entry_count, &mut self.inner).map_err(write_err)?;
    codec::ltf::write(idx[0], &mut self.inner).map_err(write_err)?;

    if idx.len() <= 1 {
        return Ok(());
    }
    let mut scratch = idx.to_vec();
    let (compacted, byte_width) = codec::int_list::compact(&mut scratch, self.is_big_endian);
    self.inner.write_all(&[byte_width]).map_err(write_err)?;
    self.inner.write_all(&compacted).map_err(write_err)?;
    Ok(())
}
/// Writes one footer codec record for line type `lt`.
///
/// The record is the packed `;` type byte, the line-type byte, the length of the
/// serialised codec, and the serialised codec bytes themselves.
fn write_footer_codec(&mut self, lt: u8, data: &[u8]) -> Result<()> {
    let marker = binary_type_pack_special(b';');
    self.inner.write_all(&[marker, lt]).map_err(write_err)?;

    let payload_len = data.len() as i64;
    codec::ltf::write(payload_len, &mut self.inner).map_err(write_err)?;
    self.inner.write_all(data).map_err(write_err)
}
}
/// Packs an ASCII-letter line type into its binary type byte.
///
/// Upper-case letters map to indices 0..=25 and lower-case letters to 26..=51; the
/// result is the index shifted left by one with the top bit set, leaving the low bit
/// clear. Any non-letter input yields 0.
fn binary_type_pack(lt: u8) -> u8 {
    let idx: u16 = match lt {
        b'A'..=b'Z' => u16::from(lt - b'A'),
        b'a'..=b'z' => u16::from(26 + lt - b'a'),
        _ => return 0,
    };
    ((idx << 1) | 0x80) as u8
}
/// Returns the binary type byte for one of the special footer/marker characters
/// (`;`, `&`, `/`, `.`), or 0 for any other input.
fn binary_type_pack_special(ch: u8) -> u8 {
    const SPECIAL_CODES: [(u8, u8); 4] = [
        (b';', 0xe8),
        (b'&', 0xea),
        (b'/', 0xec),
        (b'.', 0xee),
    ];
    SPECIAL_CODES
        .iter()
        .find(|&&(symbol, _)| symbol == ch)
        .map_or(0, |&(_, code)| code)
}
/// Returns the logical element count of list payload `data` for field type `ft`.
///
/// Character lists count bytes, numeric lists count whole 8-byte values, string
/// lists count non-empty NUL-separated pieces, and non-list types yield 0.
fn list_len_for(ft: FieldType, data: &[u8]) -> i64 {
    let elements = match ft {
        FieldType::String | FieldType::Dna => data.len(),
        FieldType::IntList | FieldType::RealList => data.len() / 8,
        FieldType::StringList => data
            .split(|&b| b == 0)
            .filter(|piece| !piece.is_empty())
            .count(),
        _ => 0,
    };
    elements as i64
}
/// Splits a NUL-separated payload into its pieces, skipping empty ones.
fn split_string_list(data: &[u8]) -> Vec<&[u8]> {
    let mut pieces = Vec::new();
    for piece in data.split(|byte| *byte == 0) {
        if !piece.is_empty() {
            pieces.push(piece);
        }
    }
    pieces
}
/// Unwraps the list payload, or returns a `Usage` error when none was supplied.
fn require_list(list: Option<&[u8]>) -> Result<&[u8]> {
    match list {
        Some(data) => Ok(data),
        None => Err(OneError::Usage("list field requires list data".to_string())),
    }
}
/// Wraps an I/O error from the output stream in `OneError::Io`, using the
/// placeholder path `<writer>`.
fn write_err(e: std::io::Error) -> OneError {
    let path = "<writer>".into();
    OneError::Io { path, source: e }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::reader::OneReader;
use crate::schema::Schema;
use std::io::Cursor;
// Builds the "seq" schema used by most tests: object line `S` with a DNA list and
// line `I` with a STRING list.
fn make_seq_schema() -> SchemaEntry {
let schema = Schema::from_text("P 3 seq\nO S 1 3 DNA\nD I 1 6 STRING\n").unwrap();
schema.entries[0].clone()
}
// Creates an in-memory writer for `schema`, runs `f` against it, closes the writer
// and returns the bytes produced. Panics if any step fails.
fn write_to_buf(
schema: &SchemaEntry,
is_binary: bool,
f: impl FnOnce(&mut OneWriter<Cursor<Vec<u8>>>) -> Result<()>,
) -> Vec<u8> {
let mut writer =
OneWriter::new(Cursor::new(Vec::new()), schema, None, is_binary).unwrap();
f(&mut writer).unwrap();
writer.close().unwrap().into_inner()
}
// ASCII round trip: two S/I pairs are written and must be read back in order,
// followed by end of file.
#[test]
fn write_and_read_back_seq() {
let schema = make_seq_schema();
let buf = write_to_buf(&schema, false, |w| {
w.write_dna_line(b'S', "acgtacgt")?;
w.write_string_line(b'I', "seq1")?;
w.write_dna_line(b'S', "tgca")?;
w.write_string_line(b'I', "seq2")?;
Ok(())
});
let mut reader = OneReader::open(Cursor::new(buf), None, None).unwrap();
assert_eq!(reader.file_type, "seq");
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'S'));
assert_eq!(reader.dna_chars(), "acgtacgt");
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'I'));
assert_eq!(reader.string(), "seq1");
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'S'));
assert_eq!(reader.dna_chars(), "tgca");
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'I'));
assert_eq!(reader.string(), "seq2");
assert_eq!(reader.read_line().unwrap(), None);
}
// ASCII round trip of a single INT field, covering a positive and a negative value.
#[test]
fn write_int_field() {
let schema = Schema::from_text("P 3 foo\nO B 1 3 INT\n").unwrap();
let entry = &schema.entries[0];
let buf = write_to_buf(entry, false, |w| {
w.set_int(0, 42);
w.write_line(b'B', None)?;
w.set_int(0, -7);
w.write_line(b'B', None)?;
Ok(())
});
let mut reader = OneReader::open(Cursor::new(buf), None, None).unwrap();
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'B'));
assert_eq!(reader.int_field(0), 42);
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'B'));
assert_eq!(reader.int_field(0), -7);
}
// ASCII round trip of a line with three fixed fields (INT, CHAR, INT) written after
// a DNA line; the DNA line is read and skipped before the `N` line is checked.
#[test]
fn write_multi_field_line() {
let schema = Schema::from_text(
"P 4 adna\nO S 1 3 DNA\nD N 3 3 INT 4 CHAR 3 INT\n",
)
.unwrap();
let entry = &schema.entries[0];
let buf = write_to_buf(entry, false, |w| {
w.write_dna_line(b'S', "acgt")?;
w.set_int(0, 5);
w.set_char(1, b'N');
w.set_int(2, 3);
w.write_line(b'N', None)?;
Ok(())
});
let mut reader = OneReader::open(Cursor::new(buf), None, None).unwrap();
reader.read_line().unwrap(); let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'N'));
assert_eq!(reader.int_field(0), 5);
assert_eq!(reader.char_field(1), b'N');
assert_eq!(reader.int_field(2), 3);
}
// Writes a single DNA line and checks that the file opens with the expected type.
// NOTE(review): despite the name, no provenance is added or asserted here.
#[test]
fn write_with_provenance() {
let schema = make_seq_schema();
let buf = write_to_buf(&schema, false, |w| {
w.write_dna_line(b'S', "acgt")?;
Ok(())
});
let reader = OneReader::open(Cursor::new(buf), None, None).unwrap();
assert_eq!(reader.file_type, "seq");
}
// Writes an ASCII file, opens it, builds a second writer from that reader, writes
// the same lines again and checks the second file reads back identically.
#[test]
fn round_trip_via_from_reader() {
let schema = make_seq_schema();
let buf1 = write_to_buf(&schema, false, |w| {
w.write_dna_line(b'S', "acgtacgt")?;
w.write_string_line(b'I', "test1")?;
Ok(())
});
let reader = OneReader::open(Cursor::new(&buf1), None, None).unwrap();
let mut w =
OneWriter::from_reader(Cursor::new(Vec::new()), &reader, false).unwrap();
w.write_dna_line(b'S', "acgtacgt").unwrap();
w.write_string_line(b'I', "test1").unwrap();
let buf2 = w.close().unwrap().into_inner();
let mut reader2 = OneReader::open(Cursor::new(buf2), None, None).unwrap();
assert_eq!(reader2.file_type, "seq");
let t = reader2.read_line().unwrap();
assert_eq!(t, Some(b'S'));
assert_eq!(reader2.dna_chars(), "acgtacgt");
let t = reader2.read_line().unwrap();
assert_eq!(t, Some(b'I'));
assert_eq!(reader2.string(), "test1");
}
// ASCII round trip of an INT_LIST line.
#[test]
fn write_int_list() {
let schema = Schema::from_text("P 3 foo\nO L 1 8 INT_LIST\n").unwrap();
let entry = &schema.entries[0];
let buf = write_to_buf(entry, false, |w| {
w.write_int_list_line(b'L', &[10, 20, 30])?;
Ok(())
});
let mut reader = OneReader::open(Cursor::new(buf), None, None).unwrap();
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'L'));
assert_eq!(reader.int_list(), vec![10, 20, 30]);
}
// Checks that the writer's own per-type counts (count, max list length, total list
// length) track the lines written so far.
#[test]
fn accumulated_counts() {
let schema = make_seq_schema();
let mut w =
OneWriter::new(Cursor::new(Vec::new()), &schema, None, false).unwrap();
w.write_dna_line(b'S', "acgtacgt").unwrap();
w.write_string_line(b'I', "seq1").unwrap();
w.write_dna_line(b'S', "tg").unwrap();
w.write_string_line(b'I', "seq2").unwrap();
let s_counts = w.counts(b'S').unwrap();
assert_eq!(s_counts.count, 2);
assert_eq!(s_counts.max, 8);
assert_eq!(s_counts.total, 10);
let i_counts = w.counts(b'I').unwrap();
assert_eq!(i_counts.count, 2);
assert_eq!(i_counts.max, 4);
assert_eq!(i_counts.total, 8);
w.close().unwrap();
}
// Binary round trip: two S/I pairs are written in binary form and must be read back
// in order, followed by end of file.
#[test]
fn binary_round_trip_seq() {
let schema = make_seq_schema();
let buf = write_to_buf(&schema, true, |w| {
w.write_dna_line(b'S', "acgtacgt")?;
w.write_string_line(b'I', "seq1")?;
w.write_dna_line(b'S', "tgca")?;
w.write_string_line(b'I', "seq2")?;
Ok(())
});
let mut reader = OneReader::open(Cursor::new(buf), None, None).unwrap();
assert_eq!(reader.file_type, "seq");
assert!(reader.is_binary());
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'S'));
assert_eq!(reader.list_len(), 8);
assert_eq!(reader.dna_chars(), "acgtacgt");
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'I'));
assert_eq!(reader.string(), "seq1");
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'S'));
assert_eq!(reader.dna_chars(), "tgca");
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'I'));
assert_eq!(reader.string(), "seq2");
assert_eq!(reader.read_line().unwrap(), None);
}
// Checks that the counts a reader reports for a binary file match the lines that
// were written.
#[test]
fn binary_footer_counts() {
let schema = make_seq_schema();
let buf = write_to_buf(&schema, true, |w| {
w.write_dna_line(b'S', "acgtacgt")?;
w.write_string_line(b'I', "seq1")?;
w.write_dna_line(b'S', "tg")?;
w.write_string_line(b'I', "seq2")?;
Ok(())
});
let reader = OneReader::open(Cursor::new(buf), None, None).unwrap();
let s_counts = reader.given_counts(b'S').unwrap();
assert_eq!(s_counts.count, 2);
assert_eq!(s_counts.max, 8);
assert_eq!(s_counts.total, 10);
let i_counts = reader.given_counts(b'I').unwrap();
assert_eq!(i_counts.count, 2);
assert_eq!(i_counts.max, 4);
assert_eq!(i_counts.total, 8);
}
// Binary round trip of INT_LIST lines: a multi-element list, a single-element list
// and a list containing negative and zero values.
#[test]
fn binary_int_list_round_trip() {
let schema = Schema::from_text("P 3 foo\nO L 1 8 INT_LIST\n").unwrap();
let entry = &schema.entries[0];
let buf = write_to_buf(entry, true, |w| {
w.write_int_list_line(b'L', &[100, 200, 300])?;
w.write_int_list_line(b'L', &[42])?;
w.write_int_list_line(b'L', &[-5, 0, 5, 10])?;
Ok(())
});
let mut reader = OneReader::open(Cursor::new(buf), None, None).unwrap();
reader.read_line().unwrap();
assert_eq!(reader.int_list(), vec![100, 200, 300]);
reader.read_line().unwrap();
assert_eq!(reader.int_list(), vec![42]);
reader.read_line().unwrap();
assert_eq!(reader.int_list(), vec![-5, 0, 5, 10]);
}
// Writes the same lines in ASCII and in binary form, then reads both files in
// lock-step and checks that line types, list lengths and list contents agree.
#[test]
fn binary_ascii_data_match() {
let schema = make_seq_schema();
let write_data = |w: &mut OneWriter<Cursor<Vec<u8>>>| -> Result<()> {
w.write_dna_line(b'S', "acgtacgtaacc")?;
w.write_string_line(b'I', "seq1")?;
w.write_dna_line(b'S', "tgcatgca")?;
w.write_string_line(b'I', "seq2")?;
w.write_dna_line(b'S', "aaaa")?;
w.write_string_line(b'I', "seq3")?;
Ok(())
};
let ascii_buf = write_to_buf(&schema, false, write_data);
let binary_buf = write_to_buf(&schema, true, write_data);
let mut a = OneReader::open(Cursor::new(ascii_buf), None, None).unwrap();
let mut b = OneReader::open(Cursor::new(binary_buf), None, None).unwrap();
assert!(!a.is_binary());
assert!(b.is_binary());
loop {
let at = a.read_line().unwrap();
let bt = b.read_line().unwrap();
assert_eq!(at, bt, "line type mismatch");
match (at, bt) {
// Both readers reached end of file together.
(None, None) => break,
(Some(at), Some(_)) => {
let li = a.schema().info.get(&at).unwrap();
if li.list_field.is_some() {
assert_eq!(
a.list_len(),
b.list_len(),
"list_len mismatch on '{}'",
at as char
);
assert_eq!(
a.string(),
b.string(),
"list data mismatch on '{}'",
at as char
);
}
}
// Ruled out by the `assert_eq!(at, bt, ..)` above.
_ => unreachable!(),
}
}
}
// Writes three S/I pairs in binary form and checks that `goto` positions the reader
// so the next line read is the requested `S` object (third, then first).
#[test]
fn binary_index_and_goto() {
let schema = make_seq_schema();
let buf = write_to_buf(&schema, true, |w| {
w.write_dna_line(b'S', "aaaa")?;
w.write_string_line(b'I', "s1")?;
w.write_dna_line(b'S', "cccc")?;
w.write_string_line(b'I', "s2")?;
w.write_dna_line(b'S', "gggg")?;
w.write_string_line(b'I', "s3")?;
Ok(())
});
let mut reader = OneReader::open(Cursor::new(buf), None, None).unwrap();
reader.goto(b'S', 3).unwrap();
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'S'));
assert_eq!(reader.dna_chars(), "gggg");
reader.goto(b'S', 1).unwrap();
let t = reader.read_line().unwrap();
assert_eq!(t, Some(b'S'));
assert_eq!(reader.dna_chars(), "aaaa");
}
}