angsd_saf/
version.rs

1//! SAF file versions.
2//!
//! This module provides types and traits for abstracting over different SAF file versions.
4//! Users should not generally need to interact with code in this module, except perhaps to use the
5//! marker structs in generic bounds.
6//!
7//! Note that the SAF versioning is ambiguous: the versions described in the magic numbers are
8//! out of sync with those used internally in ANGSD. The usage here follows the magic numbers.
9//! Hence, what is here referred to as [`V3`] corresponds to files with magic numbers `safv3`,
10//! which is also sometimes referred to as "Version 1" in ANGSD. Likewise, what is here referred to
11//! as [`V4`] corresponds to files with magic numbers `safv4`, also sometimes known as "Version 2"
12//! in ANGSD.
13
14use std::{io, mem};
15
16use byteorder::{ReadBytesExt, LE};
17
18use crate::ReadStatus;
19
20use super::{
21    index::{self, Index, IndexReaderExt, IndexWriterExt},
22    reader::{Reader, ReaderExt},
23    record::{Band, Id, Likelihoods, Record},
24    writer::{Writer, WriterExt},
25};
26
/// Length in bytes of the magic number that identifies a SAF file version.
const MAGIC_LEN: usize = 8;
28
29/// A type that describes a SAF file version.
30///
31/// Users should not generally need to use methods defined by this trait directly. Rather, these
32/// methods are used by struct generic over methods instead.
33pub trait Version: Sized {
34    /// The numeric description of the SAF version.
35    const VERSION: u8;
36
37    /// The SAF version magic number.
38    const MAGIC_NUMBER: [u8; MAGIC_LEN];
39
40    /// The items contained in the SAF item file for this version.
41    type Item;
42
43    /// Creates a SAF record buffer suitable for reading from a reader for this version.
44    fn create_record_buf(index: &Index<Self>) -> Record<Id, Self::Item>;
45
46    /// Reads the SAF index record for this version from a reader.
47    fn read_index_record<R>(reader: &mut R) -> io::Result<index::Record<Self>>
48    where
49        R: io::BufRead;
50
51    /// Reads a single item from a reader into a provided buffer.
52    ///
53    /// The stream is assumed to be positioned immediately before the start of the item.
54    fn read_item<R>(reader: &mut R, buf: &mut Self::Item) -> io::Result<ReadStatus>
55    where
56        R: io::BufRead;
57
58    /// Reads a single record from a SAF reader into a provided buffer.
59    ///
60    /// The stream is assumed to be positioned immediately before the start of the record.
61    ///
62    /// Note that the record buffer needs to be correctly set up. Use [`Self::create_record_buf`]
63    /// for a correctly initialised record buffer to use for reading.
64    fn read_record<R>(
65        reader: &mut Reader<R, Self>,
66        buf: &mut Record<Id, Self::Item>,
67    ) -> io::Result<ReadStatus>
68    where
69        R: io::BufRead,
70    {
71        Reader::read_record(reader, buf)
72    }
73
74    /// Writes the SAF index record for to a reader.
75    fn write_index_record<W>(writer: &mut W, record: &index::Record<Self>) -> io::Result<()>
76    where
77        W: io::Write;
78
79    /// Writes a single item to a writer.
80    fn write_item<W>(writer: &mut W, item: &Self::Item) -> io::Result<()>
81    where
82        W: io::Write;
83
84    /// Writes a single record to a writer.
85    fn write_record<W, I>(
86        writer: &mut Writer<W, Self>,
87        record: &Record<I, Self::Item>,
88    ) -> io::Result<()>
89    where
90        W: io::Write,
91        I: AsRef<str>;
92
93    /// Reads the SAF version magic number from a reader.
94    fn read_magic<R>(reader: &mut R) -> io::Result<()>
95    where
96        R: io::BufRead,
97    {
98        let mut magic = [0; MAGIC_LEN];
99        reader.read_exact(&mut magic)?;
100
101        if magic == Self::MAGIC_NUMBER {
102            Ok(())
103        } else {
104            Err(io::Error::new(
105                io::ErrorKind::InvalidData,
106                format!(
107                    "invalid or unsupported SAF magic number \
108                    (found '{magic:02x?}', expected '{:02x?}')",
109                    Self::MAGIC_NUMBER
110                ),
111            ))
112        }
113    }
114
115    /// Writes the SAF version magic number to a writer.
116    fn write_magic<W>(writer: &mut W) -> io::Result<()>
117    where
118        W: io::Write,
119    {
120        writer.write_all(&Self::MAGIC_NUMBER)
121    }
122}
123
/// A marker type for the SAF version 3, i.e. files with the `safv3` magic number.
///
/// In this version, the SAF item file contains the full set of likelihoods for each sample
/// frequency.
///
/// See also the [module-level documentation](self) for a note on naming of versions.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct V3;
132
133impl Version for V3 {
134    const VERSION: u8 = 3;
135
136    const MAGIC_NUMBER: [u8; MAGIC_LEN] = [b's', b'a', b'f', b'v', b'3', 0, 0, 0];
137
138    type Item = Likelihoods;
139
140    fn create_record_buf(index: &Index<Self>) -> Record<Id, Self::Item> {
141        // Record likelihoods must be set up to be correct size from beginning
142        Record::from_alleles(0, 1, index.alleles())
143    }
144
145    fn read_index_record<R>(reader: &mut R) -> io::Result<index::Record<Self>>
146    where
147        R: io::BufRead,
148    {
149        let name = reader.read_contig_name()?;
150        let sites = reader.read_sites()?;
151        let position_offset = reader.read_position_offset()?;
152        let item_offset = reader.read_item_offset()?;
153
154        Ok(index::Record::new(
155            name,
156            sites,
157            position_offset,
158            item_offset,
159        ))
160    }
161
162    fn read_item<R>(reader: &mut R, buf: &mut Self::Item) -> io::Result<ReadStatus>
163    where
164        R: io::BufRead,
165    {
166        reader.read_likelihoods(buf)
167    }
168
169    fn write_index_record<W>(writer: &mut W, record: &index::Record<Self>) -> io::Result<()>
170    where
171        W: io::Write,
172    {
173        writer.write_contig_name(record.name())?;
174        writer.write_sites(record.sites())?;
175        writer.write_position_offset(record.position_offset())?;
176        writer.write_item_offset(record.item_offset())
177    }
178
179    fn write_item<W>(writer: &mut W, item: &Self::Item) -> io::Result<()>
180    where
181        W: io::Write,
182    {
183        writer.write_likelihoods(item)
184    }
185
186    fn write_record<W, I>(
187        writer: &mut Writer<W, Self>,
188        record: &Record<I, Self::Item>,
189    ) -> io::Result<()>
190    where
191        W: io::Write,
192        I: AsRef<str>,
193    {
194        let contig_id = record.contig_id().as_ref();
195
196        if let Some(index_record) = writer.index_record.as_mut() {
197            if index_record.name() == contig_id {
198                // We're on the same contig, so we can simply update index record
199                *index_record.sites_mut() += 1;
200            } else {
201                // We're on a new contig, which means we have to write the current record index
202                // and set up a new one
203                let position_offset = u64::from(writer.position_writer.virtual_position());
204                let item_offset = u64::from(writer.item_writer.virtual_position());
205
206                let new =
207                    index::Record::new(contig_id.to_string(), 1, position_offset, item_offset);
208
209                let old = mem::replace(index_record, new);
210                old.write(&mut writer.index_writer)?;
211            }
212        } else {
213            let offset = Self::MAGIC_NUMBER.len() as u64;
214            let index_record = index::Record::new(contig_id.to_string(), 0, offset, offset);
215            writer.index_record = Some(index_record);
216
217            return Self::write_record(writer, record);
218        }
219
220        // Write record
221        writer.position_writer.write_position(record.position())?;
222        Self::write_item(&mut writer.item_writer, record.item())?;
223
224        Ok(())
225    }
226}
227
/// A marker type for the SAF version 4, i.e. files with the `safv4` magic number.
///
/// In this version, the SAF item file contains only a smaller "band" of likelihoods centered around
/// the most likely sample frequency, along with information about the location of the band.
///
/// See also the [module-level documentation](self) for a note on naming of versions.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct V4;
236
237impl Version for V4 {
238    const VERSION: u8 = 4;
239
240    const MAGIC_NUMBER: [u8; MAGIC_LEN] = [b's', b'a', b'f', b'v', b'4', 0, 0, 0];
241
242    type Item = Band;
243
244    fn create_record_buf(_index: &Index<Self>) -> Record<Id, Self::Item> {
245        // Band is resized during reading, so we can simplify initialise empty band
246        Record::new(0, 1, Band::new(0, Vec::new()))
247    }
248
249    fn read_index_record<R>(reader: &mut R) -> io::Result<index::Record<Self>>
250    where
251        R: io::BufRead,
252    {
253        let name = reader.read_contig_name()?;
254        let sites = reader.read_sites()?;
255        let sum_band = reader.read_sum_band()?;
256        let position_offset = reader.read_position_offset()?;
257        let item_offset = reader.read_item_offset()?;
258
259        Ok(index::Record::new_with_sum_band(
260            name,
261            sites,
262            sum_band,
263            position_offset,
264            item_offset,
265        ))
266    }
267
268    fn read_item<R>(reader: &mut R, buf: &mut Self::Item) -> io::Result<ReadStatus>
269    where
270        R: io::BufRead,
271    {
272        if ReadStatus::check(reader)?.is_done() {
273            return Ok(ReadStatus::Done);
274        }
275
276        *buf.start_mut() = reader
277            .read_u32::<LE>()?
278            .try_into()
279            .expect("cannot convert band start to usize");
280
281        let len: usize = reader
282            .read_u32::<LE>()?
283            .try_into()
284            .expect("cannot convert band length to usize");
285
286        buf.likelihoods_mut().resize(len, 0.0);
287
288        reader
289            .read_likelihoods(buf.likelihoods_mut())
290            .map(|_| ReadStatus::NotDone)
291    }
292
293    fn write_index_record<W>(writer: &mut W, record: &index::Record<Self>) -> io::Result<()>
294    where
295        W: io::Write,
296    {
297        writer.write_contig_name(record.name())?;
298        writer.write_sites(record.sites())?;
299        writer.write_sum_band(record.sum_band())?;
300        writer.write_position_offset(record.position_offset())?;
301        writer.write_item_offset(record.item_offset())
302    }
303
304    fn write_item<W>(writer: &mut W, item: &Self::Item) -> io::Result<()>
305    where
306        W: io::Write,
307    {
308        writer.write_band(item)
309    }
310
311    fn write_record<W, I>(
312        writer: &mut Writer<W, Self>,
313        record: &Record<I, Self::Item>,
314    ) -> io::Result<()>
315    where
316        W: io::Write,
317        I: AsRef<str>,
318    {
319        let contig_id = record.contig_id().as_ref();
320
321        if let Some(index_record) = writer.index_record.as_mut() {
322            if index_record.name() == contig_id {
323                // We're on the same contig, so we can simply update index record
324                *index_record.sum_band_mut() += record.item().likelihoods().len();
325                *index_record.sites_mut() += 1;
326            } else {
327                // We're on a new contig, which means we have to write the current record index
328                // and set up a new one
329                let position_offset = u64::from(writer.position_writer.virtual_position());
330                let item_offset = u64::from(writer.item_writer.virtual_position());
331
332                let new = index::Record::new_with_sum_band(
333                    contig_id.to_string(),
334                    1,
335                    0,
336                    position_offset,
337                    item_offset,
338                );
339
340                let old = mem::replace(index_record, new);
341                old.write(&mut writer.index_writer)?;
342            }
343        } else {
344            let offset = Self::MAGIC_NUMBER.len() as u64;
345            let index_record =
346                index::Record::new_with_sum_band(contig_id.to_string(), 0, 0, offset, offset);
347            writer.index_record = Some(index_record);
348
349            return Self::write_record(writer, record);
350        }
351
352        // Write record
353        writer.position_writer.write_position(record.position())?;
354        Self::write_item(&mut writer.item_writer, record.item())?;
355
356        Ok(())
357    }
358}