noodles_bgzf/
lib.rs

1//! **noodles-bgzf** handles the reading and writing of the blocked gzip format (BGZF).
2//!
3//! While the gzip format is typically a single stream, a BGZF is the concatenation of many gzip
4//! streams. Each stream is called a block, with its uncompressed data size being constrained to
5//! at most 64 KiB. This multistream gzip allows random access using [`virtual positions`].
6//!
7//! noodles-bgzf abstracts away the concept of blocks, implementing [`std::io::Read`] for the
8//! reader and [`std::io::Write`] for the writer.
9//!
10//! [`virtual positions`]: VirtualPosition
11//!
12//! # Examples
13//!
14//! ## Read an entire BGZF file
15//!
16//! ```no_run
17//! # use std::{fs::File, io::{self, Read}};
18//! use noodles_bgzf as bgzf;
19//! let mut reader = File::open("data.gz").map(bgzf::io::Reader::new)?;
20//! let mut data = Vec::new();
21//! reader.read_to_end(&mut data)?;
22//! # Ok::<(), io::Error>(())
23//! ```
24//!
25//! ## Write a BGZF file
26//!
27//! ```no_run
28//! # use std::{fs::File, io::{self, Write}};
29//! use noodles_bgzf as bgzf;
30//! let mut writer = File::create("data.gz").map(bgzf::io::Writer::new)?;
31//! writer.write_all(b"noodles-bgzf")?;
32//! # Ok::<(), io::Error>(())
33//! ```
34
35#[cfg(feature = "async")]
36pub mod r#async;
37
38pub(crate) mod deflate;
39mod gz;
40pub mod gzi;
41pub mod io;
42pub mod virtual_position;
43
44pub use self::virtual_position::VirtualPosition;
45
46#[deprecated(since = "0.38.0", note = "Use `bgzf::io::IndexedReader` instead.")]
47pub use self::io::IndexedReader;
48
49#[deprecated(
50    since = "0.38.0",
51    note = "Use `bgzf::io::MultithreadedReader` instead."
52)]
53pub use self::io::MultithreadedReader;
54
55#[deprecated(
56    since = "0.38.0",
57    note = "Use `bgzf::io::MultithreadedWriter` instead."
58)]
59pub use self::io::MultithreadedWriter;
60
61#[deprecated(since = "0.38.0", note = "Use `bgzf::io::Reader` instead.")]
62pub use self::io::Reader;
63
64#[deprecated(since = "0.38.0", note = "Use `bgzf::io::Writer` instead.")]
65pub use self::io::Writer;
66
67#[cfg(feature = "async")]
68#[deprecated(since = "0.35.0", note = "Use `bgzf::r#async::io::Reader` instead.")]
69pub use self::r#async::io::Reader as AsyncReader;
70
71#[cfg(feature = "async")]
72#[deprecated(since = "0.35.0", note = "Use `bgzf::r#async::Writer` instead.")]
73pub use self::r#async::Writer as AsyncWriter;
74
75// XLEN (2): width in bytes of the gzip extra-field length (XLEN) field.
76const GZIP_XLEN_SIZE: usize = 2;
77
78// SI1 (1) + SI2 (1) + SLEN (2) + BSIZE (2): summed byte widths of the BGZF extra subfield.
79const BGZF_XLEN: usize = 6;
80
81// § 4.1 The BGZF compression format (2021-06-03): "Thus while `ISIZE` is stored as a `uint32_t` as
82// per the gzip format, in BGZF it is limited to the range [0, 65536]."
83const BGZF_MAX_ISIZE: usize = 1 << 16; // 65536, the upper bound of the range quoted above
84// Sum of `gz::HEADER_SIZE` (presumably the fixed gzip header; defined in `gz`), XLEN, and the subfield.
85pub(crate) const BGZF_HEADER_SIZE: usize = gz::HEADER_SIZE + GZIP_XLEN_SIZE + BGZF_XLEN;
86
#[cfg(test)]
mod tests {
    //! Round-trip tests: data written with a BGZF writer must be read back unchanged by the
    //! matching reader.

    use std::io::{self, BufRead, Cursor, Read, Write};

    // The readers and writers are imported from `super::io` rather than through the crate-root
    // aliases, which are marked `#[deprecated]` in favor of the `bgzf::io::*` paths. Note that
    // the bare name `io` in this module refers to `std::io` (imported above).
    use super::{
        io::{MultithreadedReader, MultithreadedWriter, Reader, Writer},
        VirtualPosition,
    };

    /// Writes three chunks with a flush after each of the first two, then checks that reading
    /// the finished output yields the chunks concatenated.
    #[test]
    fn test_self() -> io::Result<()> {
        let mut writer = Writer::new(Vec::new());

        writer.write_all(b"noodles")?;
        writer.flush()?;
        writer.write_all(b"-")?;
        writer.flush()?;
        writer.write_all(b"bgzf")?;

        let data = writer.finish()?;
        let mut reader = Reader::new(&data[..]);

        let mut buf = Vec::new();
        reader.read_to_end(&mut buf)?;

        assert_eq!(buf, b"noodles-bgzf");

        Ok(())
    }

    /// Reads a single write back line by line through `BufRead`, checking both the lines and
    /// the virtual position reported at the start of each line.
    #[test]
    fn test_self_buffered() -> io::Result<()> {
        let mut writer = Writer::new(Vec::new());

        writer.write_all(b"noodles\n-\nbgzf\nbuffered")?;

        let data = writer.finish()?;
        let mut reader = Reader::new(&data[..]);

        let mut lines = Vec::new();
        let mut virtual_positions = Vec::new();

        loop {
            // Capture the position before reading; it is only recorded if a line follows.
            let start = reader.virtual_position();

            let mut line = String::new();

            // A read of zero bytes signals the end of the stream.
            if reader.read_line(&mut line)? == 0 {
                break;
            }

            virtual_positions.push(start);
            lines.push(line);
        }

        let expected_lines = ["noodles\n", "-\n", "bgzf\n", "buffered"];
        assert_eq!(lines, expected_lines);

        // Cumulative byte offsets of each line start: 0, 8, 8 + 2, 10 + 5. The first tuple
        // element is presumably the compressed offset (0 for every line here) and the second
        // the uncompressed offset; confirm against `VirtualPosition`'s `TryFrom` impl.
        let expected_upos = [0, 8, 10, 15];
        let expected_virtual_positions: Vec<VirtualPosition> = expected_upos
            .iter()
            .map(|x| VirtualPosition::try_from((0, *x)).unwrap())
            .collect();
        assert_eq!(virtual_positions, expected_virtual_positions);

        Ok(())
    }

    /// Same round trip as `test_self`, using the multithreaded writer and reader. The finished
    /// output is wrapped in a `Cursor` before being handed to the reader.
    #[test]
    fn test_self_multithreaded() -> io::Result<()> {
        let mut writer = MultithreadedWriter::new(Vec::new());

        writer.write_all(b"noodles")?;
        writer.flush()?;
        writer.write_all(b"-")?;
        writer.flush()?;
        writer.write_all(b"bgzf")?;

        let data = writer.finish().map(Cursor::new)?;
        let mut reader = MultithreadedReader::new(data);

        let mut buf = Vec::new();
        reader.read_to_end(&mut buf)?;

        assert_eq!(buf, b"noodles-bgzf");

        Ok(())
    }
}