rosm_pbf_reader/
lib.rs

1//! A low-level library for parsing OSM data in PBF format.
2//!
3//! An OSM PBF file is a sequence of blobs. These blobs can be read with [`read_blob`]. The
4//! [`RawBlock`]s returned by `read_blob` can then be decompressed and parsed by
5//! [`BlockParser::parse_block`], which returns a [`Block`], containing either a parsed
6//! header/primitive block or an unknown block's binary data.
7//!
8//! The library also provides utilities for reading densely or delta encoded data in these blocks.
9//!
10//! Raw header and primitive block definitions (generated by [Prost](https://github.com/tokio-rs/prost)) are exported
11//! through the `pbf` module.
12//!
13//! # Links
14//!
15//! - [OSM PBF format documentation](https://wiki.openstreetmap.org/wiki/PBF_Format)
16
17#![forbid(unsafe_code)]
18
19#[cfg(feature = "default")]
20use flate2::read::ZlibDecoder;
21
22use prost::Message;
23
24use std::convert::From;
25#[cfg(feature = "default")]
26use std::io::prelude::*;
27use std::io::ErrorKind;
28use std::str;
29
30pub mod dense;
31pub mod pbf;
32pub mod util;
33
34/// Possible errors returned by the library.
35#[derive(Debug)]
36pub enum Error {
37    /// Returned when a PBF parse error has occured.
38    PbfParseError(prost::DecodeError),
39    /// Returned when reading from the input stream or decompression of blob data has failed.
40    IoError(std::io::Error),
41    /// Returned when a blob header with an invalid size (negative or >=64 KB) is encountered.
42    InvalidBlobHeader,
43    /// Returned when blob data with an invalid size (negative or >=32 MB) is encountered.
44    InvalidBlobData,
45    /// Returned when an error has occured during blob decompression.
46    DecompressionError(DecompressionError),
47    /// Returned when some assumption in the data is violated (for example, an out of bounds index is encountered).
48    LogicError(String),
49}
50
51impl std::fmt::Display for Error {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        write!(f, "{self:?}")
54    }
55}
56
57impl std::error::Error for Error {}
58
59/// Result of [`BlockParser::parse_block`].
60pub enum Block<'a> {
61    /// A raw `OSMHeader` block.
62    Header(pbf::HeaderBlock),
63    /// A raw `OSMData` (primitive) block.
64    Primitive(pbf::PrimitiveBlock),
65    /// An unknown block.
66    Unknown(&'a [u8]),
67}
68
/// Kind of a block, derived from the `type` string of its blob header.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BlockType {
    /// Blob header type `"OSMHeader"`.
    Header,
    /// Blob header type `"OSMData"`.
    Primitive,
    /// Any other blob header type.
    Unknown,
}

impl From<&str> for BlockType {
    /// Maps a blob header type string to a `BlockType`.
    ///
    /// The comparison is exact (case sensitive); unrecognized strings map to
    /// [`BlockType::Unknown`].
    fn from(value: &str) -> Self {
        match value {
            "OSMHeader" => Self::Header,
            "OSMData" => Self::Primitive,
            _ => Self::Unknown,
        }
    }
}

/// An unparsed, possibly compressed block.
pub struct RawBlock {
    /// Block kind taken from the blob header.
    r#type: BlockType,
    /// The still encoded blob bytes as read from the input.
    data: Vec<u8>,
}

impl std::fmt::Debug for RawBlock {
    /// Formats the block compactly: the payload is summarized by its length, because dumping the
    /// raw bytes of a blob would be useless as diagnostic output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("RawBlock")
            .field("type", &self.r#type)
            .field("data_len", &self.data.len())
            .finish()
    }
}
90
91/// Reads the next blob from `pbf`.
92///
93/// # Examples
94///
95/// ```no_run
96/// use rosm_pbf_reader::read_blob;
97///
98/// use std::fs::File;
99///
100/// let mut file = File::open("some.osm.pbf").unwrap();
101///
102/// while let Some(result) = read_blob(&mut file) {
103///     match result {
104///         Ok(raw_block) => {}
105///         Err(error) => {}
106///     }
107/// }
108/// ```
109pub fn read_blob<Input>(pbf: &mut Input) -> Option<Result<RawBlock, Error>>
110where
111    Input: std::io::Read,
112{
113    let mut header_size_buffer = [0u8; 4];
114
115    if let Err(error) = pbf.read_exact(&mut header_size_buffer) {
116        return match error.kind() {
117            ErrorKind::UnexpectedEof => None,
118            _ => Some(Err(Error::IoError(error))),
119        };
120    }
121
122    Some(read_blob_inner(pbf, header_size_buffer))
123}
124
125fn read_blob_inner<Input>(pbf: &mut Input, header_size_buffer: [u8; 4]) -> Result<RawBlock, Error>
126where
127    Input: std::io::Read,
128{
129    use pbf::BlobHeader;
130
131    let blob_header_size: usize = i32::from_be_bytes(header_size_buffer)
132        .try_into()
133        .map_err(|_err| Error::InvalidBlobHeader)?;
134
135    if blob_header_size >= 64 * 1024 {
136        return Err(Error::InvalidBlobHeader);
137    }
138
139    let mut blob = vec![0u8; blob_header_size];
140    if let Err(error) = pbf.read_exact(&mut blob) {
141        return Err(Error::IoError(error));
142    }
143
144    let blob_header = match BlobHeader::decode(&*blob) {
145        Ok(blob_header) => blob_header,
146        Err(error) => return Err(Error::PbfParseError(error)),
147    };
148
149    let block_type = BlockType::from(blob_header.r#type.as_ref());
150    let blob_size: usize = blob_header.datasize.try_into().map_err(|_err| Error::InvalidBlobData)?;
151
152    if blob_size >= 32 * 1024 * 1024 {
153        return Err(Error::InvalidBlobData);
154    }
155
156    blob.resize_with(blob_size, Default::default);
157
158    if let Err(error) = pbf.read_exact(&mut blob) {
159        return Err(Error::IoError(error));
160    }
161
162    let raw_block = RawBlock {
163        r#type: block_type,
164        data: blob,
165    };
166
167    Ok(raw_block)
168}
169
/// Blob compression method.
///
/// Passed to [`Decompressor::decompress`] to tell the implementation how the input is encoded.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompressionMethod {
    /// LZ4
    Lz4,
    /// LZMA
    Lzma,
    /// ZLib
    Zlib,
    /// Zstandard
    Zstd,
}
181
/// Possible errors returned by [`Decompressor`] implementations.
#[derive(Debug)]
pub enum DecompressionError {
    /// The given compression method isn't supported by the decompressor.
    UnsupportedCompression,
    /// An internal error occurred during decompression.
    InternalError(Box<dyn std::error::Error + Send + Sync>),
}

impl std::fmt::Display for DecompressionError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            DecompressionError::UnsupportedCompression => f.write_str("unsupported compression method"),
            DecompressionError::InternalError(error) => write!(f, "decompression failed: {error}"),
        }
    }
}

impl std::error::Error for DecompressionError {
    /// Returns the decompressor's own error for [`DecompressionError::InternalError`].
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            DecompressionError::UnsupportedCompression => None,
            DecompressionError::InternalError(error) => Some(&**error),
        }
    }
}
190
191/// Trait for custom decompression support.
///
/// Implementations are selected through the type parameter of [`BlockParser`], which invokes
/// `D::decompress` as an associated function; no decompressor value is ever constructed.
192pub trait Decompressor {
193    /// Decompresses `input` blob into the preallocated `output` slice.
    ///
    /// `method` identifies how `input` is compressed. [`BlockParser::parse_block`] sizes `output`
    /// from the size declared in the blob before calling this function.
    ///
    /// # Errors
    ///
    /// Implementations report failures as a [`DecompressionError`]; `parse_block` forwards it to
    /// its caller wrapped in [`Error::DecompressionError`].
194    fn decompress(method: CompressionMethod, input: &[u8], output: &mut [u8]) -> Result<(), DecompressionError>;
195}
196
197/// The default blob decompressor.
198///
199/// Supports ZLib decompression if default features are enabled.
200pub struct DefaultDecompressor;
201
202impl Decompressor for DefaultDecompressor {
203    #[cfg(feature = "default")]
204    fn decompress(method: CompressionMethod, input: &[u8], output: &mut [u8]) -> Result<(), DecompressionError> {
205        match method {
206            CompressionMethod::Zlib => {
207                let mut decoder = ZlibDecoder::new(input);
208
209                match decoder.read_exact(output) {
210                    Ok(_) => Ok(()),
211                    Err(error) => Err(DecompressionError::InternalError(Box::new(error))),
212                }
213            }
214            _ => Err(DecompressionError::UnsupportedCompression),
215        }
216    }
217
218    #[cfg(not(feature = "default"))]
219    fn decompress(_method: CompressionMethod, _input: &[u8], _output: &mut [u8]) -> Result<(), DecompressionError> {
220        Err(DecompressionError::UnsupportedCompression)
221    }
222}
223
224/// Parser with an internal buffer for `RawBlock`s.
225///
226/// When multiple threads are used to speed up parsing, it's recommended to use a single
227/// `BlockParser` per thread (e.g. by making it thread local), so its internal buffer remains
228/// alive, avoiding repeated memory allocations.
229pub struct BlockParser<D: Decompressor = DefaultDecompressor> {
230    block_buffer: Vec<u8>,
231    decompressor: std::marker::PhantomData<D>,
232}
233
234impl Default for BlockParser {
235    fn default() -> Self {
236        BlockParser::<DefaultDecompressor>::new()
237    }
238}
239
240impl<D: Decompressor> BlockParser<D> {
241    /// Creates a new `BlockParser`.
242    pub fn new() -> Self {
243        Self {
244            block_buffer: Vec::new(),
245            decompressor: Default::default(),
246        }
247    }
248
249    /// Parses `raw_block` into a header, primitive or unknown block.
250    ///
251    /// # Errors
252    ///
253    /// Will return `Err` if an error occurs during PBF parsing, decompression or validation.
254    #[allow(deprecated)]
255    pub fn parse_block(&mut self, raw_block: RawBlock) -> Result<Block, Error> {
256        let blob = match pbf::Blob::decode(&*raw_block.data) {
257            Ok(blob) => blob,
258            Err(error) => return Err(Error::PbfParseError(error)),
259        };
260
261        if let Some(uncompressed_size) = blob.raw_size {
262            let uncompressed_size: usize = uncompressed_size.try_into().map_err(|_err| Error::InvalidBlobData)?;
263            self.block_buffer.resize_with(uncompressed_size, Default::default);
264        }
265
266        if let Some(blob_data) = blob.data {
267            match blob_data {
268                pbf::blob::Data::Raw(raw_data) => self.block_buffer.extend_from_slice(&raw_data),
269                pbf::blob::Data::ZlibData(zlib_data) => {
270                    if let Err(error) = D::decompress(CompressionMethod::Zlib, &zlib_data, &mut self.block_buffer) {
271                        return Err(Error::DecompressionError(error));
272                    }
273                }
274                pbf::blob::Data::Lz4Data(lz4_data) => {
275                    if let Err(error) = D::decompress(CompressionMethod::Lz4, &lz4_data, &mut self.block_buffer) {
276                        return Err(Error::DecompressionError(error));
277                    }
278                }
279                pbf::blob::Data::LzmaData(lzma_data) => {
280                    if let Err(error) = D::decompress(CompressionMethod::Lzma, &lzma_data, &mut self.block_buffer) {
281                        return Err(Error::DecompressionError(error));
282                    }
283                }
284                pbf::blob::Data::ZstdData(zstd_data) => {
285                    if let Err(error) = D::decompress(CompressionMethod::Zstd, &zstd_data, &mut self.block_buffer) {
286                        return Err(Error::DecompressionError(error));
287                    }
288                }
289                pbf::blob::Data::ObsoleteBzip2Data(_) => return Err(Error::InvalidBlobData),
290            }
291        } else {
292            return Err(Error::InvalidBlobData);
293        }
294
295        match raw_block.r#type {
296            BlockType::Header => match pbf::HeaderBlock::decode(&*self.block_buffer) {
297                Ok(header_block) => Ok(Block::Header(header_block)),
298                Err(error) => Err(Error::PbfParseError(error)),
299            },
300            BlockType::Primitive => match pbf::PrimitiveBlock::decode(&*self.block_buffer) {
301                Ok(primitive_block) => Ok(Block::Primitive(primitive_block)),
302                Err(error) => Err(Error::PbfParseError(error)),
303            },
304            BlockType::Unknown => Ok(Block::Unknown(&self.block_buffer)),
305        }
306    }
307}
308
309/// Generalized implementation for reading normal or densely encoded tags from string tables.
310///
311/// Use [`new_tag_reader`] or [`dense::new_dense_tag_reader`] to construct it.
312pub struct TagReader<'a, I>
313where
314    I: Iterator<Item = (Result<usize, Error>, Result<usize, Error>)>,
315{
316    string_table: &'a pbf::StringTable,
317    iter: I,
318}
319
320impl<'a, I> Iterator for TagReader<'a, I>
321where
322    I: Iterator<Item = (Result<usize, Error>, Result<usize, Error>)>,
323{
324    /// Tag as a (key, value) pair, containing either a string or an error if decoding has failed
325    type Item = (Result<&'a str, Error>, Result<&'a str, Error>);
326
327    fn next(&mut self) -> Option<Self::Item> {
328        match self.iter.next() {
329            Some((key, value)) => {
330                let decode_string = |index: usize| -> Result<&str, Error> {
331                    if let Some(bytes) = self.string_table.s.get(index) {
332                        if let Ok(utf8_string) = str::from_utf8(bytes) {
333                            Ok(utf8_string)
334                        } else {
335                            Err(Error::LogicError(format!("string at index {index} is not valid UTF-8")))
336                        }
337                    } else {
338                        Err(Error::LogicError(format!(
339                            "string table index {index} is out of bounds ({})",
340                            self.string_table.s.len()
341                        )))
342                    }
343                };
344
345                let key = match key {
346                    Ok(key_idx) => decode_string(key_idx),
347                    Err(error) => Err(error),
348                };
349
350                let value = match value {
351                    Ok(value_idx) => decode_string(value_idx),
352                    Err(error) => Err(error),
353                };
354
355                Some((key, value))
356            }
357            None => None,
358        }
359    }
360}
361
362/// Constructs a new `TagReader` from key and value index slices, and a corresponding string table.
363///
364/// # Examples
365///
366/// ```no_run
367/// use rosm_pbf_reader::{pbf, new_tag_reader};
368///
369/// fn process_primitive_block(block: pbf::PrimitiveBlock) {
370///     for group in &block.primitivegroup {
371///         for way in &group.ways {
372///             let tags = new_tag_reader(&block.stringtable, &way.keys, &way.vals);
373///             for (key, value) in tags {
374///                 println!("{}: {}", key.unwrap(), value.unwrap());
375///             }
376///         }
377///     }
378/// }
379pub fn new_tag_reader<'a>(
380    string_table: &'a pbf::StringTable,
381    key_indices: &'a [u32],
382    value_indices: &'a [u32],
383) -> TagReader<'a, impl Iterator<Item = (Result<usize, Error>, Result<usize, Error>)> + 'a> {
384    TagReader {
385        string_table,
386        iter: key_indices
387            .iter()
388            .map(|i| Ok(*i as usize))
389            .zip(value_indices.iter().map(|i| Ok(*i as usize))),
390    }
391}
392
#[cfg(test)]
mod tag_reader_tests {
    use super::*;

    /// Builds a string table from UTF-8 strings.
    fn string_table(strings: &[&str]) -> pbf::StringTable {
        pbf::StringTable {
            s: strings.iter().map(|s| s.as_bytes().to_vec()).collect(),
        }
    }

    #[test]
    fn valid_input() {
        let string_table = string_table(&["", "key1", "val1", "key2", "val2"]);

        let key_indices = [1, 3];
        let value_indices = [2, 4];
        let mut reader = new_tag_reader(&string_table, &key_indices, &value_indices);

        // `matches!` only yields a bool, so it has to be asserted to check anything.
        assert!(matches!(reader.next(), Some((Ok("key1"), Ok("val1")))));
        assert!(matches!(reader.next(), Some((Ok("key2"), Ok("val2")))));

        assert!(reader.next().is_none());
    }

    #[test]
    fn out_of_bounds_index() {
        let string_table = string_table(&["", "key1", "val1"]);

        let key_indices = [7];
        let value_indices = [2];
        let mut reader = new_tag_reader(&string_table, &key_indices, &value_indices);

        assert!(matches!(
            reader.next(),
            Some((Err(Error::LogicError(_)), Ok("val1")))
        ));
        assert!(reader.next().is_none());
    }
}
414
/// Utility for reading delta-encoded values directly, like [`pbf::Way::refs`] and [`pbf::Relation::memids`].
///
/// Each stored element is the difference to the previous decoded value; the iterator yields the
/// running sum, starting from `T::default()`.
pub struct DeltaValueReader<'a, T> {
    /// Deltas that have not been consumed yet.
    remaining: &'a [T],
    /// Sum of all deltas consumed so far.
    accumulated: T,
}

impl<'a, T> DeltaValueReader<'a, T>
where
    T: std::default::Default,
{
    /// Constructs a new `DeltaValueReader` from a slice of values.
    ///
    /// # Examples
    ///
    /// ```no_run
    /// use rosm_pbf_reader::{pbf, DeltaValueReader};
    ///
    /// fn process_primitive_block(block: pbf::PrimitiveBlock) {
    ///     for group in &block.primitivegroup {
    ///         for way in &group.ways {
    ///             let refs = DeltaValueReader::new(&way.refs);
    ///             for node_id in refs {
    ///                 println!("{}", node_id);
    ///             }
    ///         }
    ///     }
    /// }
    /// ```
    pub fn new(values: &'a [T]) -> Self {
        Self {
            accumulated: T::default(),
            remaining: values,
        }
    }
}

impl<T> Iterator for DeltaValueReader<'_, T>
where
    T: std::ops::AddAssign + std::clone::Clone,
{
    type Item = T;

    /// Adds the next delta to the running sum and returns the new sum, or `None` once all
    /// deltas have been consumed.
    fn next(&mut self) -> Option<Self::Item> {
        let (delta, rest) = self.remaining.split_first()?;

        self.accumulated += delta.clone();
        self.remaining = rest;

        Some(self.accumulated.clone())
    }
}
467
#[cfg(test)]
mod delta_value_reader_tests {
    use super::*;

    /// A reader over no deltas yields nothing.
    #[test]
    fn empty_input() {
        let deltas: &[i64] = &[];
        let mut reader = DeltaValueReader::new(deltas);
        assert_eq!(reader.next(), None);
    }

    /// Each yielded value is the sum of all deltas up to and including the current one.
    #[test]
    fn valid_input() {
        let values = [10, -1, 4, -2];
        let mut reader = DeltaValueReader::new(&values);

        for expected in [10, 9, 13, 11] {
            assert_eq!(reader.next(), Some(expected));
        }
    }
}
487}