gistools/readers/osm/
mod.rs

1/// Blob defines a chunk of data to be parsed as either a header or a primitive
2pub mod blob;
3/// Filtering tool
4pub mod filter;
5/// OSM Header Block defines the contents of the file
6pub mod header_block;
7/// Info Block defines optional metadata that may be included into each primitive
8pub mod info;
9/// Node Block defines the contents of a node
10pub mod node;
11/// Primitive Block defines the contents of a node, way or relation
12pub mod primitive;
13/// Relation Block defines the contents of a relation
14pub mod relation;
15/// Way Block defines the contents of a way
16pub mod way;
17
18#[cfg(feature = "std")]
19use crate::data_store::kv::file::FileKV;
20use crate::{
21    data_store::{KV, KVStore},
22    parsers::{FeatureReader, Reader},
23};
24use alloc::{boxed::Box, vec::Vec};
25use blob::{Blob, BlobHeader};
26use core::fmt::Debug;
27use filter::*;
28use header_block::{HeaderBlock, OSMHeader};
29use node::IntermediateNode;
30use pbf::{Field, Protobuf, Type};
31use primitive::{OSMMetadata, PrimitiveBlock};
32use relation::IntermediateRelation;
33use s2json::{MValue, Properties, VectorFeature, VectorPoint};
34use way::{IntermediateWay, WayNodes};
35
36// TODO: Add threads for reading the blocks
37
/// OSM Reader options
///
/// Passed (optionally) to [`OSMReader::new`]. Every field has a value in the [`Default`]
/// implementation, noted on each field below.
#[derive(Debug, Clone)]
pub struct OSMReaderOptions {
    /// If true, remove nodes that have no tags [Default = false]
    pub remove_empty_nodes: bool,
    /// If provided, a tag filter that is handed to the reader [Default = None]
    // NOTE(review): the original sentence was cut off ("filters of the"); the exact matching
    // rules are defined in the `filter` module - confirm there before relying on them.
    pub tag_filter: Option<OSMTagFilter>,
    /// If set to true, nodes will be skipped. [Default = false]
    pub skip_nodes: bool,
    /// If set to true, ways will be skipped. [Default = false]
    pub skip_ways: bool,
    /// If set to true, relations will be skipped. [Default = false]
    pub skip_relations: bool,
    /// If set to true, ways will be converted to areas if they are closed.
    /// NOTE: They are upgraded anyways if the tag "area" is set to "yes".
    /// [Default = false]
    pub upgrade_ways_to_areas: bool,
    /// If set to true, add a bbox property to each feature [Default = true]
    pub add_bbox: bool,
}
58impl Default for OSMReaderOptions {
59    fn default() -> Self {
60        OSMReaderOptions {
61            remove_empty_nodes: false,
62            tag_filter: None,
63            skip_nodes: false,
64            skip_ways: false,
65            skip_relations: false,
66            upgrade_ways_to_areas: false,
67            add_bbox: true,
68        }
69    }
70}
71
/// OSM File Reader: an [`OSMReader`] whose node-geometry, node, way-geometry, way, and relation
/// stores are all [`FileKV`] stores instead of the in-memory [`KV`] stores used by
/// [`OSMLocalReader`].
///
/// Only available when the `std` feature is enabled.
///
/// See [`OSMReader`] for full documentation.
#[cfg(feature = "std")]
pub type OSMFileReader<T> = OSMReader<
    T,
    FileKV<u64, VectorPoint<MValue>>,
    FileKV<u64, IntermediateNode>,
    FileKV<u64, WayNodes>,
    FileKV<u64, IntermediateWay>,
    FileKV<u64, IntermediateRelation>,
>;
/// OSM File Reader Iterator: the [`OsmReaderIter`] specialization that borrows from a reader
/// using [`FileKV`] for every store (the same store types as [`OSMFileReader`]).
///
/// Only available when the `std` feature is enabled.
#[cfg(feature = "std")]
pub type OSMFileReaderIter<'a, T> = OsmReaderIter<
    'a,
    T,
    FileKV<u64, VectorPoint<MValue>>,
    FileKV<u64, IntermediateNode>,
    FileKV<u64, WayNodes>,
    FileKV<u64, IntermediateWay>,
    FileKV<u64, IntermediateRelation>,
>;
95
/// OSM Local (buffer) Reader: an [`OSMReader`] whose node-geometry, node, way-geometry, way, and
/// relation stores are all [`KV`] stores. These are the same store types [`OSMReader`] falls
/// back to through its default type parameters.
///
/// See [`OSMReader`] for full documentation.
pub type OSMLocalReader<T> = OSMReader<
    T,
    KV<u64, VectorPoint<MValue>>,
    KV<u64, IntermediateNode>,
    KV<u64, WayNodes>,
    KV<u64, IntermediateWay>,
    KV<u64, IntermediateRelation>,
>;
/// OSM Local (buffer) Reader Iterator: the [`OsmReaderIter`] specialization that borrows from a
/// reader using [`KV`] for every store (the same store types as [`OSMLocalReader`]).
pub type OSMLocalReaderIter<'a, T> = OsmReaderIter<
    'a,
    T,
    KV<u64, VectorPoint<MValue>>,
    KV<u64, IntermediateNode>,
    KV<u64, WayNodes>,
    KV<u64, IntermediateWay>,
    KV<u64, IntermediateRelation>,
>;
117
118/// # OSM Reader
119///
120/// ## Description
121/// Parses OSM PBF files
122///
123/// Implements the [`FeatureReader`] trait
124///
125/// ## Usage
126///
127/// For Simplicity, you can use the [`OSMFileReader`] or [`OSMLocalReader`] wrappers.
128///
129/// The methods you have access to:
130/// - [`OSMReader::new`]: Create a new OSMReader
131/// - [`OSMReader::cleanup`]: Cleans up the reader's temp data
132/// - [`OSMReader::get_header`]: Get the OSM Header
133/// - [`OSMReader::parse_blocks`]: Parse all blocks to prepare for reads
134/// - [`OSMReader::next_block`]: Get the next block (if you want to iteratively use blocks yourself)
135/// - [`OSMReader::par_parse_node_blocks`]: If you are only interested in nodes, parse all of them quicker using threads
136/// - [`OSMReader::parse_node_blocks`]: If you are only interested in nodes, parse all of them quicker
137/// - [`OSMReader::iter`]: Create a new OSMReader Iterator
138///
139/// ### Local Reader
140/// All data is stored locally in memory and will be cleaned up on drop
141///
142/// ```rust
143/// use gistools::{parsers::{FileReader, FeatureReader}, readers::OSMLocalReader};
144/// use std::path::PathBuf;
145///
146/// let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
147/// path.push("tests/readers/osm/fixtures/test.pbf");
148/// let path_str = path.to_str().unwrap();
149/// let reader = FileReader::from(path_str);
150///
151/// let mut osm = OSMLocalReader::new(reader, None);
152/// osm.parse_blocks();
153///
154/// let features: Vec<_> = osm.iter().collect();
155/// assert_eq!(features.len(), 8);
156/// ```
#[derive(Debug, Clone)]
pub struct OSMReader<
    T: Reader,
    _N: KVStore<u64, VectorPoint<MValue>> = KV<u64, VectorPoint<MValue>>,
    N: KVStore<u64, IntermediateNode> = KV<u64, IntermediateNode>,
    _W: KVStore<u64, WayNodes> = KV<u64, WayNodes>,
    W: KVStore<u64, IntermediateWay> = KV<u64, IntermediateWay>,
    R: KVStore<u64, IntermediateRelation> = KV<u64, IntermediateRelation>,
> {
    /// The input reader
    reader: T,
    /// If true, skip nodes that have no tags.
    /// Copied from [`OSMReaderOptions::remove_empty_nodes`] when the reader is created.
    skip_empty_nodes: bool,
    /// Optional tag filter, copied from [`OSMReaderOptions::tag_filter`]
    // NOTE(review): the original sentence was cut off ("filters of the"); the matching rules
    // live in the `filter` module - confirm there.
    tag_filter: Option<OSMTagFilter>,
    /// If set to true, nodes will be skipped
    skip_nodes: bool,
    /// If set to true, ways will be skipped
    skip_ways: bool,
    /// If set to true, relations will be skipped
    skip_relations: bool,
    /// If set to true, ways will be converted to areas if they are closed
    /// NOTE: They are upgraded anyways if the tag "area" is set to "yes"
    /// [Default = false]
    upgrade_ways_to_areas: bool,
    /// If set to true, add a bbox property to each feature
    add_bbox: bool,
    /// The current offset of the reader (position of the next unread byte in `reader`)
    _offset: u64,
    /// Track if the data has been parsed or not; set once `parse_blocks` has completed
    _parsed: bool,
    /// Node Geometry Store (node id -> point)
    node_geometry: _N,
    /// Intermediate node store (node id -> intermediate node)
    nodes: N,
    /// Way Geometry Store (way id -> node references of the way)
    way_geometry: _W,
    /// Intermediate way store (way id -> intermediate way)
    ways: W,
    /// Intermediate relation store (relation id -> intermediate relation)
    relations: R,
}
199impl<
200    T: Reader,
201    _N: KVStore<u64, VectorPoint<MValue>>,
202    N: KVStore<u64, IntermediateNode>,
203    _W: KVStore<u64, WayNodes>,
204    W: KVStore<u64, IntermediateWay>,
205    R: KVStore<u64, IntermediateRelation>,
206> OSMReader<T, _N, N, _W, W, R>
207{
208    /// Creates a new OSM Reader
209    pub fn new(reader: T, options: Option<OSMReaderOptions>) -> Self {
210        let options = options.unwrap_or_default();
211        OSMReader {
212            reader,
213            skip_empty_nodes: options.remove_empty_nodes,
214            tag_filter: options.tag_filter,
215            skip_nodes: options.skip_nodes,
216            skip_ways: options.skip_ways,
217            skip_relations: options.skip_relations,
218            upgrade_ways_to_areas: options.upgrade_ways_to_areas,
219            add_bbox: options.add_bbox,
220            _offset: 0,
221            _parsed: false,
222            node_geometry: _N::new(None),
223            nodes: N::new(None),
224            way_geometry: _W::new(None),
225            ways: W::new(None),
226            relations: R::new(None),
227        }
228    }
229
230    /// Cleanup the data which will cleanup any temporary files if they exist
231    pub fn cleanup(&mut self) {
232        self.node_geometry.cleanup();
233        self.nodes.cleanup();
234        self.way_geometry.cleanup();
235        self.ways.cleanup();
236        self.relations.cleanup();
237    }
238
239    /// returns - The header of the OSM file
240    pub fn get_header(&mut self) -> OSMHeader {
241        self._offset = 0;
242        let blob_header = self.next();
243        if blob_header.is_none() {
244            panic!("OSM header not found");
245        }
246        let bytes = blob_header.unwrap();
247        let mut pbf = Protobuf::from(bytes.clone());
248        let mut header_block = HeaderBlock::default();
249        let Field { tag, r#type } = pbf.read_field();
250        if tag != 1 || r#type != Type::Bytes {
251            return OSMHeader::default();
252        }
253        pbf.read_message(&mut header_block);
254
255        header_block.to_header()
256    }
257
258    fn next_blob(&mut self) -> Option<BlobHeader> {
259        // if we've already read all the data, return null
260        if self._offset >= self.reader.len() {
261            return None;
262        }
263        // STEP 1: Get blob size
264        // read length of current blob
265        let length = self.reader.int32_be(Some(self._offset)) as u64;
266        self._offset += 4;
267        let blob_header_data = self.reader.slice(Some(self._offset), Some(self._offset + length));
268        self._offset += length;
269        // build a blob header
270        let mut pbf: Protobuf = blob_header_data.into();
271        let mut blob_header = BlobHeader::default();
272        pbf.read_fields(&mut blob_header, None);
273        Some(blob_header)
274    }
275
276    /// Read the next blob
277    ///
278    /// ## Returns
279    /// The next blob if it exists
280    fn next(&mut self) -> Option<Vec<u8>> {
281        if let Some(blob_header) = self.next_blob() {
282            // STEP 2: Get blob data
283            let compressed_blob_data =
284                self.reader.slice(Some(self._offset), Some(self._offset + blob_header.datasize));
285            self._offset += blob_header.datasize;
286            Some(compressed_blob_data)
287        } else {
288            None
289        }
290    }
291
292    /// Skip a block of data
293    fn skip(&mut self) {
294        if let Some(blob_header) = self.next_blob() {
295            self._offset += blob_header.datasize;
296        }
297    }
298
299    /// Parse all blocks, storing all nodes, ways, and relations into local stores for future consumption
300    pub fn parse_blocks(&mut self) {
301        if self._parsed {
302            return;
303        }
304        self._offset = 0;
305        // skip the header
306        self.skip();
307        while let Some(b) = self.next() {
308            self.parse_block(OSMReader::<T, _N, N, _W, W, R>::next_block(b));
309        }
310        self._parsed = true;
311    }
312
313    /// Read the input blob and parse the block of data
314    pub fn next_block(data: Vec<u8>) -> PrimitiveBlock {
315        // Blob data is PBF encoded and ?compressed, so we need to parse & decompress it first
316        let mut pbf: Protobuf = data.into();
317        let mut blob = Blob::default();
318        pbf.read_fields(&mut blob, None);
319        let mut pbf: Protobuf = blob.data.into();
320        // Parse the PrimitiveBlock and read its contents.
321        // all nodes/ways/relations that can be filtered already are on invocation.
322        let mut pb = PrimitiveBlock::default();
323        pbf.read_fields(&mut pb, None);
324        pb
325    }
326
327    fn parse_block(&mut self, block: PrimitiveBlock) {
328        let skip_wr = self.skip_ways && self.skip_relations;
329        for group in &block.primitive_groups {
330            for node in &group.nodes {
331                if !node.is_filterable(&block, self) {
332                    self.nodes.set(node.id, node.to_intermediate_feature(&block));
333                }
334                if !skip_wr {
335                    self.node_geometry.set(node.id, node.to_vector_geometry(&block));
336                }
337            }
338            if !skip_wr {
339                for way in &group.ways {
340                    if !way.is_filterable(&block, self)
341                        && let Some(i_way) = way.to_intermediate_feature(&block, self)
342                    {
343                        self.ways.set(way.id, i_way);
344                    }
345                    if !self.skip_ways {
346                        self.way_geometry.set(way.id, way.node_refs());
347                    }
348                }
349                for relation in &group.relations {
350                    if !relation.is_filterable(&block, self)
351                        && let Some(i_relation) = relation.to_intermediate_feature(&block)
352                    {
353                        self.relations.set(relation.id, i_relation);
354                    }
355                }
356            }
357        }
358    }
359
360    /// Parse only nodes using threads. Assumed this reader has already been cloned and passed
361    /// to a thread.
362    pub fn par_parse_node_blocks(
363        &mut self,
364        pool_size: usize,
365        thread_id: usize,
366        cb: &mut dyn FnMut(VectorFeature<OSMMetadata, Properties, MValue>),
367    ) {
368        if pool_size == 0 || thread_id > pool_size {
369            panic!("pool_size must be > 0 and thread_id must be <= pool_size");
370        }
371        // ensure an offset reset, skip header, then skip to offset of thread_id
372        self._offset = 0;
373        self.skip();
374        for _ in 0..thread_id {
375            self.skip();
376        }
377        // loop through the whole list of parse_node_blocks, but with a stride of pool_size
378        while let Some(b) = self.next() {
379            self.parse_node_block(OSMReader::<T, _N, N, _W, W, R>::next_block(b), cb);
380            for _ in 0..pool_size {
381                self.skip();
382            }
383        }
384    }
385
386    /// If you are only interested in the nodes, run this function instead as it doesn't need
387    /// Prep data in memory
388    pub fn parse_node_blocks(
389        &mut self,
390        cb: &mut dyn FnMut(VectorFeature<OSMMetadata, Properties, MValue>),
391    ) {
392        self._offset = 0;
393        // skip the header
394        self.skip();
395        while let Some(b) = self.next() {
396            self.parse_node_block(OSMReader::<T, _N, N, _W, W, R>::next_block(b), cb);
397        }
398    }
399
400    fn parse_node_block(
401        &mut self,
402        block: PrimitiveBlock,
403        cb: &mut dyn FnMut(VectorFeature<OSMMetadata, Properties, MValue>),
404    ) {
405        for group in &block.primitive_groups {
406            for node in &group.nodes {
407                if !node.is_filterable(&block, self) {
408                    cb(node.to_intermediate_feature(&block).to_vector_feature(self.add_bbox));
409                }
410            }
411        }
412    }
413}
414
/// OSM Reader iterator
///
/// Borrows an [`OSMReader`] and yields the features held in its stores: nodes first, then
/// ways, then relations. Created by the reader's `FeatureReader::iter` implementation.
pub struct OsmReaderIter<
    'a,
    T: Reader,
    _N: KVStore<u64, VectorPoint<MValue>>,
    N: KVStore<u64, IntermediateNode>,
    _W: KVStore<u64, WayNodes>,
    W: KVStore<u64, IntermediateWay>,
    R: KVStore<u64, IntermediateRelation>,
> {
    /// The reader being iterated; gives access to the geometry stores and the `add_bbox` flag
    reader: &'a OSMReader<T, _N, N, _W, W, R>,
    /// Iterator over the reader's intermediate node store
    node_iter: Box<dyn Iterator<Item = (&'a u64, &'a IntermediateNode)> + 'a>,
    /// Iterator over the reader's intermediate way store
    way_iter: Box<dyn Iterator<Item = (&'a u64, &'a IntermediateWay)> + 'a>,
    /// Iterator over the reader's intermediate relation store
    relation_iter: Box<dyn Iterator<Item = (&'a u64, &'a IntermediateRelation)> + 'a>,
}
430impl<
431    'a,
432    T: Reader,
433    _N: KVStore<u64, VectorPoint<MValue>>,
434    N: KVStore<u64, IntermediateNode>,
435    _W: KVStore<u64, WayNodes>,
436    W: KVStore<u64, IntermediateWay>,
437    R: KVStore<u64, IntermediateRelation>,
438> Debug for OsmReaderIter<'a, T, _N, N, _W, W, R>
439{
440    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
441        write!(f, "OsmReaderIter")
442    }
443}
444
445impl<
446    T: Reader,
447    _N: KVStore<u64, VectorPoint<MValue>>,
448    N: KVStore<u64, IntermediateNode>,
449    _W: KVStore<u64, WayNodes>,
450    W: KVStore<u64, IntermediateWay>,
451    R: KVStore<u64, IntermediateRelation>,
452> Iterator for OsmReaderIter<'_, T, _N, N, _W, W, R>
453{
454    type Item = VectorFeature<OSMMetadata, Properties, MValue>;
455
456    fn next(&mut self) -> Option<Self::Item> {
457        let node_geometry = &self.reader.node_geometry;
458        let way_geometry = &self.reader.way_geometry;
459        let add_bbox = self.reader.add_bbox;
460        if let Some((_, node)) = self.node_iter.next() {
461            Some(node.to_vector_feature(add_bbox))
462        } else if let Some((_, way)) = self.way_iter.next() {
463            Some(way.to_vector_feature(node_geometry, add_bbox))
464        } else if let Some((_, relation)) = self.relation_iter.next() {
465            relation.to_vector_feature(node_geometry, way_geometry, add_bbox)
466        } else {
467            None
468        }
469    }
470}
471impl<
472    T: Reader,
473    _N: KVStore<u64, VectorPoint<MValue>>,
474    N: KVStore<u64, IntermediateNode>,
475    _W: KVStore<u64, WayNodes>,
476    W: KVStore<u64, IntermediateWay>,
477    R: KVStore<u64, IntermediateRelation>,
478> FeatureReader<OSMMetadata, Properties, MValue> for OSMReader<T, _N, N, _W, W, R>
479{
480    type FeatureIterator<'a>
481        = OsmReaderIter<'a, T, _N, N, _W, W, R>
482    where
483        T: 'a,
484        _N: 'a,
485        N: 'a,
486        _W: 'a,
487        W: 'a,
488        R: 'a;
489
490    fn iter(&self) -> Self::FeatureIterator<'_> {
491        OsmReaderIter {
492            reader: self,
493            node_iter: Box::new(self.nodes.iter()),
494            way_iter: Box::new(self.ways.iter()),
495            relation_iter: Box::new(self.relations.iter()),
496        }
497    }
498
499    fn par_iter(&self, _pool_size: usize, _thread_id: usize) -> Self::FeatureIterator<'_> {
500        // TODO: Unimplemented
501        self.iter()
502    }
503}