gistools/readers/osm/
mod.rs

1/// Blob defines a chunk of data to be parsed as either a header or a primitive
2pub mod blob;
3/// Filtering tool
4pub mod filter;
5/// OSM Header Block defines the contents of the file
6pub mod header_block;
7/// Info Block defines optional metadata that may be included into each primitive
8pub mod info;
9/// Node Block defines the contents of a node
10pub mod node;
11/// Primitive Block defines the contents of a node, way or relation
12pub mod primitive;
13/// Relation Block defines the contents of a relation
14pub mod relation;
15/// Way Block defines the contents of a way
16pub mod way;
17
18use crate::{
19    data_store::{KV, KVStore, kv::file::FileKV},
20    parsers::{FeatureReader, Reader},
21};
22use alloc::{boxed::Box, vec::Vec};
23use blob::{Blob, BlobHeader};
24use core::fmt::Debug;
25use filter::*;
26use header_block::{HeaderBlock, OSMHeader};
27use node::IntermediateNode;
28use pbf::{Field, Protobuf, Type};
29use primitive::{OSMMetadata, PrimitiveBlock};
30use relation::IntermediateRelation;
31use s2json::{MValue, Properties, VectorFeature, VectorPoint};
32use way::{IntermediateWay, WayNodes};
33
34// TODO: Add threads for reading the blocks
35
/// OSM Reader options
///
/// `OSMReader::new` copies every field of this struct into the reader it builds.
#[derive(Debug, Clone)]
pub struct OSMReaderOptions {
    /// if true, remove nodes that have no tags [Default = false]
    /// NOTE(review): this used to read "[Default = true]", but the `Default` impl sets `false`;
    /// confirm which of the two is intended.
    pub remove_empty_nodes: bool,
    /// If provided, a tag filter stored on the reader [Default = None]
    /// NOTE(review): presumably consulted by the `is_filterable` checks — confirm in `filter`.
    pub tag_filter: Option<OSMTagFilter>,
    /// If set to true, nodes will be skipped. [Default = false]
    pub skip_nodes: bool,
    /// If set to true, ways will be skipped. [Default = false]
    pub skip_ways: bool,
    /// If set to true, relations will be skipped. [Default = false]
    pub skip_relations: bool,
    /// If set to true, ways will be converted to areas if they are closed.
    /// NOTE: They are upgraded anyways if the tag "area" is set to "yes".
    /// [Default = false]
    pub upgrade_ways_to_areas: bool,
    /// If set to true, add a bbox property to each feature [Default = true]
    pub add_bbox: bool,
}
56impl Default for OSMReaderOptions {
57    fn default() -> Self {
58        OSMReaderOptions {
59            remove_empty_nodes: false,
60            tag_filter: None,
61            skip_nodes: false,
62            skip_ways: false,
63            skip_relations: false,
64            upgrade_ways_to_areas: false,
65            add_bbox: true,
66        }
67    }
68}
69
/// OSM File Reader: an [`OSMReader`] whose node-geometry, node, way-geometry, way, and relation
/// stores are all [`FileKV`] instances.
/// NOTE(review): the previous wording said "local buffers"; `FileKV` presumably keeps its data in
/// files — confirm in `data_store::kv::file`.
///
/// See [`OSMReader`] for full documentation.
pub type OSMFileReader<T> = OSMReader<
    T,
    FileKV<u64, VectorPoint<MValue>>,
    FileKV<u64, IntermediateNode>,
    FileKV<u64, WayNodes>,
    FileKV<u64, IntermediateWay>,
    FileKV<u64, IntermediateRelation>,
>;
/// OSM File Reader Iterator: the [`OsmReaderIter`] instantiated with the same [`FileKV`] stores
/// as [`OSMFileReader`].
pub type OSMFileReaderIter<'a, T> = OsmReaderIter<
    'a,
    T,
    FileKV<u64, VectorPoint<MValue>>,
    FileKV<u64, IntermediateNode>,
    FileKV<u64, WayNodes>,
    FileKV<u64, IntermediateWay>,
    FileKV<u64, IntermediateRelation>,
>;
91
/// OSM Local Reader: an [`OSMReader`] whose node-geometry, node, way-geometry, way, and relation
/// stores are all [`KV`] instances (the same store types `OSMReader` uses by default).
///
/// See [`OSMReader`] for full documentation.
pub type OSMLocalReader<T> = OSMReader<
    T,
    KV<u64, VectorPoint<MValue>>,
    KV<u64, IntermediateNode>,
    KV<u64, WayNodes>,
    KV<u64, IntermediateWay>,
    KV<u64, IntermediateRelation>,
>;
/// OSM Local Reader Iterator: the [`OsmReaderIter`] instantiated with the same [`KV`] stores as
/// [`OSMLocalReader`].
pub type OSMLocalReaderIter<'a, T> = OsmReaderIter<
    'a,
    T,
    KV<u64, VectorPoint<MValue>>,
    KV<u64, IntermediateNode>,
    KV<u64, WayNodes>,
    KV<u64, IntermediateWay>,
    KV<u64, IntermediateRelation>,
>;
113
/// # OSM Reader
///
/// ## Description
/// Parses OSM PBF files
///
/// Implements the [`FeatureReader`] trait
///
/// ## Usage
///
/// For simplicity, you can use the [`OSMFileReader`] or [`OSMLocalReader`] wrappers.
///
/// The methods you have access to:
/// - [`OSMReader::new`]: Create a new OSMReader
/// - [`OSMReader::cleanup`]: Cleans up the reader's temp data
/// - [`OSMReader::get_header`]: Get the OSM Header
/// - [`OSMReader::parse_blocks`]: Parse all blocks to prepare for reads
/// - [`OSMReader::next_block`]: Decode a raw blob into a primitive block (if you want to
///   iteratively use blocks yourself)
/// - [`OSMReader::par_parse_node_blocks`]: If you are only interested in nodes, parse all of them quicker using threads
/// - [`OSMReader::parse_node_blocks`]: If you are only interested in nodes, parse all of them quicker
/// - [`OSMReader::iter`]: Create a new OSMReader Iterator
///
/// ### Local Reader
/// All data is stored locally in memory and will be cleaned up on drop
///
/// ```rust
/// use gistools::{parsers::{FileReader, FeatureReader}, readers::OSMLocalReader};
/// use std::path::PathBuf;
///
/// let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
/// path.push("tests/readers/osm/fixtures/test.pbf");
/// let path_str = path.to_str().unwrap();
/// let reader = FileReader::from(path_str);
///
/// let mut osm = OSMLocalReader::new(reader, None);
/// osm.parse_blocks();
///
/// let features: Vec<_> = osm.iter().collect();
/// assert_eq!(features.len(), 8);
/// ```
#[derive(Debug, Clone)]
pub struct OSMReader<
    T: Reader,
    _N: KVStore<u64, VectorPoint<MValue>> = KV<u64, VectorPoint<MValue>>,
    N: KVStore<u64, IntermediateNode> = KV<u64, IntermediateNode>,
    _W: KVStore<u64, WayNodes> = KV<u64, WayNodes>,
    W: KVStore<u64, IntermediateWay> = KV<u64, IntermediateWay>,
    R: KVStore<u64, IntermediateRelation> = KV<u64, IntermediateRelation>,
> {
    /// The input reader
    reader: T,
    /// if true, skip nodes that have no tags.
    /// Set from `OSMReaderOptions::remove_empty_nodes`, whose `Default` value is `false`.
    skip_empty_nodes: bool,
    /// Optional tag filter, copied from `OSMReaderOptions::tag_filter`
    tag_filter: Option<OSMTagFilter>,
    /// If set to true, nodes will be skipped
    skip_nodes: bool,
    /// If set to true, ways will be skipped
    skip_ways: bool,
    /// If set to true, relations will be skipped
    skip_relations: bool,
    /// If set to true, ways will be converted to areas if they are closed
    /// NOTE: They are upgraded anyways if the tag "area" is set to "yes"
    /// [Default = false]
    upgrade_ways_to_areas: bool,
    /// If set to true, add a bbox property to each feature
    add_bbox: bool,
    /// The current byte offset into the reader
    _offset: u64,
    /// track if the data has been parsed or not (set by `parse_blocks`)
    _parsed: bool,
    /// Node Geometry Store (node id -> point)
    node_geometry: _N,
    /// Intermediate node store
    nodes: N,
    /// Way Geometry Store (way id -> node references)
    way_geometry: _W,
    /// Intermediate way store
    ways: W,
    /// Intermediate relation store
    relations: R,
}
195impl<
196    T: Reader,
197    _N: KVStore<u64, VectorPoint<MValue>>,
198    N: KVStore<u64, IntermediateNode>,
199    _W: KVStore<u64, WayNodes>,
200    W: KVStore<u64, IntermediateWay>,
201    R: KVStore<u64, IntermediateRelation>,
202> OSMReader<T, _N, N, _W, W, R>
203{
204    /// Creates a new OSM Reader
205    pub fn new(reader: T, options: Option<OSMReaderOptions>) -> Self {
206        let options = options.unwrap_or_default();
207        OSMReader {
208            reader,
209            skip_empty_nodes: options.remove_empty_nodes,
210            tag_filter: options.tag_filter,
211            skip_nodes: options.skip_nodes,
212            skip_ways: options.skip_ways,
213            skip_relations: options.skip_relations,
214            upgrade_ways_to_areas: options.upgrade_ways_to_areas,
215            add_bbox: options.add_bbox,
216            _offset: 0,
217            _parsed: false,
218            node_geometry: _N::new(None),
219            nodes: N::new(None),
220            way_geometry: _W::new(None),
221            ways: W::new(None),
222            relations: R::new(None),
223        }
224    }
225
226    /// Cleanup the data which will cleanup any temporary files if they exist
227    pub fn cleanup(&mut self) {
228        self.node_geometry.cleanup();
229        self.nodes.cleanup();
230        self.way_geometry.cleanup();
231        self.ways.cleanup();
232        self.relations.cleanup();
233    }
234
235    /// returns - The header of the OSM file
236    pub fn get_header(&mut self) -> OSMHeader {
237        self._offset = 0;
238        let blob_header = self.next();
239        if blob_header.is_none() {
240            panic!("OSM header not found");
241        }
242        let bytes = blob_header.unwrap();
243        let mut pbf = Protobuf::from(bytes.clone());
244        let mut header_block = HeaderBlock::default();
245        let Field { tag, r#type } = pbf.read_field();
246        if tag != 1 || r#type != Type::Bytes {
247            return OSMHeader::default();
248        }
249        pbf.read_message(&mut header_block);
250
251        header_block.to_header()
252    }
253
254    fn next_blob(&mut self) -> Option<BlobHeader> {
255        // if we've already read all the data, return null
256        if self._offset >= self.reader.len() {
257            return None;
258        }
259        // STEP 1: Get blob size
260        // read length of current blob
261        let length = self.reader.int32_be(Some(self._offset)) as u64;
262        self._offset += 4;
263        let blob_header_data = self.reader.slice(Some(self._offset), Some(self._offset + length));
264        self._offset += length;
265        // build a blob header
266        let mut pbf: Protobuf = blob_header_data.into();
267        let mut blob_header = BlobHeader::default();
268        pbf.read_fields(&mut blob_header, None);
269        Some(blob_header)
270    }
271
272    /// Read the next blob
273    ///
274    /// ## Returns
275    /// The next blob if it exists
276    fn next(&mut self) -> Option<Vec<u8>> {
277        if let Some(blob_header) = self.next_blob() {
278            // STEP 2: Get blob data
279            let compressed_blob_data =
280                self.reader.slice(Some(self._offset), Some(self._offset + blob_header.datasize));
281            self._offset += blob_header.datasize;
282            Some(compressed_blob_data)
283        } else {
284            None
285        }
286    }
287
288    /// Skip a block of data
289    fn skip(&mut self) {
290        if let Some(blob_header) = self.next_blob() {
291            self._offset += blob_header.datasize;
292        }
293    }
294
295    /// Parse all blocks, storing all nodes, ways, and relations into local stores for future consumption
296    pub fn parse_blocks(&mut self) {
297        if self._parsed {
298            return;
299        }
300        self._offset = 0;
301        // skip the header
302        self.skip();
303        while let Some(b) = self.next() {
304            self.parse_block(OSMReader::<T, _N, N, _W, W, R>::next_block(b));
305        }
306        self._parsed = true;
307    }
308
309    /// Read the input blob and parse the block of data
310    pub fn next_block(data: Vec<u8>) -> PrimitiveBlock {
311        // Blob data is PBF encoded and ?compressed, so we need to parse & decompress it first
312        let mut pbf: Protobuf = data.into();
313        let mut blob = Blob::default();
314        pbf.read_fields(&mut blob, None);
315        let mut pbf: Protobuf = blob.data.into();
316        // Parse the PrimitiveBlock and read its contents.
317        // all nodes/ways/relations that can be filtered already are on invocation.
318        let mut pb = PrimitiveBlock::default();
319        pbf.read_fields(&mut pb, None);
320        pb
321    }
322
323    fn parse_block(&mut self, block: PrimitiveBlock) {
324        let skip_wr = self.skip_ways && self.skip_relations;
325        for group in &block.primitive_groups {
326            for node in &group.nodes {
327                if !node.is_filterable(&block, self) {
328                    self.nodes.set(node.id, node.to_intermediate_feature(&block));
329                }
330                if !skip_wr {
331                    self.node_geometry.set(node.id, node.to_vector_geometry(&block));
332                }
333            }
334            if !skip_wr {
335                for way in &group.ways {
336                    if !way.is_filterable(&block, self)
337                        && let Some(i_way) = way.to_intermediate_feature(&block, self)
338                    {
339                        self.ways.set(way.id, i_way);
340                    }
341                    if !self.skip_ways {
342                        self.way_geometry.set(way.id, way.node_refs());
343                    }
344                }
345                for relation in &group.relations {
346                    if !relation.is_filterable(&block, self)
347                        && let Some(i_relation) = relation.to_intermediate_feature(&block)
348                    {
349                        self.relations.set(relation.id, i_relation);
350                    }
351                }
352            }
353        }
354    }
355
356    /// Parse only nodes using threads. Assumed this reader has already been cloned and passed
357    /// to a thread.
358    pub fn par_parse_node_blocks(
359        &mut self,
360        pool_size: usize,
361        thread_id: usize,
362        cb: &mut dyn FnMut(VectorFeature<OSMMetadata, Properties, MValue>),
363    ) {
364        if pool_size == 0 || thread_id > pool_size {
365            panic!("pool_size must be > 0 and thread_id must be <= pool_size");
366        }
367        // ensure an offset reset, skip header, then skip to offset of thread_id
368        self._offset = 0;
369        self.skip();
370        for _ in 0..thread_id {
371            self.skip();
372        }
373        // loop through the whole list of parse_node_blocks, but with a stride of pool_size
374        while let Some(b) = self.next() {
375            self.parse_node_block(OSMReader::<T, _N, N, _W, W, R>::next_block(b), cb);
376            for _ in 0..pool_size {
377                self.skip();
378            }
379        }
380    }
381
382    /// If you are only interested in the nodes, run this function instead as it doesn't need
383    /// Prep data in memory
384    pub fn parse_node_blocks(
385        &mut self,
386        cb: &mut dyn FnMut(VectorFeature<OSMMetadata, Properties, MValue>),
387    ) {
388        self._offset = 0;
389        // skip the header
390        self.skip();
391        while let Some(b) = self.next() {
392            self.parse_node_block(OSMReader::<T, _N, N, _W, W, R>::next_block(b), cb);
393        }
394    }
395
396    fn parse_node_block(
397        &mut self,
398        block: PrimitiveBlock,
399        cb: &mut dyn FnMut(VectorFeature<OSMMetadata, Properties, MValue>),
400    ) {
401        for group in &block.primitive_groups {
402            for node in &group.nodes {
403                if !node.is_filterable(&block, self) {
404                    cb(node.to_intermediate_feature(&block).to_vector_feature(self.add_bbox));
405                }
406            }
407        }
408    }
409}
410
411/// OSM Reader iterator
412pub struct OsmReaderIter<
413    'a,
414    T: Reader,
415    _N: KVStore<u64, VectorPoint<MValue>>,
416    N: KVStore<u64, IntermediateNode>,
417    _W: KVStore<u64, WayNodes>,
418    W: KVStore<u64, IntermediateWay>,
419    R: KVStore<u64, IntermediateRelation>,
420> {
421    reader: &'a OSMReader<T, _N, N, _W, W, R>,
422    node_iter: Box<dyn Iterator<Item = (&'a u64, &'a IntermediateNode)> + 'a>,
423    way_iter: Box<dyn Iterator<Item = (&'a u64, &'a IntermediateWay)> + 'a>,
424    relation_iter: Box<dyn Iterator<Item = (&'a u64, &'a IntermediateRelation)> + 'a>,
425}
426impl<
427    'a,
428    T: Reader,
429    _N: KVStore<u64, VectorPoint<MValue>>,
430    N: KVStore<u64, IntermediateNode>,
431    _W: KVStore<u64, WayNodes>,
432    W: KVStore<u64, IntermediateWay>,
433    R: KVStore<u64, IntermediateRelation>,
434> Debug for OsmReaderIter<'a, T, _N, N, _W, W, R>
435{
436    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
437        write!(f, "OsmReaderIter")
438    }
439}
440
441impl<
442    T: Reader,
443    _N: KVStore<u64, VectorPoint<MValue>>,
444    N: KVStore<u64, IntermediateNode>,
445    _W: KVStore<u64, WayNodes>,
446    W: KVStore<u64, IntermediateWay>,
447    R: KVStore<u64, IntermediateRelation>,
448> Iterator for OsmReaderIter<'_, T, _N, N, _W, W, R>
449{
450    type Item = VectorFeature<OSMMetadata, Properties, MValue>;
451
452    fn next(&mut self) -> Option<Self::Item> {
453        let node_geometry = &self.reader.node_geometry;
454        let way_geometry = &self.reader.way_geometry;
455        let add_bbox = self.reader.add_bbox;
456        if let Some((_, node)) = self.node_iter.next() {
457            Some(node.to_vector_feature(add_bbox))
458        } else if let Some((_, way)) = self.way_iter.next() {
459            Some(way.to_vector_feature(node_geometry, add_bbox))
460        } else if let Some((_, relation)) = self.relation_iter.next() {
461            relation.to_vector_feature(node_geometry, way_geometry, add_bbox)
462        } else {
463            None
464        }
465    }
466}
467impl<
468    T: Reader,
469    _N: KVStore<u64, VectorPoint<MValue>>,
470    N: KVStore<u64, IntermediateNode>,
471    _W: KVStore<u64, WayNodes>,
472    W: KVStore<u64, IntermediateWay>,
473    R: KVStore<u64, IntermediateRelation>,
474> FeatureReader<OSMMetadata, Properties, MValue> for OSMReader<T, _N, N, _W, W, R>
475{
476    type FeatureIterator<'a>
477        = OsmReaderIter<'a, T, _N, N, _W, W, R>
478    where
479        T: 'a,
480        _N: 'a,
481        N: 'a,
482        _W: 'a,
483        W: 'a,
484        R: 'a;
485
486    fn iter(&self) -> Self::FeatureIterator<'_> {
487        OsmReaderIter {
488            reader: self,
489            node_iter: Box::new(self.nodes.iter()),
490            way_iter: Box::new(self.ways.iter()),
491            relation_iter: Box::new(self.relations.iter()),
492        }
493    }
494
495    fn par_iter(&self, _pool_size: usize, _thread_id: usize) -> Self::FeatureIterator<'_> {
496        // TODO: Unimplemented
497        self.iter()
498    }
499}