osm_pbf/
decode.rs

1use std::{iter::Zip, rc::Rc, string::FromUtf8Error, vec::IntoIter};
2
3use crate::protos::osmformat::{
4    self, mod_Relation::MemberType as MemberTypePbf, Info as InfoPbf, PrimitiveBlock, StringTable,
5};
6use chrono::NaiveDateTime;
7use fnv::FnvHashMap as HashMap;
8use itertools::izip;
9use kstring::KString;
10use num_traits::CheckedAdd;
11use rust_decimal::Decimal;
12use thiserror::Error;
13
14use osm_types::{Element, Id, Info, Member, MemberType, Node, Relation, Way};
15
16/// Any error encountered in [decode_elements]
17#[derive(Debug, Error)]
18pub enum DecodeError {
19    #[error("String in string table is not UTF8: {0}")]
20    StringTable(#[from] FromUtf8Error),
21    #[error("Relation element has an unknown member type: {0}")]
22    UnknownRelationMemberType(i32),
23    #[error("Addition overflow while decoding delta-encoded values")]
24    DeltaOverflow,
25    #[error("Multiplication overflow while decoding timestamps")]
26    TimestampOverflow,
27    #[error("Failed to calculate node position: {0}")]
28    Position(#[from] rust_decimal::Error),
29}
30
31/// Decode a [PrimitiveBlock] into an [Element] stream
32///
33/// There is no dependency on order, so this can be called in parallel.
34/// Technically, the file format is `(<HeaderBlock> (<PrimitiveBlock>)+)*`, but
35/// [FileBlock::Header] and [FileBlock::Other] don't contain any elements.
36///
37///
38/// A number of hydration steps are performed:
39/// * String table indices ==> actual strings
40/// * Positions packed as granularity, offset, value ==> positions with [Decimal]
41/// * Timestamps packed as date_granularity, value ==> timestamps with [NaiveDateTime]
42/// * Dense nodes => [Node]s
43/// * Delta-encoded values => actual values
44///
45/// <https://wiki.openstreetmap.org/wiki/PBF_Format#Encoding_OSM_entities_into_fileblocks>
46pub fn decode_elements<'a>(
47    PrimitiveBlock {
48        stringtable: StringTable {
49            s: raw_string_table,
50        },
51        primitivegroup,
52        granularity,
53        lat_offset,
54        lon_offset,
55        date_granularity,
56    }: PrimitiveBlock,
57) -> impl Iterator<Item = Result<Element, DecodeError>> + 'a {
58    let (string_table, string_table_decode_err) = match raw_string_table
59        .into_iter()
60        .map(|raw_string| Ok(KString::from(String::from_utf8(raw_string)?)))
61        .collect::<Result<Vec<_>, DecodeError>>()
62    {
63        Ok(string_table) => (string_table, None),
64        Err(err) => (vec![], Some(err)),
65    };
66
67    let string_table: Rc<[KString]> = Rc::from(string_table);
68    let date_granularity = i64::from(date_granularity);
69
70    string_table_decode_err
71        .into_iter()
72        .map(Err)
73        .chain(primitivegroup.into_iter().flat_map(move |group| {
74            // A PrimitiveGroup cannot contain different types of objects
75            let (size_hint, group_its) = if !group.nodes.is_empty() {
76                (
77                    group.nodes.len(),
78                    GroupIterators::Nodes(group.nodes.into_iter()),
79                )
80            } else if let Some(dense) = group.dense {
81                let (dense_nodes_it, dense_attrs_it) = {
82                    let dense_attrs_size_hint = dense.id.len();
83
84                    let id_it = Delta::from(dense.id.into_iter());
85                    let lat_it = Delta::from(dense.lat.into_iter());
86                    let lon_it = Delta::from(dense.lon.into_iter());
87
88                    let dense_attrs = dense_kv_to_attributes(
89                        dense.keys_vals,
90                        string_table.clone(),
91                        dense_attrs_size_hint,
92                    );
93                    #[cfg(debug)]
94                    {
95                        assert_eq!(dense.id.len(), dense.lat.len());
96                        assert_eq!(dense.lat.len(), dense.lon.len());
97                        if !dense.keys_vals.is_empty() {
98                            assert_eq!(
99                                dense.lon.len(),
100                                dense.keys_vals.iter().filter(|k_or_v| *k_or_v == 0).count()
101                            );
102                        }
103                    }
104                    (id_it.zip(lat_it).zip(lon_it), dense_attrs)
105                };
106
107                let dense_info_it = if let Some(dense_info) = dense.denseinfo {
108                    let version_it = dense_info.version.into_iter();
109                    let timestamp_it = Delta::from(dense_info.timestamp.into_iter());
110                    let changeset_it = Delta::from(dense_info.changeset.into_iter());
111                    let uid_it = Delta::from(dense_info.uid.into_iter());
112                    let user_sid_it = Delta::from(dense_info.user_sid.into_iter());
113                    let visible_it = dense_info.visible.into_iter();
114                    #[cfg(debug)]
115                    {
116                        assert_eq!(dense_info.version.len(), dense_info.timestamp.len());
117                        assert_eq!(dense_info.timestamp.len(), dense_info.changeset.len());
118                        assert_eq!(dense_info.changeset.len(), dense_info.uid.len());
119                        assert_eq!(dense_info.uid.len(), dense_info.user_sid.len());
120                        if !dense_info.visible.is_empty() {
121                            assert_eq!(dense_info.user_sid.len(), dense_info.visible.len());
122                        }
123                    }
124                    Some((
125                        version_it
126                            .zip(timestamp_it)
127                            .zip(changeset_it)
128                            .zip(uid_it)
129                            .zip(user_sid_it),
130                        visible_it,
131                    ))
132                } else {
133                    None
134                };
135
136                (
137                    dense_attrs_it.size_hint,
138                    GroupIterators::Dense {
139                        dense_nodes_it: Box::new(dense_nodes_it),
140                        dense_attrs_it,
141                        dense_info_it: dense_info_it.map(Box::new),
142                    },
143                )
144            } else if !group.ways.is_empty() {
145                (
146                    group.ways.len(),
147                    GroupIterators::Ways(group.ways.into_iter()),
148                )
149            } else if !group.relations.is_empty() {
150                (
151                    group.relations.len(),
152                    GroupIterators::Relations(group.relations.into_iter()),
153                )
154            } else {
155                (0, GroupIterators::Empty)
156            };
157
158            Decoder {
159                group_its,
160                size_hint,
161                string_table: string_table.clone(),
162                granularity,
163                lat_offset,
164                lon_offset,
165                date_granularity,
166            }
167        }))
168}
169
170enum GroupIterators {
171    Nodes(IntoIter<osmformat::Node>),
172    Dense {
173        dense_nodes_it: Box<
174            Zip<
175                Zip<Delta<i64, IntoIter<i64>>, Delta<i64, IntoIter<i64>>>,
176                Delta<i64, IntoIter<i64>>,
177            >,
178        >,
179        dense_attrs_it: DenseKVIterator<IntoIter<i32>>,
180        dense_info_it: Option<
181            Box<(
182                Zip<
183                    Zip<
184                        Zip<
185                            Zip<IntoIter<i32>, Delta<i64, IntoIter<i64>>>,
186                            Delta<i64, IntoIter<i64>>,
187                        >,
188                        Delta<i32, IntoIter<i32>>,
189                    >,
190                    Delta<i32, IntoIter<i32>>,
191                >,
192                IntoIter<bool>,
193            )>,
194        >,
195    },
196    Ways(IntoIter<osmformat::Way>),
197    Relations(IntoIter<osmformat::Relation>),
198    Empty,
199}
200
201struct Decoder {
202    group_its: GroupIterators,
203    size_hint: usize,
204    string_table: Rc<[KString]>,
205    granularity: i32,
206    lat_offset: i64,
207    lon_offset: i64,
208    date_granularity: i64,
209}
210
211impl Iterator for Decoder {
212    type Item = Result<Element, DecodeError>;
213
214    #[inline]
215    fn next(&mut self) -> Option<Self::Item> {
216        match &mut self.group_its {
217            GroupIterators::Nodes(nodes) => nodes.next().map(|node| {
218                Ok(Element::Node(Node {
219                    id: Id(node.id),
220                    tags: kv_to_attributes(node.keys, node.vals, &self.string_table),
221                    info: node
222                        .info
223                        .map(|info| info_from_pbf(info, self.date_granularity, &self.string_table))
224                        .transpose()?,
225                    lat: packed_to_degrees(node.lat, self.granularity, self.lat_offset)?,
226                    lon: packed_to_degrees(node.lon, self.granularity, self.lon_offset)?,
227                }))
228            }),
229            GroupIterators::Dense {
230                dense_nodes_it,
231                dense_attrs_it,
232                dense_info_it,
233            } => dense_nodes_it.next().map(
234                |((id, lat), lon): (
235                    (Result<i64, DecodeError>, Result<i64, DecodeError>),
236                    Result<i64, DecodeError>,
237                )| {
238                    Ok(Element::Node(Node {
239                        id: Id(id?),
240                        tags: dense_attrs_it.next().unwrap_or_default(),
241                        info: if let Some((
242                            ((((version, timestamp), changeset), uid), user_sid),
243                            visible,
244                        )) = dense_info_it
245                            .as_mut()
246                            .and_then(|its| its.0.next().zip(Some(its.1.next())))
247                        {
248                            Some(info_from_pbf(
249                                InfoPbf {
250                                    version,
251                                    timestamp: Some(timestamp?),
252                                    changeset: Some(changeset?),
253                                    uid: Some(uid?),
254                                    // OSM authors indicated that this is incorrectly specified in the proto file
255                                    // so it is ok to convert like this.
256                                    user_sid: Some(user_sid? as u32),
257                                    visible,
258                                },
259                                self.date_granularity,
260                                &self.string_table,
261                            )?)
262                        } else {
263                            None
264                        },
265                        lat: packed_to_degrees(lat?, self.granularity, self.lat_offset)?,
266                        lon: packed_to_degrees(lon?, self.granularity, self.lon_offset)?,
267                    }))
268                },
269            ),
270            GroupIterators::Ways(ways) => ways.next().map(|way| {
271                #[cfg(debug)]
272                {
273                    assert_eq!(way.lat.len(), way.lon.len());
274                    if !lat.is_empty() {
275                        assert_eq!(way.refs.len(), way.lat.len());
276                    }
277                }
278                Ok(Element::Way(Way {
279                    id: Id(way.id),
280                    tags: kv_to_attributes(way.keys, way.vals, &self.string_table),
281                    info: way
282                        .info
283                        .map(|info| info_from_pbf(info, self.date_granularity, &self.string_table))
284                        .transpose()?,
285                    refs: {
286                        let mut refs = Vec::with_capacity(way.refs.len());
287                        let delta = Delta::from(way.refs.into_iter());
288                        for r in delta {
289                            refs.push(Id(r?));
290                        }
291
292                        refs
293                    },
294                }))
295            }),
296            GroupIterators::Relations(relations) => relations.next().map(|relation| {
297                #[cfg(debug)]
298                {
299                    assert_eq!(relation.roles_sid.len(), relation.memids.len());
300                    assert_eq!(relation.memids.len(), relation.types.len());
301                }
302
303                let members = izip!(
304                    relation.roles_sid.into_iter(),
305                    relation.memids.into_iter(),
306                    relation.types.into_iter(),
307                )
308                .map(|(role_sid, member_id, ty)| Member {
309                    id: Id(member_id),
310                    ty: ty.into(),
311                    role: self
312                        .string_table
313                        .get(role_sid as usize)
314                        .filter(|s| !s.is_empty())
315                        .cloned(),
316                })
317                .collect();
318
319                Ok(Element::Relation(Relation {
320                    id: Id(relation.id),
321                    tags: kv_to_attributes(relation.keys, relation.vals, &self.string_table),
322                    info: relation
323                        .info
324                        .map(|info| info_from_pbf(info, self.date_granularity, &self.string_table))
325                        .transpose()?,
326                    members,
327                }))
328            }),
329            GroupIterators::Empty => None,
330        }
331    }
332
333    #[inline]
334    fn size_hint(&self) -> (usize, Option<usize>) {
335        (self.size_hint, Some(self.size_hint))
336    }
337}
338
339/// Converts key & value index arrays into an attribute map
340#[inline]
341fn kv_to_attributes(
342    keys: Vec<u32>,
343    vals: Vec<u32>,
344    string_table: &[KString],
345) -> HashMap<KString, KString> {
346    #[cfg(debug)]
347    assert_eq!(keys.len(), vals.len());
348
349    keys.into_iter()
350        .zip(vals.into_iter())
351        .filter_map(|(k, v)| {
352            string_table
353                .get(k as usize)
354                .cloned()
355                .zip(string_table.get(v as usize).cloned())
356        })
357        .collect()
358}
359
360/// Converts a dense key-value array into attribute maps
361///
362/// This differs from [kv_to_attributes] because the keys and values
363/// for _multiple_ nodes are packed into a single array and end-delimited by a `0`.
364///
365/// <https://wiki.openstreetmap.org/wiki/PBF_Format#Nodes>
366#[inline]
367fn dense_kv_to_attributes(
368    keys_vals: Vec<i32>,
369    string_table: Rc<[KString]>,
370    size_hint: usize,
371) -> DenseKVIterator<IntoIter<i32>> {
372    DenseKVIterator {
373        size_hint: {
374            if keys_vals.is_empty() {
375                0
376            } else {
377                size_hint
378            }
379        },
380        keys_vals: keys_vals.into_iter(),
381        string_table,
382        key: None,
383        current: HashMap::default(),
384    }
385}
386
387struct DenseKVIterator<KI> {
388    keys_vals: KI,
389    string_table: Rc<[KString]>,
390    size_hint: usize,
391    key: Option<usize>,
392    current: HashMap<KString, KString>,
393}
394
395impl<KI> Iterator for DenseKVIterator<KI>
396where
397    KI: Iterator<Item = i32>,
398{
399    type Item = HashMap<KString, KString>;
400
401    #[inline]
402    fn next(&mut self) -> Option<Self::Item> {
403        for k_or_v in self.keys_vals.by_ref() {
404            let k_or_v = k_or_v as usize;
405            if k_or_v == 0 {
406                #[cfg(debug)]
407                assert_eq!(self.key, None);
408                let mut ret = HashMap::default();
409                std::mem::swap(&mut self.current, &mut ret);
410                return Some(ret);
411            } else if let Some(key) = self.key.take() {
412                if let Some((key, value)) = self
413                    .string_table
414                    .get(key)
415                    .cloned()
416                    .zip(self.string_table.get(k_or_v).cloned())
417                {
418                    self.current.insert(key, value);
419                }
420            } else {
421                self.key = Some(k_or_v);
422            }
423        }
424
425        #[cfg(debug)]
426        {
427            assert_eq!(self.key, None);
428            assert!(self.current.is_empty());
429        }
430        None
431    }
432
433    #[inline]
434    fn size_hint(&self) -> (usize, Option<usize>) {
435        (self.size_hint, Some(self.size_hint))
436    }
437}
438
439/// Converts an iterator into a cumulative sum iterator with checks for overflow
440///
441/// Used to decode delta-encoded PBF values.
442struct Delta<T, I>
443where
444    I: Iterator<Item = T>,
445{
446    acc: Option<T>,
447    iterator: I,
448}
449
450impl<T, I> From<I> for Delta<T, I>
451where
452    I: Iterator<Item = T>,
453{
454    fn from(iterator: I) -> Self {
455        Self {
456            acc: None,
457            iterator,
458        }
459    }
460}
461
462impl<T, I> Iterator for Delta<T, I>
463where
464    T: Clone + CheckedAdd,
465    I: Iterator<Item = T>,
466{
467    type Item = Result<T, DecodeError>;
468
469    #[inline]
470    fn next(&mut self) -> Option<Self::Item> {
471        match self.iterator.next() {
472            Some(x) => {
473                match self.acc.as_mut() {
474                    Some(acc) => match acc.checked_add(&x) {
475                        Some(new_acc) => {
476                            *acc = new_acc;
477                        }
478                        None => return Some(Err(DecodeError::DeltaOverflow)),
479                    },
480                    None => {
481                        self.acc = Some(x);
482                    }
483                };
484
485                self.acc.clone().map(Ok)
486            }
487            None => None,
488        }
489    }
490
491    #[inline]
492    fn size_hint(&self) -> (usize, Option<usize>) {
493        self.iterator.size_hint()
494    }
495}
496
497/// Latitude/longitude packed storage scale
498const SCALE: u32 = 9;
499
500/// Calculates the latitude/longitude degrees from the packed PBF format
501///
502/// <https://wiki.openstreetmap.org/wiki/PBF_Format#Definition_of_OSMData_fileblock>
503#[inline]
504fn packed_to_degrees(
505    value: i64,
506    granularity: i32,
507    offset: i64,
508) -> Result<Decimal, rust_decimal::Error> {
509    let value: i128 = value.into();
510    let granularity: i128 = granularity.into();
511    let offset: i128 = offset.into();
512
513    let scale_free = value
514        .checked_mul(granularity)
515        .and_then(|product| product.checked_add(offset))
516        .ok_or(rust_decimal::Error::ExceedsMaximumPossibleValue)?;
517
518    Decimal::try_from_i128_with_scale(scale_free, SCALE)
519}
520
521impl From<MemberTypePbf> for MemberType {
522    fn from(value: MemberTypePbf) -> Self {
523        match value {
524            MemberTypePbf::NODE => Self::Node,
525            MemberTypePbf::WAY => Self::Way,
526            MemberTypePbf::RELATION => Self::Relation,
527        }
528    }
529}
530
531/// Decodes the timestamp and user_sid fields to create the non-pbf representation
532#[inline]
533fn info_from_pbf(
534    info: InfoPbf,
535    date_granularity: i64,
536    string_table: &[KString],
537) -> Result<Info, DecodeError> {
538    Ok(Info {
539        version: info.version,
540        timestamp: info
541            .timestamp
542            .map(|ts| {
543                ts.checked_mul(date_granularity)
544                    .ok_or(DecodeError::TimestampOverflow)
545            })
546            .transpose()?
547            .and_then(NaiveDateTime::from_timestamp_millis),
548        changeset: info.changeset,
549        uid: info.uid,
550        user: info.user_sid.and_then(|user_sid| {
551            string_table
552                .get(user_sid as usize)
553                .filter(|s| !s.is_empty())
554                .cloned()
555        }),
556        visible: info.visible,
557    })
558}