country_boundaries/
deserializer.rs

1use crate::cell::Cell;
2use crate::multipolygon::Multipolygon;
3use crate::multipolygon::Point;
4use crate::CountryBoundaries;
5use std::collections::HashMap;
6use std::fmt;
7use std::io::Read;
8
/// Module-local shorthand for a `std::result::Result` whose error type is [`ReadError`].
type Result<T> = std::result::Result<T, ReadError>;
10
/// Errors that can occur while deserializing the boundaries data.
#[derive(Debug)]
pub enum ReadError {
    /// The version number found in the data (`actual`) differs from the supported one
    /// (`expected`).
    WrongVersionNumber { expected: u16, actual: u16 },
    /// An integer read from the data could not be converted (wraps the conversion error).
    UnableToParseUsize(std::num::TryFromIntError),
    /// Bytes read from the data were not a valid UTF-8 string.
    UnableToDecodeUtf8(std::string::FromUtf8Error),
    /// An I/O operation failed.
    Io(std::io::Error),
}
18
// Marks `ReadError` as a standard error type. No methods are overridden, so
// `source()` keeps its default and the wrapped errors are reachable only by
// matching on the variant or through the `Display` output.
impl std::error::Error for ReadError {}
20
21impl fmt::Display for ReadError {
22    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
23        match self {
24            ReadError::WrongVersionNumber { expected, actual } => {
25                write!(
26                    f,
27                    "Wrong version number '{actual}' of the boundaries file \
28                       (expected: '{expected}'). \
29                       You may need to get the current version of the data."
30                )
31            }
32            ReadError::UnableToParseUsize(e) => {
33                write!(f, "Unable to parse usize from '{}'", e)
34            }
35            ReadError::UnableToDecodeUtf8(e) => {
36                write!(f, "Unable to decode UTF-8 string from '{}'", e)
37            }
38            ReadError::Io(e) => {
39                write!(f, "IO error: {}", e)
40            }
41        }
42    }
43}
44
45impl From<std::num::TryFromIntError> for ReadError {
46    fn from(error: std::num::TryFromIntError) -> Self {
47        Self::UnableToParseUsize(error)
48    }
49}
50
51impl From<std::string::FromUtf8Error> for ReadError {
52    fn from(error: std::string::FromUtf8Error) -> Self {
53        Self::UnableToDecodeUtf8(error)
54    }
55}
56
57impl From<std::io::Error> for ReadError {
58    fn from(error: std::io::Error) -> Self {
59        Self::Io(error)
60    }
61}
62
63/// Deserialize a `CountryBoundaries` from an IO stream.
64///
65/// The content of the IO stream is deserialized directly from the stream without being buffered in
66/// memory.
67///
68/// When reading from a source against which short reads are not efficient, such as a [`File`],
69/// you will want to apply your own buffering because this function will not buffer the input. See
70/// [`io::BufReader`].
71pub fn from_reader(mut reader: impl Read) -> Result<CountryBoundaries> {
72    let version = read_u16(&mut reader)?;
73    if version != 2 {
74        return Err(ReadError::WrongVersionNumber {
75            expected: 2,
76            actual: version,
77        });
78    }
79
80    let geometry_sizes_count = read_usize32(&mut reader)?;
81    let mut geometry_sizes = HashMap::with_capacity(geometry_sizes_count);
82    for _ in 0..geometry_sizes_count {
83        let id = read_string(&mut reader)?;
84        let size = read_f64(&mut reader)?;
85        geometry_sizes.insert(id, size);
86    }
87
88    let raster_width = read_usize32(&mut reader)?;
89
90    let raster_size = read_usize32(&mut reader)?;
91
92    let mut raster = Vec::with_capacity(raster_size);
93    for _ in 0..raster_size {
94        raster.push(read_cell(&mut reader)?);
95    }
96
97    Ok(CountryBoundaries {
98        raster,
99        raster_width,
100        geometry_sizes,
101    })
102}
103
104fn read_cell(reader: &mut impl Read) -> Result<Cell> {
105    let containing_ids_size = usize::from(read_u8(reader)?);
106    let mut containing_ids = Vec::with_capacity(containing_ids_size);
107    for _ in 0..containing_ids_size {
108        containing_ids.push(read_string(reader)?);
109    }
110    let intersecting_areas_size = usize::from(read_u8(reader)?);
111    let mut intersecting_areas = Vec::with_capacity(intersecting_areas_size);
112    for _ in 0..intersecting_areas_size {
113        intersecting_areas.push(read_areas(reader)?);
114    }
115    Ok(Cell {
116        containing_ids,
117        intersecting_areas,
118    })
119}
120
121fn read_areas(reader: &mut impl Read) -> Result<(String, Multipolygon)> {
122    let id = read_string(reader)?;
123    let outer = read_polygons(reader)?;
124    let inner = read_polygons(reader)?;
125    Ok((id, Multipolygon { outer, inner }))
126}
127
128fn read_polygons(reader: &mut impl Read) -> Result<Vec<Vec<Point>>> {
129    let size = usize::from(read_u8(reader)?);
130    let mut polygons: Vec<Vec<Point>> = Vec::with_capacity(size);
131    for _ in 0..size {
132        polygons.push(read_ring(reader)?);
133    }
134    Ok(polygons)
135}
136
137fn read_ring(reader: &mut impl Read) -> Result<Vec<Point>> {
138    let size = read_usize32(reader)?;
139    let mut ring = Vec::with_capacity(size);
140    for _ in 0..size {
141        ring.push(read_point(reader)?);
142    }
143    Ok(ring)
144}
145
146fn read_point(reader: &mut impl Read) -> Result<Point> {
147    let x = read_u16(reader)?;
148    let y = read_u16(reader)?;
149    Ok(Point { x, y })
150}
151
152fn read_u8(reader: &mut impl Read) -> Result<u8> {
153    let mut buf = [0; 1];
154    reader.read_exact(&mut buf)?;
155    Ok(u8::from_be_bytes(buf))
156}
157
158fn read_u16(reader: &mut impl Read) -> Result<u16> {
159    let mut buf = [0; 2];
160    reader.read_exact(&mut buf)?;
161    Ok(u16::from_be_bytes(buf))
162}
163
164fn read_u32(reader: &mut impl Read) -> Result<u32> {
165    let mut buf = [0; 4];
166    reader.read_exact(&mut buf)?;
167    Ok(u32::from_be_bytes(buf))
168}
169
170fn read_usize32(reader: &mut impl Read) -> Result<usize> {
171    Ok(usize::try_from(read_u32(reader)?)?)
172}
173
174fn read_f64(reader: &mut impl Read) -> Result<f64> {
175    let mut buf = [0; 8];
176    reader.read_exact(&mut buf)?;
177    Ok(f64::from_be_bytes(buf))
178}
179
180fn read_string(reader: &mut impl Read) -> Result<String> {
181    let length = usize::from(read_u16(reader)?);
182    let mut vec: Vec<u8> = vec![0; length];
183    reader.read_exact(vec.as_mut_slice())?;
184    Ok(String::from_utf8(vec)?)
185}
186
#[cfg(test)]
mod tests {
    //! Unit tests for the binary readers.
    //!
    //! The "truncation" loops feed every strict prefix of a valid input (lengths
    //! `0..len`) to the reader under test and expect each of them to fail.

    use super::*;

    #[test]
    fn test_read_string() {
        assert!(read_string(&mut [0x00].as_slice()).is_err());
        assert!(read_string(&mut [0x00, 0x01].as_slice()).is_err());
        assert!(read_string(&mut [0x00, 0x02, 0x41].as_slice()).is_err());

        assert!(read_string(&mut [0x00, 0x00].as_slice())
            .unwrap()
            .is_empty());
        assert_eq!(
            "A",
            read_string(&mut [0x00, 0x01, 0x41].as_slice()).unwrap()
        );
        assert_eq!(
            "AB",
            read_string(&mut [0x00, 0x02, 0x41, 0x42].as_slice()).unwrap()
        );
    }

    #[test]
    fn test_read_f64() {
        assert!(read_f64(&mut [0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00].as_slice()).is_err());

        assert_eq!(
            12.5,
            read_f64(&mut [0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00].as_slice()).unwrap()
        );
    }

    #[test]
    fn test_read_u8() {
        assert!(read_u8(&mut [].as_slice()).is_err());

        assert_eq!(17, read_u8(&mut [0x11].as_slice()).unwrap());
        assert_eq!(u8::MIN, read_u8(&mut [0x00].as_slice()).unwrap());
        assert_eq!(u8::MAX, read_u8(&mut [0xff].as_slice()).unwrap());
    }

    #[test]
    fn test_read_u16() {
        assert!(read_u16(&mut [0x00].as_slice()).is_err());

        assert_eq!(17, read_u16(&mut [0x00, 0x11].as_slice()).unwrap());
        assert_eq!(u16::MIN, read_u16(&mut [0x00, 0x00].as_slice()).unwrap());
        assert_eq!(u16::MAX, read_u16(&mut [0xff, 0xff].as_slice()).unwrap());
    }

    #[test]
    fn test_read_u32() {
        assert!(read_u32(&mut [0x00, 0x00, 0x00].as_slice()).is_err());

        assert_eq!(
            17,
            read_u32(&mut [0x00, 0x00, 0x00, 0x11].as_slice()).unwrap()
        );
        assert_eq!(
            u32::MIN,
            read_u32(&mut [0x00, 0x00, 0x00, 0x00].as_slice()).unwrap()
        );
        assert_eq!(
            u32::MAX,
            read_u32(&mut [0xff, 0xff, 0xff, 0xff].as_slice()).unwrap()
        );
    }

    #[test]
    fn test_read_usize32() {
        assert!(read_usize32(&mut [0x00, 0x00, 0x00].as_slice()).is_err());

        assert_eq!(
            17,
            read_usize32(&mut [0x00, 0x00, 0x00, 0x11].as_slice()).unwrap()
        );
        assert_eq!(
            0,
            read_usize32(&mut [0x00, 0x00, 0x00, 0x00].as_slice()).unwrap()
        );
        assert_eq!(
            0xffff,
            read_usize32(&mut [0x00, 0x00, 0xff, 0xff].as_slice()).unwrap()
        );
    }

    #[test]
    #[cfg(target_pointer_width = "16")]
    fn read_usize32_on_16_bit_machines_results_in_error_if_number_too_big() {
        assert!(read_usize32(&mut [0x00, 0xff, 0xff, 0xff].as_slice()).is_err());
    }

    #[test]
    fn test_read_point() {
        assert_eq!(
            Point { x: 1, y: 2 },
            read_point(&mut [0x00, 0x01, 0x00, 0x02].as_slice()).unwrap()
        );
    }

    #[test]
    fn test_read_ring() {
        let empty = [0x00, 0x00, 0x00, 0x00];
        // Every strict prefix is a truncated input and must be rejected.
        for i in 0..empty.len() {
            assert!(read_ring(&mut &empty[..i]).is_err());
        }
        assert!(read_ring(&mut empty.as_slice()).unwrap().is_empty());

        let two_points = [
            0x00, 0x00, 0x00, 0x02, // length
            0x00, 0x01, // p1.x
            0x00, 0x02, // p1.y
            0x00, 0x03, // p2.x
            0x00, 0x04, // p2.y
        ];
        for i in 0..two_points.len() {
            assert!(read_ring(&mut &two_points[..i]).is_err());
        }
        assert_eq!(
            vec![Point { x: 1, y: 2 }, Point { x: 3, y: 4 }],
            read_ring(&mut two_points.as_slice()).unwrap()
        );
    }

    #[test]
    fn test_read_polygons() {
        assert!(read_polygons(&mut [0x00].as_slice()).unwrap().is_empty());

        let two_rings = [
            0x02, // polygons length
            0x00, 0x00, 0x00, 0x01, // ring length
            0x00, 0x01, // p1.x
            0x00, 0x02, // p1.y
            0x00, 0x00, 0x00, 0x01, // ring length
            0x00, 0x03, // p2.x
            0x00, 0x04, // p2.y
        ];
        for i in 0..two_rings.len() {
            assert!(read_polygons(&mut &two_rings[..i]).is_err());
        }
        assert_eq!(
            vec![vec![Point { x: 1, y: 2 }], vec![Point { x: 3, y: 4 }]],
            read_polygons(&mut two_rings.as_slice()).unwrap()
        );
    }

    #[test]
    fn test_read_cell() {
        assert_eq!(
            Cell {
                containing_ids: vec![],
                intersecting_areas: vec![]
            },
            read_cell(&mut [0x00, 0x00].as_slice()).unwrap()
        );

        let cell = [
            0x01, // containing ids length
            0x00, 0x01, 0x41, // "A"
            0x01, // intersecting areas length
            0x00, 0x01, 0x42, // "B"
            0x00, 0x00, // empty multipolygon
        ];
        // The truncation check must exercise `read_cell` itself, the reader under test.
        for i in 0..cell.len() {
            assert!(read_cell(&mut &cell[..i]).is_err());
        }
        assert_eq!(
            Cell {
                containing_ids: vec![String::from("A")],
                intersecting_areas: vec![(
                    String::from("B"),
                    Multipolygon {
                        inner: vec![],
                        outer: vec![]
                    }
                )]
            },
            read_cell(&mut cell.as_slice()).unwrap()
        );
    }

    #[test]
    fn test_read_wrong_version() {
        let minimum = [
            0x00, 0x03, // version number
            0x00, 0x00, 0x00, 0x00, // geometry sizes map length
            0x00, 0x00, 0x00, 0x00, // raster width
            0x00, 0x00, 0x00, 0x00, // raster size
        ];
        // Pin the exact error so an unrelated failure cannot make this test pass.
        assert!(matches!(
            from_reader(&mut minimum.as_slice()),
            Err(ReadError::WrongVersionNumber {
                expected: 2,
                actual: 3
            })
        ));
    }

    #[test]
    fn test_read_minimum() {
        let minimum = [
            0x00, 0x02, // version number
            0x00, 0x00, 0x00, 0x00, // geometry sizes map length
            0x00, 0x00, 0x00, 0x00, // raster width
            0x00, 0x00, 0x00, 0x00, // raster size
        ];
        for i in 0..minimum.len() {
            assert!(from_reader(&mut &minimum[..i]).is_err());
        }
        assert_eq!(
            CountryBoundaries {
                raster: vec![],
                raster_width: 0,
                geometry_sizes: HashMap::new()
            },
            from_reader(&mut minimum.as_slice()).unwrap()
        );
    }

    #[test]
    fn test_read_basic() {
        let basic = [
            0x00, 0x02, // version number
            0x00, 0x00, 0x00, 0x01, // geometry sizes map length
            0x00, 0x01, 0x41, // "A"
            0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 12.5
            0x00, 0x00, 0x00, 0x01, // raster width
            0x00, 0x00, 0x00, 0x01, // raster size
            0x01, // cell containing ids length
            0x00, 0x01, 0x41, // "A"
            0x00, // intersecting areas length
        ];
        for i in 0..basic.len() {
            assert!(from_reader(&mut &basic[..i]).is_err());
        }
        assert_eq!(
            CountryBoundaries {
                raster: vec![Cell {
                    containing_ids: vec![String::from("A")],
                    intersecting_areas: vec![]
                }],
                raster_width: 1,
                geometry_sizes: HashMap::from([(String::from("A"), 12.5)])
            },
            from_reader(&mut basic.as_slice()).unwrap()
        );
    }
}