country_boundaries/
lib.rs

1// Use README.md in a documentation on github, crates.io, and docs site, as well as unit test the examples in it.
2#![doc = include_str!("../README.md")]
3
4use crate::deserializer::from_reader;
5use cell::Cell;
6use multipolygon::Point;
7use std::{cmp::min, collections::HashMap, collections::HashSet, io, vec::Vec};
8
9pub use self::bbox::BoundingBox;
10pub use self::deserializer::ReadError;
11pub use self::error::Error;
12pub use self::latlon::LatLon;
13
14mod bbox;
15mod cell;
16mod deserializer;
17mod error;
18mod latlon;
19mod multipolygon;
20
21/// Bytes of the ODbL licensed data in a 360x180 raster, (c) OpenStreetMap contributors.
22pub static BOUNDARIES_ODBL_360X180: &'static [u8] = include_bytes!("../data/boundaries360x180.ser");
23/// Bytes of the ODbL licensed data in a 180x90 raster, (c) OpenStreetMap contributors.
24pub static BOUNDARIES_ODBL_180X90: &'static [u8] = include_bytes!("../data/boundaries180x90.ser");
25/// Bytes of the ODbL licensed data in a 60x30 raster, (c) OpenStreetMap contributors.
26pub static BOUNDARIES_ODBL_60X30: &'static [u8] = include_bytes!("../data/boundaries60x30.ser");
27
28#[derive(Debug, Clone, PartialEq)]
29pub struct CountryBoundaries {
30    /// 2-dimensional array of cells
31    raster: Vec<Cell>,
32    /// width of the raster
33    raster_width: usize,
34    /// the sizes of the different countries contained
35    geometry_sizes: HashMap<String, f64>,
36}
37
38impl CountryBoundaries {
39    /// Create a `CountryBoundaries` from a stream of bytes.
40    ///
41    /// # Errors
42    /// Returns an error if the given data is not a valid country boundaries file.
43    pub fn from_reader(reader: impl io::Read) -> Result<Self, ReadError> {
44        from_reader(reader)
45    }
46
47    /// Returns whether the given `position` is in the region with the given `id`
48    ///
49    /// # Example
50    /// ```
51    /// # use country_boundaries::{CountryBoundaries, LatLon, BOUNDARIES_ODBL_360X180};
52    /// #
53    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
54    /// # let boundaries = CountryBoundaries::from_reader(BOUNDARIES_ODBL_360X180)?;
55    /// assert!(boundaries.is_in(LatLon::new(47.6973, 8.6910)?, "DE"));
56    /// # Ok(())
57    /// # }
58    /// ```
59    pub fn is_in(&self, position: LatLon, id: &str) -> bool {
60        let (cell, point) = self.cell_and_local_point(position);
61        cell.is_in(point, id)
62    }
63
64    /// Returns whether the given `position` is in any of the regions with the given `ids`.
65    ///
66    /// # Example
67    /// ```
68    /// # use country_boundaries::{CountryBoundaries, LatLon, BOUNDARIES_ODBL_360X180};
69    /// # use std::collections::HashSet;
70    /// #
71    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
72    /// # let boundaries = CountryBoundaries::from_reader(BOUNDARIES_ODBL_360X180)?;
73    /// // check if position is in any country where the first day of the workweek is Saturday. It is
74    /// // more efficient than calling `is_in` for every id in a row.
75    /// assert!(
76    ///     !boundaries.is_in_any(
77    ///         LatLon::new(21.0, 96.0)?,
78    ///         &HashSet::from(["BD", "DJ", "IR", "PS"])
79    ///     )
80    /// );
81    /// # Ok(())
82    /// # }
83    /// ```
84    pub fn is_in_any(&self, position: LatLon, ids: &HashSet<&str>) -> bool {
85        let (cell, point) = self.cell_and_local_point(position);
86        cell.is_in_any(point, ids)
87    }
88
89    /// Returns the ids of the regions the given `position` is contained in, ordered by size of
90    /// the region ascending
91    ///
92    /// # Example
93    /// ```
94    /// # use country_boundaries::{CountryBoundaries, LatLon, BOUNDARIES_ODBL_360X180};
95    /// # use std::collections::HashSet;
96    /// #
97    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
98    /// # let boundaries = CountryBoundaries::from_reader(BOUNDARIES_ODBL_360X180)?;
99    /// assert_eq!(
100    ///     vec!["US-TX", "US"],
101    ///     boundaries.ids(LatLon::new(33.0, -97.0)?)
102    /// );
103    /// # Ok(())
104    /// # }
105    /// ```
106    pub fn ids(&self, position: LatLon) -> Vec<&str> {
107        let (cell, point) = self.cell_and_local_point(position);
108        let mut result = cell.get_ids(point);
109        let zero = 0.0;
110        result.sort_by(|&a, &b| {
111            let a = self.geometry_sizes.get(a).unwrap_or(&zero);
112            let b = self.geometry_sizes.get(b).unwrap_or(&zero);
113            a.total_cmp(b)
114        });
115        result
116    }
117
118    /// Returns the ids of the regions that fully contain the given bounding box `bounds`.
119    ///
120    /// The given bounding box is allowed to wrap around the 180th longitude,
121    /// i.e `bounds.min_longitude` = 170 and `bounds.max_longitude` = -170 is fine.
122    ///
123    /// # Example
124    /// ```
125    /// # use country_boundaries::{CountryBoundaries, BoundingBox, BOUNDARIES_ODBL_360X180};
126    /// # use std::collections::HashSet;
127    /// #
128    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
129    /// # let boundaries = CountryBoundaries::from_reader(BOUNDARIES_ODBL_360X180)?;
130    /// assert_eq!(
131    ///     HashSet::from(["RU"]),
132    ///     boundaries.containing_ids(BoundingBox::new(66.0, 178.0, 68.0, -178.0)?)
133    /// );
134    /// # Ok(())
135    /// # }
136    /// ```
137    pub fn containing_ids(&self, bounds: BoundingBox) -> HashSet<&str> {
138        let mut ids: HashSet<&str> = HashSet::new();
139        let mut first_cell = true;
140        for cell in self.cells(&bounds) {
141            if first_cell {
142                ids.extend(cell.containing_ids.iter().map(String::as_str));
143                first_cell = false;
144            } else {
145                ids.retain(|&id| {
146                    cell.containing_ids
147                        .iter()
148                        .any(|containing_id| containing_id == id)
149                });
150                if ids.is_empty() {
151                    return ids;
152                }
153            }
154        }
155        ids
156    }
157
158    /// Returns the ids of the regions that contain or at lest intersect with the given bounding box
159    /// `bounds`.
160    ///
161    /// The given bounding box is allowed to wrap around the 180th longitude,
162    /// i.e `bounds.min_longitude` = 170 and `bounds.max_longitude` = -170 is fine.
163    ///
164    /// # Example
165    /// ```
166    /// # use country_boundaries::{CountryBoundaries, BoundingBox, BOUNDARIES_ODBL_360X180};
167    /// # use std::collections::HashSet;
168    /// #
169    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
170    /// # let boundaries = CountryBoundaries::from_reader(BOUNDARIES_ODBL_360X180)?;
171    /// assert_eq!(
172    ///     HashSet::from(["RU", "US-AK", "US"]),
173    ///     boundaries.intersecting_ids(BoundingBox::new(50.0, 163.0, 67.0, -150.0)?)
174    /// );
175    /// # Ok(())
176    /// # }
177    /// ```
178    pub fn intersecting_ids(&self, bounds: BoundingBox) -> HashSet<&str> {
179        let mut ids: HashSet<&str> = HashSet::new();
180        for cell in self.cells(&bounds) {
181            ids.extend(cell.get_all_ids());
182        }
183        ids
184    }
185
186    fn cell_and_local_point(&self, position: LatLon) -> (&Cell, Point) {
187        let normalized_longitude = normalize(position.longitude(), -180.0, 360.0);
188        let cell_x = self.longitude_to_cell_x(normalized_longitude);
189        let cell_y = self.latitude_to_cell_y(position.latitude());
190
191        (
192            self.cell(cell_x, cell_y),
193            Point {
194                x: self.longitude_to_local_x(cell_x, normalized_longitude),
195                y: self.latitude_to_local_y(cell_y, position.latitude()),
196            },
197        )
198    }
199
200    fn cell(&self, x: usize, y: usize) -> &Cell {
201        &self.raster[y * self.raster_width + x]
202    }
203
204    fn longitude_to_cell_x(&self, longitude: f64) -> usize {
205        let raster_width = self.raster_width as f64;
206        min(
207            self.raster_width.saturating_sub(1),
208            (raster_width * (180.0 + longitude) / 360.0).floor() as usize,
209        )
210    }
211
212    fn latitude_to_cell_y(&self, latitude: f64) -> usize {
213        let raster_height = (self.raster.len() / self.raster_width) as f64;
214        ((raster_height * (90.0 - latitude) / 180.0).ceil() as usize).saturating_sub(1)
215    }
216
217    fn longitude_to_local_x(&self, cell_x: usize, longitude: f64) -> u16 {
218        let raster_width = self.raster_width as f64;
219        let cell_x = cell_x as f64;
220        let cell_longitude = -180.0 + 360.0 * cell_x / raster_width;
221        ((longitude - cell_longitude) * raster_width * 0xffff as f64 / 360.0) as u16
222    }
223
224    fn latitude_to_local_y(&self, cell_y: usize, latitude: f64) -> u16 {
225        let raster_height = (self.raster.len() / self.raster_width) as f64;
226        let cell_y = cell_y as f64;
227        let cell_latitude = 90.0 - 180.0 * (cell_y + 1.0) / raster_height;
228        ((latitude - cell_latitude) * raster_height * 0xffff as f64 / 180.0) as u16
229    }
230
231    fn cells(&self, bounds: &BoundingBox) -> impl Iterator<Item = &Cell> {
232        let normalized_min_longitude = normalize(bounds.min_longitude(), -180.0, 360.0);
233        let normalized_max_longitude = normalize(bounds.max_longitude(), -180.0, 360.0);
234
235        let min_x = self.longitude_to_cell_x(normalized_min_longitude);
236        let max_y = self.latitude_to_cell_y(bounds.min_latitude());
237        let max_x = self.longitude_to_cell_x(normalized_max_longitude);
238        let min_y = self.latitude_to_cell_y(bounds.max_latitude());
239
240        let steps_y = max_y - min_y;
241        // might wrap around
242        let steps_x = if min_x > max_x {
243            self.raster_width - min_x + max_x
244        } else {
245            max_x - min_x
246        };
247
248        let mut x_step = 0;
249        let mut y_step = 0;
250
251        std::iter::from_fn(move || {
252            let result = if x_step <= steps_x && y_step <= steps_y {
253                let x = (min_x + x_step) % self.raster_width;
254                let y = min_y + y_step;
255                Some(self.cell(x, y))
256            } else {
257                None
258            };
259
260            if y_step < steps_y {
261                y_step += 1;
262            } else {
263                y_step = 0;
264                x_step += 1;
265            }
266
267            result
268        })
269        /*
270        // this would be more elegant and shorter, but it is still experimental
271
272        return std::iter::from_generator(|| {
273            for x_step in 0..=steps_x {
274                let x = (min_x + x_step) % self.raster_width;
275                for y_step in 0..=steps_y {
276                    let y = y_step + min_y;
277                    yield &self.raster[y * self.raster_width + x];
278                }
279            }
280        })
281        */
282    }
283}
284
285fn normalize(value: f64, start_at: f64, base: f64) -> f64 {
286    let mut value = value % base;
287    if value < start_at {
288        value += base;
289    } else if value >= start_at + base {
290        value -= base;
291    }
292    value
293}
294
295#[cfg(test)]
296mod tests {
297    use crate::LatLon;
298
299    use super::*;
300
301    // just a convenience macro that constructs a cell
302    macro_rules! cell {
303        ($containing_ids: expr) => {
304            Cell {
305                containing_ids: $containing_ids.iter().map(|&s| String::from(s)).collect(),
306                intersecting_areas: vec![],
307            }
308        };
309        ($containing_ids: expr, $intersecting_areas: expr) => {
310            Cell {
311                containing_ids: $containing_ids.iter().map(|&s| String::from(s)).collect(),
312                intersecting_areas: $intersecting_areas,
313            }
314        };
315    }
316
317    fn latlon(latitude: f64, longitude: f64) -> LatLon {
318        LatLon::new(latitude, longitude).unwrap()
319    }
320
321    fn bbox(
322        min_latitude: f64,
323        min_longitude: f64,
324        max_latitude: f64,
325        max_longitude: f64,
326    ) -> BoundingBox {
327        BoundingBox::new(min_latitude, min_longitude, max_latitude, max_longitude).unwrap()
328    }
329
330    #[test]
331    fn delegates_to_correct_cell_at_edges() {
332        // the world:
333        // ┌─┬─┐
334        // │A│B│
335        // ├─┼─┤
336        // │C│D│
337        // └─┴─┘
338        let boundaries = CountryBoundaries {
339            raster: vec![cell!(&["A"]), cell!(&["B"]), cell!(&["C"]), cell!(&["D"])],
340            raster_width: 2,
341            geometry_sizes: HashMap::new(),
342        };
343
344        assert_eq!(vec!["C"], boundaries.ids(latlon(-90.0, -180.0)));
345        assert_eq!(vec!["C"], boundaries.ids(latlon(-90.0, -90.0)));
346        assert_eq!(vec!["C"], boundaries.ids(latlon(-45.0, -180.0)));
347        // wrap around
348        assert_eq!(vec!["C"], boundaries.ids(latlon(-45.0, 180.0)));
349        assert_eq!(vec!["C"], boundaries.ids(latlon(-90.0, 180.0)));
350
351        assert_eq!(vec!["A"], boundaries.ids(latlon(0.0, -180.0)));
352        assert_eq!(vec!["A"], boundaries.ids(latlon(45.0, -180.0)));
353        assert_eq!(vec!["A"], boundaries.ids(latlon(0.0, -90.0)));
354        // wrap around
355        assert_eq!(vec!["A"], boundaries.ids(latlon(0.0, 180.0)));
356        assert_eq!(vec!["A"], boundaries.ids(latlon(45.0, 180.0)));
357
358        assert_eq!(vec!["B"], boundaries.ids(latlon(0.0, 0.0)));
359        assert_eq!(vec!["B"], boundaries.ids(latlon(45.0, 0.0)));
360        assert_eq!(vec!["B"], boundaries.ids(latlon(0.0, 90.0)));
361
362        assert_eq!(vec!["D"], boundaries.ids(latlon(-45.0, 0.0)));
363        assert_eq!(vec!["D"], boundaries.ids(latlon(-90.0, 0.0)));
364        assert_eq!(vec!["D"], boundaries.ids(latlon(-90.0, 90.0)));
365    }
366
367    #[test]
368    fn no_array_index_out_of_bounds_at_world_edges() {
369        let boundaries = CountryBoundaries {
370            raster: vec![cell!(&["A"])],
371            raster_width: 1,
372            geometry_sizes: HashMap::new(),
373        };
374
375        boundaries.ids(latlon(-90.0, -180.0));
376        boundaries.ids(latlon(90.0, 180.0));
377        boundaries.ids(latlon(90.0, -180.0));
378        boundaries.ids(latlon(-90.0, 180.0));
379    }
380
381    #[test]
382    fn get_containing_ids_sorted_by_size_ascending() {
383        let boundaries = CountryBoundaries {
384            raster: vec![cell!(&["D", "B", "C", "A"])],
385            raster_width: 1,
386            geometry_sizes: HashMap::from([
387                (String::from("A"), 10.0),
388                (String::from("B"), 15.0),
389                (String::from("C"), 100.0),
390                (String::from("D"), 800.0),
391            ]),
392        };
393        assert_eq!(vec!["A", "B", "C", "D"], boundaries.ids(latlon(1.0, 1.0)));
394    }
395
396    #[test]
397    fn get_intersecting_ids_in_bbox_is_merged_correctly() {
398        let boundaries = CountryBoundaries {
399            raster: vec![
400                cell!(&["A"]),
401                cell!(&["B"]),
402                cell!(&["C"]),
403                cell!(&["D", "E"]),
404            ],
405            raster_width: 2,
406            geometry_sizes: HashMap::new(),
407        };
408        assert_eq!(
409            HashSet::from(["A", "B", "C", "D", "E"]),
410            boundaries.intersecting_ids(bbox(-10.0, -10.0, 10.0, 10.0))
411        )
412    }
413
414    #[test]
415    fn get_intersecting_ids_in_bbox_wraps_longitude_correctly() {
416        let boundaries = CountryBoundaries {
417            raster: vec![cell!(&["A"]), cell!(&["B"]), cell!(&["C"])],
418            raster_width: 3,
419            geometry_sizes: HashMap::new(),
420        };
421        assert_eq!(
422            HashSet::from(["A", "C"]),
423            boundaries.intersecting_ids(bbox(0.0, 170.0, 1.0, -170.0))
424        )
425    }
426
427    #[test]
428    fn get_containing_ids_in_bbox_wraps_longitude_correctly() {
429        let boundaries = CountryBoundaries {
430            raster: vec![cell!(&["A", "B", "C"]), cell!(&["X"]), cell!(&["A", "B"])],
431            raster_width: 3,
432            geometry_sizes: HashMap::new(),
433        };
434        assert_eq!(
435            HashSet::from(["A", "B"]),
436            boundaries.containing_ids(bbox(0.0, 170.0, 1.0, -170.0))
437        )
438    }
439
440    #[test]
441    fn get_containing_ids_in_bbox_returns_correct_result_when_one_cell_is_empty() {
442        let boundaries = CountryBoundaries {
443            raster: vec![
444                cell!(&[] as &[&str; 0]),
445                cell!(&["A"]),
446                cell!(&["A"]),
447                cell!(&["A"]),
448            ],
449            raster_width: 2,
450            geometry_sizes: HashMap::new(),
451        };
452        assert!(boundaries
453            .containing_ids(bbox(-10.0, -10.0, 10.0, 10.0))
454            .is_empty())
455    }
456
457    #[test]
458    fn get_containing_ids_in_bbox_is_merged_correctly() {
459        let boundaries = CountryBoundaries {
460            raster: vec![
461                cell!(&["A", "B"]),
462                cell!(&["B", "A"]),
463                cell!(&["C", "B", "A"]),
464                cell!(&["D", "A"]),
465            ],
466            raster_width: 2,
467            geometry_sizes: HashMap::new(),
468        };
469        assert_eq!(
470            HashSet::from(["A"]),
471            boundaries.containing_ids(bbox(-10.0, -10.0, 10.0, 10.0))
472        )
473    }
474
475    #[test]
476    fn get_containing_ids_in_bbox_is_merged_correctly_an_nothing_is_left() {
477        let boundaries = CountryBoundaries {
478            raster: vec![cell!(&["A"]), cell!(&["B"]), cell!(&["C"]), cell!(&["D"])],
479            raster_width: 2,
480            geometry_sizes: HashMap::new(),
481        };
482
483        assert!(boundaries
484            .containing_ids(bbox(-10.0, -10.0, 10.0, 10.0))
485            .is_empty())
486    }
487}