gistools/readers/shapefile/
shp.rs

1use super::DataBaseFile;
2use crate::{
3    parsers::{BufferReader, FeatureReader, Reader},
4    proj::Transformer,
5};
6use alloc::{vec, vec::Vec};
7use core::marker::PhantomData;
8use s2json::{
9    BBox3D, MValue, MValueCompatible, Properties, VectorFeature, VectorFeatureType, VectorGeometry,
10    VectorGeometryType, VectorLineString, VectorMultiLineString, VectorMultiPoint,
11    VectorMultiPointGeometry, VectorPoint, VectorPointGeometry,
12};
13
14/// A Shapefile Header describing the internal data
15#[derive(Debug, Clone, PartialEq)]
16pub struct SHPHeader {
17    /// The length of the file
18    pub length: u64,
19    /// The shapefile version
20    pub version: i32,
21    /// The shape code
22    pub shp_code: i32,
23    /// The bounding box
24    pub bbox: BBox3D,
25}
26
27/// A Shapefile Row explaining how to read the feature
28#[derive(Debug)]
29pub struct SHPRow {
30    id: u64,
31    #[allow(dead_code)]
32    len: u64,
33    _type: i32,
34    data: Vec<u8>,
35}
36
37/// # The Shapefile Reader
38///
39/// ## Description
40/// Reads data from a shapefile
41///
42/// Implements the [`FeatureReader`] trait
43///
44/// ## Usage
45///
46/// NOTE: It's recommended to not parse the shapefile directly but instead:
47/// - [`crate::readers::file::shapefile_from_path`]
48/// - [`crate::readers::mmap::shapefile_from_path`]
49///
50/// This ensures the other files paired with the shapefile are loaded to properly handle the
51/// projection and properties data.
52///
53/// ## Usage
54///
55/// The methods you have access to:
56/// - [`ShapeFileReader::new`]: Create a new ShapeFileReader
57/// - [`ShapeFileReader::get_header`]: Get the file header data
58/// - [`ShapeFileReader::iter`]: Iterate over the features in the shapefile
59///
60/// ### From Path (Recommended as it will ensure to pull in associated files):
61/// ```rust
62/// use gistools::{parsers::{FileReader, FeatureReader}, readers::{ShapeFileReader, file::shapefile_from_path}};
63/// use s2json::MValue;
64/// use std::{collections::BTreeMap, path::PathBuf};
65///
66/// let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
67/// path.push("tests/readers/shapefile/fixtures/utf.shp");
68/// let path_str = path.to_str().unwrap();
69///
70/// #[derive(Default, Debug, Clone, MValue, PartialEq)]
71/// struct Props {
72///     field: String,
73/// }
74///
75/// let shp: ShapeFileReader<FileReader, Props> =
76///     shapefile_from_path(path_str, BTreeMap::from([("a".into(), "b".into())]));
77///
78/// let features: Vec<_> = shp.iter().collect();
79/// assert_eq!(features.len(), 2);
80/// ```
81///
82/// ## Links
83/// - <https://en.wikipedia.org/wiki/Shapefile>
84#[derive(Debug, Clone)]
85pub struct ShapeFileReader<T: Reader, P: MValueCompatible = Properties> {
86    /// The input reader
87    reader: T,
88    header: SHPHeader,
89    dbf: Option<DataBaseFile<T, P>>, // Use the same lifetime for dbf
90    transform: Option<Transformer>,
91    row_offsets: Vec<u64>,
92    _phantom: PhantomData<VectorFeature<(), P, MValue>>,
93}
94impl<T: Reader, P: MValueCompatible> ShapeFileReader<T, P> {
95    /// Create a new Shapefile Reader
96    pub fn new(
97        mut reader: T,
98        dbf: Option<DataBaseFile<T, P>>,
99        transform: Option<Transformer>,
100    ) -> ShapeFileReader<T, P> {
101        let header = ShapeFileReader::<T, P>::parse_header(&mut reader);
102        let row_offsets = ShapeFileReader::<T, P>::parse_row_offsets(&mut reader);
103        ShapeFileReader::<T, P> {
104            reader,
105            header,
106            dbf,
107            row_offsets,
108            transform,
109            _phantom: PhantomData,
110        }
111    }
112
113    /// Return a reference to the header
114    pub fn get_header(&self) -> &SHPHeader {
115        &self.header
116    }
117
118    /// Returns a header object from the reader
119    fn parse_header(reader: &mut T) -> SHPHeader {
120        let mut header = SHPHeader {
121            length: (reader.int32_be(Some(6 << 2)) << 1) as u64,
122            version: reader.int32_le(Some(7 << 2)),
123            shp_code: reader.int32_le(Some(8 << 2)),
124            bbox: BBox3D::new(
125                reader.f64_le(Some(9 << 2)),
126                reader.f64_le(Some(11 << 2)),
127                reader.f64_le(Some(13 << 2)),
128                reader.f64_le(Some(15 << 2)),
129                reader.f64_le(Some(17 << 2)),
130                reader.f64_le(Some(19 << 2)),
131            ),
132        };
133        if header.shp_code > 20 {
134            header.shp_code -= 20;
135        }
136
137        header
138    }
139
140    /// Returns the rows starting positions from the reader
141    fn parse_row_offsets(reader: &mut T) -> Vec<u64> {
142        let mut res = vec![];
143
144        let mut offset = 100;
145        let len = reader.len() - 8;
146        while offset <= len {
147            let offset_length = (reader.int32_be(Some(offset + 4)) << 1) as u64;
148            let _type = reader.int32_le(Some(offset + 8));
149            if offset_length == 0 {
150                break;
151            }
152            if _type != 0 {
153                res.push(offset);
154            }
155            offset += 8 + offset_length;
156        }
157
158        res
159    }
160
161    fn parse_row(&self, index: usize) -> Option<VectorFeature<(), P, MValue>> {
162        if index >= self.row_offsets.len() {
163            return None;
164        }
165        let row_offset = self.row_offsets.get(index)?;
166        let SHPRow { id, _type, data, .. } = self.get_row(*row_offset)?;
167        let geometry = self.parse_geometry(_type, &data);
168        geometry.as_ref()?;
169        let mut properties: P = P::default();
170        if let Some(dbf) = &self.dbf
171            && let Some(props) = dbf.get_properties(index as u64)
172        {
173            properties = props;
174        }
175
176        Some(VectorFeature {
177            id: Some(id),
178            _type: VectorFeatureType::VectorFeature,
179            face: 0.into(),
180            properties,
181            geometry: geometry.unwrap(),
182            metadata: None,
183        })
184    }
185
186    /// Get a row
187    fn get_row(&self, offset: u64) -> Option<SHPRow> {
188        let id = self.reader.int32_be(Some(offset)) as u64;
189        let len = (self.reader.int32_be(Some(offset + 4)) << 1) as u64;
190        if len == 0 || offset + len + 8 > self.reader.len() {
191            return None;
192        }
193        Some(SHPRow {
194            id,
195            len,
196            data: self.reader.slice(Some(offset + 12), Some(offset + 12 + len - 4)),
197            _type: self.reader.int32_le(Some(offset + 8)),
198        })
199    }
200
201    fn parse_geometry(&self, _type: i32, data: &[u8]) -> Option<VectorGeometry<MValue>> {
202        let reader: BufferReader = data.into();
203        let is_3d = _type == 11 || _type == 13 || _type == 15 || _type == 18;
204        if _type == 1 || _type == 11 {
205            Some(VectorGeometry::Point(VectorPointGeometry {
206                _type: VectorGeometryType::Point,
207                is_3d,
208                coordinates: self.parse_point(&reader, 0, if is_3d { Some(16) } else { None }),
209                ..Default::default()
210            }))
211        } else if _type == 8 || _type == 18 {
212            self.parse_multi_point(&reader, is_3d)
213        } else if _type == 3 || _type == 5 || _type == 13 || _type == 15 {
214            let is_poly = _type == 5 || _type == 15;
215            self.parse_multi_line(&reader, is_poly, is_3d)
216        } else {
217            panic!("invalid shape type: {}", _type);
218        }
219    }
220
221    /// Parse a point
222    fn parse_point(
223        &self,
224        data: &BufferReader,
225        offset: u64,
226        offset_3d: Option<u64>,
227    ) -> VectorPoint<MValue> {
228        let mut z: Option<f64> = None;
229        if let Some(offset) = offset_3d {
230            z = Some(data.f64_le(Some(offset)));
231        }
232        let mut point =
233            VectorPoint::new(data.f64_le(Some(offset)), data.f64_le(Some(offset + 8)), z, None);
234        if let Some(transformer) = &self.transform {
235            point = transformer.forward(&point);
236        }
237
238        point
239    }
240
241    fn parse_multi_point(
242        &self,
243        data: &BufferReader,
244        is_3d: bool,
245    ) -> Option<VectorGeometry<MValue>> {
246        let num_points = data.int32_le(Some(32)) as u64;
247        if num_points == 0 {
248            return None;
249        }
250        let mut offset = 0;
251        let mut z_offset = 36 + 16 * num_points;
252        // grab the min-max
253        let mins = self.parse_point(data, offset, None);
254        let maxs = self.parse_point(data, offset + 16, None);
255        offset += 36;
256        let mut bbox = BBox3D::new(mins.x, mins.y, maxs.x, maxs.y, 0., 0.);
257        if is_3d {
258            bbox.near = data.f64_le(Some(z_offset));
259            bbox.far = data.f64_le(Some(z_offset + 8));
260            z_offset += 16;
261        }
262
263        let mut coordinates: VectorMultiPoint<MValue> = vec![];
264        let mut index = 0;
265        while index < num_points {
266            let point = self.parse_point(data, offset, if is_3d { Some(z_offset) } else { None });
267            offset += 16;
268            if is_3d {
269                z_offset += 8;
270                bbox.extend_from_point(&point); // shapefiles often don't store the bbox z-values
271            }
272            coordinates.push(point);
273            index += 1;
274        }
275
276        if num_points == 1 {
277            Some(VectorGeometry::Point(VectorPointGeometry {
278                _type: VectorGeometryType::Point,
279                is_3d,
280                coordinates: core::mem::take(&mut coordinates[0]),
281                bbox: Some(bbox),
282                ..Default::default()
283            }))
284        } else {
285            Some(VectorGeometry::MultiPoint(VectorMultiPointGeometry {
286                _type: VectorGeometryType::MultiPoint,
287                is_3d,
288                coordinates,
289                bbox: Some(bbox),
290                ..Default::default()
291            }))
292        }
293    }
294
295    fn parse_multi_line(
296        &self,
297        data: &BufferReader,
298        is_poly: bool,
299        is_3d: bool,
300    ) -> Option<VectorGeometry<MValue>> {
301        let num_parts = data.int32_le(Some(32)) as u64; // The number of rings in the polygon.
302        let num_points = data.int32_le(Some(36)) as u64; // the total number of points in the polygon.
303        if num_points == 0 || num_parts == 0 {
304            return None;
305        }
306        let mut offset = 0;
307        let mut z_offset = 40 + 4 * num_parts + 16 * num_points;
308        // grab the min-max
309        let mins = self.parse_point(data, offset, None);
310        let maxs = self.parse_point(data, offset + 16, None);
311        let mut bbox = BBox3D::new(mins.x, mins.y, maxs.x, maxs.y, 0., 0.);
312        offset += 40;
313        if is_3d {
314            bbox.near = data.f64_le(Some(z_offset));
315            bbox.far = data.f64_le(Some(z_offset + 8));
316            z_offset += 16;
317        }
318
319        // build parts
320        let mut parts: Vec<u64> = vec![];
321        let mut done = 0;
322        while done < num_parts {
323            parts.push(data.int32_le(Some(offset)) as u64);
324            offset += 4;
325            done += 1;
326        }
327
328        // build coordinates
329        let mut index = 0;
330        let mut coordinates: VectorMultiLineString<MValue> = vec![];
331        for i in 0..num_parts {
332            let part_end = parts.get(i as usize + 1).unwrap_or(&num_points);
333            // build a line for part
334            let mut line: VectorLineString<MValue> = vec![];
335            while index < *part_end {
336                let point =
337                    self.parse_point(data, offset, if is_3d { Some(z_offset) } else { None });
338                offset += 16;
339                if is_3d {
340                    z_offset += 8;
341                    bbox.extend_from_point(&point); // shapefiles often don't store the bbox z-values
342                }
343                line.push(point);
344                index += 1;
345            }
346            coordinates.push(line);
347        }
348
349        if !is_poly {
350            if num_parts == 1 {
351                Some(VectorGeometry::new_linestring(
352                    core::mem::take(&mut coordinates[0]),
353                    Some(bbox),
354                ))
355            } else {
356                Some(VectorGeometry::new_multilinestring(coordinates, Some(bbox)))
357            }
358        } else {
359            Some(VectorGeometry::new_polygon(coordinates, Some(bbox)))
360        }
361    }
362}
363/// The GPX Iterator tool
364#[derive(Debug)]
365pub struct ShapefileIterator<'a, T: Reader, P: MValueCompatible> {
366    reader: &'a ShapeFileReader<T, P>,
367    index: usize,
368    stride: usize,
369}
370impl<T: Reader, P: MValueCompatible> Iterator for ShapefileIterator<'_, T, P> {
371    type Item = VectorFeature<(), P, MValue>;
372
373    fn next(&mut self) -> Option<Self::Item> {
374        if let Some(feature) = self.reader.parse_row(self.index) {
375            self.index += self.stride;
376            return Some(feature);
377        }
378        None
379    }
380}
381/// A feature reader trait with a callback-based approach
382impl<T: Reader, P: MValueCompatible> FeatureReader<(), P, MValue> for ShapeFileReader<T, P> {
383    type FeatureIterator<'a>
384        = ShapefileIterator<'a, T, P>
385    where
386        T: 'a,
387        P: 'a;
388
389    fn iter(&self) -> Self::FeatureIterator<'_> {
390        ShapefileIterator { reader: self, index: 0, stride: 1 }
391    }
392    // The assumption here is that the reader has been cloned already
393    fn par_iter(&self, pool_size: usize, thread_id: usize) -> Self::FeatureIterator<'_> {
394        ShapefileIterator { reader: self, index: thread_id, stride: pool_size }
395    }
396}