Skip to main content

geonative_shapefile/
dataset.rs

1//! Top-level public API: `Shapefile`, feature iterator.
2//!
3//! A shapefile is a single-layer dataset (unlike a FileGDB), so the type
4//! exposes layer-level methods directly. Mmap-backed for the `.shp`
5//! payload so multi-GB shapefiles stay bounded in app-private memory.
6
7use std::path::{Path, PathBuf};
8
9use geonative_core::{self as core, Crs, Feature, GeomField, GeometryType, Schema, Value};
10use memmap2::Mmap;
11
12use crate::dbf::{build_schema, decode_field, parse_header, DbfHeader};
13use crate::error::{Result, ShpError};
14use crate::header::ShapeType;
15use crate::shape;
16use crate::shx;
17
18/// One opened shapefile (`.shp` + `.shx` + `.dbf` + optional `.prj`).
19#[derive(Debug)]
20pub struct Shapefile {
21    /// Mmapped `.shp` (bounded RAM regardless of file size).
22    shp_mmap: Mmap,
23    shx: shx::Shx,
24    dbf_bytes: Vec<u8>,
25    dbf_header: DbfHeader,
26    schema: Schema,
27}
28
29impl Shapefile {
30    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
31        let base = strip_shp_ext(path.as_ref());
32        let shp_path = base.with_extension("shp");
33        let shx_path = base.with_extension("shx");
34        let dbf_path = base.with_extension("dbf");
35        let prj_path = base.with_extension("prj");
36
37        let shp_file = std::fs::File::open(&shp_path)
38            .map_err(|e| ShpError::MissingFile(format!("{}: {e}", shp_path.display())))?;
39        // SAFETY: standard mmap caveats — file truncation/modification while
40        // mapped may SIGBUS. Read-only, process-private view of a local file.
41        #[allow(unsafe_code)]
42        let shp_mmap = unsafe { Mmap::map(&shp_file)? };
43
44        let shx_bytes = std::fs::read(&shx_path)
45            .map_err(|e| ShpError::MissingFile(format!("{}: {e}", shx_path.display())))?;
46        let shx = shx::parse(&shx_bytes)?;
47
48        let dbf_bytes = std::fs::read(&dbf_path)
49            .map_err(|e| ShpError::MissingFile(format!("{}: {e}", dbf_path.display())))?;
50        let dbf_header = parse_header(&dbf_bytes)?;
51
52        let prj = std::fs::read_to_string(&prj_path).ok();
53        let crs = match prj {
54            Some(s) if !s.trim().is_empty() => Crs::Wkt(s.trim().to_string()),
55            _ => Crs::Unknown,
56        };
57
58        let geom_field = GeomField {
59            name: "geometry".to_string(),
60            kind: shape_type_to_geometry_type(shx.header.shape_type),
61            has_z: false,
62            has_m: false,
63            extent: Some([
64                shx.header.bbox_xy[0],
65                shx.header.bbox_xy[1],
66                shx.header.bbox_z[0],
67                shx.header.bbox_xy[2],
68                shx.header.bbox_xy[3],
69                shx.header.bbox_z[1],
70            ]),
71        };
72        let schema = build_schema(&dbf_header, geom_field, crs);
73
74        Ok(Self {
75            shp_mmap,
76            shx,
77            dbf_bytes,
78            dbf_header,
79            schema,
80        })
81    }
82
83    pub fn schema(&self) -> &Schema {
84        &self.schema
85    }
86
87    pub fn feature_count(&self) -> usize {
88        // Trust the .dbf record count (matches .shx record count for well-
89        // formed files; we don't repair mismatches in v0.1).
90        self.dbf_header.n_records as usize
91    }
92
93    pub fn shape_type(&self) -> ShapeType {
94        self.shx.header.shape_type
95    }
96
97    /// Iterate features in record order. Lazy — each step does one shape
98    /// decode + one DBF row decode.
99    pub fn read(&self) -> FeatureIter<'_> {
100        FeatureIter {
101            shp: self,
102            index: 0,
103        }
104    }
105}
106
/// Returns `p` without its final extension.
///
/// Despite the name, any extension is removed (not only `.shp`), provided it
/// is valid UTF-8; a path with no extension, or with a non-UTF-8 one, is
/// returned unchanged.
fn strip_shp_ext(p: &Path) -> PathBuf {
    match p.extension().and_then(|ext| ext.to_str()) {
        Some(_) => p.with_extension(""),
        None => p.to_path_buf(),
    }
}
114
115fn shape_type_to_geometry_type(s: ShapeType) -> GeometryType {
116    match s {
117        ShapeType::Point | ShapeType::PointZ | ShapeType::PointM => GeometryType::Point,
118        ShapeType::Polyline | ShapeType::PolylineZ | ShapeType::PolylineM => {
119            GeometryType::MultiLineString
120        }
121        ShapeType::Polygon | ShapeType::PolygonZ | ShapeType::PolygonM => {
122            GeometryType::MultiPolygon
123        }
124        ShapeType::Multipoint | ShapeType::MultipointZ | ShapeType::MultipointM => {
125            GeometryType::MultiPoint
126        }
127        _ => GeometryType::Polygon, // multipatch / null — treat as polygon best-effort
128    }
129}
130
131#[derive(Debug)]
132pub struct FeatureIter<'a> {
133    shp: &'a Shapefile,
134    index: usize,
135}
136
137impl<'a> Iterator for FeatureIter<'a> {
138    type Item = Result<Feature>;
139
140    fn next(&mut self) -> Option<Self::Item> {
141        let total = self.shp.shx.records.len();
142        while self.index < total {
143            let i = self.index;
144            self.index += 1;
145
146            match decode_one(self.shp, i) {
147                Ok(Some(f)) => return Some(Ok(f)),
148                Ok(None) => continue, // deleted row
149                Err(e) => return Some(Err(e)),
150            }
151        }
152        None
153    }
154}
155
156// ----- core::Layer impl -------------------------------------------------
157// Shapefile is single-layer. Wrap with `core::SingleLayerDataset` to expose
158// it through the format-polymorphic `core::Dataset` interface.
159
160impl core::Layer for Shapefile {
161    fn name(&self) -> &str {
162        "default"
163    }
164    fn schema(&self) -> &Schema {
165        &self.schema
166    }
167    fn feature_count(&self) -> Option<i64> {
168        Some(self.dbf_header.n_records as i64)
169    }
170    fn read<'a>(&'a self) -> Box<dyn Iterator<Item = core::Result<Feature>> + 'a> {
171        Box::new(self.read().map(|r| r.map_err(core::Error::from)))
172    }
173}
174
175fn decode_one(shp: &Shapefile, i: usize) -> Result<Option<Feature>> {
176    let rec = &shp.shx.records[i];
177    let start = rec.offset_bytes as usize;
178    let end = start + 8 + rec.content_len_bytes as usize;
179    if end > shp.shp_mmap.len() {
180        return Err(ShpError::malformed(format!(
181            "record {i} runs past .shp EOF (offset {start} + content {})",
182            rec.content_len_bytes
183        )));
184    }
185    let content = &shp.shp_mmap[start + 8..end];
186    let geom = shape::decode_record_content(content)?;
187
188    // DBF row: skip header, then i × record_len bytes.
189    let dbf_off = shp.dbf_header.header_len as usize + i * shp.dbf_header.record_len as usize;
190    if dbf_off + shp.dbf_header.record_len as usize > shp.dbf_bytes.len() {
191        return Ok(None); // ran past the table
192    }
193    let row = &shp.dbf_bytes[dbf_off..dbf_off + shp.dbf_header.record_len as usize];
194    if row.first() == Some(&0x2A) {
195        return Ok(None); // deleted row
196    }
197
198    let mut attrs: Vec<Value> = Vec::with_capacity(shp.dbf_header.fields.len());
199    for f in &shp.dbf_header.fields {
200        attrs.push(decode_field(row, f));
201    }
202
203    Ok(Some(Feature {
204        fid: Some((i as i64) + 1),
205        geometry: Some(geom),
206        attributes: attrs,
207    }))
208}