use crate::dbf::{DbfReader, FieldDescriptor};
use crate::error::{Result, ShapefileError};
use crate::shp::{Shape, ShapefileHeader, ShpReader};
use crate::shx::{IndexEntry, ShxReader};
use oxigdal_core::vector::{
Coordinate, Feature, FieldValue, Geometry, LineString as CoreLineString,
MultiLineString as CoreMultiLineString, MultiPoint as CoreMultiPoint, Point as CorePoint,
Polygon as CorePolygon,
};
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
/// One shapefile record: a shape from the `.shp` file paired with its
/// attribute row from the `.dbf` file.
#[derive(Debug, Clone)]
pub struct ShapefileFeature {
/// Record number taken from the shape record this feature was built from.
pub record_number: i32,
/// Converted geometry, or `None` when the shape yielded no geometry
/// (for example a null shape).
pub geometry: Option<Geometry>,
/// Attribute values keyed by DBF field name.
pub attributes: HashMap<String, FieldValue>,
}
impl ShapefileFeature {
pub fn new(
record_number: i32,
geometry: Option<Geometry>,
attributes: HashMap<String, FieldValue>,
) -> Self {
Self {
record_number,
geometry,
attributes,
}
}
pub fn to_oxigdal_feature(&self) -> Result<Feature> {
let geometry = self
.geometry
.clone()
.ok_or_else(|| ShapefileError::invalid_geometry("feature has no geometry"))?;
let mut feature = Feature::new(geometry);
for (key, value) in &self.attributes {
feature.set_property(key, value.clone());
}
Ok(feature)
}
}
/// Reader for a shapefile data set (`.shp` + `.dbf`, with optional `.shx`,
/// `.prj` and `.cpg` companions).
pub struct ShapefileReader {
/// Path the reader was opened with; sibling file paths are derived from it.
base_path: PathBuf,
/// Header read from the `.shp` file when the reader was opened.
header: ShapefileHeader,
/// Field descriptors read from the `.dbf` file when the reader was opened.
field_descriptors: Vec<FieldDescriptor>,
/// Entries read from the `.shx` file, if that file existed and could be opened.
index_entries: Option<Vec<IndexEntry>>,
/// Trimmed, non-empty contents of the `.prj` file, if available.
pub crs: Option<String>,
/// Trimmed, non-empty contents of the `.cpg` file, if available.
pub encoding: Option<String>,
}
impl ShapefileReader {
/// Opens a shapefile data set given the path of any of its files (or the
/// path without an extension).
///
/// The `.shp` header and the `.dbf` field descriptors are read eagerly.
/// The `.shx` index, the `.prj` text and the `.cpg` text are optional: they
/// are picked up when the file exists and can be read, and left as `None`
/// otherwise.
///
/// # Errors
///
/// * `ShapefileError::MissingFile` when the `.shp` or `.dbf` file cannot be
///   opened (whatever the underlying I/O reason).
/// * Any error reported while parsing the `.shp` header, the `.dbf` header
///   or an existing `.shx` file.
pub fn open<P: AsRef<Path>>(base_path: P) -> Result<Self> {
    let base_path = base_path.as_ref();
    let [shp_path, dbf_path, shx_path, prj_path, cpg_path] =
        ["shp", "dbf", "shx", "prj", "cpg"].map(|ext| Self::with_extension(base_path, ext));

    // Mandatory main file.
    let shp_file = File::open(&shp_path).map_err(|_| ShapefileError::MissingFile {
        file_type: ".shp".to_string(),
    })?;
    let shp_reader = ShpReader::new(BufReader::new(shp_file))?;
    let header = shp_reader.header().clone();

    // Mandatory attribute table.
    let dbf_file = File::open(&dbf_path).map_err(|_| ShapefileError::MissingFile {
        file_type: ".dbf".to_string(),
    })?;
    let dbf_reader = DbfReader::new(BufReader::new(dbf_file))?;
    let field_descriptors = dbf_reader.field_descriptors().to_vec();

    // Optional index: a failure to open is tolerated, a failure to parse is not.
    let shx_file = if shx_path.exists() {
        File::open(&shx_path).ok()
    } else {
        None
    };
    let index_entries = match shx_file {
        Some(file) => {
            let mut shx_reader = ShxReader::new(BufReader::new(file))?;
            Some(shx_reader.read_all_entries()?)
        }
        None => None,
    };

    // Optional text companions: trimmed contents, with blank files treated as absent.
    let read_trimmed = |path: &Path| -> Option<String> {
        if !path.exists() {
            return None;
        }
        std::fs::read_to_string(path)
            .ok()
            .map(|text| text.trim().to_string())
            .filter(|text| !text.is_empty())
    };
    let crs = read_trimmed(prj_path.as_path());
    let encoding = read_trimmed(cpg_path.as_path());

    Ok(Self {
        base_path: base_path.to_path_buf(),
        header,
        field_descriptors,
        index_entries,
        crs,
        encoding,
    })
}
/// Returns the `.shp` header captured when the reader was opened.
pub fn header(&self) -> &ShapefileHeader {
&self.header
}
/// Returns the `.dbf` field descriptors captured when the reader was opened.
pub fn field_descriptors(&self) -> &[FieldDescriptor] {
&self.field_descriptors
}
/// Returns the `.shx` index entries, or `None` when no index was loaded.
pub fn index_entries(&self) -> Option<&[IndexEntry]> {
self.index_entries.as_deref()
}
/// Returns the stored `crs` text (taken from the `.prj` file on open), if any.
pub fn crs(&self) -> Option<&str> {
self.crs.as_deref()
}
/// Returns the stored `encoding` text (taken from the `.cpg` file on open), if any.
pub fn encoding(&self) -> Option<&str> {
self.encoding.as_deref()
}
/// Reads every feature and keeps those whose bounding box touches or
/// overlaps the query rectangle `[min_x, max_x] x [min_y, max_y]`.
///
/// Features without a geometry, or whose geometry reports no bounds, are
/// dropped. The whole data set is read before filtering; no spatial index
/// is consulted.
///
/// # Errors
///
/// Propagates any error from [`Self::read_features`].
pub fn features_in_bbox(
    &mut self,
    min_x: f64,
    min_y: f64,
    max_x: f64,
    max_y: f64,
) -> Result<Vec<ShapefileFeature>> {
    let mut features = self.read_features()?;
    features.retain(|feature| {
        match feature.geometry.as_ref().and_then(Self::geometry_bbox) {
            Some((fx_min, fy_min, fx_max, fy_max)) => {
                // Written as "not disjoint" so NaN comparisons behave as before.
                let disjoint_x = fx_max < min_x || fx_min > max_x;
                let disjoint_y = fy_max < min_y || fy_min > max_y;
                !(disjoint_x || disjoint_y)
            }
            None => false,
        }
    });
    Ok(features)
}
/// Returns the bounds reported by the geometry itself.
///
/// NOTE(review): callers in this file read the tuple as
/// `(min_x, min_y, max_x, max_y)` — confirm against `Geometry::bounds`.
fn geometry_bbox(geom: &Geometry) -> Option<(f64, f64, f64, f64)> {
geom.bounds()
}
/// Opens fresh handles on the `.shp` and `.dbf` files and returns an
/// iterator that yields features one record at a time.
///
/// # Errors
///
/// Fails when either file cannot be opened or its header cannot be parsed.
pub fn iter_features(&self) -> Result<FeatureIter<'_>> {
    let open_buffered = |ext: &str| -> std::io::Result<BufReader<File>> {
        File::open(Self::with_extension(&self.base_path, ext)).map(BufReader::new)
    };

    let shp_reader = ShpReader::new(open_buffered("shp")?)?;
    let dbf_reader = DbfReader::new(open_buffered("dbf")?)?;

    Ok(FeatureIter {
        shp_reader,
        dbf_reader,
        field_descriptors: &self.field_descriptors,
        done: false,
    })
}
/// Reads every feature and returns, in file order, those for which
/// `predicate` returns `true`.
///
/// # Errors
///
/// Propagates any error from [`Self::read_features`].
pub fn read_features_where<F>(&self, predicate: F) -> Result<Vec<ShapefileFeature>>
where
    F: Fn(&ShapefileFeature) -> bool,
{
    let mut features = self.read_features()?;
    features.retain(predicate);
    Ok(features)
}
/// Reads every feature and returns those accepted by `filter.matches`.
///
/// Delegates to `read_features_where`, so the same errors apply.
pub fn read_features_filtered(
&self,
filter: &crate::filter::FieldFilter,
) -> Result<Vec<ShapefileFeature>> {
self.read_features_where(|f| filter.matches(f))
}
/// Reads the complete `.shp` and `.dbf` files and pairs their records, in
/// order, into features.
///
/// # Errors
///
/// * I/O or parse errors from opening and reading either file.
/// * `ShapefileError::RecordMismatch` when the two files hold a different
///   number of records.
/// * Any geometry conversion error for an individual shape.
pub fn read_features(&self) -> Result<Vec<ShapefileFeature>> {
    let shp_file = File::open(Self::with_extension(&self.base_path, "shp"))?;
    let mut shp_reader = ShpReader::new(BufReader::new(shp_file))?;
    let dbf_file = File::open(Self::with_extension(&self.base_path, "dbf"))?;
    let mut dbf_reader = DbfReader::new(BufReader::new(dbf_file))?;

    let shape_records = shp_reader.read_all_records()?;
    let dbf_records = dbf_reader.read_all_records()?;

    let (shp_count, dbf_count) = (shape_records.len(), dbf_records.len());
    if shp_count != dbf_count {
        return Err(ShapefileError::RecordMismatch {
            shp_count,
            dbf_count,
        });
    }

    shape_records
        .iter()
        .zip(&dbf_records)
        .map(|(shape_record, dbf_record)| -> Result<ShapefileFeature> {
            let geometry = Self::shape_to_geometry(&shape_record.shape)?;
            let attributes = Self::dbf_to_attributes(dbf_record, &self.field_descriptors);
            Ok(ShapefileFeature::new(
                shape_record.record_number,
                geometry,
                attributes,
            ))
        })
        .collect()
}
/// Converts a raw shapefile [`Shape`] into an optional core [`Geometry`].
///
/// Mapping:
/// * `Null` -> `None`.
/// * `Point` / `PointZ` / `PointM` -> `Geometry::Point`.
/// * `PolyLine` -> `LineString` when the record has exactly one part (all
///   points are used), otherwise `MultiLineString`. Parts with fewer than
///   two points, or that the core type rejects, are skipped.
/// * `Polygon` -> `Polygon` whose exterior is the first part; every further
///   part is treated as an interior ring. Interior rings with fewer than
///   four points, or that the core type rejects, are skipped.
///   NOTE(review): records holding several outer rings are therefore not
///   split into separate polygons — confirm this is acceptable.
/// * `MultiPoint` -> `MultiPoint`.
/// * `*Z` / `*M` variants are delegated to the dedicated helpers.
/// * `MultiPatch` -> `MultiPoint` of 3D points; points are paired with
///   `z_values` positionally, so points beyond the shorter of the two
///   sequences are dropped.
///
/// `Ok(None)` is returned when the shape has too few points to form the
/// target geometry.
///
/// # Errors
///
/// Returns an invalid-geometry error when the core geometry constructor
/// rejects the coordinates, or when a part's offsets do not describe a valid
/// range of the record's points (negative, decreasing or past the end).
/// Such offsets previously caused a slice-index panic.
fn shape_to_geometry(shape: &Shape) -> Result<Option<Geometry>> {
    match shape {
        Shape::Null => Ok(None),
        Shape::Point(point) => {
            let oxigdal_point = CorePoint::new(point.x, point.y);
            Ok(Some(Geometry::Point(oxigdal_point)))
        }
        Shape::PointZ(point) => {
            let coord = if let Some(m) = point.m {
                Coordinate::new_3dm(point.x, point.y, point.z, m)
            } else {
                Coordinate::new_3d(point.x, point.y, point.z)
            };
            Ok(Some(Geometry::Point(CorePoint::from_coord(coord))))
        }
        Shape::PointM(point) => {
            let coord = Coordinate::new_2dm(point.x, point.y, point.m);
            Ok(Some(Geometry::Point(CorePoint::from_coord(coord))))
        }
        Shape::PolyLine(multi_part) => {
            if multi_part.parts.len() == 1 {
                let coords: Vec<Coordinate> = multi_part
                    .points
                    .iter()
                    .map(|p| Coordinate::new_2d(p.x, p.y))
                    .collect();
                if coords.len() < 2 {
                    return Ok(None);
                }
                let linestring = CoreLineString::new(coords).map_err(|e| {
                    ShapefileError::invalid_geometry(format!("Invalid LineString: {}", e))
                })?;
                Ok(Some(Geometry::LineString(linestring)))
            } else {
                let part_count = multi_part.parts.len();
                let point_count = multi_part.points.len();
                let mut linestrings = Vec::new();
                for i in 0..part_count {
                    let start_idx = multi_part.parts[i] as usize;
                    let end_idx = if i + 1 < part_count {
                        multi_part.parts[i + 1] as usize
                    } else {
                        point_count
                    };
                    // Checked slicing: corrupt offsets become an error, not a panic.
                    let part_points =
                        multi_part.points.get(start_idx..end_idx).ok_or_else(|| {
                            ShapefileError::invalid_geometry(format!(
                                "PolyLine part {} has invalid point range {}..{} ({} points available)",
                                i, start_idx, end_idx, point_count
                            ))
                        })?;
                    let coords: Vec<Coordinate> = part_points
                        .iter()
                        .map(|p| Coordinate::new_2d(p.x, p.y))
                        .collect();
                    if coords.len() >= 2 {
                        if let Ok(linestring) = CoreLineString::new(coords) {
                            linestrings.push(linestring);
                        }
                    }
                }
                if linestrings.is_empty() {
                    Ok(None)
                } else {
                    Ok(Some(Geometry::MultiLineString(CoreMultiLineString::new(
                        linestrings,
                    ))))
                }
            }
        }
        Shape::Polygon(multi_part) => {
            if multi_part.parts.is_empty() {
                return Ok(None);
            }
            let part_count = multi_part.parts.len();
            let point_count = multi_part.points.len();
            // Half-open point range of ring `i`: up to the next ring's start,
            // or to the end of the point list for the last ring.
            let ring_range = |i: usize| {
                let start = multi_part.parts[i] as usize;
                let end = if i + 1 < part_count {
                    multi_part.parts[i + 1] as usize
                } else {
                    point_count
                };
                (start, end)
            };

            let (exterior_start, exterior_end) = ring_range(0);
            let exterior_points = multi_part
                .points
                .get(exterior_start..exterior_end)
                .ok_or_else(|| {
                    ShapefileError::invalid_geometry(format!(
                        "Polygon ring 0 has invalid point range {}..{} ({} points available)",
                        exterior_start, exterior_end, point_count
                    ))
                })?;
            let exterior_coords: Vec<Coordinate> = exterior_points
                .iter()
                .map(|p| Coordinate::new_2d(p.x, p.y))
                .collect();
            if exterior_coords.len() < 4 {
                return Ok(None);
            }
            let exterior = CoreLineString::new(exterior_coords).map_err(|e| {
                ShapefileError::invalid_geometry(format!("Invalid exterior ring: {}", e))
            })?;

            let mut interiors = Vec::new();
            for i in 1..part_count {
                let (start_idx, end_idx) = ring_range(i);
                let ring_points =
                    multi_part.points.get(start_idx..end_idx).ok_or_else(|| {
                        ShapefileError::invalid_geometry(format!(
                            "Polygon ring {} has invalid point range {}..{} ({} points available)",
                            i, start_idx, end_idx, point_count
                        ))
                    })?;
                let interior_coords: Vec<Coordinate> = ring_points
                    .iter()
                    .map(|p| Coordinate::new_2d(p.x, p.y))
                    .collect();
                if interior_coords.len() >= 4 {
                    if let Ok(interior) = CoreLineString::new(interior_coords) {
                        interiors.push(interior);
                    }
                }
            }
            let polygon = CorePolygon::new(exterior, interiors).map_err(|e| {
                ShapefileError::invalid_geometry(format!("Invalid polygon: {}", e))
            })?;
            Ok(Some(Geometry::Polygon(polygon)))
        }
        Shape::MultiPoint(multi_part) => {
            let points: Vec<CorePoint> = multi_part
                .points
                .iter()
                .map(|p| CorePoint::new(p.x, p.y))
                .collect();
            if points.is_empty() {
                Ok(None)
            } else {
                Ok(Some(Geometry::MultiPoint(CoreMultiPoint::new(points))))
            }
        }
        Shape::PolyLineZ(shape_z) => Self::multipart_z_to_linestring_geometry(
            &shape_z.base,
            &shape_z.z_values,
            shape_z.m_values.as_deref(),
        ),
        Shape::PolygonZ(shape_z) => Self::multipart_z_to_polygon_geometry(
            &shape_z.base,
            &shape_z.z_values,
            shape_z.m_values.as_deref(),
        ),
        Shape::MultiPointZ(shape_z) => Self::multipart_z_to_multipoint_geometry(
            &shape_z.base,
            &shape_z.z_values,
            shape_z.m_values.as_deref(),
        ),
        Shape::PolyLineM(shape_m) => {
            Self::multipart_m_to_linestring_geometry(&shape_m.base, &shape_m.m_values)
        }
        Shape::PolygonM(shape_m) => {
            Self::multipart_m_to_polygon_geometry(&shape_m.base, &shape_m.m_values)
        }
        Shape::MultiPointM(shape_m) => {
            Self::multipart_m_to_multipoint_geometry(&shape_m.base, &shape_m.m_values)
        }
        Shape::MultiPatch(mp_shape) => {
            // The patch structure is not modelled; only the vertices are kept.
            let points: Vec<CorePoint> = mp_shape
                .base
                .points
                .iter()
                .zip(mp_shape.z_values.iter())
                .map(|(p, z)| CorePoint::from_coord(Coordinate::new_3d(p.x, p.y, *z)))
                .collect();
            if points.is_empty() {
                Ok(None)
            } else {
                Ok(Some(Geometry::MultiPoint(CoreMultiPoint::new(points))))
            }
        }
    }
}
/// Converts a multi-part shape with Z (and optional M) values into a
/// `LineString` (exactly one part) or a `MultiLineString` (otherwise).
///
/// `z_values` and `m_values` are looked up by point index; a missing entry
/// falls back to `0.0`. With exactly one part, all points are used. In the
/// multi-part case, parts with fewer than two points, or that the core type
/// rejects, are skipped. `Ok(None)` is returned when nothing usable remains.
///
/// # Errors
///
/// Returns an invalid-geometry error when the single-part line string is
/// rejected by the core type, or when a part's offsets do not describe a
/// valid range of `base.points` (this previously panicked).
fn multipart_z_to_linestring_geometry(
    base: &crate::shp::MultiPartShape,
    z_values: &[f64],
    m_values: Option<&[f64]>,
) -> Result<Option<Geometry>> {
    use oxigdal_core::vector::Coordinate;

    let make_coord = |i: usize, p: &crate::shp::shapes::Point| -> Coordinate {
        let z = z_values.get(i).copied().unwrap_or(0.0);
        match m_values {
            Some(mv) => Coordinate::new_3dm(p.x, p.y, z, mv.get(i).copied().unwrap_or(0.0)),
            None => Coordinate::new_3d(p.x, p.y, z),
        }
    };

    if base.parts.len() == 1 {
        let coords: Vec<Coordinate> = base
            .points
            .iter()
            .enumerate()
            .map(|(i, p)| make_coord(i, p))
            .collect();
        if coords.len() < 2 {
            return Ok(None);
        }
        let linestring = CoreLineString::new(coords).map_err(|e| {
            ShapefileError::invalid_geometry(format!("Invalid LineString: {}", e))
        })?;
        return Ok(Some(Geometry::LineString(linestring)));
    }

    let part_count = base.parts.len();
    let point_count = base.points.len();
    let mut linestrings = Vec::new();
    for i in 0..part_count {
        let start = base.parts[i] as usize;
        let end = if i + 1 < part_count {
            base.parts[i + 1] as usize
        } else {
            point_count
        };
        // Checked slicing: corrupt offsets become an error, not a panic.
        let part_points = base.points.get(start..end).ok_or_else(|| {
            ShapefileError::invalid_geometry(format!(
                "PolyLineZ part {} has invalid point range {}..{} ({} points available)",
                i, start, end, point_count
            ))
        })?;
        let coords: Vec<Coordinate> = part_points
            .iter()
            .enumerate()
            .map(|(j, p)| make_coord(start + j, p))
            .collect();
        if coords.len() >= 2 {
            if let Ok(ls) = CoreLineString::new(coords) {
                linestrings.push(ls);
            }
        }
    }

    if linestrings.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Geometry::MultiLineString(CoreMultiLineString::new(
            linestrings,
        ))))
    }
}
/// Converts a multi-part shape with Z (and optional M) values into a
/// `Polygon`.
///
/// The first part is the exterior ring and every further part is treated as
/// an interior ring. `z_values` and `m_values` are looked up by point index;
/// a missing entry falls back to `0.0`. Interior rings with fewer than four
/// points, or that the core type rejects, are skipped. `Ok(None)` is
/// returned when there are no parts or the exterior has fewer than four
/// points.
///
/// # Errors
///
/// Returns an invalid-geometry error when the exterior ring or the polygon is
/// rejected by the core type, or when a ring's offsets do not describe a
/// valid range of `base.points` (this previously panicked).
fn multipart_z_to_polygon_geometry(
    base: &crate::shp::MultiPartShape,
    z_values: &[f64],
    m_values: Option<&[f64]>,
) -> Result<Option<Geometry>> {
    use oxigdal_core::vector::Coordinate;

    if base.parts.is_empty() {
        return Ok(None);
    }

    let make_coord = |i: usize, p: &crate::shp::shapes::Point| -> Coordinate {
        let z = z_values.get(i).copied().unwrap_or(0.0);
        match m_values {
            Some(mv) => Coordinate::new_3dm(p.x, p.y, z, mv.get(i).copied().unwrap_or(0.0)),
            None => Coordinate::new_3d(p.x, p.y, z),
        }
    };

    let part_count = base.parts.len();
    let point_count = base.points.len();
    // Half-open point range of ring `i`.
    let ring_range = |i: usize| {
        let start = base.parts[i] as usize;
        let end = if i + 1 < part_count {
            base.parts[i + 1] as usize
        } else {
            point_count
        };
        (start, end)
    };

    let (ext_start, ext_end) = ring_range(0);
    // Checked slicing: corrupt offsets become an error, not a panic.
    let ext_points = base.points.get(ext_start..ext_end).ok_or_else(|| {
        ShapefileError::invalid_geometry(format!(
            "PolygonZ ring 0 has invalid point range {}..{} ({} points available)",
            ext_start, ext_end, point_count
        ))
    })?;
    let ext_coords: Vec<Coordinate> = ext_points
        .iter()
        .enumerate()
        .map(|(j, p)| make_coord(ext_start + j, p))
        .collect();
    if ext_coords.len() < 4 {
        return Ok(None);
    }
    let exterior = CoreLineString::new(ext_coords).map_err(|e| {
        ShapefileError::invalid_geometry(format!("Invalid exterior Z ring: {}", e))
    })?;

    let mut interiors = Vec::new();
    for i in 1..part_count {
        let (start, end) = ring_range(i);
        let ring_points = base.points.get(start..end).ok_or_else(|| {
            ShapefileError::invalid_geometry(format!(
                "PolygonZ ring {} has invalid point range {}..{} ({} points available)",
                i, start, end, point_count
            ))
        })?;
        let coords: Vec<Coordinate> = ring_points
            .iter()
            .enumerate()
            .map(|(j, p)| make_coord(start + j, p))
            .collect();
        if coords.len() >= 4 {
            if let Ok(ring) = CoreLineString::new(coords) {
                interiors.push(ring);
            }
        }
    }

    let polygon = CorePolygon::new(exterior, interiors)
        .map_err(|e| ShapefileError::invalid_geometry(format!("Invalid polygon Z: {}", e)))?;
    Ok(Some(Geometry::Polygon(polygon)))
}
/// Converts the points of a shape with Z (and optional M) values into a
/// `MultiPoint`.
///
/// `z_values` and `m_values` are looked up by point index; a missing entry
/// falls back to `0.0`. Returns `Ok(None)` when there are no points.
fn multipart_z_to_multipoint_geometry(
    base: &crate::shp::MultiPartShape,
    z_values: &[f64],
    m_values: Option<&[f64]>,
) -> Result<Option<Geometry>> {
    use oxigdal_core::vector::Coordinate;

    let coord_at = |index: usize, point: &crate::shp::shapes::Point| -> Coordinate {
        let z = z_values.get(index).map_or(0.0, |z| *z);
        match m_values {
            Some(measures) => {
                let m = measures.get(index).map_or(0.0, |m| *m);
                Coordinate::new_3dm(point.x, point.y, z, m)
            }
            None => Coordinate::new_3d(point.x, point.y, z),
        }
    };

    let mut points: Vec<CorePoint> = Vec::with_capacity(base.points.len());
    for (index, point) in base.points.iter().enumerate() {
        points.push(CorePoint::from_coord(coord_at(index, point)));
    }

    if points.is_empty() {
        return Ok(None);
    }
    Ok(Some(Geometry::MultiPoint(CoreMultiPoint::new(points))))
}
/// Converts a multi-part shape with M values into a `LineString` (exactly
/// one part) or a `MultiLineString` (otherwise).
///
/// `m_values` is looked up by point index; a missing entry falls back to
/// `0.0`. With exactly one part, all points are used. In the multi-part
/// case, parts with fewer than two points, or that the core type rejects,
/// are skipped. `Ok(None)` is returned when nothing usable remains.
///
/// # Errors
///
/// Returns an invalid-geometry error when the single-part line string is
/// rejected by the core type, or when a part's offsets do not describe a
/// valid range of `base.points` (this previously panicked).
fn multipart_m_to_linestring_geometry(
    base: &crate::shp::MultiPartShape,
    m_values: &[f64],
) -> Result<Option<Geometry>> {
    use oxigdal_core::vector::Coordinate;

    let make_coord = |i: usize, p: &crate::shp::shapes::Point| -> Coordinate {
        Coordinate::new_2dm(p.x, p.y, m_values.get(i).copied().unwrap_or(0.0))
    };

    if base.parts.len() == 1 {
        let coords: Vec<Coordinate> = base
            .points
            .iter()
            .enumerate()
            .map(|(i, p)| make_coord(i, p))
            .collect();
        if coords.len() < 2 {
            return Ok(None);
        }
        let linestring = CoreLineString::new(coords).map_err(|e| {
            ShapefileError::invalid_geometry(format!("Invalid LineStringM: {}", e))
        })?;
        return Ok(Some(Geometry::LineString(linestring)));
    }

    let part_count = base.parts.len();
    let point_count = base.points.len();
    let mut linestrings = Vec::new();
    for i in 0..part_count {
        let start = base.parts[i] as usize;
        let end = if i + 1 < part_count {
            base.parts[i + 1] as usize
        } else {
            point_count
        };
        // Checked slicing: corrupt offsets become an error, not a panic.
        let part_points = base.points.get(start..end).ok_or_else(|| {
            ShapefileError::invalid_geometry(format!(
                "PolyLineM part {} has invalid point range {}..{} ({} points available)",
                i, start, end, point_count
            ))
        })?;
        let coords: Vec<Coordinate> = part_points
            .iter()
            .enumerate()
            .map(|(j, p)| make_coord(start + j, p))
            .collect();
        if coords.len() >= 2 {
            if let Ok(ls) = CoreLineString::new(coords) {
                linestrings.push(ls);
            }
        }
    }

    if linestrings.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Geometry::MultiLineString(CoreMultiLineString::new(
            linestrings,
        ))))
    }
}
/// Converts a multi-part shape with M values into a `Polygon`.
///
/// The first part is the exterior ring and every further part is treated as
/// an interior ring. `m_values` is looked up by point index; a missing entry
/// falls back to `0.0`. Interior rings with fewer than four points, or that
/// the core type rejects, are skipped. `Ok(None)` is returned when there are
/// no parts or the exterior has fewer than four points.
///
/// # Errors
///
/// Returns an invalid-geometry error when the exterior ring or the polygon is
/// rejected by the core type, or when a ring's offsets do not describe a
/// valid range of `base.points` (this previously panicked).
fn multipart_m_to_polygon_geometry(
    base: &crate::shp::MultiPartShape,
    m_values: &[f64],
) -> Result<Option<Geometry>> {
    use oxigdal_core::vector::Coordinate;

    if base.parts.is_empty() {
        return Ok(None);
    }

    let make_coord = |i: usize, p: &crate::shp::shapes::Point| -> Coordinate {
        Coordinate::new_2dm(p.x, p.y, m_values.get(i).copied().unwrap_or(0.0))
    };

    let part_count = base.parts.len();
    let point_count = base.points.len();
    // Half-open point range of ring `i`.
    let ring_range = |i: usize| {
        let start = base.parts[i] as usize;
        let end = if i + 1 < part_count {
            base.parts[i + 1] as usize
        } else {
            point_count
        };
        (start, end)
    };

    let (ext_start, ext_end) = ring_range(0);
    // Checked slicing: corrupt offsets become an error, not a panic.
    let ext_points = base.points.get(ext_start..ext_end).ok_or_else(|| {
        ShapefileError::invalid_geometry(format!(
            "PolygonM ring 0 has invalid point range {}..{} ({} points available)",
            ext_start, ext_end, point_count
        ))
    })?;
    let ext_coords: Vec<Coordinate> = ext_points
        .iter()
        .enumerate()
        .map(|(j, p)| make_coord(ext_start + j, p))
        .collect();
    if ext_coords.len() < 4 {
        return Ok(None);
    }
    let exterior = CoreLineString::new(ext_coords).map_err(|e| {
        ShapefileError::invalid_geometry(format!("Invalid exterior M ring: {}", e))
    })?;

    let mut interiors = Vec::new();
    for i in 1..part_count {
        let (start, end) = ring_range(i);
        let ring_points = base.points.get(start..end).ok_or_else(|| {
            ShapefileError::invalid_geometry(format!(
                "PolygonM ring {} has invalid point range {}..{} ({} points available)",
                i, start, end, point_count
            ))
        })?;
        let coords: Vec<Coordinate> = ring_points
            .iter()
            .enumerate()
            .map(|(j, p)| make_coord(start + j, p))
            .collect();
        if coords.len() >= 4 {
            if let Ok(ring) = CoreLineString::new(coords) {
                interiors.push(ring);
            }
        }
    }

    let polygon = CorePolygon::new(exterior, interiors)
        .map_err(|e| ShapefileError::invalid_geometry(format!("Invalid polygon M: {}", e)))?;
    Ok(Some(Geometry::Polygon(polygon)))
}
/// Converts the points of a shape with M values into a `MultiPoint`.
///
/// `m_values` is looked up by point index; a missing entry falls back to
/// `0.0`. Returns `Ok(None)` when there are no points.
fn multipart_m_to_multipoint_geometry(
    base: &crate::shp::MultiPartShape,
    m_values: &[f64],
) -> Result<Option<Geometry>> {
    use oxigdal_core::vector::Coordinate;

    let mut points: Vec<CorePoint> = Vec::with_capacity(base.points.len());
    for (index, point) in base.points.iter().enumerate() {
        let measure = m_values.get(index).map_or(0.0, |m| *m);
        let coord = Coordinate::new_2dm(point.x, point.y, measure);
        points.push(CorePoint::from_coord(coord));
    }

    if points.is_empty() {
        return Ok(None);
    }
    Ok(Some(Geometry::MultiPoint(CoreMultiPoint::new(points))))
}
fn dbf_to_attributes(
dbf_record: &crate::dbf::DbfRecord,
field_descriptors: &[FieldDescriptor],
) -> HashMap<String, FieldValue> {
let mut attributes = HashMap::new();
for (field, value) in field_descriptors.iter().zip(&dbf_record.values) {
let property_value = match value {
crate::dbf::FieldValue::String(s) => FieldValue::String(s.clone()),
crate::dbf::FieldValue::Integer(i) => FieldValue::Integer(*i),
crate::dbf::FieldValue::Float(f) => FieldValue::Float(*f),
crate::dbf::FieldValue::Boolean(b) => FieldValue::Bool(*b),
crate::dbf::FieldValue::Date(d) => FieldValue::String(d.clone()),
crate::dbf::FieldValue::Null => FieldValue::Null,
};
attributes.insert(field.name.clone(), property_value);
}
attributes
}
/// Crate-visible entry point that forwards to the private `dbf_to_attributes`.
pub(crate) fn dbf_to_attributes_pub(
dbf_record: &crate::dbf::DbfRecord,
field_descriptors: &[FieldDescriptor],
) -> HashMap<String, FieldValue> {
Self::dbf_to_attributes(dbf_record, field_descriptors)
}
/// Crate-visible entry point that forwards to the private `shape_to_geometry`.
pub(crate) fn shape_to_geometry_pub(shape: &Shape) -> Result<Option<Geometry>> {
Self::shape_to_geometry(shape)
}
/// Derives the path of a shapefile component (`ext` = `"shp"`, `"dbf"`, ...)
/// from `base_path`.
///
/// * When `base_path` already ends in one of the shapefile extensions
///   (`shp`, `shx`, `dbf`, `prj`, `cpg`, compared case-insensitively), or in
///   an empty extension, that extension is replaced by `ext`.
/// * When `base_path` has no extension, `ext` is added.
/// * When `base_path` ends in any other dotted suffix, the suffix is treated
///   as part of the data set name and `.ext` is appended. Previously a base
///   name such as `roads.v2` was silently turned into `roads.shp`.
fn with_extension<P: AsRef<Path>>(base_path: P, ext: &str) -> PathBuf {
    // Extensions of the files this reader looks for next to each other.
    const SIDECAR_EXTENSIONS: [&str; 5] = ["shp", "shx", "dbf", "prj", "cpg"];

    let base = base_path.as_ref();

    // True when the text after the last dot belongs to the data set name
    // rather than being one of the recognised component extensions.
    let suffix_is_part_of_name = base.extension().map_or(false, |current| {
        let is_sidecar = current.to_str().map_or(false, |current| {
            SIDECAR_EXTENSIONS
                .iter()
                .any(|known| current.eq_ignore_ascii_case(known))
        });
        !current.is_empty() && !is_sidecar
    });

    match base.file_name() {
        Some(file_name) if suffix_is_part_of_name => {
            let mut full_name = file_name.to_os_string();
            full_name.push(".");
            full_name.push(ext);
            base.with_file_name(full_name)
        }
        _ => base.with_extension(ext),
    }
}
}
/// Streaming iterator over the features of a shapefile, created by
/// `ShapefileReader::iter_features`.
pub struct FeatureIter<'a> {
/// Reader positioned in the `.shp` file.
shp_reader: ShpReader<BufReader<File>>,
/// Reader positioned in the `.dbf` file.
dbf_reader: DbfReader<BufReader<File>>,
/// Field descriptors borrowed from the reader that created this iterator.
field_descriptors: &'a [FieldDescriptor],
/// Set once either file is exhausted or an error has been yielded; the
/// iterator returns `None` from then on.
done: bool,
}
impl<'a> Iterator for FeatureIter<'a> {
    type Item = Result<ShapefileFeature>;

    /// Yields the next feature, or the first error encountered.
    ///
    /// Iteration ends (and stays ended) as soon as either file runs out of
    /// records or an error has been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        if self.done {
            return None;
        }
        let step = self.try_next();
        // Anything other than a successfully produced feature is terminal.
        self.done = !matches!(step, Ok(Some(_)));
        step.transpose()
    }
}

impl FeatureIter<'_> {
    /// Reads one record from each file and combines them into a feature.
    ///
    /// Returns `Ok(None)` when the `.shp` or the `.dbf` reader has no more
    /// records. The shape record is read first, so the `.dbf` reader is not
    /// touched when the `.shp` reader is exhausted or fails.
    fn try_next(&mut self) -> Result<Option<ShapefileFeature>> {
        let Some(shp_record) = self.shp_reader.read_record()? else {
            return Ok(None);
        };
        let Some(dbf_record) = self.dbf_reader.read_record()? else {
            return Ok(None);
        };
        let geometry = ShapefileReader::shape_to_geometry_pub(&shp_record.shape)?;
        let attributes =
            ShapefileReader::dbf_to_attributes_pub(&dbf_record, self.field_descriptors);
        Ok(Some(ShapefileFeature::new(
            shp_record.record_number,
            geometry,
            attributes,
        )))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The helper adds an extension to a bare base path and swaps an
    /// existing shapefile extension.
    #[test]
    fn test_path_extension_helper() {
        let tmp = std::env::temp_dir();

        let bare = tmp.join("oxigdal_shapefile_test");
        let derived_shp = ShapefileReader::with_extension(&bare, "shp");
        assert_eq!(derived_shp, tmp.join("oxigdal_shapefile_test.shp"));

        let with_shp = tmp.join("oxigdal_shapefile_test.shp");
        let derived_dbf = ShapefileReader::with_extension(&with_shp, "dbf");
        assert_eq!(derived_dbf, tmp.join("oxigdal_shapefile_test.dbf"));
    }

    /// `ShapefileFeature::new` stores its arguments unchanged.
    #[test]
    fn test_shapefile_feature_creation() {
        let attributes = HashMap::from([
            ("name".to_string(), FieldValue::String("Test".to_string())),
            ("value".to_string(), FieldValue::Integer(42)),
        ]);
        let point = Geometry::Point(CorePoint::new(10.0, 20.0));

        let feature = ShapefileFeature::new(1, Some(point), attributes);

        assert_eq!(feature.record_number, 1);
        assert!(feature.geometry.is_some());
        assert_eq!(feature.attributes.len(), 2);
    }
}