shapefile_to_geojson/
lib.rs

1use futures::stream::{self, StreamExt};
2use geojson::{Feature, FeatureCollection};
3use indicatif::{ProgressBar, ProgressStyle};
4use regex::Regex;
5use serde_json::json;
6use shapefile::{PolygonRing, Reader, Shape};
7use std::fmt;
8use std::fs::File;
9use std::io::Write;
10use std::path::Path;
11use std::sync::Arc;
12use tokio::sync::Mutex;
13
/// Lightweight, message-only error type used by the geometry helpers.
///
/// The wrapped `String` is the whole error: no source error or extra
/// context is retained.
#[derive(Debug)]
pub struct CustomError(String);

impl fmt::Display for CustomError {
  /// Writes the wrapped message verbatim.
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    let CustomError(message) = self;
    f.write_str(message)
  }
}

// Lets `CustomError` be used wherever a `dyn std::error::Error` is expected.
impl std::error::Error for CustomError {}
24
25impl From<serde_json::Error> for CustomError {
26  fn from(err: serde_json::Error) -> Self {
27    CustomError(err.to_string())
28  }
29}
30
31impl From<std::io::Error> for CustomError {
32  fn from(err: std::io::Error) -> Self {
33    CustomError(err.to_string())
34  }
35}
36
37pub async fn convert_shapefile_to_geojson(
38  input_path: &str,
39  output_path: &str,
40) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
41  let base_path = Path::new(input_path);
42  let shp_path = base_path.with_extension("shp");
43  let dbf_path = base_path.with_extension("dbf");
44  let mut shp_reader = Reader::from_path(shp_path.clone())?;
45  let all_count = count_records(&shp_path, &dbf_path)?;
46
47  let features = Arc::new(Mutex::new(Vec::new()));
48
49  let pb = create_progress_bar(all_count);
50
51  let shape_records: Vec<_> = shp_reader.iter_shapes_and_records().collect();
52
53  let tasks = stream::iter(shape_records)
54    .map(|shape_record| {
55      let features = Arc::clone(&features);
56      let pb = pb.clone();
57
58      tokio::spawn(async move { process_shape_record(shape_record, features, pb).await })
59    })
60    .buffer_unordered(num_cpus::get());
61
62  tasks.for_each(|_| async {}).await;
63
64  println!("completed");
65  pb.finish_with_message("completed");
66
67  let feature_collection = FeatureCollection {
68    bbox: None,
69    features: features.lock().await.clone(),
70    foreign_members: None,
71  };
72
73  let geojson_output = serde_json::to_string_pretty(&feature_collection)?;
74
75  let mut file = File::create(output_path)?;
76  file.write_all(geojson_output.as_bytes())?;
77
78  println!("GeoJSON file has been created: {}", output_path);
79
80  Ok(())
81}
82
83fn count_records(
84  shp_path: &Path,
85  dbf_path: &Path,
86) -> Result<u64, Box<dyn std::error::Error + Send + Sync>> {
87  let mut shp_reader = Reader::from_path(shp_path)?;
88  let mut dbf_reader = dbase::Reader::from_path(dbf_path)?;
89  let shp_count = shp_reader.iter_shapes_and_records().count();
90  let dbf_count = dbf_reader.iter_records().count();
91
92  if shp_count != dbf_count {
93    println!("Warning: SHP data ({} records) and DBF data ({} records) have different numbers of elements.", shp_count, dbf_count);
94  } else {
95    println!(
96      "SHP and DBF data have the same number of elements: {} records",
97      shp_count
98    );
99  }
100
101  Ok(shp_count as u64)
102}
103
104fn create_progress_bar(total: u64) -> ProgressBar {
105  let pb = ProgressBar::new(total);
106  pb.set_style(
107    ProgressStyle::default_bar()
108      .template("{spinner:.green} [{bar:40.cyan/blue}] {msg}")
109      .unwrap()
110      .progress_chars("█▓▒░"),
111  );
112  pb.set_message("Processing...");
113  pb
114}
115
116async fn process_shape_record(
117  shape_record: Result<(Shape, shapefile::dbase::Record), shapefile::Error>,
118  features: Arc<Mutex<Vec<Feature>>>,
119  pb: ProgressBar,
120) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
121  let (shape, record) = shape_record?;
122  let geojson_string = match shape {
123    Shape::Polygon(_) => process_polygon(&shape)?,
124    Shape::Polyline(_) => process_polyline(&shape)?,
125    Shape::Point(_) => process_point(&shape)?,
126    _ => return Ok(()), // Skip unsupported shapes
127  };
128
129  let mut feature: Feature = serde_json::from_str(&geojson_string)?;
130
131  if let Some(props) = &mut feature.properties {
132    let re_numeric = Regex::new(r"Numeric\(Some\(([0-9]+(\.[0-9]+)?)\)\)").unwrap();
133    let re_character = Regex::new(r#"^Character\(Some\("(.+)"\)\)"#).unwrap();
134
135    for (field, value) in record.into_iter() {
136      let value_string = value.to_string();
137      let value_json = match value_string.as_str() {
138        "Numeric(None)" => json!(""),
139        "Character(None)" => json!(null),
140        _ => {
141          if let Some(caps) = re_numeric.captures(&value_string) {
142            let number_str = caps.get(1).map_or("", |m| m.as_str());
143            if let Ok(number) = number_str.parse::<f64>() {
144              json!(number)
145            } else {
146              eprintln!("Failed to parse numeric value: {}", value_string);
147              json!(value_string)
148            }
149          } else if let Some(caps) = re_character.captures(&value_string) {
150            let character_str = caps.get(1).map_or("", |m| m.as_str());
151            json!(character_str)
152          } else {
153            json!(value_string)
154          }
155        }
156      };
157      props.insert(field.to_string(), value_json);
158    }
159  }
160
161  features.lock().await.push(feature);
162  pb.inc(1);
163  Ok(())
164}
165
166fn process_polygon(shape: &Shape) -> Result<String, CustomError> {
167  let polygon = match shape {
168    Shape::Polygon(p) => p,
169    _ => return Err(CustomError("Expected Polygon shape".to_string())),
170  };
171
172  let rings: Vec<Vec<Vec<f64>>> = polygon
173    .rings()
174    .iter()
175    .map(|ring| match ring {
176      PolygonRing::Outer(points) | PolygonRing::Inner(points) => {
177        points.iter().map(|point| vec![point.x, point.y]).collect()
178      }
179    })
180    .collect();
181
182  let feature = json!({
183      "type": "Feature",
184      "geometry": {
185          "type": "Polygon",
186          "coordinates": rings
187      },
188      "properties": {}
189  });
190
191  serde_json::to_string(&feature).map_err(CustomError::from)
192}
193
194fn process_polyline(shape: &Shape) -> Result<String, CustomError> {
195  let polyline = match shape {
196    Shape::Polyline(p) => p,
197    _ => return Err(CustomError("Expected Polyline shape".to_string())),
198  };
199
200  let parts: Vec<Vec<Vec<f64>>> = polyline
201    .parts()
202    .iter()
203    .map(|part| part.iter().map(|point| vec![point.x, point.y]).collect())
204    .collect();
205
206  let feature = json!({
207      "type": "Feature",
208      "geometry": {
209          "type": "MultiLineString",
210          "coordinates": parts
211      },
212      "properties": {}
213  });
214
215  serde_json::to_string(&feature).map_err(CustomError::from)
216}
217
218fn process_point(shape: &Shape) -> Result<String, CustomError> {
219  let point = match shape {
220    Shape::Point(p) => p,
221    _ => return Err(CustomError("Expected Point shape".to_string())),
222  };
223
224  let feature = json!({
225      "type": "Feature",
226      "geometry": {
227          "type": "Point",
228          "coordinates": [point.x, point.y]
229      },
230      "properties": {}
231  });
232
233  serde_json::to_string(&feature).map_err(CustomError::from)
234}