shapefile_to_geojson/
lib.rs1use futures::stream::{self, StreamExt};
2use geojson::{Feature, FeatureCollection};
3use indicatif::{ProgressBar, ProgressStyle};
4use regex::Regex;
5use serde_json::json;
6use shapefile::{PolygonRing, Reader, Shape};
7use std::fmt;
8use std::fs::File;
9use std::io::Write;
10use std::path::Path;
11use std::sync::Arc;
12use tokio::sync::Mutex;
13
/// String-backed error used by the geometry conversion helpers in this file.
///
/// The wrapped `String` is the complete, human-readable error message.
#[derive(Debug)]
pub struct CustomError(String);

impl fmt::Display for CustomError {
    /// Writes the wrapped message verbatim.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let CustomError(message) = self;
        f.write_str(message)
    }
}

// Marker impl so `CustomError` can be boxed as `dyn std::error::Error` and used with `?`.
impl std::error::Error for CustomError {}
24
25impl From<serde_json::Error> for CustomError {
26 fn from(err: serde_json::Error) -> Self {
27 CustomError(err.to_string())
28 }
29}
30
31impl From<std::io::Error> for CustomError {
32 fn from(err: std::io::Error) -> Self {
33 CustomError(err.to_string())
34 }
35}
36
37pub async fn convert_shapefile_to_geojson(
38 input_path: &str,
39 output_path: &str,
40) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
41 let base_path = Path::new(input_path);
42 let shp_path = base_path.with_extension("shp");
43 let dbf_path = base_path.with_extension("dbf");
44 let mut shp_reader = Reader::from_path(shp_path.clone())?;
45 let all_count = count_records(&shp_path, &dbf_path)?;
46
47 let features = Arc::new(Mutex::new(Vec::new()));
48
49 let pb = create_progress_bar(all_count);
50
51 let shape_records: Vec<_> = shp_reader.iter_shapes_and_records().collect();
52
53 let tasks = stream::iter(shape_records)
54 .map(|shape_record| {
55 let features = Arc::clone(&features);
56 let pb = pb.clone();
57
58 tokio::spawn(async move { process_shape_record(shape_record, features, pb).await })
59 })
60 .buffer_unordered(num_cpus::get());
61
62 tasks.for_each(|_| async {}).await;
63
64 println!("completed");
65 pb.finish_with_message("completed");
66
67 let feature_collection = FeatureCollection {
68 bbox: None,
69 features: features.lock().await.clone(),
70 foreign_members: None,
71 };
72
73 let geojson_output = serde_json::to_string_pretty(&feature_collection)?;
74
75 let mut file = File::create(output_path)?;
76 file.write_all(geojson_output.as_bytes())?;
77
78 println!("GeoJSON file has been created: {}", output_path);
79
80 Ok(())
81}
82
83fn count_records(
84 shp_path: &Path,
85 dbf_path: &Path,
86) -> Result<u64, Box<dyn std::error::Error + Send + Sync>> {
87 let mut shp_reader = Reader::from_path(shp_path)?;
88 let mut dbf_reader = dbase::Reader::from_path(dbf_path)?;
89 let shp_count = shp_reader.iter_shapes_and_records().count();
90 let dbf_count = dbf_reader.iter_records().count();
91
92 if shp_count != dbf_count {
93 println!("Warning: SHP data ({} records) and DBF data ({} records) have different numbers of elements.", shp_count, dbf_count);
94 } else {
95 println!(
96 "SHP and DBF data have the same number of elements: {} records",
97 shp_count
98 );
99 }
100
101 Ok(shp_count as u64)
102}
103
104fn create_progress_bar(total: u64) -> ProgressBar {
105 let pb = ProgressBar::new(total);
106 pb.set_style(
107 ProgressStyle::default_bar()
108 .template("{spinner:.green} [{bar:40.cyan/blue}] {msg}")
109 .unwrap()
110 .progress_chars("█▓▒░"),
111 );
112 pb.set_message("Processing...");
113 pb
114}
115
116async fn process_shape_record(
117 shape_record: Result<(Shape, shapefile::dbase::Record), shapefile::Error>,
118 features: Arc<Mutex<Vec<Feature>>>,
119 pb: ProgressBar,
120) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
121 let (shape, record) = shape_record?;
122 let geojson_string = match shape {
123 Shape::Polygon(_) => process_polygon(&shape)?,
124 Shape::Polyline(_) => process_polyline(&shape)?,
125 Shape::Point(_) => process_point(&shape)?,
126 _ => return Ok(()), };
128
129 let mut feature: Feature = serde_json::from_str(&geojson_string)?;
130
131 if let Some(props) = &mut feature.properties {
132 let re_numeric = Regex::new(r"Numeric\(Some\(([0-9]+(\.[0-9]+)?)\)\)").unwrap();
133 let re_character = Regex::new(r#"^Character\(Some\("(.+)"\)\)"#).unwrap();
134
135 for (field, value) in record.into_iter() {
136 let value_string = value.to_string();
137 let value_json = match value_string.as_str() {
138 "Numeric(None)" => json!(""),
139 "Character(None)" => json!(null),
140 _ => {
141 if let Some(caps) = re_numeric.captures(&value_string) {
142 let number_str = caps.get(1).map_or("", |m| m.as_str());
143 if let Ok(number) = number_str.parse::<f64>() {
144 json!(number)
145 } else {
146 eprintln!("Failed to parse numeric value: {}", value_string);
147 json!(value_string)
148 }
149 } else if let Some(caps) = re_character.captures(&value_string) {
150 let character_str = caps.get(1).map_or("", |m| m.as_str());
151 json!(character_str)
152 } else {
153 json!(value_string)
154 }
155 }
156 };
157 props.insert(field.to_string(), value_json);
158 }
159 }
160
161 features.lock().await.push(feature);
162 pb.inc(1);
163 Ok(())
164}
165
166fn process_polygon(shape: &Shape) -> Result<String, CustomError> {
167 let polygon = match shape {
168 Shape::Polygon(p) => p,
169 _ => return Err(CustomError("Expected Polygon shape".to_string())),
170 };
171
172 let rings: Vec<Vec<Vec<f64>>> = polygon
173 .rings()
174 .iter()
175 .map(|ring| match ring {
176 PolygonRing::Outer(points) | PolygonRing::Inner(points) => {
177 points.iter().map(|point| vec![point.x, point.y]).collect()
178 }
179 })
180 .collect();
181
182 let feature = json!({
183 "type": "Feature",
184 "geometry": {
185 "type": "Polygon",
186 "coordinates": rings
187 },
188 "properties": {}
189 });
190
191 serde_json::to_string(&feature).map_err(CustomError::from)
192}
193
194fn process_polyline(shape: &Shape) -> Result<String, CustomError> {
195 let polyline = match shape {
196 Shape::Polyline(p) => p,
197 _ => return Err(CustomError("Expected Polyline shape".to_string())),
198 };
199
200 let parts: Vec<Vec<Vec<f64>>> = polyline
201 .parts()
202 .iter()
203 .map(|part| part.iter().map(|point| vec![point.x, point.y]).collect())
204 .collect();
205
206 let feature = json!({
207 "type": "Feature",
208 "geometry": {
209 "type": "MultiLineString",
210 "coordinates": parts
211 },
212 "properties": {}
213 });
214
215 serde_json::to_string(&feature).map_err(CustomError::from)
216}
217
218fn process_point(shape: &Shape) -> Result<String, CustomError> {
219 let point = match shape {
220 Shape::Point(p) => p,
221 _ => return Err(CustomError("Expected Point shape".to_string())),
222 };
223
224 let feature = json!({
225 "type": "Feature",
226 "geometry": {
227 "type": "Point",
228 "coordinates": [point.x, point.y]
229 },
230 "properties": {}
231 });
232
233 serde_json::to_string(&feature).map_err(CustomError::from)
234}