gistools/readers/wkt/
mod.rs

1use crate::parsers::{FeatureReader, clean_string};
2use alloc::{string::String, vec, vec::Vec};
3use s2json::{
4    BBox3D, MValue, Properties, VectorFeature, VectorGeometry, VectorLineString,
5    VectorMultiLineString, VectorMultiPolygon, VectorPoint,
6};
7
8/// WKT Value can be a point or an array of points
9#[derive(Debug, Clone, PartialEq)]
10pub enum WKTAValue {
11    /// A Vector Point
12    Point(VectorPoint),
13    /// A collection of sub WKT values
14    Array(Vec<WKTAValue>),
15}
16impl WKTAValue {
17    /// Get the Vector Point
18    pub fn get_point(&mut self) -> Option<&mut VectorPoint> {
19        match self {
20            WKTAValue::Point(point) => Some(point),
21            WKTAValue::Array(arr) => arr.first_mut().and_then(|v| v.get_point()),
22        }
23    }
24    /// Get a vector linestring
25    pub fn get_linestring(&mut self) -> Option<VectorLineString> {
26        match self {
27            WKTAValue::Point(point) => Some(vec![point.clone()]),
28            WKTAValue::Array(arr) => {
29                arr.iter_mut().map(|v| v.get_point().map(core::mem::take)).collect()
30            }
31        }
32    }
33    /// Get a vector multilinestring
34    pub fn get_multilinestring(&mut self) -> Option<VectorMultiLineString> {
35        match self {
36            WKTAValue::Point(point) => Some(vec![vec![point.clone()]]),
37            WKTAValue::Array(arr) => arr.iter_mut().map(|v| v.get_linestring()).collect(),
38        }
39    }
40}
41
42/// WKT Array can be an array of points or even nested arrays of points
43pub type WKTArray = Vec<WKTAValue>;
44
45/// # WKT Geometry Reader
46///
47/// ## Description
48/// Parse a collection of WKT geometries from a string
49///
50/// Implements the [`FeatureReader`] trait
51///
52/// ## Usage
53///
54/// The methods you have access to:
55/// - [`WKTGeometryReader::new`]: Create a new WKTGeometryReader
56///
57/// ```rust
58/// use gistools::{parsers::FeatureReader, readers::WKTGeometryReader};
59///
60/// let collection_wkt = r#"POINT(4 6)
61/// GEOMETRYCOLLECTION(POINT(1 2), LINESTRING(3 4,5 6))
62/// MULTIPOLYGON EMPTY
63/// TRIANGLE((0 0 0,0 1 0,1 1 0,0 0 0))"#;
64///
65/// let reader = WKTGeometryReader::new(collection_wkt.into());
66/// let features: Vec<_> = reader.iter().collect();
67/// assert_eq!(features.len(), 3);
68/// ```
69///
70/// ## Links
71/// - <https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry>
72#[derive(Debug, Clone)]
73pub struct WKTGeometryReader {
74    /// The parsed WKT geometries
75    pub features: Vec<VectorFeature>,
76}
77impl WKTGeometryReader {
78    /// Create a new WKT Geometry Reader
79    pub fn new(data: String) -> Self {
80        let mut features = vec![];
81        let wkt_strings = split_wkt_geometry(data);
82        for wkt_string in wkt_strings {
83            let geometry = parse_wkt_geometry(wkt_string);
84            if let Some(geometry) = geometry {
85                features.push(VectorFeature { geometry, ..Default::default() });
86            }
87        }
88        WKTGeometryReader { features }
89    }
90
91    /// Get the number of features
92    pub fn len(&self) -> usize {
93        self.features.len()
94    }
95
96    /// Check if the reader is empty
97    pub fn is_empty(&self) -> bool {
98        self.features.is_empty()
99    }
100}
101/// The WKT Iterator tool
102#[derive(Debug)]
103pub struct WKTIterator<'a> {
104    reader: &'a WKTGeometryReader,
105    index: usize,
106    len: usize,
107}
108impl Iterator for WKTIterator<'_> {
109    type Item = VectorFeature;
110
111    fn next(&mut self) -> Option<Self::Item> {
112        if self.index >= self.len {
113            return None;
114        }
115        self.index += 1;
116        self.reader.features.get(self.index - 1).cloned()
117    }
118}
119/// A feature reader trait with a callback-based approach
120impl FeatureReader<(), Properties, MValue> for WKTGeometryReader {
121    type FeatureIterator<'a> = WKTIterator<'a>;
122
123    fn iter(&self) -> Self::FeatureIterator<'_> {
124        WKTIterator { reader: self, index: 0, len: self.features.len() }
125    }
126
127    fn par_iter(&self, pool_size: usize, thread_id: usize) -> Self::FeatureIterator<'_> {
128        let start = self.len() * thread_id / pool_size;
129        let end = self.len() * (thread_id + 1) / pool_size;
130        WKTIterator { reader: self, index: start, len: end }
131    }
132}
133
134/// # WKT Geometry Parser
135///
136/// ## Description
137/// Parse individual geometries from a WKT string into a VectorGeometry
138///
139/// ## Usage
140/// ```rust
141/// use gistools::readers::parse_wkt_geometry;
142/// use s2json::{VectorPoint, VectorGeometry, BBox3D};
143///
144/// let wkt_str = "POINT Z (5.4321 1.2345 2.3456)";
145/// let geo = parse_wkt_geometry(wkt_str.into());
146/// let expected = VectorPoint::from_xyz(5.4321, 1.2345, 2.3456);
147/// assert_eq!(
148///     geo,
149///     Some(VectorGeometry::new_point(expected.clone(), Some(BBox3D::from_point(&expected))))
150/// );
151/// ```
152///
153/// ## Links
154/// - <https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry>
155///
156/// ## Parameters
157/// - `wkt_str`: WKT Geometry string
158///
159/// ## Returns
160/// A [`VectorGeometry`] if the WKT string is valid
161pub fn parse_wkt_geometry(wkt_str: String) -> Option<VectorGeometry> {
162    if wkt_str.starts_with("POINT") {
163        parse_wkt_point(wkt_str)
164    } else if wkt_str.starts_with("MULTIPOINT") {
165        parse_wkt_line(wkt_str, LineParseType::MultiPoint)
166    } else if wkt_str.starts_with("LINESTRING") {
167        parse_wkt_line(wkt_str, LineParseType::LineString)
168    } else if wkt_str.starts_with("MULTILINESTRING") {
169        parse_wkt_multi_line(wkt_str, MultiLineParseType::MultiLineString)
170    } else if wkt_str.starts_with("POLYGON") {
171        parse_wkt_multi_line(wkt_str, MultiLineParseType::Polygon)
172    } else if wkt_str.starts_with("MULTIPOLYGON") {
173        parse_wkt_multi_polygon(wkt_str)
174    } else {
175        None
176    }
177}
178
/// Split a WKT string into individual geometry strings
///
/// Removes EMPTY geometries, flattens GEOMETRYCOLLECTIONs recursively,
/// and returns a vector of individual WKT geometry strings.
///
/// Geometries are delimited by balanced parentheses; leading commas and whitespace between
/// them are stripped. Text up to and including an unmatched `)` is discarded so that one stray
/// bracket cannot swallow the geometries that follow it.
///
/// ## Parameters
/// - `input`: WKT string that is a collection of geometries
///
/// ## Returns
/// Array of individual WKT geometries still in string form
pub fn split_wkt_geometry(mut input: String) -> Vec<String> {
    input = remove_empty_wkt_geometries(&input);

    let mut geometries: Vec<String> = Vec::new();
    // Byte offset at which the geometry currently being scanned starts
    let mut start = 0;
    let mut depth = 0usize;

    for (idx, ch) in input.char_indices() {
        match ch {
            '(' => depth += 1,
            // Unmatched closing bracket: skip it together with the text in front of it
            ')' if depth == 0 => start = idx + 1,
            ')' => {
                depth -= 1;
                if depth > 0 {
                    continue;
                }
                // `)` is a single byte, so `idx + 1` is a valid char boundary
                let end = idx + 1;
                // Strip separators before looking at the keyword, otherwise a collection that
                // follows a comma would not be recognised
                let segment = input[start..end]
                    .trim_start_matches(|c: char| c == ',' || c.is_whitespace())
                    .trim_end();
                start = end;
                if segment.starts_with("GEOMETRYCOLLECTION") {
                    // The segment ends with the `)` matching its first `(`, so the members
                    // are everything strictly between those two brackets
                    if let Some(open) = segment.find('(') {
                        let members = &segment[open + 1..segment.len() - 1];
                        geometries.extend(split_wkt_geometry(members.into()));
                    }
                } else if !segment.is_empty() {
                    geometries.push(segment.into());
                }
            }
            _ => {}
        }
    }

    geometries
}

/// Drop every `<KEYWORD> [Z|M|ZM] EMPTY` geometry from `input` and normalise whitespace to
/// single spaces.
///
/// Brackets and commas glued to the removed tokens (as in `(POINT EMPTY,` or `POINT EMPTY)`)
/// are kept so the surrounding collection stays balanced. An `EMPTY` token with nothing in
/// front of it is left untouched.
fn remove_empty_wkt_geometries(input: &str) -> String {
    let mut kept: Vec<String> = Vec::new();
    for word in input.split_whitespace() {
        match word.find("EMPTY") {
            Some(pos) if !kept.is_empty() => {
                // Structural characters that follow the EMPTY marker, e.g. `,` or `)`
                let tail = &word[pos + "EMPTY".len()..];
                let mut head = kept.pop().unwrap_or_default();
                // A dimension qualifier sits between the keyword and EMPTY; drop it and
                // continue with the keyword token in front of it
                if matches!(head.as_str(), "Z" | "M" | "ZM") {
                    if let Some(keyword) = kept.pop() {
                        head = keyword;
                    }
                }
                // Keep whatever precedes the keyword inside its token, e.g. `COLLECTION(`
                let keep = head.rfind(|c: char| matches!(c, '(' | ')' | ',')).map_or(0, |p| p + 1);
                head.truncate(keep);
                head.push_str(tail);
                if !head.is_empty() {
                    kept.push(head);
                }
            }
            _ => kept.push(word.into()),
        }
    }
    kept.join(" ")
}
247
248/// Parse a WKT point string to a VectorPoint
249///
250/// ## Parameters
251/// - `wkt_str`: WKT string
252///
253/// ## Returns
254/// A [`VectorPoint`] in a [`VectorGeometry`] if the WKT string is valid
255fn parse_wkt_point(wkt_str: String) -> Option<VectorGeometry> {
256    if let Some(WKTAValue::Point(point)) = parse_wkt_array(wkt_str).get_mut(0) {
257        let bbox = BBox3D::from_point(point);
258        Some(VectorGeometry::new_point(core::mem::take(point), Some(bbox)))
259    } else {
260        None
261    }
262}
263
264enum LineParseType {
265    MultiPoint,
266    LineString,
267}
268
269/// Parse a WKT array to a LineString or MultiPoint geometry
270///
271/// ## Parameters
272/// - `wkt_str`: WKT string
273/// - `type`: 'MultiPoint' or 'LineString'
274///
275/// ## Returns
276/// A [`VectorGeometry`] as either a [`s2json::VectorLineString`] or [`s2json::VectorMultiPoint`]
277/// if the WKT string is valid
278fn parse_wkt_line(wkt_str: String, r#type: LineParseType) -> Option<VectorGeometry> {
279    let mut line = parse_wkt_array(wkt_str);
280    let points: VectorLineString =
281        line.iter_mut().map(|e| e.get_point().map(core::mem::take).unwrap_or_default()).collect();
282    let bbox = BBox3D::from_linestring(&points);
283    match r#type {
284        LineParseType::MultiPoint => Some(VectorGeometry::new_multipoint(points, Some(bbox))),
285        LineParseType::LineString => Some(VectorGeometry::new_linestring(points, Some(bbox))),
286    }
287}
288
289enum MultiLineParseType {
290    MultiLineString,
291    Polygon,
292}
293
294/// Parse a WKT array to a MultiLineString or Polygon
295///
296/// ## Parameters
297/// - `wkt_str`: WKT string
298/// - `type`: 'MultiLineString' or 'Polygon'
299///
300/// ## Returns
301/// A [`VectorGeometry`] as either a [`s2json::VectorMultiLineString`] or [`s2json::VectorPolygon`]
302/// if the WKT string is valid
303fn parse_wkt_multi_line(wkt_str: String, r#type: MultiLineParseType) -> Option<VectorGeometry> {
304    let mut multiline = parse_wkt_array(wkt_str);
305    let lines: VectorMultiLineString =
306        multiline.iter_mut().map(|e| e.get_linestring().unwrap_or_default()).collect();
307    let bbox = BBox3D::from_multi_linestring(&lines);
308    match r#type {
309        MultiLineParseType::MultiLineString => {
310            Some(VectorGeometry::new_multilinestring(lines, Some(bbox)))
311        }
312        MultiLineParseType::Polygon => Some(VectorGeometry::new_polygon(lines, Some(bbox))),
313    }
314}
315
316/// Parse a WKT array to a MultiPolygon
317///
318/// ## Parameters
319/// - `wkt_str`: WKT string
320///
321/// ## Returns
322/// A [`VectorGeometry`] as a [`s2json::VectorMultiPolygon`] if the WKT string is valid
323fn parse_wkt_multi_polygon(wkt_str: String) -> Option<VectorGeometry> {
324    let mut multipolygon = parse_wkt_array(wkt_str);
325    let polygons: VectorMultiPolygon =
326        multipolygon.iter_mut().map(|e| e.get_multilinestring().unwrap_or_default()).collect();
327    let bbox = BBox3D::from_multi_polygon(&polygons);
328    Some(VectorGeometry::new_multipolygon(polygons, Some(bbox)))
329}
330
331/// Parse a WKT array
332///
333/// ## Parameters
334/// - `wkt_str`: WKT string
335///
336/// ## Returns
337/// Collection of points as [`WKTArray`]
338pub fn parse_wkt_array(wkt_str: String) -> WKTArray {
339    let mut res = Vec::new();
340    let _ = _parse_wkt_array(wkt_str, &mut res);
341    if let Some(WKTAValue::Array(inner)) = res.first() { inner.clone() } else { res }
342}
343
344/// Parse a WKT array.
345/// always return the endBracketIndex if we hit it
346///
347/// ## Parameters
348/// - `wkt_str`: WKT string
349/// - `res`: collection to store the values
350///
351/// ## Returns
352/// A sliced WKT string with the parsed values
353fn _parse_wkt_array(mut wkt_str: String, res: &mut WKTArray) -> String {
354    while !wkt_str.is_empty() {
355        let comma_index = wkt_str.find(',').unwrap_or(usize::MAX);
356        let start_bracket_index = wkt_str.find('(').unwrap_or(usize::MAX);
357        let end_bracket_index = wkt_str.find(')').unwrap_or(usize::MAX);
358
359        if comma_index < start_bracket_index.min(end_bracket_index) {
360            let key = &wkt_str[..comma_index].trim();
361            if !key.is_empty() {
362                res.push(WKTAValue::Point(build_point(key)));
363            }
364            wkt_str = wkt_str[comma_index + 1..].into();
365        } else if start_bracket_index < end_bracket_index {
366            let mut inner = Vec::new();
367            let inner_str = &wkt_str[start_bracket_index + 1..];
368            wkt_str = _parse_wkt_array(inner_str.into(), &mut inner);
369            res.push(WKTAValue::Array(inner));
370        } else {
371            if end_bracket_index > 0 {
372                let key = &wkt_str[..end_bracket_index].trim();
373                if !key.is_empty() {
374                    res.push(WKTAValue::Point(build_point(key)));
375                }
376                wkt_str = wkt_str[end_bracket_index + 1..].into();
377            } else {
378                wkt_str = wkt_str[1..].into();
379            }
380            return wkt_str;
381        }
382    }
383    wkt_str
384}
385
386/// Build a point from a WKT string
387///
388/// ## Parameters
389/// - `str`: WKT string
390///
391/// ## Returns
392/// A [`VectorPoint`]
393fn build_point(input: &str) -> VectorPoint {
394    let binding = clean_string(input);
395    let parts: Vec<&str> = binding.split_whitespace().collect();
396
397    let x = parts.first().and_then(|v| v.parse::<f64>().ok()).unwrap_or(0.0);
398    let y = parts.get(1).and_then(|v| v.parse::<f64>().ok()).unwrap_or(0.0);
399    let z = parts.get(2).and_then(|v| v.parse::<f64>().ok());
400
401    VectorPoint::new(x, y, z, None)
402}