mojxml_parser/
parse.rs

1use crate::constants::{get_proj, get_xml_namespace};
2use crate::error::{Error, Result};
3use crate::types::{CommonProperties, Feature, FeatureProperties};
4use crate::{ParsedXML, 筆界未定構成筆};
5use geo::algorithm::interior_point::InteriorPoint;
6use geo_types::{LineString, Point, Polygon};
7use proj4rs::proj::Proj;
8use roxmltree::{Document, Node};
9use std::collections::HashMap;
10
11// --- Type Aliases ---
/// A boundary curve, reduced to a single vertex: `parse_curves` stores only the
/// first position it finds inside each `GM_Curve`.
type Curve = Point;
/// A parcel surface, held as a polygon assembled from the referenced curves.
type Surface = Polygon;
14
15fn has_tag(node: &Node, namespace: Option<&str>, name: &str) -> bool {
16    node.tag_name().name() == name && node.tag_name().namespace() == namespace
17}
18
19fn required_attribute<'a, 'd>(node: &Node<'a, 'd>, attr: &str) -> Result<&'a str>
20where
21    'd: 'a,
22{
23    node.attribute(attr).ok_or_else(|| Error::MissingAttribute {
24        element: node.tag_name().name().to_string(),
25        attribute: attr.to_string(),
26    })
27}
28
29fn node_text(node: &Node, label: &str) -> Result<String> {
30    node.text()
31        .map(|text| text.to_string())
32        .ok_or_else(|| Error::MissingElement(label.to_string()))
33}
34
35fn child_text(node: &Node, label: &str) -> Result<String> {
36    let child = get_child_element(node, label)?;
37    node_text(&child, label)
38}
39
40fn parse_text_as_f64(node: &Node, label: &str) -> Result<f64> {
41    let text = node
42        .text()
43        .ok_or_else(|| Error::MissingElement(label.to_string()))?;
44    Ok(text.parse::<f64>()?)
45}
46
47fn parse_xy(node: &Node) -> Result<(f64, f64)> {
48    let mut x = None;
49    let mut y = None;
50
51    for child in node.children().filter(|child| child.is_element()) {
52        match child.tag_name().name() {
53            "X" => x = Some(parse_text_as_f64(&child, "X")?),
54            "Y" => y = Some(parse_text_as_f64(&child, "Y")?),
55            _ => {}
56        }
57    }
58
59    let x = x.ok_or_else(|| Error::MissingElement("X".to_string()))?;
60    let y = y.ok_or_else(|| Error::MissingElement("Y".to_string()))?;
61    Ok((x, y))
62}
63
64fn collect_ring_points<'a, 'd>(
65    boundary: &Node<'a, 'd>,
66    curves: &HashMap<&'a str, Curve>,
67    zmn_ns: Option<&str>,
68) -> Result<Vec<Point>>
69where
70    'd: 'a,
71{
72    let mut ring_points = Vec::new();
73
74    for ring in boundary
75        .descendants()
76        .filter(|child| child.is_element() && has_tag(child, zmn_ns, "GM_Ring"))
77    {
78        for curve_ref in ring.children().filter(|child| child.is_element()) {
79            let idref = required_attribute(&curve_ref, "idref")?;
80            let curve = curves
81                .get(idref)
82                .ok_or_else(|| Error::PointNotFound(idref.to_string()))?;
83            ring_points.push(*curve);
84        }
85    }
86
87    Ok(ring_points)
88}
89
90fn parse_constituent_fude(node: &Node) -> 筆界未定構成筆 {
91    let mut constituent = 筆界未定構成筆::default();
92
93    for entry in node.children().filter(|child| child.is_element()) {
94        let tag_name = entry.tag_name().name();
95        let text = entry.text();
96        match tag_name {
97            "大字コード" => constituent.大字コード = text.unwrap_or("").to_owned(),
98            "丁目コード" => constituent.丁目コード = text.unwrap_or("").to_owned(),
99            "小字コード" => constituent.小字コード = text.unwrap_or("").to_owned(),
100            "予備コード" => constituent.予備コード = text.unwrap_or("").to_owned(),
101            "大字名" => constituent.大字名 = text.map(str::to_owned),
102            "丁目名" => constituent.丁目名 = text.map(str::to_owned),
103            "小字名" => constituent.小字名 = text.map(str::to_owned),
104            "予備名" => constituent.予備名 = text.map(str::to_owned),
105            "地番" => constituent.地番 = text.unwrap_or("").to_owned(),
106            _ => {}
107        }
108    }
109
110    constituent
111}
112
113fn point_on_polygon(polygon: &Polygon) -> Result<Point<f64>> {
114    // interior_point returns None if the polygon is empty or has no interior point
115    // We've tested on 2024 data, and all polygons have an interior point
116    polygon
117        .interior_point()
118        .ok_or(Error::InteriorPointUnavailable)
119}
120
/// Switches controlling which data `parse_xml_content` emits.
#[derive(Debug, Clone, Default)]
pub struct ParseOptions {
    /// When `false`, a file whose 座標系 value has no projection definition
    /// (`get_proj` yields `None`) produces an empty feature list. When `true`,
    /// such a file is still parsed and its coordinates are left untransformed.
    pub include_arbitrary_crs: bool,
    /// When `false`, 筆 entries whose 地番 contains "地区外" or "別図" are
    /// skipped. When `true`, they are kept.
    pub include_chikugai: bool,
}
126
127// --- Helper Functions ---
128fn get_child_element<'a, 'd>(node: &Node<'a, 'd>, name: &str) -> Result<Node<'a, 'd>>
129where
130    'd: 'a,
131{
132    node.children()
133        .find(|child| child.tag_name().name() == name)
134        .ok_or_else(|| Error::MissingElement(name.to_string()))
135}
136
137// -- Accessory parsing functions --
138fn parse_points<'a, 'd>(spatial_element: &Node<'a, 'd>) -> Result<HashMap<&'a str, Point>>
139where
140    'd: 'a,
141{
142    let mut points = HashMap::new();
143    let zmn_ns = get_xml_namespace(Some("zmn"));
144
145    for point in spatial_element
146        .children()
147        .filter(|child| child.is_element() && has_tag(child, zmn_ns, "GM_Point"))
148    {
149        let direct_position = point
150            .descendants()
151            .find(|child| child.is_element() && has_tag(child, zmn_ns, "DirectPosition"))
152            .ok_or_else(|| Error::MissingElement("DirectPosition".to_string()))?;
153        let (x, y) = parse_xy(&direct_position)?;
154        let point_id = required_attribute(&point, "id")?;
155        points.insert(point_id, Point::new(x, y));
156    }
157
158    Ok(points)
159}
160
161fn parse_curves<'a, 'd>(
162    spatial_element: &Node<'a, 'd>,
163    points: &HashMap<&'a str, Point>,
164) -> Result<HashMap<&'a str, Curve>>
165where
166    'd: 'a,
167{
168    let mut curves = HashMap::new();
169    let zmn_ns = get_xml_namespace(Some("zmn"));
170
171    for curve in spatial_element
172        .children()
173        .filter(|child| child.is_element() && has_tag(child, zmn_ns, "GM_Curve"))
174    {
175        let curve_id = required_attribute(&curve, "id")?;
176
177        let segment = curve
178            .children()
179            .find(|child| child.is_element() && has_tag(child, zmn_ns, "GM_Curve.segment"))
180            .ok_or_else(|| Error::MissingElement("GM_Curve.segment".to_string()))?;
181
182        let column = segment
183            .descendants()
184            .find(|child| child.is_element() && has_tag(child, zmn_ns, "GM_PointArray.column"))
185            .ok_or_else(|| Error::MissingElement("GM_PointArray.column".to_string()))?;
186
187        let position = column
188            .first_element_child()
189            .ok_or_else(|| Error::MissingElement("GM_Position.*".to_string()))?;
190
191        let (x, y) = match position.tag_name().name() {
192            "GM_Position.indirect" => {
193                let reference = position
194                    .first_element_child()
195                    .ok_or_else(|| Error::MissingElement("GM_Position.indirect".to_string()))?;
196                let idref = required_attribute(&reference, "idref")?;
197                let point = points
198                    .get(idref)
199                    .ok_or_else(|| Error::PointNotFound(idref.to_string()))?;
200                (point.x(), point.y())
201            }
202            "GM_Position.direct" => parse_xy(&position)?,
203            other => return Err(Error::UnexpectedElement(other.to_string())),
204        };
205
206        curves.insert(curve_id, Curve::new(y, x));
207    }
208
209    Ok(curves)
210}
211
212/// Transform all curves' coordinates from source_crs to target_crs in-place.
213fn transform_curves_crs(
214    curves: &mut HashMap<&str, Curve>,
215    source_crs: &Proj,
216    target_crs: &Proj,
217) -> Result<()> {
218    if curves.is_empty() {
219        return Ok(());
220    }
221
222    for curve in curves.values_mut() {
223        let mut point = curve.x_y();
224        proj4rs::transform::transform(source_crs, target_crs, &mut point)?;
225        *curve = Point::new(point.0.to_degrees(), point.1.to_degrees());
226    }
227
228    Ok(())
229}
230
231fn parse_surfaces<'a, 'd>(
232    spatial_element: &Node<'a, 'd>,
233    curves: &HashMap<&'a str, Curve>,
234) -> Result<HashMap<&'a str, Surface>>
235where
236    'd: 'a,
237{
238    let mut surfaces = HashMap::new();
239    let zmn_ns = get_xml_namespace(Some("zmn"));
240
241    for surface in spatial_element
242        .children()
243        .filter(|child| child.is_element() && has_tag(child, zmn_ns, "GM_Surface"))
244    {
245        let surface_id = required_attribute(&surface, "id")?;
246
247        let polygon = surface
248            .children()
249            .filter(|child| child.is_element() && has_tag(child, zmn_ns, "GM_Surface.patch"))
250            .flat_map(|patch| {
251                patch
252                    .children()
253                    .filter(|child| child.is_element() && has_tag(child, zmn_ns, "GM_Polygon"))
254            })
255            .next()
256            .ok_or_else(|| Error::MissingElement("GM_Surface.patch".to_string()))?;
257
258        let exterior = polygon
259            .descendants()
260            .find(|child| {
261                child.is_element() && has_tag(child, zmn_ns, "GM_SurfaceBoundary.exterior")
262            })
263            .ok_or_else(|| Error::MissingElement("GM_SurfaceBoundary.exterior".to_string()))?;
264
265        let exterior_ring = LineString::from(collect_ring_points(&exterior, curves, zmn_ns)?);
266
267        let interior_rings = polygon
268            .descendants()
269            .filter(|child| {
270                child.is_element() && has_tag(child, zmn_ns, "GM_SurfaceBoundary.interior")
271            })
272            .map(|interior| collect_ring_points(&interior, curves, zmn_ns).map(LineString::from))
273            .collect::<Result<Vec<_>>>()?;
274
275        surfaces.insert(surface_id, Polygon::new(exterior_ring, interior_rings));
276    }
277
278    Ok(surfaces)
279}
280
281fn parse_features<'a, 'd>(
282    subject_elem: &Node<'a, 'd>,
283    surfaces: &HashMap<&'a str, Surface>,
284    options: &ParseOptions,
285) -> Result<Vec<Feature>>
286where
287    'd: 'a,
288{
289    let mut features: Vec<Feature> = Vec::new();
290    let default_ns = get_xml_namespace(None);
291
292    for fude in subject_elem
293        .children()
294        .filter(|child| child.is_element() && has_tag(child, default_ns, "筆"))
295    {
296        let fude_id = required_attribute(&fude, "id")?;
297        let mut geometry: Option<Polygon> = None;
298
299        let mut 精度区分 = None;
300        let mut 大字コード = None;
301        let mut 丁目コード = None;
302        let mut 小字コード = None;
303        let mut 予備コード = None;
304        let mut 大字名 = None;
305        let mut 丁目名 = None;
306        let mut 小字名 = None;
307        let mut 予備名 = None;
308        let mut 地番 = None;
309        let mut 座標値種別 = None;
310        let mut 筆界未定構成筆 = Vec::new();
311
312        for entry in fude.children().filter(|child| child.is_element()) {
313            let tag_name = entry.tag_name().name();
314            match tag_name {
315                "形状" => {
316                    let idref = required_attribute(&entry, "idref")?;
317                    geometry = surfaces.get(idref).cloned();
318                }
319                "精度区分" => 精度区分 = entry.text().map(str::to_owned),
320                "大字コード" => 大字コード = Some(entry.text().unwrap_or("").to_owned()),
321                "丁目コード" => 丁目コード = Some(entry.text().unwrap_or("").to_owned()),
322                "小字コード" => 小字コード = Some(entry.text().unwrap_or("").to_owned()),
323                "予備コード" => 予備コード = Some(entry.text().unwrap_or("").to_owned()),
324                "大字名" => 大字名 = entry.text().map(str::to_owned),
325                "丁目名" => 丁目名 = entry.text().map(str::to_owned),
326                "小字名" => 小字名 = entry.text().map(str::to_owned),
327                "予備名" => 予備名 = entry.text().map(str::to_owned),
328                "地番" => 地番 = Some(entry.text().unwrap_or("").to_owned()),
329                "座標値種別" => 座標値種別 = entry.text().map(str::to_owned),
330                "筆界未定構成筆" => 筆界未定構成筆.push(parse_constituent_fude(&entry)),
331                _ => {}
332            }
333        }
334
335        if !options.include_chikugai {
336            match 地番.as_ref() {
337                Some(value) if value.contains("地区外") || value.contains("別図") => continue,
338                Some(_) => {}
339                None => return Err(Error::MissingElement("地番".to_string())),
340            }
341        }
342
343        let geometry = geometry.ok_or_else(|| Error::MissingElement("geometry".to_string()))?;
344        let 大字コード =
345            大字コード.ok_or_else(|| Error::MissingElement("大字コード".to_string()))?;
346        let 丁目コード =
347            丁目コード.ok_or_else(|| Error::MissingElement("丁目コード".to_string()))?;
348        let 小字コード =
349            小字コード.ok_or_else(|| Error::MissingElement("小字コード".to_string()))?;
350        let 予備コード =
351            予備コード.ok_or_else(|| Error::MissingElement("予備コード".to_string()))?;
352        let 地番 = 地番.ok_or_else(|| Error::MissingElement("地番".to_string()))?;
353
354        let pop = point_on_polygon(&geometry)?;
355        features.push(Feature {
356            geometry,
357            props: FeatureProperties {
358                筆id: fude_id.to_owned(),
359                精度区分,
360                大字コード,
361                丁目コード,
362                小字コード,
363                予備コード,
364                大字名,
365                丁目名,
366                小字名,
367                予備名,
368                地番,
369                座標値種別,
370                筆界未定構成筆,
371                代表点緯度: pop.y(),
372                代表点経度: pop.x(),
373            },
374        });
375    }
376
377    Ok(features)
378}
379
380fn parse_base_properties(root: &Node) -> Result<CommonProperties> {
381    let map_name = child_text(root, "地図名")?;
382    let city_code = child_text(root, "市区町村コード")?;
383    let city_name = child_text(root, "市区町村名")?;
384    let crs = child_text(root, "座標系")?;
385    let crs_det = get_child_element(root, "測地系判別")
386        .ok()
387        .and_then(|elem| elem.text().map(|text| text.to_string()));
388
389    Ok(CommonProperties {
390        地図名: map_name,
391        市区町村コード: city_code,
392        市区町村名: city_name,
393        座標系: crs,
394        測地系判別: crs_det,
395    })
396}
397
398// --- Main Parsing Function ---
399pub fn parse_xml_content(
400    file_name: &str,
401    file_data: &str,
402    options: &ParseOptions,
403) -> Result<ParsedXML> {
404    let file_name = file_name.to_string();
405    let doc = Document::parse(file_data)?;
406    let root = doc.root_element();
407
408    let common_props = parse_base_properties(&root)?;
409
410    let crs = get_proj(&common_props.座標系)?;
411    if crs.is_none() && !options.include_arbitrary_crs {
412        return Ok(ParsedXML {
413            file_name,
414            features: vec![],
415            common_props,
416        });
417    }
418
419    let spatial_element = get_child_element(&root, "空間属性")?;
420    let points = parse_points(&spatial_element)?;
421    let mut curves = parse_curves(&spatial_element, &points)?;
422    if let Some(crs) = crs {
423        let tgt_crs = get_proj("WGS84")?.expect("WGS84 CRS not found");
424        transform_curves_crs(&mut curves, crs, tgt_crs)?;
425    }
426
427    let surfaces = parse_surfaces(&spatial_element, &curves)?;
428    let subject_elem = get_child_element(&root, "主題属性")?;
429
430    let features = parse_features(&subject_elem, &surfaces, options)?;
431    Ok(ParsedXML {
432        file_name,
433        features,
434        common_props,
435    })
436}
437
#[cfg(test)]
mod tests {
    use super::*;
    use crate::constants::get_proj;
    use geo::Contains;
    use geo::{Area, BooleanOps};
    use geo_types::wkt;
    use std::collections::HashMap;
    use std::fs;
    use std::path::PathBuf;

    /// Sample document shared by the file-based tests.
    const SAMPLE_FILE: &str = "46505-3411-56.xml";

    /// `testdata` directory, two levels above this crate's manifest directory.
    fn testdata_path() -> PathBuf {
        let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        let workspace_root = crate_dir
            .parent()
            .and_then(|dir| dir.parent())
            .expect("workspace root");
        workspace_root.join("testdata")
    }

    /// Reads and parses the sample document with the given option flags.
    fn parse_sample(include_arbitrary_crs: bool, include_chikugai: bool) -> ParsedXML {
        let xml =
            fs::read_to_string(testdata_path().join(SAMPLE_FILE)).expect("Failed to read XML file");
        let options = ParseOptions {
            include_arbitrary_crs,
            include_chikugai,
        };
        parse_xml_content(SAMPLE_FILE, &xml, &options).expect("Failed to parse XML")
    }

    #[test]
    fn test_transform_curves_crs_public_coords_to_wgs84() {
        let source_crs = get_proj("公共座標1系")
            .expect("failed to load source CRS")
            .expect("公共座標1系 should resolve to a proj definition");
        let target_crs = get_proj("WGS84")
            .expect("failed to load target CRS")
            .expect("WGS84 should resolve to a proj definition");

        // (curve id, input coordinates, expected transformed coordinates)
        let cases = [
            ("curve-1", (0.0, 0.0), (129.5, 33.0)),
            ("curve-2", (-1000.0, -1000.0), (129.48929948, 32.99098186)),
            ("curve-3", (1000.0, 1000.0), (129.5107027, 33.00901721)),
        ];

        let mut curves: HashMap<&str, Curve> = cases
            .iter()
            .map(|&(id, (x, y), _)| (id, Point::new(x, y)))
            .collect();

        transform_curves_crs(&mut curves, source_crs, target_crs)
            .expect("curve transformation should succeed");

        for (id, _, (expected_x, expected_y)) in cases {
            let curve = curves.get(id).expect("transformed curve missing");
            assert!(
                (curve.x() - expected_x).abs() < 1e-7,
                "longitude mismatch for {id} ({} vs {} )",
                curve.x(),
                expected_x
            );
            assert!(
                (curve.y() - expected_y).abs() < 1e-7,
                "latitude mismatch for {id} ({} vs {} )",
                curve.y(),
                expected_y
            );
        }
    }

    #[test]
    fn test_parse_xml_content() {
        let ParsedXML {
            features,
            common_props,
            ..
        } = parse_sample(true, true);

        assert_eq!(common_props.地図名, "AYA1anbou22B04_2000");
        assert_eq!(common_props.市区町村コード, "46505");
        assert_eq!(common_props.市区町村名, "熊毛郡屋久島町");

        assert_eq!(features.len(), 2994);
        let first_feature = &features[0];
        assert_eq!(first_feature.props.筆id, "H000000001");
        assert_eq!(first_feature.props.地番, "1");

        let expected_geom = wkt! { POLYGON((130.65198936727597 30.31578177961301,130.65211112748588 30.31578250940004,130.65219722479674 30.315750035783307,130.6522397846286 30.315738240687146,130.65232325284867 30.315702331871517,130.6523668021 30.315675347347664,130.65235722919192 30.315650702546424,130.65229088479316 30.315622397556787,130.65227074994843 30.315602911975944,130.65225984787858 30.31558659939628,130.65223178039858 30.315557954059944,130.65219646886888 30.31555482900659,130.65216213192443 30.315543677500482,130.65214529987352 30.315560610998826,130.6521265046212 30.315576961906185,130.6521020960529 30.315589887800154,130.65207800626484 30.315597933967023,130.65192456437038 30.315643904777097,130.65190509850768 30.3156499243803,130.65198936727597 30.31578177961301)) };
        // Area of the part of the parsed geometry lying outside the expected one.
        let leftover_area = first_feature
            .geometry
            .difference(&expected_geom)
            .unsigned_area();
        assert!(leftover_area < 1e-10, "Geometries do not match");
    }

    #[test]
    fn test_parse_chikugai_miten_kosei_features() {
        // Exercises parsing of 筆界未定構成筆 elements.
        let ParsedXML { features, .. } = parse_sample(true, true);

        let with_constituents: Vec<_> = features
            .iter()
            .filter(|feature| !feature.props.筆界未定構成筆.is_empty())
            .collect();
        assert!(
            !with_constituents.is_empty(),
            "Should find features with 筆界未定構成筆"
        );

        let constituents = &with_constituents[0].props.筆界未定構成筆;
        assert!(!constituents.is_empty());

        // The sample data is expected to populate these fields.
        let first_constituent = &constituents[0];
        assert!(!first_constituent.大字コード.is_empty());
        assert!(!first_constituent.地番.is_empty());
        assert!(first_constituent.大字名.is_some());

        println!(
            "Found feature with {} 筆界未定構成筆 elements",
            constituents.len()
        );
        let oaza_name = match &first_constituent.大字名 {
            Some(name) => name.as_str(),
            None => "N/A",
        };
        println!(
            "First constituent: {} {} {}",
            oaza_name, first_constituent.地番, first_constituent.大字コード
        );
    }

    #[test]
    fn test_representative_point_should_be_inside_of_polygon() {
        let ParsedXML { features, .. } = parse_sample(false, false);

        for feature in &features {
            let props = &feature.props;
            let rep_point = Point::new(props.代表点経度, props.代表点緯度);
            assert!(
                feature.geometry.contains(&rep_point),
                "Representative point is outside of the polygon"
            );
        }
    }
}