Skip to main content

mojxml_parser/
parse.rs

1use crate::constants::get_proj;
2use crate::error::{Error, Result};
3use crate::types::{CommonProperties, Feature, FeatureProperties};
4use crate::{ParsedXML, 筆界未定構成筆};
5use geo::algorithm::interior_point::InteriorPoint;
6use geo_types::{LineString, Point, Polygon};
7use proj4rs::proj::Proj;
8use quick_xml::Reader;
9use quick_xml::events::{BytesStart, Event};
10use rustc_hash::FxHashMap as HashMap;
11
// --- Type Aliases ---
/// A curve is stored as a single point: `finish_curve` keeps only the position
/// read under the first `GM_PointArray.column` of each `GM_Curve`.
type Curve = Point;
/// A surface is stored as a polygon built in `finish_surface` from the curve
/// points referenced by its rings.
type Surface = Polygon;
15
/// Options controlling which features the parser emits.
#[derive(Debug, Clone, Default)]
pub struct ParseOptions {
    /// When `false`, a file whose 座標系 yields no projection from `get_proj`
    /// has its features skipped and an empty feature list is returned.
    pub include_arbitrary_crs: bool,
    /// When `false`, a 筆 whose 地番 contains "地区外" or "別図" is dropped.
    pub include_chikugai: bool,
}
21
/// Collects the text of the depth-2 header elements until they are converted
/// into `CommonProperties` by `into_common_props`.
#[derive(Default)]
struct CommonPropsBuilder {
    /// Text of 地図名.
    map_name: Option<String>,
    /// Text of 市区町村コード.
    city_code: Option<String>,
    /// Text of 市区町村名.
    city_name: Option<String>,
    /// Text of 座標系.
    crs: Option<String>,
    /// Text of 測地系判別; the only field that may stay `None`.
    crs_det: Option<String>,
}
30
31impl CommonPropsBuilder {
32    fn into_common_props(self) -> Result<CommonProperties> {
33        Ok(CommonProperties {
34            地図名: self
35                .map_name
36                .ok_or_else(|| Error::MissingElement("地図名".to_string()))?,
37            市区町村コード: self
38                .city_code
39                .ok_or_else(|| Error::MissingElement("市区町村コード".to_string()))?,
40            市区町村名: self
41                .city_name
42                .ok_or_else(|| Error::MissingElement("市区町村名".to_string()))?,
43            座標系: self
44                .crs
45                .ok_or_else(|| Error::MissingElement("座標系".to_string()))?,
46            測地系判別: self.crs_det,
47        })
48    }
49}
50
/// State of the `GM_Point` element currently being parsed.
#[derive(Default)]
struct PointBuilder {
    /// Value of the element's `id` attribute.
    id: String,
    /// Set when a `GM_Point.position` start tag is seen.
    saw_position: bool,
    /// Set when a `DirectPosition` start tag is seen.
    saw_direct_position: bool,
    /// Parsed text of the `X` element, if any.
    x: Option<f64>,
    /// Parsed text of the `Y` element, if any.
    y: Option<f64>,
}
59
/// How the first position of a curve is expressed.
#[derive(Clone, Copy)]
enum CurvePositionKind {
    /// `GM_Position.direct`: coordinates are given inline as `X` / `Y`.
    Direct,
    /// `GM_Position.indirect`: the position refers to a point through `idref`.
    Indirect,
}
65
/// State of the `GM_Curve` element currently being parsed.
#[derive(Default)]
struct CurveBuilder {
    /// Value of the element's `id` attribute.
    id: String,
    /// Set when a `GM_Curve.segment` start tag is seen.
    saw_segment: bool,
    /// Set when the first `GM_PointArray.column` start tag is seen.
    saw_first_column: bool,
    /// Depth of that first `GM_PointArray.column`.
    first_column_depth: Option<usize>,
    /// Kind of the position element found directly under the first column.
    position_kind: Option<CurvePositionKind>,
    /// Depth of that position element; cleared again at its end tag.
    position_depth: Option<usize>,
    /// Parsed `X` of a direct position.
    direct_x: Option<f64>,
    /// Parsed `Y` of a direct position.
    direct_y: Option<f64>,
    /// `idref` attribute read from the child of an indirect position.
    indirect_ref: Option<String>,
}
78
/// Which ring of a surface the parser is currently filling.
#[derive(Clone, Copy)]
enum BoundaryKind {
    /// `GM_SurfaceBoundary.exterior`.
    Exterior,
    /// `GM_SurfaceBoundary.interior`; the value is the index into
    /// `SurfaceBuilder::interior_points`.
    Interior(usize),
}
84
/// State of the `GM_Surface` element currently being parsed.
#[derive(Default)]
struct SurfaceBuilder {
    /// Value of the element's `id` attribute.
    id: String,
    /// Set when a `GM_Surface.patch` start tag is seen.
    saw_patch: bool,
    /// Set when a `GM_Polygon` start tag is seen.
    saw_polygon: bool,
    /// Set when a `GM_Polygon.boundary` start tag is seen.
    saw_polygon_boundary: bool,
    /// Set when a `GM_SurfaceBoundary` start tag is seen.
    saw_surface_boundary: bool,
    /// Set when a `GM_SurfaceBoundary.exterior` start tag is seen.
    saw_exterior: bool,
    /// Boundary element that is currently open, if any.
    active_boundary: Option<BoundaryKind>,
    /// Depth of the open boundary element; used to recognize its end tag.
    boundary_depth: Option<usize>,
    /// Depth of the open `GM_Ring`; its direct children are read as curve references.
    ring_depth: Option<usize>,
    /// Curve points collected for the exterior ring.
    exterior_points: Vec<Point>,
    /// Curve points collected for each interior ring, in document order.
    interior_points: Vec<Vec<Point>>,
}
99
/// State of the 筆 element currently being parsed.
///
/// The code fields and `parcel_number` are stored as `Some("")` when their
/// element is present but has no text; the other text fields stay `None`.
#[derive(Default)]
struct FudeBuilder {
    /// Value of the element's `id` attribute.
    id: String,
    /// `idref` attribute of the 形状 element.
    geometry_ref: Option<String>,
    /// Text of 精度区分.
    precision_class: Option<String>,
    /// Text of 大字コード.
    ooaza_code: Option<String>,
    /// Text of 丁目コード.
    chome_code: Option<String>,
    /// Text of 小字コード.
    koaza_code: Option<String>,
    /// Text of 予備コード.
    yobi_code: Option<String>,
    /// Text of 大字名.
    ooaza_name: Option<String>,
    /// Text of 丁目名.
    chome_name: Option<String>,
    /// Text of 小字名.
    koaza_name: Option<String>,
    /// Text of 予備名.
    yobi_name: Option<String>,
    /// Text of 地番.
    parcel_number: Option<String>,
    /// Text of 座標値種別.
    coordinate_value_type: Option<String>,
    /// Completed 筆界未定構成筆 children, in document order.
    constituents: Vec<筆界未定構成筆>,
}
117
/// Destination of the text captured from one element; consumed by `apply_text`.
#[derive(Clone, Copy)]
enum TextTarget {
    // Depth-2 header elements, stored in the common-properties builder.
    RootMapName,
    RootCityCode,
    RootCityName,
    RootCrs,
    RootCrsDet,
    // Coordinates of the point being built.
    PointX,
    PointY,
    // Coordinates of a direct position of the curve being built.
    CurveDirectX,
    CurveDirectY,
    // Attributes of the 筆 being built.
    FudePrecisionClass,
    FudeOoazaCode,
    FudeChomeCode,
    FudeKoazaCode,
    FudeYobiCode,
    FudeOoazaName,
    FudeChomeName,
    FudeKoazaName,
    FudeYobiName,
    FudeParcelNumber,
    FudeCoordinateValueType,
    // Attributes of the 筆界未定構成筆 being built.
    ConstituentOoazaCode,
    ConstituentChomeCode,
    ConstituentKoazaCode,
    ConstituentYobiCode,
    ConstituentOoazaName,
    ConstituentChomeName,
    ConstituentKoazaName,
    ConstituentYobiName,
    ConstituentParcelNumber,
}
150
/// Text capture in progress for a single element.
struct ActiveText {
    /// Where the captured text is written once the element ends.
    target: TextTarget,
    /// Depth of the element being captured; the capture is finalized at the
    /// end tag with the same depth.
    depth: usize,
    /// Text accumulated so far.
    value: String,
    /// Whether any text chunk was pushed; distinguishes "no text" from
    /// an accumulated value.
    has_text: bool,
}
157
158impl ActiveText {
159    fn new(target: TextTarget, depth: usize) -> Self {
160        Self {
161            target,
162            depth,
163            value: String::new(),
164            has_text: false,
165        }
166    }
167}
168
/// Event-driven parser state: header fields, geometry lookup tables, the
/// builders for the element currently open, and the text capture in progress.
struct StreamParser<'a> {
    /// Caller-supplied options.
    options: &'a ParseOptions,
    /// Header values collected so far.
    common_builder: CommonPropsBuilder,
    /// Header values after validation by `ensure_common_and_crs`.
    common_props: Option<CommonProperties>,
    /// Set once `ensure_common_and_crs` has succeeded.
    common_checked: bool,
    /// Set when the CRS has no projection and arbitrary CRS files are excluded;
    /// geometry and feature handling is then bypassed.
    skip_features: bool,
    /// Projection resolved from 座標系, if any.
    source_crs: Option<&'static Proj>,
    /// Projection resolved for "WGS84"; set together with `source_crs`.
    target_crs: Option<&'static Proj>,

    /// True while inside the depth-2 空間属性 element.
    in_spatial: bool,
    /// True while inside the depth-2 主題属性 element.
    in_subject: bool,
    /// Set when a 空間属性 start tag was processed.
    saw_spatial: bool,
    /// Set when a 主題属性 start tag was processed.
    saw_subject: bool,

    /// Finished points keyed by `id`.
    points: HashMap<String, Point>,
    /// Finished curves keyed by `id`.
    curves: HashMap<String, Curve>,
    /// Finished surfaces keyed by `id`.
    surfaces: HashMap<String, Surface>,
    /// Features returned by `finish`.
    features: Vec<Feature>,

    /// Builder for the open `GM_Point`, if any.
    point: Option<PointBuilder>,
    /// Builder for the open `GM_Curve`, if any.
    curve: Option<CurveBuilder>,
    /// Builder for the open `GM_Surface`, if any.
    surface: Option<SurfaceBuilder>,
    /// Builder for the open 筆, if any.
    fude: Option<FudeBuilder>,
    /// The open 筆界未定構成筆, if any.
    constituent: Option<筆界未定構成筆>,

    /// Text capture in progress, if any.
    active_text: Option<ActiveText>,
}
196
197impl<'a> StreamParser<'a> {
198    fn new(options: &'a ParseOptions) -> Self {
199        Self {
200            options,
201            common_builder: CommonPropsBuilder::default(),
202            common_props: None,
203            common_checked: false,
204            skip_features: false,
205            source_crs: None,
206            target_crs: None,
207            in_spatial: false,
208            in_subject: false,
209            saw_spatial: false,
210            saw_subject: false,
211            points: HashMap::with_capacity_and_hasher(16_384, Default::default()),
212            curves: HashMap::with_capacity_and_hasher(32_768, Default::default()),
213            surfaces: HashMap::with_capacity_and_hasher(4_096, Default::default()),
214            features: Vec::with_capacity(4_096),
215            point: None,
216            curve: None,
217            surface: None,
218            fude: None,
219            constituent: None,
220            active_text: None,
221        }
222    }
223
224    fn ensure_common_and_crs(&mut self) -> Result<()> {
225        if self.common_checked {
226            return Ok(());
227        }
228
229        let common_props = std::mem::take(&mut self.common_builder).into_common_props()?;
230        let source_crs = get_proj(&common_props.座標系)?;
231
232        self.common_props = Some(common_props);
233        self.common_checked = true;
234
235        match source_crs {
236            Some(source_crs) => {
237                let target_crs = get_proj("WGS84")?.expect("WGS84 CRS not found");
238                self.source_crs = Some(source_crs);
239                self.target_crs = Some(target_crs);
240            }
241            None => {
242                if !self.options.include_arbitrary_crs {
243                    self.skip_features = true;
244                }
245            }
246        }
247
248        Ok(())
249    }
250
251    fn finish(mut self, file_name: String) -> Result<ParsedXML> {
252        if !self.common_checked {
253            self.ensure_common_and_crs()?;
254        }
255
256        let common_props = self
257            .common_props
258            .ok_or_else(|| Error::MissingElement("地図名".to_string()))?;
259
260        if self.skip_features {
261            return Ok(ParsedXML {
262                file_name,
263                features: Vec::new(),
264                common_props,
265            });
266        }
267
268        if !self.saw_spatial {
269            return Err(Error::MissingElement("空間属性".to_string()));
270        }
271
272        if !self.saw_subject {
273            return Err(Error::MissingElement("主題属性".to_string()));
274        }
275
276        Ok(ParsedXML {
277            file_name,
278            features: self.features,
279            common_props,
280        })
281    }
282
283    fn begin_text(&mut self, target: TextTarget, depth: usize) {
284        self.active_text = Some(ActiveText::new(target, depth));
285    }
286
287    fn push_text_bytes(&mut self, bytes: &[u8]) -> Result<()> {
288        if let Some(active) = &mut self.active_text {
289            active.value.push_str(std::str::from_utf8(bytes)?);
290            active.has_text = true;
291        }
292        Ok(())
293    }
294
295    fn finalize_text_if_needed(&mut self, depth: usize) -> Result<()> {
296        let should_finalize = match &self.active_text {
297            Some(active) => active.depth == depth,
298            None => false,
299        };
300
301        if !should_finalize {
302            return Ok(());
303        }
304
305        let active = self
306            .active_text
307            .take()
308            .expect("active text must be present");
309        let value = if active.has_text {
310            Some(active.value.as_str())
311        } else {
312            None
313        };
314        self.apply_text(active.target, value)
315    }
316
317    fn apply_text(&mut self, target: TextTarget, value: Option<&str>) -> Result<()> {
318        match target {
319            TextTarget::RootMapName => self.common_builder.map_name = value.map(str::to_owned),
320            TextTarget::RootCityCode => self.common_builder.city_code = value.map(str::to_owned),
321            TextTarget::RootCityName => self.common_builder.city_name = value.map(str::to_owned),
322            TextTarget::RootCrs => self.common_builder.crs = value.map(str::to_owned),
323            TextTarget::RootCrsDet => self.common_builder.crs_det = value.map(str::to_owned),
324
325            TextTarget::PointX => {
326                if let Some(point) = &mut self.point {
327                    point.x = value.map(|v| v.parse()).transpose()?;
328                }
329            }
330            TextTarget::PointY => {
331                if let Some(point) = &mut self.point {
332                    point.y = value.map(|v| v.parse()).transpose()?;
333                }
334            }
335
336            TextTarget::CurveDirectX => {
337                if let Some(curve) = &mut self.curve {
338                    curve.direct_x = value.map(|v| v.parse()).transpose()?;
339                }
340            }
341            TextTarget::CurveDirectY => {
342                if let Some(curve) = &mut self.curve {
343                    curve.direct_y = value.map(|v| v.parse()).transpose()?;
344                }
345            }
346
347            TextTarget::FudePrecisionClass => {
348                if let Some(fude) = &mut self.fude {
349                    fude.precision_class = value.map(str::to_owned);
350                }
351            }
352            TextTarget::FudeOoazaCode => {
353                if let Some(fude) = &mut self.fude {
354                    fude.ooaza_code = Some(value.unwrap_or("").to_owned());
355                }
356            }
357            TextTarget::FudeChomeCode => {
358                if let Some(fude) = &mut self.fude {
359                    fude.chome_code = Some(value.unwrap_or("").to_owned());
360                }
361            }
362            TextTarget::FudeKoazaCode => {
363                if let Some(fude) = &mut self.fude {
364                    fude.koaza_code = Some(value.unwrap_or("").to_owned());
365                }
366            }
367            TextTarget::FudeYobiCode => {
368                if let Some(fude) = &mut self.fude {
369                    fude.yobi_code = Some(value.unwrap_or("").to_owned());
370                }
371            }
372            TextTarget::FudeOoazaName => {
373                if let Some(fude) = &mut self.fude {
374                    fude.ooaza_name = value.map(str::to_owned);
375                }
376            }
377            TextTarget::FudeChomeName => {
378                if let Some(fude) = &mut self.fude {
379                    fude.chome_name = value.map(str::to_owned);
380                }
381            }
382            TextTarget::FudeKoazaName => {
383                if let Some(fude) = &mut self.fude {
384                    fude.koaza_name = value.map(str::to_owned);
385                }
386            }
387            TextTarget::FudeYobiName => {
388                if let Some(fude) = &mut self.fude {
389                    fude.yobi_name = value.map(str::to_owned);
390                }
391            }
392            TextTarget::FudeParcelNumber => {
393                if let Some(fude) = &mut self.fude {
394                    fude.parcel_number = Some(value.unwrap_or("").to_owned());
395                }
396            }
397            TextTarget::FudeCoordinateValueType => {
398                if let Some(fude) = &mut self.fude {
399                    fude.coordinate_value_type = value.map(str::to_owned);
400                }
401            }
402
403            TextTarget::ConstituentOoazaCode => {
404                if let Some(constituent) = &mut self.constituent {
405                    constituent.大字コード = value.unwrap_or("").to_owned();
406                }
407            }
408            TextTarget::ConstituentChomeCode => {
409                if let Some(constituent) = &mut self.constituent {
410                    constituent.丁目コード = value.unwrap_or("").to_owned();
411                }
412            }
413            TextTarget::ConstituentKoazaCode => {
414                if let Some(constituent) = &mut self.constituent {
415                    constituent.小字コード = value.unwrap_or("").to_owned();
416                }
417            }
418            TextTarget::ConstituentYobiCode => {
419                if let Some(constituent) = &mut self.constituent {
420                    constituent.予備コード = value.unwrap_or("").to_owned();
421                }
422            }
423            TextTarget::ConstituentOoazaName => {
424                if let Some(constituent) = &mut self.constituent {
425                    constituent.大字名 = value.map(str::to_owned);
426                }
427            }
428            TextTarget::ConstituentChomeName => {
429                if let Some(constituent) = &mut self.constituent {
430                    constituent.丁目名 = value.map(str::to_owned);
431                }
432            }
433            TextTarget::ConstituentKoazaName => {
434                if let Some(constituent) = &mut self.constituent {
435                    constituent.小字名 = value.map(str::to_owned);
436                }
437            }
438            TextTarget::ConstituentYobiName => {
439                if let Some(constituent) = &mut self.constituent {
440                    constituent.予備名 = value.map(str::to_owned);
441                }
442            }
443            TextTarget::ConstituentParcelNumber => {
444                if let Some(constituent) = &mut self.constituent {
445                    constituent.地番 = value.unwrap_or("").to_owned();
446                }
447            }
448        }
449        Ok(())
450    }
451
452    fn handle_start(&mut self, start: &BytesStart<'_>, depth: usize) -> Result<()> {
453        let start_name = start.name();
454        let name = local_name(start_name.as_ref());
455
456        if depth == 2 {
457            match () {
458                _ if name_eq(name, "地図名") => self.begin_text(TextTarget::RootMapName, depth),
459                _ if name_eq(name, "市区町村コード") => {
460                    self.begin_text(TextTarget::RootCityCode, depth)
461                }
462                _ if name_eq(name, "市区町村名") => {
463                    self.begin_text(TextTarget::RootCityName, depth)
464                }
465                _ if name_eq(name, "座標系") => self.begin_text(TextTarget::RootCrs, depth),
466                _ if name_eq(name, "測地系判別") => {
467                    self.begin_text(TextTarget::RootCrsDet, depth)
468                }
469                _ if name_eq(name, "空間属性") => {
470                    self.ensure_common_and_crs()?;
471                    if !self.skip_features {
472                        self.saw_spatial = true;
473                        self.in_spatial = true;
474                    }
475                }
476                _ if name_eq(name, "主題属性") => {
477                    self.ensure_common_and_crs()?;
478                    if !self.skip_features {
479                        self.saw_subject = true;
480                        self.in_subject = true;
481                    }
482                }
483                _ => {}
484            }
485        }
486
487        if self.skip_features {
488            return Ok(());
489        }
490
491        if self.in_spatial {
492            self.handle_spatial_start(start, name, depth)?;
493        }
494
495        if self.in_subject {
496            self.handle_subject_start(start, name, depth)?;
497        }
498
499        Ok(())
500    }
501
    /// Handles a start tag inside 空間属性.
    ///
    /// A depth-3 `GM_Point`, `GM_Curve` or `GM_Surface` opens a new builder.
    /// Any other tag updates whichever builders are currently open.
    ///
    /// # Errors
    ///
    /// - Propagates errors from `required_attribute` (`id` / `idref` lookups).
    /// - `Error::UnexpectedElement` when the child of the first
    ///   `GM_PointArray.column` is neither `GM_Position.indirect` nor
    ///   `GM_Position.direct`.
    /// - `Error::PointNotFound` when a ring member's `idref` is not a known curve.
    fn handle_spatial_start(
        &mut self,
        start: &BytesStart<'_>,
        name: &[u8],
        depth: usize,
    ) -> Result<()> {
        // Depth-3 geometry elements start a fresh builder and need no further handling.
        if depth == 3 && name == b"GM_Point" {
            self.start_point(start)?;
            return Ok(());
        }

        if depth == 3 && name == b"GM_Curve" {
            self.start_curve(start)?;
            return Ok(());
        }

        if depth == 3 && name == b"GM_Surface" {
            self.start_surface(start)?;
            return Ok(());
        }

        // Inside a point: record structural tags and capture X / Y text.
        if let Some(point) = &mut self.point {
            match name {
                b"GM_Point.position" => point.saw_position = true,
                b"DirectPosition" => point.saw_direct_position = true,
                b"X" => self.begin_text(TextTarget::PointX, depth),
                b"Y" => self.begin_text(TextTarget::PointY, depth),
                _ => {}
            }
        }

        if let Some(curve) = &mut self.curve {
            if name == b"GM_Curve.segment" {
                curve.saw_segment = true;
            }

            // Only the first column is tracked; later columns are ignored.
            if name == b"GM_PointArray.column" && !curve.saw_first_column {
                curve.saw_first_column = true;
                curve.first_column_depth = Some(depth);
            }

            // The direct child of the first column decides how the position is given.
            if let Some(column_depth) = curve.first_column_depth
                && curve.position_kind.is_none()
                && depth == column_depth + 1
            {
                match name {
                    b"GM_Position.indirect" => {
                        curve.position_kind = Some(CurvePositionKind::Indirect);
                        curve.position_depth = Some(depth);
                    }
                    b"GM_Position.direct" => {
                        curve.position_kind = Some(CurvePositionKind::Direct);
                        curve.position_depth = Some(depth);
                    }
                    _ => {
                        return Err(Error::UnexpectedElement(name_to_string(name)));
                    }
                }
            }

            // While the position element is open (position_depth is cleared at
            // its end tag), read its direct children.
            if let (Some(position_kind), Some(position_depth)) =
                (curve.position_kind, curve.position_depth)
            {
                match position_kind {
                    CurvePositionKind::Direct => {
                        if depth == position_depth + 1 {
                            match name {
                                b"X" => self.begin_text(TextTarget::CurveDirectX, depth),
                                b"Y" => self.begin_text(TextTarget::CurveDirectY, depth),
                                _ => {}
                            }
                        }
                    }
                    CurvePositionKind::Indirect => {
                        // Only the first child's reference is kept.
                        if depth == position_depth + 1 && curve.indirect_ref.is_none() {
                            curve.indirect_ref = Some(required_attribute(start, "idref")?);
                        }
                    }
                }
            }
        }

        if let Some(surface) = &mut self.surface {
            match name {
                b"GM_Surface.patch" => surface.saw_patch = true,
                b"GM_Polygon" => surface.saw_polygon = true,
                b"GM_Polygon.boundary" => surface.saw_polygon_boundary = true,
                b"GM_SurfaceBoundary" => surface.saw_surface_boundary = true,
                b"GM_SurfaceBoundary.exterior" => {
                    surface.saw_exterior = true;
                    surface.active_boundary = Some(BoundaryKind::Exterior);
                    surface.boundary_depth = Some(depth);
                }
                b"GM_SurfaceBoundary.interior" => {
                    // Each interior boundary gets its own point list.
                    let interior_index = surface.interior_points.len();
                    surface.interior_points.push(Vec::new());
                    surface.active_boundary = Some(BoundaryKind::Interior(interior_index));
                    surface.boundary_depth = Some(depth);
                }
                b"GM_Ring" => {
                    // A ring is only tracked while a boundary is open.
                    if surface.active_boundary.is_some() {
                        surface.ring_depth = Some(depth);
                    }
                }
                _ => {}
            }

            // Direct children of the open ring reference curves by `idref`;
            // the referenced curve's point is appended to the active boundary.
            if let Some(ring_depth) = surface.ring_depth
                && depth == ring_depth + 1
            {
                let idref = required_attribute(start, "idref")?;
                let curve = *self
                    .curves
                    .get(&idref)
                    .ok_or_else(|| Error::PointNotFound(idref.clone()))?;

                match surface.active_boundary {
                    Some(BoundaryKind::Exterior) => surface.exterior_points.push(curve),
                    Some(BoundaryKind::Interior(idx)) => {
                        if let Some(interior) = surface.interior_points.get_mut(idx) {
                            interior.push(curve);
                        }
                    }
                    None => {}
                }
            }
        }

        Ok(())
    }
632
633    fn handle_subject_start(
634        &mut self,
635        start: &BytesStart<'_>,
636        name: &[u8],
637        depth: usize,
638    ) -> Result<()> {
639        if depth == 3 && name_eq(name, "筆") {
640            self.start_fude(start)?;
641            return Ok(());
642        }
643
644        if self.fude.is_none() {
645            return Ok(());
646        }
647
648        if name_eq(name, "筆界未定構成筆") {
649            self.constituent = Some(筆界未定構成筆::default());
650            return Ok(());
651        }
652
653        if self.constituent.is_some() {
654            match () {
655                _ if name_eq(name, "大字コード") => {
656                    self.begin_text(TextTarget::ConstituentOoazaCode, depth)
657                }
658                _ if name_eq(name, "丁目コード") => {
659                    self.begin_text(TextTarget::ConstituentChomeCode, depth)
660                }
661                _ if name_eq(name, "小字コード") => {
662                    self.begin_text(TextTarget::ConstituentKoazaCode, depth)
663                }
664                _ if name_eq(name, "予備コード") => {
665                    self.begin_text(TextTarget::ConstituentYobiCode, depth)
666                }
667                _ if name_eq(name, "大字名") => {
668                    self.begin_text(TextTarget::ConstituentOoazaName, depth)
669                }
670                _ if name_eq(name, "丁目名") => {
671                    self.begin_text(TextTarget::ConstituentChomeName, depth)
672                }
673                _ if name_eq(name, "小字名") => {
674                    self.begin_text(TextTarget::ConstituentKoazaName, depth)
675                }
676                _ if name_eq(name, "予備名") => {
677                    self.begin_text(TextTarget::ConstituentYobiName, depth)
678                }
679                _ if name_eq(name, "地番") => {
680                    self.begin_text(TextTarget::ConstituentParcelNumber, depth)
681                }
682                _ => {}
683            }
684            return Ok(());
685        }
686
687        match () {
688            _ if name_eq(name, "形状") => {
689                if let Some(fude) = &mut self.fude {
690                    fude.geometry_ref = Some(required_attribute(start, "idref")?);
691                }
692            }
693            _ if name_eq(name, "精度区分") => {
694                self.begin_text(TextTarget::FudePrecisionClass, depth)
695            }
696            _ if name_eq(name, "大字コード") => {
697                self.begin_text(TextTarget::FudeOoazaCode, depth)
698            }
699            _ if name_eq(name, "丁目コード") => {
700                self.begin_text(TextTarget::FudeChomeCode, depth)
701            }
702            _ if name_eq(name, "小字コード") => {
703                self.begin_text(TextTarget::FudeKoazaCode, depth)
704            }
705            _ if name_eq(name, "予備コード") => {
706                self.begin_text(TextTarget::FudeYobiCode, depth)
707            }
708            _ if name_eq(name, "大字名") => self.begin_text(TextTarget::FudeOoazaName, depth),
709            _ if name_eq(name, "丁目名") => self.begin_text(TextTarget::FudeChomeName, depth),
710            _ if name_eq(name, "小字名") => self.begin_text(TextTarget::FudeKoazaName, depth),
711            _ if name_eq(name, "予備名") => self.begin_text(TextTarget::FudeYobiName, depth),
712            _ if name_eq(name, "地番") => self.begin_text(TextTarget::FudeParcelNumber, depth),
713            _ if name_eq(name, "座標値種別") => {
714                self.begin_text(TextTarget::FudeCoordinateValueType, depth)
715            }
716            _ => {}
717        }
718
719        Ok(())
720    }
721
722    fn handle_end(&mut self, name: &[u8], depth: usize) -> Result<()> {
723        self.finalize_text_if_needed(depth)?;
724
725        if self.skip_features {
726            return Ok(());
727        }
728
729        if self.in_spatial {
730            self.handle_spatial_end(name, depth)?;
731        }
732
733        if self.in_subject {
734            self.handle_subject_end(name)?;
735        }
736
737        if depth == 2 {
738            match () {
739                _ if name_eq(name, "空間属性") => self.in_spatial = false,
740                _ if name_eq(name, "主題属性") => self.in_subject = false,
741                _ => {}
742            }
743        }
744
745        Ok(())
746    }
747
748    fn handle_spatial_end(&mut self, name: &[u8], depth: usize) -> Result<()> {
749        if let Some(curve) = &mut self.curve
750            && curve.position_depth == Some(depth)
751            && (name == b"GM_Position.direct" || name == b"GM_Position.indirect")
752        {
753            curve.position_depth = None;
754        }
755
756        if let Some(surface) = &mut self.surface {
757            if surface.ring_depth == Some(depth) && name == b"GM_Ring" {
758                surface.ring_depth = None;
759            }
760
761            if surface.boundary_depth == Some(depth)
762                && (name == b"GM_SurfaceBoundary.exterior"
763                    || name == b"GM_SurfaceBoundary.interior")
764            {
765                surface.active_boundary = None;
766                surface.boundary_depth = None;
767            }
768        }
769
770        match name {
771            b"GM_Point" => self.finish_point()?,
772            b"GM_Curve" => self.finish_curve()?,
773            b"GM_Surface" => self.finish_surface()?,
774            _ => {}
775        }
776
777        Ok(())
778    }
779
780    fn handle_subject_end(&mut self, name: &[u8]) -> Result<()> {
781        match () {
782            _ if name_eq(name, "筆界未定構成筆") => {
783                if let Some(constituent) = self.constituent.take()
784                    && let Some(fude) = &mut self.fude
785                {
786                    fude.constituents.push(constituent);
787                }
788            }
789            _ if name_eq(name, "筆") => self.finish_fude()?,
790            _ => {}
791        }
792
793        Ok(())
794    }
795
796    fn start_point(&mut self, start: &BytesStart<'_>) -> Result<()> {
797        self.point = Some(PointBuilder {
798            id: required_attribute(start, "id")?,
799            ..Default::default()
800        });
801        Ok(())
802    }
803
804    fn start_curve(&mut self, start: &BytesStart<'_>) -> Result<()> {
805        self.curve = Some(CurveBuilder {
806            id: required_attribute(start, "id")?,
807            ..Default::default()
808        });
809        Ok(())
810    }
811
812    fn start_surface(&mut self, start: &BytesStart<'_>) -> Result<()> {
813        self.surface = Some(SurfaceBuilder {
814            id: required_attribute(start, "id")?,
815            ..Default::default()
816        });
817        Ok(())
818    }
819
820    fn start_fude(&mut self, start: &BytesStart<'_>) -> Result<()> {
821        self.fude = Some(FudeBuilder {
822            id: required_attribute(start, "id")?,
823            ..Default::default()
824        });
825        Ok(())
826    }
827
828    fn finish_point(&mut self) -> Result<()> {
829        let point = match self.point.take() {
830            Some(point) => point,
831            None => return Ok(()),
832        };
833
834        if !point.saw_position {
835            return Err(Error::MissingElement("GM_Point.position".to_string()));
836        }
837
838        if !point.saw_direct_position {
839            return Err(Error::MissingElement("DirectPosition".to_string()));
840        }
841
842        let x = point
843            .x
844            .ok_or_else(|| Error::MissingElement("X".to_string()))?;
845        let y = point
846            .y
847            .ok_or_else(|| Error::MissingElement("Y".to_string()))?;
848
849        self.points.insert(point.id, Point::new(x, y));
850        Ok(())
851    }
852
853    fn finish_curve(&mut self) -> Result<()> {
854        let curve = match self.curve.take() {
855            Some(curve) => curve,
856            None => return Ok(()),
857        };
858
859        if !curve.saw_segment {
860            return Err(Error::MissingElement("GM_Curve.segment".to_string()));
861        }
862
863        if !curve.saw_first_column {
864            return Err(Error::MissingElement("GM_PointArray.column".to_string()));
865        }
866
867        let (x, y) = match curve.position_kind {
868            Some(CurvePositionKind::Direct) => {
869                let x = curve
870                    .direct_x
871                    .ok_or_else(|| Error::MissingElement("X".to_string()))?;
872                let y = curve
873                    .direct_y
874                    .ok_or_else(|| Error::MissingElement("Y".to_string()))?;
875                (x, y)
876            }
877            Some(CurvePositionKind::Indirect) => {
878                let idref = curve
879                    .indirect_ref
880                    .ok_or_else(|| Error::MissingElement("GM_Position.indirect".to_string()))?;
881                let point = self
882                    .points
883                    .get(&idref)
884                    .ok_or_else(|| Error::PointNotFound(idref.clone()))?;
885                (point.x(), point.y())
886            }
887            None => {
888                return Err(Error::MissingElement("GM_Position.*".to_string()));
889            }
890        };
891
892        let mut curve_point = Curve::new(y, x);
893        if let (Some(source_crs), Some(target_crs)) = (self.source_crs, self.target_crs) {
894            transform_curve_crs(&mut curve_point, source_crs, target_crs)?;
895        }
896
897        self.curves.insert(curve.id, curve_point);
898        Ok(())
899    }
900
901    fn finish_surface(&mut self) -> Result<()> {
902        let surface = match self.surface.take() {
903            Some(surface) => surface,
904            None => return Ok(()),
905        };
906
907        if !surface.saw_patch || !surface.saw_polygon {
908            return Err(Error::MissingElement("GM_Surface.patch".to_string()));
909        }
910
911        if !surface.saw_polygon_boundary || !surface.saw_surface_boundary {
912            return Err(Error::MissingElement("GM_SurfaceBoundary".to_string()));
913        }
914
915        if !surface.saw_exterior {
916            return Err(Error::MissingElement(
917                "GM_SurfaceBoundary.exterior".to_string(),
918            ));
919        }
920
921        let exterior_ring = LineString::from(surface.exterior_points);
922        let interior_rings = surface
923            .interior_points
924            .into_iter()
925            .map(LineString::from)
926            .collect::<Vec<_>>();
927
928        self.surfaces
929            .insert(surface.id, Polygon::new(exterior_ring, interior_rings));
930
931        Ok(())
932    }
933
934    fn finish_fude(&mut self) -> Result<()> {
935        let fude = match self.fude.take() {
936            Some(fude) => fude,
937            None => return Ok(()),
938        };
939
940        if !self.options.include_chikugai {
941            match fude.parcel_number.as_ref() {
942                Some(value) if value.contains("地区外") || value.contains("別図") => {
943                    return Ok(());
944                }
945                Some(_) => {}
946                None => return Err(Error::MissingElement("地番".to_string())),
947            }
948        }
949
950        let geometry = fude
951            .geometry_ref
952            .as_ref()
953            .and_then(|idref| self.surfaces.get(idref))
954            .cloned()
955            .ok_or_else(|| Error::MissingElement("geometry".to_string()))?;
956
957        let ooaza_code = fude
958            .ooaza_code
959            .ok_or_else(|| Error::MissingElement("大字コード".to_string()))?;
960        let chome_code = fude
961            .chome_code
962            .ok_or_else(|| Error::MissingElement("丁目コード".to_string()))?;
963        let koaza_code = fude
964            .koaza_code
965            .ok_or_else(|| Error::MissingElement("小字コード".to_string()))?;
966        let yobi_code = fude
967            .yobi_code
968            .ok_or_else(|| Error::MissingElement("予備コード".to_string()))?;
969        let parcel_number = fude
970            .parcel_number
971            .ok_or_else(|| Error::MissingElement("地番".to_string()))?;
972
973        let pop = point_on_polygon(&geometry)?;
974
975        self.features.push(Feature {
976            geometry,
977            props: FeatureProperties {
978                筆id: fude.id,
979                精度区分: fude.precision_class,
980                大字コード: ooaza_code,
981                丁目コード: chome_code,
982                小字コード: koaza_code,
983                予備コード: yobi_code,
984                大字名: fude.ooaza_name,
985                丁目名: fude.chome_name,
986                小字名: fude.koaza_name,
987                予備名: fude.yobi_name,
988                地番: parcel_number,
989                座標値種別: fude.coordinate_value_type,
990                筆界未定構成筆: fude.constituents,
991                代表点緯度: pop.y(),
992                代表点経度: pop.x(),
993            },
994        });
995
996        Ok(())
997    }
998}
999
/// Returns the part of `name` after its last `:`; the whole slice is returned
/// when it contains no `:`.
#[inline]
fn local_name(name: &[u8]) -> &[u8] {
    // `rsplit` always yields at least one piece, and the first piece is the
    // tail that follows the final separator.
    name.rsplit(|&byte| byte == b':').next().unwrap_or(name)
}
1007
1008#[inline]
1009fn name_to_string(name: &[u8]) -> String {
1010    String::from_utf8_lossy(local_name(name)).into_owned()
1011}
1012
/// Returns `true` when `name` is byte-for-byte identical to the UTF-8 bytes of
/// `expected`. No prefix stripping is performed here.
#[inline]
fn name_eq(name: &[u8], expected: &str) -> bool {
    expected.as_bytes() == name
}
1017
1018fn required_attribute(start: &BytesStart<'_>, attr_name: &str) -> Result<String> {
1019    for attr in start.attributes().with_checks(false).flatten() {
1020        if local_name(attr.key.as_ref()) == attr_name.as_bytes() {
1021            return Ok(std::str::from_utf8(attr.value.as_ref())?.to_owned());
1022        }
1023    }
1024
1025    Err(Error::MissingAttribute {
1026        element: name_to_string(start.name().as_ref()),
1027        attribute: attr_name.to_string(),
1028    })
1029}
1030
1031fn point_on_polygon(polygon: &Polygon) -> Result<Point<f64>> {
1032    // interior_point returns None if the polygon is empty or has no interior point
1033    // We've tested on 2024 data, and all polygons have an interior point
1034    polygon
1035        .interior_point()
1036        .ok_or(Error::InteriorPointUnavailable)
1037}
1038
1039#[inline]
1040fn transform_curve_crs(curve: &mut Curve, source_crs: &Proj, target_crs: &Proj) -> Result<()> {
1041    let mut point = curve.x_y();
1042    proj4rs::transform::transform(source_crs, target_crs, &mut point)?;
1043    *curve = Point::new(point.0.to_degrees(), point.1.to_degrees());
1044    Ok(())
1045}
1046
/// Reprojects every curve in `curves` from `source_crs` to `target_crs`, in place.
///
/// Stops at, and returns, the first error raised by `transform_curve_crs`.
/// An empty map is a no-op.
#[cfg(test)]
fn transform_curves_crs(
    curves: &mut HashMap<&str, Curve>,
    source_crs: &Proj,
    target_crs: &Proj,
) -> Result<()> {
    curves
        .values_mut()
        .try_for_each(|curve| transform_curve_crs(curve, source_crs, target_crs))
}
1064
1065// --- Main Parsing Function ---
1066pub fn parse_xml_content(
1067    file_name: &str,
1068    file_data: &str,
1069    options: &ParseOptions,
1070) -> Result<ParsedXML> {
1071    let file_name = file_name.to_string();
1072    let mut parser = StreamParser::new(options);
1073    let mut reader = Reader::from_str(file_data);
1074    reader.config_mut().trim_text(false);
1075
1076    let mut buf = Vec::new();
1077    let mut depth = 0usize;
1078
1079    loop {
1080        match reader.read_event_into(&mut buf)? {
1081            Event::Start(start) => {
1082                parser.handle_start(&start, depth + 1)?;
1083                if parser.skip_features {
1084                    break;
1085                }
1086                depth += 1;
1087            }
1088            Event::Empty(start) => {
1089                parser.handle_start(&start, depth + 1)?;
1090                if parser.skip_features {
1091                    break;
1092                }
1093
1094                let start_name = start.name();
1095                let name = local_name(start_name.as_ref());
1096                parser.handle_end(name, depth + 1)?;
1097                if parser.skip_features {
1098                    break;
1099                }
1100            }
1101            Event::End(end) => {
1102                parser.handle_end(local_name(end.name().as_ref()), depth)?;
1103                depth = depth.saturating_sub(1);
1104            }
1105            Event::Text(text) => parser.push_text_bytes(text.as_ref())?,
1106            Event::CData(text) => parser.push_text_bytes(text.as_ref())?,
1107            Event::Eof => break,
1108            _ => {}
1109        }
1110        buf.clear();
1111    }
1112
1113    parser.finish(file_name)
1114}
1115
#[cfg(test)]
mod tests {
    use super::*;
    use crate::constants::get_proj;
    use geo::Contains;
    use geo::{Area, BooleanOps};
    use geo_types::wkt;
    use rustc_hash::FxHashMap as HashMap;
    use std::fs;
    use std::path::PathBuf;

    /// Path of the `testdata` directory, located two levels above this crate's
    /// manifest directory.
    fn testdata_path() -> PathBuf {
        let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        manifest_dir
            .parent()
            .and_then(|p| p.parent())
            .expect("workspace root")
            .join("testdata")
    }

    /// Checks `transform_curves_crs` against reference values for the
    /// "公共座標1系" → "WGS84" conversion.
    #[test]
    fn test_transform_curves_crs_public_coords_to_wgs84() {
        let source_crs = get_proj("公共座標1系")
            .expect("failed to load source CRS")
            .expect("公共座標1系 should resolve to a proj definition");
        let target_crs = get_proj("WGS84")
            .expect("failed to load target CRS")
            .expect("WGS84 should resolve to a proj definition");

        let mut curves: HashMap<&str, Curve> = [
            ("curve-1", Point::new(0.0, 0.0)),
            ("curve-2", Point::new(-1000.0, -1000.0)),
            ("curve-3", Point::new(1000.0, 1000.0)),
        ]
        .into_iter()
        .collect();

        let expected_results: HashMap<&str, Curve> = [
            ("curve-1", Point::new(129.5, 33.0)),
            ("curve-2", Point::new(129.48929948, 32.99098186)),
            ("curve-3", Point::new(129.5107027, 33.00901721)),
        ]
        .into_iter()
        .collect();

        transform_curves_crs(&mut curves, source_crs, target_crs)
            .expect("curve transformation should succeed");

        for (id, expected_point) in expected_results {
            let curve = curves.get(id).expect("transformed curve missing");
            assert!(
                (curve.x() - expected_point.x()).abs() < 1e-7,
                "longitude mismatch for {id} ({} vs {} )",
                curve.x(),
                expected_point.x()
            );
            assert!(
                (curve.y() - expected_point.y()).abs() < 1e-7,
                "latitude mismatch for {id} ({} vs {} )",
                curve.y(),
                expected_point.y()
            );
        }
    }

    /// Parses the sample file and checks the common properties, the feature
    /// count, and the first feature's attributes and geometry.
    #[test]
    fn test_parse_xml_content() {
        // Construct the path relative to the Cargo manifest directory
        let xml_path = testdata_path().join("46505-3411-56.xml");
        let xml_temp = fs::read_to_string(xml_path).expect("Failed to read XML file");
        let options = ParseOptions {
            include_arbitrary_crs: true,
            include_chikugai: true,
        };
        let ParsedXML {
            file_name: _,
            features,
            common_props,
        } = parse_xml_content("46505-3411-56.xml", &xml_temp, &options)
            .expect("Failed to parse XML");
        assert_eq!(common_props.地図名, "AYA1anbou22B04_2000");
        assert_eq!(common_props.市区町村コード, "46505");
        assert_eq!(common_props.市区町村名, "熊毛郡屋久島町");

        assert_eq!(features.len(), 2994);
        let feature = &features[0];
        assert_eq!(feature.props.筆id, "H000000001");
        assert_eq!(feature.props.地番, "1");

        let expected_geom = wkt! { POLYGON((130.65198936727597 30.31578177961301,130.65211112748588 30.31578250940004,130.65219722479674 30.315750035783307,130.6522397846286 30.315738240687146,130.65232325284867 30.315702331871517,130.6523668021 30.315675347347664,130.65235722919192 30.315650702546424,130.65229088479316 30.315622397556787,130.65227074994843 30.315602911975944,130.65225984787858 30.31558659939628,130.65223178039858 30.315557954059944,130.65219646886888 30.31555482900659,130.65216213192443 30.315543677500482,130.65214529987352 30.315560610998826,130.6521265046212 30.315576961906185,130.6521020960529 30.315589887800154,130.65207800626484 30.315597933967023,130.65192456437038 30.315643904777097,130.65190509850768 30.3156499243803,130.65198936727597 30.31578177961301)) };
        // Use the symmetric difference: a one-sided `difference` would also be
        // (near) empty if the parsed geometry were empty or merely contained in
        // the expected polygon, letting a wrong geometry pass.
        let mismatch = feature.geometry.xor(&expected_geom);
        assert!(
            mismatch.unsigned_area() < 1e-10,
            "Geometries do not match"
        );
    }

    /// Checks that 筆界未定構成筆 entries are parsed and populated.
    #[test]
    fn test_parse_chikugai_miten_kosei_features() {
        // Test parsing of 筆界未定構成筆 elements
        let xml_path = testdata_path().join("46505-3411-56.xml");
        let xml_temp = fs::read_to_string(xml_path).expect("Failed to read XML file");
        let options = ParseOptions {
            include_arbitrary_crs: true,
            include_chikugai: true,
        };
        let ParsedXML {
            file_name: _,
            features,
            common_props: _,
        } = parse_xml_content("46505-3411-56.xml", &xml_temp, &options)
            .expect("Failed to parse XML");

        // Find a feature with 筆界未定構成筆 data
        let features_with_chikugai: Vec<_> = features
            .iter()
            .filter(|f| !f.props.筆界未定構成筆.is_empty())
            .collect();

        assert!(
            !features_with_chikugai.is_empty(),
            "Should find features with 筆界未定構成筆"
        );

        // Check the first feature with 筆界未定構成筆
        let feature_with_chikugai = features_with_chikugai[0];
        assert!(!feature_with_chikugai.props.筆界未定構成筆.is_empty());

        // Verify the structure of the first 筆界未定構成筆 element
        let first_constituent = &feature_with_chikugai.props.筆界未定構成筆[0];

        // These should not be empty/default based on the XML we saw
        assert!(!first_constituent.大字コード.is_empty());
        assert!(!first_constituent.地番.is_empty());
        assert!(first_constituent.大字名.is_some());

        println!(
            "Found feature with {} 筆界未定構成筆 elements",
            feature_with_chikugai.props.筆界未定構成筆.len()
        );
        // `as_deref` borrows the name as `&str`, so no temporary `String` is
        // allocated for the fallback.
        println!(
            "First constituent: {} {} {}",
            first_constituent.大字名.as_deref().unwrap_or("N/A"),
            first_constituent.地番,
            first_constituent.大字コード
        );
    }

    /// Every feature's representative point must lie inside its own polygon.
    #[test]
    fn test_representative_point_should_be_inside_of_polygon() {
        // Construct the path relative to the Cargo manifest directory
        let xml_path = testdata_path().join("46505-3411-56.xml");
        let xml_temp = fs::read_to_string(xml_path).expect("Failed to read XML file");
        let options = ParseOptions {
            include_arbitrary_crs: false,
            include_chikugai: false,
        };
        let ParsedXML {
            file_name: _,
            features,
            common_props: _,
        } = parse_xml_content("46505-3411-56.xml", &xml_temp, &options)
            .expect("Failed to parse XML");

        for feature in features.iter() {
            let rep_point = Point::new(feature.props.代表点経度, feature.props.代表点緯度);
            let is_inside = feature.geometry.contains(&rep_point);
            assert!(is_inside, "Representative point is outside of the polygon");
        }
    }
}