Skip to main content

config_disassembler/xml/handlers/
reassemble.rs

//! Reassemble XML from a disassembled directory.
2
3use crate::xml::builders::{build_xml_string, merge_xml_elements, reorder_root_keys};
4use crate::xml::multi_level::{ensure_segment_files_structure, load_multi_level_config};
5use crate::xml::parsers::parse_to_xml_object;
6use crate::xml::types::{MultiLevelRule, XmlElement};
7use crate::xml::utils::normalize_path_unix;
8use serde_json::Value;
9use std::collections::HashSet;
10use std::ffi::OsString;
11use std::future::Future;
12use std::path::{Path, PathBuf};
13use std::pin::Pin;
14use tokio::fs;
15
16/// Read a `.key_order.json` file (if present) and parse it as a list of root key names.
17async fn read_key_order(path: &Path) -> Option<Vec<String>> {
18    let bytes = fs::read(path).await.ok()?;
19    serde_json::from_slice::<Vec<String>>(&bytes).ok()
20}
21
22/// Remove @xmlns from an object so the reassembled segment wrapper (e.g. programProcesses) has no xmlns.
23fn strip_xmlns_from_value(v: Value) -> Value {
24    match v {
25        Value::Object(obj) => {
26            Value::Object(obj.into_iter().filter(|(k, _)| k != "@xmlns").collect())
27        }
28        other => other,
29    }
30}
31
32/// When recursing into a nested multi-level rule's `path_segment`, the
33/// deeper-level recursion needs the *sibling* rules — every rule
34/// except the one we just matched — so a sub-directory that happens
35/// to share its parent's `path_segment` doesn't re-enter the same
36/// rule. Returns the cloned slice with the matched segment filtered
37/// out. Pure helper extracted from
38/// `reassemble_multi_level_segment_inner`.
39fn deeper_candidate_rules(
40    all_rules: &[MultiLevelRule],
41    exclude_path_segment: &str,
42) -> Vec<MultiLevelRule> {
43    all_rules
44        .iter()
45        .filter(|r| r.path_segment != exclude_path_segment)
46        .cloned()
47        .collect()
48}
49
/// Report whether `dir_path` is the disassembly root of at least one rule.
///
/// Each tuple in `base_segments` starts with the base path its rule was
/// disassembled from. The comparison is an exact string match against that
/// first field; the other two fields are ignored. An empty slice never
/// matches.
fn is_at_base_path(dir_path: &str, base_segments: &[(String, String, bool)]) -> bool {
    for (base_path, _segment_name, _extract_inner) in base_segments {
        if base_path.as_str() == dir_path {
            return true;
        }
    }
    false
}
59
60type ProcessDirFuture<'a> = Pin<
61    Box<
62        dyn Future<Output = Result<Vec<XmlElement>, Box<dyn std::error::Error + Send + Sync>>>
63            + Send
64            + 'a,
65    >,
66>;
67
68type SegmentFuture<'a> =
69    Pin<Box<dyn Future<Output = Result<(), Box<dyn std::error::Error + Send + Sync>>> + Send + 'a>>;
70
/// Stateless handler that turns a disassembled directory tree back into a
/// single file.
///
/// The type carries no data, so it is freely copyable; `Debug` is derived so
/// it can appear in logs and inside other `Debug` types.
#[derive(Debug, Clone, Copy)]
pub struct ReassembleXmlFileHandler;
72
73impl ReassembleXmlFileHandler {
74    pub fn new() -> Self {
75        Self
76    }
77
78    pub async fn reassemble(
79        &self,
80        file_path: &str,
81        file_extension: Option<&str>,
82        post_purge: bool,
83    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
84        let file_path = normalize_path_unix(file_path);
85        if !self.validate_directory(&file_path).await? {
86            return Ok(());
87        }
88
89        let path = Path::new(&file_path);
90        let config = load_multi_level_config(path).await;
91        if let Some(ref config) = config {
92            // Process each rule whose path_segment exists as a directory at the
93            // disassembly root. Inner-only rules (whose segment lives nested under another
94            // rule's item dir) are handled dynamically when the parent rule walks its
95            // items; we hand them in as `nested_rules` candidates here.
96            for (i, rule) in config.rules.iter().enumerate() {
97                let segment_path = path.join(&rule.path_segment);
98                if !segment_path.is_dir() {
99                    continue;
100                }
101                let nested: Vec<MultiLevelRule> = config
102                    .rules
103                    .iter()
104                    .enumerate()
105                    .filter(|(j, _)| *j != i)
106                    .map(|(_, r)| r.clone())
107                    .collect();
108                self.reassemble_multi_level_segment(&segment_path, rule, &nested)
109                    .await?;
110            }
111        }
112
113        // Build one base-segment entry per multi-level rule so the recursive walker can
114        // recognize each rule's path_segment under the disassembly root.
115        let base_segments: Vec<(String, String, bool)> = config
116            .as_ref()
117            .map(|c| {
118                c.rules
119                    .iter()
120                    .map(|r| (file_path.clone(), r.path_segment.clone(), true))
121                    .collect()
122            })
123            .unwrap_or_default();
124        // When multi-level reassembly is done, purge the entire disassembled directory
125        let post_purge_final = post_purge || config.is_some();
126        self.reassemble_plain(&file_path, file_extension, post_purge_final, &base_segments)
127            .await
128    }
129
130    /// Reassemble a single multi-level segment directory.
131    ///
132    /// For each item directory under `segment_path` (e.g. each `<dialog>/` under
133    /// `botDialogs/`):
134    ///
135    /// 1. **Phase 1 — nested rules first.** For every immediate sub-directory whose name
136    ///    matches a `nested_rules` candidate's `path_segment`, recursively reassemble
137    ///    that sub-directory as its own segment. This wraps each per-step file in
138    ///    `<wrap_root_element><inner_segment>...</inner_segment></wrap_root_element>` *before*
139    ///    the outer-level merge sees it, so multiple inner items survive as siblings
140    ///    rather than collapsing into a single bag of leaves.
141    ///
142    /// 2. **Phase 2 — flat sub-directories.** Any remaining sub-directory (anything not
143    ///    consumed by phase 1) is collapsed into a per-item `.xml` at the parent level
144    ///    via [`Self::reassemble_plain`], the original behaviour for things like
145    ///    decompose-rule outputs.
146    ///
147    /// 3. **Phase 3 — merge item.** Everything in the item directory (the `.xml` files
148    ///    written by phases 1 and 2 plus any leaf `.xml` already there) is merged into
149    ///    a single `.xml` at the parent level.
150    ///
151    /// Finally, [`ensure_segment_files_structure`] wraps every `.xml` in `segment_path`
152    /// in `<wrap_root_element><path_segment>...</path_segment></wrap_root_element>` so
153    /// the parent reassembly sees correctly-wrapped siblings.
154    fn reassemble_multi_level_segment<'a>(
155        &'a self,
156        segment_path: &'a Path,
157        rule: &'a MultiLevelRule,
158        nested_rules: &'a [MultiLevelRule],
159    ) -> SegmentFuture<'a> {
160        let segment_path = segment_path.to_path_buf();
161        let rule = rule.clone();
162        let nested_rules = nested_rules.to_vec();
163        Box::pin(async move {
164            self.reassemble_multi_level_segment_inner(&segment_path, &rule, &nested_rules)
165                .await
166        })
167    }
168
169    async fn reassemble_multi_level_segment_inner(
170        &self,
171        segment_path: &Path,
172        rule: &MultiLevelRule,
173        nested_rules: &[MultiLevelRule],
174    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
175        if !segment_path.is_dir() {
176            return Ok(());
177        }
178        let mut entries = Vec::new();
179        let mut read_dir = fs::read_dir(segment_path).await?;
180        while let Some(entry) = read_dir.next_entry().await? {
181            entries.push(entry);
182        }
183        entries.sort_by_key(|e| e.file_name());
184        for entry in entries {
185            let process_path = entry.path();
186            if !process_path.is_dir() {
187                continue;
188            }
189            let process_path_str = normalize_path_unix(&process_path.to_string_lossy());
190            let mut sub_entries = Vec::new();
191            let mut sub_read = fs::read_dir(&process_path).await?;
192            while let Some(e) = sub_read.next_entry().await? {
193                sub_entries.push(e);
194            }
195            sub_entries.sort_by_key(|e| e.file_name());
196
197            // Phase 1: drain any sub-directory that matches a nested rule's
198            // `path_segment` so it is re-wrapped before the outer merge runs.
199            let mut handled: HashSet<OsString> = HashSet::new();
200            for sub_entry in &sub_entries {
201                let sub_path: PathBuf = sub_entry.path();
202                if !sub_path.is_dir() {
203                    continue;
204                }
205                let sub_name = sub_path.file_name().and_then(|n| n.to_str()).unwrap_or("");
206                let Some(nested_rule) = nested_rules.iter().find(|r| r.path_segment == sub_name)
207                else {
208                    continue;
209                };
210                // Pass everything *except* the rule we just matched as deeper candidates.
211                // Sibling rules remain candidates further down the tree without re-entering
212                // the same rule on a sub-dir that happens to share its name.
213                let deeper = deeper_candidate_rules(nested_rules, &nested_rule.path_segment);
214                self.reassemble_multi_level_segment(&sub_path, nested_rule, &deeper)
215                    .await?;
216                handled.insert(sub_entry.file_name());
217            }
218
219            // Phase 2: collapse remaining sub-directories into per-item .xml files at
220            // the parent level (preserves existing behaviour for non-nested-rule subdirs).
221            for sub_entry in &sub_entries {
222                let sub_path = sub_entry.path();
223                if !sub_path.is_dir() {
224                    continue;
225                }
226                if handled.contains(&sub_entry.file_name()) {
227                    continue;
228                }
229                let sub_path_str = normalize_path_unix(&sub_path.to_string_lossy());
230                self.reassemble_plain(&sub_path_str, Some("xml"), true, &[])
231                    .await?;
232            }
233
234            // Phase 3: merge everything in the item dir into a single .xml at the parent.
235            self.reassemble_plain(&process_path_str, Some("xml"), true, &[])
236                .await?;
237        }
238        ensure_segment_files_structure(
239            segment_path,
240            &rule.wrap_root_element,
241            &rule.path_segment,
242            &rule.wrap_xmlns,
243        )
244        .await?;
245        Ok(())
246    }
247
248    /// Merge and write reassembled XML (no multi-level pre-step). Used internally.
249    /// `base_segments` carries one tuple `(base_path, segment_name, extract_inner)` per
250    /// multi-level rule. When the recursive walker reaches `base_path` and finds a subdir
251    /// whose name matches one of the segment_names, that subdir's XML files are folded
252    /// into a single array under the segment_name key. When extract_inner is true, each
253    /// file's structure is `document_root > segment_name > content` and only the content
254    /// is collected; otherwise the whole root is kept.
255    async fn reassemble_plain(
256        &self,
257        file_path: &str,
258        file_extension: Option<&str>,
259        post_purge: bool,
260        base_segments: &[(String, String, bool)],
261    ) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
262        let file_path = normalize_path_unix(file_path);
263        log::debug!("Parsing directory to reassemble: {}", file_path);
264        let parsed_objects = self
265            .process_files_in_directory(file_path.to_string(), base_segments.to_vec())
266            .await?;
267
268        if parsed_objects.is_empty() {
269            log::error!(
270                "No files under {} were parsed successfully. A reassembled XML file was not created.",
271                file_path
272            );
273            return Ok(());
274        }
275
276        // merge_xml_elements only returns None when every parsed element is empty or
277        // declaration-only (no usable root). Treat that the same as "nothing parsed"
278        // rather than emitting an `<root></root>` stub.
279        let Some(mut merged) = merge_xml_elements(&parsed_objects) else {
280            log::error!(
281                "No usable root element found while merging files under {}. A reassembled XML file was not created.",
282                file_path
283            );
284            return Ok(());
285        };
286
287        // Apply stored key order so reassembled XML matches original document order.
288        let key_order_path = Path::new(&file_path).join(".key_order.json");
289        if let Some(reordered) = read_key_order(&key_order_path)
290            .await
291            .and_then(|order| reorder_root_keys(&merged, &order))
292        {
293            merged = reordered;
294        }
295
296        let final_xml = build_xml_string(&merged);
297        let output_path = self.get_output_path(&file_path, file_extension);
298
299        fs::write(&output_path, final_xml).await?;
300
301        if post_purge {
302            fs::remove_dir_all(file_path).await.ok();
303        }
304
305        Ok(())
306    }
307
308    fn process_files_in_directory<'a>(
309        &'a self,
310        dir_path: String,
311        base_segments: Vec<(String, String, bool)>,
312    ) -> ProcessDirFuture<'a> {
313        Box::pin(async move {
314            let mut parsed = Vec::new();
315            let mut entries = Vec::new();
316            let mut read_dir = fs::read_dir(&dir_path).await?;
317            while let Some(entry) = read_dir.next_entry().await? {
318                entries.push(entry);
319            }
320            // Sort by full filename for deterministic cross-platform ordering
321            entries.sort_by(|a, b| {
322                let a_name = a.file_name().to_string_lossy().to_string();
323                let b_name = b.file_name().to_string_lossy().to_string();
324                a_name.cmp(&b_name)
325            });
326
327            // We are at the disassembly root for a given rule when our dir_path matches
328            // the base_path stored on that rule. Each rule shares the same base_path in
329            // the current implementation, but tracking them per-entry keeps the door open
330            // for future per-rule base_paths without another signature change.
331            let is_base = is_at_base_path(&dir_path, &base_segments);
332
333            for entry in entries {
334                let path = entry.path();
335                let file_path = normalize_path_unix(&path.to_string_lossy()).to_string();
336
337                if path.is_file() {
338                    let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
339                    if !name.starts_with('.') && self.is_parsable_file(name) {
340                        if let Some(parsed_obj) = parse_to_xml_object(&file_path).await {
341                            parsed.push(parsed_obj);
342                        }
343                    }
344                } else {
345                    // Anything not a regular file is treated as a directory; symlinks and
346                    // other exotic entries simply recurse via read_dir below.
347                    let dir_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
348                    let matched_segment = if is_base {
349                        base_segments
350                            .iter()
351                            .find(|(_, seg_name, _)| seg_name == dir_name)
352                            .cloned()
353                    } else {
354                        None
355                    };
356                    if let Some((_, segment_name, extract_inner)) = matched_segment {
357                        let segment_element = self
358                            .collect_segment_as_array(&file_path, &segment_name, extract_inner)
359                            .await?;
360                        if let Some(el) = segment_element {
361                            parsed.push(el);
362                        }
363                    } else {
364                        let sub_parsed = self
365                            .process_files_in_directory(file_path, base_segments.clone())
366                            .await?;
367                        parsed.extend(sub_parsed);
368                    }
369                }
370            }
371
372            Ok(parsed)
373        })
374    }
375
376    /// Collect all .xml files in a directory, parse each, and build one element with
377    /// root_key and single key segment_name whose value is array of each file's content.
378    /// When extract_inner is true, each file has root > segment_name > content; we push that content.
379    async fn collect_segment_as_array(
380        &self,
381        segment_dir: &str,
382        segment_name: &str,
383        extract_inner: bool,
384    ) -> Result<Option<XmlElement>, Box<dyn std::error::Error + Send + Sync>> {
385        let mut xml_files = Vec::new();
386        let mut read_dir = fs::read_dir(segment_dir).await?;
387        while let Some(entry) = read_dir.next_entry().await? {
388            let path = entry.path();
389            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
390            if path.is_file() && !name.starts_with('.') && self.is_parsable_file(name) {
391                xml_files.push(normalize_path_unix(&path.to_string_lossy()));
392            }
393        }
394        xml_files.sort();
395
396        let mut root_contents = Vec::new();
397        let mut first_xml: Option<(String, Option<Value>)> = None;
398        for file_path in &xml_files {
399            // parse_to_xml_object always yields a JSON object on success; treat any other
400            // shape (including parse failure) as a skip without branching explicitly.
401            let Some(parsed) = parse_to_xml_object(file_path).await else {
402                continue;
403            };
404            let obj_owned = parsed.as_object().cloned().unwrap_or_default();
405            let obj = &obj_owned;
406            let Some(root_key) = obj.keys().find(|k| *k != "?xml").cloned() else {
407                continue;
408            };
409            let root_val = obj
410                .get(&root_key)
411                .cloned()
412                .unwrap_or(Value::Object(serde_json::Map::new()));
413            let mut content = if extract_inner {
414                root_val
415                    .get(segment_name)
416                    .cloned()
417                    .unwrap_or_else(|| Value::Object(serde_json::Map::new()))
418            } else {
419                root_val
420            };
421            // Inner segment element (e.g. programProcesses) should not have xmlns in output
422            if extract_inner {
423                content = strip_xmlns_from_value(content);
424            }
425            root_contents.push(content);
426            if first_xml.is_none() {
427                first_xml = Some((root_key, obj.get("?xml").cloned()));
428            }
429        }
430        if root_contents.is_empty() {
431            return Ok(None);
432        }
433        let (root_key, decl_opt) = first_xml.unwrap();
434        let mut content = serde_json::Map::new();
435        content.insert(segment_name.to_string(), Value::Array(root_contents));
436        let mut top = serde_json::Map::new();
437        if let Some(decl) = decl_opt {
438            top.insert("?xml".to_string(), decl);
439        } else {
440            let mut d = serde_json::Map::new();
441            d.insert("@version".to_string(), Value::String("1.0".to_string()));
442            d.insert("@encoding".to_string(), Value::String("UTF-8".to_string()));
443            top.insert("?xml".to_string(), Value::Object(d));
444        }
445        top.insert(root_key, Value::Object(content));
446        Ok(Some(Value::Object(top)))
447    }
448
449    fn is_parsable_file(&self, file_name: &str) -> bool {
450        let lower = file_name.to_lowercase();
451        lower.ends_with(".xml")
452            || lower.ends_with(".json")
453            || lower.ends_with(".json5")
454            || lower.ends_with(".yaml")
455            || lower.ends_with(".yml")
456    }
457
458    async fn validate_directory(
459        &self,
460        path: &str,
461    ) -> Result<bool, Box<dyn std::error::Error + Send + Sync>> {
462        let meta = fs::metadata(path).await?;
463        if !meta.is_dir() {
464            log::error!(
465                "The provided path to reassemble is not a directory: {}",
466                path
467            );
468            return Ok(false);
469        }
470        Ok(true)
471    }
472
473    fn get_output_path(&self, dir_path: &str, extension: Option<&str>) -> String {
474        let path = Path::new(dir_path);
475        let parent = path.parent().unwrap_or(Path::new("."));
476        let base_name = path
477            .file_name()
478            .and_then(|n| n.to_str())
479            .unwrap_or("output");
480        let ext = extension.unwrap_or("xml");
481        parent
482            .join(format!("{}.{}", base_name, ext))
483            .to_string_lossy()
484            .to_string()
485    }
486}
487
488impl Default for ReassembleXmlFileHandler {
489    fn default() -> Self {
490        Self::new()
491    }
492}
493
494#[cfg(test)]
495mod tests {
496    use super::*;
497    use serde_json::json;
498
499    #[test]
500    #[allow(clippy::default_constructed_unit_structs)]
501    fn reassemble_handler_default_equals_new() {
502        let _ = ReassembleXmlFileHandler::default();
503    }
504
505    #[test]
506    fn strip_xmlns_from_value_passes_non_object_through() {
507        let s = Value::String("hello".to_string());
508        assert_eq!(
509            strip_xmlns_from_value(s),
510            Value::String("hello".to_string())
511        );
512        let arr = json!([1, 2]);
513        assert_eq!(strip_xmlns_from_value(arr.clone()), arr);
514    }
515
516    #[test]
517    fn strip_xmlns_from_value_removes_xmlns_key() {
518        let obj = json!({ "@xmlns": "ns", "child": 1 });
519        let stripped = strip_xmlns_from_value(obj);
520        let map = stripped.as_object().unwrap();
521        assert!(map.get("@xmlns").is_none());
522        assert_eq!(map.get("child").and_then(|v| v.as_i64()), Some(1));
523    }
524
525    #[test]
526    fn is_parsable_file_recognises_supported_extensions() {
527        let h = ReassembleXmlFileHandler::new();
528        assert!(h.is_parsable_file("a.xml"));
529        assert!(h.is_parsable_file("a.json"));
530        assert!(h.is_parsable_file("a.json5"));
531        assert!(h.is_parsable_file("a.yaml"));
532        assert!(h.is_parsable_file("a.yml"));
533        assert!(h.is_parsable_file("A.XML"));
534        assert!(!h.is_parsable_file("a.txt"));
535    }
536
537    #[test]
538    fn get_output_path_appends_extension_and_uses_parent_dir() {
539        let h = ReassembleXmlFileHandler::new();
540        let out = h.get_output_path("/tmp/foo", Some("xml"));
541        assert!(out.ends_with("foo.xml"));
542        let out_default = h.get_output_path("/tmp/bar", None);
543        assert!(out_default.ends_with("bar.xml"));
544        // No parent - uses "." fallback
545        assert_eq!(h.get_output_path("only", Some("json")), "only.json");
546    }
547
548    #[tokio::test]
549    async fn reassemble_multi_level_segment_noop_when_not_dir() {
550        let h = ReassembleXmlFileHandler::new();
551        let tmp = tempfile::tempdir().unwrap();
552        let file = tmp.path().join("not_a_dir.txt");
553        tokio::fs::write(&file, "hi").await.unwrap();
554        let rule = crate::xml::types::MultiLevelRule {
555            file_pattern: String::new(),
556            root_to_strip: String::new(),
557            unique_id_elements: String::new(),
558            path_segment: String::new(),
559            wrap_root_element: "Root".to_string(),
560            wrap_xmlns: String::new(),
561        };
562        h.reassemble_multi_level_segment(&file, &rule, &[])
563            .await
564            .unwrap();
565    }
566
567    #[tokio::test]
568    async fn reassemble_multi_level_segment_skips_files_in_segment_root() {
569        let h = ReassembleXmlFileHandler::new();
570        let tmp = tempfile::tempdir().unwrap();
571        let segment = tmp.path().join("segment");
572        tokio::fs::create_dir(&segment).await.unwrap();
573        // A bare file inside the segment dir should be skipped (not a subdir).
574        tokio::fs::write(segment.join("stray.txt"), "x")
575            .await
576            .unwrap();
577        let rule = crate::xml::types::MultiLevelRule {
578            file_pattern: String::new(),
579            root_to_strip: String::new(),
580            unique_id_elements: String::new(),
581            path_segment: "segment".to_string(),
582            wrap_root_element: "Root".to_string(),
583            wrap_xmlns: "http://example.com".to_string(),
584        };
585        h.reassemble_multi_level_segment(&segment, &rule, &[])
586            .await
587            .unwrap();
588    }
589
590    #[tokio::test]
591    async fn collect_segment_as_array_returns_none_for_empty_dir() {
592        let h = ReassembleXmlFileHandler::new();
593        let tmp = tempfile::tempdir().unwrap();
594        let out = h
595            .collect_segment_as_array(tmp.path().to_str().unwrap(), "seg", true)
596            .await
597            .unwrap();
598        assert!(out.is_none());
599    }
600
601    #[tokio::test]
602    async fn collect_segment_as_array_skips_unparseable_and_empty_roots() {
603        let h = ReassembleXmlFileHandler::new();
604        let tmp = tempfile::tempdir().unwrap();
605        // Unparseable XML
606        tokio::fs::write(tmp.path().join("bad.xml"), "<<")
607            .await
608            .unwrap();
609        // Valid XML but only declaration and no root after parse
610        tokio::fs::write(tmp.path().join("only-decl.xml"), "")
611            .await
612            .unwrap();
613        // Hidden file is skipped
614        tokio::fs::write(tmp.path().join(".hidden.xml"), "<r/>")
615            .await
616            .unwrap();
617        let out = h
618            .collect_segment_as_array(tmp.path().to_str().unwrap(), "seg", false)
619            .await
620            .unwrap();
621        assert!(out.is_none());
622    }
623
624    #[tokio::test]
625    async fn collect_segment_as_array_without_extract_inner_wraps_root() {
626        let h = ReassembleXmlFileHandler::new();
627        let tmp = tempfile::tempdir().unwrap();
628        tokio::fs::write(tmp.path().join("a.xml"), r#"<Root><child>1</child></Root>"#)
629            .await
630            .unwrap();
631        let out = h
632            .collect_segment_as_array(tmp.path().to_str().unwrap(), "seg", false)
633            .await
634            .unwrap()
635            .unwrap();
636        let obj = out.as_object().unwrap();
637        assert!(obj.contains_key("?xml"));
638        let root = obj.get("Root").and_then(|r| r.as_object()).unwrap();
639        assert!(root.get("seg").and_then(|v| v.as_array()).is_some());
640    }
641
642    fn rule_with_segment(segment: &str) -> MultiLevelRule {
643        MultiLevelRule {
644            file_pattern: String::new(),
645            root_to_strip: String::new(),
646            unique_id_elements: String::new(),
647            path_segment: segment.to_string(),
648            wrap_root_element: String::new(),
649            wrap_xmlns: String::new(),
650        }
651    }
652
653    #[test]
654    fn deeper_candidate_rules_excludes_the_matched_segment() {
655        // The matched rule must be filtered out, otherwise the
656        // recursion would re-enter that rule when a child directory
657        // happens to share its `path_segment`.
658        let rules = vec![rule_with_segment("seg_a"), rule_with_segment("seg_b")];
659        let deeper = deeper_candidate_rules(&rules, "seg_a");
660        assert_eq!(deeper.len(), 1);
661        assert_eq!(deeper[0].path_segment, "seg_b");
662    }
663
664    #[test]
665    fn deeper_candidate_rules_keeps_all_when_no_segment_matches() {
666        // When `exclude_path_segment` doesn't correspond to any rule
667        // the input is forwarded unchanged. Pins the `!= -> ==` mutant
668        // which would otherwise return an empty vec here.
669        let rules = vec![rule_with_segment("seg_a"), rule_with_segment("seg_b")];
670        let deeper = deeper_candidate_rules(&rules, "missing");
671        assert_eq!(deeper.len(), 2);
672    }
673
674    #[test]
675    fn deeper_candidate_rules_returns_empty_for_empty_input() {
676        let deeper: Vec<MultiLevelRule> = deeper_candidate_rules(&[], "anything");
677        assert!(deeper.is_empty());
678    }
679
680    #[test]
681    fn is_at_base_path_true_when_dir_matches_any_segment() {
682        let segs = vec![
683            ("/base/other".to_string(), "seg1".to_string(), false),
684            ("/base/here".to_string(), "seg2".to_string(), false),
685        ];
686        assert!(is_at_base_path("/base/here", &segs));
687    }
688
689    #[test]
690    fn is_at_base_path_false_when_dir_matches_nothing() {
691        let segs = vec![("/base/a".to_string(), "seg".to_string(), false)];
692        assert!(!is_at_base_path("/base/b", &segs));
693    }
694
695    #[test]
696    fn is_at_base_path_false_for_empty_segments() {
697        let segs: Vec<(String, String, bool)> = Vec::new();
698        assert!(!is_at_base_path("/anywhere", &segs));
699    }
700}