Skip to main content

xml_disassembler/
cli.rs

1//! Command-line interface for the xml-disassembler binary.
2//!
3//! Kept in the library crate so it can be exercised by unit tests and
4//! the binary stays a thin shim.
5
6use crate::{DecomposeRule, DisassembleXmlFileHandler, MultiLevelRule, ReassembleXmlFileHandler};
7
/// Options parsed from the `disassemble` subcommand's arguments.
///
/// The `&'a str` fields borrow from the argument slice that was parsed (or are
/// built-in defaults); the two spec fields hold owned copies of their values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DisassembleOpts<'a> {
    /// First positional argument, if one was given.
    pub path: Option<&'a str>,
    /// Value of `--unique-id-elements`, if given.
    pub unique_id_elements: Option<&'a str>,
    /// `true` when `--prepurge` was passed.
    pub pre_purge: bool,
    /// `true` when `--postpurge` was passed.
    pub post_purge: bool,
    /// Value of `--ignore-path`, or the parser's default when absent.
    pub ignore_path: &'a str,
    /// Value of `--format`, or the parser's default when absent.
    pub format: &'a str,
    /// Value of `--strategy`, if given.
    pub strategy: Option<&'a str>,
    /// Raw, unparsed value of `--multi-level`, if given.
    pub multi_level: Option<String>,
    /// Raw, unparsed value of `--split-tags` / `-p`, if given.
    pub split_tags: Option<String>,
}
20
21/// Parse --split-tags spec for grouped-by-tag. Comma-separated rules; each rule:
22/// `tag:mode:field` (path_segment defaults to tag) or `tag:path:mode:field`.
23/// mode = "split" (one file per item) or "group" (group by field).
24pub fn parse_decompose_spec(spec: &str) -> Vec<DecomposeRule> {
25    let mut rules = Vec::new();
26    for part in spec.split(',') {
27        let part = part.trim();
28        let segments: Vec<&str> = part.splitn(4, ':').collect();
29        if segments.len() >= 3 {
30            let tag = segments[0].to_string();
31            let (path_segment, mode, field) = if segments.len() == 3 {
32                (
33                    tag.clone(),
34                    segments[1].to_string(),
35                    segments[2].to_string(),
36                )
37            } else {
38                (
39                    segments[1].to_string(),
40                    segments[2].to_string(),
41                    segments[3].to_string(),
42                )
43            };
44            if !tag.is_empty() && !mode.is_empty() && !field.is_empty() {
45                rules.push(DecomposeRule {
46                    tag,
47                    path_segment,
48                    mode,
49                    field,
50                });
51            }
52        }
53    }
54    rules
55}
56
57/// Parse --multi-level spec: `file_pattern:root_to_strip:unique_id_elements`.
58pub fn parse_multi_level_spec(spec: &str) -> Option<MultiLevelRule> {
59    let parts: Vec<&str> = spec.splitn(3, ':').collect();
60    if parts.len() != 3 {
61        return None;
62    }
63    let (file_pattern, root_to_strip, unique_id_elements) = (parts[0], parts[1], parts[2]);
64    if file_pattern.is_empty() || root_to_strip.is_empty() || unique_id_elements.is_empty() {
65        return None;
66    }
67    let path_segment = crate::path_segment_from_file_pattern(file_pattern);
68    Some(MultiLevelRule {
69        file_pattern: file_pattern.to_string(),
70        root_to_strip: root_to_strip.to_string(),
71        unique_id_elements: unique_id_elements.to_string(),
72        path_segment: path_segment.clone(),
73        wrap_root_element: root_to_strip.to_string(),
74        wrap_xmlns: String::new(),
75    })
76}
77
78/// Parse disassemble args: `<path> [options]`.
79pub fn parse_disassemble_args(args: &[String]) -> DisassembleOpts<'_> {
80    let mut path = None;
81    let mut unique_id_elements = None;
82    let mut pre_purge = false;
83    let mut post_purge = false;
84    let mut ignore_path = ".xmldisassemblerignore";
85    let mut format = "xml";
86    let mut strategy = None;
87    let mut multi_level = None;
88    let mut split_tags = None;
89
90    let mut i = 0;
91    while i < args.len() {
92        let arg = &args[i];
93        if arg == "--postpurge" {
94            post_purge = true;
95            i += 1;
96        } else if arg == "--prepurge" {
97            pre_purge = true;
98            i += 1;
99        } else if let Some(rest) = arg.strip_prefix("--unique-id-elements=") {
100            unique_id_elements = Some(rest);
101            i += 1;
102        } else if arg == "--unique-id-elements" {
103            i += 1;
104            if i < args.len() {
105                unique_id_elements = Some(args[i].as_str());
106                i += 1;
107            }
108        } else if let Some(rest) = arg.strip_prefix("--ignore-path=") {
109            ignore_path = rest;
110            i += 1;
111        } else if arg == "--ignore-path" {
112            i += 1;
113            if i < args.len() {
114                ignore_path = args[i].as_str();
115                i += 1;
116            }
117        } else if let Some(rest) = arg.strip_prefix("--format=") {
118            format = rest;
119            i += 1;
120        } else if arg == "--format" {
121            i += 1;
122            if i < args.len() {
123                format = args[i].as_str();
124                i += 1;
125            }
126        } else if let Some(rest) = arg.strip_prefix("--strategy=") {
127            strategy = Some(rest);
128            i += 1;
129        } else if arg == "--strategy" {
130            i += 1;
131            if i < args.len() {
132                strategy = Some(args[i].as_str());
133                i += 1;
134            }
135        } else if let Some(rest) = arg.strip_prefix("--multi-level=") {
136            multi_level = Some(rest.to_string());
137            i += 1;
138        } else if arg == "--multi-level" {
139            i += 1;
140            if i < args.len() {
141                multi_level = Some(args[i].clone());
142                i += 1;
143            }
144        } else if let Some(rest) = arg.strip_prefix("--split-tags=") {
145            split_tags = Some(rest.to_string());
146            i += 1;
147        } else if arg == "--split-tags" || arg == "-p" {
148            i += 1;
149            if i < args.len() {
150                split_tags = Some(args[i].clone());
151                i += 1;
152            }
153        } else if arg.starts_with("--") {
154            i += 1;
155        } else if path.is_none() {
156            path = Some(arg.as_str());
157            i += 1;
158        } else {
159            i += 1;
160        }
161    }
162
163    DisassembleOpts {
164        path,
165        unique_id_elements,
166        pre_purge,
167        post_purge,
168        ignore_path,
169        format,
170        strategy,
171        multi_level,
172        split_tags,
173    }
174}
175
/// Parse the arguments that follow `reassemble`: `<path> [extension] [--postpurge]`.
///
/// Returns `(path, extension, post_purge)`. `--postpurge` may appear anywhere;
/// of the remaining arguments the first is the path, the second the extension,
/// and any further ones are ignored.
pub fn parse_reassemble_args(args: &[String]) -> (Option<&str>, Option<&str>, bool) {
    let post_purge = args.iter().any(|arg| arg == "--postpurge");

    // Everything except the purge switch is positional, in order of appearance.
    let mut positionals = args
        .iter()
        .map(String::as_str)
        .filter(|arg| *arg != "--postpurge");
    let path = positionals.next();
    let extension = positionals.next();

    (path, extension, post_purge)
}
192
/// Print CLI usage to stderr, one line per command or option.
pub fn print_usage() {
    // Kept as a table so the help text reads top-to-bottom exactly as printed.
    const USAGE_LINES: &[&str] = &[
        "Usage: xml-disassembler <command> [options]",
        "  disassemble <path> [options]     - Disassemble XML file or directory",
        "    --postpurge                    - Delete original file/dir after disassembling (default: false)",
        "    --prepurge                     - Remove existing disassembly output before running (default: false)",
        "    --unique-id-elements <list>    - Comma-separated element names for nested filenames",
        "    --ignore-path <path>           - Path to ignore file (default: .xmldisassemblerignore)",
        "    --format <fmt>                 - Output format: xml, json, json5, yaml (default: xml)",
        "    --strategy <name>              - unique-id or grouped-by-tag (default: unique-id)",
        "    --multi-level <spec>          - Further disassemble matching files: file_pattern:root_to_strip:unique_id_elements",
        "    -p, --split-tags <spec>       - With grouped-by-tag: split/group nested tags (e.g. objectPermissions:split:object,fieldPermissions:group:field)",
        "  reassemble <path> [extension] [--postpurge]  - Reassemble directory (default extension: xml)",
    ];

    for line in USAGE_LINES {
        eprintln!("{}", line);
    }
}
213
214/// Run the CLI with the given args. `args[0]` is expected to be the program name.
215pub async fn run(args: Vec<String>) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
216    if args.len() < 2 {
217        print_usage();
218        return Ok(());
219    }
220
221    let command = &args[1];
222    match command.as_str() {
223        "disassemble" => run_disassemble(&args[2..]).await?,
224        "reassemble" => run_reassemble(&args[2..]).await?,
225        _ => {
226            eprintln!("Unknown command: {}", command);
227        }
228    }
229
230    Ok(())
231}
232
233async fn run_disassemble(args: &[String]) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
234    let opts = parse_disassemble_args(args);
235    let path = opts.path.unwrap_or(".");
236    let strategy = opts.strategy.unwrap_or("unique-id");
237    let multi_level_rule = opts
238        .multi_level
239        .as_ref()
240        .and_then(|s| parse_multi_level_spec(s));
241    if opts.multi_level.is_some() && multi_level_rule.is_none() {
242        eprintln!("Invalid --multi-level spec; use file_pattern:root_to_strip:unique_id_elements");
243    }
244    let decompose_rules: Vec<DecomposeRule> = if strategy == "grouped-by-tag" {
245        opts.split_tags
246            .as_ref()
247            .map(|s| parse_decompose_spec(s))
248            .unwrap_or_default()
249    } else {
250        Vec::new()
251    };
252    let decompose_rules_ref = if decompose_rules.is_empty() {
253        None
254    } else {
255        Some(decompose_rules.as_slice())
256    };
257    let mut handler = DisassembleXmlFileHandler::new();
258    handler
259        .disassemble(
260            path,
261            opts.unique_id_elements,
262            Some(strategy),
263            opts.pre_purge,
264            opts.post_purge,
265            opts.ignore_path,
266            opts.format,
267            multi_level_rule.as_ref(),
268            decompose_rules_ref,
269        )
270        .await?;
271    Ok(())
272}
273
274async fn run_reassemble(args: &[String]) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
275    let (path, extension, post_purge) = parse_reassemble_args(args);
276    let path = path.unwrap_or(".");
277    let handler = ReassembleXmlFileHandler::new();
278    handler
279        .reassemble(path, extension.or(Some("xml")), post_purge)
280        .await?;
281    Ok(())
282}
283
// Unit tests for the argument/spec parsers, followed by end-to-end runs of
// `run` against files created in temporary directories.
#[cfg(test)]
mod tests {
    use super::*;

    // Shorthand for turning a string literal into an owned `String` argument.
    fn sv(s: &str) -> String {
        s.to_string()
    }

    // --- parse_decompose_spec ---

    #[test]
    fn parse_decompose_spec_three_segments_defaults_path_segment_to_tag() {
        let rules = parse_decompose_spec("objectPermissions:split:object");
        assert_eq!(rules.len(), 1);
        let r = &rules[0];
        assert_eq!(r.tag, "objectPermissions");
        assert_eq!(r.path_segment, "objectPermissions");
        assert_eq!(r.mode, "split");
        assert_eq!(r.field, "object");
    }

    #[test]
    fn parse_decompose_spec_four_segments_uses_explicit_path_segment() {
        let rules = parse_decompose_spec("fieldPermissions:fieldPerms:group:field");
        assert_eq!(rules.len(), 1);
        let r = &rules[0];
        assert_eq!(r.tag, "fieldPermissions");
        assert_eq!(r.path_segment, "fieldPerms");
        assert_eq!(r.mode, "group");
        assert_eq!(r.field, "field");
    }

    #[test]
    fn parse_decompose_spec_comma_separated_rules_trims_whitespace() {
        // Mixes the three-part and four-part forms, with stray spaces around the commas.
        let rules = parse_decompose_spec("a:split:f, b:group:g , c:x:split:y");
        assert_eq!(rules.len(), 3);
        assert_eq!(rules[0].tag, "a");
        assert_eq!(rules[1].tag, "b");
        assert_eq!(rules[2].tag, "c");
        assert_eq!(rules[2].path_segment, "x");
    }

    #[test]
    fn parse_decompose_spec_rejects_empty_segments() {
        // Too few segments
        assert!(parse_decompose_spec("only:two").is_empty());
        // Empty tag, mode, or field are filtered
        assert!(parse_decompose_spec(":split:field").is_empty());
        assert!(parse_decompose_spec("tag::field").is_empty());
        assert!(parse_decompose_spec("tag:split:").is_empty());
    }

    // --- parse_multi_level_spec ---

    #[test]
    fn parse_multi_level_spec_valid_returns_rule() {
        let rule = parse_multi_level_spec(
            "programProcesses-meta:LoyaltyProgramSetup:parameterName,ruleName",
        )
        .unwrap();
        assert_eq!(rule.file_pattern, "programProcesses-meta");
        assert_eq!(rule.root_to_strip, "LoyaltyProgramSetup");
        assert_eq!(rule.unique_id_elements, "parameterName,ruleName");
        assert_eq!(rule.path_segment, "programProcesses");
        assert_eq!(rule.wrap_root_element, "LoyaltyProgramSetup");
        assert!(rule.wrap_xmlns.is_empty());
    }

    #[test]
    fn parse_multi_level_spec_rejects_wrong_parts() {
        // One case with too few parts, then one per empty part.
        assert!(parse_multi_level_spec("only:two").is_none());
        assert!(parse_multi_level_spec(":Root:ids").is_none());
        assert!(parse_multi_level_spec("file::ids").is_none());
        assert!(parse_multi_level_spec("file:Root:").is_none());
    }

    // --- parse_disassemble_args ---

    #[test]
    fn parse_disassemble_args_handles_flags_and_eq_forms() {
        let args = [
            "path/to/file.xml",
            "--postpurge",
            "--prepurge",
            "--unique-id-elements=name,id",
            "--ignore-path=.foo",
            "--format=json",
            "--strategy=grouped-by-tag",
            "--multi-level=pattern:Root:ids",
            "--split-tags=a:split:b",
        ]
        .iter()
        .map(|s| sv(s))
        .collect::<Vec<_>>();
        let opts = parse_disassemble_args(&args);
        assert_eq!(opts.path, Some("path/to/file.xml"));
        assert!(opts.pre_purge);
        assert!(opts.post_purge);
        assert_eq!(opts.unique_id_elements, Some("name,id"));
        assert_eq!(opts.ignore_path, ".foo");
        assert_eq!(opts.format, "json");
        assert_eq!(opts.strategy, Some("grouped-by-tag"));
        assert_eq!(opts.multi_level.as_deref(), Some("pattern:Root:ids"));
        assert_eq!(opts.split_tags.as_deref(), Some("a:split:b"));
    }

    #[test]
    fn parse_disassemble_args_handles_space_separated_forms() {
        let args = [
            "file.xml",
            "--unique-id-elements",
            "name",
            "--ignore-path",
            ".gitignore",
            "--format",
            "yaml",
            "--strategy",
            "unique-id",
            "--multi-level",
            "p:R:ids",
            "--split-tags",
            "t:split:f",
        ]
        .iter()
        .map(|s| sv(s))
        .collect::<Vec<_>>();
        let opts = parse_disassemble_args(&args);
        assert_eq!(opts.path, Some("file.xml"));
        assert_eq!(opts.unique_id_elements, Some("name"));
        assert_eq!(opts.ignore_path, ".gitignore");
        assert_eq!(opts.format, "yaml");
        assert_eq!(opts.strategy, Some("unique-id"));
        assert_eq!(opts.multi_level.as_deref(), Some("p:R:ids"));
        assert_eq!(opts.split_tags.as_deref(), Some("t:split:f"));
    }

    #[test]
    fn parse_disassemble_args_p_alias_for_split_tags() {
        let args = ["file.xml", "-p", "a:split:b"]
            .iter()
            .map(|s| sv(s))
            .collect::<Vec<_>>();
        let opts = parse_disassemble_args(&args);
        assert_eq!(opts.split_tags.as_deref(), Some("a:split:b"));
    }

    #[test]
    fn parse_disassemble_args_unknown_long_flag_is_skipped() {
        let args = ["file.xml", "--unknown"]
            .iter()
            .map(|s| sv(s))
            .collect::<Vec<_>>();
        let opts = parse_disassemble_args(&args);
        assert_eq!(opts.path, Some("file.xml"));
    }

    #[test]
    fn parse_disassemble_args_defaults_when_empty() {
        let opts = parse_disassemble_args(&[]);
        assert!(opts.path.is_none());
        assert!(opts.strategy.is_none());
        assert!(opts.unique_id_elements.is_none());
        assert!(!opts.pre_purge);
        assert!(!opts.post_purge);
        assert_eq!(opts.ignore_path, ".xmldisassemblerignore");
        assert_eq!(opts.format, "xml");
    }

    #[test]
    fn parse_disassemble_args_space_forms_without_value_leave_default() {
        // The flag is the last argument, so there is no value to consume.
        let args = ["--unique-id-elements"]
            .iter()
            .map(|s| sv(s))
            .collect::<Vec<_>>();
        let opts = parse_disassemble_args(&args);
        assert!(opts.unique_id_elements.is_none());
    }

    #[test]
    fn parse_disassemble_args_trailing_extra_positional_ignored() {
        let args = ["first.xml", "second.xml"]
            .iter()
            .map(|s| sv(s))
            .collect::<Vec<_>>();
        let opts = parse_disassemble_args(&args);
        assert_eq!(opts.path, Some("first.xml"));
    }

    // --- parse_reassemble_args ---

    #[test]
    fn parse_reassemble_args_picks_path_extension_and_flag() {
        let args = ["some/dir", "json", "--postpurge"]
            .iter()
            .map(|s| sv(s))
            .collect::<Vec<_>>();
        let (path, ext, purge) = parse_reassemble_args(&args);
        assert_eq!(path, Some("some/dir"));
        assert_eq!(ext, Some("json"));
        assert!(purge);
    }

    #[test]
    fn parse_reassemble_args_defaults_and_extra_args_ignored() {
        let (p, e, purge) = parse_reassemble_args(&[]);
        assert!(p.is_none());
        assert!(e.is_none());
        assert!(!purge);

        // A third positional argument is dropped.
        let args = ["dir", "xml", "extra"]
            .iter()
            .map(|s| sv(s))
            .collect::<Vec<_>>();
        let (p, e, _) = parse_reassemble_args(&args);
        assert_eq!(p, Some("dir"));
        assert_eq!(e, Some("xml"));
    }

    // --- run (end-to-end) ---

    #[tokio::test]
    async fn run_no_args_prints_usage_and_succeeds() {
        run(vec![sv("xml-disassembler")]).await.unwrap();
    }

    #[tokio::test]
    async fn run_unknown_command_is_not_an_error() {
        run(vec![sv("xml-disassembler"), sv("unknown")])
            .await
            .unwrap();
    }

    #[tokio::test]
    async fn run_reassemble_missing_path_returns_err() {
        // Missing directory path propagates an error from fs::metadata.
        let err = run(vec![
            sv("xml-disassembler"),
            sv("reassemble"),
            sv("/definitely/not/here/xyz"),
        ])
        .await;
        assert!(err.is_err());
    }

    #[tokio::test]
    async fn run_disassemble_writes_expected_output() {
        let dir = tempfile::tempdir().unwrap();
        let xml_path = dir.path().join("sample.xml");
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<Root xmlns="http://example.com">
  <child><name>one</name></child>
  <child><name>two</name></child>
</Root>"#;
        std::fs::write(&xml_path, xml).unwrap();
        run(vec![
            sv("xml-disassembler"),
            sv("disassemble"),
            xml_path.to_string_lossy().to_string(),
        ])
        .await
        .unwrap();
        // Only checks that an output entry named after the input file's stem now exists.
        assert!(dir.path().join("sample").exists());
    }

    #[tokio::test]
    async fn run_disassemble_with_invalid_multi_level_spec_warns_and_continues() {
        let dir = tempfile::tempdir().unwrap();
        let xml_path = dir.path().join("sample.xml");
        let xml =
            r#"<?xml version="1.0" encoding="UTF-8"?><Root><child><name>a</name></child></Root>"#;
        std::fs::write(&xml_path, xml).unwrap();
        // "bad-spec" has no ':' separators, so it cannot parse into a rule;
        // the run is still expected to return Ok.
        run(vec![
            sv("xml-disassembler"),
            sv("disassemble"),
            xml_path.to_string_lossy().to_string(),
            sv("--multi-level=bad-spec"),
        ])
        .await
        .unwrap();
    }

    #[tokio::test]
    async fn run_reassemble_on_existing_directory_succeeds() {
        // Disassemble then reassemble via the CLI to cover the success path end-to-end.
        let dir = tempfile::tempdir().unwrap();
        let xml_path = dir.path().join("reasm.xml");
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<Root><child><name>one</name></child><child><name>two</name></child></Root>"#;
        std::fs::write(&xml_path, xml).unwrap();
        run(vec![
            sv("xml-disassembler"),
            sv("disassemble"),
            xml_path.to_string_lossy().to_string(),
        ])
        .await
        .unwrap();
        let disassembled_dir = dir.path().join("reasm");
        assert!(disassembled_dir.exists());
        run(vec![
            sv("xml-disassembler"),
            sv("reassemble"),
            disassembled_dir.to_string_lossy().to_string(),
        ])
        .await
        .unwrap();
    }

    #[tokio::test]
    async fn run_disassemble_with_grouped_by_tag_split_tags_runs() {
        let dir = tempfile::tempdir().unwrap();
        let xml_path = dir.path().join("perms.xml");
        let xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<Root>
  <objectPermissions><object>A</object><allowRead>true</allowRead></objectPermissions>
  <objectPermissions><object>B</object><allowRead>false</allowRead></objectPermissions>
</Root>"#;
        std::fs::write(&xml_path, xml).unwrap();
        // Smoke test: asserts only that the run succeeds, not what it writes.
        run(vec![
            sv("xml-disassembler"),
            sv("disassemble"),
            xml_path.to_string_lossy().to_string(),
            sv("--strategy=grouped-by-tag"),
            sv("-p"),
            sv("objectPermissions:split:object"),
        ])
        .await
        .unwrap();
    }
}