Skip to main content

config_disassembler/
disassemble.rs

//! Disassemble a JSON, JSONC, JSON5, YAML, TOON, TOML, or INI document into a directory of
//! smaller files, optionally written in a different format than the input.
3//!
4//! The `input` may be either a single file or a directory. When it points
5//! at a directory, every file under the directory whose extension matches
6//! the input format (or, when `input_format` is `None`, any supported
7//! value-model format) is disassembled in place. An optional `ignore_path`
8//! can point at a `.gitignore`-style ignore file used to skip paths.
9
10use std::collections::{BTreeMap, BTreeSet};
11use std::fs;
12use std::path::{Path, PathBuf};
13
14use ignore::gitignore::{Gitignore, GitignoreBuilder};
15use jsonc_parser::ast;
16use jsonc_parser::common::Ranged;
17use serde_json::{Map, Value};
18use sha2::{Digest, Sha256};
19
20use crate::error::{Error, Result};
21use crate::format::{jsonc_parse_options, ConversionOperation, Format};
22use crate::ignore_file::DEFAULT_IGNORE_FILENAME;
23use crate::meta::{Meta, Root};
24
/// Basename (without extension) of the file that collects an object root's
/// scalar top-level keys. The output format's extension is appended when the
/// file is written, and the resulting name is reserved so that no per-key
/// file can take it.
const MAIN_BASENAME: &str = "_main";
27
/// Options controlling disassembly.
///
/// Pass an instance to [`disassemble`]. `input` decides whether a single file
/// or a whole directory tree is processed.
#[derive(Debug, Clone)]
pub struct DisassembleOptions {
    /// Path to the input. May be either a single config file or a
    /// directory; when it is a directory, every matching file under it
    /// is disassembled in place (see also `ignore_path`).
    pub input: PathBuf,
    /// Format to read the input as. If `None`, the format is inferred
    /// from each file's extension. For directory inputs a `Some` value also
    /// restricts the walk to files whose extension maps to that format.
    pub input_format: Option<Format>,
    /// Directory to write split files into. Only meaningful when
    /// `input` is a single file; for directory inputs each file's
    /// output goes into a sibling directory named after that file's
    /// stem (mirroring the XML disassembler's behavior), and supplying a
    /// value is rejected with a usage error.
    pub output_dir: Option<PathBuf>,
    /// Format to write split files in. Defaults to the format the input is
    /// read as (the explicit `input_format`, or the one inferred from the
    /// file extension).
    pub output_format: Option<Format>,
    /// For array roots, name array-element files using the value of this
    /// field if present on each element. The value must be a string, number,
    /// or boolean; elements without a usable value, or whose name is already
    /// taken, fall back to a zero-padded positional name.
    pub unique_id: Option<String>,
    /// If true and the output directory already exists, it is removed
    /// together with everything in it and recreated before writing.
    pub pre_purge: bool,
    /// If true, delete the input file after disassembling. For single-file
    /// inputs this removes the source file itself. For directory inputs each
    /// source file is removed individually; the directory container is kept
    /// because split output directories live inside it.
    pub post_purge: bool,
    /// Optional path to a `.gitignore`-style ignore file that filters
    /// which files are processed when `input` is a directory. Pass
    /// `None` to use [`DEFAULT_IGNORE_FILENAME`] in the input directory
    /// (silently absent if the file does not exist). Ignored entirely
    /// for single-file inputs.
    pub ignore_path: Option<PathBuf>,
}
62
63impl DisassembleOptions {
64    /// Build options for a single-file disassembly with sensible
65    /// defaults. Directory walks should construct `DisassembleOptions`
66    /// directly so they can opt into `ignore_path`.
67    pub fn for_file(input: PathBuf) -> Self {
68        Self {
69            input,
70            input_format: None,
71            output_dir: None,
72            output_format: None,
73            unique_id: None,
74            pre_purge: false,
75            post_purge: false,
76            ignore_path: None,
77        }
78    }
79}
80
81/// Disassemble a configuration file (or directory of files) into split
82/// files.
83///
84/// * When `opts.input` is a regular file, returns the directory the files
85///   were written to (i.e. the single output directory for that file).
86/// * When `opts.input` is a directory, every matching file under it is
87///   disassembled in place and the input directory itself is returned.
88pub fn disassemble(opts: DisassembleOptions) -> Result<PathBuf> {
89    let metadata = fs::metadata(&opts.input)?;
90    if metadata.is_dir() {
91        return disassemble_directory(opts);
92    }
93    disassemble_file(opts)
94}
95
96/// Disassemble a single file. Equivalent to the previous behavior of
97/// [`disassemble`].
98fn disassemble_file(opts: DisassembleOptions) -> Result<PathBuf> {
99    let input_format = match opts.input_format {
100        Some(f) => f,
101        None => Format::from_path(&opts.input)?,
102    };
103    let output_format = opts.output_format.unwrap_or(input_format);
104    input_format.ensure_can_convert_to(output_format, ConversionOperation::Convert)?;
105
106    let output_dir = match opts.output_dir.clone() {
107        Some(d) => d,
108        None => default_output_dir(&opts.input)?,
109    };
110
111    if opts.pre_purge && output_dir.exists() {
112        fs::remove_dir_all(&output_dir)?;
113    }
114    fs::create_dir_all(&output_dir)?;
115
116    let source_filename = opts
117        .input
118        .file_name()
119        .and_then(|n| n.to_str())
120        .map(|s| s.to_string());
121
122    if input_format == Format::Jsonc && output_format == Format::Jsonc {
123        let (root, indent) =
124            write_jsonc_root_preserving(&opts.input, &output_dir, opts.unique_id.as_deref())?;
125        let meta = Meta {
126            source_format: input_format,
127            file_format: output_format,
128            source_filename,
129            root,
130            indent: Some(indent),
131        };
132        meta.write(&output_dir)?;
133
134        if opts.post_purge {
135            fs::remove_file(&opts.input)?;
136        }
137
138        return Ok(output_dir);
139    }
140
141    let value = input_format.load(&opts.input)?;
142
143    let root = match &value {
144        Value::Object(map) => write_object_root(&output_dir, map, output_format)?,
145        Value::Array(items) => {
146            write_array_root(&output_dir, items, output_format, opts.unique_id.as_deref())?
147        }
148        _ => {
149            return Err(Error::Invalid(
150                "top-level value must be an object or array to disassemble".into(),
151            ));
152        }
153    };
154
155    let meta = Meta {
156        source_format: input_format,
157        file_format: output_format,
158        source_filename,
159        root,
160        indent: None,
161    };
162    meta.write(&output_dir)?;
163
164    if opts.post_purge {
165        fs::remove_file(&opts.input)?;
166    }
167
168    Ok(output_dir)
169}
170
171/// Disassemble every matching file under a directory. Each file's split
172/// output is placed in a sibling directory named after the file's stem,
173/// matching how the XML disassembler treats directory inputs.
174fn disassemble_directory(opts: DisassembleOptions) -> Result<PathBuf> {
175    if opts.output_dir.is_some() {
176        return Err(Error::Usage(
177            "--output-dir is not supported with a directory input; each file's split output is written next to it".into(),
178        ));
179    }
180
181    let root = opts.input.clone();
182    let ignore = load_ignore_rules(opts.ignore_path.as_deref(), &root)?;
183
184    let mut targets = collect_disassemble_targets(&root, &ignore, opts.input_format)?;
185    targets.sort();
186
187    for file in &targets {
188        let mut child_opts = opts.clone();
189        child_opts.input = file.clone();
190        // Each file's output goes into <stem>/ next to the file itself,
191        // never into a shared --output-dir (we rejected that above).
192        child_opts.output_dir = None;
193        disassemble_file(child_opts)?;
194    }
195
196    Ok(root)
197}
198
199/// Walk `root` and collect every file whose extension matches one of the
200/// supported formats (or, if `expected_format` is `Some`, only that
201/// format), excluding paths matched by `ignore`.
202fn collect_disassemble_targets(
203    root: &Path,
204    ignore: &Option<Gitignore>,
205    expected_format: Option<Format>,
206) -> Result<Vec<PathBuf>> {
207    let mut out = Vec::new();
208    let mut stack = vec![root.to_path_buf()];
209    while let Some(dir) = stack.pop() {
210        for entry in fs::read_dir(&dir)? {
211            let entry = entry?;
212            let path = entry.path();
213            let ft = entry.file_type()?;
214            if is_ignored(ignore, root, &path, ft.is_dir()) {
215                continue;
216            }
217            if ft.is_dir() {
218                // Skip subdirectories that are already disassembled outputs
219                // (identified by containing a metadata sidecar) so that a
220                // second directory-mode run does not re-disassemble the split
221                // files or corrupt the metadata.
222                if !path.join(crate::meta::META_FILENAME).exists() {
223                    stack.push(path);
224                }
225                continue;
226            }
227            if !ft.is_file() {
228                continue;
229            }
230            // Only look at files whose extension parses as a known
231            // format, and (when input_format was set) only the matching
232            // format. Anything else is silently skipped — a directory of
233            // mixed config files commonly contains README/.git/etc.
234            let detected = match Format::from_path(&path) {
235                Ok(f) => f,
236                Err(_) => continue,
237            };
238            if let Some(expected) = expected_format {
239                if expected != detected {
240                    continue;
241                }
242            }
243            out.push(path);
244        }
245    }
246    Ok(out)
247}
248
249fn load_ignore_rules(explicit: Option<&Path>, fallback_dir: &Path) -> Result<Option<Gitignore>> {
250    let path = match explicit {
251        Some(p) => p.to_path_buf(),
252        None => fallback_dir.join(DEFAULT_IGNORE_FILENAME),
253    };
254    if !path.exists() {
255        return Ok(None);
256    }
257    let content = fs::read_to_string(&path)?;
258    let anchor = path.parent().unwrap_or(Path::new("."));
259    let mut builder = GitignoreBuilder::new(anchor);
260    for line in content.lines() {
261        // `add_line` returns a pattern-error on malformed globs; mirror
262        // the XML disassembler's tolerant parsing and skip bad lines
263        // rather than failing the whole run.
264        let _ = builder.add_line(None, line);
265    }
266    Ok(builder.build().ok())
267}
268
269fn is_ignored(ignore: &Option<Gitignore>, root: &Path, path: &Path, is_dir: bool) -> bool {
270    let Some(ign) = ignore.as_ref() else {
271        return false;
272    };
273    let candidate = path.strip_prefix(root).unwrap_or(path);
274    ign.matched(candidate, is_dir).is_ignore()
275}
276
277fn default_output_dir(input: &Path) -> Result<PathBuf> {
278    let stem = input.file_stem().and_then(|s| s.to_str()).ok_or_else(|| {
279        Error::Invalid(format!(
280            "could not derive a directory name from {}",
281            input.display()
282        ))
283    })?;
284    let parent = input.parent().unwrap_or(Path::new("."));
285    Ok(parent.join(stem))
286}
287
288fn write_object_root(dir: &Path, map: &Map<String, Value>, fmt: Format) -> Result<Root> {
289    let mut key_order: Vec<String> = Vec::with_capacity(map.len());
290    let mut key_files: BTreeMap<String, String> = BTreeMap::new();
291    let mut main_object: Map<String, Value> = Map::new();
292    let mut used_names: BTreeSet<String> = BTreeSet::new();
293    used_names.insert(format!("{MAIN_BASENAME}.{}", fmt.extension()));
294
295    for (key, value) in map {
296        key_order.push(key.clone());
297        if is_scalar(value) {
298            main_object.insert(key.clone(), value.clone());
299            continue;
300        }
301
302        let filename = unique_filename_for_key(key, fmt, &used_names);
303        used_names.insert(filename.clone());
304        let path = dir.join(&filename);
305        let payload = fmt.wrap_split_payload(key, value);
306        fs::write(&path, fmt.serialize(&payload)?)?;
307        key_files.insert(key.clone(), filename);
308    }
309
310    let main_file = if main_object.is_empty() {
311        None
312    } else {
313        let filename = format!("{MAIN_BASENAME}.{}", fmt.extension());
314        let path = dir.join(&filename);
315        fs::write(&path, fmt.serialize(&Value::Object(main_object))?)?;
316        Some(filename)
317    };
318
319    Ok(Root::Object {
320        key_order,
321        key_files,
322        main_file,
323    })
324}
325
326fn write_array_root(
327    dir: &Path,
328    items: &[Value],
329    fmt: Format,
330    unique_id: Option<&str>,
331) -> Result<Root> {
332    let mut files = Vec::with_capacity(items.len());
333    let mut used_names: BTreeSet<String> = BTreeSet::new();
334    let width = digit_width(items.len());
335
336    for (idx, item) in items.iter().enumerate() {
337        let mut basename = if let Some(field) = unique_id {
338            unique_id_basename(item, field)
339        } else {
340            None
341        };
342        if basename
343            .as_ref()
344            .map(|n| used_names.contains(&format!("{n}.{}", fmt.extension())))
345            .unwrap_or(false)
346        {
347            basename = None;
348        }
349        let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
350
351        let mut filename = format!("{basename}.{}", fmt.extension());
352        if used_names.contains(&filename) {
353            filename = format!("{basename}-{}.{}", hash_value(item, 8), fmt.extension());
354        }
355        used_names.insert(filename.clone());
356
357        let path = dir.join(&filename);
358        fs::write(&path, fmt.serialize(item)?)?;
359        files.push(filename);
360    }
361
362    Ok(Root::Array { files })
363}
364
365fn write_jsonc_root_preserving(
366    input: &Path,
367    dir: &Path,
368    unique_id: Option<&str>,
369) -> Result<(Root, String)> {
370    let text = fs::read_to_string(input)?;
371    let indent = detect_jsonc_indent(&text);
372    let ast = parse_jsonc_ast(&text)?;
373    let value = Format::Jsonc.parse(&text)?;
374
375    let root = match (ast, value) {
376        (ast::Value::Object(object), Value::Object(_)) => {
377            write_jsonc_object_root(dir, &text, object)?
378        }
379        (ast::Value::Array(array), Value::Array(items)) => {
380            write_jsonc_array_root(dir, &text, array, &items, unique_id)?
381        }
382        _ => {
383            return Err(Error::Invalid(
384                "top-level value must be an object or array to disassemble".into(),
385            ))
386        }
387    };
388    Ok((root, indent))
389}
390
/// Detect the indentation unit of a JSONC document: the leading ASCII
/// whitespace of the first indented line that carries content.
///
/// Two kinds of line are skipped because their leading whitespace says
/// nothing about the indent unit:
///
/// * whitespace-only lines (stray spaces on an otherwise blank line);
/// * lines whose first visible character is `*`, i.e. block-comment
///   continuation or closing lines, which are conventionally aligned one
///   column past the comment opener. No JSON token starts with `*`, so such
///   a line can never be a value.
///
/// Falls back to two spaces if no line qualifies (e.g. minified JSON or
/// empty input).
fn detect_jsonc_indent(text: &str) -> String {
    text.lines()
        .find_map(|line| {
            let content = line.trim_start_matches(|c: char| c.is_ascii_whitespace());
            let indent = &line[..line.len() - content.len()];
            let layout_only = content.is_empty() || content.starts_with('*');
            (!indent.is_empty() && !layout_only).then(|| indent.to_string())
        })
        .unwrap_or_else(|| "  ".to_string())
}
409
410fn write_jsonc_object_root(dir: &Path, text: &str, object: ast::Object<'_>) -> Result<Root> {
411    let properties = jsonc_object_properties(text, object)?;
412    let mut key_order = Vec::with_capacity(properties.len());
413    let mut key_files: BTreeMap<String, String> = BTreeMap::new();
414    let mut main_segments = Vec::new();
415    let mut used_names: BTreeSet<String> = BTreeSet::new();
416    used_names.insert(format!("{MAIN_BASENAME}.{}", Format::Jsonc.extension()));
417
418    for property in properties {
419        key_order.push(property.key.clone());
420        if property.is_scalar {
421            main_segments.push(property.segment);
422            continue;
423        }
424
425        let filename = unique_filename_for_key(&property.key, Format::Jsonc, &used_names);
426        used_names.insert(filename.clone());
427        let path = dir.join(&filename);
428        let content = format!(
429            "{}{}",
430            property.leading_comment,
431            ensure_trailing_newline(&property.value_text)
432        );
433        fs::write(path, content)?;
434        key_files.insert(property.key, filename);
435    }
436
437    let main_file = if main_segments.is_empty() {
438        None
439    } else {
440        let filename = format!("{MAIN_BASENAME}.{}", Format::Jsonc.extension());
441        let path = dir.join(&filename);
442        let text = render_jsonc_object(main_segments.iter());
443        fs::write(path, text)?;
444        Some(filename)
445    };
446
447    Ok(Root::Object {
448        key_order,
449        key_files,
450        main_file,
451    })
452}
453
454fn write_jsonc_array_root(
455    dir: &Path,
456    text: &str,
457    array: ast::Array<'_>,
458    items: &[Value],
459    unique_id: Option<&str>,
460) -> Result<Root> {
461    if array.elements.len() != items.len() {
462        return Err(Error::Invalid(
463            "JSONC AST and value model disagree on array length".into(),
464        ));
465    }
466
467    let mut files = Vec::with_capacity(array.elements.len());
468    let mut used_names: BTreeSet<String> = BTreeSet::new();
469    let width = digit_width(array.elements.len());
470
471    for (idx, (element, item)) in array.elements.iter().zip(items).enumerate() {
472        let mut basename = unique_id.and_then(|field| unique_id_basename(item, field));
473        if basename
474            .as_ref()
475            .map(|n| used_names.contains(&format!("{n}.{}", Format::Jsonc.extension())))
476            .unwrap_or(false)
477        {
478            basename = None;
479        }
480        let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
481
482        let mut filename = format!("{basename}.{}", Format::Jsonc.extension());
483        if used_names.contains(&filename) {
484            filename = format!(
485                "{basename}-{}.{}",
486                hash_value(item, 8),
487                Format::Jsonc.extension()
488            );
489        }
490        used_names.insert(filename.clone());
491
492        let value_text = element.text(text).trim();
493        fs::write(dir.join(&filename), ensure_trailing_newline(value_text))?;
494        files.push(filename);
495    }
496
497    Ok(Root::Array { files })
498}
499
/// Source-text view of one property of a JSONC object, captured so the
/// property can be written back out without re-serializing it.
struct JsoncPropertySyntax {
    /// The property's name.
    key: String,
    /// True when the value is neither an object nor an array.
    is_scalar: bool,
    /// Slice of the source running from the property's leading comment lines
    /// to the end of the line on which its value ends; used when the property
    /// goes into the main file.
    segment: String,
    /// Trimmed source text of the value alone; used when the property is
    /// written to its own file.
    value_text: String,
    /// Lines immediately before the property's key that are blank or
    /// comment lines (captured so they can be prepended to the split
    /// file, preserving comments on complex-value properties).
    leading_comment: String,
}
510
511fn jsonc_object_properties(
512    text: &str,
513    object: ast::Object<'_>,
514) -> Result<Vec<JsoncPropertySyntax>> {
515    let mut properties = Vec::with_capacity(object.properties.len());
516    for property in object.properties {
517        let key = property.name.clone().into_string();
518        let property_range = property.range();
519        let value_range = property.value.range();
520        let property_line_start = line_start(text, property_range.start);
521        let comment_start = leading_comment_start(text, property_line_start);
522        properties.push(JsoncPropertySyntax {
523            key,
524            is_scalar: is_jsonc_ast_scalar(&property.value),
525            segment: jsonc_property_segment(text, property_range.start, value_range.end)
526                .to_string(),
527            value_text: property.value.text(text).trim().to_string(),
528            leading_comment: text[comment_start..property_line_start].to_string(),
529        });
530    }
531    Ok(properties)
532}
533
534fn parse_jsonc_ast(text: &str) -> Result<ast::Value<'_>> {
535    jsonc_parser::parse_to_ast(text, &Default::default(), &jsonc_parse_options())
536        .map_err(|e| Error::Invalid(format!("jsonc parse error: {e}")))?
537        .value
538        .ok_or_else(|| Error::Invalid("JSONC document did not contain a value".into()))
539}
540
541fn is_jsonc_ast_scalar(value: &ast::Value<'_>) -> bool {
542    !matches!(value, ast::Value::Object(_) | ast::Value::Array(_))
543}
544
/// Slice out the source text that represents one property in the main file.
///
/// `property_start` is the byte offset of the property's key and `value_end`
/// the offset just past its value.
///
/// * When the property is the first thing on its line, the segment starts at
///   the blank/comment lines directly above it (so leading comments travel
///   with the property) and keeps the line's indentation. Otherwise it starts
///   at the key itself, so text belonging to whatever precedes it on the line
///   is left out.
/// * When nothing but an optional comma and comments follow the value on its
///   line, the segment runs to the end of that line (keeping a trailing
///   comment). Otherwise it stops right after the value, so a following
///   property or closing brace on the same line is left out.
fn jsonc_property_segment(text: &str, property_start: usize, value_end: usize) -> &str {
    let first_line_start = line_start(text, property_start);
    let start = if text[first_line_start..property_start].trim().is_empty() {
        leading_comment_start(text, first_line_start)
    } else {
        property_start
    };

    let last_line_end = line_end(text, value_end);
    let end = if is_property_trailer(&text[value_end..last_line_end]) {
        last_line_end
    } else {
        value_end
    };

    &text[start..end]
}

/// Decide whether `tail` (the rest of a line after a property's value) holds
/// only things that belong to that property: whitespace, at most one comma,
/// and comments. A block comment left open at the end of the line is
/// accepted as well.
fn is_property_trailer(tail: &str) -> bool {
    let mut rest = tail.trim_start();
    let mut seen_comma = false;
    loop {
        if rest.is_empty() || rest.starts_with("//") {
            return true;
        }
        if let Some(after_comma) = rest.strip_prefix(',') {
            if seen_comma {
                return false;
            }
            seen_comma = true;
            rest = after_comma.trim_start();
        } else if let Some(after_open) = rest.strip_prefix("/*") {
            match after_open.find("*/") {
                Some(close) => rest = after_open[close + 2..].trim_start(),
                // The comment continues on the next line; keep the line whole.
                None => return true,
            }
        } else {
            // Any other token is the start of something else.
            return false;
        }
    }
}

/// Walk backwards from the line start `start` over the blank and comment
/// lines directly above it, returning the offset where that run begins (or
/// `start` itself when the line above is neither).
///
/// A line counts as a comment line when, after trimming, it starts with `//`,
/// `/*` or `*`, or when it ends with `*/` without also containing `/*`. The
/// last rule recognises the closing line of a multi-line block comment while
/// rejecting a code line that merely carries a trailing block comment
/// (`"a": 1, /* note */`).
fn leading_comment_start(text: &str, mut start: usize) -> usize {
    while start > 0 {
        // `start` is a line start, so the byte before it is the previous
        // line's terminating `\n`.
        let previous_line_end = start.saturating_sub(1);
        let previous_line_start = line_start(text, previous_line_end);
        let line = text[previous_line_start..previous_line_end].trim();

        let closes_block_comment = line.ends_with("*/") && !line.contains("/*");
        let blank_or_comment = line.is_empty()
            || line.starts_with("//")
            || line.starts_with("/*")
            || line.starts_with('*')
            || closes_block_comment;
        if !blank_or_comment {
            break;
        }
        start = previous_line_start;
    }
    start
}

/// Byte offset of the first character of the line containing `pos`.
fn line_start(text: &str, pos: usize) -> usize {
    text[..pos].rfind('\n').map(|idx| idx + 1).unwrap_or(0)
}

/// Byte offset of the `\n` that ends the line containing `pos`, or the length
/// of `text` when that line is the last one.
fn line_end(text: &str, pos: usize) -> usize {
    text[pos..]
        .find('\n')
        .map(|idx| pos + idx)
        .unwrap_or(text.len())
}
581
582fn render_jsonc_object<'a>(segments: impl IntoIterator<Item = &'a String>) -> String {
583    let mut out = String::from("{\n");
584    for segment in segments {
585        out.push_str(&jsonc_segment_with_comma(segment));
586        out.push('\n');
587    }
588    out.push_str("}\n");
589    out
590}
591
/// Return `segment` (minus surrounding line breaks) terminated by exactly one
/// separating comma.
///
/// Only the code part of the last line decides whether a comma is already
/// present, so:
///
/// * `"a": 1, // note` is returned unchanged rather than gaining a second
///   comma in front of the comment;
/// * `"a": 1 // one, two,` still receives its comma, because the comma inside
///   the comment is not a separator;
/// * `"a": 1 /* c */,` is returned unchanged, since the comma after the block
///   comment already separates the property.
///
/// When a comma has to be added and the last line carries a trailing comment,
/// the comma goes directly after the code, before the comment; otherwise it
/// is appended to the segment.
fn jsonc_segment_with_comma(segment: &str) -> String {
    let segment = segment.trim_matches(|c| c == '\r' || c == '\n');

    let last = last_line(segment);
    let Some(offset) = line_comment_start(last) else {
        // No comment on the last line, so the whole tail is code.
        return if segment.trim_end().ends_with(',') {
            segment.to_string()
        } else {
            format!("{segment},")
        };
    };

    let (code, comment) = segment.split_at(segment.len() - last.len() + offset);
    let code = code.trim_end();
    if code.ends_with(',') || comma_follows_block_comments(comment) {
        return segment.to_string();
    }
    format!("{code},{comment}")
}

/// Check whether a comma directly follows the run of block comments at the
/// start of `trailer`. A line comment, other text, or an unterminated block
/// comment ends the search with `false`.
fn comma_follows_block_comments(mut trailer: &str) -> bool {
    loop {
        trailer = trailer.trim_start();
        if trailer.starts_with(',') {
            return true;
        }
        let Some(after_open) = trailer.strip_prefix("/*") else {
            return false;
        };
        let Some(close) = after_open.find("*/") else {
            return false;
        };
        trailer = &after_open[close + 2..];
    }
}

/// Slice the substring after the final `\n`, or the entire input if there
/// is no newline. Kept as a helper so callers need no explicit `idx + 1`
/// byte arithmetic.
fn last_line(s: &str) -> &str {
    s.rsplit('\n').next().unwrap_or(s)
}

/// Byte offset of the first `//` or `/*` in `line` that lies outside a
/// double-quoted string, or `None` if there is none. Backslash escapes inside
/// strings are honoured, so `\"` does not end a string.
fn line_comment_start(line: &str) -> Option<usize> {
    let mut chars = line.char_indices().peekable();
    let mut in_string = false;
    let mut escaped = false;

    while let Some((idx, ch)) = chars.next() {
        if in_string {
            if escaped {
                // The escaped character never changes the string state.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }

        if ch == '"' {
            in_string = true;
        } else if ch == '/' && matches!(chars.peek(), Some((_, '/' | '*'))) {
            return Some(idx);
        }
    }

    None
}
646
/// Return `text` as an owned string that ends with `\n`, appending one only
/// when it is missing.
fn ensure_trailing_newline(text: &str) -> String {
    if text.ends_with('\n') {
        text.to_owned()
    } else {
        format!("{text}\n")
    }
}
654
655fn is_scalar(value: &Value) -> bool {
656    !matches!(value, Value::Object(_) | Value::Array(_))
657}
658
/// Number of decimal digits needed to print `count`, but never fewer than
/// four, so positional file names are at least `0001`-style wide.
fn digit_width(count: usize) -> usize {
    // Repeatedly dropping the last digit yields one item per decimal digit.
    let digits = std::iter::successors(Some(count), |&n| (n >= 10).then(|| n / 10)).count();
    digits.max(4)
}
668
669fn unique_filename_for_key(key: &str, fmt: Format, used: &BTreeSet<String>) -> String {
670    let sanitized = sanitize(key);
671    let base = if sanitized.is_empty() {
672        hash_string(key, 12)
673    } else {
674        sanitized
675    };
676    let mut filename = format!("{base}.{}", fmt.extension());
677    if used.contains(&filename) {
678        filename = format!("{base}-{}.{}", hash_string(key, 8), fmt.extension());
679    }
680    filename
681}
682
683fn unique_id_basename(item: &Value, field: &str) -> Option<String> {
684    let map = item.as_object()?;
685    let raw = match map.get(field)? {
686        Value::String(s) => s.clone(),
687        Value::Number(n) => n.to_string(),
688        Value::Bool(b) => b.to_string(),
689        _ => return None,
690    };
691    let s = sanitize(&raw);
692    if s.is_empty() {
693        None
694    } else {
695        Some(s)
696    }
697}
698
/// Turn arbitrary text into a file-name-safe string.
///
/// ASCII letters, ASCII digits, `-`, `_` and `.` are kept; every other
/// character becomes `_`. Leading and trailing dots are then stripped. The
/// result may be empty.
fn sanitize(input: &str) -> String {
    let is_allowed = |c: char| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.');

    let mut mapped = String::with_capacity(input.len());
    for c in input.chars() {
        mapped.push(if is_allowed(c) { c } else { '_' });
    }
    mapped.trim_matches('.').to_owned()
}
713
714fn hash_string(input: &str, len: usize) -> String {
715    let digest = Sha256::digest(input.as_bytes());
716    let hex: String = digest.iter().map(|b| format!("{b:02x}")).collect();
717    hex.chars().take(len).collect()
718}
719
720fn hash_value(value: &Value, len: usize) -> String {
721    let canonical = serde_json::to_string(value).unwrap_or_default();
722    hash_string(&canonical, len)
723}
724
725#[cfg(test)]
726mod tests {
727    use super::*;
728    use serde_json::json;
729
730    #[test]
731    fn jsonc_segment_with_comma_inserts_before_trailing_line_comment() {
732        assert_eq!(
733            jsonc_segment_with_comma(r#"  "name": "demo" // keep this comment"#),
734            r#"  "name": "demo",// keep this comment"#
735        );
736    }
737
738    #[test]
739    fn jsonc_segment_with_comma_inserts_comma_before_trailing_comment_on_multi_line() {
740        // Pins `last_line_start = idx + 1` on the `rfind('\n').map(|idx|
741        // idx + 1)` path. Mutating `+ 1` to `- 1` would back the slice up
742        // by two bytes, putting an unbalanced `"` at the start of
743        // `last_line`. That flips `line_comment_start` into in-string mode
744        // for the rest of the slice, it returns None, and we fall through
745        // to `format!("{segment},")` -- the comma ends up *after* the
746        // comment instead of before it.
747        let input = "  \"a\": \"x\"\n  \"b\": 2 // trail";
748        assert_eq!(
749            jsonc_segment_with_comma(input),
750            "  \"a\": \"x\"\n  \"b\": 2,// trail"
751        );
752    }
753
754    #[test]
755    fn jsonc_segment_with_comma_ignores_comment_markers_inside_strings() {
756        assert_eq!(
757            jsonc_segment_with_comma(r#"  "url": "https://example.com/a""#),
758            r#"  "url": "https://example.com/a","#
759        );
760    }
761
762    #[test]
763    fn jsonc_segment_with_comma_leaves_existing_comma_alone() {
764        assert_eq!(
765            jsonc_segment_with_comma("  \"enabled\": true,"),
766            "  \"enabled\": true,"
767        );
768    }
769
770    #[test]
771    fn line_comment_start_respects_escaped_quotes() {
772        let line = r#"  "text": "escaped \" quote // still string" // comment"#;
773        assert_eq!(
774            line_comment_start(line),
775            Some(line.find(" // comment").unwrap() + 1)
776        );
777    }
778
779    #[test]
780    fn ensure_trailing_newline_does_not_duplicate_newline() {
781        assert_eq!(ensure_trailing_newline("value\n"), "value\n");
782        assert_eq!(ensure_trailing_newline("value"), "value\n");
783    }
784
785    #[test]
786    fn jsonc_same_format_post_purge_removes_input_file() {
787        let tmp = tempfile::tempdir().unwrap();
788        let input = tmp.path().join("config.jsonc");
789        fs::write(
790            &input,
791            r#"{
792  "name": "demo",
793  "settings": {
794    "retry": 3,
795  },
796}"#,
797        )
798        .unwrap();
799
800        let output_dir = tmp.path().join("split");
801        let dir = disassemble(DisassembleOptions {
802            input: input.clone(),
803            input_format: Some(Format::Jsonc),
804            output_dir: Some(output_dir),
805            output_format: Some(Format::Jsonc),
806            unique_id: None,
807            pre_purge: false,
808            post_purge: true,
809            ignore_path: None,
810        })
811        .unwrap();
812
813        assert!(!input.exists());
814        assert!(dir.join("settings.jsonc").exists());
815        assert!(dir.join(MAIN_BASENAME).with_extension("jsonc").exists());
816    }
817
818    #[test]
819    fn detect_jsonc_indent_returns_first_indented_lines_leading_whitespace() {
820        assert_eq!(detect_jsonc_indent("{\n  \"a\": 1\n}"), "  ");
821        assert_eq!(detect_jsonc_indent("{\n    \"a\": 1\n}"), "    ");
822        assert_eq!(detect_jsonc_indent("{\n\t\"a\": 1\n}"), "\t");
823    }
824
825    #[test]
826    fn detect_jsonc_indent_falls_back_to_two_spaces_for_minified_input() {
827        assert_eq!(detect_jsonc_indent("{\"a\":1}"), "  ");
828        assert_eq!(detect_jsonc_indent(""), "  ");
829    }
830
831    #[test]
832    fn write_jsonc_object_root_writes_nested_and_main_files() {
833        let text = r#"{
834  "name": "demo",
835  "settings": {
836    "retry": 3,
837  },
838}"#;
839        let object = parse_jsonc_ast(text).unwrap().as_object().unwrap().clone();
840        let tmp = tempfile::tempdir().unwrap();
841
842        let root = write_jsonc_object_root(tmp.path(), text, object).unwrap();
843        let root = serde_json::to_value(&root).unwrap();
844        assert_eq!(root["kind"], "object");
845        assert_eq!(root["key_order"], json!(["name", "settings"]));
846        assert_eq!(root["key_files"]["settings"], "settings.jsonc");
847        assert_eq!(root["main_file"], "_main.jsonc");
848        assert!(fs::read_to_string(tmp.path().join("settings.jsonc"))
849            .unwrap()
850            .contains(r#""retry": 3"#));
851        assert!(fs::read_to_string(tmp.path().join("_main.jsonc"))
852            .unwrap()
853            .contains(r#""name": "demo","#));
854    }
855
856    #[test]
857    fn write_jsonc_array_root_rejects_ast_value_length_mismatch() {
858        let text = "[1, 2]";
859        let array = parse_jsonc_ast(text).unwrap().as_array().unwrap().clone();
860        let tmp = tempfile::tempdir().unwrap();
861
862        let err = write_jsonc_array_root(tmp.path(), text, array, &[json!(1)], None)
863            .expect_err("should reject mismatched inputs");
864
865        assert!(
866            err.to_string()
867                .contains("JSONC AST and value model disagree on array length"),
868            "got: {err}"
869        );
870    }
871
872    #[test]
873    fn unique_id_basename_accepts_numeric_field() {
874        // Regression guard: a numeric unique-id field must produce a filename,
875        // not fall through to the `None` arm.
876        let v = json!({"id": 42});
877        assert_eq!(unique_id_basename(&v, "id"), Some("42".to_string()));
878    }
879
880    #[test]
881    fn unique_id_basename_accepts_bool_field() {
882        // Regression guard: a boolean unique-id field must produce a filename.
883        let v = json!({"flag": true});
884        assert_eq!(unique_id_basename(&v, "flag"), Some("true".to_string()));
885        let v = json!({"flag": false});
886        assert_eq!(unique_id_basename(&v, "flag"), Some("false".to_string()));
887    }
888
889    #[test]
890    fn unique_id_basename_returns_none_for_missing_or_unsupported() {
891        let v = json!({"id": "x"});
892        assert_eq!(unique_id_basename(&v, "missing"), None);
893        let v = json!({"id": null});
894        assert_eq!(unique_id_basename(&v, "id"), None);
895        let v = json!({"id": ["nested"]});
896        assert_eq!(unique_id_basename(&v, "id"), None);
897    }
898
899    #[test]
900    fn sanitize_preserves_allowed_chars_and_replaces_others() {
901        // Each disjunct of the allowed-char check must be exercised: alphanumeric,
902        // dash, underscore, and dot all survive; anything else becomes `_`.
903        assert_eq!(sanitize("abc123-_."), "abc123-_");
904        assert_eq!(sanitize("foo@bar!"), "foo_bar_");
905        // Leading/trailing dots are trimmed off after the per-char map.
906        assert_eq!(sanitize(".start.end."), "start.end");
907        assert_eq!(sanitize("name with spaces"), "name_with_spaces");
908    }
909
910    #[test]
911    fn hash_string_is_deterministic_truncated_lowercase_hex() {
912        let h = hash_string("hello", 8);
913        assert_eq!(h.len(), 8);
914        assert!(h
915            .chars()
916            .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()));
917        // Determinism.
918        assert_eq!(h, hash_string("hello", 8));
919        // Sensitivity to input.
920        assert_ne!(h, hash_string("world", 8));
921        // First 8 hex chars of SHA-256("hello") are 2cf24dba.
922        assert_eq!(h, "2cf24dba");
923    }
924
925    #[test]
926    fn hash_value_is_deterministic_and_distinguishes_inputs() {
927        let a = hash_value(&json!({"k": 1}), 12);
928        assert_eq!(a.len(), 12);
929        assert!(a.chars().all(|c| c.is_ascii_hexdigit()));
930        assert_eq!(a, hash_value(&json!({"k": 1}), 12));
931        assert_ne!(a, hash_value(&json!({"k": 2}), 12));
932    }
933
934    #[test]
935    fn digit_width_floors_at_four_and_grows_above_four_digit_counts() {
936        // Floor cases: small counts always pad to 4.
937        assert_eq!(digit_width(1), 4);
938        assert_eq!(digit_width(9), 4);
939        assert_eq!(digit_width(10), 4);
940        assert_eq!(digit_width(999), 4);
941        assert_eq!(digit_width(1000), 4);
942        // Above the floor: the function returns the actual digit count.
943        // These assertions distinguish the original arithmetic from mutants
944        // that swap `/=` for `%=`/`*=` or `+=` for `-=`/`*=`.
945        assert_eq!(digit_width(10_000), 5);
946        assert_eq!(digit_width(100_000), 6);
947        assert_eq!(digit_width(1_000_000), 7);
948    }
949
950    #[test]
951    fn leading_comment_start_at_zero_returns_zero_without_looping() {
952        // Mutating the `start > 0` loop guard to `start >= 0` would hang here
953        // because `saturating_sub(1)` on 0 keeps `start` at 0 forever.
954        assert_eq!(leading_comment_start("any leading text", 0), 0);
955        assert_eq!(leading_comment_start("", 0), 0);
956    }
957
958    #[test]
959    fn leading_comment_start_walks_through_consecutive_line_comments() {
960        let text = "// first comment\n// second comment\n  \"a\": 1\n";
961        let property_line_start = text.find("  \"a\"").unwrap();
962        // All preceding lines are comments, so the function walks all the way
963        // back to position 0. A replacement that always returns `1` would
964        // produce a non-zero result.
965        assert_eq!(leading_comment_start(text, property_line_start), 0);
966    }
967
968    #[test]
969    fn leading_comment_start_stops_at_non_comment_line() {
970        let text = "  \"prev\": true,\n// comment\n  \"a\": 1\n";
971        let property_line_start = text.find("  \"a\"").unwrap();
972        let comment_line_start = text.find("// comment").unwrap();
973        assert_eq!(
974            leading_comment_start(text, property_line_start),
975            comment_line_start
976        );
977    }
978
979    #[test]
980    fn line_end_returns_pos_plus_newline_offset() {
981        // The original maps `find('\n')` from `pos` to `pos + idx`. A mutant
982        // that replaces `+` with `*` would yield 0 for `pos = 0` (matching
983        // the original) but 2 for `pos = 1` (where the original returns 3).
984        assert_eq!(line_end("abc\ndef", 0), 3);
985        assert_eq!(line_end("abc\ndef", 1), 3);
986        assert_eq!(line_end("abc\ndef", 2), 3);
987    }
988
989    #[test]
990    fn line_end_returns_text_len_when_no_newline_follows() {
991        assert_eq!(line_end("abcdef", 0), 6);
992        assert_eq!(line_end("abcdef", 3), 6);
993    }
994
995    #[test]
996    fn jsonc_segment_with_comma_strips_surrounding_newlines_before_appending_comma() {
997        // The leading `trim_matches(|c| c == '\r' || c == '\n')` would become a no-op
998        // if the `||` is mutated to `&&` (no character is both \r AND \n).
999        let with_lf = "\n  \"name\": \"demo\"\n";
1000        let out = jsonc_segment_with_comma(with_lf);
1001        assert!(!out.starts_with('\n'), "stripped leading LF: {out:?}");
1002        assert!(out.ends_with(','), "appended trailing comma: {out:?}");
1003
1004        let with_crlf = "\r\n  \"x\": 1\r\n";
1005        let out = jsonc_segment_with_comma(with_crlf);
1006        assert!(!out.starts_with('\r'), "stripped leading CRLF: {out:?}");
1007        assert!(!out.starts_with('\n'), "stripped leading CRLF: {out:?}");
1008    }
1009
1010    #[test]
1011    fn disassemble_file_does_not_purge_existing_output_when_prepurge_false() {
1012        // Regression guard for the `pre_purge && output_dir.exists()` predicate:
1013        // mutating `&&` to `||` would delete a pre-existing output directory
1014        // even when the caller did not ask for it.
1015        let tmp = tempfile::tempdir().unwrap();
1016        let input = tmp.path().join("a.json");
1017        fs::write(&input, r#"{"x": 1}"#).unwrap();
1018        let output_dir = tmp.path().join("split");
1019        fs::create_dir_all(&output_dir).unwrap();
1020        let preexisting = output_dir.join("preexisting.txt");
1021        fs::write(&preexisting, "keep me").unwrap();
1022
1023        disassemble(DisassembleOptions {
1024            input: input.clone(),
1025            input_format: Some(Format::Json),
1026            output_dir: Some(output_dir.clone()),
1027            output_format: Some(Format::Json),
1028            unique_id: None,
1029            pre_purge: false,
1030            post_purge: false,
1031            ignore_path: None,
1032        })
1033        .unwrap();
1034
1035        assert!(
1036            preexisting.exists(),
1037            "pre_purge=false must not remove the existing output directory"
1038        );
1039    }
1040
1041    #[test]
1042    fn for_file_creates_options_with_all_defaults() {
1043        let input = PathBuf::from("config.json");
1044        let opts = DisassembleOptions::for_file(input.clone());
1045        assert_eq!(opts.input, input);
1046        assert!(opts.input_format.is_none());
1047        assert!(opts.output_dir.is_none());
1048        assert!(opts.output_format.is_none());
1049        assert!(opts.unique_id.is_none());
1050        assert!(!opts.pre_purge);
1051        assert!(!opts.post_purge);
1052        assert!(opts.ignore_path.is_none());
1053    }
1054
1055    #[test]
1056    fn unique_filename_for_key_hashes_when_sanitized_is_empty() {
1057        use std::collections::BTreeSet;
1058        // "..." sanitizes to "" after trimming all dots → falls back to hash_string
1059        let result = unique_filename_for_key("...", Format::Json, &BTreeSet::new());
1060        assert!(result.ends_with(".json"), "got: {result}");
1061        let base = result.trim_end_matches(".json");
1062        assert!(
1063            !base.is_empty(),
1064            "base should be a hash, not empty: {result}"
1065        );
1066        assert!(
1067            base.chars().all(|c| c.is_ascii_hexdigit()),
1068            "base should be hex: {base}"
1069        );
1070    }
1071
1072    #[test]
1073    fn unique_filename_for_key_appends_hash_when_name_already_in_used_set() {
1074        use std::collections::BTreeSet;
1075        let mut used = BTreeSet::new();
1076        used.insert("mykey.json".to_string());
1077        // "mykey" sanitizes to "mykey", but "mykey.json" is already taken
1078        let result = unique_filename_for_key("mykey", Format::Json, &used);
1079        assert!(result.starts_with("mykey-"), "got: {result}");
1080        assert!(result.ends_with(".json"), "got: {result}");
1081        assert_ne!(result, "mykey.json");
1082    }
1083
1084    #[test]
1085    fn write_jsonc_array_root_hashes_when_unique_id_collides_with_index_name() {
1086        let text = r#"[
1087  {
1088    "name": "0002",
1089    "value": 1,
1090  },
1091  {
1092    "value": 2,
1093  },
1094]"#;
1095        let array = parse_jsonc_ast(text).unwrap().as_array().unwrap().clone();
1096        let items = Format::Jsonc
1097            .parse(text)
1098            .unwrap()
1099            .as_array()
1100            .unwrap()
1101            .clone();
1102        let tmp = tempfile::tempdir().unwrap();
1103
1104        let root = write_jsonc_array_root(tmp.path(), text, array, &items, Some("name")).unwrap();
1105        let root = serde_json::to_value(&root).unwrap();
1106        let files = root["files"].as_array().unwrap();
1107        assert_eq!(files.len(), 2);
1108        assert_eq!(files[0], "0002.jsonc");
1109        let hashed = files[1].as_str().unwrap();
1110        assert!(hashed.starts_with("0002-"), "files: {files:?}");
1111        assert!(tmp.path().join(hashed).exists());
1112    }
1113
1114    #[test]
1115    fn parse_jsonc_ast_returns_error_for_empty_document() {
1116        // Exercises the `ok_or_else(|| Error::Invalid("JSONC document did not contain a value"))` closure.
1117        let err = parse_jsonc_ast("").expect_err("empty document has no value");
1118        assert!(
1119            err.to_string()
1120                .contains("JSONC document did not contain a value"),
1121            "got: {err}"
1122        );
1123    }
1124
1125    #[cfg(unix)]
1126    #[test]
1127    fn collect_disassemble_targets_skips_symlink_entries() {
1128        // On Unix, DirEntry::file_type() returns is_symlink()=true, is_file()=false
1129        // for symlinks, so the `!ft.is_file()` branch (continue) is exercised.
1130        use std::os::unix::fs::symlink;
1131        let tmp = tempfile::tempdir().unwrap();
1132        let real = tmp.path().join("real.json");
1133        fs::write(&real, r#"{"a":1}"#).unwrap();
1134        let link = tmp.path().join("link.json");
1135        symlink(&real, &link).unwrap();
1136
1137        let ignore = load_ignore_rules(None, tmp.path()).unwrap();
1138        let targets = collect_disassemble_targets(tmp.path(), &ignore, Some(Format::Json)).unwrap();
1139        // The real file should appear; the symlink should be skipped
1140        // (symlink's file_type() is_file() returns false → hits `!ft.is_file()` continue)
1141        assert!(
1142            !targets.is_empty(),
1143            "real.json must be collected: {targets:?}"
1144        );
1145    }
1146
1147    #[test]
1148    fn collect_disassemble_targets_skips_subdirs_with_metadata_sidecar() {
1149        // Exercises lines 220-223: subdirectories that contain a metadata
1150        // sidecar are skipped (the `if` body is NOT entered for them), while
1151        // ordinary subdirectories ARE pushed onto the stack.
1152        // A missing `!` on line 220 would cause `stack.push` to be called only
1153        // for split-output dirs and silently skip all plain subdirectories.
1154        let tmp = tempfile::tempdir().unwrap();
1155
1156        // Plain subdirectory: its file must be collected.
1157        let plain_sub = tmp.path().join("plain");
1158        fs::create_dir_all(&plain_sub).unwrap();
1159        let plain_file = plain_sub.join("config.json");
1160        fs::write(&plain_file, r#"{"a": 1}"#).unwrap();
1161
1162        // Previously-disassembled subdirectory: contains a metadata sidecar.
1163        // Files inside must NOT be collected.
1164        let split_sub = tmp.path().join("app");
1165        fs::create_dir_all(&split_sub).unwrap();
1166        fs::write(split_sub.join(crate::meta::META_FILENAME), "{}").unwrap();
1167        let split_file = split_sub.join("_main.json");
1168        fs::write(&split_file, r#"{"a": 1}"#).unwrap();
1169
1170        let ignore = load_ignore_rules(None, tmp.path()).unwrap();
1171        let targets = collect_disassemble_targets(tmp.path(), &ignore, Some(Format::Json)).unwrap();
1172
1173        assert!(
1174            targets.contains(&plain_file),
1175            "file in plain subdir must be collected: {targets:?}"
1176        );
1177        assert!(
1178            !targets.contains(&split_file),
1179            "file in split-output subdir must be skipped: {targets:?}"
1180        );
1181    }
1182}