// config_disassembler/disassemble.rs
1//! Disassemble a JSON, JSON5, YAML, TOON, TOML, or INI document into a directory of
2//! smaller files, optionally written in a different format than the input.
3//!
4//! The `input` may be either a single file or a directory. When it points
5//! at a directory, every file under the directory whose extension matches
6//! the input format (or, when `input_format` is `None`, any supported
7//! value-model format) is disassembled in place. An optional `ignore_path`
8//! can point at a `.gitignore`-style ignore file used to skip paths.
9
10use std::collections::{BTreeMap, BTreeSet};
11use std::fs;
12use std::path::{Path, PathBuf};
13
14use ignore::gitignore::{Gitignore, GitignoreBuilder};
15use jsonc_parser::ast;
16use jsonc_parser::common::Ranged;
17use serde_json::{Map, Value};
18use sha2::{Digest, Sha256};
19
20use crate::error::{Error, Result};
21use crate::format::{jsonc_parse_options, ConversionOperation, Format};
22use crate::ignore_file::DEFAULT_IGNORE_FILENAME;
23use crate::meta::{Meta, Root};
24
/// Basename (with extension appended per-format) of the file written for
/// object roots that collects all scalar top-level keys into one document.
const MAIN_BASENAME: &str = "_main";
27
/// Options controlling disassembly.
#[derive(Debug, Clone)]
pub struct DisassembleOptions {
    /// Path to the input. May be either a single config file or a
    /// directory; when it is a directory, every matching file under it
    /// is disassembled in place (see also `ignore_path`).
    pub input: PathBuf,
    /// Format to read the input as. If `None`, the format is inferred
    /// from each file's extension.
    pub input_format: Option<Format>,
    /// Directory to write split files into. Only meaningful when
    /// `input` is a single file; for directory inputs each file's
    /// output goes into a sibling directory named after that file's
    /// stem (mirroring the XML disassembler's behavior).
    pub output_dir: Option<PathBuf>,
    /// Format to write split files in. Defaults to `input_format`.
    pub output_format: Option<Format>,
    /// For array roots, name array-element files using the value of this
    /// field if present on each element (must be a scalar).
    pub unique_id: Option<String>,
    /// If true, remove the contents of the output directory before writing.
    pub pre_purge: bool,
    /// If true, delete each input file after it is disassembled. For
    /// directory inputs every matching file is deleted individually; the
    /// input directory itself is kept, since it then contains the
    /// per-file split output directories. (NOTE(review): an earlier
    /// version of this doc claimed the whole directory is removed — no
    /// code in this module does that.)
    pub post_purge: bool,
    /// Optional path to a `.gitignore`-style ignore file that filters
    /// which files are processed when `input` is a directory. Pass
    /// `None` to use [`DEFAULT_IGNORE_FILENAME`] in the input directory
    /// (silently absent if the file does not exist). Ignored entirely
    /// for single-file inputs.
    pub ignore_path: Option<PathBuf>,
}
61
62impl DisassembleOptions {
63    /// Build options for a single-file disassembly with sensible
64    /// defaults. Directory walks should construct `DisassembleOptions`
65    /// directly so they can opt into `ignore_path`.
66    pub fn for_file(input: PathBuf) -> Self {
67        Self {
68            input,
69            input_format: None,
70            output_dir: None,
71            output_format: None,
72            unique_id: None,
73            pre_purge: false,
74            post_purge: false,
75            ignore_path: None,
76        }
77    }
78}
79
80/// Disassemble a configuration file (or directory of files) into split
81/// files.
82///
83/// * When `opts.input` is a regular file, returns the directory the files
84///   were written to (i.e. the single output directory for that file).
85/// * When `opts.input` is a directory, every matching file under it is
86///   disassembled in place and the input directory itself is returned.
87pub fn disassemble(opts: DisassembleOptions) -> Result<PathBuf> {
88    let metadata = fs::metadata(&opts.input)?;
89    if metadata.is_dir() {
90        return disassemble_directory(opts);
91    }
92    disassemble_file(opts)
93}
94
95/// Disassemble a single file. Equivalent to the previous behavior of
96/// [`disassemble`].
97fn disassemble_file(opts: DisassembleOptions) -> Result<PathBuf> {
98    let input_format = match opts.input_format {
99        Some(f) => f,
100        None => Format::from_path(&opts.input)?,
101    };
102    let output_format = opts.output_format.unwrap_or(input_format);
103    input_format.ensure_can_convert_to(output_format, ConversionOperation::Convert)?;
104
105    let output_dir = match opts.output_dir.clone() {
106        Some(d) => d,
107        None => default_output_dir(&opts.input)?,
108    };
109
110    if opts.pre_purge && output_dir.exists() {
111        fs::remove_dir_all(&output_dir)?;
112    }
113    fs::create_dir_all(&output_dir)?;
114
115    let source_filename = opts
116        .input
117        .file_name()
118        .and_then(|n| n.to_str())
119        .map(|s| s.to_string());
120
121    if input_format == Format::Jsonc && output_format == Format::Jsonc {
122        let root =
123            write_jsonc_root_preserving(&opts.input, &output_dir, opts.unique_id.as_deref())?;
124        let meta = Meta {
125            source_format: input_format,
126            file_format: output_format,
127            source_filename,
128            root,
129        };
130        meta.write(&output_dir)?;
131
132        if opts.post_purge {
133            fs::remove_file(&opts.input)?;
134        }
135
136        return Ok(output_dir);
137    }
138
139    let value = input_format.load(&opts.input)?;
140
141    let root = match &value {
142        Value::Object(map) => write_object_root(&output_dir, map, output_format)?,
143        Value::Array(items) => {
144            write_array_root(&output_dir, items, output_format, opts.unique_id.as_deref())?
145        }
146        _ => {
147            return Err(Error::Invalid(
148                "top-level value must be an object or array to disassemble".into(),
149            ));
150        }
151    };
152
153    let meta = Meta {
154        source_format: input_format,
155        file_format: output_format,
156        source_filename,
157        root,
158    };
159    meta.write(&output_dir)?;
160
161    if opts.post_purge {
162        fs::remove_file(&opts.input)?;
163    }
164
165    Ok(output_dir)
166}
167
/// Disassemble every matching file under a directory. Each file's split
/// output is placed in a sibling directory named after the file's stem,
/// matching how the XML disassembler treats directory inputs. Returns
/// the input directory itself.
fn disassemble_directory(opts: DisassembleOptions) -> Result<PathBuf> {
    if opts.output_dir.is_some() {
        return Err(Error::Usage(
            "--output-dir is not supported with a directory input; each file's split output is written next to it".into(),
        ));
    }

    let root = opts.input.clone();
    let ignore = load_ignore_rules(opts.ignore_path.as_deref(), &root)?;

    let mut targets = collect_disassemble_targets(&root, &ignore, opts.input_format)?;
    // Sort for a deterministic processing order regardless of walk order.
    targets.sort();

    for file in &targets {
        let mut child_opts = opts.clone();
        child_opts.input = file.clone();
        // Each file's output goes into <stem>/ next to the file itself,
        // never into a shared --output-dir (we rejected that above).
        child_opts.output_dir = None;
        // post_purge is forwarded so each input *file* is deleted once it
        // has been disassembled. NOTE(review): nothing in this function
        // removes the input directory itself — it holds the split output
        // directories, so it must survive. An earlier comment claimed the
        // directory is removed "at the very end below"; no such code
        // exists.
        disassemble_file(child_opts)?;
    }

    Ok(root)
}
199
200/// Walk `root` and collect every file whose extension matches one of the
201/// supported formats (or, if `expected_format` is `Some`, only that
202/// format), excluding paths matched by `ignore`.
203fn collect_disassemble_targets(
204    root: &Path,
205    ignore: &Option<Gitignore>,
206    expected_format: Option<Format>,
207) -> Result<Vec<PathBuf>> {
208    let mut out = Vec::new();
209    let mut stack = vec![root.to_path_buf()];
210    while let Some(dir) = stack.pop() {
211        for entry in fs::read_dir(&dir)? {
212            let entry = entry?;
213            let path = entry.path();
214            let ft = entry.file_type()?;
215            if is_ignored(ignore, root, &path, ft.is_dir()) {
216                continue;
217            }
218            if ft.is_dir() {
219                stack.push(path);
220                continue;
221            }
222            if !ft.is_file() {
223                continue;
224            }
225            // Only look at files whose extension parses as a known
226            // format, and (when input_format was set) only the matching
227            // format. Anything else is silently skipped — a directory of
228            // mixed config files commonly contains README/.git/etc.
229            let detected = match Format::from_path(&path) {
230                Ok(f) => f,
231                Err(_) => continue,
232            };
233            if let Some(expected) = expected_format {
234                if expected != detected {
235                    continue;
236                }
237            }
238            out.push(path);
239        }
240    }
241    Ok(out)
242}
243
244fn load_ignore_rules(explicit: Option<&Path>, fallback_dir: &Path) -> Result<Option<Gitignore>> {
245    let path = match explicit {
246        Some(p) => p.to_path_buf(),
247        None => fallback_dir.join(DEFAULT_IGNORE_FILENAME),
248    };
249    if !path.exists() {
250        return Ok(None);
251    }
252    let content = fs::read_to_string(&path)?;
253    let anchor = path.parent().unwrap_or(Path::new("."));
254    let mut builder = GitignoreBuilder::new(anchor);
255    for line in content.lines() {
256        // `add_line` returns a pattern-error on malformed globs; mirror
257        // the XML disassembler's tolerant parsing and skip bad lines
258        // rather than failing the whole run.
259        let _ = builder.add_line(None, line);
260    }
261    Ok(builder.build().ok())
262}
263
264fn is_ignored(ignore: &Option<Gitignore>, root: &Path, path: &Path, is_dir: bool) -> bool {
265    let Some(ign) = ignore.as_ref() else {
266        return false;
267    };
268    let candidate = path.strip_prefix(root).unwrap_or(path);
269    ign.matched(candidate, is_dir).is_ignore()
270}
271
272fn default_output_dir(input: &Path) -> Result<PathBuf> {
273    let stem = input.file_stem().and_then(|s| s.to_str()).ok_or_else(|| {
274        Error::Invalid(format!(
275            "could not derive a directory name from {}",
276            input.display()
277        ))
278    })?;
279    let parent = input.parent().unwrap_or(Path::new("."));
280    Ok(parent.join(stem))
281}
282
283fn write_object_root(dir: &Path, map: &Map<String, Value>, fmt: Format) -> Result<Root> {
284    let mut key_order: Vec<String> = Vec::with_capacity(map.len());
285    let mut key_files: BTreeMap<String, String> = BTreeMap::new();
286    let mut main_object: Map<String, Value> = Map::new();
287    let mut used_names: BTreeSet<String> = BTreeSet::new();
288    used_names.insert(format!("{MAIN_BASENAME}.{}", fmt.extension()));
289
290    for (key, value) in map {
291        key_order.push(key.clone());
292        if is_scalar(value) {
293            main_object.insert(key.clone(), value.clone());
294            continue;
295        }
296
297        let filename = unique_filename_for_key(key, fmt, &used_names);
298        used_names.insert(filename.clone());
299        let path = dir.join(&filename);
300        let payload = fmt.wrap_split_payload(key, value);
301        fs::write(&path, fmt.serialize(&payload)?)?;
302        key_files.insert(key.clone(), filename);
303    }
304
305    let main_file = if main_object.is_empty() {
306        None
307    } else {
308        let filename = format!("{MAIN_BASENAME}.{}", fmt.extension());
309        let path = dir.join(&filename);
310        fs::write(&path, fmt.serialize(&Value::Object(main_object))?)?;
311        Some(filename)
312    };
313
314    Ok(Root::Object {
315        key_order,
316        key_files,
317        main_file,
318    })
319}
320
321fn write_array_root(
322    dir: &Path,
323    items: &[Value],
324    fmt: Format,
325    unique_id: Option<&str>,
326) -> Result<Root> {
327    let mut files = Vec::with_capacity(items.len());
328    let mut used_names: BTreeSet<String> = BTreeSet::new();
329    let width = digit_width(items.len());
330
331    for (idx, item) in items.iter().enumerate() {
332        let mut basename = if let Some(field) = unique_id {
333            unique_id_basename(item, field)
334        } else {
335            None
336        };
337        if basename
338            .as_ref()
339            .map(|n| used_names.contains(&format!("{n}.{}", fmt.extension())))
340            .unwrap_or(false)
341        {
342            basename = None;
343        }
344        let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
345
346        let mut filename = format!("{basename}.{}", fmt.extension());
347        if used_names.contains(&filename) {
348            filename = format!("{basename}-{}.{}", hash_value(item, 8), fmt.extension());
349        }
350        used_names.insert(filename.clone());
351
352        let path = dir.join(&filename);
353        fs::write(&path, fmt.serialize(item)?)?;
354        files.push(filename);
355    }
356
357    Ok(Root::Array { files })
358}
359
360fn write_jsonc_root_preserving(input: &Path, dir: &Path, unique_id: Option<&str>) -> Result<Root> {
361    let text = fs::read_to_string(input)?;
362    let ast = parse_jsonc_ast(&text)?;
363    let value = Format::Jsonc.parse(&text)?;
364
365    match (ast, value) {
366        (ast::Value::Object(object), Value::Object(_)) => {
367            write_jsonc_object_root(dir, &text, object)
368        }
369        (ast::Value::Array(array), Value::Array(items)) => {
370            write_jsonc_array_root(dir, &text, array, &items, unique_id)
371        }
372        _ => Err(Error::Invalid(
373            "top-level value must be an object or array to disassemble".into(),
374        )),
375    }
376}
377
378fn write_jsonc_object_root(dir: &Path, text: &str, object: ast::Object<'_>) -> Result<Root> {
379    let properties = jsonc_object_properties(text, object)?;
380    let mut key_order = Vec::with_capacity(properties.len());
381    let mut key_files: BTreeMap<String, String> = BTreeMap::new();
382    let mut main_segments = Vec::new();
383    let mut used_names: BTreeSet<String> = BTreeSet::new();
384    used_names.insert(format!("{MAIN_BASENAME}.{}", Format::Jsonc.extension()));
385
386    for property in properties {
387        key_order.push(property.key.clone());
388        if property.is_scalar {
389            main_segments.push(property.segment);
390            continue;
391        }
392
393        let filename = unique_filename_for_key(&property.key, Format::Jsonc, &used_names);
394        used_names.insert(filename.clone());
395        let path = dir.join(&filename);
396        let text = ensure_trailing_newline(&property.value_text);
397        fs::write(path, text)?;
398        key_files.insert(property.key, filename);
399    }
400
401    let main_file = if main_segments.is_empty() {
402        None
403    } else {
404        let filename = format!("{MAIN_BASENAME}.{}", Format::Jsonc.extension());
405        let path = dir.join(&filename);
406        let text = render_jsonc_object(main_segments.iter());
407        fs::write(path, text)?;
408        Some(filename)
409    };
410
411    Ok(Root::Object {
412        key_order,
413        key_files,
414        main_file,
415    })
416}
417
418fn write_jsonc_array_root(
419    dir: &Path,
420    text: &str,
421    array: ast::Array<'_>,
422    items: &[Value],
423    unique_id: Option<&str>,
424) -> Result<Root> {
425    if array.elements.len() != items.len() {
426        return Err(Error::Invalid(
427            "JSONC AST and value model disagree on array length".into(),
428        ));
429    }
430
431    let mut files = Vec::with_capacity(array.elements.len());
432    let mut used_names: BTreeSet<String> = BTreeSet::new();
433    let width = digit_width(array.elements.len());
434
435    for (idx, (element, item)) in array.elements.iter().zip(items).enumerate() {
436        let mut basename = unique_id.and_then(|field| unique_id_basename(item, field));
437        if basename
438            .as_ref()
439            .map(|n| used_names.contains(&format!("{n}.{}", Format::Jsonc.extension())))
440            .unwrap_or(false)
441        {
442            basename = None;
443        }
444        let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
445
446        let mut filename = format!("{basename}.{}", Format::Jsonc.extension());
447        if used_names.contains(&filename) {
448            filename = format!(
449                "{basename}-{}.{}",
450                hash_value(item, 8),
451                Format::Jsonc.extension()
452            );
453        }
454        used_names.insert(filename.clone());
455
456        let value_text = element.text(text).trim();
457        fs::write(dir.join(&filename), ensure_trailing_newline(value_text))?;
458        files.push(filename);
459    }
460
461    Ok(Root::Array { files })
462}
463
/// Source-level view of one property of a JSONC object.
struct JsoncPropertySyntax {
    /// Property key with quoting removed.
    key: String,
    /// True when the value is neither an object nor an array.
    is_scalar: bool,
    /// Whole source lines for the property, including any leading
    /// comment-only lines (used to rebuild `_main` with comments intact).
    segment: String,
    /// The value's own source text, trimmed of surrounding whitespace.
    value_text: String,
}
470
471fn jsonc_object_properties(
472    text: &str,
473    object: ast::Object<'_>,
474) -> Result<Vec<JsoncPropertySyntax>> {
475    let mut properties = Vec::with_capacity(object.properties.len());
476    for property in object.properties {
477        let key = property.name.clone().into_string();
478        let property_range = property.range();
479        let value_range = property.value.range();
480        properties.push(JsoncPropertySyntax {
481            key,
482            is_scalar: is_jsonc_ast_scalar(&property.value),
483            segment: jsonc_property_segment(text, property_range.start, value_range.end)
484                .to_string(),
485            value_text: property.value.text(text).trim().to_string(),
486        });
487    }
488    Ok(properties)
489}
490
491fn parse_jsonc_ast(text: &str) -> Result<ast::Value<'_>> {
492    jsonc_parser::parse_to_ast(text, &Default::default(), &jsonc_parse_options())
493        .map_err(|e| Error::Invalid(format!("jsonc parse error: {e}")))?
494        .value
495        .ok_or_else(|| Error::Invalid("JSONC document did not contain a value".into()))
496}
497
498fn is_jsonc_ast_scalar(value: &ast::Value<'_>) -> bool {
499    !matches!(value, ast::Value::Object(_) | ast::Value::Array(_))
500}
501
502fn jsonc_property_segment(text: &str, property_start: usize, value_end: usize) -> &str {
503    let start = leading_comment_start(text, line_start(text, property_start));
504    let end = line_end(text, value_end);
505    &text[start..end]
506}
507
/// Walk upward from `start` (which must be a line-start offset) over any
/// immediately preceding blank or comment-looking lines, returning the
/// offset of the first line of that run. Used so a property's leading
/// comments travel with it when the document is split.
fn leading_comment_start(text: &str, mut start: usize) -> usize {
    while start > 0 {
        // `start` is a line start, so the byte before it is the previous
        // line's terminating '\n'; slicing up to it excludes that newline.
        let previous_line_end = start.saturating_sub(1);
        let previous_line_start = line_start(text, previous_line_end);
        let line = &text[previous_line_start..previous_line_end];
        let trimmed = line.trim();
        // "Comment-ish" heuristic: blank lines, `//` line comments, and
        // the begin/middle/end shapes of a `/* ... */` block comment.
        if trimmed.is_empty()
            || trimmed.starts_with("//")
            || trimmed.starts_with("/*")
            || trimmed.starts_with('*')
            || trimmed.ends_with("*/")
        {
            start = previous_line_start;
        } else {
            break;
        }
    }
    start
}
527
/// Byte offset of the start of the line containing `pos` (0 when no
/// newline precedes it).
fn line_start(text: &str, pos: usize) -> usize {
    match text[..pos].rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    }
}
531
/// Byte offset of the '\n' ending the line containing `pos`, or the end
/// of `text` when the final line is unterminated.
fn line_end(text: &str, pos: usize) -> usize {
    match text[pos..].find('\n') {
        Some(offset) => pos + offset,
        None => text.len(),
    }
}
538
539fn render_jsonc_object<'a>(segments: impl IntoIterator<Item = &'a String>) -> String {
540    let mut out = String::from("{\n");
541    for segment in segments {
542        out.push_str(&jsonc_segment_with_comma(segment));
543        out.push('\n');
544    }
545    out.push_str("}\n");
546    out
547}
548
/// Return `segment` with a trailing comma guaranteed on its last line,
/// inserting the comma *before* any trailing `//` line comment so the
/// comment stays attached to its value.
fn jsonc_segment_with_comma(segment: &str) -> String {
    // Strip only leading/trailing line terminators; interior newlines of
    // a multi-line segment are preserved.
    let segment = segment.trim_matches(|c| c == '\r' || c == '\n');
    if segment.trim_end().ends_with(',') {
        return segment.to_string();
    }

    let last = last_line(segment);
    let last_line_start = segment.len() - last.len();
    if let Some(comment_start) = line_comment_start(last) {
        // Re-anchor the comment offset from the last line back to the
        // whole segment, then splice the comma in front of the comment.
        let comment_start = last_line_start + comment_start;
        let (before_comment, comment) = segment.split_at(comment_start);
        return format!("{},{}", before_comment.trim_end(), comment);
    }

    // No trailing comment on the last line: append the comma directly.
    format!("{segment},")
}
565
/// The substring after the final `\n`, or the whole input when it has no
/// newline. Kept as a helper so callers stay free of explicit `idx + 1`
/// byte arithmetic (which previously produced an unkillable equivalent
/// mutant); `rsplit_once` does the boundary math for us.
fn last_line(s: &str) -> &str {
    match s.rsplit_once('\n') {
        Some((_, tail)) => tail,
        None => s,
    }
}
576
/// Byte offset where a trailing `//` line comment begins in `line`, or
/// `None`. `//` sequences inside double-quoted strings (with `\"`
/// escapes honored) are not comments.
fn line_comment_start(line: &str) -> Option<usize> {
    let mut inside_string = false;
    let mut skip_next = false;

    for (idx, ch) in line.char_indices() {
        if inside_string {
            if skip_next {
                // Character after a backslash: consumed unconditionally.
                skip_next = false;
            } else if ch == '\\' {
                skip_next = true;
            } else if ch == '"' {
                inside_string = false;
            }
        } else if ch == '"' {
            inside_string = true;
        } else if ch == '/' && line[idx + 1..].starts_with('/') {
            return Some(idx);
        }
    }

    None
}
603
/// Return `text` with exactly one guaranteed trailing '\n' (never adds a
/// second one).
fn ensure_trailing_newline(text: &str) -> String {
    if text.ends_with('\n') {
        text.to_string()
    } else {
        format!("{text}\n")
    }
}
611
612fn is_scalar(value: &Value) -> bool {
613    !matches!(value, Value::Object(_) | Value::Array(_))
614}
615
/// Zero-padding width for index-based filenames: the decimal digit count
/// of `count`, floored at 4 so small arrays still sort lexically.
fn digit_width(count: usize) -> usize {
    let mut digits = 1;
    let mut remaining = count / 10;
    while remaining > 0 {
        digits += 1;
        remaining /= 10;
    }
    digits.max(4)
}
625
626fn unique_filename_for_key(key: &str, fmt: Format, used: &BTreeSet<String>) -> String {
627    let sanitized = sanitize(key);
628    let base = if sanitized.is_empty() {
629        hash_string(key, 12)
630    } else {
631        sanitized
632    };
633    let mut filename = format!("{base}.{}", fmt.extension());
634    if used.contains(&filename) {
635        filename = format!("{base}-{}.{}", hash_string(key, 8), fmt.extension());
636    }
637    filename
638}
639
640fn unique_id_basename(item: &Value, field: &str) -> Option<String> {
641    let map = item.as_object()?;
642    let raw = match map.get(field)? {
643        Value::String(s) => s.clone(),
644        Value::Number(n) => n.to_string(),
645        Value::Bool(b) => b.to_string(),
646        _ => return None,
647    };
648    let s = sanitize(&raw);
649    if s.is_empty() {
650        None
651    } else {
652        Some(s)
653    }
654}
655
/// Make a string safe for use as a filename stem: ASCII alphanumerics,
/// `-`, `_`, and `.` pass through, everything else becomes `_`, and
/// leading/trailing dots are trimmed afterwards.
fn sanitize(input: &str) -> String {
    let mapped: String = input
        .chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.') {
                c
            } else {
                '_'
            }
        })
        .collect();
    mapped.trim_matches('.').to_string()
}
670
671fn hash_string(input: &str, len: usize) -> String {
672    let digest = Sha256::digest(input.as_bytes());
673    let hex: String = digest.iter().map(|b| format!("{b:02x}")).collect();
674    hex.chars().take(len).collect()
675}
676
677fn hash_value(value: &Value, len: usize) -> String {
678    let canonical = serde_json::to_string(value).unwrap_or_default();
679    hash_string(&canonical, len)
680}
681
682#[cfg(test)]
683mod tests {
684    use super::*;
685    use serde_json::json;
686
687    #[test]
688    fn jsonc_segment_with_comma_inserts_before_trailing_line_comment() {
689        assert_eq!(
690            jsonc_segment_with_comma(r#"  "name": "demo" // keep this comment"#),
691            r#"  "name": "demo",// keep this comment"#
692        );
693    }
694
695    #[test]
696    fn jsonc_segment_with_comma_inserts_comma_before_trailing_comment_on_multi_line() {
697        // Pins `last_line_start = idx + 1` on the `rfind('\n').map(|idx|
698        // idx + 1)` path. Mutating `+ 1` to `- 1` would back the slice up
699        // by two bytes, putting an unbalanced `"` at the start of
700        // `last_line`. That flips `line_comment_start` into in-string mode
701        // for the rest of the slice, it returns None, and we fall through
702        // to `format!("{segment},")` -- the comma ends up *after* the
703        // comment instead of before it.
704        let input = "  \"a\": \"x\"\n  \"b\": 2 // trail";
705        assert_eq!(
706            jsonc_segment_with_comma(input),
707            "  \"a\": \"x\"\n  \"b\": 2,// trail"
708        );
709    }
710
711    #[test]
712    fn jsonc_segment_with_comma_ignores_comment_markers_inside_strings() {
713        assert_eq!(
714            jsonc_segment_with_comma(r#"  "url": "https://example.com/a""#),
715            r#"  "url": "https://example.com/a","#
716        );
717    }
718
719    #[test]
720    fn jsonc_segment_with_comma_leaves_existing_comma_alone() {
721        assert_eq!(
722            jsonc_segment_with_comma("  \"enabled\": true,"),
723            "  \"enabled\": true,"
724        );
725    }
726
727    #[test]
728    fn line_comment_start_respects_escaped_quotes() {
729        let line = r#"  "text": "escaped \" quote // still string" // comment"#;
730        assert_eq!(
731            line_comment_start(line),
732            Some(line.find(" // comment").unwrap() + 1)
733        );
734    }
735
736    #[test]
737    fn ensure_trailing_newline_does_not_duplicate_newline() {
738        assert_eq!(ensure_trailing_newline("value\n"), "value\n");
739        assert_eq!(ensure_trailing_newline("value"), "value\n");
740    }
741
742    #[test]
743    fn jsonc_same_format_post_purge_removes_input_file() {
744        let tmp = tempfile::tempdir().unwrap();
745        let input = tmp.path().join("config.jsonc");
746        fs::write(
747            &input,
748            r#"{
749  "name": "demo",
750  "settings": {
751    "retry": 3,
752  },
753}"#,
754        )
755        .unwrap();
756
757        let output_dir = tmp.path().join("split");
758        let dir = disassemble(DisassembleOptions {
759            input: input.clone(),
760            input_format: Some(Format::Jsonc),
761            output_dir: Some(output_dir),
762            output_format: Some(Format::Jsonc),
763            unique_id: None,
764            pre_purge: false,
765            post_purge: true,
766            ignore_path: None,
767        })
768        .unwrap();
769
770        assert!(!input.exists());
771        assert!(dir.join("settings.jsonc").exists());
772        assert!(dir.join(MAIN_BASENAME).with_extension("jsonc").exists());
773    }
774
775    #[test]
776    fn write_jsonc_object_root_writes_nested_and_main_files() {
777        let text = r#"{
778  "name": "demo",
779  "settings": {
780    "retry": 3,
781  },
782}"#;
783        let object = parse_jsonc_ast(text).unwrap().as_object().unwrap().clone();
784        let tmp = tempfile::tempdir().unwrap();
785
786        let root = write_jsonc_object_root(tmp.path(), text, object).unwrap();
787        let root = serde_json::to_value(&root).unwrap();
788        assert_eq!(root["kind"], "object");
789        assert_eq!(root["key_order"], json!(["name", "settings"]));
790        assert_eq!(root["key_files"]["settings"], "settings.jsonc");
791        assert_eq!(root["main_file"], "_main.jsonc");
792        assert!(fs::read_to_string(tmp.path().join("settings.jsonc"))
793            .unwrap()
794            .contains(r#""retry": 3"#));
795        assert!(fs::read_to_string(tmp.path().join("_main.jsonc"))
796            .unwrap()
797            .contains(r#""name": "demo","#));
798    }
799
800    #[test]
801    fn write_jsonc_array_root_rejects_ast_value_length_mismatch() {
802        let text = "[1, 2]";
803        let array = parse_jsonc_ast(text).unwrap().as_array().unwrap().clone();
804        let tmp = tempfile::tempdir().unwrap();
805
806        let err = write_jsonc_array_root(tmp.path(), text, array, &[json!(1)], None)
807            .expect_err("should reject mismatched inputs");
808
809        assert!(
810            err.to_string()
811                .contains("JSONC AST and value model disagree on array length"),
812            "got: {err}"
813        );
814    }
815
816    #[test]
817    fn unique_id_basename_accepts_numeric_field() {
818        // Regression guard: a numeric unique-id field must produce a filename,
819        // not fall through to the `None` arm.
820        let v = json!({"id": 42});
821        assert_eq!(unique_id_basename(&v, "id"), Some("42".to_string()));
822    }
823
824    #[test]
825    fn unique_id_basename_accepts_bool_field() {
826        // Regression guard: a boolean unique-id field must produce a filename.
827        let v = json!({"flag": true});
828        assert_eq!(unique_id_basename(&v, "flag"), Some("true".to_string()));
829        let v = json!({"flag": false});
830        assert_eq!(unique_id_basename(&v, "flag"), Some("false".to_string()));
831    }
832
833    #[test]
834    fn unique_id_basename_returns_none_for_missing_or_unsupported() {
835        let v = json!({"id": "x"});
836        assert_eq!(unique_id_basename(&v, "missing"), None);
837        let v = json!({"id": null});
838        assert_eq!(unique_id_basename(&v, "id"), None);
839        let v = json!({"id": ["nested"]});
840        assert_eq!(unique_id_basename(&v, "id"), None);
841    }
842
843    #[test]
844    fn sanitize_preserves_allowed_chars_and_replaces_others() {
845        // Each disjunct of the allowed-char check must be exercised: alphanumeric,
846        // dash, underscore, and dot all survive; anything else becomes `_`.
847        assert_eq!(sanitize("abc123-_."), "abc123-_");
848        assert_eq!(sanitize("foo@bar!"), "foo_bar_");
849        // Leading/trailing dots are trimmed off after the per-char map.
850        assert_eq!(sanitize(".start.end."), "start.end");
851        assert_eq!(sanitize("name with spaces"), "name_with_spaces");
852    }
853
854    #[test]
855    fn hash_string_is_deterministic_truncated_lowercase_hex() {
856        let h = hash_string("hello", 8);
857        assert_eq!(h.len(), 8);
858        assert!(h
859            .chars()
860            .all(|c| c.is_ascii_hexdigit() && !c.is_ascii_uppercase()));
861        // Determinism.
862        assert_eq!(h, hash_string("hello", 8));
863        // Sensitivity to input.
864        assert_ne!(h, hash_string("world", 8));
865        // First 8 hex chars of SHA-256("hello") are 2cf24dba.
866        assert_eq!(h, "2cf24dba");
867    }
868
869    #[test]
870    fn hash_value_is_deterministic_and_distinguishes_inputs() {
871        let a = hash_value(&json!({"k": 1}), 12);
872        assert_eq!(a.len(), 12);
873        assert!(a.chars().all(|c| c.is_ascii_hexdigit()));
874        assert_eq!(a, hash_value(&json!({"k": 1}), 12));
875        assert_ne!(a, hash_value(&json!({"k": 2}), 12));
876    }
877
878    #[test]
879    fn digit_width_floors_at_four_and_grows_above_four_digit_counts() {
880        // Floor cases: small counts always pad to 4.
881        assert_eq!(digit_width(1), 4);
882        assert_eq!(digit_width(9), 4);
883        assert_eq!(digit_width(10), 4);
884        assert_eq!(digit_width(999), 4);
885        assert_eq!(digit_width(1000), 4);
886        // Above the floor: the function returns the actual digit count.
887        // These assertions distinguish the original arithmetic from mutants
888        // that swap `/=` for `%=`/`*=` or `+=` for `-=`/`*=`.
889        assert_eq!(digit_width(10_000), 5);
890        assert_eq!(digit_width(100_000), 6);
891        assert_eq!(digit_width(1_000_000), 7);
892    }
893
894    #[test]
895    fn leading_comment_start_at_zero_returns_zero_without_looping() {
896        // Mutating the `start > 0` loop guard to `start >= 0` would hang here
897        // because `saturating_sub(1)` on 0 keeps `start` at 0 forever.
898        assert_eq!(leading_comment_start("any leading text", 0), 0);
899        assert_eq!(leading_comment_start("", 0), 0);
900    }
901
902    #[test]
903    fn leading_comment_start_walks_through_consecutive_line_comments() {
904        let text = "// first comment\n// second comment\n  \"a\": 1\n";
905        let property_line_start = text.find("  \"a\"").unwrap();
906        // All preceding lines are comments, so the function walks all the way
907        // back to position 0. A replacement that always returns `1` would
908        // produce a non-zero result.
909        assert_eq!(leading_comment_start(text, property_line_start), 0);
910    }
911
912    #[test]
913    fn leading_comment_start_stops_at_non_comment_line() {
914        let text = "  \"prev\": true,\n// comment\n  \"a\": 1\n";
915        let property_line_start = text.find("  \"a\"").unwrap();
916        let comment_line_start = text.find("// comment").unwrap();
917        assert_eq!(
918            leading_comment_start(text, property_line_start),
919            comment_line_start
920        );
921    }
922
923    #[test]
924    fn line_end_returns_pos_plus_newline_offset() {
925        // The original maps `find('\n')` from `pos` to `pos + idx`. A mutant
926        // that replaces `+` with `*` would yield 0 for `pos = 0` (matching
927        // the original) but 2 for `pos = 1` (where the original returns 3).
928        assert_eq!(line_end("abc\ndef", 0), 3);
929        assert_eq!(line_end("abc\ndef", 1), 3);
930        assert_eq!(line_end("abc\ndef", 2), 3);
931    }
932
933    #[test]
934    fn line_end_returns_text_len_when_no_newline_follows() {
935        assert_eq!(line_end("abcdef", 0), 6);
936        assert_eq!(line_end("abcdef", 3), 6);
937    }
938
939    #[test]
940    fn jsonc_segment_with_comma_strips_surrounding_newlines_before_appending_comma() {
941        // The leading `trim_matches(|c| c == '\r' || c == '\n')` would become a no-op
942        // if the `||` is mutated to `&&` (no character is both \r AND \n).
943        let with_lf = "\n  \"name\": \"demo\"\n";
944        let out = jsonc_segment_with_comma(with_lf);
945        assert!(!out.starts_with('\n'), "stripped leading LF: {out:?}");
946        assert!(out.ends_with(','), "appended trailing comma: {out:?}");
947
948        let with_crlf = "\r\n  \"x\": 1\r\n";
949        let out = jsonc_segment_with_comma(with_crlf);
950        assert!(!out.starts_with('\r'), "stripped leading CRLF: {out:?}");
951        assert!(!out.starts_with('\n'), "stripped leading CRLF: {out:?}");
952    }
953
954    #[test]
955    fn disassemble_file_does_not_purge_existing_output_when_prepurge_false() {
956        // Regression guard for the `pre_purge && output_dir.exists()` predicate:
957        // mutating `&&` to `||` would delete a pre-existing output directory
958        // even when the caller did not ask for it.
959        let tmp = tempfile::tempdir().unwrap();
960        let input = tmp.path().join("a.json");
961        fs::write(&input, r#"{"x": 1}"#).unwrap();
962        let output_dir = tmp.path().join("split");
963        fs::create_dir_all(&output_dir).unwrap();
964        let preexisting = output_dir.join("preexisting.txt");
965        fs::write(&preexisting, "keep me").unwrap();
966
967        disassemble(DisassembleOptions {
968            input: input.clone(),
969            input_format: Some(Format::Json),
970            output_dir: Some(output_dir.clone()),
971            output_format: Some(Format::Json),
972            unique_id: None,
973            pre_purge: false,
974            post_purge: false,
975            ignore_path: None,
976        })
977        .unwrap();
978
979        assert!(
980            preexisting.exists(),
981            "pre_purge=false must not remove the existing output directory"
982        );
983    }
984
985    #[test]
986    fn write_jsonc_array_root_hashes_when_unique_id_collides_with_index_name() {
987        let text = r#"[
988  {
989    "name": "0002",
990    "value": 1,
991  },
992  {
993    "value": 2,
994  },
995]"#;
996        let array = parse_jsonc_ast(text).unwrap().as_array().unwrap().clone();
997        let items = Format::Jsonc
998            .parse(text)
999            .unwrap()
1000            .as_array()
1001            .unwrap()
1002            .clone();
1003        let tmp = tempfile::tempdir().unwrap();
1004
1005        let root = write_jsonc_array_root(tmp.path(), text, array, &items, Some("name")).unwrap();
1006        let root = serde_json::to_value(&root).unwrap();
1007        let files = root["files"].as_array().unwrap();
1008        assert_eq!(files.len(), 2);
1009        assert_eq!(files[0], "0002.jsonc");
1010        let hashed = files[1].as_str().unwrap();
1011        assert!(hashed.starts_with("0002-"), "files: {files:?}");
1012        assert!(tmp.path().join(hashed).exists());
1013    }
1014}