1use std::collections::{BTreeMap, BTreeSet};
11use std::fs;
12use std::path::{Path, PathBuf};
13
14use ignore::gitignore::{Gitignore, GitignoreBuilder};
15use jsonc_parser::ast;
16use jsonc_parser::common::Ranged;
17use serde_json::{Map, Value};
18use sha2::{Digest, Sha256};
19
20use crate::error::{Error, Result};
21use crate::format::{jsonc_parse_options, ConversionOperation, Format};
22use crate::ignore_file::DEFAULT_IGNORE_FILENAME;
23use crate::meta::{Meta, Root};
24
/// Basename (without extension) of the file that collects a split object's
/// scalar entries; reserved up front so no key can claim the same filename.
const MAIN_BASENAME: &str = "_main";
27
/// Options controlling how a file (or every eligible file in a directory)
/// is disassembled into a directory of per-key / per-element files.
#[derive(Debug, Clone)]
pub struct DisassembleOptions {
    /// File or directory to disassemble.
    pub input: PathBuf,
    /// Input format; detected from the file path when `None`.
    pub input_format: Option<Format>,
    /// Destination directory; derived from the input's file stem when `None`.
    pub output_dir: Option<PathBuf>,
    /// Format of the split files; defaults to the input format when `None`.
    pub output_format: Option<Format>,
    /// Object field used to name array-element files (falls back to indices).
    pub unique_id: Option<String>,
    /// Remove an existing output directory before writing.
    pub pre_purge: bool,
    /// Delete the input file after a successful disassemble.
    pub post_purge: bool,
    /// Explicit ignore-file path; when `None`, the default ignore file inside
    /// the input directory is used (directory inputs only).
    pub ignore_path: Option<PathBuf>,
}
61
62impl DisassembleOptions {
63 pub fn for_file(input: PathBuf) -> Self {
67 Self {
68 input,
69 input_format: None,
70 output_dir: None,
71 output_format: None,
72 unique_id: None,
73 pre_purge: false,
74 post_purge: false,
75 ignore_path: None,
76 }
77 }
78}
79
80pub fn disassemble(opts: DisassembleOptions) -> Result<PathBuf> {
88 let metadata = fs::metadata(&opts.input)?;
89 if metadata.is_dir() {
90 return disassemble_directory(opts);
91 }
92 disassemble_file(opts)
93}
94
95fn disassemble_file(opts: DisassembleOptions) -> Result<PathBuf> {
98 let input_format = match opts.input_format {
99 Some(f) => f,
100 None => Format::from_path(&opts.input)?,
101 };
102 let output_format = opts.output_format.unwrap_or(input_format);
103 input_format.ensure_can_convert_to(output_format, ConversionOperation::Convert)?;
104
105 let output_dir = match opts.output_dir.clone() {
106 Some(d) => d,
107 None => default_output_dir(&opts.input)?,
108 };
109
110 if opts.pre_purge && output_dir.exists() {
111 fs::remove_dir_all(&output_dir)?;
112 }
113 fs::create_dir_all(&output_dir)?;
114
115 let source_filename = opts
116 .input
117 .file_name()
118 .and_then(|n| n.to_str())
119 .map(|s| s.to_string());
120
121 if input_format == Format::Jsonc && output_format == Format::Jsonc {
122 let root =
123 write_jsonc_root_preserving(&opts.input, &output_dir, opts.unique_id.as_deref())?;
124 let meta = Meta {
125 source_format: input_format,
126 file_format: output_format,
127 source_filename,
128 root,
129 };
130 meta.write(&output_dir)?;
131
132 if opts.post_purge {
133 fs::remove_file(&opts.input)?;
134 }
135
136 return Ok(output_dir);
137 }
138
139 let value = input_format.load(&opts.input)?;
140
141 let root = match &value {
142 Value::Object(map) => write_object_root(&output_dir, map, output_format)?,
143 Value::Array(items) => {
144 write_array_root(&output_dir, items, output_format, opts.unique_id.as_deref())?
145 }
146 _ => {
147 return Err(Error::Invalid(
148 "top-level value must be an object or array to disassemble".into(),
149 ));
150 }
151 };
152
153 let meta = Meta {
154 source_format: input_format,
155 file_format: output_format,
156 source_filename,
157 root,
158 };
159 meta.write(&output_dir)?;
160
161 if opts.post_purge {
162 fs::remove_file(&opts.input)?;
163 }
164
165 Ok(output_dir)
166}
167
168fn disassemble_directory(opts: DisassembleOptions) -> Result<PathBuf> {
172 if opts.output_dir.is_some() {
173 return Err(Error::Usage(
174 "--output-dir is not supported with a directory input; each file's split output is written next to it".into(),
175 ));
176 }
177
178 let root = opts.input.clone();
179 let ignore = load_ignore_rules(opts.ignore_path.as_deref(), &root)?;
180
181 let mut targets = collect_disassemble_targets(&root, &ignore, opts.input_format)?;
182 targets.sort();
183
184 for file in &targets {
185 let mut child_opts = opts.clone();
186 child_opts.input = file.clone();
187 child_opts.output_dir = None;
190 disassemble_file(child_opts)?;
195 }
196
197 Ok(root)
198}
199
200fn collect_disassemble_targets(
204 root: &Path,
205 ignore: &Option<Gitignore>,
206 expected_format: Option<Format>,
207) -> Result<Vec<PathBuf>> {
208 let mut out = Vec::new();
209 let mut stack = vec![root.to_path_buf()];
210 while let Some(dir) = stack.pop() {
211 for entry in fs::read_dir(&dir)? {
212 let entry = entry?;
213 let path = entry.path();
214 let ft = entry.file_type()?;
215 if is_ignored(ignore, root, &path, ft.is_dir()) {
216 continue;
217 }
218 if ft.is_dir() {
219 stack.push(path);
220 continue;
221 }
222 if !ft.is_file() {
223 continue;
224 }
225 let detected = match Format::from_path(&path) {
230 Ok(f) => f,
231 Err(_) => continue,
232 };
233 if let Some(expected) = expected_format {
234 if expected != detected {
235 continue;
236 }
237 }
238 out.push(path);
239 }
240 }
241 Ok(out)
242}
243
244fn load_ignore_rules(explicit: Option<&Path>, fallback_dir: &Path) -> Result<Option<Gitignore>> {
245 let path = match explicit {
246 Some(p) => p.to_path_buf(),
247 None => fallback_dir.join(DEFAULT_IGNORE_FILENAME),
248 };
249 if !path.exists() {
250 return Ok(None);
251 }
252 let content = fs::read_to_string(&path)?;
253 let anchor = path.parent().unwrap_or(Path::new("."));
254 let mut builder = GitignoreBuilder::new(anchor);
255 for line in content.lines() {
256 let _ = builder.add_line(None, line);
260 }
261 Ok(builder.build().ok())
262}
263
264fn is_ignored(ignore: &Option<Gitignore>, root: &Path, path: &Path, is_dir: bool) -> bool {
265 let Some(ign) = ignore.as_ref() else {
266 return false;
267 };
268 let candidate = path.strip_prefix(root).unwrap_or(path);
269 ign.matched(candidate, is_dir).is_ignore()
270}
271
272fn default_output_dir(input: &Path) -> Result<PathBuf> {
273 let stem = input.file_stem().and_then(|s| s.to_str()).ok_or_else(|| {
274 Error::Invalid(format!(
275 "could not derive a directory name from {}",
276 input.display()
277 ))
278 })?;
279 let parent = input.parent().unwrap_or(Path::new("."));
280 Ok(parent.join(stem))
281}
282
283fn write_object_root(dir: &Path, map: &Map<String, Value>, fmt: Format) -> Result<Root> {
284 let mut key_order: Vec<String> = Vec::with_capacity(map.len());
285 let mut key_files: BTreeMap<String, String> = BTreeMap::new();
286 let mut main_object: Map<String, Value> = Map::new();
287 let mut used_names: BTreeSet<String> = BTreeSet::new();
288 used_names.insert(format!("{MAIN_BASENAME}.{}", fmt.extension()));
289
290 for (key, value) in map {
291 key_order.push(key.clone());
292 if is_scalar(value) {
293 main_object.insert(key.clone(), value.clone());
294 continue;
295 }
296
297 let filename = unique_filename_for_key(key, fmt, &used_names);
298 used_names.insert(filename.clone());
299 let path = dir.join(&filename);
300 let payload = fmt.wrap_split_payload(key, value);
301 fs::write(&path, fmt.serialize(&payload)?)?;
302 key_files.insert(key.clone(), filename);
303 }
304
305 let main_file = if main_object.is_empty() {
306 None
307 } else {
308 let filename = format!("{MAIN_BASENAME}.{}", fmt.extension());
309 let path = dir.join(&filename);
310 fs::write(&path, fmt.serialize(&Value::Object(main_object))?)?;
311 Some(filename)
312 };
313
314 Ok(Root::Object {
315 key_order,
316 key_files,
317 main_file,
318 })
319}
320
321fn write_array_root(
322 dir: &Path,
323 items: &[Value],
324 fmt: Format,
325 unique_id: Option<&str>,
326) -> Result<Root> {
327 let mut files = Vec::with_capacity(items.len());
328 let mut used_names: BTreeSet<String> = BTreeSet::new();
329 let width = digit_width(items.len());
330
331 for (idx, item) in items.iter().enumerate() {
332 let mut basename = if let Some(field) = unique_id {
333 unique_id_basename(item, field)
334 } else {
335 None
336 };
337 if basename
338 .as_ref()
339 .map(|n| used_names.contains(&format!("{n}.{}", fmt.extension())))
340 .unwrap_or(false)
341 {
342 basename = None;
343 }
344 let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
345
346 let mut filename = format!("{basename}.{}", fmt.extension());
347 if used_names.contains(&filename) {
348 filename = format!("{basename}-{}.{}", hash_value(item, 8), fmt.extension());
349 }
350 used_names.insert(filename.clone());
351
352 let path = dir.join(&filename);
353 fs::write(&path, fmt.serialize(item)?)?;
354 files.push(filename);
355 }
356
357 Ok(Root::Array { files })
358}
359
360fn write_jsonc_root_preserving(input: &Path, dir: &Path, unique_id: Option<&str>) -> Result<Root> {
361 let text = fs::read_to_string(input)?;
362 let ast = parse_jsonc_ast(&text)?;
363 let value = Format::Jsonc.parse(&text)?;
364
365 match (ast, value) {
366 (ast::Value::Object(object), Value::Object(_)) => {
367 write_jsonc_object_root(dir, &text, object)
368 }
369 (ast::Value::Array(array), Value::Array(items)) => {
370 write_jsonc_array_root(dir, &text, array, &items, unique_id)
371 }
372 _ => Err(Error::Invalid(
373 "top-level value must be an object or array to disassemble".into(),
374 )),
375 }
376}
377
/// Split a top-level JSONC object by slicing the original source text:
/// non-scalar properties are written to their own files (with their source
/// text intact), scalar properties are re-rendered into `_main.jsonc`.
fn write_jsonc_object_root(dir: &Path, text: &str, object: ast::Object<'_>) -> Result<Root> {
    let properties = jsonc_object_properties(text, object)?;
    let mut key_order = Vec::with_capacity(properties.len());
    let mut key_files: BTreeMap<String, String> = BTreeMap::new();
    let mut main_segments = Vec::new();
    let mut used_names: BTreeSet<String> = BTreeSet::new();
    // Reserve the main file's name so no property can collide with it.
    used_names.insert(format!("{MAIN_BASENAME}.{}", Format::Jsonc.extension()));

    for property in properties {
        key_order.push(property.key.clone());
        if property.is_scalar {
            // Scalars keep their full source segment (including leading
            // comment lines) and are re-rendered into `_main` below.
            main_segments.push(property.segment);
            continue;
        }

        let filename = unique_filename_for_key(&property.key, Format::Jsonc, &used_names);
        used_names.insert(filename.clone());
        let path = dir.join(&filename);
        // Shadowing: `text` here is the property's own value text.
        let text = ensure_trailing_newline(&property.value_text);
        fs::write(path, text)?;
        key_files.insert(property.key, filename);
    }

    let main_file = if main_segments.is_empty() {
        None
    } else {
        let filename = format!("{MAIN_BASENAME}.{}", Format::Jsonc.extension());
        let path = dir.join(&filename);
        let text = render_jsonc_object(main_segments.iter());
        fs::write(path, text)?;
        Some(filename)
    };

    Ok(Root::Object {
        key_order,
        key_files,
        main_file,
    })
}
417
/// Split a top-level JSONC array by slicing each element's original source
/// text into its own file. Naming mirrors `write_array_root`: unique-id
/// field when available, else a zero-padded 1-based index, with a content
/// hash appended on collision.
fn write_jsonc_array_root(
    dir: &Path,
    text: &str,
    array: ast::Array<'_>,
    items: &[Value],
    unique_id: Option<&str>,
) -> Result<Root> {
    // The AST supplies source spans; `items` supplies parsed values for
    // unique-id lookups and hashing. They must describe the same array.
    if array.elements.len() != items.len() {
        return Err(Error::Invalid(
            "JSONC AST and value model disagree on array length".into(),
        ));
    }

    let mut files = Vec::with_capacity(array.elements.len());
    let mut used_names: BTreeSet<String> = BTreeSet::new();
    let width = digit_width(array.elements.len());

    for (idx, (element, item)) in array.elements.iter().zip(items).enumerate() {
        let mut basename = unique_id.and_then(|field| unique_id_basename(item, field));
        // Discard a unique-id basename already claimed by an earlier element.
        if basename
            .as_ref()
            .map(|n| used_names.contains(&format!("{n}.{}", Format::Jsonc.extension())))
            .unwrap_or(false)
        {
            basename = None;
        }
        let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));

        let mut filename = format!("{basename}.{}", Format::Jsonc.extension());
        if used_names.contains(&filename) {
            // Index-derived name collided with an earlier unique-id name.
            filename = format!(
                "{basename}-{}.{}",
                hash_value(item, 8),
                Format::Jsonc.extension()
            );
        }
        used_names.insert(filename.clone());

        // Write the element's original source text, trimmed and
        // newline-terminated.
        let value_text = element.text(text).trim();
        fs::write(dir.join(&filename), ensure_trailing_newline(value_text))?;
        files.push(filename);
    }

    Ok(Root::Array { files })
}
463
/// Pieces of one object property extracted from the original JSONC source.
struct JsoncPropertySyntax {
    /// The property's key, decoded to a plain string.
    key: String,
    /// True when the value is not an object/array (collected into `_main`).
    is_scalar: bool,
    /// Source slice from the property's leading comment lines through the
    /// end of the value's line; used when re-rendering `_main`.
    segment: String,
    /// The value's own source text, trimmed; written to the per-key file.
    value_text: String,
}
470
471fn jsonc_object_properties(
472 text: &str,
473 object: ast::Object<'_>,
474) -> Result<Vec<JsoncPropertySyntax>> {
475 let mut properties = Vec::with_capacity(object.properties.len());
476 for property in object.properties {
477 let key = property.name.clone().into_string();
478 let property_range = property.range();
479 let value_range = property.value.range();
480 properties.push(JsoncPropertySyntax {
481 key,
482 is_scalar: is_jsonc_ast_scalar(&property.value),
483 segment: jsonc_property_segment(text, property_range.start, value_range.end)
484 .to_string(),
485 value_text: property.value.text(text).trim().to_string(),
486 });
487 }
488 Ok(properties)
489}
490
491fn parse_jsonc_ast(text: &str) -> Result<ast::Value<'_>> {
492 jsonc_parser::parse_to_ast(text, &Default::default(), &jsonc_parse_options())
493 .map_err(|e| Error::Invalid(format!("jsonc parse error: {e}")))?
494 .value
495 .ok_or_else(|| Error::Invalid("JSONC document did not contain a value".into()))
496}
497
498fn is_jsonc_ast_scalar(value: &ast::Value<'_>) -> bool {
499 !matches!(value, ast::Value::Object(_) | ast::Value::Array(_))
500}
501
/// Slice of `text` covering one property plus its attached leading comment
/// lines: from the start of the first comment-only line above the property
/// through the end of the line containing the value's last byte.
/// `property_start` and `value_end` are byte offsets into `text`.
fn jsonc_property_segment(text: &str, property_start: usize, value_end: usize) -> &str {
    let start = leading_comment_start(text, line_start(text, property_start));
    let end = line_end(text, value_end);
    &text[start..end]
}
507
/// Walk upward from `start` (which must be a line start) over lines that
/// contain only comments or whitespace, returning the start of the first
/// such line, so a property's leading comments travel with it when split.
fn leading_comment_start(text: &str, mut start: usize) -> usize {
    while start > 0 {
        // Byte index of the '\n' terminating the previous line.
        let previous_line_end = start - 1;
        let previous_line_start = text[..previous_line_end]
            .rfind('\n')
            .map(|idx| idx + 1)
            .unwrap_or(0);
        if !is_comment_only_line(text[previous_line_start..previous_line_end].trim()) {
            break;
        }
        start = previous_line_start;
    }
    start
}

/// Heuristic: does this trimmed line consist solely of comment text?
///
/// Unlike a plain `ends_with("*/")` check, a code line with a trailing
/// block comment (e.g. `"a": 1, /* note */`) is NOT a comment line —
/// otherwise it would be pulled into the next property's segment and
/// emitted twice.
fn is_comment_only_line(trimmed: &str) -> bool {
    if trimmed.is_empty()
        || trimmed.starts_with("//")
        || trimmed.starts_with("/*")
        || trimmed.starts_with('*')
    {
        return true;
    }
    // `text */`: the tail of a multi-line block comment counts only when
    // nothing but whitespace precedes an opener on the same line.
    if let Some(before) = trimmed.strip_suffix("*/") {
        return match before.rfind("/*") {
            Some(open) => before[..open].trim().is_empty(),
            None => true,
        };
    }
    false
}
527
/// Byte offset of the start of the line containing `pos` (0 for the first
/// line). `pos` must lie on a char boundary.
fn line_start(text: &str, pos: usize) -> usize {
    match text[..pos].rfind('\n') {
        Some(newline) => newline + 1,
        None => 0,
    }
}
531
/// Byte offset of the '\n' ending the line containing `pos`, or the text
/// length when the final line is unterminated.
fn line_end(text: &str, pos: usize) -> usize {
    match text[pos..].find('\n') {
        Some(offset) => pos + offset,
        None => text.len(),
    }
}
538
539fn render_jsonc_object<'a>(segments: impl IntoIterator<Item = &'a String>) -> String {
540 let mut out = String::from("{\n");
541 for segment in segments {
542 out.push_str(&jsonc_segment_with_comma(segment));
543 out.push('\n');
544 }
545 out.push_str("}\n");
546 out
547}
548
/// Ensure a property segment ends with a comma so it can be re-rendered
/// inside an object literal. When the last line carries a trailing `//`
/// comment, the comma goes before the comment — and if the code portion is
/// already comma-terminated (`"a": 1, // note`), the segment is returned
/// untouched instead of producing an invalid `,,`.
fn jsonc_segment_with_comma(segment: &str) -> String {
    let segment = segment.trim_matches(|c| c == '\r' || c == '\n');
    if segment.trim_end().ends_with(',') {
        return segment.to_string();
    }

    let last_line_start = segment.rfind('\n').map(|idx| idx + 1).unwrap_or(0);
    let last_line = &segment[last_line_start..];
    if let Some(comment_start) = line_comment_start(last_line) {
        let comment_start = last_line_start + comment_start;
        let (before_comment, comment) = segment.split_at(comment_start);
        let code = before_comment.trim_end();
        // The code ahead of the comment may already end with a comma; the
        // early-return above cannot see it because the comment follows it.
        if code.ends_with(',') {
            return segment.to_string();
        }
        return format!("{code},{comment}");
    }

    format!("{segment},")
}

/// Byte offset of a `//` line comment in `line`, ignoring `//` sequences
/// that occur inside (possibly escape-containing) string literals.
fn line_comment_start(line: &str) -> Option<usize> {
    let mut chars = line.char_indices().peekable();
    let mut in_string = false;
    let mut escaped = false;

    while let Some((idx, ch)) = chars.next() {
        if in_string {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }

        if ch == '"' {
            in_string = true;
        } else if ch == '/' && matches!(chars.peek(), Some((_, '/'))) {
            return Some(idx);
        }
    }

    None
}
592
/// Return `text` guaranteed to end with exactly one added '\n' when it does
/// not already end with a newline.
fn ensure_trailing_newline(text: &str) -> String {
    if text.ends_with('\n') {
        text.to_string()
    } else {
        format!("{text}\n")
    }
}
600
601fn is_scalar(value: &Value) -> bool {
602 !matches!(value, Value::Object(_) | Value::Array(_))
603}
604
/// Zero-padding width for index-based filenames: the decimal digit count of
/// `count`, with a minimum of 4 (so up to 9999 elements align).
fn digit_width(count: usize) -> usize {
    let mut digits = 1;
    let mut remaining = count / 10;
    while remaining > 0 {
        digits += 1;
        remaining /= 10;
    }
    digits.max(4)
}
614
615fn unique_filename_for_key(key: &str, fmt: Format, used: &BTreeSet<String>) -> String {
616 let sanitized = sanitize(key);
617 let base = if sanitized.is_empty() {
618 hash_string(key, 12)
619 } else {
620 sanitized
621 };
622 let mut filename = format!("{base}.{}", fmt.extension());
623 if used.contains(&filename) {
624 filename = format!("{base}-{}.{}", hash_string(key, 8), fmt.extension());
625 }
626 filename
627}
628
629fn unique_id_basename(item: &Value, field: &str) -> Option<String> {
630 let map = item.as_object()?;
631 let raw = match map.get(field)? {
632 Value::String(s) => s.clone(),
633 Value::Number(n) => n.to_string(),
634 Value::Bool(b) => b.to_string(),
635 _ => return None,
636 };
637 let s = sanitize(&raw);
638 if s.is_empty() {
639 None
640 } else {
641 Some(s)
642 }
643}
644
/// Make a string safe for use as a filename: keep ASCII alphanumerics,
/// `-`, `_`, and `.`, replace everything else with `_`, then strip leading
/// and trailing dots.
fn sanitize(input: &str) -> String {
    let mapped: String = input
        .chars()
        .map(|c| match c {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => c,
            _ => '_',
        })
        .collect();
    mapped.trim_matches('.').to_string()
}
659
660fn hash_string(input: &str, len: usize) -> String {
661 let digest = Sha256::digest(input.as_bytes());
662 let hex: String = digest.iter().map(|b| format!("{b:02x}")).collect();
663 hex.chars().take(len).collect()
664}
665
666fn hash_value(value: &Value, len: usize) -> String {
667 let canonical = serde_json::to_string(value).unwrap_or_default();
668 hash_string(&canonical, len)
669}
670
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // The comma must land before a trailing `//` comment, not after it.
    #[test]
    fn jsonc_segment_with_comma_inserts_before_trailing_line_comment() {
        assert_eq!(
            jsonc_segment_with_comma(r#" "name": "demo" // keep this comment"#),
            r#" "name": "demo",// keep this comment"#
        );
    }

    // `//` inside a string literal is not a comment marker.
    #[test]
    fn jsonc_segment_with_comma_ignores_comment_markers_inside_strings() {
        assert_eq!(
            jsonc_segment_with_comma(r#" "url": "https://example.com/a""#),
            r#" "url": "https://example.com/a","#
        );
    }

    #[test]
    fn jsonc_segment_with_comma_leaves_existing_comma_alone() {
        assert_eq!(
            jsonc_segment_with_comma(" \"enabled\": true,"),
            " \"enabled\": true,"
        );
    }

    #[test]
    fn line_comment_start_respects_escaped_quotes() {
        let line = r#" "text": "escaped \" quote // still string" // comment"#;
        assert_eq!(
            line_comment_start(line),
            Some(line.find(" // comment").unwrap() + 1)
        );
    }

    #[test]
    fn ensure_trailing_newline_does_not_duplicate_newline() {
        assert_eq!(ensure_trailing_newline("value\n"), "value\n");
        assert_eq!(ensure_trailing_newline("value"), "value\n");
    }

    // End-to-end: JSONC -> JSONC split with post_purge deletes the input.
    #[test]
    fn jsonc_same_format_post_purge_removes_input_file() {
        let tmp = tempfile::tempdir().unwrap();
        let input = tmp.path().join("config.jsonc");
        fs::write(
            &input,
            r#"{
  "name": "demo",
  "settings": {
    "retry": 3,
  },
}"#,
        )
        .unwrap();

        let output_dir = tmp.path().join("split");
        let dir = disassemble(DisassembleOptions {
            input: input.clone(),
            input_format: Some(Format::Jsonc),
            output_dir: Some(output_dir),
            output_format: Some(Format::Jsonc),
            unique_id: None,
            pre_purge: false,
            post_purge: true,
            ignore_path: None,
        })
        .unwrap();

        assert!(!input.exists());
        assert!(dir.join("settings.jsonc").exists());
        assert!(dir.join(MAIN_BASENAME).with_extension("jsonc").exists());
    }

    #[test]
    fn write_jsonc_object_root_writes_nested_and_main_files() {
        let text = r#"{
  "name": "demo",
  "settings": {
    "retry": 3,
  },
}"#;
        let object = parse_jsonc_ast(text).unwrap().as_object().unwrap().clone();
        let tmp = tempfile::tempdir().unwrap();

        let root = write_jsonc_object_root(tmp.path(), text, object).unwrap();
        let root = serde_json::to_value(&root).unwrap();
        assert_eq!(root["kind"], "object");
        assert_eq!(root["key_order"], json!(["name", "settings"]));
        assert_eq!(root["key_files"]["settings"], "settings.jsonc");
        assert_eq!(root["main_file"], "_main.jsonc");
        assert!(fs::read_to_string(tmp.path().join("settings.jsonc"))
            .unwrap()
            .contains(r#""retry": 3"#));
        assert!(fs::read_to_string(tmp.path().join("_main.jsonc"))
            .unwrap()
            .contains(r#""name": "demo","#));
    }

    #[test]
    fn write_jsonc_array_root_rejects_ast_value_length_mismatch() {
        let text = "[1, 2]";
        let array = parse_jsonc_ast(text).unwrap().as_array().unwrap().clone();
        let tmp = tempfile::tempdir().unwrap();

        let err = write_jsonc_array_root(tmp.path(), text, array, &[json!(1)], None)
            .expect_err("should reject mismatched inputs");

        assert!(
            err.to_string()
                .contains("JSONC AST and value model disagree on array length"),
            "got: {err}"
        );
    }

    // A later index-derived name ("0002") colliding with an earlier
    // unique-id-derived name must be disambiguated with a content hash.
    #[test]
    fn write_jsonc_array_root_hashes_when_unique_id_collides_with_index_name() {
        let text = r#"[
  {
    "name": "0002",
    "value": 1,
  },
  {
    "value": 2,
  },
]"#;
        let array = parse_jsonc_ast(text).unwrap().as_array().unwrap().clone();
        let items = Format::Jsonc
            .parse(text)
            .unwrap()
            .as_array()
            .unwrap()
            .clone();
        let tmp = tempfile::tempdir().unwrap();

        let root = write_jsonc_array_root(tmp.path(), text, array, &items, Some("name")).unwrap();
        let root = serde_json::to_value(&root).unwrap();
        let files = root["files"].as_array().unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0], "0002.jsonc");
        let hashed = files[1].as_str().unwrap();
        assert!(hashed.starts_with("0002-"), "files: {files:?}");
        assert!(tmp.path().join(hashed).exists());
    }
}