1use std::collections::{BTreeMap, BTreeSet};
11use std::fs;
12use std::path::{Path, PathBuf};
13
14use ignore::gitignore::{Gitignore, GitignoreBuilder};
15use jsonc_parser::ast;
16use jsonc_parser::common::Ranged;
17use serde_json::{Map, Value};
18use sha2::{Digest, Sha256};
19
20use crate::error::{Error, Result};
21use crate::format::{jsonc_parse_options, ConversionOperation, Format};
22use crate::ignore_file::DEFAULT_IGNORE_FILENAME;
23use crate::meta::{Meta, Root};
24
/// Basename (without extension) of the file that collects an object's scalar
/// properties. The name is reserved up front so no per-key file can claim it.
const MAIN_BASENAME: &str = "_main";
27
/// Settings that control a single [`disassemble`] run.
#[derive(Debug, Clone)]
pub struct DisassembleOptions {
    /// File or directory to disassemble.
    pub input: PathBuf,
    /// Format of the input; when `None` it is derived from the file path.
    /// For a directory input it restricts which files are picked up.
    pub input_format: Option<Format>,
    /// Directory that receives the split files; when `None` a directory named
    /// after the input's file stem is used, next to the input. Rejected for
    /// directory inputs.
    pub output_dir: Option<PathBuf>,
    /// Format of the split files; defaults to the input format.
    pub output_format: Option<Format>,
    /// Object field whose value names the per-element files of an array root.
    pub unique_id: Option<String>,
    /// Remove an already existing output directory before writing.
    pub pre_purge: bool,
    /// Remove the input file once it has been split; for a directory input
    /// the directory is removed only if it is empty afterwards.
    pub post_purge: bool,
    /// Explicit ignore file used when walking a directory input; when `None`
    /// the default ignore filename inside the input directory is tried.
    pub ignore_path: Option<PathBuf>,
}
61
62impl DisassembleOptions {
63 pub fn for_file(input: PathBuf) -> Self {
67 Self {
68 input,
69 input_format: None,
70 output_dir: None,
71 output_format: None,
72 unique_id: None,
73 pre_purge: false,
74 post_purge: false,
75 ignore_path: None,
76 }
77 }
78}
79
80pub fn disassemble(opts: DisassembleOptions) -> Result<PathBuf> {
88 let metadata = fs::metadata(&opts.input)?;
89 if metadata.is_dir() {
90 return disassemble_directory(opts);
91 }
92 disassemble_file(opts)
93}
94
95fn disassemble_file(opts: DisassembleOptions) -> Result<PathBuf> {
98 let input_format = match opts.input_format {
99 Some(f) => f,
100 None => Format::from_path(&opts.input)?,
101 };
102 let output_format = opts.output_format.unwrap_or(input_format);
103 input_format.ensure_can_convert_to(output_format, ConversionOperation::Convert)?;
104
105 let output_dir = match opts.output_dir.clone() {
106 Some(d) => d,
107 None => default_output_dir(&opts.input)?,
108 };
109
110 if opts.pre_purge && output_dir.exists() {
111 fs::remove_dir_all(&output_dir)?;
112 }
113 fs::create_dir_all(&output_dir)?;
114
115 let source_filename = opts
116 .input
117 .file_name()
118 .and_then(|n| n.to_str())
119 .map(|s| s.to_string());
120
121 if input_format == Format::Jsonc && output_format == Format::Jsonc {
122 let root =
123 write_jsonc_root_preserving(&opts.input, &output_dir, opts.unique_id.as_deref())?;
124 let meta = Meta {
125 source_format: input_format,
126 file_format: output_format,
127 source_filename,
128 root,
129 };
130 meta.write(&output_dir)?;
131
132 if opts.post_purge {
133 fs::remove_file(&opts.input)?;
134 }
135
136 return Ok(output_dir);
137 }
138
139 let value = input_format.load(&opts.input)?;
140
141 let root = match &value {
142 Value::Object(map) => write_object_root(&output_dir, map, output_format)?,
143 Value::Array(items) => {
144 write_array_root(&output_dir, items, output_format, opts.unique_id.as_deref())?
145 }
146 _ => {
147 return Err(Error::Invalid(
148 "top-level value must be an object or array to disassemble".into(),
149 ));
150 }
151 };
152
153 let meta = Meta {
154 source_format: input_format,
155 file_format: output_format,
156 source_filename,
157 root,
158 };
159 meta.write(&output_dir)?;
160
161 if opts.post_purge {
162 fs::remove_file(&opts.input)?;
163 }
164
165 Ok(output_dir)
166}
167
168fn disassemble_directory(opts: DisassembleOptions) -> Result<PathBuf> {
172 if opts.output_dir.is_some() {
173 return Err(Error::Usage(
174 "--output-dir is not supported with a directory input; each file's split output is written next to it".into(),
175 ));
176 }
177
178 let root = opts.input.clone();
179 let ignore = load_ignore_rules(opts.ignore_path.as_deref(), &root)?;
180
181 let mut targets = collect_disassemble_targets(&root, &ignore, opts.input_format)?;
182 targets.sort();
183
184 for file in &targets {
185 let mut child_opts = opts.clone();
186 child_opts.input = file.clone();
187 child_opts.output_dir = None;
190 disassemble_file(child_opts)?;
195 }
196
197 if opts.post_purge {
198 if directory_is_empty(&root)? {
203 fs::remove_dir_all(&root)?;
204 }
205 }
206
207 Ok(root)
208}
209
210fn collect_disassemble_targets(
214 root: &Path,
215 ignore: &Option<Gitignore>,
216 expected_format: Option<Format>,
217) -> Result<Vec<PathBuf>> {
218 let mut out = Vec::new();
219 let mut stack = vec![root.to_path_buf()];
220 while let Some(dir) = stack.pop() {
221 for entry in fs::read_dir(&dir)? {
222 let entry = entry?;
223 let path = entry.path();
224 let ft = entry.file_type()?;
225 if is_ignored(ignore, root, &path, ft.is_dir()) {
226 continue;
227 }
228 if ft.is_dir() {
229 stack.push(path);
230 continue;
231 }
232 if !ft.is_file() {
233 continue;
234 }
235 let detected = match Format::from_path(&path) {
240 Ok(f) => f,
241 Err(_) => continue,
242 };
243 if let Some(expected) = expected_format {
244 if expected != detected {
245 continue;
246 }
247 }
248 out.push(path);
249 }
250 }
251 Ok(out)
252}
253
254fn load_ignore_rules(explicit: Option<&Path>, fallback_dir: &Path) -> Result<Option<Gitignore>> {
255 let path = match explicit {
256 Some(p) => p.to_path_buf(),
257 None => fallback_dir.join(DEFAULT_IGNORE_FILENAME),
258 };
259 if !path.exists() {
260 return Ok(None);
261 }
262 let content = fs::read_to_string(&path)?;
263 let anchor = path.parent().unwrap_or(Path::new("."));
264 let mut builder = GitignoreBuilder::new(anchor);
265 for line in content.lines() {
266 let _ = builder.add_line(None, line);
270 }
271 Ok(builder.build().ok())
272}
273
274fn is_ignored(ignore: &Option<Gitignore>, root: &Path, path: &Path, is_dir: bool) -> bool {
275 let Some(ign) = ignore.as_ref() else {
276 return false;
277 };
278 let candidate = path.strip_prefix(root).unwrap_or(path);
279 ign.matched(candidate, is_dir).is_ignore()
280}
281
/// Returns `true` when `dir` contains no entries at all.
///
/// # Errors
///
/// Fails when the directory cannot be opened for reading.
fn directory_is_empty(dir: &Path) -> Result<bool> {
    Ok(fs::read_dir(dir)?.next().is_none())
}
286
287fn default_output_dir(input: &Path) -> Result<PathBuf> {
288 let stem = input.file_stem().and_then(|s| s.to_str()).ok_or_else(|| {
289 Error::Invalid(format!(
290 "could not derive a directory name from {}",
291 input.display()
292 ))
293 })?;
294 let parent = input.parent().unwrap_or(Path::new("."));
295 Ok(parent.join(stem))
296}
297
298fn write_object_root(dir: &Path, map: &Map<String, Value>, fmt: Format) -> Result<Root> {
299 let mut key_order: Vec<String> = Vec::with_capacity(map.len());
300 let mut key_files: BTreeMap<String, String> = BTreeMap::new();
301 let mut main_object: Map<String, Value> = Map::new();
302 let mut used_names: BTreeSet<String> = BTreeSet::new();
303 used_names.insert(format!("{MAIN_BASENAME}.{}", fmt.extension()));
304
305 for (key, value) in map {
306 key_order.push(key.clone());
307 if is_scalar(value) {
308 main_object.insert(key.clone(), value.clone());
309 continue;
310 }
311
312 let filename = unique_filename_for_key(key, fmt, &used_names);
313 used_names.insert(filename.clone());
314 let path = dir.join(&filename);
315 let payload = fmt.wrap_split_payload(key, value);
316 fs::write(&path, fmt.serialize(&payload)?)?;
317 key_files.insert(key.clone(), filename);
318 }
319
320 let main_file = if main_object.is_empty() {
321 None
322 } else {
323 let filename = format!("{MAIN_BASENAME}.{}", fmt.extension());
324 let path = dir.join(&filename);
325 fs::write(&path, fmt.serialize(&Value::Object(main_object))?)?;
326 Some(filename)
327 };
328
329 Ok(Root::Object {
330 key_order,
331 key_files,
332 main_file,
333 })
334}
335
336fn write_array_root(
337 dir: &Path,
338 items: &[Value],
339 fmt: Format,
340 unique_id: Option<&str>,
341) -> Result<Root> {
342 let mut files = Vec::with_capacity(items.len());
343 let mut used_names: BTreeSet<String> = BTreeSet::new();
344 let width = digit_width(items.len());
345
346 for (idx, item) in items.iter().enumerate() {
347 let mut basename = if let Some(field) = unique_id {
348 unique_id_basename(item, field)
349 } else {
350 None
351 };
352 if basename
353 .as_ref()
354 .map(|n| used_names.contains(&format!("{n}.{}", fmt.extension())))
355 .unwrap_or(false)
356 {
357 basename = None;
358 }
359 let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
360
361 let mut filename = format!("{basename}.{}", fmt.extension());
362 if used_names.contains(&filename) {
363 filename = format!("{basename}-{}.{}", hash_value(item, 8), fmt.extension());
364 }
365 used_names.insert(filename.clone());
366
367 let path = dir.join(&filename);
368 fs::write(&path, fmt.serialize(item)?)?;
369 files.push(filename);
370 }
371
372 Ok(Root::Array { files })
373}
374
375fn write_jsonc_root_preserving(input: &Path, dir: &Path, unique_id: Option<&str>) -> Result<Root> {
376 let text = fs::read_to_string(input)?;
377 let ast = parse_jsonc_ast(&text)?;
378 let value = Format::Jsonc.parse(&text)?;
379
380 match (ast, value) {
381 (ast::Value::Object(object), Value::Object(_)) => {
382 write_jsonc_object_root(dir, &text, object)
383 }
384 (ast::Value::Array(array), Value::Array(items)) => {
385 write_jsonc_array_root(dir, &text, array, &items, unique_id)
386 }
387 _ => Err(Error::Invalid(
388 "top-level value must be an object or array to disassemble".into(),
389 )),
390 }
391}
392
393fn write_jsonc_object_root(dir: &Path, text: &str, object: ast::Object<'_>) -> Result<Root> {
394 let properties = jsonc_object_properties(text, object)?;
395 let mut key_order = Vec::with_capacity(properties.len());
396 let mut key_files: BTreeMap<String, String> = BTreeMap::new();
397 let mut main_segments = Vec::new();
398 let mut used_names: BTreeSet<String> = BTreeSet::new();
399 used_names.insert(format!("{MAIN_BASENAME}.{}", Format::Jsonc.extension()));
400
401 for property in properties {
402 key_order.push(property.key.clone());
403 if property.is_scalar {
404 main_segments.push(property.segment);
405 continue;
406 }
407
408 let filename = unique_filename_for_key(&property.key, Format::Jsonc, &used_names);
409 used_names.insert(filename.clone());
410 let path = dir.join(&filename);
411 let text = ensure_trailing_newline(&property.value_text);
412 fs::write(path, text)?;
413 key_files.insert(property.key, filename);
414 }
415
416 let main_file = if main_segments.is_empty() {
417 None
418 } else {
419 let filename = format!("{MAIN_BASENAME}.{}", Format::Jsonc.extension());
420 let path = dir.join(&filename);
421 let text = render_jsonc_object(main_segments.iter());
422 fs::write(path, text)?;
423 Some(filename)
424 };
425
426 Ok(Root::Object {
427 key_order,
428 key_files,
429 main_file,
430 })
431}
432
433fn write_jsonc_array_root(
434 dir: &Path,
435 text: &str,
436 array: ast::Array<'_>,
437 items: &[Value],
438 unique_id: Option<&str>,
439) -> Result<Root> {
440 if array.elements.len() != items.len() {
441 return Err(Error::Invalid(
442 "JSONC AST and value model disagree on array length".into(),
443 ));
444 }
445
446 let mut files = Vec::with_capacity(array.elements.len());
447 let mut used_names: BTreeSet<String> = BTreeSet::new();
448 let width = digit_width(array.elements.len());
449
450 for (idx, (element, item)) in array.elements.iter().zip(items).enumerate() {
451 let mut basename = unique_id.and_then(|field| unique_id_basename(item, field));
452 if basename
453 .as_ref()
454 .map(|n| used_names.contains(&format!("{n}.{}", Format::Jsonc.extension())))
455 .unwrap_or(false)
456 {
457 basename = None;
458 }
459 let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
460
461 let mut filename = format!("{basename}.{}", Format::Jsonc.extension());
462 if used_names.contains(&filename) {
463 filename = format!(
464 "{basename}-{}.{}",
465 hash_value(item, 8),
466 Format::Jsonc.extension()
467 );
468 }
469 used_names.insert(filename.clone());
470
471 let value_text = element.text(text).trim();
472 fs::write(dir.join(&filename), ensure_trailing_newline(value_text))?;
473 files.push(filename);
474 }
475
476 Ok(Root::Array { files })
477}
478
/// Source-level view of one property of a JSONC object.
struct JsoncPropertySyntax {
    /// Property name as a plain string.
    key: String,
    /// `true` when the value is neither an object nor an array.
    is_scalar: bool,
    /// Whole source lines covering the property, including the comment and
    /// blank lines directly above it.
    segment: String,
    /// Trimmed source text of the property's value only.
    value_text: String,
}
485
486fn jsonc_object_properties(
487 text: &str,
488 object: ast::Object<'_>,
489) -> Result<Vec<JsoncPropertySyntax>> {
490 let mut properties = Vec::with_capacity(object.properties.len());
491 for property in object.properties {
492 let key = property.name.clone().into_string();
493 let property_range = property.range();
494 let value_range = property.value.range();
495 properties.push(JsoncPropertySyntax {
496 key,
497 is_scalar: is_jsonc_ast_scalar(&property.value),
498 segment: jsonc_property_segment(text, property_range.start, value_range.end)
499 .to_string(),
500 value_text: property.value.text(text).trim().to_string(),
501 });
502 }
503 Ok(properties)
504}
505
506fn parse_jsonc_ast(text: &str) -> Result<ast::Value<'_>> {
507 jsonc_parser::parse_to_ast(text, &Default::default(), &jsonc_parse_options())
508 .map_err(|e| Error::Invalid(format!("jsonc parse error: {e}")))?
509 .value
510 .ok_or_else(|| Error::Invalid("JSONC document did not contain a value".into()))
511}
512
513fn is_jsonc_ast_scalar(value: &ast::Value<'_>) -> bool {
514 !matches!(value, ast::Value::Object(_) | ast::Value::Array(_))
515}
516
517fn jsonc_property_segment(text: &str, property_start: usize, value_end: usize) -> &str {
518 let start = leading_comment_start(text, line_start(text, property_start));
519 let end = line_end(text, value_end);
520 &text[start..end]
521}
522
/// Extends `start` upwards over the comment and blank lines directly above it.
///
/// `start` must be the byte offset of a line start. Walking upwards, a line
/// is absorbed when it is blank, starts a line comment (`//`), starts a block
/// comment (`/*`), looks like a block-comment continuation (`*`), or closes a
/// block comment that was opened on an earlier line. The walk stops at the
/// first other line, and the offset of the earliest absorbed line is returned.
///
/// A line that merely *ends* with a block comment after other content, such as
/// `"a": 1, /* note */`, is not absorbed: that comment trails the previous
/// property, and absorbing the line would duplicate that property in the
/// next property's segment.
fn leading_comment_start(text: &str, mut start: usize) -> usize {
    while start > 0 {
        // `start` is a line start, so the byte before it is the newline that
        // terminates the previous line.
        let previous_line_end = start - 1;
        let previous_line_start = line_start(text, previous_line_end);
        let trimmed = text[previous_line_start..previous_line_end].trim();

        // Only a closer without an opener on the same line can be the tail of
        // a multi-line block comment.
        let closes_earlier_block_comment = trimmed.ends_with("*/") && !trimmed.contains("/*");
        let is_leading_trivia = trimmed.is_empty()
            || trimmed.starts_with("//")
            || trimmed.starts_with("/*")
            || trimmed.starts_with('*')
            || closes_earlier_block_comment;

        if !is_leading_trivia {
            break;
        }
        start = previous_line_start;
    }
    start
}

/// Returns the byte offset at which the line containing `pos` begins, i.e.
/// the offset just after the last `\n` before `pos`, or 0 if there is none.
fn line_start(text: &str, pos: usize) -> usize {
    text[..pos].rfind('\n').map_or(0, |newline| newline + 1)
}
546
/// Returns the byte offset of the first `\n` at or after `pos`, or the length
/// of `text` when the line is not newline-terminated.
fn line_end(text: &str, pos: usize) -> usize {
    match text[pos..].find('\n') {
        Some(offset) => pos + offset,
        None => text.len(),
    }
}
553
554fn render_jsonc_object<'a>(segments: impl IntoIterator<Item = &'a String>) -> String {
555 let mut out = String::from("{\n");
556 for segment in segments {
557 out.push_str(&jsonc_segment_with_comma(segment));
558 out.push('\n');
559 }
560 out.push_str("}\n");
561 out
562}
563
/// Returns `segment` with a comma after its property value, adding one only
/// when none is present.
///
/// Leading and trailing line breaks are stripped first. The segment's last
/// line is split into code and an optional trailing `//` comment:
///
/// * if the code already ends with a comma (ignoring trailing block comments),
///   the segment is returned unchanged, so no second comma is ever added;
/// * otherwise, with a trailing line comment, the comma is inserted directly
///   after the code and whitespace between code and comment is dropped;
/// * otherwise the comma is appended at the very end of the segment.
///
/// The comma check looks only at the code part, so a comment whose text
/// happens to end in `,` does not suppress the comma.
fn jsonc_segment_with_comma(segment: &str) -> String {
    let segment = segment.trim_matches(|c| c == '\r' || c == '\n');

    let last_line_start = segment.rfind('\n').map_or(0, |idx| idx + 1);
    let code_end = line_comment_start(&segment[last_line_start..])
        .map_or(segment.len(), |offset| last_line_start + offset);
    let (code, line_comment) = segment.split_at(code_end);

    if has_trailing_comma(code) {
        return segment.to_string();
    }
    if line_comment.is_empty() {
        format!("{segment},")
    } else {
        format!("{},{line_comment}", code.trim_end())
    }
}

/// Reports whether `code` ends with a comma once trailing whitespace and
/// trailing `/* ... */` comments are skipped.
fn has_trailing_comma(code: &str) -> bool {
    let mut rest = code.trim_end();
    while rest.ends_with("*/") {
        match rest.rfind("/*") {
            // Each round strictly shortens `rest`, so the loop terminates.
            Some(open) => rest = rest[..open].trim_end(),
            None => break,
        }
    }
    rest.ends_with(',')
}

/// Returns the byte offset of the first `//` in `line` that starts a line
/// comment, or `None` if there is none.
///
/// `//` inside a double-quoted string (honoring backslash escapes) or inside a
/// `/* ... */` block comment does not count. An unterminated block comment
/// swallows the rest of the line.
fn line_comment_start(line: &str) -> Option<usize> {
    let mut chars = line.char_indices().peekable();
    let mut in_string = false;
    let mut escaped = false;

    while let Some((idx, ch)) = chars.next() {
        if in_string {
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }

        match (ch, chars.peek().map(|&(_, next)| next)) {
            ('"', _) => in_string = true,
            ('/', Some('/')) => return Some(idx),
            ('/', Some('*')) => {
                // Skip the block comment so a `//` inside it is not mistaken
                // for a line comment.
                chars.next();
                let mut previous_was_star = false;
                for (_, inner) in chars.by_ref() {
                    if previous_was_star && inner == '/' {
                        break;
                    }
                    previous_was_star = inner == '*';
                }
            }
            _ => {}
        }
    }

    None
}
607
/// Returns `text` as an owned string that ends with a newline, appending one
/// only when it is missing.
fn ensure_trailing_newline(text: &str) -> String {
    if text.ends_with('\n') {
        text.to_owned()
    } else {
        format!("{text}\n")
    }
}
615
616fn is_scalar(value: &Value) -> bool {
617 !matches!(value, Value::Object(_) | Value::Array(_))
618}
619
/// Returns the zero-padding width for positional file names: the number of
/// decimal digits in `count`, but never less than 4.
fn digit_width(count: usize) -> usize {
    // Each step drops one decimal digit; the chain length is the digit count.
    let digits =
        std::iter::successors(Some(count), |&n| (n >= 10).then(|| n / 10)).count();
    digits.max(4)
}
629
630fn unique_filename_for_key(key: &str, fmt: Format, used: &BTreeSet<String>) -> String {
631 let sanitized = sanitize(key);
632 let base = if sanitized.is_empty() {
633 hash_string(key, 12)
634 } else {
635 sanitized
636 };
637 let mut filename = format!("{base}.{}", fmt.extension());
638 if used.contains(&filename) {
639 filename = format!("{base}-{}.{}", hash_string(key, 8), fmt.extension());
640 }
641 filename
642}
643
644fn unique_id_basename(item: &Value, field: &str) -> Option<String> {
645 let map = item.as_object()?;
646 let raw = match map.get(field)? {
647 Value::String(s) => s.clone(),
648 Value::Number(n) => n.to_string(),
649 Value::Bool(b) => b.to_string(),
650 _ => return None,
651 };
652 let s = sanitize(&raw);
653 if s.is_empty() {
654 None
655 } else {
656 Some(s)
657 }
658}
659
/// Turns `input` into a file-name-safe string.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character is
/// replaced by `_`. Leading and trailing dots are then stripped, so the result
/// may be empty.
fn sanitize(input: &str) -> String {
    let mut replaced = String::with_capacity(input.len());
    for ch in input.chars() {
        let keep = ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.');
        replaced.push(if keep { ch } else { '_' });
    }
    replaced.trim_matches('.').to_string()
}
674
675fn hash_string(input: &str, len: usize) -> String {
676 let digest = Sha256::digest(input.as_bytes());
677 let hex: String = digest.iter().map(|b| format!("{b:02x}")).collect();
678 hex.chars().take(len).collect()
679}
680
681fn hash_value(value: &Value, len: usize) -> String {
682 let canonical = serde_json::to_string(value).unwrap_or_default();
683 hash_string(&canonical, len)
684}
685
// Unit tests for the JSONC text helpers and the text-preserving JSONC writers.
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // A missing comma goes between the value and a trailing `//` comment.
    #[test]
    fn jsonc_segment_with_comma_inserts_before_trailing_line_comment() {
        assert_eq!(
            jsonc_segment_with_comma(r#" "name": "demo" // keep this comment"#),
            r#" "name": "demo",// keep this comment"#
        );
    }

    // `//` inside a string value is not a comment; the comma is appended at the end.
    #[test]
    fn jsonc_segment_with_comma_ignores_comment_markers_inside_strings() {
        assert_eq!(
            jsonc_segment_with_comma(r#" "url": "https://example.com/a""#),
            r#" "url": "https://example.com/a","#
        );
    }

    // A segment that already ends with a comma is returned unchanged.
    #[test]
    fn jsonc_segment_with_comma_leaves_existing_comma_alone() {
        assert_eq!(
            jsonc_segment_with_comma(" \"enabled\": true,"),
            " \"enabled\": true,"
        );
    }

    // An escaped quote does not end the string, so the `//` inside it is skipped
    // and only the real trailing comment is reported.
    #[test]
    fn line_comment_start_respects_escaped_quotes() {
        let line = r#" "text": "escaped \" quote // still string" // comment"#;
        assert_eq!(
            line_comment_start(line),
            Some(line.find(" // comment").unwrap() + 1)
        );
    }

    // Exactly one trailing newline: added when missing, kept when present.
    #[test]
    fn ensure_trailing_newline_does_not_duplicate_newline() {
        assert_eq!(ensure_trailing_newline("value\n"), "value\n");
        assert_eq!(ensure_trailing_newline("value"), "value\n");
    }

    // End-to-end JSONC -> JSONC run with `post_purge`: the input file is removed
    // and both the per-key file and the main file exist in the output directory.
    #[test]
    fn jsonc_same_format_post_purge_removes_input_file() {
        let tmp = tempfile::tempdir().unwrap();
        let input = tmp.path().join("config.jsonc");
        fs::write(
            &input,
            r#"{
 "name": "demo",
 "settings": {
 "retry": 3,
 },
}"#,
        )
        .unwrap();

        let output_dir = tmp.path().join("split");
        let dir = disassemble(DisassembleOptions {
            input: input.clone(),
            input_format: Some(Format::Jsonc),
            output_dir: Some(output_dir),
            output_format: Some(Format::Jsonc),
            unique_id: None,
            pre_purge: false,
            post_purge: true,
            ignore_path: None,
        })
        .unwrap();

        assert!(!input.exists());
        assert!(dir.join("settings.jsonc").exists());
        assert!(dir.join(MAIN_BASENAME).with_extension("jsonc").exists());
    }

    // The nested object gets its own file, the scalar lands in the main file,
    // and the returned root records key order and file names.
    #[test]
    fn write_jsonc_object_root_writes_nested_and_main_files() {
        let text = r#"{
 "name": "demo",
 "settings": {
 "retry": 3,
 },
}"#;
        let object = parse_jsonc_ast(text).unwrap().as_object().unwrap().clone();
        let tmp = tempfile::tempdir().unwrap();

        let root = write_jsonc_object_root(tmp.path(), text, object).unwrap();
        let root = serde_json::to_value(&root).unwrap();
        assert_eq!(root["kind"], "object");
        assert_eq!(root["key_order"], json!(["name", "settings"]));
        assert_eq!(root["key_files"]["settings"], "settings.jsonc");
        assert_eq!(root["main_file"], "_main.jsonc");
        assert!(fs::read_to_string(tmp.path().join("settings.jsonc"))
            .unwrap()
            .contains(r#""retry": 3"#));
        assert!(fs::read_to_string(tmp.path().join("_main.jsonc"))
            .unwrap()
            .contains(r#""name": "demo","#));
    }

    // The AST has two elements but only one value is supplied: this must be
    // reported as an error rather than silently truncated.
    #[test]
    fn write_jsonc_array_root_rejects_ast_value_length_mismatch() {
        let text = "[1, 2]";
        let array = parse_jsonc_ast(text).unwrap().as_array().unwrap().clone();
        let tmp = tempfile::tempdir().unwrap();

        let err = write_jsonc_array_root(tmp.path(), text, array, &[json!(1)], None)
            .expect_err("should reject mismatched inputs");

        assert!(
            err.to_string()
                .contains("JSONC AST and value model disagree on array length"),
            "got: {err}"
        );
    }

    // The first element's unique id is "0002", which is also the positional name
    // of the second element; the second file name must get a hash suffix.
    #[test]
    fn write_jsonc_array_root_hashes_when_unique_id_collides_with_index_name() {
        let text = r#"[
 {
 "name": "0002",
 "value": 1,
 },
 {
 "value": 2,
 },
]"#;
        let array = parse_jsonc_ast(text).unwrap().as_array().unwrap().clone();
        let items = Format::Jsonc
            .parse(text)
            .unwrap()
            .as_array()
            .unwrap()
            .clone();
        let tmp = tempfile::tempdir().unwrap();

        let root = write_jsonc_array_root(tmp.path(), text, array, &items, Some("name")).unwrap();
        let root = serde_json::to_value(&root).unwrap();
        let files = root["files"].as_array().unwrap();
        assert_eq!(files.len(), 2);
        assert_eq!(files[0], "0002.jsonc");
        let hashed = files[1].as_str().unwrap();
        assert!(hashed.starts_with("0002-"), "files: {files:?}");
        assert!(tmp.path().join(hashed).exists());
    }
}