1use std::collections::{BTreeMap, BTreeSet};
11use std::fs;
12use std::path::{Path, PathBuf};
13
14use ignore::gitignore::{Gitignore, GitignoreBuilder};
15use serde_json::{Map, Value};
16use sha2::{Digest, Sha256};
17
18use crate::error::{Error, Result};
19use crate::format::Format;
20use crate::ignore_file::DEFAULT_IGNORE_FILENAME;
21use crate::meta::{Meta, Root};
22
/// Basename (extension excluded) of the file that collects the scalar
/// top-level keys of an object root. The full filename is reserved up front
/// when writing an object root so no per-key file can claim it.
const MAIN_BASENAME: &str = "_main";
25
/// Settings for one [`disassemble`] run.
#[derive(Debug, Clone)]
pub struct DisassembleOptions {
    /// Path to split: either a single file or a directory that is walked
    /// recursively.
    pub input: PathBuf,
    /// Format of the input. When `None`, it is detected from the file path.
    /// For a directory input, a `Some` value restricts the walk to files whose
    /// detected format matches.
    pub input_format: Option<Format>,
    /// Directory that receives the split files. When `None`, a directory named
    /// after the input's file stem is used next to the input. Must be `None`
    /// for a directory input.
    pub output_dir: Option<PathBuf>,
    /// Format of the written files. Defaults to the input format.
    pub output_format: Option<Format>,
    /// Name of a field used to name the files of a top-level array's
    /// elements; elements without a usable value fall back to their position.
    pub unique_id: Option<String>,
    /// Remove an existing output directory before writing into it.
    pub pre_purge: bool,
    /// Remove the input file after it was split. For a directory input, the
    /// directory itself is removed afterwards only if it is empty.
    pub post_purge: bool,
    /// Explicit ignore file for directory inputs. When `None`, the default
    /// ignore filename inside the input directory is tried.
    pub ignore_path: Option<PathBuf>,
}
59
60impl DisassembleOptions {
61 pub fn for_file(input: PathBuf) -> Self {
65 Self {
66 input,
67 input_format: None,
68 output_dir: None,
69 output_format: None,
70 unique_id: None,
71 pre_purge: false,
72 post_purge: false,
73 ignore_path: None,
74 }
75 }
76}
77
78pub fn disassemble(opts: DisassembleOptions) -> Result<PathBuf> {
86 let metadata = fs::metadata(&opts.input)?;
87 if metadata.is_dir() {
88 return disassemble_directory(opts);
89 }
90 disassemble_file(opts)
91}
92
93fn disassemble_file(opts: DisassembleOptions) -> Result<PathBuf> {
96 let input_format = match opts.input_format {
97 Some(f) => f,
98 None => Format::from_path(&opts.input)?,
99 };
100 let output_format = opts.output_format.unwrap_or(input_format);
101 enforce_toml_isolation(input_format, output_format)?;
102
103 let output_dir = match opts.output_dir.clone() {
104 Some(d) => d,
105 None => default_output_dir(&opts.input)?,
106 };
107
108 if opts.pre_purge && output_dir.exists() {
109 fs::remove_dir_all(&output_dir)?;
110 }
111 fs::create_dir_all(&output_dir)?;
112
113 let value = input_format.load(&opts.input)?;
114 let source_filename = opts
115 .input
116 .file_name()
117 .and_then(|n| n.to_str())
118 .map(|s| s.to_string());
119
120 let root = match &value {
121 Value::Object(map) => write_object_root(&output_dir, map, output_format)?,
122 Value::Array(items) => {
123 write_array_root(&output_dir, items, output_format, opts.unique_id.as_deref())?
124 }
125 _ => {
126 return Err(Error::Invalid(
127 "top-level value must be an object or array to disassemble".into(),
128 ));
129 }
130 };
131
132 let meta = Meta {
133 source_format: input_format.into(),
134 file_format: output_format.into(),
135 source_filename,
136 root,
137 };
138 meta.write(&output_dir)?;
139
140 if opts.post_purge {
141 fs::remove_file(&opts.input)?;
142 }
143
144 Ok(output_dir)
145}
146
147fn disassemble_directory(opts: DisassembleOptions) -> Result<PathBuf> {
151 if opts.output_dir.is_some() {
152 return Err(Error::Usage(
153 "--output-dir is not supported with a directory input; each file's split output is written next to it".into(),
154 ));
155 }
156
157 let root = opts.input.clone();
158 let ignore = load_ignore_rules(opts.ignore_path.as_deref(), &root)?;
159
160 let mut targets = collect_disassemble_targets(&root, &ignore, opts.input_format)?;
161 targets.sort();
162
163 for file in &targets {
164 let mut child_opts = opts.clone();
165 child_opts.input = file.clone();
166 child_opts.output_dir = None;
169 disassemble_file(child_opts)?;
174 }
175
176 if opts.post_purge {
177 if directory_is_empty(&root)? {
182 fs::remove_dir_all(&root)?;
183 }
184 }
185
186 Ok(root)
187}
188
189fn collect_disassemble_targets(
193 root: &Path,
194 ignore: &Option<Gitignore>,
195 expected_format: Option<Format>,
196) -> Result<Vec<PathBuf>> {
197 let mut out = Vec::new();
198 let mut stack = vec![root.to_path_buf()];
199 while let Some(dir) = stack.pop() {
200 for entry in fs::read_dir(&dir)? {
201 let entry = entry?;
202 let path = entry.path();
203 let ft = entry.file_type()?;
204 if is_ignored(ignore, root, &path, ft.is_dir()) {
205 continue;
206 }
207 if ft.is_dir() {
208 stack.push(path);
209 continue;
210 }
211 if !ft.is_file() {
212 continue;
213 }
214 let detected = match Format::from_path(&path) {
219 Ok(f) => f,
220 Err(_) => continue,
221 };
222 if let Some(expected) = expected_format {
223 if expected != detected {
224 continue;
225 }
226 }
227 out.push(path);
228 }
229 }
230 Ok(out)
231}
232
233fn load_ignore_rules(explicit: Option<&Path>, fallback_dir: &Path) -> Result<Option<Gitignore>> {
234 let path = match explicit {
235 Some(p) => p.to_path_buf(),
236 None => fallback_dir.join(DEFAULT_IGNORE_FILENAME),
237 };
238 if !path.exists() {
239 return Ok(None);
240 }
241 let content = fs::read_to_string(&path)?;
242 let anchor = path.parent().unwrap_or(Path::new("."));
243 let mut builder = GitignoreBuilder::new(anchor);
244 for line in content.lines() {
245 let _ = builder.add_line(None, line);
249 }
250 Ok(builder.build().ok())
251}
252
253fn is_ignored(ignore: &Option<Gitignore>, root: &Path, path: &Path, is_dir: bool) -> bool {
254 let Some(ign) = ignore.as_ref() else {
255 return false;
256 };
257 let candidate = path.strip_prefix(root).unwrap_or(path);
258 ign.matched(candidate, is_dir).is_ignore()
259}
260
/// Returns `true` when reading `dir` yields no entry at all.
///
/// # Errors
///
/// Fails when the directory cannot be opened for reading.
fn directory_is_empty(dir: &Path) -> Result<bool> {
    let has_no_entry = fs::read_dir(dir)?.next().is_none();
    Ok(has_no_entry)
}
265
266fn enforce_toml_isolation(input: Format, output: Format) -> Result<()> {
272 if (input == Format::Toml) != (output == Format::Toml) {
273 return Err(Error::Invalid(format!(
274 "TOML can only be converted to and from TOML; got input={input}, output={output}"
275 )));
276 }
277 Ok(())
278}
279
280fn default_output_dir(input: &Path) -> Result<PathBuf> {
281 let stem = input.file_stem().and_then(|s| s.to_str()).ok_or_else(|| {
282 Error::Invalid(format!(
283 "could not derive a directory name from {}",
284 input.display()
285 ))
286 })?;
287 let parent = input.parent().unwrap_or(Path::new("."));
288 Ok(parent.join(stem))
289}
290
291fn write_object_root(dir: &Path, map: &Map<String, Value>, fmt: Format) -> Result<Root> {
292 let mut key_order: Vec<String> = Vec::with_capacity(map.len());
293 let mut key_files: BTreeMap<String, String> = BTreeMap::new();
294 let mut main_object: Map<String, Value> = Map::new();
295 let mut used_names: BTreeSet<String> = BTreeSet::new();
296 used_names.insert(format!("{MAIN_BASENAME}.{}", fmt.extension()));
297
298 for (key, value) in map {
299 key_order.push(key.clone());
300 if is_scalar(value) {
301 main_object.insert(key.clone(), value.clone());
302 continue;
303 }
304
305 let filename = unique_filename_for_key(key, fmt, &used_names);
306 used_names.insert(filename.clone());
307 let path = dir.join(&filename);
308 let payload = wrap_per_key_payload(fmt, key, value);
309 fs::write(&path, fmt.serialize(&payload)?)?;
310 key_files.insert(key.clone(), filename);
311 }
312
313 let main_file = if main_object.is_empty() {
314 None
315 } else {
316 let filename = format!("{MAIN_BASENAME}.{}", fmt.extension());
317 let path = dir.join(&filename);
318 fs::write(&path, fmt.serialize(&Value::Object(main_object))?)?;
319 Some(filename)
320 };
321
322 Ok(Root::Object {
323 key_order,
324 key_files,
325 main_file,
326 })
327}
328
329fn write_array_root(
330 dir: &Path,
331 items: &[Value],
332 fmt: Format,
333 unique_id: Option<&str>,
334) -> Result<Root> {
335 let mut files = Vec::with_capacity(items.len());
336 let mut used_names: BTreeSet<String> = BTreeSet::new();
337 let width = digit_width(items.len());
338
339 for (idx, item) in items.iter().enumerate() {
340 let mut basename = if let Some(field) = unique_id {
341 unique_id_basename(item, field)
342 } else {
343 None
344 };
345 if basename
346 .as_ref()
347 .map(|n| used_names.contains(&format!("{n}.{}", fmt.extension())))
348 .unwrap_or(false)
349 {
350 basename = None;
351 }
352 let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
353
354 let mut filename = format!("{basename}.{}", fmt.extension());
355 if used_names.contains(&filename) {
356 filename = format!("{basename}-{}.{}", hash_value(item, 8), fmt.extension());
357 }
358 used_names.insert(filename.clone());
359
360 let path = dir.join(&filename);
361 fs::write(&path, fmt.serialize(item)?)?;
362 files.push(filename);
363 }
364
365 Ok(Root::Array { files })
366}
367
368fn wrap_per_key_payload(fmt: Format, key: &str, value: &Value) -> Value {
378 if fmt == Format::Toml {
379 let mut wrapper = Map::new();
380 wrapper.insert(key.to_string(), value.clone());
381 Value::Object(wrapper)
382 } else {
383 value.clone()
384 }
385}
386
387fn is_scalar(value: &Value) -> bool {
388 !matches!(value, Value::Object(_) | Value::Array(_))
389}
390
/// Returns the number of decimal digits in `count`, but never less than four.
fn digit_width(count: usize) -> usize {
    // Repeatedly drop the last digit; the number of steps is the digit count.
    let digits =
        std::iter::successors(Some(count), |&rest| (rest >= 10).then(|| rest / 10)).count();
    digits.max(4)
}
400
401fn unique_filename_for_key(key: &str, fmt: Format, used: &BTreeSet<String>) -> String {
402 let sanitized = sanitize(key);
403 let base = if sanitized.is_empty() {
404 hash_string(key, 12)
405 } else {
406 sanitized
407 };
408 let mut filename = format!("{base}.{}", fmt.extension());
409 if used.contains(&filename) {
410 filename = format!("{base}-{}.{}", hash_string(key, 8), fmt.extension());
411 }
412 filename
413}
414
415fn unique_id_basename(item: &Value, field: &str) -> Option<String> {
416 let map = item.as_object()?;
417 let raw = match map.get(field)? {
418 Value::String(s) => s.clone(),
419 Value::Number(n) => n.to_string(),
420 Value::Bool(b) => b.to_string(),
421 _ => return None,
422 };
423 let s = sanitize(&raw);
424 if s.is_empty() {
425 None
426 } else {
427 Some(s)
428 }
429}
430
/// Turns arbitrary text into a filename-safe string.
///
/// ASCII letters, digits, `-`, `_` and `.` are kept; every other character
/// becomes `_`. Leading and trailing dots are then removed.
fn sanitize(input: &str) -> String {
    let mut cleaned = String::with_capacity(input.len());
    for ch in input.chars() {
        let keep = matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.');
        cleaned.push(if keep { ch } else { '_' });
    }
    cleaned.trim_matches('.').to_string()
}
445
446fn hash_string(input: &str, len: usize) -> String {
447 let digest = Sha256::digest(input.as_bytes());
448 let hex: String = digest.iter().map(|b| format!("{b:02x}")).collect();
449 hex.chars().take(len).collect()
450}
451
452fn hash_value(value: &Value, len: usize) -> String {
453 let canonical = serde_json::to_string(value).unwrap_or_default();
454 hash_string(&canonical, len)
455}