use std::collections::{BTreeMap, BTreeSet};
use std::fs;
use std::path::{Path, PathBuf};
use serde_json::{Map, Value};
use sha2::{Digest, Sha256};
use crate::error::{Error, Result};
use crate::format::Format;
use crate::meta::{Meta, Root};
const MAIN_BASENAME: &str = "_main";
#[derive(Debug, Clone)]
pub struct DisassembleOptions {
pub input: PathBuf,
pub input_format: Option<Format>,
pub output_dir: Option<PathBuf>,
pub output_format: Option<Format>,
pub unique_id: Option<String>,
pub pre_purge: bool,
pub post_purge: bool,
}
pub fn disassemble(opts: DisassembleOptions) -> Result<PathBuf> {
let input_format = match opts.input_format {
Some(f) => f,
None => Format::from_path(&opts.input)?,
};
let output_format = opts.output_format.unwrap_or(input_format);
let output_dir = match opts.output_dir.clone() {
Some(d) => d,
None => default_output_dir(&opts.input)?,
};
if opts.pre_purge && output_dir.exists() {
fs::remove_dir_all(&output_dir)?;
}
fs::create_dir_all(&output_dir)?;
let value = input_format.load(&opts.input)?;
let source_filename = opts
.input
.file_name()
.and_then(|n| n.to_str())
.map(|s| s.to_string());
let root = match &value {
Value::Object(map) => write_object_root(&output_dir, map, output_format)?,
Value::Array(items) => {
write_array_root(&output_dir, items, output_format, opts.unique_id.as_deref())?
}
_ => {
return Err(Error::Invalid(
"top-level value must be an object or array to disassemble".into(),
));
}
};
let meta = Meta {
source_format: input_format.into(),
file_format: output_format.into(),
source_filename,
root,
};
meta.write(&output_dir)?;
if opts.post_purge {
fs::remove_file(&opts.input)?;
}
Ok(output_dir)
}
fn default_output_dir(input: &Path) -> Result<PathBuf> {
let stem = input.file_stem().and_then(|s| s.to_str()).ok_or_else(|| {
Error::Invalid(format!(
"could not derive a directory name from {}",
input.display()
))
})?;
let parent = input.parent().unwrap_or(Path::new("."));
Ok(parent.join(stem))
}
fn write_object_root(dir: &Path, map: &Map<String, Value>, fmt: Format) -> Result<Root> {
let mut key_order: Vec<String> = Vec::with_capacity(map.len());
let mut key_files: BTreeMap<String, String> = BTreeMap::new();
let mut main_object: Map<String, Value> = Map::new();
let mut used_names: BTreeSet<String> = BTreeSet::new();
used_names.insert(format!("{MAIN_BASENAME}.{}", fmt.extension()));
for (key, value) in map {
key_order.push(key.clone());
if is_scalar(value) {
main_object.insert(key.clone(), value.clone());
continue;
}
let filename = unique_filename_for_key(key, fmt, &used_names);
used_names.insert(filename.clone());
let path = dir.join(&filename);
fs::write(&path, fmt.serialize(value)?)?;
key_files.insert(key.clone(), filename);
}
let main_file = if main_object.is_empty() {
None
} else {
let filename = format!("{MAIN_BASENAME}.{}", fmt.extension());
let path = dir.join(&filename);
fs::write(&path, fmt.serialize(&Value::Object(main_object))?)?;
Some(filename)
};
Ok(Root::Object {
key_order,
key_files,
main_file,
})
}
fn write_array_root(
dir: &Path,
items: &[Value],
fmt: Format,
unique_id: Option<&str>,
) -> Result<Root> {
let mut files = Vec::with_capacity(items.len());
let mut used_names: BTreeSet<String> = BTreeSet::new();
let width = digit_width(items.len());
for (idx, item) in items.iter().enumerate() {
let mut basename = if let Some(field) = unique_id {
unique_id_basename(item, field)
} else {
None
};
if basename
.as_ref()
.map(|n| used_names.contains(&format!("{n}.{}", fmt.extension())))
.unwrap_or(false)
{
basename = None;
}
let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
let mut filename = format!("{basename}.{}", fmt.extension());
if used_names.contains(&filename) {
filename = format!("{basename}-{}.{}", hash_value(item, 8), fmt.extension());
}
used_names.insert(filename.clone());
let path = dir.join(&filename);
fs::write(&path, fmt.serialize(item)?)?;
files.push(filename);
}
Ok(Root::Array { files })
}
fn is_scalar(value: &Value) -> bool {
!matches!(value, Value::Object(_) | Value::Array(_))
}
fn digit_width(count: usize) -> usize {
let mut w = 1;
let mut n = count;
while n >= 10 {
n /= 10;
w += 1;
}
w.max(4)
}
fn unique_filename_for_key(key: &str, fmt: Format, used: &BTreeSet<String>) -> String {
let sanitized = sanitize(key);
let base = if sanitized.is_empty() {
hash_string(key, 12)
} else {
sanitized
};
let mut filename = format!("{base}.{}", fmt.extension());
if used.contains(&filename) {
filename = format!("{base}-{}.{}", hash_string(key, 8), fmt.extension());
}
filename
}
fn unique_id_basename(item: &Value, field: &str) -> Option<String> {
let map = item.as_object()?;
let raw = match map.get(field)? {
Value::String(s) => s.clone(),
Value::Number(n) => n.to_string(),
Value::Bool(b) => b.to_string(),
_ => return None,
};
let s = sanitize(&raw);
if s.is_empty() {
None
} else {
Some(s)
}
}
fn sanitize(input: &str) -> String {
input
.chars()
.map(|c| {
if c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' {
c
} else {
'_'
}
})
.collect::<String>()
.trim_matches('.')
.to_string()
}
fn hash_string(input: &str, len: usize) -> String {
let digest = Sha256::digest(input.as_bytes());
let hex: String = digest.iter().map(|b| format!("{b:02x}")).collect();
hex.chars().take(len).collect()
}
fn hash_value(value: &Value, len: usize) -> String {
let canonical = serde_json::to_string(value).unwrap_or_default();
hash_string(&canonical, len)
}