Skip to main content

config_disassembler/
disassemble.rs

1//! Disassemble a JSON, JSON5, or YAML document into a directory of smaller
2//! files, optionally written in a different format than the input.
3
4use std::collections::{BTreeMap, BTreeSet};
5use std::fs;
6use std::path::{Path, PathBuf};
7
8use serde_json::{Map, Value};
9use sha2::{Digest, Sha256};
10
11use crate::error::{Error, Result};
12use crate::format::Format;
13use crate::meta::{Meta, Root};
14
/// Basename (without extension) of the file written for object roots that
/// collects all scalar top-level values into a single document; see
/// `write_object_root`.
const MAIN_BASENAME: &str = "_main";
17
/// Options controlling disassembly.
///
/// Construct one per invocation and pass it to `disassemble`.
#[derive(Debug, Clone)]
pub struct DisassembleOptions {
    /// Path to the input config file.
    pub input: PathBuf,
    /// Format to read the input as. If `None`, inferred from the extension.
    pub input_format: Option<Format>,
    /// Directory to write split files into. If `None`, defaults to a
    /// directory next to the input file named after the input's file stem.
    pub output_dir: Option<PathBuf>,
    /// Format to write split files in. Defaults to `input_format`.
    pub output_format: Option<Format>,
    /// For array roots, name array-element files using the value of this
    /// field if present on each element (must be a scalar).
    pub unique_id: Option<String>,
    /// If true, remove the output directory and all of its contents before
    /// writing (the directory itself is recreated).
    pub pre_purge: bool,
    /// If true, delete the input file after disassembling succeeds.
    pub post_purge: bool,
}
38
39/// Disassemble a configuration file into a directory of split files.
40///
41/// Returns the directory the files were written to.
42pub fn disassemble(opts: DisassembleOptions) -> Result<PathBuf> {
43    let input_format = match opts.input_format {
44        Some(f) => f,
45        None => Format::from_path(&opts.input)?,
46    };
47    let output_format = opts.output_format.unwrap_or(input_format);
48
49    let output_dir = match opts.output_dir.clone() {
50        Some(d) => d,
51        None => default_output_dir(&opts.input)?,
52    };
53
54    if opts.pre_purge && output_dir.exists() {
55        fs::remove_dir_all(&output_dir)?;
56    }
57    fs::create_dir_all(&output_dir)?;
58
59    let value = input_format.load(&opts.input)?;
60    let source_filename = opts
61        .input
62        .file_name()
63        .and_then(|n| n.to_str())
64        .map(|s| s.to_string());
65
66    let root = match &value {
67        Value::Object(map) => write_object_root(&output_dir, map, output_format)?,
68        Value::Array(items) => {
69            write_array_root(&output_dir, items, output_format, opts.unique_id.as_deref())?
70        }
71        _ => {
72            return Err(Error::Invalid(
73                "top-level value must be an object or array to disassemble".into(),
74            ));
75        }
76    };
77
78    let meta = Meta {
79        source_format: input_format.into(),
80        file_format: output_format.into(),
81        source_filename,
82        root,
83    };
84    meta.write(&output_dir)?;
85
86    if opts.post_purge {
87        fs::remove_file(&opts.input)?;
88    }
89
90    Ok(output_dir)
91}
92
93fn default_output_dir(input: &Path) -> Result<PathBuf> {
94    let stem = input.file_stem().and_then(|s| s.to_str()).ok_or_else(|| {
95        Error::Invalid(format!(
96            "could not derive a directory name from {}",
97            input.display()
98        ))
99    })?;
100    let parent = input.parent().unwrap_or(Path::new("."));
101    Ok(parent.join(stem))
102}
103
104fn write_object_root(dir: &Path, map: &Map<String, Value>, fmt: Format) -> Result<Root> {
105    let mut key_order: Vec<String> = Vec::with_capacity(map.len());
106    let mut key_files: BTreeMap<String, String> = BTreeMap::new();
107    let mut main_object: Map<String, Value> = Map::new();
108    let mut used_names: BTreeSet<String> = BTreeSet::new();
109    used_names.insert(format!("{MAIN_BASENAME}.{}", fmt.extension()));
110
111    for (key, value) in map {
112        key_order.push(key.clone());
113        if is_scalar(value) {
114            main_object.insert(key.clone(), value.clone());
115            continue;
116        }
117
118        let filename = unique_filename_for_key(key, fmt, &used_names);
119        used_names.insert(filename.clone());
120        let path = dir.join(&filename);
121        fs::write(&path, fmt.serialize(value)?)?;
122        key_files.insert(key.clone(), filename);
123    }
124
125    let main_file = if main_object.is_empty() {
126        None
127    } else {
128        let filename = format!("{MAIN_BASENAME}.{}", fmt.extension());
129        let path = dir.join(&filename);
130        fs::write(&path, fmt.serialize(&Value::Object(main_object))?)?;
131        Some(filename)
132    };
133
134    Ok(Root::Object {
135        key_order,
136        key_files,
137        main_file,
138    })
139}
140
141fn write_array_root(
142    dir: &Path,
143    items: &[Value],
144    fmt: Format,
145    unique_id: Option<&str>,
146) -> Result<Root> {
147    let mut files = Vec::with_capacity(items.len());
148    let mut used_names: BTreeSet<String> = BTreeSet::new();
149    let width = digit_width(items.len());
150
151    for (idx, item) in items.iter().enumerate() {
152        let mut basename = if let Some(field) = unique_id {
153            unique_id_basename(item, field)
154        } else {
155            None
156        };
157        if basename
158            .as_ref()
159            .map(|n| used_names.contains(&format!("{n}.{}", fmt.extension())))
160            .unwrap_or(false)
161        {
162            basename = None;
163        }
164        let basename = basename.unwrap_or_else(|| format!("{:0width$}", idx + 1, width = width));
165
166        let mut filename = format!("{basename}.{}", fmt.extension());
167        if used_names.contains(&filename) {
168            filename = format!("{basename}-{}.{}", hash_value(item, 8), fmt.extension());
169        }
170        used_names.insert(filename.clone());
171
172        let path = dir.join(&filename);
173        fs::write(&path, fmt.serialize(item)?)?;
174        files.push(filename);
175    }
176
177    Ok(Root::Array { files })
178}
179
180fn is_scalar(value: &Value) -> bool {
181    !matches!(value, Value::Object(_) | Value::Array(_))
182}
183
/// Number of digits needed to print indices up to `count`, with a floor of
/// 4 so small arrays still get uniformly zero-padded names.
fn digit_width(count: usize) -> usize {
    let mut width = 1;
    let mut remaining = count / 10;
    while remaining > 0 {
        remaining /= 10;
        width += 1;
    }
    width.max(4)
}
193
194fn unique_filename_for_key(key: &str, fmt: Format, used: &BTreeSet<String>) -> String {
195    let sanitized = sanitize(key);
196    let base = if sanitized.is_empty() {
197        hash_string(key, 12)
198    } else {
199        sanitized
200    };
201    let mut filename = format!("{base}.{}", fmt.extension());
202    if used.contains(&filename) {
203        filename = format!("{base}-{}.{}", hash_string(key, 8), fmt.extension());
204    }
205    filename
206}
207
208fn unique_id_basename(item: &Value, field: &str) -> Option<String> {
209    let map = item.as_object()?;
210    let raw = match map.get(field)? {
211        Value::String(s) => s.clone(),
212        Value::Number(n) => n.to_string(),
213        Value::Bool(b) => b.to_string(),
214        _ => return None,
215    };
216    let s = sanitize(&raw);
217    if s.is_empty() {
218        None
219    } else {
220        Some(s)
221    }
222}
223
/// Make `input` safe to use as a file basename component: every character
/// outside ASCII alphanumerics, `-`, `_`, and `.` becomes `_`, and any
/// leading/trailing dots are stripped (avoiding hidden/dot-relative names).
fn sanitize(input: &str) -> String {
    let mapped: String = input
        .chars()
        .map(|c| match c {
            'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.' => c,
            _ => '_',
        })
        .collect();
    mapped.trim_matches('.').to_string()
}
238
239fn hash_string(input: &str, len: usize) -> String {
240    let digest = Sha256::digest(input.as_bytes());
241    let hex: String = digest.iter().map(|b| format!("{b:02x}")).collect();
242    hex.chars().take(len).collect()
243}
244
245fn hash_value(value: &Value, len: usize) -> String {
246    let canonical = serde_json::to_string(value).unwrap_or_default();
247    hash_string(&canonical, len)
248}