Skip to main content

config_disassembler/
format.rs

1//! Format detection, capabilities, and serialization for value-model formats.
2//!
3//! Each format in this module is loaded into a common [`serde_json::Value`].
4//! Conversion rules are expressed as format capabilities so adding another
5//! value-model format only requires registering its aliases, extensions,
6//! conversion family, and serializer/parser here.
7
8use std::fs;
9use std::path::Path;
10use std::str::FromStr;
11
12use serde::{Deserialize, Serialize};
13use serde_json::Map;
14use serde_json::Value;
15
16use crate::error::{Error, Result};
17
18/// Supported textual formats for the value-model disassembler.
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
20#[serde(rename_all = "lowercase")]
21pub enum Format {
22    Json,
23    Json5,
24    Jsonc,
25    Yaml,
26    Toon,
27    /// TOML is intentionally isolated from the other formats: TOML's
28    /// syntactic constraints (no nulls, no array root, bare keys must
29    /// precede tables) mean conversions through TOML can reorder or
30    /// fail to represent values produced by JSON/JSON5/JSONC/YAML/TOON.
31    /// TOML files can therefore only be split into TOML files and
32    /// reassembled into TOML.
33    Toml,
34}
35
/// A group of formats that may be converted into one another.
///
/// Two formats are convertible exactly when they share a family.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FormatFamily {
    /// JSON, JSON5, JSONC, YAML and TOON.
    JsonValue,
    /// TOML, which only converts to and from itself.
    Toml,
}
42
/// The operation on whose behalf a conversion edge is being validated.
///
/// It only selects the wording of the error reported for a rejected edge.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ConversionOperation {
    /// A direct format-to-format conversion.
    Convert,
    /// Reassembling a previously disassembled directory.
    Reassemble,
}
49
/// How a per-key split payload is laid out inside its own file.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SplitPayloadLayout {
    /// The payload value is written as-is.
    Direct,
    /// The payload is nested in a single-entry object keyed by the parent
    /// key it was split from.
    WrappedByParentKey,
}
55
56struct FormatSpec {
57    canonical_name: &'static str,
58    display_name: &'static str,
59    aliases: &'static [&'static str],
60    extensions: &'static [&'static str],
61    family: FormatFamily,
62    split_payload_layout: SplitPayloadLayout,
63}
64
65impl Format {
66    /// All formats handled by the value-model disassembler.
67    pub const ALL: &'static [Format] = &[
68        Format::Json,
69        Format::Json5,
70        Format::Jsonc,
71        Format::Yaml,
72        Format::Toon,
73        Format::Toml,
74    ];
75
76    const JSON_VALUE_FAMILY: &'static [Format] = &[
77        Format::Json,
78        Format::Json5,
79        Format::Jsonc,
80        Format::Yaml,
81        Format::Toon,
82    ];
83    const TOML_FAMILY: &'static [Format] = &[Format::Toml];
84
85    fn spec(self) -> &'static FormatSpec {
86        match self {
87            Format::Json => &FormatSpec {
88                canonical_name: "json",
89                display_name: "JSON",
90                aliases: &["json"],
91                extensions: &["json"],
92                family: FormatFamily::JsonValue,
93                split_payload_layout: SplitPayloadLayout::Direct,
94            },
95            Format::Json5 => &FormatSpec {
96                canonical_name: "json5",
97                display_name: "JSON5",
98                aliases: &["json5"],
99                extensions: &["json5"],
100                family: FormatFamily::JsonValue,
101                split_payload_layout: SplitPayloadLayout::Direct,
102            },
103            Format::Jsonc => &FormatSpec {
104                canonical_name: "jsonc",
105                display_name: "JSONC",
106                aliases: &["jsonc"],
107                extensions: &["jsonc"],
108                family: FormatFamily::JsonValue,
109                split_payload_layout: SplitPayloadLayout::Direct,
110            },
111            Format::Yaml => &FormatSpec {
112                canonical_name: "yaml",
113                display_name: "YAML",
114                aliases: &["yaml", "yml"],
115                extensions: &["yaml", "yml"],
116                family: FormatFamily::JsonValue,
117                split_payload_layout: SplitPayloadLayout::Direct,
118            },
119            Format::Toon => &FormatSpec {
120                canonical_name: "toon",
121                display_name: "TOON",
122                aliases: &["toon"],
123                extensions: &["toon"],
124                family: FormatFamily::JsonValue,
125                split_payload_layout: SplitPayloadLayout::Direct,
126            },
127            Format::Toml => &FormatSpec {
128                canonical_name: "toml",
129                display_name: "TOML",
130                aliases: &["toml"],
131                extensions: &["toml"],
132                family: FormatFamily::Toml,
133                split_payload_layout: SplitPayloadLayout::WrappedByParentKey,
134            },
135        }
136    }
137
138    /// Canonical file extension (without the leading dot).
139    pub fn extension(self) -> &'static str {
140        self.spec().canonical_name
141    }
142
143    /// Canonical lower-case name used in CLI and metadata.
144    pub fn canonical_name(self) -> &'static str {
145        self.spec().canonical_name
146    }
147
148    /// Human-facing display name.
149    pub fn display_name(self) -> &'static str {
150        self.spec().display_name
151    }
152
153    /// Accepted names for CLI parsing.
154    pub fn aliases(self) -> &'static [&'static str] {
155        self.spec().aliases
156    }
157
158    /// File extensions that identify this format.
159    pub fn extensions(self) -> &'static [&'static str] {
160        self.spec().extensions
161    }
162
163    /// The conversion family this format belongs to.
164    pub fn family(self) -> FormatFamily {
165        self.spec().family
166    }
167
168    /// Formats that can safely convert to/from this format.
169    pub fn compatible_formats(self) -> &'static [Format] {
170        match self.family() {
171            FormatFamily::JsonValue => Self::JSON_VALUE_FAMILY,
172            FormatFamily::Toml => Self::TOML_FAMILY,
173        }
174    }
175
176    /// Whether CLI `--input-format` / `--output-format` flags are useful
177    /// for this subcommand.
178    pub fn allows_format_overrides(self) -> bool {
179        self.compatible_formats().len() > 1
180    }
181
182    /// Whether this format participates in cross-format conversions.
183    pub fn is_cross_format_compatible(self) -> bool {
184        self.allows_format_overrides()
185    }
186
187    /// Whether this format can be converted into `output`.
188    pub fn can_convert_to(self, output: Format) -> bool {
189        self.family() == output.family()
190    }
191
192    /// Return a clear error if a conversion edge is not allowed.
193    pub fn ensure_can_convert_to(
194        self,
195        output: Format,
196        operation: ConversionOperation,
197    ) -> Result<()> {
198        if self.can_convert_to(output) {
199            return Ok(());
200        }
201
202        if let Some(name) = self
203            .family()
204            .isolated_format_name()
205            .or_else(|| output.family().isolated_format_name())
206        {
207            return match operation {
208                ConversionOperation::Convert => Err(Error::Invalid(format!(
209                    "{name} can only be converted to and from {name}; got input={self}, output={output}"
210                ))),
211                ConversionOperation::Reassemble => Err(Error::Invalid(format!(
212                    "{name} can only be reassembled to and from {name}; the disassembled \
213                     directory was written in {self} but reassembly target is {output}"
214                ))),
215            };
216        }
217
218        Err(Error::Invalid(format!(
219            "conversion from {self} to {output} is not supported"
220        )))
221    }
222
223    /// Best-effort detection of a format from a file path's extension.
224    pub fn from_path(path: &Path) -> Result<Self> {
225        let ext = path
226            .extension()
227            .and_then(|e| e.to_str())
228            .map(|e| e.to_ascii_lowercase());
229        if let Some(ext) = ext.as_deref() {
230            for format in Self::ALL {
231                if format.extensions().contains(&ext) {
232                    return Ok(*format);
233                }
234            }
235        }
236        Err(Error::UnknownFormat(path.to_path_buf()))
237    }
238
239    /// Parse a string in this format into a generic [`Value`].
240    pub fn parse(self, input: &str) -> Result<Value> {
241        match self {
242            Format::Json => Ok(serde_json::from_str(input)?),
243            Format::Json5 => Ok(json5::from_str(input)?),
244            Format::Jsonc => parse_jsonc(input),
245            Format::Yaml => Ok(serde_yaml::from_str(input)?),
246            Format::Toon => toon_format::decode_default(input)
247                .map_err(|e| Error::Invalid(format!("toon parse error: {e}"))),
248            Format::Toml => Ok(toml::from_str(input)?),
249        }
250    }
251
252    /// Serialize a [`Value`] in this format. The output is always
253    /// pretty-printed with newline-terminated content.
254    pub fn serialize(self, value: &Value) -> Result<String> {
255        let mut out = match self {
256            Format::Json => serde_json::to_string_pretty(value)?,
257            Format::Json5 => json5::to_string(value)?,
258            // JSON is a valid JSONC document. Comments from input files are
259            // treated as syntax and are not preserved in the value model.
260            Format::Jsonc => serde_json::to_string_pretty(value)?,
261            Format::Yaml => serde_yaml::to_string(value)?,
262            Format::Toon => toon_format::encode_default(value)
263                .map_err(|e| Error::Invalid(format!("toon serialize error: {e}")))?,
264            Format::Toml => serialize_toml(value)?,
265        };
266        if !out.ends_with('\n') {
267            out.push('\n');
268        }
269        Ok(out)
270    }
271
272    /// Read and parse a file in this format.
273    pub fn load(self, path: &Path) -> Result<Value> {
274        let text = fs::read_to_string(path)?;
275        self.parse(&text)
276    }
277
278    /// Prepare a per-key split payload for this format.
279    ///
280    /// Most formats can write the payload value directly. TOML wraps the
281    /// payload under its parent key so every split file remains a valid TOML
282    /// table document.
283    pub fn wrap_split_payload(self, key: &str, value: &Value) -> Value {
284        match self.spec().split_payload_layout {
285            SplitPayloadLayout::Direct => value.clone(),
286            SplitPayloadLayout::WrappedByParentKey => {
287                let mut wrapper = Map::new();
288                wrapper.insert(key.to_string(), value.clone());
289                Value::Object(wrapper)
290            }
291        }
292    }
293
294    /// Reverse [`Format::wrap_split_payload`] while reassembling.
295    pub fn unwrap_split_payload(self, key: &str, filename: &str, loaded: Value) -> Result<Value> {
296        match self.spec().split_payload_layout {
297            SplitPayloadLayout::Direct => Ok(loaded),
298            SplitPayloadLayout::WrappedByParentKey => {
299                let Value::Object(mut map) = loaded else {
300                    return Err(Error::Invalid(format!(
301                        "{} file `{filename}` did not deserialize to a table",
302                        self.display_name()
303                    )));
304                };
305                map.remove(key).ok_or_else(|| {
306                    Error::Invalid(format!(
307                        "{} file `{filename}` does not contain expected wrapper key `{key}`",
308                        self.display_name()
309                    ))
310                })
311            }
312        }
313    }
314
315    /// Canonical CLI names for all registered formats.
316    pub fn supported_format_list() -> String {
317        Self::ALL
318            .iter()
319            .map(|f| f.canonical_name())
320            .collect::<Vec<_>>()
321            .join(", ")
322    }
323}
324
325impl FromStr for Format {
326    type Err = Error;
327
328    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
329        let s = s.to_ascii_lowercase();
330        for format in Format::ALL {
331            if format.aliases().contains(&s.as_str()) {
332                return Ok(*format);
333            }
334        }
335        Err(Error::Usage(format!(
336            "unknown format `{s}`; expected {}",
337            Format::supported_format_list()
338        )))
339    }
340}
341
342impl FormatFamily {
343    fn isolated_format_name(self) -> Option<&'static str> {
344        match self {
345            FormatFamily::JsonValue => None,
346            FormatFamily::Toml => Some("TOML"),
347        }
348    }
349}
350
351/// Serialize a `Value` as TOML.
352///
353/// TOML cannot represent `null` and the document root must be a table,
354/// so this function pre-validates and returns a clear error before
355/// invoking the underlying TOML serializer.
356fn serialize_toml(value: &Value) -> Result<String> {
357    if !matches!(value, Value::Object(_)) {
358        return Err(Error::Invalid(
359            "TOML documents must have a table (object) root; got an array or scalar".into(),
360        ));
361    }
362    if let Some(path) = find_null_path(value, "") {
363        return Err(Error::Invalid(format!(
364            "TOML cannot represent null values (found at `{}`)",
365            if path.is_empty() { "<root>" } else { &path }
366        )));
367    }
368    // Pre-validation above (root must be a table, no null values) covers
369    // every case the `toml` crate would reject for a `serde_json::Value`
370    // constructed through the normal serde API, so a serialization error
371    // here would indicate an unexpected toml-crate behavior; surface it
372    // with a clear `Invalid` error rather than a dedicated variant.
373    toml::to_string_pretty(value).map_err(|e| Error::Invalid(format!("toml serialize error: {e}")))
374}
375
376/// Parse JSONC as JSON plus comments and trailing commas.
377///
378/// The upstream parser defaults are intentionally loose, so keep the accepted
379/// syntax close to JSONC rather than expanding this into JSON5.
380fn parse_jsonc(input: &str) -> Result<Value> {
381    jsonc_parser::parse_to_serde_value(input, &jsonc_parse_options())
382        .map_err(|e| Error::Invalid(format!("jsonc parse error: {e}")))
383}
384
385pub(crate) fn jsonc_parse_options() -> jsonc_parser::ParseOptions {
386    jsonc_parser::ParseOptions {
387        allow_comments: true,
388        allow_trailing_commas: true,
389        allow_loose_object_property_names: false,
390        allow_missing_commas: false,
391        allow_single_quoted_strings: false,
392        allow_hexadecimal_numbers: false,
393        allow_unary_plus_numbers: false,
394    }
395}
396
397/// Walks a `Value` and returns the first dotted path to a `Null`, if any.
398fn find_null_path(value: &Value, prefix: &str) -> Option<String> {
399    match value {
400        Value::Null => Some(prefix.to_string()),
401        Value::Object(map) => {
402            for (k, v) in map {
403                let next = if prefix.is_empty() {
404                    k.clone()
405                } else {
406                    format!("{prefix}.{k}")
407                };
408                if let Some(p) = find_null_path(v, &next) {
409                    return Some(p);
410                }
411            }
412            None
413        }
414        Value::Array(items) => {
415            for (i, v) in items.iter().enumerate() {
416                let next = format!("{prefix}[{i}]");
417                if let Some(p) = find_null_path(v, &next) {
418                    return Some(p);
419                }
420            }
421            None
422        }
423        _ => None,
424    }
425}
426
427impl std::fmt::Display for Format {
428    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
429        f.write_str(self.canonical_name())
430    }
431}
432
#[cfg(test)]
mod tests {
    use super::*;

    /// Formats that belong to the cross-convertible JSON-value family.
    const JSON_VALUE_FORMATS: [Format; 5] = [
        Format::Json,
        Format::Json5,
        Format::Jsonc,
        Format::Yaml,
        Format::Toon,
    ];

    #[test]
    fn from_str_accepts_canonical_and_aliases() {
        let cases = [
            ("json", Format::Json),
            ("JSON5", Format::Json5),
            ("jsonc", Format::Jsonc),
            ("yaml", Format::Yaml),
            ("yml", Format::Yaml),
            ("toon", Format::Toon),
            ("toml", Format::Toml),
        ];
        for (name, expected) in cases {
            assert_eq!(name.parse::<Format>().unwrap(), expected, "name: {name}");
        }
    }

    #[test]
    fn from_str_rejects_unknown() {
        let message = "xml".parse::<Format>().unwrap_err().to_string();
        assert!(message.contains("unknown format"));
    }

    #[test]
    fn from_path_detects_supported_extensions() {
        let cases = [
            ("a.json", Format::Json),
            ("a.JSON5", Format::Json5),
            ("a.JSONC", Format::Jsonc),
            ("a.yml", Format::Yaml),
            ("a.toon", Format::Toon),
            ("a.toml", Format::Toml),
        ];
        for (file, expected) in cases {
            let detected = Format::from_path(Path::new(file)).unwrap();
            assert_eq!(detected, expected, "file: {file}");
        }
    }

    #[test]
    fn from_path_rejects_missing_or_unknown_extension() {
        for file in ["a", "a.ini"] {
            assert!(Format::from_path(Path::new(file)).is_err(), "file: {file}");
        }
    }

    #[test]
    fn display_matches_extension() {
        let cases = [
            (Format::Json, "json"),
            (Format::Json5, "json5"),
            (Format::Jsonc, "jsonc"),
            (Format::Yaml, "yaml"),
            (Format::Toon, "toon"),
            (Format::Toml, "toml"),
        ];
        for (format, expected) in cases {
            assert_eq!(format.to_string(), expected);
        }
    }

    #[test]
    fn parse_and_serialize_round_trip_for_all_formats() {
        let samples = [
            (Format::Json, r#"{"a":1}"#),
            (Format::Json5, "{ a: 1 }"),
            (Format::Jsonc, "{ \"a\": 1, } // kept as syntax only"),
            (Format::Yaml, "a: 1\n"),
            (Format::Toon, "a: 1\n"),
            (Format::Toml, "a = 1\n"),
        ];
        for (format, source) in samples {
            let parsed = format.parse(source).unwrap();
            let rendered = format.serialize(&parsed).unwrap();
            assert!(rendered.ends_with('\n'));
            assert_eq!(format.parse(&rendered).unwrap(), parsed);
        }
    }

    #[test]
    fn toml_rejects_array_root() {
        let root: Value = serde_json::json!([1, 2, 3]);
        let err = Format::Toml.serialize(&root).unwrap_err();
        assert!(err.to_string().contains("table"), "got: {err}");
    }

    #[test]
    fn toml_rejects_null_values() {
        let doc: Value = serde_json::json!({ "outer": { "inner": null } });
        let err = Format::Toml.serialize(&doc).unwrap_err();
        for needle in ["null", "outer.inner"] {
            assert!(err.to_string().contains(needle), "got: {err}");
        }
    }

    #[test]
    fn toml_rejects_null_inside_array() {
        let doc: Value = serde_json::json!({ "items": [1, null, 3] });
        let err = Format::Toml.serialize(&doc).unwrap_err();
        for needle in ["null", "items[1]"] {
            assert!(err.to_string().contains(needle), "got: {err}");
        }
    }

    #[test]
    fn cross_format_compatibility_excludes_toml() {
        for format in JSON_VALUE_FORMATS {
            assert!(format.is_cross_format_compatible(), "format: {format}");
        }
        assert!(!Format::Toml.is_cross_format_compatible());
    }

    #[test]
    fn compatible_formats_are_grouped_by_conversion_family() {
        assert_eq!(Format::Json.compatible_formats(), &JSON_VALUE_FORMATS);
        assert_eq!(Format::Toml.compatible_formats(), &[Format::Toml]);
    }

    #[test]
    fn jsonc_accepts_comments_and_trailing_commas_only() {
        let source = "{\n  // JSONC comment\n  \"name\": \"demo\",\n  \"items\": [1, 2,],\n}";
        let parsed = Format::Jsonc.parse(source).unwrap();
        let expected = serde_json::json!({ "name": "demo", "items": [1, 2] });
        assert_eq!(parsed, expected);

        // Unquoted keys and single-quoted strings are JSON5, not JSONC.
        let err = Format::Jsonc.parse("{ name: 'json5-only' }").unwrap_err();
        assert!(err.to_string().contains("jsonc parse error"));
    }

    #[test]
    fn conversion_rules_reject_cross_family_edges() {
        let same_family =
            Format::Json.ensure_can_convert_to(Format::Yaml, ConversionOperation::Convert);
        assert!(same_family.is_ok());

        let err = Format::Json
            .ensure_can_convert_to(Format::Toml, ConversionOperation::Convert)
            .unwrap_err();
        assert!(err.to_string().contains("TOML can only be converted"));
    }

    #[test]
    fn split_payload_wrapping_is_capability_driven() {
        let payload = serde_json::json!([{ "host": "a" }]);

        // Direct layouts hand the payload back unchanged.
        assert_eq!(Format::Json.wrap_split_payload("servers", &payload), payload);

        // TOML nests it under the parent key and unwraps it again.
        let wrapped = Format::Toml.wrap_split_payload("servers", &payload);
        assert_eq!(wrapped, serde_json::json!({ "servers": payload }));

        let unwrapped = Format::Toml
            .unwrap_split_payload("servers", "servers.toml", wrapped)
            .unwrap();
        assert_eq!(unwrapped, serde_json::json!([{ "host": "a" }]));
    }
}