Skip to main content

cmakefmt/spec/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Command-spec data model used by the formatter.
6//!
7//! The built-in registry describes the argument structure of known commands so
8//! the formatter can recognize positional arguments, keywords, flags, and
9//! command-specific layout hints.
10//!
11//! # Entry point
12//!
13//! Use [`crate::CommandRegistry`] to obtain a resolved registry —
14//! either [`CommandRegistry::builtins`](crate::CommandRegistry::builtins)
15//! for the lazily-initialised built-in singleton, or
16//! [`CommandRegistry::from_builtins_and_overrides`](crate::CommandRegistry::from_builtins_and_overrides)
17//! to merge a user override file on top of the built-ins.
18//!
19//! # Where the built-in spec lives
20//!
21//! The full CMake standard-library spec is compiled into the binary
22//! from `src/spec/builtins.yaml`. That file also carries a
23//! `[metadata]` block recording the upstream CMake version it was
24//! last audited against; the same version is reported by
25//! [`CommandRegistry::audited_cmake_version`](crate::CommandRegistry::audited_cmake_version).
26
27pub mod registry;
28
29use indexmap::{IndexMap, IndexSet};
30use serde::{Deserialize, Deserializer, Serialize, Serializer};
31use std::fmt;
32
33// ── NArgs ────────────────────────────────────────────────────────────────────
34
/// Arity of a positional slot or of the value list following a keyword.
///
/// Accepted TOML spellings:
///   - an integer, e.g. `nargs = 1`  → `Fixed(1)`
///   - `"*"`                         → `ZeroOrMore`
///   - `"+"`                         → `OneOrMore`
///   - `"?"`                         → `Optional`
///   - `"N+"`, e.g. `"2+"`           → `AtLeast(2)`
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum NArgs {
    /// Exactly `n` values. `Fixed(0)` marks a keyword that carries no
    /// values of its own.
    Fixed(usize),
    /// Any number of values, including none: the keyword may stand
    /// alone or be followed by values up to the next sibling keyword.
    /// This is the default arity.
    #[default]
    ZeroOrMore,
    /// At least one value. CMake requires the first value, so the
    /// splitter consumes it unconditionally — even when it happens to
    /// spell the name of a sibling keyword.
    OneOrMore,
    /// Zero values or exactly one value.
    Optional,
    /// A minimum of `n` values; further values are consumed until the
    /// next sibling keyword.
    AtLeast(usize),
}
65
66impl Serialize for NArgs {
67    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
68        match self {
69            NArgs::Fixed(value) => serializer.serialize_u64(*value as u64),
70            NArgs::ZeroOrMore => serializer.serialize_str("*"),
71            NArgs::OneOrMore => serializer.serialize_str("+"),
72            NArgs::Optional => serializer.serialize_str("?"),
73            NArgs::AtLeast(value) => serializer.serialize_str(&format!("{value}+")),
74        }
75    }
76}
77
78impl<'de> Deserialize<'de> for NArgs {
79    fn deserialize<D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
80        struct Visitor;
81
82        impl<'de> serde::de::Visitor<'de> for Visitor {
83            type Value = NArgs;
84
85            fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
86                write!(f, r#"integer or string ("*", "+", "?", "N+")"#)
87            }
88
89            fn visit_u64<E: serde::de::Error>(self, v: u64) -> Result<NArgs, E> {
90                Ok(NArgs::Fixed(v as usize))
91            }
92
93            fn visit_i64<E: serde::de::Error>(self, v: i64) -> Result<NArgs, E> {
94                Ok(NArgs::Fixed(v.max(0) as usize))
95            }
96
97            fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<NArgs, E> {
98                match v {
99                    "*" => Ok(NArgs::ZeroOrMore),
100                    "+" => Ok(NArgs::OneOrMore),
101                    "?" => Ok(NArgs::Optional),
102                    s if s.ends_with('+') && s.len() > 1 => {
103                        let n = s[..s.len() - 1]
104                            .parse::<usize>()
105                            .map_err(|_| E::custom(format!("invalid NArgs pattern: {s}")))?;
106                        Ok(NArgs::AtLeast(n))
107                    }
108                    s => {
109                        let n = s
110                            .parse::<usize>()
111                            .map_err(|_| E::custom(format!("invalid NArgs value: {s}")))?;
112                        Ok(NArgs::Fixed(n))
113                    }
114                }
115            }
116        }
117
118        d.deserialize_any(Visitor)
119    }
120}
121
122// ── Fully specified command model ────────────────────────────────────────────
123
124/// Per-command-form layout hints that override global [`crate::Config`] values.
125#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Serialize)]
126#[serde(deny_unknown_fields)]
127#[non_exhaustive]
128pub struct LayoutOverrides {
129    /// Override line width for this command form.
130    pub line_width: Option<usize>,
131    /// Override indentation width for this command form.
132    pub tab_size: Option<usize>,
133    /// Override dangling-paren behavior for this command form.
134    pub dangle_parens: Option<bool>,
135    /// Force this command form into a wrapped layout.
136    pub always_wrap: Option<bool>,
137    /// Override the positional-argument hanging-wrap threshold for this form.
138    pub max_pargs_hwrap: Option<usize>,
139    /// Keep the first positional argument on the command line when wrapping.
140    /// When `true`, wrapping happens after the first argument with
141    /// continuation lines aligned to the open parenthesis. When `false`,
142    /// all arguments wrap to the next line at the base indent.
143    pub wrap_after_first_arg: Option<bool>,
144    /// Override continuation-alignment behaviour for this command form.
145    pub continuation_align: Option<crate::config::ContinuationAlign>,
146}
147
148/// Specification for a keyword section and any nested sub-keywords it accepts.
149#[derive(Debug, Clone, Default, PartialEq, Eq, Deserialize, Serialize)]
150#[serde(deny_unknown_fields)]
151#[non_exhaustive]
152pub struct KwargSpec {
153    /// Number of positional arguments accepted after the keyword itself.
154    #[serde(default)]
155    pub nargs: NArgs,
156    /// Nested keywords that may appear after this keyword.
157    #[serde(default)]
158    pub kwargs: IndexMap<String, KwargSpec>,
159    /// Flag tokens accepted within this keyword section.
160    #[serde(default)]
161    pub flags: IndexSet<String>,
162    /// When `true`, arguments in this keyword section may be sorted
163    /// lexicographically if `enable_sort` is enabled in the config.
164    #[serde(default)]
165    pub sortable: bool,
166    /// When `true`, the autosort heuristic must never reorder
167    /// arguments in this section. Use for kwargs whose value list has
168    /// positional semantics that flat sorting would corrupt — e.g.
169    /// `PROPERTY <name> <values…>` in `set_property` or the
170    /// `<name> <value>` pair structure under `PROPERTIES`. The spec's
171    /// `sortable: true` setting still wins over this — if a section is
172    /// explicitly marked sortable, that's a deliberate opt-in.
173    #[serde(default)]
174    pub no_autosort: bool,
175}
176
177/// One fully resolved command form.
178#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
179#[serde(deny_unknown_fields)]
180#[non_exhaustive]
181pub struct CommandForm {
182    /// Number of positional arguments before keyword/flag processing starts.
183    #[serde(default)]
184    pub pargs: NArgs,
185    /// Recognized top-level keywords for this form.
186    #[serde(default)]
187    pub kwargs: IndexMap<String, KwargSpec>,
188    /// Recognized top-level flags for this form.
189    #[serde(default)]
190    pub flags: IndexSet<String>,
191    /// Optional per-form layout hints. `None` means "inherit every
192    /// layout decision from the global [`crate::Config`]"; `Some`
193    /// overrides only the fields that are set, with unset fields
194    /// still falling back to the global config.
195    #[serde(default)]
196    pub layout: Option<LayoutOverrides>,
197}
198
199impl Default for CommandForm {
200    fn default() -> Self {
201        Self {
202            pargs: NArgs::ZeroOrMore,
203            kwargs: IndexMap::new(),
204            flags: IndexSet::new(),
205            layout: None,
206        }
207    }
208}
209
210#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
211#[serde(untagged)]
212#[non_exhaustive]
213pub enum CommandSpec {
214    /// A command whose structure depends on a discriminator token,
215    /// usually the first positional argument. `file(...)`,
216    /// `install(...)`, and `export(...)` are canonical examples —
217    /// their argument shape differs entirely based on the first
218    /// token (`TARGETS`, `FILES`, `DIRECTORY`, …).
219    Discriminated {
220        /// Known forms keyed by normalized discriminator token.
221        forms: IndexMap<String, CommandForm>,
222        /// Fallback form to use when no discriminator matches.
223        #[serde(default)]
224        fallback: Option<CommandForm>,
225    },
226    /// A command with a single argument structure. Most CMake
227    /// commands fall here — `target_link_libraries`, `project`,
228    /// `cmake_minimum_required`, user-defined commands, etc.
229    Single(CommandForm),
230}
231
232impl CommandSpec {
233    /// Resolve the command form for a specific invocation.
234    ///
235    /// `first_arg` is typically the first non-comment argument in the call and
236    /// is used for discriminated commands such as `file(...)` or `install(...)`.
237    pub fn form_for(&self, first_arg: Option<&str>) -> &CommandForm {
238        match self {
239            CommandSpec::Single(form) => form,
240            CommandSpec::Discriminated { forms, fallback } => {
241                let key = first_arg.unwrap_or_default();
242                forms
243                    .get(key)
244                    .or_else(|| {
245                        has_ascii_lowercase(key)
246                            .then(|| key.to_ascii_uppercase())
247                            .and_then(|normalized| forms.get(&normalized))
248                    })
249                    .or(fallback.as_ref())
250                    .or_else(|| forms.values().next())
251                    // Last-resort default for the ill-formed case where a
252                    // user-supplied override declares a `Discriminated`
253                    // spec with an empty `forms` map and no `fallback`.
254                    // Previously this branch panicked via `.expect()`,
255                    // making malformed override files crash the
256                    // formatter rather than degrade gracefully.
257                    .unwrap_or_else(|| empty_command_form())
258            }
259        }
260    }
261}
262
263fn empty_command_form() -> &'static CommandForm {
264    static EMPTY: std::sync::OnceLock<CommandForm> = std::sync::OnceLock::new();
265    EMPTY.get_or_init(CommandForm::default)
266}
267
/// Returns `true` when `s` contains at least one ASCII lowercase letter
/// (`a`–`z`). Non-ASCII characters never count.
pub(crate) fn has_ascii_lowercase(s: &str) -> bool {
    s.as_bytes().iter().any(u8::is_ascii_lowercase)
}
271
/// Returns `true` when `s` contains at least one ASCII uppercase letter
/// (`A`–`Z`). Non-ASCII characters never count.
pub(crate) fn has_ascii_uppercase(s: &str) -> bool {
    s.as_bytes().iter().any(u8::is_ascii_uppercase)
}
275
276#[derive(Debug, Clone, PartialEq, Eq, Default, Deserialize, Serialize)]
277pub(crate) struct SpecMetadata {
278    /// Upstream CMake version the built-in spec was last audited against.
279    #[serde(default)]
280    pub cmake_version: String,
281    /// Date of the most recent audit.
282    #[serde(default)]
283    pub audited_at: String,
284    /// Free-form notes about the current audit state.
285    #[serde(default)]
286    pub notes: String,
287}
288
289/// Top-level spec file containing metadata plus command entries.
290#[derive(Debug, Default, Deserialize, Serialize)]
291pub(crate) struct SpecFile {
292    /// Version and audit metadata for the built-in spec surface.
293    #[serde(default)]
294    pub metadata: SpecMetadata,
295    /// Built-in command specifications keyed by command name.
296    #[serde(default)]
297    pub commands: IndexMap<String, CommandSpec>,
298}
299
300// ── Mergeable override model ─────────────────────────────────────────────────
301
302#[derive(Debug, Clone, Default, Deserialize, Serialize)]
303#[serde(deny_unknown_fields)]
304pub(crate) struct LayoutOverridesOverride {
305    /// Override line width for this command form.
306    #[serde(skip_serializing_if = "Option::is_none")]
307    pub line_width: Option<usize>,
308    /// Override indentation width for this command form.
309    #[serde(skip_serializing_if = "Option::is_none")]
310    pub tab_size: Option<usize>,
311    /// Override dangling-paren behavior for this command form.
312    #[serde(skip_serializing_if = "Option::is_none")]
313    pub dangle_parens: Option<bool>,
314    /// Force this command form into a wrapped layout.
315    #[serde(skip_serializing_if = "Option::is_none")]
316    pub always_wrap: Option<bool>,
317    /// Override the positional-argument hanging-wrap threshold for this form.
318    #[serde(skip_serializing_if = "Option::is_none")]
319    pub max_pargs_hwrap: Option<usize>,
320    /// Keep the first positional argument on the command line when wrapping.
321    #[serde(skip_serializing_if = "Option::is_none")]
322    pub wrap_after_first_arg: Option<bool>,
323    /// Override continuation-alignment behaviour for this command form.
324    #[serde(skip_serializing_if = "Option::is_none")]
325    pub continuation_align: Option<crate::config::ContinuationAlign>,
326}
327
328/// Partial override for a keyword specification.
329#[derive(Debug, Clone, Default, Deserialize, Serialize)]
330#[serde(deny_unknown_fields)]
331pub(crate) struct KwargSpecOverride {
332    /// Override the number of positional arguments accepted after the keyword.
333    #[serde(skip_serializing_if = "Option::is_none")]
334    pub nargs: Option<NArgs>,
335    /// Nested keyword overrides.
336    #[serde(default)]
337    #[serde(skip_serializing_if = "IndexMap::is_empty")]
338    pub kwargs: IndexMap<String, KwargSpecOverride>,
339    /// Additional supported flags.
340    #[serde(default)]
341    #[serde(skip_serializing_if = "IndexSet::is_empty")]
342    pub flags: IndexSet<String>,
343    /// Mark this keyword section as sortable.
344    #[serde(default)]
345    pub sortable: bool,
346    /// Mark this keyword section as exempt from autosort.
347    #[serde(default)]
348    pub no_autosort: bool,
349}
350
351/// Partial override for a command form.
352#[derive(Debug, Clone, Default, Deserialize, Serialize)]
353#[serde(deny_unknown_fields)]
354pub(crate) struct CommandFormOverride {
355    /// Override the positional argument count for the form.
356    #[serde(skip_serializing_if = "Option::is_none")]
357    pub pargs: Option<NArgs>,
358    /// Keyword overrides to merge into the form.
359    #[serde(default)]
360    #[serde(skip_serializing_if = "IndexMap::is_empty")]
361    pub kwargs: IndexMap<String, KwargSpecOverride>,
362    /// Additional supported flags.
363    #[serde(default)]
364    #[serde(skip_serializing_if = "IndexSet::is_empty")]
365    pub flags: IndexSet<String>,
366    /// Optional layout overrides for the form.
367    #[serde(skip_serializing_if = "Option::is_none")]
368    pub layout: Option<LayoutOverridesOverride>,
369}
370
371/// Partial override for a full command spec.
372#[derive(Debug, Clone, Deserialize, Serialize)]
373#[serde(untagged)]
374pub(crate) enum CommandSpecOverride {
375    /// Override a single-form command.
376    Single(CommandFormOverride),
377    /// Override one or more discriminated forms.
378    Discriminated {
379        /// Per-discriminator form overrides.
380        #[serde(default)]
381        #[serde(skip_serializing_if = "IndexMap::is_empty")]
382        forms: IndexMap<String, CommandFormOverride>,
383        /// Optional fallback form override.
384        #[serde(default)]
385        #[serde(skip_serializing_if = "Option::is_none")]
386        fallback: Option<CommandFormOverride>,
387    },
388}
389
390/// Top-level user override file containing command overrides only.
391#[derive(Debug, Default, Deserialize, Serialize)]
392pub(crate) struct SpecOverrideFile {
393    /// Override specs keyed by command name.
394    #[serde(default)]
395    pub commands: IndexMap<String, CommandSpecOverride>,
396}
397
398impl CommandSpecOverride {
399    /// Convert a partial override into a fully specified standalone command
400    /// spec.
401    pub(crate) fn into_full_spec(self) -> CommandSpec {
402        match self {
403            CommandSpecOverride::Single(form) => CommandSpec::Single(form.into_full_form()),
404            CommandSpecOverride::Discriminated { forms, fallback } => CommandSpec::Discriminated {
405                forms: forms
406                    .into_iter()
407                    .map(|(name, form)| (name.to_ascii_uppercase(), form.into_full_form()))
408                    .collect(),
409                fallback: fallback.map(CommandFormOverride::into_full_form),
410            },
411        }
412    }
413}
414
415impl CommandFormOverride {
416    /// Convert a partial command form override into a fully specified form.
417    pub(crate) fn into_full_form(self) -> CommandForm {
418        CommandForm {
419            pargs: self.pargs.unwrap_or_default(),
420            kwargs: self
421                .kwargs
422                .into_iter()
423                .map(|(name, spec)| (name.to_ascii_uppercase(), spec.into_full_spec()))
424                .collect(),
425            flags: self
426                .flags
427                .into_iter()
428                .map(|flag| flag.to_ascii_uppercase())
429                .collect(),
430            layout: self.layout.map(LayoutOverridesOverride::into_full_layout),
431        }
432    }
433}
434
435impl KwargSpecOverride {
436    /// Convert a partial keyword override into a fully specified keyword spec.
437    pub(crate) fn into_full_spec(self) -> KwargSpec {
438        KwargSpec {
439            nargs: self.nargs.unwrap_or_default(),
440            kwargs: self
441                .kwargs
442                .into_iter()
443                .map(|(name, spec)| (name.to_ascii_uppercase(), spec.into_full_spec()))
444                .collect(),
445            flags: self
446                .flags
447                .into_iter()
448                .map(|flag| flag.to_ascii_uppercase())
449                .collect(),
450            sortable: self.sortable,
451            no_autosort: self.no_autosort,
452        }
453    }
454}
455
456impl LayoutOverridesOverride {
457    /// Convert a partial layout override into a fully specified layout block.
458    pub(crate) fn into_full_layout(self) -> LayoutOverrides {
459        LayoutOverrides {
460            line_width: self.line_width,
461            tab_size: self.tab_size,
462            dangle_parens: self.dangle_parens,
463            always_wrap: self.always_wrap,
464            max_pargs_hwrap: self.max_pargs_hwrap,
465            wrap_after_first_arg: self.wrap_after_first_arg,
466            continuation_align: self.continuation_align,
467        }
468    }
469}
470
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserialize `src` as a TOML [`KwargSpec`] and return its `nargs`.
    fn kwarg_nargs(src: &str) -> NArgs {
        let spec: KwargSpec = toml::from_str(src).unwrap();
        spec.nargs
    }

    // Every variant must survive a JSON encode/decode cycle unchanged.
    #[test]
    fn nargs_serialize_round_trip() {
        for value in [
            NArgs::Fixed(3),
            NArgs::ZeroOrMore,
            NArgs::OneOrMore,
            NArgs::Optional,
            NArgs::AtLeast(2),
        ] {
            let encoded = serde_json::to_string(&value).unwrap();
            let decoded: NArgs = serde_json::from_str(&encoded).unwrap();
            assert_eq!(decoded, value);
        }
    }

    #[test]
    fn nargs_invalid_pattern_is_rejected() {
        let message = toml::from_str::<KwargSpec>("nargs = \"abc+\"\n")
            .unwrap_err()
            .to_string();
        assert!(message.contains("invalid NArgs pattern"));
    }

    #[test]
    fn nargs_integer() {
        assert_eq!(kwarg_nargs("nargs = 1\n"), NArgs::Fixed(1));
    }

    #[test]
    fn nargs_zero_or_more() {
        assert_eq!(kwarg_nargs("nargs = \"*\"\n"), NArgs::ZeroOrMore);
    }

    #[test]
    fn nargs_one_or_more() {
        assert_eq!(kwarg_nargs("nargs = \"+\"\n"), NArgs::OneOrMore);
    }

    #[test]
    fn nargs_optional() {
        assert_eq!(kwarg_nargs("nargs = \"?\"\n"), NArgs::Optional);
    }

    #[test]
    fn nargs_at_least() {
        assert_eq!(kwarg_nargs("nargs = \"2+\"\n"), NArgs::AtLeast(2));
    }

    #[test]
    fn single_command_form() {
        let parsed: CommandForm = toml::from_str(
            r#"
pargs = 1
flags = ["REQUIRED"]

[kwargs.COMPONENTS]
nargs = "+"
"#,
        )
        .unwrap();
        assert_eq!(parsed.pargs, NArgs::Fixed(1));
        assert!(parsed.flags.contains("REQUIRED"));
        assert!(parsed.kwargs.contains_key("COMPONENTS"));
    }

    // A lower-case discriminator must still resolve to the upper-case form.
    #[test]
    fn discriminated_command() {
        let parsed: CommandSpec = toml::from_str(
            r#"
[forms.TARGETS]
pargs = "+"

[forms.TARGETS.kwargs.DESTINATION]
nargs = 1

[forms.FILES]
pargs = "+"
"#,
        )
        .unwrap();
        assert!(matches!(parsed, CommandSpec::Discriminated { .. }));
        let resolved = parsed.form_for(Some("targets"));
        assert!(resolved.kwargs.contains_key("DESTINATION"));
    }

    #[test]
    fn discriminated_command_uses_fallback_when_no_key_matches() {
        let parsed: CommandSpec = toml::from_str(
            r#"
[forms.FILE]
pargs = 1

[fallback]
pargs = 2
"#,
        )
        .unwrap();
        let resolved = parsed.form_for(Some("unknown"));
        assert_eq!(resolved.pargs, NArgs::Fixed(2));
    }

    #[test]
    fn command_spec_override_into_full_spec_normalizes_casing() {
        let sources = KwargSpecOverride {
            nargs: Some(NArgs::OneOrMore),
            ..KwargSpecOverride::default()
        };
        let layout = LayoutOverridesOverride {
            always_wrap: Some(true),
            ..LayoutOverridesOverride::default()
        };
        let override_spec = CommandSpecOverride::Single(CommandFormOverride {
            pargs: Some(NArgs::Fixed(1)),
            flags: ["quiet".to_owned()].into_iter().collect(),
            kwargs: [("sources".to_owned(), sources)].into_iter().collect(),
            layout: Some(layout),
        });

        let full = override_spec.into_full_spec();
        let resolved = full.form_for(None);
        assert!(resolved.flags.contains("QUIET"));
        assert!(resolved.kwargs.contains_key("SOURCES"));
        assert_eq!(resolved.kwargs["SOURCES"].nargs, NArgs::OneOrMore);
        assert_eq!(resolved.layout.as_ref().unwrap().always_wrap, Some(true));
    }

    // Checks that a partial override parses from TOML with the expected
    // fields set.
    #[test]
    fn partial_override_round_trips() {
        let parsed: CommandFormOverride = toml::from_str(
            r#"
layout.always_wrap = true

[kwargs.COMPONENTS]
nargs = "+"
"#,
        )
        .unwrap();
        assert_eq!(parsed.layout.unwrap().always_wrap, Some(true));
        assert_eq!(parsed.kwargs["COMPONENTS"].nargs, Some(NArgs::OneOrMore));
    }
}