Skip to main content

cmakefmt/
semantic.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Semantic-level normalisation for parsed CMake.
6//!
7//! Helpers that strip away parts of the AST that don't affect CMake
8//! semantics — comments, line endings, keyword casing — so two files
9//! can be compared "would they behave identically?" without worrying
10//! about cosmetic-only differences.
11//!
12//! Used by:
13//!
14//! - `cmakefmt --verify` and `cmakefmt --in-place` (in `main.rs`)
15//!   to confirm formatted output preserves CMake semantics.
16//! - `tests/idempotency.rs` to assert formatter idempotency on the
17//!   real-world corpus.
18//!
19//! Previously these helpers were duplicated between `main.rs` and the
20//! integration test, with a hand-coded "keep in sync" comment that
21//! Phase 47g's deduplication audit flagged. They now live here as the
22//! single source of truth.
23//!
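//! The per-command helpers `normalize_command_literals` (strip cosmetic
//! differences from a single command) and `normalize_keyword_args`
//! (uppercase known keyword tokens for case-insensitive comparison)
//! mutate a fully-parsed `CommandInvocation` in place.
//! `normalize_semantics` applies them across a whole `File`, and
//! `semantic_equivalent` compares two sources after that normalisation.
//! `normalize_line_endings` is also public so callers can normalise
//! `TemplatePlaceholder` text themselves. The keyword-collection
//! helpers stay private to the module.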
29
30use std::collections::BTreeSet;
31
32use crate::parser::ast::{Argument, CommandInvocation, File, Statement};
33use crate::parser::{self};
34use crate::spec::registry::CommandRegistry;
35use crate::spec::{CommandForm, KwargSpec};
36
37/// Return `true` if `left` and `right` are the same CMake program once
38/// cosmetic-only differences (comments, blank lines, whitespace, line
39/// endings, command-name and keyword casing) are stripped.
40///
41/// This is the equivalence relation behind `cmakefmt --verify`: a
42/// formatter run is safe exactly when `semantic_equivalent(original,
43/// formatted)` holds. It uses the built-in command registry; callers
44/// that need a customised registry (e.g. the CLI, which honours user
45/// override files) should normalise via [`normalize_semantics`] with
46/// their own registry instead.
47///
48/// If *either* side fails to parse the inputs cannot be compared, so
49/// the function conservatively returns `true` — this keeps fuzz and
50/// round-trip callers from false-positiving on inputs the parser
51/// rejects.
52pub fn semantic_equivalent(left: &str, right: &str) -> bool {
53    let registry = CommandRegistry::builtins();
54    match (parser::parse(left), parser::parse(right)) {
55        (Ok(left), Ok(right)) => {
56            normalize_semantics(left, registry) == normalize_semantics(right, registry)
57        }
58        _ => true,
59    }
60}
61
62/// Reduce a parsed file to its semantic skeleton: drop standalone
63/// comments and blank lines, zero out spans, lowercase command names,
64/// and normalise each command's literals and keyword casing. Two files
65/// that behave identically in CMake produce equal skeletons.
66pub fn normalize_semantics(mut file: File, registry: &CommandRegistry) -> File {
67    // Strip standalone comments and blank lines — they have no CMake semantic
68    // meaning and may change structure when the formatter reflows them.
69    file.statements
70        .retain(|s| !matches!(s, Statement::Comment(_) | Statement::BlankLines(_)));
71
72    for statement in &mut file.statements {
73        match statement {
74            Statement::Command(command) => {
75                command.span = (0, 0);
76                command.name.make_ascii_lowercase();
77                normalize_command_literals(command);
78                normalize_keyword_args(command, registry);
79            }
80            Statement::TemplatePlaceholder(value) => normalize_line_endings(value),
81            Statement::Comment(_) | Statement::BlankLines(_) => unreachable!(),
82        }
83    }
84
85    file
86}
87
88/// Strip comment and line-ending differences from a parsed
89/// `CommandInvocation` so two semantically-equivalent commands
90/// compare equal regardless of cosmetic formatting.
91pub fn normalize_command_literals(command: &mut CommandInvocation) {
92    // Strip trailing and inline comments — they have no CMake semantic
93    // meaning.
94    command.trailing_comment = None;
95    command
96        .arguments
97        .retain(|a| !matches!(a, Argument::InlineComment(_)));
98
99    for argument in &mut command.arguments {
100        match argument {
101            Argument::Bracket(bracket) => normalize_line_endings(&mut bracket.raw),
102            Argument::Quoted(value) | Argument::Unquoted(value) => normalize_line_endings(value),
103            Argument::InlineComment(_) => unreachable!(),
104        }
105    }
106}
107
108/// Uppercase any unquoted argument that matches a known keyword for
109/// the command's spec. CMake keywords are case-insensitive at the
110/// language level, so two files that differ only in the casing of
111/// `PUBLIC` vs `public` are semantically equivalent.
112pub fn normalize_keyword_args(command: &mut CommandInvocation, registry: &CommandRegistry) {
113    let spec = registry.get(&command.name);
114    let first_arg = command.arguments.iter().find_map(first_arg_text);
115    let form = spec.form_for(first_arg);
116    let keyword_set = collect_keywords(form);
117
118    for arg in &mut command.arguments {
119        if let Argument::Unquoted(value) = arg {
120            let upper = value.to_ascii_uppercase();
121            if keyword_set.contains(upper.as_str()) {
122                *value = upper;
123            }
124        }
125    }
126}
127
/// Rewrite every Windows-style `\r\n` pair in `value` to a plain `\n`,
/// in place.
///
/// Strings without any `\r` are left untouched. A `\r` that is not
/// immediately followed by `\n` is kept as-is.
///
/// Public because callers normalise `TemplatePlaceholder` statements
/// directly (these aren't `CommandInvocation`s and so don't go through
/// [`normalize_command_literals`]).
pub fn normalize_line_endings(value: &mut String) {
    // Cheap pre-check: skip the rebuild when there is no carriage return.
    if !value.contains('\r') {
        return;
    }

    let converted = value.replace("\r\n", "\n");
    *value = converted;
}
138
139fn first_arg_text(argument: &Argument) -> Option<&str> {
140    match argument {
141        Argument::Quoted(_) | Argument::Bracket(_) | Argument::InlineComment(_) => None,
142        Argument::Unquoted(value) => Some(value.as_str()),
143    }
144}
145
146fn collect_keywords(form: &CommandForm) -> BTreeSet<String> {
147    let mut keywords = BTreeSet::new();
148    collect_form_keywords(form, &mut keywords);
149    keywords
150}
151
152fn collect_form_keywords(form: &CommandForm, keywords: &mut BTreeSet<String>) {
153    keywords.extend(form.flags.iter().cloned());
154
155    for (name, spec) in &form.kwargs {
156        keywords.insert(name.clone());
157        collect_kwarg_keywords(spec, keywords);
158    }
159}
160
161fn collect_kwarg_keywords(spec: &KwargSpec, keywords: &mut BTreeSet<String>) {
162    keywords.extend(spec.flags.iter().cloned());
163
164    for (name, child) in &spec.kwargs {
165        keywords.insert(name.clone());
166        collect_kwarg_keywords(child, keywords);
167    }
168}