cmakefmt/semantic.rs
1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Semantic-level normalisation for parsed CMake.
6//!
7//! Helpers that strip away parts of the AST that don't affect CMake
8//! semantics — comments, line endings, keyword casing — so two files
9//! can be compared "would they behave identically?" without worrying
10//! about cosmetic-only differences.
11//!
12//! Used by:
13//!
14//! - `cmakefmt --verify` and `cmakefmt --in-place` (in `main.rs`)
15//! to confirm formatted output preserves CMake semantics.
16//! - `tests/idempotency.rs` to assert formatter idempotency on the
17//! real-world corpus.
18//!
19//! Previously these helpers were duplicated between `main.rs` and the
20//! integration test, with a hand-coded "keep in sync" comment that
21//! Phase 47g's deduplication audit flagged. They now live here as the
22//! single source of truth.
23//!
//! Most helpers walk a fully-parsed `CommandInvocation` in place. The
//! public surface is `normalize_command_literals` (strip cosmetic
//! differences from a single command), `normalize_keyword_args`
//! (uppercase known keyword tokens for case-insensitive comparison),
//! and `normalize_line_endings` (rewrite `\r\n` to `\n` in a single
//! string). The remaining helpers stay private to the module.
29
30use std::collections::BTreeSet;
31
32use crate::parser::ast::{Argument, CommandInvocation};
33use crate::spec::registry::CommandRegistry;
34use crate::spec::{CommandForm, KwargSpec};
35
36/// Strip comment and line-ending differences from a parsed
37/// `CommandInvocation` so two semantically-equivalent commands
38/// compare equal regardless of cosmetic formatting.
39pub fn normalize_command_literals(command: &mut CommandInvocation) {
40 // Strip trailing and inline comments — they have no CMake semantic
41 // meaning.
42 command.trailing_comment = None;
43 command
44 .arguments
45 .retain(|a| !matches!(a, Argument::InlineComment(_)));
46
47 for argument in &mut command.arguments {
48 match argument {
49 Argument::Bracket(bracket) => normalize_line_endings(&mut bracket.raw),
50 Argument::Quoted(value) | Argument::Unquoted(value) => normalize_line_endings(value),
51 Argument::InlineComment(_) => unreachable!(),
52 }
53 }
54}
55
56/// Uppercase any unquoted argument that matches a known keyword for
57/// the command's spec. CMake keywords are case-insensitive at the
58/// language level, so two files that differ only in the casing of
59/// `PUBLIC` vs `public` are semantically equivalent.
60pub fn normalize_keyword_args(command: &mut CommandInvocation, registry: &CommandRegistry) {
61 let spec = registry.get(&command.name);
62 let first_arg = command.arguments.iter().find_map(first_arg_text);
63 let form = spec.form_for(first_arg);
64 let keyword_set = collect_keywords(form);
65
66 for arg in &mut command.arguments {
67 if let Argument::Unquoted(value) = arg {
68 let upper = value.to_ascii_uppercase();
69 if keyword_set.contains(upper.as_str()) {
70 *value = upper;
71 }
72 }
73 }
74}
75
/// Convert every `\r\n` sequence in `value` to a single `\n`, in place.
///
/// A `\r` that is not immediately followed by `\n` is left untouched.
///
/// Public because callers normalise `TemplatePlaceholder` statements
/// directly (these aren't `CommandInvocation`s and so don't go through
/// [`normalize_command_literals`]).
pub fn normalize_line_endings(value: &mut String) {
    // Fast path: without any carriage return there is nothing to rewrite,
    // so the string is left alone and no replacement buffer is built.
    if !value.contains('\r') {
        return;
    }

    let converted = value.replace("\r\n", "\n");
    *value = converted;
}
86
87fn first_arg_text(argument: &Argument) -> Option<&str> {
88 match argument {
89 Argument::Quoted(_) | Argument::Bracket(_) | Argument::InlineComment(_) => None,
90 Argument::Unquoted(value) => Some(value.as_str()),
91 }
92}
93
94fn collect_keywords(form: &CommandForm) -> BTreeSet<String> {
95 let mut keywords = BTreeSet::new();
96 collect_form_keywords(form, &mut keywords);
97 keywords
98}
99
100fn collect_form_keywords(form: &CommandForm, keywords: &mut BTreeSet<String>) {
101 keywords.extend(form.flags.iter().cloned());
102
103 for (name, spec) in &form.kwargs {
104 keywords.insert(name.clone());
105 collect_kwarg_keywords(spec, keywords);
106 }
107}
108
109fn collect_kwarg_keywords(spec: &KwargSpec, keywords: &mut BTreeSet<String>) {
110 keywords.extend(spec.flags.iter().cloned());
111
112 for (name, child) in &spec.kwargs {
113 keywords.insert(name.clone());
114 collect_kwarg_keywords(child, keywords);
115 }
116}