Skip to main content

cmakefmt/formatter/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Top-level formatter entry points.
6//!
7//! These functions parse input, apply barrier handling, and render a formatted
8//! output string using the command registry and runtime configuration.
9//!
10//! # Format-barrier directives
11//!
12//! The source-string entry points ([`format_source`],
13//! [`format_source_with_registry`], [`format_source_with_debug`],
14//! [`format_source_with_registry_debug`]) scan each input line for
15//! *barrier directives* that toggle formatting on and off:
16//!
17//! | Directive | Effect |
18//! |-----------|--------|
19//! | `# cmake-format: off` / `# cmake-format: on` | Skip / resume formatting |
20//! | `# cmakefmt: off` / `# cmakefmt: on` | Same, cmakefmt-branded |
21//! | `# fmt: off` / `# fmt: on` | Generic alias |
22//! | `# ~~~` (matched pair) | Fence region — content between fences is emitted verbatim |
23//!
24//! Leading whitespace before the `#` is allowed. Lines inside a
25//! disabled region are passed through unchanged.
26//!
27//! Note: [`format_parsed_file`] does **not** honour these directives
28//! — barrier detection happens pre-parse, so if you have the AST
29//! already you've bypassed that step. Use a source-string entry
30//! point if you need barriers.
31
32pub(crate) mod comment;
33pub(crate) mod node;
34
35// `dump.rs` is the only consumer of these re-exports and is itself
36// `#[cfg(feature = "cli")]`. Without the gate this `use` warns under
37// `--no-default-features` and `--features lsp` builds.
38#[cfg(feature = "cli")]
39pub(crate) use node::{split_sections, HeaderKind};
40
41use std::path::PathBuf;
42
43use crate::config::{Config, LineEnding};
44use crate::error::{Error, FileParseError, Result};
45use crate::parser::{self, ast::File, ast::Statement};
46use crate::spec::registry::CommandRegistry;
47
48/// Format raw CMake source using the built-in command registry.
49///
50/// The output always ends with a newline. When
51/// [`Config::line_ending`] is [`LineEnding::Auto`], the output line
52/// ending is detected from the input (CRLF if the source contains
53/// any `\r\n`, otherwise LF).
54///
55/// # Examples
56///
57/// ```
58/// use cmakefmt::{format_source, Config};
59///
60/// let cmake = "CMAKE_MINIMUM_REQUIRED(VERSION 3.20)\n";
61/// let formatted = format_source(cmake, &Config::default()).unwrap();
62/// assert_eq!(formatted, "cmake_minimum_required(VERSION 3.20)\n");
63/// ```
64pub fn format_source(source: &str, config: &Config) -> Result<String> {
65    format_source_with_registry(source, config, CommandRegistry::builtins())
66}
67
68/// Format raw CMake source using the built-in registry and also return debug
69/// lines describing the formatter's decisions.
70///
71/// The returned `Vec<String>` contains one human-readable log line
72/// per formatting decision (layout choice, section split, fallback
73/// paths, barrier events). The exact wording is **unstable across
74/// releases** and intended for interactive debugging and bug
75/// reports, not programmatic consumption.
76pub fn format_source_with_debug(source: &str, config: &Config) -> Result<(String, Vec<String>)> {
77    format_source_with_registry_debug(source, config, CommandRegistry::builtins())
78}
79
80/// Format raw CMake source using an explicit command registry.
81///
82/// Use this when you need a registry that merges the built-ins with a user
83/// override file.
84///
85/// # Examples
86///
87/// ```
88/// use cmakefmt::{format_source_with_registry, Config, CommandRegistry};
89///
90/// let registry = CommandRegistry::from_builtins_and_overrides(
91///     None::<&std::path::Path>,
92/// ).unwrap();
93/// let cmake = "TARGET_LINK_LIBRARIES(mylib PUBLIC dep1)\n";
94/// let formatted = format_source_with_registry(
95///     cmake, &Config::default(), &registry,
96/// ).unwrap();
97/// assert_eq!(formatted, "target_link_libraries(mylib PUBLIC dep1)\n");
98/// ```
99pub fn format_source_with_registry(
100    source: &str,
101    config: &Config,
102    registry: &CommandRegistry,
103) -> Result<String> {
104    if config.disable {
105        return Ok(source.to_owned());
106    }
107    validate_runtime_config(config)?;
108    let formatted = format_source_impl(source, config, registry, &mut DebugLog::disabled())?.0;
109    Ok(apply_line_ending(source, &formatted, config.line_ending))
110}
111
112/// Format raw CMake source using an explicit registry and return debug output.
113pub fn format_source_with_registry_debug(
114    source: &str,
115    config: &Config,
116    registry: &CommandRegistry,
117) -> Result<(String, Vec<String>)> {
118    if config.disable {
119        return Ok((source.to_owned(), Vec::new()));
120    }
121    validate_runtime_config(config)?;
122    let mut lines = Vec::new();
123    let mut debug = DebugLog::enabled(&mut lines);
124    let (formatted, _) = format_source_impl(source, config, registry, &mut debug)?;
125    Ok((
126        apply_line_ending(source, &formatted, config.line_ending),
127        lines,
128    ))
129}
130
131/// Format an already parsed AST file using the original source text.
132///
133/// This entry point preserves the same high-level config semantics as
134/// [`format_source_with_registry`]: `disable` returns the original `source`
135/// unchanged and `line_ending` is applied relative to the original source.
136///
137/// Useful when you want to parse once and format the same AST repeatedly with
138/// different [`Config`] or registry settings, avoiding re-parsing overhead.
139///
140/// # Caveat: no barrier handling
141///
142/// Unlike [`format_source`] and its siblings, this function does
143/// **not** honour `# cmake-format: off/on`, `# cmakefmt: off/on`,
144/// `# fmt: off/on`, or `# ~~~` fence regions. Barrier detection
145/// happens pre-parse in the source-string pipeline, so by the time
146/// you hand in a parsed AST the opportunity has passed. Use one of
147/// the source-string entry points if your input contains barrier
148/// directives.
149///
150/// # Examples
151///
152/// ```
153/// use cmakefmt::{format_parsed_file, Config, CommandRegistry};
154///
155/// let cmake = "PROJECT(MyProject)\n";
156/// let file = cmakefmt::parser::parse(cmake).unwrap();
157/// let formatted = format_parsed_file(
158///     cmake,
159///     &file,
160///     &Config::default(),
161///     CommandRegistry::builtins(),
162/// ).unwrap();
163/// assert_eq!(formatted, "project(MyProject)\n");
164/// ```
165pub fn format_parsed_file(
166    source: &str,
167    file: &File,
168    config: &Config,
169    registry: &CommandRegistry,
170) -> Result<String> {
171    if config.disable {
172        return Ok(source.to_owned());
173    }
174    validate_runtime_config(config)?;
175    let formatted =
176        format_parsed_file_with_debug(file, config, registry, &mut DebugLog::disabled())?;
177    Ok(apply_line_ending(source, &formatted, config.line_ending))
178}
179
180fn format_parsed_file_with_debug(
181    file: &File,
182    config: &Config,
183    registry: &CommandRegistry,
184    debug: &mut DebugLog<'_>,
185) -> Result<String> {
186    let patterns = config.compiled_patterns().map_err(runtime_config_error)?;
187    let mut output = String::new();
188    let mut previous_was_content = false;
189    let mut block_depth = 0usize;
190
191    for statement in &file.statements {
192        match statement {
193            Statement::Command(command) => {
194                block_depth = block_depth.saturating_sub(block_dedent_before(&command.name));
195
196                if previous_was_content {
197                    output.push('\n');
198                }
199
200                output.push_str(&node::format_command(
201                    command,
202                    config,
203                    &patterns,
204                    registry,
205                    block_depth,
206                    debug,
207                )?);
208
209                if let Some(trailing) = &command.trailing_comment {
210                    let comment_indent_width = output
211                        .rsplit('\n')
212                        .next()
213                        .unwrap_or_default()
214                        .chars()
215                        .count()
216                        + 1;
217                    let comment_lines = comment::format_comment_lines(
218                        trailing,
219                        config,
220                        &patterns,
221                        comment_indent_width,
222                        config.line_width,
223                    );
224                    if let Some((first, rest)) = comment_lines.split_first() {
225                        output.push(' ');
226                        output.push_str(first);
227                        let continuation_indent = " ".repeat(comment_indent_width);
228                        for line in rest {
229                            output.push('\n');
230                            output.push_str(&continuation_indent);
231                            output.push_str(line);
232                        }
233                    }
234                }
235
236                previous_was_content = true;
237                block_depth += block_indent_after(&command.name);
238            }
239            Statement::TemplatePlaceholder(placeholder) => {
240                if previous_was_content {
241                    output.push('\n');
242                }
243
244                output.push_str(placeholder);
245                previous_was_content = true;
246            }
247            Statement::BlankLines(count) => {
248                let newline_count = if previous_was_content {
249                    count + 1
250                } else {
251                    *count
252                };
253                let newline_count = newline_count.min(config.max_empty_lines + 1);
254                for _ in 0..newline_count {
255                    output.push('\n');
256                }
257                previous_was_content = false;
258            }
259            Statement::Comment(c) => {
260                if previous_was_content {
261                    output.push('\n');
262                }
263
264                let indent = config.indent_str().repeat(block_depth);
265                let comment_lines = comment::format_comment_lines(
266                    c,
267                    config,
268                    &patterns,
269                    indent.chars().count(),
270                    config.line_width,
271                );
272                for (index, line) in comment_lines.iter().enumerate() {
273                    if index > 0 {
274                        output.push('\n');
275                    }
276                    output.push_str(&indent);
277                    output.push_str(line);
278                }
279                previous_was_content = true;
280            }
281        }
282    }
283
284    if !output.ends_with('\n') {
285        output.push('\n');
286    }
287
288    if config.require_valid_layout {
289        for (i, line) in output.split('\n').enumerate() {
290            // Skip the final empty string produced by the trailing newline.
291            if line.is_empty() {
292                continue;
293            }
294            let width = line.chars().count();
295            if width > config.line_width {
296                return Err(Error::LayoutTooWide {
297                    line_no: i + 1,
298                    width,
299                    limit: config.line_width,
300                });
301            }
302        }
303    }
304
305    Ok(output)
306}
307
308/// Apply the configured line-ending style to `formatted` output.
309///
310/// The formatter always emits LF internally. `source` is consulted when
311/// `line_ending` is [`LineEnding::Auto`] to detect the predominant style.
312fn apply_line_ending(source: &str, formatted: &str, line_ending: LineEnding) -> String {
313    let use_crlf = match line_ending {
314        LineEnding::Unix => false,
315        LineEnding::Windows => true,
316        LineEnding::Auto => {
317            // Detect from input: if any \r\n is present, assume CRLF.
318            source.contains("\r\n")
319        }
320    };
321    if use_crlf {
322        formatted.replace('\n', "\r\n")
323    } else {
324        formatted.to_owned()
325    }
326}
327
328fn format_source_impl(
329    source: &str,
330    config: &Config,
331    registry: &CommandRegistry,
332    debug: &mut DebugLog<'_>,
333) -> Result<(String, usize)> {
334    // Preserve a leading UTF-8 BOM if the input had one. The parser
335    // strips the BOM before parsing, so without re-prepending it here
336    // the formatter would silently drop encoding markers used by some
337    // editors (notably MSVC on Windows) to identify the file as UTF-8.
338    // Strip the BOM from the source we feed to the line-by-line loop
339    // so it doesn't end up duplicated on the first emitted line.
340    const BOM: char = '\u{feff}';
341    let (had_bom, source) = match source.strip_prefix(BOM) {
342        Some(rest) => (true, rest),
343        None => (false, source),
344    };
345
346    let mut output = String::new();
347    if had_bom {
348        output.push(BOM);
349    }
350    let mut enabled_chunk = String::new();
351    let mut total_statements = 0usize;
352    let mut mode = BarrierMode::Enabled;
353    let mut enabled_chunk_start_line = 1usize;
354    let mut saw_barrier = false;
355
356    for (line_index, line) in source.split_inclusive('\n').enumerate() {
357        let line_no = line_index + 1;
358        match detect_barrier(line) {
359            Some(BarrierEvent::DisableByDirective(kind)) => {
360                let statements = flush_enabled_chunk(
361                    &mut output,
362                    &mut enabled_chunk,
363                    config,
364                    registry,
365                    debug,
366                    enabled_chunk_start_line,
367                    saw_barrier,
368                )?;
369                total_statements += statements;
370                debug.log(format!(
371                    "formatter: disabled formatting at line {line_no} via {kind}: off"
372                ));
373                output.push_str(line);
374                mode = BarrierMode::DisabledByDirective;
375                saw_barrier = true;
376            }
377            Some(BarrierEvent::EnableByDirective(kind)) => {
378                let statements = flush_enabled_chunk(
379                    &mut output,
380                    &mut enabled_chunk,
381                    config,
382                    registry,
383                    debug,
384                    enabled_chunk_start_line,
385                    saw_barrier,
386                )?;
387                total_statements += statements;
388                debug.log(format!(
389                    "formatter: enabled formatting at line {line_no} via {kind}: on"
390                ));
391                output.push_str(line);
392                if matches!(mode, BarrierMode::DisabledByDirective) {
393                    mode = BarrierMode::Enabled;
394                }
395                saw_barrier = true;
396            }
397            Some(BarrierEvent::Fence) => {
398                let statements = flush_enabled_chunk(
399                    &mut output,
400                    &mut enabled_chunk,
401                    config,
402                    registry,
403                    debug,
404                    enabled_chunk_start_line,
405                    saw_barrier,
406                )?;
407                total_statements += statements;
408                let next_mode = if matches!(mode, BarrierMode::DisabledByFence) {
409                    BarrierMode::Enabled
410                } else {
411                    BarrierMode::DisabledByFence
412                };
413                debug.log(format!(
414                    "formatter: toggled fence region at line {line_no} -> {}",
415                    next_mode.as_str()
416                ));
417                output.push_str(line);
418                mode = next_mode;
419                saw_barrier = true;
420            }
421            None => {
422                if matches!(mode, BarrierMode::Enabled) {
423                    if enabled_chunk.is_empty() {
424                        enabled_chunk_start_line = line_no;
425                    }
426                    enabled_chunk.push_str(line);
427                } else {
428                    output.push_str(line);
429                }
430            }
431        }
432    }
433
434    total_statements += flush_enabled_chunk(
435        &mut output,
436        &mut enabled_chunk,
437        config,
438        registry,
439        debug,
440        enabled_chunk_start_line,
441        saw_barrier,
442    )?;
443    Ok((output, total_statements))
444}
445
446fn flush_enabled_chunk(
447    output: &mut String,
448    enabled_chunk: &mut String,
449    config: &Config,
450    registry: &CommandRegistry,
451    debug: &mut DebugLog<'_>,
452    chunk_start_line: usize,
453    barrier_context: bool,
454) -> Result<usize> {
455    if enabled_chunk.is_empty() {
456        return Ok(0);
457    }
458
459    let file = match parser::parse(enabled_chunk) {
460        Ok(file) => file,
461        Err(Error::Parse(parse_error)) => {
462            let _ = barrier_context;
463            return Err(Error::Parse(crate::error::ParseError {
464                display_name: "<source>".to_owned(),
465                source_text: enabled_chunk.clone().into_boxed_str(),
466                start_line: chunk_start_line,
467                diagnostic: parse_error.diagnostic,
468            }));
469        }
470        Err(err) => return Err(err),
471    };
472    let statement_count = file.statements.len();
473    debug.log(format!(
474        "formatter: formatting enabled chunk with {statement_count} statement(s) starting at source line {chunk_start_line}"
475    ));
476    let formatted = format_parsed_file_with_debug(&file, config, registry, debug)?;
477    output.push_str(&formatted);
478    enabled_chunk.clear();
479    Ok(statement_count)
480}
481
482fn validate_runtime_config(config: &Config) -> Result<()> {
483    config.validate_patterns().map_err(runtime_config_error)?;
484    Ok(())
485}
486
487fn runtime_config_error(message: String) -> Error {
488    Error::Config(crate::error::ConfigError {
489        path: PathBuf::from("<programmatic-config>"),
490        details: FileParseError {
491            format: "runtime",
492            message: message.into_boxed_str(),
493            line: None,
494            column: None,
495        },
496    })
497}
498
499fn detect_barrier(line: &str) -> Option<BarrierEvent<'_>> {
500    let trimmed = line.trim_start();
501    if !trimmed.starts_with('#') {
502        return None;
503    }
504
505    let body = trimmed[1..].trim_start().trim_end();
506    if body.starts_with("~~~") {
507        return Some(BarrierEvent::Fence);
508    }
509
510    if body == "cmake-format: off" {
511        return Some(BarrierEvent::DisableByDirective("cmake-format"));
512    }
513    if body == "cmake-format: on" {
514        return Some(BarrierEvent::EnableByDirective("cmake-format"));
515    }
516    if body == "cmakefmt: off" {
517        return Some(BarrierEvent::DisableByDirective("cmakefmt"));
518    }
519    if body == "cmakefmt: on" {
520        return Some(BarrierEvent::EnableByDirective("cmakefmt"));
521    }
522    if body == "fmt: off" {
523        return Some(BarrierEvent::DisableByDirective("fmt"));
524    }
525    if body == "fmt: on" {
526        return Some(BarrierEvent::EnableByDirective("fmt"));
527    }
528
529    None
530}
531
/// Whether the line-by-line formatter is currently formatting, and if not,
/// what switched it off.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BarrierMode {
    /// Lines are collected and formatted.
    Enabled,
    /// Formatting was switched off by an `off` directive.
    DisabledByDirective,
    /// Formatting was switched off by an opening fence line.
    DisabledByFence,
}

impl BarrierMode {
    /// Stable lowercase label for this mode, used in debug log lines.
    fn as_str(self) -> &'static str {
        match self {
            Self::Enabled => "enabled",
            Self::DisabledByDirective => "disabled-by-directive",
            Self::DisabledByFence => "disabled-by-fence",
        }
    }
}
548
/// A barrier comment recognised on a single source line by
/// `detect_barrier`.
///
/// The `&str` payload of the directive variants names the directive
/// family that matched (`cmake-format`, `cmakefmt`, or `fmt`); it is used
/// when writing debug log lines.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BarrierEvent<'a> {
    /// A `# <kind>: off` line: formatting is switched off from here.
    DisableByDirective(&'a str),
    /// A `# <kind>: on` line: ends a region disabled by an `off` directive.
    EnableByDirective(&'a str),
    /// A `# ~~~` fence line: toggles a fence-disabled region.
    Fence,
}
555
/// Optional sink for formatter debug messages.
///
/// When no sink is attached, logging is a no-op.
pub(crate) struct DebugLog<'a> {
    /// Destination for log lines; `None` means logging is switched off.
    lines: Option<&'a mut Vec<String>>,
}

impl<'a> DebugLog<'a> {
    /// Create a logger that discards every message.
    fn disabled() -> Self {
        DebugLog { lines: None }
    }

    /// Create a logger that appends every message to `lines`.
    fn enabled(lines: &'a mut Vec<String>) -> Self {
        DebugLog { lines: Some(lines) }
    }

    /// Record `message` if a sink is attached. The message is only
    /// converted into a `String` when it will actually be stored.
    fn log(&mut self, message: impl Into<String>) {
        if let Some(sink) = &mut self.lines {
            sink.push(message.into());
        }
    }
}
575
576fn block_dedent_before(command_name: &str) -> usize {
577    usize::from(matches_ascii_insensitive(
578        command_name,
579        &[
580            "elseif",
581            "else",
582            "endif",
583            "endforeach",
584            "endwhile",
585            "endfunction",
586            "endmacro",
587            "endblock",
588        ],
589    ))
590}
591
592fn block_indent_after(command_name: &str) -> usize {
593    usize::from(matches_ascii_insensitive(
594        command_name,
595        &[
596            "if", "foreach", "while", "function", "macro", "block", "elseif", "else",
597        ],
598    ))
599}
600
/// Report whether `input` equals any entry of `candidates`, ignoring ASCII
/// case. An empty candidate list never matches.
fn matches_ascii_insensitive(input: &str, candidates: &[&str]) -> bool {
    for candidate in candidates {
        if candidate.eq_ignore_ascii_case(input) {
            return true;
        }
    }
    false
}
606
#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `source` and run it through `format_parsed_file` with the
    /// built-in registry, panicking on any failure.
    fn format_parsed_with_builtins(source: &str, config: &Config) -> String {
        let file = parser::parse(source).unwrap();
        format_parsed_file(source, &file, config, CommandRegistry::builtins()).unwrap()
    }

    /// `disable` must short-circuit and hand back the source untouched.
    #[test]
    fn format_parsed_file_honors_disable() {
        let source = "set(  X  1 )\n";
        let config = Config {
            disable: true,
            ..Config::default()
        };

        assert_eq!(format_parsed_with_builtins(source, &config), source);
    }

    /// With `Auto`, a CRLF source must yield CRLF output.
    #[test]
    fn format_parsed_file_applies_line_endings_relative_to_source() {
        let source = "set(  X  1 )\r\n";
        let config = Config {
            line_ending: LineEnding::Auto,
            ..Config::default()
        };

        assert_eq!(format_parsed_with_builtins(source, &config), "set(X 1)\r\n");
    }

    /// An invalid regex in a programmatic config surfaces as a config
    /// error tagged with the placeholder path and `runtime` format.
    #[test]
    fn format_source_rejects_invalid_programmatic_regex_config() {
        let config = Config {
            fence_pattern: "[".to_owned(),
            ..Config::default()
        };

        let result = format_source("set(X 1)\n", &config);
        match result.unwrap_err() {
            Error::Config(config_err) => {
                assert_eq!(config_err.path, PathBuf::from("<programmatic-config>"));
                assert_eq!(config_err.details.format, "runtime");
                assert!(config_err.details.message.contains("invalid regex"));
            }
            other => panic!("expected config error, got {other:?}"),
        }
    }

    /// A BOM present in the input must survive formatting.
    #[test]
    fn format_source_preserves_leading_utf8_bom() {
        let formatted = format_source("\u{feff}set(FOO bar)\n", &Config::default()).unwrap();
        assert!(
            formatted.starts_with('\u{feff}'),
            "BOM was stripped from output: {formatted:?}"
        );
    }

    /// A BOM must never be invented when the input had none.
    #[test]
    fn format_source_does_not_add_a_bom() {
        let formatted = format_source("set(FOO bar)\n", &Config::default()).unwrap();
        assert!(
            !formatted.starts_with('\u{feff}'),
            "BOM was added to output without one in input: {formatted:?}"
        );
    }
}