Skip to main content

cmakefmt/formatter/
mod.rs

1// SPDX-FileCopyrightText: Copyright 2026 Puneet Matharu
2//
3// SPDX-License-Identifier: MIT OR Apache-2.0
4
5//! Top-level formatter entry points.
6//!
7//! These functions parse input, apply barrier handling, and render a formatted
8//! output string using the command registry and runtime configuration.
9//!
10//! # Format-barrier directives
11//!
12//! The source-string entry points ([`format_source`],
13//! [`format_source_with_registry`], [`format_source_with_debug`],
14//! [`format_source_with_registry_debug`]) scan each input line for
15//! *barrier directives* that toggle formatting on and off:
16//!
17//! | Directive | Effect |
18//! |-----------|--------|
19//! | `# cmake-format: off` / `# cmake-format: on` | Skip / resume formatting |
20//! | `# cmakefmt: off` / `# cmakefmt: on` | Same, cmakefmt-branded |
21//! | `# fmt: off` / `# fmt: on` | Generic alias |
//! | `# ~~~` (matched pair) | Fence region — content between fences is emitted verbatim; any comment whose text starts with `~~~` counts as a fence line |
23//!
24//! Leading whitespace before the `#` is allowed. Lines inside a
25//! disabled region are passed through unchanged.
26//!
27//! Note: [`format_parsed_file`] does **not** honour these directives
28//! — barrier detection happens pre-parse, so if you have the AST
29//! already you've bypassed that step. Use a source-string entry
30//! point if you need barriers.
31
32pub(crate) mod comment;
33pub(crate) mod node;
34
35// `dump.rs` is the only consumer of these re-exports and is itself
36// `#[cfg(feature = "cli")]`. Without the gate this `use` warns under
37// `--no-default-features` and `--features lsp` builds.
38#[cfg(feature = "cli")]
39pub(crate) use node::{split_sections, HeaderKind};
40
41use std::path::PathBuf;
42
43use crate::config::{Config, LineEnding};
44use crate::error::{Error, FileParseError, Result};
45use crate::parser::{self, ast::File, ast::Statement};
46use crate::spec::registry::CommandRegistry;
47
48/// Format raw CMake source using the built-in command registry.
49///
50/// The output always ends with a newline. When
51/// [`Config::line_ending`] is [`LineEnding::Auto`], the output line
52/// ending is detected from the input (CRLF if the source contains
53/// any `\r\n`, otherwise LF).
54///
55/// # Examples
56///
57/// ```
58/// use cmakefmt::{format_source, Config};
59///
60/// let cmake = "CMAKE_MINIMUM_REQUIRED(VERSION 3.20)\n";
61/// let formatted = format_source(cmake, &Config::default()).unwrap();
62/// assert_eq!(formatted, "cmake_minimum_required(VERSION 3.20)\n");
63/// ```
64pub fn format_source(source: &str, config: &Config) -> Result<String> {
65    format_source_with_registry(source, config, CommandRegistry::builtins())
66}
67
68/// Format raw CMake source using the built-in registry and also return debug
69/// lines describing the formatter's decisions.
70///
71/// The returned `Vec<String>` contains one human-readable log line
72/// per formatting decision (layout choice, section split, fallback
73/// paths, barrier events). The exact wording is **unstable across
74/// releases** and intended for interactive debugging and bug
75/// reports, not programmatic consumption.
76pub fn format_source_with_debug(source: &str, config: &Config) -> Result<(String, Vec<String>)> {
77    format_source_with_registry_debug(source, config, CommandRegistry::builtins())
78}
79
80/// Format raw CMake source using an explicit command registry.
81///
82/// Use this when you need a registry that merges the built-ins with a user
83/// override file.
84///
85/// # Examples
86///
87/// ```
88/// use cmakefmt::{format_source_with_registry, Config, CommandRegistry};
89///
90/// let registry = CommandRegistry::from_builtins_and_overrides(
91///     None::<&std::path::Path>,
92/// ).unwrap();
93/// let cmake = "TARGET_LINK_LIBRARIES(mylib PUBLIC dep1)\n";
94/// let formatted = format_source_with_registry(
95///     cmake, &Config::default(), &registry,
96/// ).unwrap();
97/// assert_eq!(formatted, "target_link_libraries(mylib PUBLIC dep1)\n");
98/// ```
99pub fn format_source_with_registry(
100    source: &str,
101    config: &Config,
102    registry: &CommandRegistry,
103) -> Result<String> {
104    if config.disable {
105        return Ok(source.to_owned());
106    }
107    validate_runtime_config(config)?;
108    let formatted = format_source_impl(source, config, registry, &mut DebugLog::disabled())?.0;
109    Ok(apply_line_ending(source, &formatted, config.line_ending))
110}
111
112/// Format raw CMake source using an explicit registry and return debug output.
113pub fn format_source_with_registry_debug(
114    source: &str,
115    config: &Config,
116    registry: &CommandRegistry,
117) -> Result<(String, Vec<String>)> {
118    if config.disable {
119        return Ok((source.to_owned(), Vec::new()));
120    }
121    validate_runtime_config(config)?;
122    let mut lines = Vec::new();
123    let mut debug = DebugLog::enabled(&mut lines);
124    let (formatted, _) = format_source_impl(source, config, registry, &mut debug)?;
125    Ok((
126        apply_line_ending(source, &formatted, config.line_ending),
127        lines,
128    ))
129}
130
131/// Format an already parsed AST file using the original source text.
132///
133/// This entry point preserves the same high-level config semantics as
134/// [`format_source_with_registry`]: `disable` returns the original `source`
135/// unchanged and `line_ending` is applied relative to the original source.
136///
137/// Useful when you want to parse once and format the same AST repeatedly with
138/// different [`Config`] or registry settings, avoiding re-parsing overhead.
139///
140/// # Caveat: no barrier handling
141///
142/// Unlike [`format_source`] and its siblings, this function does
143/// **not** honour `# cmake-format: off/on`, `# cmakefmt: off/on`,
144/// `# fmt: off/on`, or `# ~~~` fence regions. Barrier detection
145/// happens pre-parse in the source-string pipeline, so by the time
146/// you hand in a parsed AST the opportunity has passed. Use one of
147/// the source-string entry points if your input contains barrier
148/// directives.
149///
150/// # Examples
151///
152/// ```
153/// use cmakefmt::{format_parsed_file, Config, CommandRegistry};
154///
155/// let cmake = "PROJECT(MyProject)\n";
156/// let file = cmakefmt::parser::parse(cmake).unwrap();
157/// let formatted = format_parsed_file(
158///     cmake,
159///     &file,
160///     &Config::default(),
161///     CommandRegistry::builtins(),
162/// ).unwrap();
163/// assert_eq!(formatted, "project(MyProject)\n");
164/// ```
165pub fn format_parsed_file(
166    source: &str,
167    file: &File,
168    config: &Config,
169    registry: &CommandRegistry,
170) -> Result<String> {
171    if config.disable {
172        return Ok(source.to_owned());
173    }
174    validate_runtime_config(config)?;
175    let formatted =
176        format_parsed_file_with_debug(file, config, registry, &mut DebugLog::disabled())?;
177    Ok(apply_line_ending(source, &formatted, config.line_ending))
178}
179
180fn format_parsed_file_with_debug(
181    file: &File,
182    config: &Config,
183    registry: &CommandRegistry,
184    debug: &mut DebugLog<'_>,
185) -> Result<String> {
186    let mut block_depth = 0usize;
187    format_parsed_file_from_depth(file, config, registry, debug, &mut block_depth)
188}
189
190/// Format a parsed chunk, starting from `*block_depth_io` and updating it to
191/// the nesting level left open at the end of the chunk.
192///
193/// Threading the depth in and out lets the formatter keep correct indentation
194/// across format-barrier boundaries: a `# fmt: off`/`on` region (or a `# ~~~`
195/// fence) placed between an `if()` and its `endif()` splits the file into
196/// separate parsed chunks. The block-opening/closing commands live in the
197/// enabled chunks on either side of the barrier, so carrying the depth across
198/// the boundary keeps the chunk after the barrier indented inside the block
199/// instead of restarting at column zero. (Disabled regions are emitted
200/// verbatim and deliberately do not affect the depth — they often contain
201/// intentionally unbalanced CMake.)
202fn format_parsed_file_from_depth(
203    file: &File,
204    config: &Config,
205    registry: &CommandRegistry,
206    debug: &mut DebugLog<'_>,
207    block_depth_io: &mut usize,
208) -> Result<String> {
209    let patterns = config.compiled_patterns().map_err(runtime_config_error)?;
210    let mut output = String::new();
211    let mut previous_was_content = false;
212    let mut block_depth = *block_depth_io;
213
214    for statement in &file.statements {
215        match statement {
216            Statement::Command(command) => {
217                block_depth = block_depth.saturating_sub(block_dedent_before(&command.name));
218
219                if previous_was_content {
220                    output.push('\n');
221                }
222
223                output.push_str(&node::format_command(
224                    command,
225                    config,
226                    &patterns,
227                    registry,
228                    block_depth,
229                    debug,
230                )?);
231
232                if let Some(trailing) = &command.trailing_comment {
233                    let comment_indent_width = output
234                        .rsplit('\n')
235                        .next()
236                        .unwrap_or_default()
237                        .chars()
238                        .count()
239                        + 1;
240                    let comment_lines = comment::format_comment_lines(
241                        trailing,
242                        config,
243                        &patterns,
244                        comment_indent_width,
245                        config.line_width,
246                    );
247                    if let Some((first, rest)) = comment_lines.split_first() {
248                        output.push(' ');
249                        output.push_str(first);
250                        let continuation_indent = " ".repeat(comment_indent_width);
251                        for line in rest {
252                            output.push('\n');
253                            output.push_str(&continuation_indent);
254                            output.push_str(line);
255                        }
256                    }
257                }
258
259                previous_was_content = true;
260                block_depth += block_indent_after(&command.name);
261            }
262            Statement::TemplatePlaceholder(placeholder) => {
263                if previous_was_content {
264                    output.push('\n');
265                }
266
267                output.push_str(placeholder);
268                previous_was_content = true;
269            }
270            Statement::BlankLines(count) => {
271                let newline_count = if previous_was_content {
272                    count + 1
273                } else {
274                    *count
275                };
276                let newline_count = newline_count.min(config.max_empty_lines + 1);
277                for _ in 0..newline_count {
278                    output.push('\n');
279                }
280                previous_was_content = false;
281            }
282            Statement::Comment(c) => {
283                if previous_was_content {
284                    output.push('\n');
285                }
286
287                let indent = config.indent_str().repeat(block_depth);
288                let comment_lines = comment::format_comment_lines(
289                    c,
290                    config,
291                    &patterns,
292                    indent.chars().count(),
293                    config.line_width,
294                );
295                for (index, line) in comment_lines.iter().enumerate() {
296                    if index > 0 {
297                        output.push('\n');
298                    }
299                    output.push_str(&indent);
300                    output.push_str(line);
301                }
302                previous_was_content = true;
303            }
304        }
305    }
306
307    // Hand the nesting level reached in this chunk back to the caller so the
308    // next chunk after a format barrier continues at the right indentation.
309    *block_depth_io = block_depth;
310
311    if !output.ends_with('\n') {
312        output.push('\n');
313    }
314
315    if config.require_valid_layout {
316        for (i, line) in output.split('\n').enumerate() {
317            // Skip the final empty string produced by the trailing newline.
318            if line.is_empty() {
319                continue;
320            }
321            let width = line.chars().count();
322            if width > config.line_width {
323                return Err(Error::LayoutTooWide {
324                    line_no: i + 1,
325                    width,
326                    limit: config.line_width,
327                });
328            }
329        }
330    }
331
332    Ok(output)
333}
334
335/// Apply the configured line-ending style to `formatted` output.
336///
337/// The formatter always emits LF internally. `source` is consulted when
338/// `line_ending` is [`LineEnding::Auto`] to detect the predominant style.
339fn apply_line_ending(source: &str, formatted: &str, line_ending: LineEnding) -> String {
340    let use_crlf = match line_ending {
341        LineEnding::Unix => false,
342        LineEnding::Windows => true,
343        LineEnding::Auto => {
344            // Detect from input: if any \r\n is present, assume CRLF.
345            source.contains("\r\n")
346        }
347    };
348    if use_crlf {
349        formatted.replace('\n', "\r\n")
350    } else {
351        formatted.to_owned()
352    }
353}
354
355fn format_source_impl(
356    source: &str,
357    config: &Config,
358    registry: &CommandRegistry,
359    debug: &mut DebugLog<'_>,
360) -> Result<(String, usize)> {
361    // Preserve a leading UTF-8 BOM if the input had one. The parser
362    // strips the BOM before parsing, so without re-prepending it here
363    // the formatter would silently drop encoding markers used by some
364    // editors (notably MSVC on Windows) to identify the file as UTF-8.
365    // Strip the BOM from the source we feed to the line-by-line loop
366    // so it doesn't end up duplicated on the first emitted line.
367    const BOM: char = '\u{feff}';
368    let (had_bom, source) = match source.strip_prefix(BOM) {
369        Some(rest) => (true, rest),
370        None => (false, source),
371    };
372
373    let mut output = String::new();
374    if had_bom {
375        output.push(BOM);
376    }
377    let mut enabled_chunk = String::new();
378    let mut total_statements = 0usize;
379    let mut mode = BarrierMode::Enabled;
380    let mut enabled_chunk_start_line = 1usize;
381    let mut saw_barrier = false;
382    // Carried across chunks so a barrier inside a control-flow block does not
383    // reset the indentation of the formatted region that follows it.
384    let mut block_depth = 0usize;
385
386    for (line_index, line) in source.split_inclusive('\n').enumerate() {
387        let line_no = line_index + 1;
388        match detect_barrier(line) {
389            Some(BarrierEvent::DisableByDirective(kind)) => {
390                let statements = flush_enabled_chunk(
391                    &mut output,
392                    &mut enabled_chunk,
393                    config,
394                    registry,
395                    debug,
396                    enabled_chunk_start_line,
397                    saw_barrier,
398                    &mut block_depth,
399                )?;
400                total_statements += statements;
401                debug.log(format!(
402                    "formatter: disabled formatting at line {line_no} via {kind}: off"
403                ));
404                output.push_str(line);
405                mode = BarrierMode::DisabledByDirective;
406                saw_barrier = true;
407            }
408            Some(BarrierEvent::EnableByDirective(kind)) => {
409                let statements = flush_enabled_chunk(
410                    &mut output,
411                    &mut enabled_chunk,
412                    config,
413                    registry,
414                    debug,
415                    enabled_chunk_start_line,
416                    saw_barrier,
417                    &mut block_depth,
418                )?;
419                total_statements += statements;
420                debug.log(format!(
421                    "formatter: enabled formatting at line {line_no} via {kind}: on"
422                ));
423                output.push_str(line);
424                if matches!(mode, BarrierMode::DisabledByDirective) {
425                    mode = BarrierMode::Enabled;
426                }
427                saw_barrier = true;
428            }
429            Some(BarrierEvent::Fence) => {
430                let statements = flush_enabled_chunk(
431                    &mut output,
432                    &mut enabled_chunk,
433                    config,
434                    registry,
435                    debug,
436                    enabled_chunk_start_line,
437                    saw_barrier,
438                    &mut block_depth,
439                )?;
440                total_statements += statements;
441                let next_mode = if matches!(mode, BarrierMode::DisabledByFence) {
442                    BarrierMode::Enabled
443                } else {
444                    BarrierMode::DisabledByFence
445                };
446                debug.log(format!(
447                    "formatter: toggled fence region at line {line_no} -> {}",
448                    next_mode.as_str()
449                ));
450                output.push_str(line);
451                mode = next_mode;
452                saw_barrier = true;
453            }
454            None => {
455                if matches!(mode, BarrierMode::Enabled) {
456                    if enabled_chunk.is_empty() {
457                        enabled_chunk_start_line = line_no;
458                    }
459                    enabled_chunk.push_str(line);
460                } else {
461                    output.push_str(line);
462                }
463            }
464        }
465    }
466
467    total_statements += flush_enabled_chunk(
468        &mut output,
469        &mut enabled_chunk,
470        config,
471        registry,
472        debug,
473        enabled_chunk_start_line,
474        saw_barrier,
475        &mut block_depth,
476    )?;
477    Ok((output, total_statements))
478}
479
480#[allow(clippy::too_many_arguments)]
481fn flush_enabled_chunk(
482    output: &mut String,
483    enabled_chunk: &mut String,
484    config: &Config,
485    registry: &CommandRegistry,
486    debug: &mut DebugLog<'_>,
487    chunk_start_line: usize,
488    barrier_context: bool,
489    block_depth: &mut usize,
490) -> Result<usize> {
491    if enabled_chunk.is_empty() {
492        return Ok(0);
493    }
494
495    let file = match parser::parse(enabled_chunk) {
496        Ok(file) => file,
497        Err(Error::Parse(parse_error)) => {
498            let _ = barrier_context;
499            return Err(Error::Parse(crate::error::ParseError {
500                display_name: "<source>".to_owned(),
501                source_text: enabled_chunk.clone().into_boxed_str(),
502                start_line: chunk_start_line,
503                diagnostic: parse_error.diagnostic,
504            }));
505        }
506        Err(err) => return Err(err),
507    };
508    let statement_count = file.statements.len();
509    debug.log(format!(
510        "formatter: formatting enabled chunk with {statement_count} statement(s) starting at source line {chunk_start_line}"
511    ));
512    let formatted = format_parsed_file_from_depth(&file, config, registry, debug, block_depth)?;
513    output.push_str(&formatted);
514    enabled_chunk.clear();
515    Ok(statement_count)
516}
517
518fn validate_runtime_config(config: &Config) -> Result<()> {
519    config.validate_patterns().map_err(runtime_config_error)?;
520    Ok(())
521}
522
523fn runtime_config_error(message: String) -> Error {
524    Error::Config(crate::error::ConfigError {
525        path: PathBuf::from("<programmatic-config>"),
526        details: FileParseError {
527            format: "runtime",
528            message: message.into_boxed_str(),
529            line: None,
530            column: None,
531        },
532    })
533}
534
535fn detect_barrier(line: &str) -> Option<BarrierEvent<'_>> {
536    let trimmed = line.trim_start();
537    if !trimmed.starts_with('#') {
538        return None;
539    }
540
541    let body = trimmed[1..].trim_start().trim_end();
542    if body.starts_with("~~~") {
543        return Some(BarrierEvent::Fence);
544    }
545
546    if body == "cmake-format: off" {
547        return Some(BarrierEvent::DisableByDirective("cmake-format"));
548    }
549    if body == "cmake-format: on" {
550        return Some(BarrierEvent::EnableByDirective("cmake-format"));
551    }
552    if body == "cmakefmt: off" {
553        return Some(BarrierEvent::DisableByDirective("cmakefmt"));
554    }
555    if body == "cmakefmt: on" {
556        return Some(BarrierEvent::EnableByDirective("cmakefmt"));
557    }
558    if body == "fmt: off" {
559        return Some(BarrierEvent::DisableByDirective("fmt"));
560    }
561    if body == "fmt: on" {
562        return Some(BarrierEvent::EnableByDirective("fmt"));
563    }
564
565    None
566}
567
/// Formatting state tracked by the line scanner while it walks the source.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BarrierMode {
    /// Lines are collected and formatted.
    Enabled,
    /// Formatting was switched off by an `: off` directive.
    DisabledByDirective,
    /// Formatting was switched off by a fence line.
    DisabledByFence,
}

impl BarrierMode {
    /// Returns the label used for this mode in debug log lines.
    fn as_str(self) -> &'static str {
        match self {
            Self::Enabled => "enabled",
            Self::DisabledByDirective => "disabled-by-directive",
            Self::DisabledByFence => "disabled-by-fence",
        }
    }
}
584
/// A format-barrier directive recognised on a single source line.
///
/// The string payload names the directive family that was matched
/// (`cmake-format`, `cmakefmt`, or `fmt`) and is used in debug log lines.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BarrierEvent<'a> {
    /// An `<family>: off` directive.
    DisableByDirective(&'a str),
    /// An `<family>: on` directive.
    EnableByDirective(&'a str),
    /// A comment whose text starts with `~~~`.
    Fence,
}
591
/// Optional sink for the formatter's human-readable debug lines.
///
/// A disabled log holds no sink and drops every message; an enabled log
/// appends each message to the vector it borrows.
pub(crate) struct DebugLog<'a> {
    lines: Option<&'a mut Vec<String>>,
}

impl<'a> DebugLog<'a> {
    /// Creates a log that discards everything written to it.
    fn disabled() -> Self {
        DebugLog { lines: None }
    }

    /// Creates a log that appends every message to `lines`.
    fn enabled(lines: &'a mut Vec<String>) -> Self {
        DebugLog { lines: Some(lines) }
    }

    /// Records `message` if the log is enabled.
    ///
    /// The conversion into `String` only happens when there is a sink.
    fn log(&mut self, message: impl Into<String>) {
        if let Some(sink) = &mut self.lines {
            sink.push(message.into());
        }
    }
}
611
612fn block_dedent_before(command_name: &str) -> usize {
613    usize::from(matches_ascii_insensitive(
614        command_name,
615        &[
616            "elseif",
617            "else",
618            "endif",
619            "endforeach",
620            "endwhile",
621            "endfunction",
622            "endmacro",
623            "endblock",
624        ],
625    ))
626}
627
628fn block_indent_after(command_name: &str) -> usize {
629    usize::from(matches_ascii_insensitive(
630        command_name,
631        &[
632            "if", "foreach", "while", "function", "macro", "block", "elseif", "else",
633        ],
634    ))
635}
636
/// Reports whether `input` equals any entry of `candidates`, ignoring ASCII
/// case.
fn matches_ascii_insensitive(input: &str, candidates: &[&str]) -> bool {
    candidates
        .iter()
        .copied()
        .any(|candidate| candidate.eq_ignore_ascii_case(input))
}
642
#[cfg(test)]
mod tests {
    use super::*;

    /// With `disable` set, the AST entry point hands the source back as is.
    #[test]
    fn format_parsed_file_honors_disable() {
        let source = "set(  X  1 )\n";
        let parsed = parser::parse(source).unwrap();
        let disabled = Config {
            disable: true,
            ..Config::default()
        };

        let result = format_parsed_file(source, &parsed, &disabled, CommandRegistry::builtins());

        assert_eq!(result.unwrap(), source);
    }

    /// `LineEnding::Auto` picks CRLF because the original source uses it.
    #[test]
    fn format_parsed_file_applies_line_endings_relative_to_source() {
        let source = "set(  X  1 )\r\n";
        let parsed = parser::parse(source).unwrap();
        let auto_endings = Config {
            line_ending: LineEnding::Auto,
            ..Config::default()
        };

        let result =
            format_parsed_file(source, &parsed, &auto_endings, CommandRegistry::builtins());

        assert_eq!(result.unwrap(), "set(X 1)\r\n");
    }

    /// An invalid regex in a programmatic config surfaces as a config error
    /// with the runtime placeholder path and format label.
    #[test]
    fn format_source_rejects_invalid_programmatic_regex_config() {
        let broken = Config {
            fence_pattern: "[".to_owned(),
            ..Config::default()
        };

        let config_err = match format_source("set(X 1)\n", &broken).unwrap_err() {
            Error::Config(config_err) => config_err,
            other => panic!("expected config error, got {other:?}"),
        };

        assert_eq!(config_err.path, PathBuf::from("<programmatic-config>"));
        assert_eq!(config_err.details.format, "runtime");
        assert!(config_err.details.message.contains("invalid regex"));
    }

    /// A BOM at the start of the input is carried over to the output.
    #[test]
    fn format_source_preserves_leading_utf8_bom() {
        let with_bom = "\u{feff}set(FOO bar)\n";
        let formatted = format_source(with_bom, &Config::default()).unwrap();
        assert!(
            formatted.starts_with('\u{feff}'),
            "BOM was stripped from output: {formatted:?}"
        );
    }

    /// Input without a BOM must not gain one.
    #[test]
    fn format_source_does_not_add_a_bom() {
        let without_bom = "set(FOO bar)\n";
        let formatted = format_source(without_bom, &Config::default()).unwrap();
        assert!(
            !formatted.starts_with('\u{feff}'),
            "BOM was added to output without one in input: {formatted:?}"
        );
    }
}