// chordsketch_core/formatter.rs

1//! ChordPro source formatter.
2//!
3//! The [`format()`] function normalizes ChordPro text to a canonical style:
4//! directive names are expanded to their canonical form, spacing inside
5//! directives is normalized, chord spelling is canonicalized, and blank lines
6//! between sections are made consistent.
7//!
8//! # Usage
9//!
10//! ```
11//! use chordsketch_core::formatter::{FormatOptions, format};
12//!
13//! let input = "{soc}\n[am]Hello\n{eoc}\n";
14//! let output = format(input, &FormatOptions::default());
15//! assert_eq!(output, "{start_of_chorus}\n[Am]Hello\n{end_of_chorus}\n");
16//! ```
17
18use crate::ast::DirectiveKind;
19use crate::chord::parse_chord;
20
/// Switches selecting which normalizations [`format()`] performs.
///
/// Every switch is enabled by [`Default::default`]. Trailing-whitespace
/// removal, line-ending normalization, and collapsing of repeated blank lines
/// are not configurable and are always applied.
#[derive(Debug, Clone)]
pub struct FormatOptions {
    /// Rewrite directive names (including aliases) to their canonical long
    /// spelling.
    ///
    /// For instance `{soc}` becomes `{start_of_chorus}` and `{t: My Song}`
    /// becomes `{title: My Song}`. When disabled, the name is emitted as
    /// written (only surrounding whitespace is trimmed).
    ///
    /// Default: `true`.
    pub normalize_directive_names: bool,

    /// Re-spell chords found in `[…]` groups on lyric lines so that the root
    /// note is capitalized.
    ///
    /// For instance `[am]` becomes `[Am]` and `[c#m7]` becomes `[C#m7]`.
    /// Bracket contents that do not parse as a chord are left untouched.
    ///
    /// Default: `true`.
    pub normalize_chord_spelling: bool,

    /// Always separate a section-closing directive (`{end_of_*}`) from the
    /// next non-blank line with exactly one blank line, inserting one when
    /// the input has none.
    ///
    /// When disabled no blank line is inserted; existing runs of blank lines
    /// are still collapsed to a single blank line.
    ///
    /// Default: `true`.
    pub section_blank_lines: bool,
}
44
45impl Default for FormatOptions {
46    fn default() -> Self {
47        Self {
48            normalize_directive_names: true,
49            normalize_chord_spelling: true,
50            section_blank_lines: true,
51        }
52    }
53}
54
55/// Format a ChordPro source string.
56///
57/// Applies the normalizations described in [`FormatOptions`] and returns the
58/// reformatted source. The output is always syntactically valid ChordPro.
59///
60/// # Idempotence
61///
62/// `format(format(s, opts), opts) == format(s, opts)` for any valid `s` and
63/// `opts`.
64///
65/// # Line endings
66///
67/// All line endings in the input (`\r\n`, `\r`, `\n`) are normalized to `\n`.
68/// The output always ends with a `\n` unless the input contained no non-blank
69/// content (in which case the output is an empty string).
70#[must_use]
71pub fn format(input: &str, options: &FormatOptions) -> String {
72    // Normalize line endings to LF first.
73    let normalized = input.replace("\r\n", "\n").replace('\r', "\n");
74
75    let mut out: Vec<String> = Vec::new();
76    // Blank lines accumulated since the last emitted non-blank line.
77    let mut pending_blanks: usize = 0;
78    // Whether the last emitted non-blank line was a section-ending directive.
79    let mut after_section_end = false;
80
81    for raw_line in normalized.lines() {
82        if raw_line.trim().is_empty() {
83            pending_blanks += 1;
84            continue;
85        }
86
87        // Format the non-blank line.
88        let formatted = format_line(raw_line, options);
89        let is_end = is_section_end_directive(&formatted);
90
91        // Emit pending blank lines (collapsed to at most one), or inject a
92        // mandatory blank line after a section-end boundary.
93        if options.section_blank_lines && after_section_end {
94            // Always exactly one blank line after a section end, even if the
95            // original had none.
96            out.push(String::new());
97        } else if pending_blanks > 0 {
98            // Collapse multiple consecutive blank lines to one.
99            out.push(String::new());
100        }
101        pending_blanks = 0;
102
103        out.push(formatted);
104        after_section_end = is_end;
105    }
106
107    // Trailing blank lines are discarded (they were only accumulated, never
108    // emitted because no subsequent non-blank line triggered their flush).
109
110    if out.is_empty() {
111        return String::new();
112    }
113
114    let mut result = out.join("\n");
115    result.push('\n');
116    result
117}
118
119/// Format a single non-blank ChordPro line.
120fn format_line(line: &str, options: &FormatOptions) -> String {
121    // Remove trailing whitespace only; preserve leading indentation (unusual
122    // in ChordPro, but some editors add it).
123    let trimmed = line.trim_end();
124
125    // Comment lines are preserved verbatim.
126    if trimmed.trim_start().starts_with('#') {
127        return trimmed.to_string();
128    }
129
130    // Try to parse and reformat as a directive line.
131    if let Some(formatted) = try_format_directive(trimmed, options) {
132        return formatted;
133    }
134
135    // Lyrics / chords line: optionally normalize chord spellings.
136    if options.normalize_chord_spelling {
137        normalize_chords_in_line(trimmed)
138    } else {
139        trimmed.to_string()
140    }
141}
142
143/// Try to parse and reformat a line as a ChordPro directive.
144///
145/// Returns `Some(formatted)` when the entire (trimmed) line is a single
146/// `{…}` directive block, `None` otherwise.
147fn try_format_directive(line: &str, options: &FormatOptions) -> Option<String> {
148    // Must start with `{` and end with `}`.
149    let inner = line.strip_prefix('{')?.strip_suffix('}')?;
150
151    // Skip config-override directives (`{+config.key: value}`).
152    if inner.starts_with('+') {
153        return Some(line.to_string());
154    }
155
156    // Split at the first `:` to separate name from optional value.
157    let (name_raw, value_opt) = match inner.find(':') {
158        Some(pos) => (&inner[..pos], Some(&inner[pos + 1..])),
159        None => (inner, None),
160    };
161
162    let name_trimmed = name_raw.trim();
163
164    // Use the AST resolver to handle selector suffixes (e.g., `textfont-piano`).
165    let (kind, selector) = DirectiveKind::resolve_with_selector(name_trimmed);
166
167    let canonical_name = if options.normalize_directive_names {
168        kind.full_canonical_name()
169    } else {
170        name_trimmed.to_string()
171    };
172
173    // Reconstruct the directive.
174    //
175    // When normalization is ON, `canonical_name` is the base name only
176    // (e.g. `"textfont"`), so we must append the selector separately.
177    // When normalization is OFF, `canonical_name` is `name_trimmed` which
178    // already contains the selector (e.g. `"textfont-piano"`), so appending
179    // it again would produce a doubled suffix like `"textfont-piano-piano"`.
180    let mut result = String::from("{");
181    result.push_str(&canonical_name);
182    if options.normalize_directive_names {
183        if let Some(sel) = &selector {
184            result.push('-');
185            result.push_str(sel);
186        }
187    }
188    if let Some(value) = value_opt {
189        let v = value.trim();
190        result.push_str(": ");
191        result.push_str(v);
192    }
193    result.push('}');
194    Some(result)
195}
196
197/// Returns `true` if `line` is a section-closing directive.
198///
199/// Covers all `{end_of_*}` variants: chorus, verse, bridge, tab, grid,
200/// ABC, Lilypond, SVG, textblock, and user-defined custom sections.
201fn is_section_end_directive(line: &str) -> bool {
202    let inner = match line.strip_prefix('{').and_then(|s| s.strip_suffix('}')) {
203        Some(s) => s,
204        None => return false,
205    };
206    // Ignore selector and value; use only the name part.
207    let name = inner.split(':').next().unwrap_or(inner).trim();
208    let (kind, _) = DirectiveKind::resolve_with_selector(name);
209    matches!(
210        kind,
211        DirectiveKind::EndOfChorus
212            | DirectiveKind::EndOfVerse
213            | DirectiveKind::EndOfBridge
214            | DirectiveKind::EndOfTab
215            | DirectiveKind::EndOfGrid
216            | DirectiveKind::EndOfAbc
217            | DirectiveKind::EndOfLy
218            | DirectiveKind::EndOfSvg
219            | DirectiveKind::EndOfTextblock
220            | DirectiveKind::EndOfSection(_)
221    )
222}
223
224/// Normalize chord spellings within a lyrics line.
225///
226/// Each `[…]` bracket group is treated as a chord name. If the chord can be
227/// parsed, it is re-serialized from the structured representation to produce
228/// consistent capitalization. Unrecognized chord strings are kept verbatim.
229fn normalize_chords_in_line(line: &str) -> String {
230    let mut result = String::with_capacity(line.len());
231    let mut chars = line.chars().peekable();
232
233    while let Some(c) = chars.next() {
234        if c != '[' {
235            result.push(c);
236            continue;
237        }
238
239        // Collect characters until the matching `]`.
240        let mut chord_raw = String::new();
241        let mut closed = false;
242        for ch in chars.by_ref() {
243            if ch == ']' {
244                closed = true;
245                break;
246            }
247            chord_raw.push(ch);
248        }
249
250        result.push('[');
251        result.push_str(&normalize_chord_name(&chord_raw));
252        if closed {
253            result.push(']');
254        }
255    }
256    result
257}
258
259/// Normalize a single chord name string.
260///
261/// Capitalizes the root letter and re-serializes via [`ChordDetail`] display
262/// if parsing succeeds. Falls back to the original string on failure.
263fn normalize_chord_name(raw: &str) -> String {
264    if raw.is_empty() {
265        return raw.to_string();
266    }
267    // Capitalize the first character so the chord parser — which requires an
268    // uppercase root letter — can handle lowercase input (e.g., `"am"` → `"Am"`).
269    let capitalized = crate::capitalize(raw);
270    match parse_chord(&capitalized) {
271        Some(detail) => detail.to_string(),
272        None => raw.to_string(),
273    }
274}
275
#[cfg(test)]
mod tests {
    use super::*;

    /// Two sections placed back to back, with no blank line between them.
    const ADJACENT_SECTIONS: &str = "{start_of_chorus}\n[C]Hello\n{end_of_chorus}\n{start_of_verse}\n[G]World\n{end_of_verse}\n";

    /// Formats `source` with every normalization enabled.
    fn fmt_default(source: &str) -> String {
        format(source, &FormatOptions::default())
    }

    /// Asserts that default formatting turns `source` into exactly `expected`.
    #[track_caller]
    fn assert_formats_to(source: &str, expected: &str) {
        assert_eq!(fmt_default(source), expected);
    }

    /// Default options with directive-name normalization switched off.
    fn keep_directive_names() -> FormatOptions {
        FormatOptions {
            normalize_directive_names: false,
            ..FormatOptions::default()
        }
    }

    // --- Directive name normalization ----------------------------------------

    #[test]
    fn directive_alias_soc_expanded() {
        assert_formats_to("{soc}\n", "{start_of_chorus}\n");
    }

    #[test]
    fn directive_alias_eoc_expanded() {
        assert_formats_to("{eoc}\n", "{end_of_chorus}\n");
    }

    #[test]
    fn directive_alias_sov_expanded() {
        assert_formats_to("{sov}\n", "{start_of_verse}\n");
    }

    #[test]
    fn directive_alias_t_with_value() {
        assert_formats_to("{t: My Song}\n", "{title: My Song}\n");
    }

    #[test]
    fn directive_alias_np_expanded() {
        assert_formats_to("{np}\n", "{new_page}\n");
    }

    // --- Directive spacing normalization ------------------------------------

    #[test]
    fn directive_spacing_added_after_colon() {
        assert_formats_to("{title:My Song}\n", "{title: My Song}\n");
    }

    #[test]
    fn directive_spacing_idempotent() {
        assert_formats_to("{title: My Song}\n", "{title: My Song}\n");
    }

    #[test]
    fn directive_no_value_preserved() {
        assert_formats_to("{new_page}\n", "{new_page}\n");
    }

    #[test]
    fn directive_with_selector_preserved() {
        assert_formats_to("{textfont-piano: Courier}\n", "{textfont-piano: Courier}\n");
    }

    #[test]
    fn directive_name_normalization_disabled() {
        let formatted = format("{soc}\n", &keep_directive_names());
        assert_eq!(formatted, "{soc}\n");
    }

    #[test]
    fn directive_with_selector_normalization_disabled() {
        // Regression: with name normalization off, the selector is already
        // part of the emitted name and must not be appended again (this once
        // yielded `{textfont-piano-piano: Courier}`).
        let formatted = format("{textfont-piano: Courier}\n", &keep_directive_names());
        assert_eq!(formatted, "{textfont-piano: Courier}\n");
    }

    // --- Chord spelling normalization ----------------------------------------

    #[test]
    fn chord_root_capitalized() {
        assert_formats_to("[am]Hello\n", "[Am]Hello\n");
    }

    #[test]
    fn chord_sharp_root_capitalized() {
        assert_formats_to("[c#m7]Hello [g]World\n", "[C#m7]Hello [G]World\n");
    }

    #[test]
    fn chord_already_canonical_unchanged() {
        assert_formats_to("[Am]Hello\n", "[Am]Hello\n");
    }

    #[test]
    fn chord_spelling_disabled() {
        let keep_chords = FormatOptions {
            normalize_chord_spelling: false,
            ..Default::default()
        };
        assert_eq!(format("[am]Hello\n", &keep_chords), "[am]Hello\n");
    }

    // --- Section blank lines -------------------------------------------------

    #[test]
    fn section_blank_line_inserted_after_end() {
        let result = fmt_default(ADJACENT_SECTIONS);
        assert!(
            result.contains("{end_of_chorus}\n\n{start_of_verse}"),
            "expected blank line between sections, got:\n{result}"
        );
    }

    #[test]
    fn section_blank_line_not_doubled() {
        // The sections are already separated by one blank line; formatting
        // must not add a second one.
        let result = fmt_default(
            "{start_of_chorus}\n[C]Hello\n{end_of_chorus}\n\n{start_of_verse}\n[G]World\n{end_of_verse}\n",
        );
        assert!(
            !result.contains("{end_of_chorus}\n\n\n"),
            "unexpected double blank line, got:\n{result}"
        );
    }

    #[test]
    fn section_blank_lines_disabled() {
        let no_section_gap = FormatOptions {
            section_blank_lines: false,
            ..Default::default()
        };
        let result = format(ADJACENT_SECTIONS, &no_section_gap);
        assert!(
            !result.contains("{end_of_chorus}\n\n"),
            "expected no blank line insertion, got:\n{result}"
        );
    }

    // --- Blank line collapsing -----------------------------------------------

    #[test]
    fn multiple_blank_lines_collapsed() {
        assert_formats_to("[C]Hello\n\n\n[G]World\n", "[C]Hello\n\n[G]World\n");
    }

    #[test]
    fn trailing_blank_lines_removed() {
        assert_formats_to("[C]Hello\n\n\n", "[C]Hello\n");
    }

    // --- Encoding and newline normalization -----------------------------------

    #[test]
    fn crlf_normalized() {
        assert_formats_to("[C]Hello\r\n[G]World\r\n", "[C]Hello\n[G]World\n");
    }

    #[test]
    fn cr_normalized() {
        assert_formats_to("[C]Hello\r[G]World\r", "[C]Hello\n[G]World\n");
    }

    #[test]
    fn file_ends_with_newline() {
        assert!(fmt_default("[C]Hello").ends_with('\n'));
    }

    #[test]
    fn empty_input_returns_empty() {
        assert_formats_to("", "");
    }

    #[test]
    fn blank_only_input_returns_empty() {
        assert_formats_to("\n\n\n", "");
    }

    // --- Comment preservation ------------------------------------------------

    #[test]
    fn comment_line_preserved() {
        assert_formats_to("# This is a comment\n", "# This is a comment\n");
    }

    // --- Idempotence ---------------------------------------------------------

    #[test]
    fn idempotent_full_song() {
        let once = fmt_default("{t:My Song}\n{artist:Test}\n{soc}\n[am]Hello [g]World\n{eoc}\n");
        let twice = fmt_default(&once);
        assert_eq!(once, twice, "format is not idempotent");
    }

    #[test]
    fn idempotent_already_clean() {
        let clean = "{title: My Song}\n{start_of_chorus}\n[Am]Hello [G]World\n{end_of_chorus}\n";
        assert_eq!(fmt_default(clean), clean, "clean input should be unchanged");
    }
}
496}