// suno_core/naming.rs

1//! Pure naming and relative path rendering for [`Clip`] values.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::fmt;
5use std::path::PathBuf;
6use std::str::FromStr;
7
8use serde::{Deserialize, Serialize};
9use unicode_normalization::UnicodeNormalization as _;
10
11use crate::Clip;
12use crate::error::{Error, Result};
13use crate::lineage::LineageContext;
14
/// The default relative path template.
///
/// Supported placeholders are `{creator}`, `{handle}`, `{album}`, `{title}`,
/// `{id}`, `{id8}` (first 8 characters of the clip id), and `{root_id8}`
/// (first 8 of the resolved lineage root id). Empty path segments are dropped
/// after rendering.
///
/// The default embeds `[{id8}]` in the file name so same-title clips never
/// collide, and folders under `{album}`, which resolves to the lineage root's
/// title (else the clip's own title).
pub const DEFAULT_TEMPLATE: &str = "{creator}/{album}/{creator}-{title} [{id8}]";
/// Default cap, counted in characters, applied to each rendered path
/// component.
const DEFAULT_MAX_COMPONENT_LEN: usize = 80;

/// Fewest base characters `append_suffix` budgets for in front of a
/// ` [suffix]`; the effective cap is raised when the configured one cannot
/// hold the suffix plus this many characters.
const MIN_BASE_CHARS_WITH_SUFFIX: usize = 1;
29
/// The character repertoire allowed in rendered path components.
///
/// Serialised in lowercase (`unicode` / `ascii`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CharacterSet {
    /// Keep non-ASCII characters; only path-hostile and control characters are
    /// replaced with a space.
    #[default]
    Unicode,
    /// Transliterate a fixed table of accented Latin letters and ligatures to
    /// ASCII and replace every other non-ASCII character with a space.
    Ascii,
}
37
38impl FromStr for CharacterSet {
39    type Err = Error;
40
41    fn from_str(s: &str) -> Result<Self> {
42        match s.to_ascii_lowercase().as_str() {
43            "unicode" => Ok(Self::Unicode),
44            "ascii" => Ok(Self::Ascii),
45            other => Err(Error::Config(format!(
46                "unknown character_set '{other}'; expected 'unicode' or 'ascii'"
47            ))),
48        }
49    }
50}
51
52impl fmt::Display for CharacterSet {
53    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54        match self {
55            Self::Unicode => f.write_str("unicode"),
56            Self::Ascii => f.write_str("ascii"),
57        }
58    }
59}
60
/// Settings that control how clip paths are rendered.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NamingConfig {
    /// Relative path template: `/` separates components and `{placeholder}`
    /// tokens are substituted per clip.
    pub template: String,
    /// Character repertoire applied to every rendered component.
    pub character_set: CharacterSet,
    /// Cap, counted in characters, on each rendered component.
    pub max_component_len: usize,
}
67
68impl Default for NamingConfig {
69    fn default() -> Self {
70        Self {
71            template: DEFAULT_TEMPLATE.to_string(),
72            character_set: CharacterSet::Unicode,
73            max_component_len: DEFAULT_MAX_COMPONENT_LEN,
74        }
75    }
76}
77
/// One clip to name, paired with the lineage context used to resolve its
/// album and root id.
#[derive(Debug, Clone, Copy)]
pub struct NamingRequest<'a> {
    /// The clip whose path is being rendered.
    pub clip: &'a Clip,
    /// Supplies the album title and the root id for this clip.
    pub lineage: &'a LineageContext,
}
83
/// The rendered, sanitised location of one clip.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RenderedName {
    /// The directory components followed by `base_name`.
    pub relative_path: PathBuf,
    /// The final path component (the file name as rendered here).
    pub base_name: String,
}
89
90pub fn render_clip_name(request: NamingRequest<'_>, config: &NamingConfig) -> RenderedName {
91    let album = album_component(request, config);
92    render_with_album(request, config, &album)
93}
94
95pub fn render_clip_names(
96    requests: &[NamingRequest<'_>],
97    config: &NamingConfig,
98    colliding_albums: &BTreeSet<String>,
99) -> Vec<RenderedName> {
100    let albums = disambiguated_albums(requests, config, colliding_albums);
101    let mut rendered = requests
102        .iter()
103        .zip(&albums)
104        .map(|(request, album)| render_with_album(*request, config, album))
105        .collect::<Vec<_>>();
106
107    // Two passes to keep distinct clips from landing on one path.  The first
108    // pass keys on the exact rendered string; the second on the filesystem-
109    // canonical form (NFC + lowercase) so that paths differing only by case or
110    // Unicode normalisation (NFD vs NFC) are caught too — they would collide on
111    // case-insensitive or NFC-normalising filesystems (Windows, macOS default).
112    for apply_canonical in [false, true] {
113        let mut collisions = BTreeMap::<String, Vec<usize>>::new();
114        for (index, name) in rendered.iter().enumerate() {
115            let key = if apply_canonical {
116                canonical_path_key(&name.relative_path.to_string_lossy())
117            } else {
118                name.relative_path.to_string_lossy().into_owned()
119            };
120            collisions.entry(key).or_default().push(index);
121        }
122        for indexes in collisions.into_values().filter(|v| v.len() > 1) {
123            for index in indexes {
124                let suffix = &requests[index].clip.id;
125                rendered[index] = with_suffix(
126                    rendered[index].clone(),
127                    suffix,
128                    config.character_set,
129                    config.max_component_len,
130                );
131            }
132        }
133    }
134
135    rendered
136}
137
138/// Filesystem-canonical key: NFC-normalise then lowercase, so paths that differ
139/// only by case or by NFC/NFD encoding hash to the same bucket.
140fn canonical_path_key(path: &str) -> String {
141    path.nfc().flat_map(char::to_lowercase).collect()
142}
143
144/// The album path component for every request, with a clip whose root title
145/// collides across distinct roots disambiguated by `[{root_id8}]`.
146///
147/// Distinct roots must never share an album folder (two different upload roots
148/// titled "Break Through" exist). `colliding_albums` is the authoritative set
149/// of such shared root titles, computed once from the whole lineage store, so
150/// the decision is stable across runs and independent of which clips appear in
151/// this batch. A clip whose resolved album is in that set always gets its
152/// root's short id appended; every other clip keeps the bare album and groups
153/// with its same-root siblings.
154fn disambiguated_albums(
155    requests: &[NamingRequest<'_>],
156    config: &NamingConfig,
157    colliding_albums: &BTreeSet<String>,
158) -> Vec<String> {
159    requests
160        .iter()
161        .map(|request| album_for(*request, config, colliding_albums))
162        .collect()
163}
164
165/// The (possibly disambiguated) album component for one request.
166fn album_for(
167    request: NamingRequest<'_>,
168    config: &NamingConfig,
169    colliding_albums: &BTreeSet<String>,
170) -> String {
171    let raw_album = request.lineage.album(&title_name(request.clip));
172    let album = sanitise_component(&raw_album, config.character_set, config.max_component_len);
173    if colliding_albums.contains(raw_album.trim()) {
174        let suffix = truncate_chars(&request.lineage.root_id, 8);
175        append_suffix(
176            &album,
177            &suffix,
178            config.character_set,
179            config.max_component_len,
180        )
181    } else {
182        album
183    }
184}
185
186/// The sanitised album component: the resolved lineage album (root title, else
187/// the clip's own title).
188fn album_component(request: NamingRequest<'_>, config: &NamingConfig) -> String {
189    let album = request.lineage.album(&title_name(request.clip));
190    sanitise_component(&album, config.character_set, config.max_component_len)
191}
192
193/// Render one clip's path with an already-resolved album component.
194fn render_with_album(
195    request: NamingRequest<'_>,
196    config: &NamingConfig,
197    album: &str,
198) -> RenderedName {
199    let clip = request.clip;
200    let creator = sanitise_component(
201        &creator_name(clip),
202        config.character_set,
203        config.max_component_len,
204    );
205    let handle = sanitise_component(&clip.handle, config.character_set, config.max_component_len);
206    let title = sanitise_component(
207        &title_name(clip),
208        config.character_set,
209        config.max_component_len,
210    );
211    let id = sanitise_component(&clip.id, CharacterSet::Ascii, config.max_component_len);
212    let id8 = sanitise_component(
213        &truncate_chars(&clip.id, 8),
214        CharacterSet::Ascii,
215        config.max_component_len,
216    );
217    let root_id8 = sanitise_component(
218        &truncate_chars(&request.lineage.root_id, 8),
219        CharacterSet::Ascii,
220        config.max_component_len,
221    );
222    let mut components = config
223        .template
224        .split('/')
225        .filter_map(|segment| {
226            let rendered = segment
227                .replace("{creator}", &creator)
228                .replace("{handle}", &handle)
229                .replace("{album}", album)
230                .replace("{title}", &title)
231                .replace("{root_id8}", &root_id8)
232                .replace("{id8}", &id8)
233                .replace("{id}", &id);
234            let sanitised = sanitise_segment(
235                &rendered,
236                config.character_set,
237                config.max_component_len,
238                [id8.as_str(), root_id8.as_str()],
239            );
240            (!sanitised.is_empty()).then_some(sanitised)
241        })
242        .collect::<Vec<_>>();
243
244    if components.is_empty() {
245        components.push(title.clone());
246    }
247
248    let mut base_name = components
249        .pop()
250        .filter(|value| !value.is_empty())
251        .unwrap_or_else(|| title.clone());
252    // Guarantee a non-empty file name even when every token sanitises away.
253    if base_name.is_empty() {
254        base_name = append_suffix(
255            &base_name,
256            &clip.id,
257            config.character_set,
258            config.max_component_len,
259        );
260    }
261
262    let mut relative_path = PathBuf::new();
263    for component in components {
264        relative_path.push(component);
265    }
266
267    relative_path.push(&base_name);
268    RenderedName {
269        relative_path,
270        base_name,
271    }
272}
273
274fn with_suffix(
275    mut rendered: RenderedName,
276    suffix: &str,
277    character_set: CharacterSet,
278    max_component_len: usize,
279) -> RenderedName {
280    rendered.base_name = append_suffix(
281        &rendered.base_name,
282        suffix,
283        character_set,
284        max_component_len,
285    );
286    rendered.relative_path.set_file_name(&rendered.base_name);
287    rendered
288}
289
290fn creator_name(clip: &Clip) -> String {
291    non_blank(&clip.display_name)
292        .or_else(|| non_blank(&clip.handle))
293        .unwrap_or("Unknown Creator")
294        .to_string()
295}
296
297fn title_name(clip: &Clip) -> String {
298    let title = clip.title.trim();
299    if title.is_empty() || title.eq_ignore_ascii_case("untitled") {
300        "Untitled".to_string()
301    } else {
302        title.to_string()
303    }
304}
305
306fn append_suffix(
307    base: &str,
308    suffix: &str,
309    character_set: CharacterSet,
310    max_component_len: usize,
311) -> String {
312    let suffix_pattern = format!(" [{suffix}]");
313    if base.ends_with(&suffix_pattern) {
314        return sanitise_component(base, character_set, max_component_len);
315    }
316
317    let max_len =
318        max_component_len.max(suffix_pattern.chars().count() + MIN_BASE_CHARS_WITH_SUFFIX);
319    let allowed = max_len.saturating_sub(suffix_pattern.chars().count());
320    // Sanitise the base before measuring it. The character set can expand a
321    // character (ascii turns `ß` into `ss`), so budgeting the cut on the raw
322    // length could let the sanitised prefix grow back over the room reserved for
323    // the suffix and slice through it again (#120).
324    let base = sanitise_component(base, character_set, max_len);
325    let truncated = truncate_chars(base.trim_end(), allowed);
326    let combined = format!("{truncated}{suffix_pattern}");
327    sanitise_component(&combined, character_set, max_len)
328}
329
330/// Sanitise a rendered template segment, preserving a trailing ` [id]`
331/// disambiguator (the `[{id8}]` or `[{root_id8}]` the template embeds) when the
332/// segment would otherwise be truncated through it. Only the title portion is
333/// shortened, so two long-titled siblings keep their distinguishing id and the
334/// closing bracket is never left unbalanced (#120). A segment that does not end
335/// in a disambiguator is sanitised exactly as before.
336fn sanitise_segment(
337    rendered: &str,
338    character_set: CharacterSet,
339    max_component_len: usize,
340    disambiguators: [&str; 2],
341) -> String {
342    for suffix in disambiguators {
343        if suffix.is_empty() {
344            continue;
345        }
346        let pattern = format!(" [{suffix}]");
347        if let Some(prefix) = rendered.strip_suffix(&pattern) {
348            return append_suffix(prefix, suffix, character_set, max_component_len);
349        }
350    }
351    sanitise_component(rendered, character_set, max_component_len)
352}
353
354/// Sanitise a free-form playlist name into a single safe path component.
355///
356/// Applies the same Unicode filtering and length cap as clip path components
357/// (default [`CharacterSet::Unicode`], [`DEFAULT_MAX_COMPONENT_LEN`]), so a
358/// playlist file name obeys the same filesystem rules as the rest of the
359/// library. An empty or fully-stripped name falls back to `playlist` so the
360/// caller always has a non-empty stem to append `.m3u8` to.
361pub fn sanitise_name(name: &str) -> String {
362    let cleaned = sanitise_component(name, CharacterSet::Unicode, DEFAULT_MAX_COMPONENT_LEN);
363    if cleaned.is_empty() {
364        "playlist".to_string()
365    } else {
366        cleaned
367    }
368}
369
/// The `.stems` sub-folder that sits next to a song's audio file.
///
/// `base` is the song's extensionless relative path (the value the audio file
/// and its sidecars are built from); the folder is `{base}.stems`. The
/// distinct `.stems` suffix keeps it from colliding with the audio file
/// (`{base}.<ext>`) or with any `{base}.<sidecar>`.
pub fn stems_folder(base: &str) -> String {
    let mut folder = String::with_capacity(base.len() + ".stems".len());
    folder.push_str(base);
    folder.push_str(".stems");
    folder
}
379
380/// The relative path of one stem file inside a song's [`stems_folder`].
381///
382/// Named base+label+disambiguation rather than label-only, because Auto Split
383/// can mislabel stems and Advanced Split yields ~100 instruments, so blank or
384/// duplicate labels are expected. The file is
385/// `{song file name} - {label} [{stem id8}].{ext}`; the ` - {label}` piece is
386/// dropped when the label sanitises to empty, and the `[{stem id8}]`
387/// disambiguator (the first 8 characters of the stable stem id) keeps blank or
388/// duplicate labels collision-free. Every component is run through the same
389/// [`sanitise_component`] filter as the rest of the library, honouring
390/// `character_set`.
391pub fn stem_file_path(
392    base: &str,
393    label: &str,
394    stem_id: &str,
395    ext: &str,
396    character_set: CharacterSet,
397) -> String {
398    let folder = stems_folder(base);
399    // The song's own file-name stem (the last path component of `base`), reused
400    // so a stem stays identifiable even when viewed outside its `.stems` folder.
401    let song_stem = base.rsplit('/').next().unwrap_or(base);
402    let label = sanitise_component(label, character_set, DEFAULT_MAX_COMPONENT_LEN);
403    let id8 = sanitise_component(
404        &truncate_chars(stem_id, 8),
405        CharacterSet::Ascii,
406        DEFAULT_MAX_COMPONENT_LEN,
407    );
408
409    let mut name = song_stem.to_string();
410    if !label.is_empty() {
411        name.push_str(" - ");
412        name.push_str(&label);
413    }
414    if !id8.is_empty() {
415        name.push_str(" [");
416        name.push_str(&id8);
417        name.push(']');
418    }
419    // A degenerate base (empty song stem, blank label, empty id) must still
420    // yield a usable name rather than a hidden dotfile.
421    if name.trim().is_empty() {
422        name = "stem".to_string();
423    }
424    format!("{folder}/{name}.{}", sanitise_ext(ext))
425}
426
/// Reduce a candidate extension to a safe token: leading dots are dropped,
/// only ASCII alphanumerics are kept (lowercased), and at most 8 characters
/// survive. An extension that is empty or fully stripped becomes `mp3`. The
/// caller passes the resolved stem format's extension (`wav` or `mp3`); stems
/// are stored RAW.
fn sanitise_ext(ext: &str) -> String {
    let mut cleaned = String::with_capacity(8);
    for ch in ext.trim_start_matches('.').chars() {
        // Everything pushed is ASCII, so the byte length is the char count.
        if cleaned.len() == 8 {
            break;
        }
        if ch.is_ascii_alphanumeric() {
            cleaned.push(ch.to_ascii_lowercase());
        }
    }

    if cleaned.is_empty() {
        return "mp3".to_string();
    }
    cleaned
}
444
445fn sanitise_component(
446    value: &str,
447    character_set: CharacterSet,
448    max_component_len: usize,
449) -> String {
450    let filtered = match character_set {
451        CharacterSet::Unicode => value.chars().map(unicode_char).collect::<String>(),
452        CharacterSet::Ascii => value.chars().flat_map(ascii_chars).collect::<String>(),
453    };
454    let collapsed = filtered.split_whitespace().collect::<Vec<_>>().join(" ");
455    let trimmed = collapsed.trim_matches([' ', '.']);
456    if trimmed.is_empty() {
457        return String::new();
458    }
459
460    let mut result = truncate_chars(trimmed, max_component_len.max(1));
461    result = result.trim_matches([' ', '.']).to_string();
462    if result.is_empty() {
463        return String::new();
464    }
465    if result == "." || result == ".." {
466        return "item".to_string();
467    }
468    if !result.ends_with('_') && is_reserved_name(&result) {
469        result.push('_');
470    }
471    result
472}
473
/// Map one character for the Unicode character set: path-hostile punctuation,
/// NUL, and every control character become a space; anything else is kept.
fn unicode_char(ch: char) -> char {
    const FORBIDDEN: [char; 10] = ['<', '>', ':', '"', '/', '\\', '|', '?', '*', '\0'];

    if ch.is_control() || FORBIDDEN.contains(&ch) {
        return ' ';
    }
    ch
}
485
486fn ascii_chars(ch: char) -> Vec<char> {
487    if ch.is_ascii() {
488        return vec![unicode_char(ch)];
489    }
490
491    match ch {
492        'À' | 'Á' | 'Â' | 'Ã' | 'Ä' | 'Å' => vec!['A'],
493        'à' | 'á' | 'â' | 'ã' | 'ä' | 'å' => vec!['a'],
494        'Ç' => vec!['C'],
495        'ç' => vec!['c'],
496        'È' | 'É' | 'Ê' | 'Ë' => vec!['E'],
497        'è' | 'é' | 'ê' | 'ë' => vec!['e'],
498        'Ì' | 'Í' | 'Î' | 'Ï' => vec!['I'],
499        'ì' | 'í' | 'î' | 'ï' => vec!['i'],
500        'Ñ' => vec!['N'],
501        'ñ' => vec!['n'],
502        'Ò' | 'Ó' | 'Ô' | 'Õ' | 'Ö' | 'Ø' => vec!['O'],
503        'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ø' => vec!['o'],
504        'Ù' | 'Ú' | 'Û' | 'Ü' => vec!['U'],
505        'ù' | 'ú' | 'û' | 'ü' => vec!['u'],
506        'Ý' | 'Ÿ' => vec!['Y'],
507        'ý' | 'ÿ' => vec!['y'],
508        'Æ' => vec!['A', 'E'],
509        'æ' => vec!['a', 'e'],
510        'Œ' => vec!['O', 'E'],
511        'œ' => vec!['o', 'e'],
512        'ß' => vec!['s', 's'],
513        _ => vec![' '],
514    }
515}
516
/// Return the first `max_len` characters of `value` (all of it when shorter).
/// The cut is made on a character boundary, never inside a code point.
fn truncate_chars(value: &str, max_len: usize) -> String {
    match value.char_indices().nth(max_len) {
        // Byte offset of the first character past the limit.
        Some((cut, _)) => value[..cut].to_owned(),
        None => value.to_owned(),
    }
}
520
/// Trim `value` and return it, or `None` when nothing but whitespace remains.
fn non_blank(value: &str) -> Option<&str> {
    match value.trim() {
        "" => None,
        trimmed => Some(trimmed),
    }
}
525
/// Whether `value` names a reserved Windows device.
///
/// Only the stem (the text before the first `.`) is compared, ignoring ASCII
/// case, because a device name followed by an extension is still reserved.
/// Recognised names are `CON`, `PRN`, `AUX`, `NUL`, and `COM` / `LPT` followed
/// by exactly one port digit: `0`-`9` or a superscript `¹`, `²`, `³`.
fn is_reserved_name(value: &str) -> bool {
    let stem = value.split('.').next().unwrap_or(value);
    let upper = stem.to_ascii_uppercase();
    if matches!(upper.as_str(), "CON" | "PRN" | "AUX" | "NUL") {
        return true;
    }

    let port = match upper
        .strip_prefix("COM")
        .or_else(|| upper.strip_prefix("LPT"))
    {
        Some(port) => port,
        None => return false,
    };
    // Exactly one character must follow the prefix; `COM` and `COM10` are
    // ordinary names. U+00B9 / U+00B2 / U+00B3 are the superscript digits.
    let mut port_chars = port.chars();
    matches!(
        (port_chars.next(), port_chars.next()),
        (Some('0'..='9' | '\u{b9}' | '\u{b2}' | '\u{b3}'), None)
    )
}
554
555#[cfg(test)]
556mod tests {
557    use super::*;
558    use crate::lineage::{EdgeType, ResolveStatus};
559    use std::collections::{BTreeMap, BTreeSet};
560
561    fn test_clip(id: &str, title: &str) -> Clip {
562        Clip {
563            id: id.to_string(),
564            title: title.to_string(),
565            display_name: "München".to_string(),
566            handle: "munchen".to_string(),
567            album_title: String::new(),
568            root_ancestor_id: String::new(),
569            ..Clip::default()
570        }
571    }
572
573    fn render_own(clip: &Clip, config: &NamingConfig) -> RenderedName {
574        let lineage = LineageContext::own_root(clip);
575        render_clip_name(
576            NamingRequest {
577                clip,
578                lineage: &lineage,
579            },
580            config,
581        )
582    }
583
584    fn render_all_own(
585        clips: &[Clip],
586        config: &NamingConfig,
587        colliding: &BTreeSet<String>,
588    ) -> Vec<RenderedName> {
589        let lineages: Vec<LineageContext> = clips.iter().map(LineageContext::own_root).collect();
590        let requests: Vec<NamingRequest> = clips
591            .iter()
592            .zip(&lineages)
593            .map(|(clip, lineage)| NamingRequest { clip, lineage })
594            .collect();
595        render_clip_names(&requests, config, colliding)
596    }
597
598    #[test]
599    fn unicode_names_are_preserved_and_ascii_falls_back() {
600        let clip = test_clip("abc12345", "Beyoncé/東京");
601
602        let unicode = render_own(&clip, &NamingConfig::default());
603        assert_eq!(
604            unicode.relative_path.to_string_lossy(),
605            "München/Beyoncé 東京/München-Beyoncé 東京 [abc12345]"
606        );
607
608        let ascii = render_own(
609            &clip,
610            &NamingConfig {
611                character_set: CharacterSet::Ascii,
612                ..NamingConfig::default()
613            },
614        );
615        assert_eq!(
616            ascii.relative_path.to_string_lossy(),
617            "Munchen/Beyonce/Munchen-Beyonce [abc12345]"
618        );
619    }
620
621    #[test]
622    fn reserved_and_hostile_names_are_sanitised() {
623        let clip = Clip {
624            id: "deadbeef".to_string(),
625            title: "CON<>:\"/\\|?*.".to_string(),
626            display_name: "AUX".to_string(),
627            ..Clip::default()
628        };
629
630        let rendered = render_own(&clip, &NamingConfig::default());
631        let path = rendered.relative_path.to_string_lossy();
632        assert!(path.starts_with("AUX_/CON_/"), "path was {path}");
633        assert!(rendered.base_name.contains("[deadbeef]"));
634    }
635
636    #[test]
637    fn default_template_always_embeds_id8() {
638        let clip = test_clip("abcdef1234567890", "Any Title");
639        let rendered = render_own(&clip, &NamingConfig::default());
640        assert!(
641            rendered.base_name.contains("[abcdef12]"),
642            "base_name was {}",
643            rendered.base_name
644        );
645    }
646
647    #[test]
648    fn blank_titles_use_a_stable_suffix() {
649        let clip = test_clip("12345678-clip", "   ");
650
651        let rendered = render_own(&clip, &NamingConfig::default());
652        assert_eq!(rendered.base_name, "München-Untitled [12345678]");
653        assert_eq!(
654            rendered.relative_path.to_string_lossy(),
655            "München/Untitled/München-Untitled [12345678]"
656        );
657    }
658
659    #[test]
660    fn very_long_titles_are_trimmed() {
661        let clip = test_clip("abcdef12", &"a".repeat(120));
662        let rendered = render_own(
663            &clip,
664            &NamingConfig {
665                max_component_len: 24,
666                ..NamingConfig::default()
667            },
668        );
669
670        for component in rendered.relative_path.components() {
671            let text = component.as_os_str().to_string_lossy();
672            assert!(
673                text.chars().count() <= 24,
674                "component {text:?} exceeds 24 chars"
675            );
676        }
677        // The trailing [id8] must survive the truncation intact (#120).
678        assert!(
679            rendered.base_name.ends_with(" [abcdef12]"),
680            "id8 disambiguator was sliced; base_name was {:?}",
681            rendered.base_name
682        );
683    }
684
685    #[test]
686    fn long_names_keep_the_full_id8_disambiguator() {
687        // A creator+title long enough to overflow the cap keeps the whole
688        // trailing [id8]: the title is shortened, not the id, so the name stays
689        // complete and the bracket stays balanced (#120).
690        let clip = test_clip("1234abcd-tail", &"a".repeat(120));
691        let config = NamingConfig {
692            max_component_len: 40,
693            ..NamingConfig::default()
694        };
695        let rendered = render_own(&clip, &config);
696
697        assert!(
698            rendered.base_name.ends_with(" [1234abcd]"),
699            "base_name must end with the full disambiguator, was {:?}",
700            rendered.base_name
701        );
702        assert_eq!(rendered.base_name.chars().count(), 40);
703    }
704
705    #[test]
706    fn long_titled_siblings_stay_distinct_with_balanced_brackets() {
707        // Two same-(long-)titled clips sharing a root must remain distinct: only
708        // the title is shortened, so their [id8] suffixes differ and neither name
709        // ends up with an unbalanced bracket (#120).
710        let lineage = LineageContext {
711            root_id: "root-42".to_string(),
712            root_title: "Origin".to_string(),
713            root_date: String::new(),
714            parent_id: "root-42".to_string(),
715            edge_type: Some(EdgeType::Cover),
716            status: ResolveStatus::Resolved,
717        };
718        let title = "z".repeat(200);
719        let first = test_clip("aaaa1111-x", &title);
720        let second = test_clip("bbbb2222-y", &title);
721        let requests = [
722            NamingRequest {
723                clip: &first,
724                lineage: &lineage,
725            },
726            NamingRequest {
727                clip: &second,
728                lineage: &lineage,
729            },
730        ];
731
732        let names = render_clip_names(&requests, &NamingConfig::default(), &BTreeSet::new());
733
734        assert!(names[0].base_name.ends_with(" [aaaa1111]"));
735        assert!(names[1].base_name.ends_with(" [bbbb2222]"));
736        assert_ne!(names[0].relative_path, names[1].relative_path);
737        for name in &names {
738            assert!(name.base_name.chars().count() <= 80);
739            assert_eq!(name.base_name.matches('[').count(), 1, "unbalanced '['");
740            assert_eq!(name.base_name.matches(']').count(), 1, "unbalanced ']'");
741        }
742    }
743
744    #[test]
745    fn long_colliding_album_keeps_its_root_id8() {
746        // The album [root_id8] disambiguator is preserved when a long album title
747        // must be truncated, mirroring the file-name fix (#120).
748        let long = "Break Through ".repeat(20);
749        let title = long.trim().to_string();
750        let clip = Clip {
751            id: "aaaa1111-x".to_string(),
752            title: title.clone(),
753            display_name: "München".to_string(),
754            ..Clip::default()
755        };
756        let colliding: BTreeSet<String> = [title].into_iter().collect();
757        let names = render_all_own(&[clip], &NamingConfig::default(), &colliding);
758
759        let album = names[0]
760            .relative_path
761            .components()
762            .nth(1)
763            .map(|component| component.as_os_str().to_string_lossy().into_owned())
764            .unwrap_or_default();
765        assert!(album.ends_with(" [aaaa1111]"), "album was {album:?}");
766        assert!(album.chars().count() <= 80);
767    }
768
769    #[test]
770    fn ascii_expanding_chars_do_not_slice_the_disambiguator() {
771        // A literal expanding character (`ß` -> `ss` under ascii) in a custom
772        // template, right before the trailing ` [{id8}]`, must not grow back over
773        // the suffix and slice it: the base is sized after expansion (#120).
774        let clip = test_clip("1234abcd", "Title");
775        let config = NamingConfig {
776            template: format!("{}{{title}} [{{id8}}]", "ß".repeat(80)),
777            character_set: CharacterSet::Ascii,
778            max_component_len: 40,
779        };
780        let rendered = render_own(&clip, &config);
781
782        assert!(
783            rendered.base_name.ends_with(" [1234abcd]"),
784            "expansion sliced the id8; base_name was {:?}",
785            rendered.base_name
786        );
787        assert!(rendered.base_name.chars().count() <= 40);
788    }
789
790    #[test]
791    fn same_title_siblings_stay_distinct_via_id8() {
792        // Two clips sharing a root (same album folder) and the same title must
793        // still land on distinct files; the default template's {id8} does that.
794        let lineage = LineageContext {
795            root_id: "root-9".to_string(),
796            root_title: "Origin".to_string(),
797            root_date: String::new(),
798            parent_id: "root-9".to_string(),
799            edge_type: Some(EdgeType::Cover),
800            status: ResolveStatus::Resolved,
801        };
802        let first = test_clip("11111111-alpha", "Shared");
803        let second = test_clip("22222222-beta", "Shared");
804        let requests = [
805            NamingRequest {
806                clip: &first,
807                lineage: &lineage,
808            },
809            NamingRequest {
810                clip: &second,
811                lineage: &lineage,
812            },
813        ];
814
815        let names = render_clip_names(&requests, &NamingConfig::default(), &BTreeSet::new());
816
817        assert_eq!(
818            names[0].relative_path.to_string_lossy(),
819            "München/Origin/München-Shared [11111111]"
820        );
821        assert_eq!(
822            names[1].relative_path.to_string_lossy(),
823            "München/Origin/München-Shared [22222222]"
824        );
825    }
826
827    #[test]
828    fn id8_prefix_collision_falls_back_to_full_id() {
829        // Custom template without {id8} so identical titles collide and the
830        // filename fallback (full id) has to keep them distinct.
831        let config = NamingConfig {
832            template: "{creator}/{title}".to_string(),
833            ..NamingConfig::default()
834        };
835        let first = test_clip("abcd1234-first", "Untitled");
836        let second = test_clip("abcd1234-second", "Untitled");
837
838        let names = render_all_own(&[first.clone(), second.clone()], &config, &BTreeSet::new());
839        let swapped = render_all_own(&[second.clone(), first.clone()], &config, &BTreeSet::new());
840
841        assert_ne!(
842            names[0].relative_path.to_string_lossy(),
843            names[1].relative_path.to_string_lossy()
844        );
845
846        let ordered = |rendered: &[RenderedName], clips: &[Clip]| {
847            clips
848                .iter()
849                .zip(rendered)
850                .map(|(clip, name)| {
851                    (
852                        clip.id.clone(),
853                        name.relative_path.to_string_lossy().into_owned(),
854                    )
855                })
856                .collect::<BTreeMap<_, _>>()
857        };
858        assert_eq!(
859            ordered(&names, &[first.clone(), second.clone()]),
860            ordered(&swapped, &[second, first])
861        );
862    }
863
864    #[test]
865    fn album_is_root_title_for_a_remix() {
866        let clip = Clip {
867            id: "child".to_string(),
868            title: "Remix".to_string(),
869            display_name: "München".to_string(),
870            ..Clip::default()
871        };
872        let lineage = LineageContext {
873            root_id: "root-1".to_string(),
874            root_title: "Original".to_string(),
875            root_date: String::new(),
876            parent_id: "root-1".to_string(),
877            edge_type: Some(EdgeType::Cover),
878            status: ResolveStatus::Resolved,
879        };
880
881        let rendered = render_clip_name(
882            NamingRequest {
883                clip: &clip,
884                lineage: &lineage,
885            },
886            &NamingConfig::default(),
887        );
888        assert_eq!(
889            rendered.relative_path.to_string_lossy(),
890            "München/Original/München-Remix [child]"
891        );
892    }
893
894    #[test]
895    fn overridden_album_drives_the_folder_path() {
896        // A LineageContext whose root_title carries a manual override (as the
897        // store produces it) folders the clip under the preferred album name.
898        let clip = Clip {
899            id: "child".to_string(),
900            title: "Remix".to_string(),
901            display_name: "München".to_string(),
902            ..Clip::default()
903        };
904        let lineage = LineageContext {
905            root_id: "root-1".to_string(),
906            root_title: "Preferred Album".to_string(),
907            root_date: String::new(),
908            parent_id: "root-1".to_string(),
909            edge_type: Some(EdgeType::Cover),
910            status: ResolveStatus::Resolved,
911        };
912
913        let rendered = render_clip_name(
914            NamingRequest {
915                clip: &clip,
916                lineage: &lineage,
917            },
918            &NamingConfig::default(),
919        );
920        assert_eq!(
921            rendered.relative_path.to_string_lossy(),
922            "München/Preferred Album/München-Remix [child]"
923        );
924    }
925
926    #[test]
927    fn album_is_own_title_for_a_root() {
928        let clip = Clip {
929            id: "root-1".to_string(),
930            title: "Original".to_string(),
931            display_name: "München".to_string(),
932            ..Clip::default()
933        };
934
935        let rendered = render_own(&clip, &NamingConfig::default());
936        assert_eq!(
937            rendered.relative_path.to_string_lossy(),
938            "München/Original/München-Original [root-1]"
939        );
940    }
941
942    #[test]
943    fn shared_album_title_from_distinct_roots_is_disambiguated() {
944        let first = Clip {
945            id: "aaaa1111-x".to_string(),
946            title: "Break Through".to_string(),
947            display_name: "München".to_string(),
948            ..Clip::default()
949        };
950        let second = Clip {
951            id: "bbbb2222-y".to_string(),
952            title: "Break Through".to_string(),
953            display_name: "München".to_string(),
954            ..Clip::default()
955        };
956
957        // The colliding set is authoritative (store-driven), so disambiguation
958        // does not depend on both roots appearing in the same batch.
959        let colliding: BTreeSet<String> = ["Break Through".to_string()].into_iter().collect();
960        let names = render_all_own(
961            &[first.clone(), second.clone()],
962            &NamingConfig::default(),
963            &colliding,
964        );
965        let swapped = render_all_own(
966            &[second.clone(), first.clone()],
967            &NamingConfig::default(),
968            &colliding,
969        );
970
971        let album_of = |rendered: &RenderedName| {
972            rendered
973                .relative_path
974                .components()
975                .nth(1)
976                .map(|component| component.as_os_str().to_string_lossy().into_owned())
977                .unwrap_or_default()
978        };
979
980        assert_eq!(album_of(&names[0]), "Break Through [aaaa1111]");
981        assert_eq!(album_of(&names[1]), "Break Through [bbbb2222]");
982        // Deterministic regardless of input order.
983        assert_eq!(album_of(&swapped[0]), "Break Through [bbbb2222]");
984        assert_eq!(album_of(&swapped[1]), "Break Through [aaaa1111]");
985
986        // The MEDIUM fix: a narrowed run showing only one of the two roots
987        // still gets the suffixed folder, so folders never oscillate.
988        let alone = render_all_own(
989            std::slice::from_ref(&first),
990            &NamingConfig::default(),
991            &colliding,
992        );
993        assert_eq!(album_of(&alone[0]), "Break Through [aaaa1111]");
994    }
995
996    #[test]
997    fn unique_root_title_stays_a_bare_album() {
998        // A title absent from the colliding set keeps its bare folder even when
999        // the batch happens to hold a same-titled sibling of the same root.
1000        let clip = Clip {
1001            id: "solo-1".to_string(),
1002            title: "Solo".to_string(),
1003            display_name: "München".to_string(),
1004            ..Clip::default()
1005        };
1006        let names = render_all_own(&[clip], &NamingConfig::default(), &BTreeSet::new());
1007        assert_eq!(
1008            names[0].relative_path.to_string_lossy(),
1009            "München/Solo/München-Solo [solo-1]"
1010        );
1011    }
1012
1013    #[test]
1014    fn sanitise_name_strips_separators_and_falls_back_when_empty() {
1015        assert_eq!(sanitise_name("Road/Trip: 2024"), "Road Trip 2024");
1016        assert_eq!(sanitise_name(""), "playlist");
1017        // A name made only of illegal characters strips to nothing, so the
1018        // caller still gets a usable, non-empty stem.
1019        assert_eq!(sanitise_name("///"), "playlist");
1020    }
1021
1022    #[test]
1023    fn stems_folder_is_a_sibling_suffix_of_the_song_base() {
1024        assert_eq!(
1025            stems_folder("Creator/Album/Creator-Song [abcd1234]"),
1026            "Creator/Album/Creator-Song [abcd1234].stems"
1027        );
1028    }
1029
1030    #[test]
1031    fn stem_file_path_combines_song_stem_label_and_disambiguator() {
1032        let path = stem_file_path(
1033            "Creator/Album/Creator-Song [abcd1234]",
1034            "Vocals",
1035            "stem-vocals-9f8e7d6c",
1036            "mp3",
1037            CharacterSet::Unicode,
1038        );
1039        assert_eq!(
1040            path,
1041            "Creator/Album/Creator-Song [abcd1234].stems/Creator-Song [abcd1234] - Vocals [stem-voc].mp3"
1042        );
1043    }
1044
1045    #[test]
1046    fn stem_file_path_disambiguates_blank_and_duplicate_labels_by_id() {
1047        // Two stems with the SAME (blank) label must not collide: the stem-id
1048        // disambiguator keeps them distinct even with no usable label.
1049        let a = stem_file_path("song", "", "id-aaaaaaaa", "wav", CharacterSet::Unicode);
1050        let b = stem_file_path("song", "", "id-bbbbbbbb", "wav", CharacterSet::Unicode);
1051        assert_eq!(a, "song.stems/song [id-aaaaa].wav");
1052        assert_eq!(b, "song.stems/song [id-bbbbb].wav");
1053        assert_ne!(a, b);
1054    }
1055
1056    #[test]
1057    fn stem_file_path_sanitises_label_and_extension_and_honours_ascii() {
1058        // Illegal path characters in the label are stripped, the extension is
1059        // reduced to a safe lowercase token, and ASCII folding applies.
1060        let path = stem_file_path(
1061            "song",
1062            "Lead/Vocal: Æ",
1063            "STEMID12",
1064            ".FLAC",
1065            CharacterSet::Ascii,
1066        );
1067        assert_eq!(path, "song.stems/song - Lead Vocal AE [STEMID12].flac");
1068        // A junk extension falls back to mp3 (defensive; callers pass wav/mp3).
1069        let fallback = stem_file_path("s", "Bass", "x", "??", CharacterSet::Unicode);
1070        assert_eq!(fallback, "s.stems/s - Bass [x].mp3");
1071    }
1072
1073    #[test]
1074    fn case_only_path_difference_is_a_canonical_collision() {
1075        // A custom template without {id8}: clips whose titles differ only in
1076        // case produce different exact paths but the same canonical path and
1077        // must be disambiguated to avoid clobbering on case-insensitive FSes.
1078        let config = NamingConfig {
1079            template: "{creator}/{title}".to_string(),
1080            ..NamingConfig::default()
1081        };
1082        let first = test_clip("aaaa1111-x", "sunrise");
1083        let second = test_clip("bbbb2222-y", "SUNRISE");
1084
1085        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1086
1087        assert_ne!(
1088            names[0].relative_path.to_string_lossy(),
1089            names[1].relative_path.to_string_lossy(),
1090            "canonical collision was not disambiguated"
1091        );
1092    }
1093
1094    #[test]
1095    fn nfc_nfd_path_difference_is_a_canonical_collision() {
1096        // The same character encoded as NFC vs NFD produces different byte
1097        // strings but the same file on NFC-normalising filesystems (macOS APFS).
1098        let config = NamingConfig {
1099            template: "{creator}/{title}".to_string(),
1100            ..NamingConfig::default()
1101        };
1102        // "é" as NFC (U+00E9) vs NFD (e + U+0301).
1103        let nfc_title = "\u{00e9}toile";
1104        let nfd_title = "e\u{0301}toile";
1105        let first = test_clip("aaaa1111-x", nfc_title);
1106        let second = test_clip("bbbb2222-y", nfd_title);
1107
1108        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1109
1110        assert_ne!(
1111            names[0].relative_path.to_string_lossy(),
1112            names[1].relative_path.to_string_lossy(),
1113            "NFC/NFD canonical collision was not disambiguated"
1114        );
1115    }
1116
1117    #[test]
1118    fn genuinely_distinct_paths_are_never_wrongly_disambiguated() {
1119        // Clips with distinct titles (not even canonically equivalent) must not
1120        // receive unnecessary suffixes — the canonical check must not produce
1121        // false positives.
1122        let config = NamingConfig {
1123            template: "{creator}/{title}".to_string(),
1124            ..NamingConfig::default()
1125        };
1126        let first = test_clip("aaaa1111-x", "Alpha");
1127        let second = test_clip("bbbb2222-y", "Beta");
1128
1129        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1130
1131        assert_eq!(
1132            names[0].relative_path.to_string_lossy(),
1133            "München/Alpha",
1134            "distinct path was wrongly suffixed"
1135        );
1136        assert_eq!(
1137            names[1].relative_path.to_string_lossy(),
1138            "München/Beta",
1139            "distinct path was wrongly suffixed"
1140        );
1141    }
1142}