Skip to main content

suno_core/
naming.rs

1//! Pure naming and relative path rendering for [`Clip`] values.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::fmt;
5use std::path::PathBuf;
6use std::str::FromStr;
7
8use serde::{Deserialize, Serialize};
9use unicode_normalization::UnicodeNormalization as _;
10
11use crate::Clip;
12use crate::error::{Error, Result};
13use crate::lineage::LineageContext;
14
/// The default relative path template.
///
/// Supported placeholders are `{creator}`, `{handle}`, `{album}`, `{title}`,
/// `{id}`, `{id8}` (first 8 characters of the clip id), and `{root_id8}`
/// (first 8 of the resolved lineage root id). Empty path segments are dropped
/// after rendering.
///
/// The default embeds `[{id8}]` in the file name so same-title clips never
/// collide, and folders under `{album}`, which resolves to the lineage root's
/// title (else the clip's own title).
pub const DEFAULT_TEMPLATE: &str = "{creator}/{album}/{creator}-{title} [{id8}]";
/// Default cap, counted in `char`s (not bytes), on the length of one sanitised
/// path component.
const DEFAULT_MAX_COMPONENT_LEN: usize = 80;

/// Minimum number of base characters that must fit in front of an appended
/// ` [suffix]`; the component cap is widened when it would leave less room.
const MIN_BASE_CHARS_WITH_SUFFIX: usize = 1;
29
/// Which characters a sanitised path component may contain.
///
/// Serialised in lowercase (`unicode` / `ascii`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CharacterSet {
    /// Keep non-ASCII characters; only filesystem-hostile and control
    /// characters are replaced with spaces. This is the default.
    #[default]
    Unicode,
    /// Transliterate a fixed table of accented Latin letters to ASCII and
    /// replace every other non-ASCII character with a space.
    Ascii,
}
37
38impl FromStr for CharacterSet {
39    type Err = Error;
40
41    fn from_str(s: &str) -> Result<Self> {
42        match s.to_ascii_lowercase().as_str() {
43            "unicode" => Ok(Self::Unicode),
44            "ascii" => Ok(Self::Ascii),
45            other => Err(Error::Config(format!(
46                "unknown character_set '{other}'; expected 'unicode' or 'ascii'"
47            ))),
48        }
49    }
50}
51
52impl fmt::Display for CharacterSet {
53    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54        match self {
55            Self::Unicode => f.write_str("unicode"),
56            Self::Ascii => f.write_str("ascii"),
57        }
58    }
59}
60
/// Settings that control how a clip is turned into a relative path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NamingConfig {
    /// `/`-separated path template; see [`DEFAULT_TEMPLATE`] for the
    /// placeholders it may contain.
    pub template: String,
    /// Character filtering applied to every rendered component.
    pub character_set: CharacterSet,
    /// Maximum length, in `char`s, of each rendered path component.
    pub max_component_len: usize,
}
67
68impl Default for NamingConfig {
69    fn default() -> Self {
70        Self {
71            template: DEFAULT_TEMPLATE.to_string(),
72            character_set: CharacterSet::Unicode,
73            max_component_len: DEFAULT_MAX_COMPONENT_LEN,
74        }
75    }
76}
77
/// One clip to name, paired with its resolved lineage.
#[derive(Debug, Clone, Copy)]
pub struct NamingRequest<'a> {
    /// The clip whose path is being rendered.
    pub clip: &'a Clip,
    /// Lineage context supplying the album title and the root id.
    pub lineage: &'a LineageContext,
}
83
/// The outcome of rendering one clip's name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RenderedName {
    /// Every rendered component joined into a relative path; its last
    /// component is `base_name`.
    pub relative_path: PathBuf,
    /// The final path component on its own (no extension is added here).
    pub base_name: String,
}
89
90pub fn render_clip_name(request: NamingRequest<'_>, config: &NamingConfig) -> RenderedName {
91    let album = album_component(request, config);
92    render_with_album(request, config, &album)
93}
94
95pub fn render_clip_names(
96    requests: &[NamingRequest<'_>],
97    config: &NamingConfig,
98    colliding_albums: &BTreeSet<String>,
99) -> Vec<RenderedName> {
100    let albums = disambiguated_albums(requests, config, colliding_albums);
101    let mut rendered = requests
102        .iter()
103        .zip(&albums)
104        .map(|(request, album)| render_with_album(*request, config, album))
105        .collect::<Vec<_>>();
106
107    // Two passes to keep distinct clips from landing on one path.  The first
108    // pass keys on the exact rendered string; the second on the filesystem-
109    // canonical form (NFC + lowercase) so that paths differing only by case or
110    // Unicode normalisation (NFD vs NFC) are caught too — they would collide on
111    // case-insensitive or NFC-normalising filesystems (Windows, macOS default).
112    for apply_canonical in [false, true] {
113        let mut collisions = BTreeMap::<String, Vec<usize>>::new();
114        for (index, name) in rendered.iter().enumerate() {
115            let key = if apply_canonical {
116                canonical_path_key(&name.relative_path.to_string_lossy())
117            } else {
118                name.relative_path.to_string_lossy().into_owned()
119            };
120            collisions.entry(key).or_default().push(index);
121        }
122        for indexes in collisions.into_values().filter(|v| v.len() > 1) {
123            for index in indexes {
124                let suffix = &requests[index].clip.id;
125                rendered[index] = with_suffix(
126                    rendered[index].clone(),
127                    suffix,
128                    config.character_set,
129                    config.max_component_len,
130                );
131            }
132        }
133    }
134
135    rendered
136}
137
138/// Filesystem-canonical key: NFC-normalise then lowercase, so paths that differ
139/// only by case or by NFC/NFD encoding hash to the same bucket.
140fn canonical_path_key(path: &str) -> String {
141    path.nfc().flat_map(char::to_lowercase).collect()
142}
143
144/// The album path component for every request, with a clip whose root title
145/// collides across distinct roots disambiguated by `[{root_id8}]`.
146///
147/// Distinct roots must never share an album folder (two different upload roots
148/// titled "Break Through" exist). `colliding_albums` is the authoritative set
149/// of such shared root titles, computed once from the whole lineage store, so
150/// the decision is stable across runs and independent of which clips appear in
151/// this batch. A clip whose resolved album is in that set always gets its
152/// root's short id appended; every other clip keeps the bare album and groups
153/// with its same-root siblings.
154fn disambiguated_albums(
155    requests: &[NamingRequest<'_>],
156    config: &NamingConfig,
157    colliding_albums: &BTreeSet<String>,
158) -> Vec<String> {
159    requests
160        .iter()
161        .map(|request| album_for(*request, config, colliding_albums))
162        .collect()
163}
164
165/// The (possibly disambiguated) album component for one request.
166fn album_for(
167    request: NamingRequest<'_>,
168    config: &NamingConfig,
169    colliding_albums: &BTreeSet<String>,
170) -> String {
171    let raw_album = request.lineage.album(&title_name(request.clip));
172    let album = sanitise_component(&raw_album, config.character_set, config.max_component_len);
173    if colliding_albums.contains(raw_album.trim()) {
174        let suffix = truncate_chars(&request.lineage.root_id, 8);
175        append_suffix(
176            &album,
177            &suffix,
178            config.character_set,
179            config.max_component_len,
180        )
181    } else {
182        album
183    }
184}
185
186/// The sanitised album component: the resolved lineage album (root title, else
187/// the clip's own title).
188fn album_component(request: NamingRequest<'_>, config: &NamingConfig) -> String {
189    let album = request.lineage.album(&title_name(request.clip));
190    sanitise_component(&album, config.character_set, config.max_component_len)
191}
192
/// Render one clip's path with an already-resolved album component.
///
/// Each placeholder value is sanitised on its own, the template is split on
/// `/`, every segment is substituted and sanitised again as a whole, and
/// segments that end up empty are dropped. The last surviving segment becomes
/// the file name (`base_name`); the ones before it become directories.
fn render_with_album(
    request: NamingRequest<'_>,
    config: &NamingConfig,
    album: &str,
) -> RenderedName {
    let clip = request.clip;
    let creator = sanitise_component(
        &creator_name(clip),
        config.character_set,
        config.max_component_len,
    );
    let handle = sanitise_component(&clip.handle, config.character_set, config.max_component_len);
    let title = sanitise_component(
        &title_name(clip),
        config.character_set,
        config.max_component_len,
    );
    // Ids are always filtered with the ASCII set, whatever the configured
    // character set is.
    let id = sanitise_component(&clip.id, CharacterSet::Ascii, config.max_component_len);
    let id8 = sanitise_component(
        &truncate_chars(&clip.id, 8),
        CharacterSet::Ascii,
        config.max_component_len,
    );
    let root_id8 = sanitise_component(
        &truncate_chars(&request.lineage.root_id, 8),
        CharacterSet::Ascii,
        config.max_component_len,
    );
    let substitutions = SegmentSubstitutions {
        creator: &creator,
        handle: &handle,
        album,
        title: &title,
        root_id8: &root_id8,
        id8: &id8,
        id: &id,
    };
    let mut components = config
        .template
        .split('/')
        .filter_map(|segment| {
            let rendered = substitute_segment(segment, substitutions);
            // A trailing ` [id8]` / ` [root_id8]` is kept whole when the
            // segment has to be shortened.
            let sanitised = sanitise_segment(
                &rendered,
                config.character_set,
                config.max_component_len,
                [id8.as_str(), root_id8.as_str()],
            );
            // Empty segments (e.g. from a leading or doubled `/`) are dropped.
            (!sanitised.is_empty()).then_some(sanitised)
        })
        .collect::<Vec<_>>();

    // Nothing survived: fall back to the sanitised title (which may itself be
    // empty; that case is handled below).
    if components.is_empty() {
        components.push(title.clone());
    }

    // The last component is the file name; whatever remains is directories.
    let mut base_name = components
        .pop()
        .filter(|value| !value.is_empty())
        .unwrap_or_else(|| title.clone());
    // Guarantee a non-empty file name even when every token sanitises away.
    if base_name.is_empty() {
        base_name = append_suffix(
            &base_name,
            &clip.id,
            config.character_set,
            config.max_component_len,
        );
    }

    let mut relative_path = PathBuf::new();
    for component in components {
        relative_path.push(component);
    }

    relative_path.push(&base_name);
    RenderedName {
        relative_path,
        base_name,
    }
}
275
/// The already-sanitised value for each template placeholder; every field is
/// named after the `{placeholder}` it replaces.
#[derive(Clone, Copy)]
struct SegmentSubstitutions<'a> {
    creator: &'a str,
    handle: &'a str,
    album: &'a str,
    title: &'a str,
    root_id8: &'a str,
    id8: &'a str,
    id: &'a str,
}
286
287fn substitute_segment(segment: &str, substitutions: SegmentSubstitutions<'_>) -> String {
288    let mut rendered = String::with_capacity(segment.len());
289    let mut remainder = segment;
290    while let Some(start) = remainder.find('{') {
291        rendered.push_str(&remainder[..start]);
292        remainder = &remainder[start..];
293        if let Some((token_len, value)) = placeholder_match(remainder, substitutions) {
294            rendered.push_str(value);
295            remainder = &remainder[token_len..];
296        } else {
297            rendered.push('{');
298            remainder = &remainder[1..];
299        }
300    }
301    rendered.push_str(remainder);
302    rendered
303}
304
305fn placeholder_match<'a>(
306    segment: &str,
307    substitutions: SegmentSubstitutions<'a>,
308) -> Option<(usize, &'a str)> {
309    if segment.starts_with("{creator}") {
310        Some(("{creator}".len(), substitutions.creator))
311    } else if segment.starts_with("{handle}") {
312        Some(("{handle}".len(), substitutions.handle))
313    } else if segment.starts_with("{album}") {
314        Some(("{album}".len(), substitutions.album))
315    } else if segment.starts_with("{title}") {
316        Some(("{title}".len(), substitutions.title))
317    } else if segment.starts_with("{root_id8}") {
318        Some(("{root_id8}".len(), substitutions.root_id8))
319    } else if segment.starts_with("{id8}") {
320        Some(("{id8}".len(), substitutions.id8))
321    } else if segment.starts_with("{id}") {
322        Some(("{id}".len(), substitutions.id))
323    } else {
324        None
325    }
326}
327
328fn with_suffix(
329    mut rendered: RenderedName,
330    suffix: &str,
331    character_set: CharacterSet,
332    max_component_len: usize,
333) -> RenderedName {
334    rendered.base_name = append_suffix(
335        &rendered.base_name,
336        suffix,
337        character_set,
338        max_component_len,
339    );
340    rendered.relative_path.set_file_name(&rendered.base_name);
341    rendered
342}
343
344fn creator_name(clip: &Clip) -> String {
345    non_blank(&clip.display_name)
346        .or_else(|| non_blank(&clip.handle))
347        .unwrap_or("Unknown Creator")
348        .to_string()
349}
350
351fn title_name(clip: &Clip) -> String {
352    let title = clip.title.trim();
353    if title.is_empty() || title.eq_ignore_ascii_case("untitled") {
354        "Untitled".to_string()
355    } else {
356        title.to_string()
357    }
358}
359
360fn append_suffix(
361    base: &str,
362    suffix: &str,
363    character_set: CharacterSet,
364    max_component_len: usize,
365) -> String {
366    let suffix_pattern = format!(" [{suffix}]");
367    if base.ends_with(&suffix_pattern) {
368        return sanitise_component(base, character_set, max_component_len);
369    }
370
371    let max_len =
372        max_component_len.max(suffix_pattern.chars().count() + MIN_BASE_CHARS_WITH_SUFFIX);
373    let allowed = max_len.saturating_sub(suffix_pattern.chars().count());
374    // Sanitise the base before measuring it. The character set can expand a
375    // character (ascii turns `ß` into `ss`), so budgeting the cut on the raw
376    // length could let the sanitised prefix grow back over the room reserved for
377    // the suffix and slice through it again (#120).
378    let base = sanitise_component(base, character_set, max_len);
379    let truncated = truncate_chars(base.trim_end(), allowed);
380    let combined = format!("{truncated}{suffix_pattern}");
381    sanitise_component(&combined, character_set, max_len)
382}
383
384/// Sanitise a rendered template segment, preserving a trailing ` [id]`
385/// disambiguator (the `[{id8}]` or `[{root_id8}]` the template embeds) when the
386/// segment would otherwise be truncated through it. Only the title portion is
387/// shortened, so two long-titled siblings keep their distinguishing id and the
388/// closing bracket is never left unbalanced (#120). A segment that does not end
389/// in a disambiguator is sanitised exactly as before.
390fn sanitise_segment(
391    rendered: &str,
392    character_set: CharacterSet,
393    max_component_len: usize,
394    disambiguators: [&str; 2],
395) -> String {
396    for suffix in disambiguators {
397        if suffix.is_empty() {
398            continue;
399        }
400        let pattern = format!(" [{suffix}]");
401        if let Some(prefix) = rendered.strip_suffix(&pattern) {
402            return append_suffix(prefix, suffix, character_set, max_component_len);
403        }
404    }
405    sanitise_component(rendered, character_set, max_component_len)
406}
407
408/// Sanitise a free-form playlist name into a single safe path component.
409///
410/// Applies the same Unicode filtering and length cap as clip path components
411/// (default [`CharacterSet::Unicode`], [`DEFAULT_MAX_COMPONENT_LEN`]), so a
412/// playlist file name obeys the same filesystem rules as the rest of the
413/// library. An empty or fully-stripped name falls back to `playlist` so the
414/// caller always has a non-empty stem to append `.m3u8` to.
415pub fn sanitise_name(name: &str) -> String {
416    let cleaned = sanitise_component(name, CharacterSet::Unicode, DEFAULT_MAX_COMPONENT_LEN);
417    if cleaned.is_empty() {
418        "playlist".to_string()
419    } else {
420        cleaned
421    }
422}
423
/// The `.stems` sub-folder that lives next to a song's audio file.
///
/// `base` is the song's extensionless relative path (the value the audio file
/// and its sidecars are built from); the folder is simply `{base}.stems`. The
/// distinct `.stems` suffix keeps it from clashing with the audio file
/// (`{base}.<ext>`) or any `{base}.<sidecar>`, mirroring the sidecar
/// convention.
pub fn stems_folder(base: &str) -> String {
    const SUFFIX: &str = ".stems";
    let mut folder = String::with_capacity(base.len() + SUFFIX.len());
    folder.push_str(base);
    folder.push_str(SUFFIX);
    folder
}
433
434/// The relative path of one stem file inside a song's [`stems_folder`].
435///
436/// Named base+label+disambiguation rather than label-only, because Auto Split
437/// can mislabel stems and Advanced Split yields ~100 instruments, so blank or
438/// duplicate labels are expected. The file is
439/// `{song file name} - {label} [{stem id8}].{ext}`; the ` - {label}` piece is
440/// dropped when the label sanitises to empty, and the `[{stem id8}]`
441/// disambiguator (the first 8 characters of the stable stem id) keeps blank or
442/// duplicate labels collision-free. Every component is run through the same
443/// [`sanitise_component`] filter as the rest of the library, honouring
444/// `character_set`.
445pub fn stem_file_path(
446    base: &str,
447    label: &str,
448    stem_id: &str,
449    ext: &str,
450    character_set: CharacterSet,
451) -> String {
452    let folder = stems_folder(base);
453    // The song's own file-name stem (the last path component of `base`), reused
454    // so a stem stays identifiable even when viewed outside its `.stems` folder.
455    let song_stem = base.rsplit('/').next().unwrap_or(base);
456    let label = sanitise_component(label, character_set, DEFAULT_MAX_COMPONENT_LEN);
457    let id8 = sanitise_component(
458        &truncate_chars(stem_id, 8),
459        CharacterSet::Ascii,
460        DEFAULT_MAX_COMPONENT_LEN,
461    );
462
463    let mut name = song_stem.to_string();
464    if !label.is_empty() {
465        name.push_str(" - ");
466        name.push_str(&label);
467    }
468    if !id8.is_empty() {
469        name.push_str(" [");
470        name.push_str(&id8);
471        name.push(']');
472    }
473    // A degenerate base (empty song stem, blank label, empty id) must still
474    // yield a usable name rather than a hidden dotfile.
475    if name.trim().is_empty() {
476        name = "stem".to_string();
477    }
478    format!("{folder}/{name}.{}", sanitise_ext(ext))
479}
480
/// Reduce a candidate extension to a safe token: at most 8 lowercase ASCII
/// alphanumerics, with every other character (dots included) discarded.
/// Falls back to `mp3` when nothing is left. The caller passes the resolved
/// stem format's extension (`wav` or `mp3`); stems are stored RAW.
fn sanitise_ext(ext: &str) -> String {
    const MAX_EXT_LEN: usize = 8;

    let mut token = String::with_capacity(MAX_EXT_LEN);
    for ch in ext.chars().filter(char::is_ascii_alphanumeric) {
        // Only ASCII is pushed, so the byte length equals the char count.
        if token.len() == MAX_EXT_LEN {
            break;
        }
        token.push(ch.to_ascii_lowercase());
    }
    if token.is_empty() {
        token.push_str("mp3");
    }
    token
}
498
499fn sanitise_component(
500    value: &str,
501    character_set: CharacterSet,
502    max_component_len: usize,
503) -> String {
504    let filtered = match character_set {
505        CharacterSet::Unicode => value.chars().map(unicode_char).collect::<String>(),
506        CharacterSet::Ascii => value.chars().flat_map(ascii_chars).collect::<String>(),
507    };
508    let collapsed = filtered.split_whitespace().collect::<Vec<_>>().join(" ");
509    let trimmed = collapsed.trim_matches([' ', '.']);
510    if trimmed.is_empty() {
511        return String::new();
512    }
513
514    let mut result = truncate_chars(trimmed, max_component_len.max(1));
515    result = result.trim_matches([' ', '.']).to_string();
516    if result.is_empty() {
517        return String::new();
518    }
519    if result == "." || result == ".." {
520        return "item".to_string();
521    }
522    if !result.ends_with('_') && is_reserved_name(&result) {
523        result.push('_');
524    }
525    result
526}
527
/// Map one character for the Unicode character set: control characters and
/// the characters `< > : " / \ | ? *` (plus NUL) become a space; everything
/// else passes through unchanged.
fn unicode_char(ch: char) -> char {
    const FORBIDDEN: [char; 10] = ['<', '>', ':', '"', '/', '\\', '|', '?', '*', '\0'];

    if ch.is_control() || FORBIDDEN.contains(&ch) {
        ' '
    } else {
        ch
    }
}
539
540fn ascii_chars(ch: char) -> Vec<char> {
541    if ch.is_ascii() {
542        return vec![unicode_char(ch)];
543    }
544
545    match ch {
546        'À' | 'Á' | 'Â' | 'Ã' | 'Ä' | 'Å' => vec!['A'],
547        'à' | 'á' | 'â' | 'ã' | 'ä' | 'å' => vec!['a'],
548        'Ç' => vec!['C'],
549        'ç' => vec!['c'],
550        'È' | 'É' | 'Ê' | 'Ë' => vec!['E'],
551        'è' | 'é' | 'ê' | 'ë' => vec!['e'],
552        'Ì' | 'Í' | 'Î' | 'Ï' => vec!['I'],
553        'ì' | 'í' | 'î' | 'ï' => vec!['i'],
554        'Ñ' => vec!['N'],
555        'ñ' => vec!['n'],
556        'Ò' | 'Ó' | 'Ô' | 'Õ' | 'Ö' | 'Ø' => vec!['O'],
557        'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ø' => vec!['o'],
558        'Ù' | 'Ú' | 'Û' | 'Ü' => vec!['U'],
559        'ù' | 'ú' | 'û' | 'ü' => vec!['u'],
560        'Ý' | 'Ÿ' => vec!['Y'],
561        'ý' | 'ÿ' => vec!['y'],
562        'Æ' => vec!['A', 'E'],
563        'æ' => vec!['a', 'e'],
564        'Œ' => vec!['O', 'E'],
565        'œ' => vec!['o', 'e'],
566        'ß' => vec!['s', 's'],
567        _ => vec![' '],
568    }
569}
570
/// Keep at most the first `max_len` characters (`char`s, not bytes) of
/// `value`.
fn truncate_chars(value: &str, max_len: usize) -> String {
    // Byte offset where character number `max_len` starts, or the end of the
    // string when it is shorter than that.
    let cut = value
        .char_indices()
        .nth(max_len)
        .map_or(value.len(), |(offset, _)| offset);
    value[..cut].to_owned()
}
574
/// The trimmed value, or `None` when it is empty or whitespace-only.
fn non_blank(value: &str) -> Option<&str> {
    Some(value.trim()).filter(|trimmed| !trimmed.is_empty())
}
579
/// Whether `value` would be treated as a reserved Windows device name.
///
/// Only the stem (the text before the first `.`) matters, with trailing
/// spaces ignored and ASCII case ignored, so `nul.tar.gz` and `nul .txt` both
/// count. Recognised names are `CON`, `PRN`, `AUX`, `NUL`, `CONIN$`,
/// `CONOUT$`, and `COM`/`LPT` followed by exactly one digit `0`-`9` or one of
/// the superscript digits `¹ ² ³`.
fn is_reserved_name(value: &str) -> bool {
    const DEVICES: [&str; 6] = ["CON", "PRN", "AUX", "NUL", "CONIN$", "CONOUT$"];
    const PORTS: [&str; 2] = ["COM", "LPT"];

    let stem = value
        .split('.')
        .next()
        .unwrap_or(value)
        .trim_end_matches(' ');
    if DEVICES.iter().any(|device| stem.eq_ignore_ascii_case(device)) {
        return true;
    }

    // `get(..3)` is `None` for short stems and for stems whose third byte is
    // not a character boundary, neither of which can be a port name.
    match stem.get(..3) {
        Some(prefix) if PORTS.iter().any(|port| prefix.eq_ignore_ascii_case(port)) => {
            let mut unit = stem[3..].chars();
            matches!(
                (unit.next(), unit.next()),
                (Some('0'..='9' | '¹' | '²' | '³'), None)
            )
        }
        _ => false,
    }
}
608
609#[cfg(test)]
610mod tests {
611    use super::*;
612    use crate::lineage::{EdgeType, ResolveStatus};
613    use std::collections::{BTreeMap, BTreeSet};
614
615    fn test_clip(id: &str, title: &str) -> Clip {
616        Clip {
617            id: id.to_string(),
618            title: title.to_string(),
619            display_name: "München".to_string(),
620            handle: "munchen".to_string(),
621            album_title: String::new(),
622            root_ancestor_id: String::new(),
623            ..Clip::default()
624        }
625    }
626
627    fn render_own(clip: &Clip, config: &NamingConfig) -> RenderedName {
628        let lineage = LineageContext::own_root(clip);
629        render_clip_name(
630            NamingRequest {
631                clip,
632                lineage: &lineage,
633            },
634            config,
635        )
636    }
637
638    fn render_all_own(
639        clips: &[Clip],
640        config: &NamingConfig,
641        colliding: &BTreeSet<String>,
642    ) -> Vec<RenderedName> {
643        let lineages: Vec<LineageContext> = clips.iter().map(LineageContext::own_root).collect();
644        let requests: Vec<NamingRequest> = clips
645            .iter()
646            .zip(&lineages)
647            .map(|(clip, lineage)| NamingRequest { clip, lineage })
648            .collect();
649        render_clip_names(&requests, config, colliding)
650    }
651
    // The Unicode set keeps accents and CJK text (only `/` becomes a space);
    // the ASCII set transliterates accents and drops the CJK part entirely.
    #[test]
    fn unicode_names_are_preserved_and_ascii_falls_back() {
        let clip = test_clip("abc12345", "Beyoncé/東京");

        let unicode = render_own(&clip, &NamingConfig::default());
        assert_eq!(
            unicode.relative_path.to_string_lossy(),
            "München/Beyoncé 東京/München-Beyoncé 東京 [abc12345]"
        );

        let ascii = render_own(
            &clip,
            &NamingConfig {
                character_set: CharacterSet::Ascii,
                ..NamingConfig::default()
            },
        );
        assert_eq!(
            ascii.relative_path.to_string_lossy(),
            "Munchen/Beyonce/Munchen-Beyonce [abc12345]"
        );
    }
674
    // Reserved device names used as creator and title get a `_` escape, and
    // the hostile punctuation in the title is stripped.
    #[test]
    fn reserved_and_hostile_names_are_sanitised() {
        let clip = Clip {
            id: "deadbeef".to_string(),
            title: "CON<>:\"/\\|?*.".to_string(),
            display_name: "AUX".to_string(),
            ..Clip::default()
        };

        let rendered = render_own(&clip, &NamingConfig::default());
        let path = rendered.relative_path.to_string_lossy();
        assert!(path.starts_with("AUX_/CON_/"), "path was {path}");
        assert!(rendered.base_name.contains("[deadbeef]"));
    }
689
    // With the default template the file name carries the first 8 characters
    // of the clip id in brackets.
    #[test]
    fn default_template_always_embeds_id8() {
        let clip = test_clip("abcdef1234567890", "Any Title");
        let rendered = render_own(&clip, &NamingConfig::default());
        assert!(
            rendered.base_name.contains("[abcdef12]"),
            "base_name was {}",
            rendered.base_name
        );
    }
700
    // Every supported placeholder is substituted; an unknown `{unknown}`
    // token is left in the output verbatim.
    #[test]
    fn custom_template_replaces_all_known_placeholders_once() {
        let clip = Clip {
            id: "abcdef12-full".to_string(),
            title: "Song".to_string(),
            display_name: "Creator".to_string(),
            handle: "handle".to_string(),
            ..Clip::default()
        };
        let lineage = LineageContext {
            root_id: "rootxyz9-extra".to_string(),
            root_title: "Album".to_string(),
            root_date: String::new(),
            parent_id: "rootxyz9-extra".to_string(),
            edge_type: Some(EdgeType::Cover),
            status: ResolveStatus::Resolved,
        };
        let config = NamingConfig {
            template: "{creator}-{handle}-{album}-{title}-{root_id8}-{id8}-{id}-{unknown}"
                .to_string(),
            ..NamingConfig::default()
        };

        let rendered = render_clip_name(
            NamingRequest {
                clip: &clip,
                lineage: &lineage,
            },
            &config,
        );

        assert_eq!(
            rendered.relative_path.to_string_lossy(),
            "Creator-handle-Album-Song-rootxyz9-abcdef12-abcdef12-full-{unknown}"
        );
    }
737
    // A whitespace-only title renders as "Untitled" in both the album folder
    // and the file name.
    #[test]
    fn blank_titles_use_a_stable_suffix() {
        let clip = test_clip("12345678-clip", "   ");

        let rendered = render_own(&clip, &NamingConfig::default());
        assert_eq!(rendered.base_name, "München-Untitled [12345678]");
        assert_eq!(
            rendered.relative_path.to_string_lossy(),
            "München/Untitled/München-Untitled [12345678]"
        );
    }
749
    // Every path component honours the configured cap (24 chars here), and
    // the file name still ends with the complete id8 disambiguator.
    #[test]
    fn very_long_titles_are_trimmed() {
        let clip = test_clip("abcdef12", &"a".repeat(120));
        let rendered = render_own(
            &clip,
            &NamingConfig {
                max_component_len: 24,
                ..NamingConfig::default()
            },
        );

        for component in rendered.relative_path.components() {
            let text = component.as_os_str().to_string_lossy();
            assert!(
                text.chars().count() <= 24,
                "component {text:?} exceeds 24 chars"
            );
        }
        // The trailing [id8] must survive the truncation intact (#120).
        assert!(
            rendered.base_name.ends_with(" [abcdef12]"),
            "id8 disambiguator was sliced; base_name was {:?}",
            rendered.base_name
        );
    }
775
    // The file name fills the cap exactly (40 chars) and the shortening is
    // taken from the title, never from the id.
    #[test]
    fn long_names_keep_the_full_id8_disambiguator() {
        // A creator+title long enough to overflow the cap keeps the whole
        // trailing [id8]: the title is shortened, not the id, so the name stays
        // complete and the bracket stays balanced (#120).
        let clip = test_clip("1234abcd-tail", &"a".repeat(120));
        let config = NamingConfig {
            max_component_len: 40,
            ..NamingConfig::default()
        };
        let rendered = render_own(&clip, &config);

        assert!(
            rendered.base_name.ends_with(" [1234abcd]"),
            "base_name must end with the full disambiguator, was {:?}",
            rendered.base_name
        );
        assert_eq!(rendered.base_name.chars().count(), 40);
    }
795
    // Batch rendering of two clips that share one lineage and one very long
    // title, using the default 80-char cap.
    #[test]
    fn long_titled_siblings_stay_distinct_with_balanced_brackets() {
        // Two same-(long-)titled clips sharing a root must remain distinct: only
        // the title is shortened, so their [id8] suffixes differ and neither name
        // ends up with an unbalanced bracket (#120).
        let lineage = LineageContext {
            root_id: "root-42".to_string(),
            root_title: "Origin".to_string(),
            root_date: String::new(),
            parent_id: "root-42".to_string(),
            edge_type: Some(EdgeType::Cover),
            status: ResolveStatus::Resolved,
        };
        let title = "z".repeat(200);
        let first = test_clip("aaaa1111-x", &title);
        let second = test_clip("bbbb2222-y", &title);
        let requests = [
            NamingRequest {
                clip: &first,
                lineage: &lineage,
            },
            NamingRequest {
                clip: &second,
                lineage: &lineage,
            },
        ];

        let names = render_clip_names(&requests, &NamingConfig::default(), &BTreeSet::new());

        assert!(names[0].base_name.ends_with(" [aaaa1111]"));
        assert!(names[1].base_name.ends_with(" [bbbb2222]"));
        assert_ne!(names[0].relative_path, names[1].relative_path);
        for name in &names {
            assert!(name.base_name.chars().count() <= 80);
            assert_eq!(name.base_name.matches('[').count(), 1, "unbalanced '['");
            assert_eq!(name.base_name.matches(']').count(), 1, "unbalanced ']'");
        }
    }
834
    // The clip's own title doubles as its album here, and that title is
    // listed as colliding, so the album folder must carry ` [root_id8]`.
    #[test]
    fn long_colliding_album_keeps_its_root_id8() {
        // The album [root_id8] disambiguator is preserved when a long album title
        // must be truncated, mirroring the file-name fix (#120).
        let long = "Break Through ".repeat(20);
        let title = long.trim().to_string();
        let clip = Clip {
            id: "aaaa1111-x".to_string(),
            title: title.clone(),
            display_name: "München".to_string(),
            ..Clip::default()
        };
        let colliding: BTreeSet<String> = [title].into_iter().collect();
        let names = render_all_own(&[clip], &NamingConfig::default(), &colliding);

        // Component 0 is the creator folder, component 1 the album folder.
        let album = names[0]
            .relative_path
            .components()
            .nth(1)
            .map(|component| component.as_os_str().to_string_lossy().into_owned())
            .unwrap_or_default();
        assert!(album.ends_with(" [aaaa1111]"), "album was {album:?}");
        assert!(album.chars().count() <= 80);
    }
859
    // 80 literal `ß` in the template expand to 160 ASCII characters, far over
    // the 40-char cap; the trailing id8 must still come out whole.
    #[test]
    fn ascii_expanding_chars_do_not_slice_the_disambiguator() {
        // A literal expanding character (`ß` -> `ss` under ascii) in a custom
        // template, right before the trailing ` [{id8}]`, must not grow back over
        // the suffix and slice it: the base is sized after expansion (#120).
        let clip = test_clip("1234abcd", "Title");
        let config = NamingConfig {
            template: format!("{}{{title}} [{{id8}}]", "ß".repeat(80)),
            character_set: CharacterSet::Ascii,
            max_component_len: 40,
        };
        let rendered = render_own(&clip, &config);

        assert!(
            rendered.base_name.ends_with(" [1234abcd]"),
            "expansion sliced the id8; base_name was {:?}",
            rendered.base_name
        );
        assert!(rendered.base_name.chars().count() <= 40);
    }
880
    // Both clips land in the shared root's album folder ("Origin") and differ
    // only in the bracketed id8 of the file name.
    #[test]
    fn same_title_siblings_stay_distinct_via_id8() {
        // Two clips sharing a root (same album folder) and the same title must
        // still land on distinct files; the default template's {id8} does that.
        let lineage = LineageContext {
            root_id: "root-9".to_string(),
            root_title: "Origin".to_string(),
            root_date: String::new(),
            parent_id: "root-9".to_string(),
            edge_type: Some(EdgeType::Cover),
            status: ResolveStatus::Resolved,
        };
        let first = test_clip("11111111-alpha", "Shared");
        let second = test_clip("22222222-beta", "Shared");
        let requests = [
            NamingRequest {
                clip: &first,
                lineage: &lineage,
            },
            NamingRequest {
                clip: &second,
                lineage: &lineage,
            },
        ];

        let names = render_clip_names(&requests, &NamingConfig::default(), &BTreeSet::new());

        assert_eq!(
            names[0].relative_path.to_string_lossy(),
            "München/Origin/München-Shared [11111111]"
        );
        assert_eq!(
            names[1].relative_path.to_string_lossy(),
            "München/Origin/München-Shared [22222222]"
        );
    }
917
918    #[test]
919    fn id8_prefix_collision_falls_back_to_full_id() {
920        // Custom template without {id8} so identical titles collide and the
921        // filename fallback (full id) has to keep them distinct.
922        let config = NamingConfig {
923            template: "{creator}/{title}".to_string(),
924            ..NamingConfig::default()
925        };
926        let first = test_clip("abcd1234-first", "Untitled");
927        let second = test_clip("abcd1234-second", "Untitled");
928
929        let names = render_all_own(&[first.clone(), second.clone()], &config, &BTreeSet::new());
930        let swapped = render_all_own(&[second.clone(), first.clone()], &config, &BTreeSet::new());
931
932        assert_ne!(
933            names[0].relative_path.to_string_lossy(),
934            names[1].relative_path.to_string_lossy()
935        );
936
937        let ordered = |rendered: &[RenderedName], clips: &[Clip]| {
938            clips
939                .iter()
940                .zip(rendered)
941                .map(|(clip, name)| {
942                    (
943                        clip.id.clone(),
944                        name.relative_path.to_string_lossy().into_owned(),
945                    )
946                })
947                .collect::<BTreeMap<_, _>>()
948        };
949        assert_eq!(
950            ordered(&names, &[first.clone(), second.clone()]),
951            ordered(&swapped, &[second, first])
952        );
953    }
954
955    #[test]
956    fn album_is_root_title_for_a_remix() {
957        let clip = Clip {
958            id: "child".to_string(),
959            title: "Remix".to_string(),
960            display_name: "München".to_string(),
961            ..Clip::default()
962        };
963        let lineage = LineageContext {
964            root_id: "root-1".to_string(),
965            root_title: "Original".to_string(),
966            root_date: String::new(),
967            parent_id: "root-1".to_string(),
968            edge_type: Some(EdgeType::Cover),
969            status: ResolveStatus::Resolved,
970        };
971
972        let rendered = render_clip_name(
973            NamingRequest {
974                clip: &clip,
975                lineage: &lineage,
976            },
977            &NamingConfig::default(),
978        );
979        assert_eq!(
980            rendered.relative_path.to_string_lossy(),
981            "München/Original/München-Remix [child]"
982        );
983    }
984
985    #[test]
986    fn overridden_album_drives_the_folder_path() {
987        // A LineageContext whose root_title carries a manual override (as the
988        // store produces it) folders the clip under the preferred album name.
989        let clip = Clip {
990            id: "child".to_string(),
991            title: "Remix".to_string(),
992            display_name: "München".to_string(),
993            ..Clip::default()
994        };
995        let lineage = LineageContext {
996            root_id: "root-1".to_string(),
997            root_title: "Preferred Album".to_string(),
998            root_date: String::new(),
999            parent_id: "root-1".to_string(),
1000            edge_type: Some(EdgeType::Cover),
1001            status: ResolveStatus::Resolved,
1002        };
1003
1004        let rendered = render_clip_name(
1005            NamingRequest {
1006                clip: &clip,
1007                lineage: &lineage,
1008            },
1009            &NamingConfig::default(),
1010        );
1011        assert_eq!(
1012            rendered.relative_path.to_string_lossy(),
1013            "München/Preferred Album/München-Remix [child]"
1014        );
1015    }
1016
1017    #[test]
1018    fn album_is_own_title_for_a_root() {
1019        let clip = Clip {
1020            id: "root-1".to_string(),
1021            title: "Original".to_string(),
1022            display_name: "München".to_string(),
1023            ..Clip::default()
1024        };
1025
1026        let rendered = render_own(&clip, &NamingConfig::default());
1027        assert_eq!(
1028            rendered.relative_path.to_string_lossy(),
1029            "München/Original/München-Original [root-1]"
1030        );
1031    }
1032
1033    #[test]
1034    fn shared_album_title_from_distinct_roots_is_disambiguated() {
1035        let first = Clip {
1036            id: "aaaa1111-x".to_string(),
1037            title: "Break Through".to_string(),
1038            display_name: "München".to_string(),
1039            ..Clip::default()
1040        };
1041        let second = Clip {
1042            id: "bbbb2222-y".to_string(),
1043            title: "Break Through".to_string(),
1044            display_name: "München".to_string(),
1045            ..Clip::default()
1046        };
1047
1048        // The colliding set is authoritative (store-driven), so disambiguation
1049        // does not depend on both roots appearing in the same batch.
1050        let colliding: BTreeSet<String> = ["Break Through".to_string()].into_iter().collect();
1051        let names = render_all_own(
1052            &[first.clone(), second.clone()],
1053            &NamingConfig::default(),
1054            &colliding,
1055        );
1056        let swapped = render_all_own(
1057            &[second.clone(), first.clone()],
1058            &NamingConfig::default(),
1059            &colliding,
1060        );
1061
1062        let album_of = |rendered: &RenderedName| {
1063            rendered
1064                .relative_path
1065                .components()
1066                .nth(1)
1067                .map(|component| component.as_os_str().to_string_lossy().into_owned())
1068                .unwrap_or_default()
1069        };
1070
1071        assert_eq!(album_of(&names[0]), "Break Through [aaaa1111]");
1072        assert_eq!(album_of(&names[1]), "Break Through [bbbb2222]");
1073        // Deterministic regardless of input order.
1074        assert_eq!(album_of(&swapped[0]), "Break Through [bbbb2222]");
1075        assert_eq!(album_of(&swapped[1]), "Break Through [aaaa1111]");
1076
1077        // The MEDIUM fix: a narrowed run showing only one of the two roots
1078        // still gets the suffixed folder, so folders never oscillate.
1079        let alone = render_all_own(
1080            std::slice::from_ref(&first),
1081            &NamingConfig::default(),
1082            &colliding,
1083        );
1084        assert_eq!(album_of(&alone[0]), "Break Through [aaaa1111]");
1085    }
1086
1087    #[test]
1088    fn unique_root_title_stays_a_bare_album() {
1089        // A title absent from the colliding set keeps its bare folder even when
1090        // the batch happens to hold a same-titled sibling of the same root.
1091        let clip = Clip {
1092            id: "solo-1".to_string(),
1093            title: "Solo".to_string(),
1094            display_name: "München".to_string(),
1095            ..Clip::default()
1096        };
1097        let names = render_all_own(&[clip], &NamingConfig::default(), &BTreeSet::new());
1098        assert_eq!(
1099            names[0].relative_path.to_string_lossy(),
1100            "München/Solo/München-Solo [solo-1]"
1101        );
1102    }
1103
1104    #[test]
1105    fn sanitise_name_strips_separators_and_falls_back_when_empty() {
1106        assert_eq!(sanitise_name("Road/Trip: 2024"), "Road Trip 2024");
1107        assert_eq!(sanitise_name(""), "playlist");
1108        // A name made only of illegal characters strips to nothing, so the
1109        // caller still gets a usable, non-empty stem.
1110        assert_eq!(sanitise_name("///"), "playlist");
1111    }
1112
1113    #[test]
1114    fn stems_folder_is_a_sibling_suffix_of_the_song_base() {
1115        assert_eq!(
1116            stems_folder("Creator/Album/Creator-Song [abcd1234]"),
1117            "Creator/Album/Creator-Song [abcd1234].stems"
1118        );
1119    }
1120
1121    #[test]
1122    fn stem_file_path_combines_song_stem_label_and_disambiguator() {
1123        let path = stem_file_path(
1124            "Creator/Album/Creator-Song [abcd1234]",
1125            "Vocals",
1126            "stem-vocals-9f8e7d6c",
1127            "mp3",
1128            CharacterSet::Unicode,
1129        );
1130        assert_eq!(
1131            path,
1132            "Creator/Album/Creator-Song [abcd1234].stems/Creator-Song [abcd1234] - Vocals [stem-voc].mp3"
1133        );
1134    }
1135
1136    #[test]
1137    fn stem_file_path_disambiguates_blank_and_duplicate_labels_by_id() {
1138        // Two stems with the SAME (blank) label must not collide: the stem-id
1139        // disambiguator keeps them distinct even with no usable label.
1140        let a = stem_file_path("song", "", "id-aaaaaaaa", "wav", CharacterSet::Unicode);
1141        let b = stem_file_path("song", "", "id-bbbbbbbb", "wav", CharacterSet::Unicode);
1142        assert_eq!(a, "song.stems/song [id-aaaaa].wav");
1143        assert_eq!(b, "song.stems/song [id-bbbbb].wav");
1144        assert_ne!(a, b);
1145    }
1146
1147    #[test]
1148    fn stem_file_path_sanitises_label_and_extension_and_honours_ascii() {
1149        // Illegal path characters in the label are stripped, the extension is
1150        // reduced to a safe lowercase token, and ASCII folding applies.
1151        let path = stem_file_path(
1152            "song",
1153            "Lead/Vocal: Æ",
1154            "STEMID12",
1155            ".FLAC",
1156            CharacterSet::Ascii,
1157        );
1158        assert_eq!(path, "song.stems/song - Lead Vocal AE [STEMID12].flac");
1159        // A junk extension falls back to mp3 (defensive; callers pass wav/mp3).
1160        let fallback = stem_file_path("s", "Bass", "x", "??", CharacterSet::Unicode);
1161        assert_eq!(fallback, "s.stems/s - Bass [x].mp3");
1162    }
1163
1164    #[test]
1165    fn case_only_path_difference_is_a_canonical_collision() {
1166        // A custom template without {id8}: clips whose titles differ only in
1167        // case produce different exact paths but the same canonical path and
1168        // must be disambiguated to avoid clobbering on case-insensitive FSes.
1169        let config = NamingConfig {
1170            template: "{creator}/{title}".to_string(),
1171            ..NamingConfig::default()
1172        };
1173        let first = test_clip("aaaa1111-x", "sunrise");
1174        let second = test_clip("bbbb2222-y", "SUNRISE");
1175
1176        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1177
1178        assert_ne!(
1179            names[0].relative_path.to_string_lossy(),
1180            names[1].relative_path.to_string_lossy(),
1181            "canonical collision was not disambiguated"
1182        );
1183    }
1184
1185    #[test]
1186    fn nfc_nfd_path_difference_is_a_canonical_collision() {
1187        // The same character encoded as NFC vs NFD produces different byte
1188        // strings but the same file on NFC-normalising filesystems (macOS APFS).
1189        let config = NamingConfig {
1190            template: "{creator}/{title}".to_string(),
1191            ..NamingConfig::default()
1192        };
1193        // "é" as NFC (U+00E9) vs NFD (e + U+0301).
1194        let nfc_title = "\u{00e9}toile";
1195        let nfd_title = "e\u{0301}toile";
1196        let first = test_clip("aaaa1111-x", nfc_title);
1197        let second = test_clip("bbbb2222-y", nfd_title);
1198
1199        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1200
1201        assert_ne!(
1202            names[0].relative_path.to_string_lossy(),
1203            names[1].relative_path.to_string_lossy(),
1204            "NFC/NFD canonical collision was not disambiguated"
1205        );
1206    }
1207
1208    #[test]
1209    fn genuinely_distinct_paths_are_never_wrongly_disambiguated() {
1210        // Clips with distinct titles (not even canonically equivalent) must not
1211        // receive unnecessary suffixes — the canonical check must not produce
1212        // false positives.
1213        let config = NamingConfig {
1214            template: "{creator}/{title}".to_string(),
1215            ..NamingConfig::default()
1216        };
1217        let first = test_clip("aaaa1111-x", "Alpha");
1218        let second = test_clip("bbbb2222-y", "Beta");
1219
1220        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1221
1222        assert_eq!(
1223            names[0].relative_path.to_string_lossy(),
1224            "München/Alpha",
1225            "distinct path was wrongly suffixed"
1226        );
1227        assert_eq!(
1228            names[1].relative_path.to_string_lossy(),
1229            "München/Beta",
1230            "distinct path was wrongly suffixed"
1231        );
1232    }
1233}