Skip to main content

suno_core/
naming.rs

1//! Pure naming and relative path rendering for [`Clip`] values.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::fmt;
5use std::path::PathBuf;
6use std::str::FromStr;
7
8use serde::{Deserialize, Serialize};
9use unicode_normalization::UnicodeNormalization as _;
10
11use crate::Clip;
12use crate::error::{Error, Result};
13use crate::lineage::LineageContext;
14
/// The default relative path template.
///
/// Supported placeholders are `{creator}`, `{handle}`, `{album}`, `{title}`,
/// `{id}`, `{id8}` (first 8 characters of the clip id), and `{root_id8}`
/// (first 8 of the resolved lineage root id). Empty path segments are dropped
/// after rendering.
///
/// The default embeds `[{id8}]` in the file name so same-title clips never
/// collide, and folders under `{album}`, which resolves to the lineage root's
/// title (else the clip's own title).
pub const DEFAULT_TEMPLATE: &str = "{creator}/{album}/{creator}-{title} [{id8}]";
/// Default cap, counted in characters, on the length of one path component.
const DEFAULT_MAX_COMPONENT_LEN: usize = 80;

/// Minimum number of base characters reserved in front of an appended
/// ` [suffix]`; the effective cap is raised to fit the suffix plus this many.
const MIN_BASE_CHARS_WITH_SUFFIX: usize = 1;
29
/// Which characters may survive into a rendered path component.
///
/// Serialised in lowercase (`unicode` / `ascii`); `Unicode` is the default.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CharacterSet {
    /// Keep Unicode text; only path-hostile and control characters are replaced.
    #[default]
    Unicode,
    /// Reduce to ASCII: a fixed table of accented Latin letters is
    /// transliterated and other non-ASCII characters are replaced.
    Ascii,
}
37
38impl FromStr for CharacterSet {
39    type Err = Error;
40
41    fn from_str(s: &str) -> Result<Self> {
42        match s.to_ascii_lowercase().as_str() {
43            "unicode" => Ok(Self::Unicode),
44            "ascii" => Ok(Self::Ascii),
45            other => Err(Error::Config(format!(
46                "unknown character_set '{other}'; expected 'unicode' or 'ascii'"
47            ))),
48        }
49    }
50}
51
52impl fmt::Display for CharacterSet {
53    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54        match self {
55            Self::Unicode => f.write_str("unicode"),
56            Self::Ascii => f.write_str("ascii"),
57        }
58    }
59}
60
/// Settings that control how clip paths are rendered.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NamingConfig {
    /// Relative path template; `/` separates path segments and `{...}`
    /// placeholders are substituted per clip.
    pub template: String,
    /// Character set applied when sanitising creator, handle, title and album
    /// text as well as each rendered segment.
    pub character_set: CharacterSet,
    /// Cap, in characters, on each sanitised path component.
    pub max_component_len: usize,
}
67
68impl Default for NamingConfig {
69    fn default() -> Self {
70        Self {
71            template: DEFAULT_TEMPLATE.to_string(),
72            character_set: CharacterSet::Unicode,
73            max_component_len: DEFAULT_MAX_COMPONENT_LEN,
74        }
75    }
76}
77
/// One clip to name, paired with its resolved lineage.
#[derive(Debug, Clone, Copy)]
pub struct NamingRequest<'a> {
    /// The clip whose path is being rendered.
    pub clip: &'a Clip,
    /// Lineage context supplying the album title and the root id.
    pub lineage: &'a LineageContext,
}
83
/// The rendered location of one clip.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RenderedName {
    /// Every rendered component joined into a relative path; its last
    /// component is `base_name`.
    pub relative_path: PathBuf,
    /// The final path component on its own.
    pub base_name: String,
}
89
90pub fn render_clip_name(request: NamingRequest<'_>, config: &NamingConfig) -> RenderedName {
91    let album = album_component(request, config);
92    render_with_album(request, config, &album)
93}
94
95pub fn render_clip_names(
96    requests: &[NamingRequest<'_>],
97    config: &NamingConfig,
98    colliding_albums: &BTreeSet<String>,
99) -> Vec<RenderedName> {
100    let albums = disambiguated_albums(requests, config, colliding_albums);
101    let mut rendered = requests
102        .iter()
103        .zip(&albums)
104        .map(|(request, album)| render_with_album(*request, config, album))
105        .collect::<Vec<_>>();
106
107    // Two passes to keep distinct clips from landing on one path.  The first
108    // pass keys on the exact rendered string; the second on the filesystem-
109    // canonical form (NFC + lowercase) so that paths differing only by case or
110    // Unicode normalisation (NFD vs NFC) are caught too — they would collide on
111    // case-insensitive or NFC-normalising filesystems (Windows, macOS default).
112    for apply_canonical in [false, true] {
113        let mut collisions = BTreeMap::<String, Vec<usize>>::new();
114        for (index, name) in rendered.iter().enumerate() {
115            let key = if apply_canonical {
116                canonical_path_key(&name.relative_path.to_string_lossy())
117            } else {
118                name.relative_path.to_string_lossy().into_owned()
119            };
120            collisions.entry(key).or_default().push(index);
121        }
122        for indexes in collisions.into_values().filter(|v| v.len() > 1) {
123            for index in indexes {
124                let suffix = &requests[index].clip.id;
125                rendered[index] = with_suffix(
126                    rendered[index].clone(),
127                    suffix,
128                    config.character_set,
129                    config.max_component_len,
130                );
131            }
132        }
133    }
134
135    rendered
136}
137
138/// Filesystem-canonical key: NFC-normalise then lowercase, so paths that differ
139/// only by case or by NFC/NFD encoding hash to the same bucket.
140fn canonical_path_key(path: &str) -> String {
141    path.nfc().flat_map(char::to_lowercase).collect()
142}
143
144/// The album path component for every request, with a clip whose root title
145/// collides across distinct roots disambiguated by `[{root_id8}]`.
146///
147/// Distinct roots must never share an album folder (two different upload roots
148/// titled "Break Through" exist). `colliding_albums` is the authoritative set
149/// of such shared root titles, computed once from the whole lineage store, so
150/// the decision is stable across runs and independent of which clips appear in
151/// this batch. A clip whose resolved album is in that set always gets its
152/// root's short id appended; every other clip keeps the bare album and groups
153/// with its same-root siblings.
154fn disambiguated_albums(
155    requests: &[NamingRequest<'_>],
156    config: &NamingConfig,
157    colliding_albums: &BTreeSet<String>,
158) -> Vec<String> {
159    requests
160        .iter()
161        .map(|request| album_for(*request, config, colliding_albums))
162        .collect()
163}
164
165/// The (possibly disambiguated) album component for one request.
166fn album_for(
167    request: NamingRequest<'_>,
168    config: &NamingConfig,
169    colliding_albums: &BTreeSet<String>,
170) -> String {
171    let raw_album = request.lineage.album(&title_name(request.clip));
172    let album = sanitise_component(&raw_album, config.character_set, config.max_component_len);
173    if colliding_albums.contains(raw_album.trim()) {
174        let suffix = truncate_chars(&request.lineage.root_id, 8);
175        append_suffix(
176            &album,
177            &suffix,
178            config.character_set,
179            config.max_component_len,
180        )
181    } else {
182        album
183    }
184}
185
186/// The sanitised album component: the resolved lineage album (root title, else
187/// the clip's own title).
188fn album_component(request: NamingRequest<'_>, config: &NamingConfig) -> String {
189    let album = request.lineage.album(&title_name(request.clip));
190    sanitise_component(&album, config.character_set, config.max_component_len)
191}
192
193/// Render one clip's path with an already-resolved album component.
194fn render_with_album(
195    request: NamingRequest<'_>,
196    config: &NamingConfig,
197    album: &str,
198) -> RenderedName {
199    let clip = request.clip;
200    let creator = sanitise_component(
201        &creator_name(clip),
202        config.character_set,
203        config.max_component_len,
204    );
205    let handle = sanitise_component(&clip.handle, config.character_set, config.max_component_len);
206    let title = sanitise_component(
207        &title_name(clip),
208        config.character_set,
209        config.max_component_len,
210    );
211    let id = sanitise_component(&clip.id, CharacterSet::Ascii, config.max_component_len);
212    let id8 = sanitise_component(
213        &truncate_chars(&clip.id, 8),
214        CharacterSet::Ascii,
215        config.max_component_len,
216    );
217    let root_id8 = sanitise_component(
218        &truncate_chars(&request.lineage.root_id, 8),
219        CharacterSet::Ascii,
220        config.max_component_len,
221    );
222    let substitutions = SegmentSubstitutions {
223        creator: &creator,
224        handle: &handle,
225        album,
226        title: &title,
227        root_id8: &root_id8,
228        id8: &id8,
229        id: &id,
230    };
231    let mut components = config
232        .template
233        .split('/')
234        .filter_map(|segment| {
235            let rendered = substitute_segment(segment, substitutions);
236            let sanitised = sanitise_segment(
237                &rendered,
238                config.character_set,
239                config.max_component_len,
240                [id8.as_str(), root_id8.as_str()],
241            );
242            (!sanitised.is_empty()).then_some(sanitised)
243        })
244        .collect::<Vec<_>>();
245
246    if components.is_empty() {
247        components.push(title.clone());
248    }
249
250    let mut base_name = components
251        .pop()
252        .filter(|value| !value.is_empty())
253        .unwrap_or_else(|| title.clone());
254    // Guarantee a non-empty file name even when every token sanitises away.
255    if base_name.is_empty() {
256        base_name = append_suffix(
257            &base_name,
258            &clip.id,
259            config.character_set,
260            config.max_component_len,
261        );
262    }
263
264    let mut relative_path = PathBuf::new();
265    for component in components {
266        relative_path.push(component);
267    }
268
269    relative_path.push(&base_name);
270    RenderedName {
271        relative_path,
272        base_name,
273    }
274}
275
/// Already-sanitised replacement text for each template placeholder.
#[derive(Clone, Copy)]
struct SegmentSubstitutions<'a> {
    /// Value for `{creator}`.
    creator: &'a str,
    /// Value for `{handle}`.
    handle: &'a str,
    /// Value for `{album}`.
    album: &'a str,
    /// Value for `{title}`.
    title: &'a str,
    /// Value for `{root_id8}`.
    root_id8: &'a str,
    /// Value for `{id8}`.
    id8: &'a str,
    /// Value for `{id}`.
    id: &'a str,
}
286
287fn substitute_segment(segment: &str, substitutions: SegmentSubstitutions<'_>) -> String {
288    let mut rendered = String::with_capacity(segment.len());
289    let mut remainder = segment;
290    while let Some(start) = remainder.find('{') {
291        rendered.push_str(&remainder[..start]);
292        remainder = &remainder[start..];
293        if let Some((token_len, value)) = placeholder_match(remainder, substitutions) {
294            rendered.push_str(value);
295            remainder = &remainder[token_len..];
296        } else {
297            rendered.push('{');
298            remainder = &remainder[1..];
299        }
300    }
301    rendered.push_str(remainder);
302    rendered
303}
304
305fn placeholder_match<'a>(
306    segment: &str,
307    substitutions: SegmentSubstitutions<'a>,
308) -> Option<(usize, &'a str)> {
309    if segment.starts_with("{creator}") {
310        Some(("{creator}".len(), substitutions.creator))
311    } else if segment.starts_with("{handle}") {
312        Some(("{handle}".len(), substitutions.handle))
313    } else if segment.starts_with("{album}") {
314        Some(("{album}".len(), substitutions.album))
315    } else if segment.starts_with("{title}") {
316        Some(("{title}".len(), substitutions.title))
317    } else if segment.starts_with("{root_id8}") {
318        Some(("{root_id8}".len(), substitutions.root_id8))
319    } else if segment.starts_with("{id8}") {
320        Some(("{id8}".len(), substitutions.id8))
321    } else if segment.starts_with("{id}") {
322        Some(("{id}".len(), substitutions.id))
323    } else {
324        None
325    }
326}
327
328fn with_suffix(
329    mut rendered: RenderedName,
330    suffix: &str,
331    character_set: CharacterSet,
332    max_component_len: usize,
333) -> RenderedName {
334    rendered.base_name = append_suffix(
335        &rendered.base_name,
336        suffix,
337        character_set,
338        max_component_len,
339    );
340    rendered.relative_path.set_file_name(&rendered.base_name);
341    rendered
342}
343
344fn creator_name(clip: &Clip) -> String {
345    non_blank(&clip.display_name)
346        .or_else(|| non_blank(&clip.handle))
347        .unwrap_or("Unknown Creator")
348        .to_string()
349}
350
351fn title_name(clip: &Clip) -> String {
352    let title = clip.title.trim();
353    if title.is_empty() || title.eq_ignore_ascii_case("untitled") {
354        "Untitled".to_string()
355    } else {
356        title.to_string()
357    }
358}
359
360fn append_suffix(
361    base: &str,
362    suffix: &str,
363    character_set: CharacterSet,
364    max_component_len: usize,
365) -> String {
366    let suffix_pattern = format!(" [{suffix}]");
367    if base.ends_with(&suffix_pattern) {
368        return sanitise_component(base, character_set, max_component_len);
369    }
370
371    let max_len =
372        max_component_len.max(suffix_pattern.chars().count() + MIN_BASE_CHARS_WITH_SUFFIX);
373    let allowed = max_len.saturating_sub(suffix_pattern.chars().count());
374    // Sanitise the base before measuring it. The character set can expand a
375    // character (ascii turns `ß` into `ss`), so budgeting the cut on the raw
376    // length could let the sanitised prefix grow back over the room reserved for
377    // the suffix and slice through it again (#120).
378    let base = sanitise_component(base, character_set, max_len);
379    let truncated = truncate_chars(base.trim_end(), allowed);
380    let combined = format!("{truncated}{suffix_pattern}");
381    sanitise_component(&combined, character_set, max_len)
382}
383
384/// Sanitise a rendered template segment, preserving a trailing ` [id]`
385/// disambiguator (the `[{id8}]` or `[{root_id8}]` the template embeds) when the
386/// segment would otherwise be truncated through it. Only the title portion is
387/// shortened, so two long-titled siblings keep their distinguishing id and the
388/// closing bracket is never left unbalanced (#120). A segment that does not end
389/// in a disambiguator is sanitised exactly as before.
390fn sanitise_segment(
391    rendered: &str,
392    character_set: CharacterSet,
393    max_component_len: usize,
394    disambiguators: [&str; 2],
395) -> String {
396    for suffix in disambiguators {
397        if suffix.is_empty() {
398            continue;
399        }
400        let pattern = format!(" [{suffix}]");
401        if let Some(prefix) = rendered.strip_suffix(&pattern) {
402            return append_suffix(prefix, suffix, character_set, max_component_len);
403        }
404    }
405    sanitise_component(rendered, character_set, max_component_len)
406}
407
408/// Sanitise a free-form playlist name into a single safe path component.
409///
410/// Applies the same Unicode filtering and length cap as clip path components
411/// (default [`CharacterSet::Unicode`], [`DEFAULT_MAX_COMPONENT_LEN`]), so a
412/// playlist file name obeys the same filesystem rules as the rest of the
413/// library. An empty or fully-stripped name falls back to `playlist` so the
414/// caller always has a non-empty stem to append `.m3u8` to.
415pub fn sanitise_name(name: &str) -> String {
416    let cleaned = sanitise_component(name, CharacterSet::Unicode, DEFAULT_MAX_COMPONENT_LEN);
417    if cleaned.is_empty() {
418        "playlist".to_string()
419    } else {
420        cleaned
421    }
422}
423
/// Path of the `.stems` folder that sits next to a song's audio file.
///
/// `base` is the song's extensionless relative path; the folder is that path
/// with `.stems` appended, which keeps it distinct from `{base}.<ext>` and
/// from any `{base}.<sidecar>` file.
pub fn stems_folder(base: &str) -> String {
    [base, ".stems"].concat()
}
433
434/// The relative path of one stem file inside a song's [`stems_folder`].
435///
436/// Named base+label+disambiguation rather than label-only, because Auto Split
437/// can mislabel stems and Advanced Split yields ~100 instruments, so blank or
438/// duplicate labels are expected. The file is
439/// `{song file name} - {label} [{stem id8}].{ext}`; the ` - {label}` piece is
440/// dropped when the label sanitises to empty, and the `[{stem id8}]`
441/// disambiguator (the first 8 characters of the stable stem id) keeps blank or
442/// duplicate labels collision-free. Every component is run through the same
443/// [`sanitise_component`] filter as the rest of the library, honouring
444/// `character_set`.
445pub fn stem_file_path(
446    base: &str,
447    label: &str,
448    stem_id: &str,
449    ext: &str,
450    character_set: CharacterSet,
451) -> String {
452    let folder = stems_folder(base);
453    // The song's own file-name stem (the last path component of `base`), reused
454    // so a stem stays identifiable even when viewed outside its `.stems` folder.
455    let song_stem = base.rsplit('/').next().unwrap_or(base);
456    let label = sanitise_component(label, character_set, DEFAULT_MAX_COMPONENT_LEN);
457    let id8 = sanitise_component(
458        &truncate_chars(stem_id, 8),
459        CharacterSet::Ascii,
460        DEFAULT_MAX_COMPONENT_LEN,
461    );
462
463    let mut name = song_stem.to_string();
464    if !label.is_empty() {
465        name.push_str(" - ");
466        name.push_str(&label);
467    }
468    if !id8.is_empty() {
469        name.push_str(" [");
470        name.push_str(&id8);
471        name.push(']');
472    }
473    // A degenerate base (empty song stem, blank label, empty id) must still
474    // yield a usable name rather than a hidden dotfile.
475    if name.trim().is_empty() {
476        name = "stem".to_string();
477    }
478    format!("{folder}/{name}.{}", sanitise_ext(ext))
479}
480
/// Reduce a candidate extension to at most 8 lowercase ASCII alphanumeric
/// characters, returning `mp3` when nothing usable remains.
fn sanitise_ext(ext: &str) -> String {
    let mut cleaned = String::with_capacity(8);
    for ch in ext.trim_start_matches('.').chars() {
        if cleaned.len() == 8 {
            break;
        }
        if ch.is_ascii_alphanumeric() {
            cleaned.push(ch.to_ascii_lowercase());
        }
    }
    if cleaned.is_empty() {
        String::from("mp3")
    } else {
        cleaned
    }
}
498
499fn sanitise_component(
500    value: &str,
501    character_set: CharacterSet,
502    max_component_len: usize,
503) -> String {
504    // Single pass: map each char to its charset-safe form while collapsing runs
505    // of whitespace to one space and dropping leading/trailing whitespace. This
506    // fuses the old filter / split_whitespace / collect / join steps, which
507    // allocated several intermediate strings and a vector, into one buffer.
508    let mut collapsed = String::with_capacity(value.len());
509    let mut pending_space = false;
510    let push = |out: char, buf: &mut String, pending: &mut bool| {
511        if out.is_whitespace() {
512            *pending = !buf.is_empty();
513        } else {
514            if *pending {
515                buf.push(' ');
516            }
517            *pending = false;
518            buf.push(out);
519        }
520    };
521    match character_set {
522        CharacterSet::Unicode => {
523            for ch in value.chars() {
524                push(unicode_char(ch), &mut collapsed, &mut pending_space);
525            }
526        }
527        CharacterSet::Ascii => {
528            for ch in value.chars() {
529                for out in ascii_chars(ch) {
530                    push(out, &mut collapsed, &mut pending_space);
531                }
532            }
533        }
534    }
535
536    let trimmed = collapsed.trim_matches([' ', '.']);
537    if trimmed.is_empty() {
538        return String::new();
539    }
540
541    // Keep at most `max` characters, then trim any space or dot the cut exposed.
542    // Slicing at the char boundary avoids the extra String the old
543    // truncate-then-trim-then-to_string sequence built.
544    let max = max_component_len.max(1);
545    let end = trimmed
546        .char_indices()
547        .nth(max)
548        .map_or(trimmed.len(), |(index, _)| index);
549    let result = trimmed[..end].trim_matches([' ', '.']);
550    if result.is_empty() {
551        return String::new();
552    }
553    if result == "." || result == ".." {
554        return "item".to_string();
555    }
556    let mut result = result.to_string();
557    if !result.ends_with('_') && is_reserved_name(&result) {
558        result.push('_');
559    }
560    result
561}
562
/// Map one character to its path-safe form: the characters
/// `< > : " / \ | ? *`, NUL, and every control character become a space;
/// anything else is returned unchanged.
fn unicode_char(ch: char) -> char {
    match ch {
        '<' | '>' | ':' | '"' | '/' | '\\' | '|' | '?' | '*' | '\0' => ' ',
        other if other.is_control() => ' ',
        other => other,
    }
}
574
575fn ascii_chars(ch: char) -> Vec<char> {
576    if ch.is_ascii() {
577        return vec![unicode_char(ch)];
578    }
579
580    match ch {
581        'À' | 'Á' | 'Â' | 'Ã' | 'Ä' | 'Å' => vec!['A'],
582        'à' | 'á' | 'â' | 'ã' | 'ä' | 'å' => vec!['a'],
583        'Ç' => vec!['C'],
584        'ç' => vec!['c'],
585        'È' | 'É' | 'Ê' | 'Ë' => vec!['E'],
586        'è' | 'é' | 'ê' | 'ë' => vec!['e'],
587        'Ì' | 'Í' | 'Î' | 'Ï' => vec!['I'],
588        'ì' | 'í' | 'î' | 'ï' => vec!['i'],
589        'Ñ' => vec!['N'],
590        'ñ' => vec!['n'],
591        'Ò' | 'Ó' | 'Ô' | 'Õ' | 'Ö' | 'Ø' => vec!['O'],
592        'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ø' => vec!['o'],
593        'Ù' | 'Ú' | 'Û' | 'Ü' => vec!['U'],
594        'ù' | 'ú' | 'û' | 'ü' => vec!['u'],
595        'Ý' | 'Ÿ' => vec!['Y'],
596        'ý' | 'ÿ' => vec!['y'],
597        'Æ' => vec!['A', 'E'],
598        'æ' => vec!['a', 'e'],
599        'Œ' => vec!['O', 'E'],
600        'œ' => vec!['o', 'e'],
601        'ß' => vec!['s', 's'],
602        _ => vec![' '],
603    }
604}
605
/// Return the first `max_len` characters (not bytes) of `value` as an owned
/// string; shorter input is returned whole.
fn truncate_chars(value: &str, max_len: usize) -> String {
    match value.char_indices().nth(max_len) {
        Some((cut, _)) => value[..cut].to_owned(),
        None => value.to_owned(),
    }
}
609
/// Trim `value` and return it, or `None` when only whitespace (or nothing)
/// remains.
fn non_blank(value: &str) -> Option<&str> {
    let trimmed = value.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed)
    }
}
614
/// Whether the part of `value` before its first `.` is a reserved device
/// name: `CON`, `PRN`, `AUX`, `NUL`, `COM1`-`COM9` or `LPT1`-`LPT9`, compared
/// without regard to ASCII case.
fn is_reserved_name(value: &str) -> bool {
    let stem = match value.find('.') {
        Some(dot) => &value[..dot],
        None => value,
    };
    // Compare as bytes: every reserved name is pure ASCII, so no uppercased
    // copy is needed and multi-byte characters simply fail to match.
    let bytes = stem.as_bytes();
    match bytes.len() {
        3 => [b"CON", b"PRN", b"AUX", b"NUL"]
            .iter()
            .any(|device| bytes.eq_ignore_ascii_case(&device[..])),
        4 => {
            let port = &bytes[..3];
            let numbered_port =
                port.eq_ignore_ascii_case(b"COM") || port.eq_ignore_ascii_case(b"LPT");
            numbered_port && (b'1'..=b'9').contains(&bytes[3])
        }
        _ => false,
    }
}
628
629#[cfg(test)]
630mod tests {
631    use super::*;
632    use crate::lineage::{EdgeType, ResolveStatus};
633    use std::collections::{BTreeMap, BTreeSet};
634
635    fn test_clip(id: &str, title: &str) -> Clip {
636        Clip {
637            id: id.to_string(),
638            title: title.to_string(),
639            display_name: "München".to_string(),
640            handle: "munchen".to_string(),
641            album_title: String::new(),
642            root_ancestor_id: String::new(),
643            ..Clip::default()
644        }
645    }
646
647    fn render_own(clip: &Clip, config: &NamingConfig) -> RenderedName {
648        let lineage = LineageContext::own_root(clip);
649        render_clip_name(
650            NamingRequest {
651                clip,
652                lineage: &lineage,
653            },
654            config,
655        )
656    }
657
658    fn render_all_own(
659        clips: &[Clip],
660        config: &NamingConfig,
661        colliding: &BTreeSet<String>,
662    ) -> Vec<RenderedName> {
663        let lineages: Vec<LineageContext> = clips.iter().map(LineageContext::own_root).collect();
664        let requests: Vec<NamingRequest> = clips
665            .iter()
666            .zip(&lineages)
667            .map(|(clip, lineage)| NamingRequest { clip, lineage })
668            .collect();
669        render_clip_names(&requests, config, colliding)
670    }
671
672    #[test]
673    fn unicode_names_are_preserved_and_ascii_falls_back() {
674        let clip = test_clip("abc12345", "Beyoncé/東京");
675
676        let unicode = render_own(&clip, &NamingConfig::default());
677        assert_eq!(
678            unicode.relative_path.to_string_lossy(),
679            "München/Beyoncé 東京/München-Beyoncé 東京 [abc12345]"
680        );
681
682        let ascii = render_own(
683            &clip,
684            &NamingConfig {
685                character_set: CharacterSet::Ascii,
686                ..NamingConfig::default()
687            },
688        );
689        assert_eq!(
690            ascii.relative_path.to_string_lossy(),
691            "Munchen/Beyonce/Munchen-Beyonce [abc12345]"
692        );
693    }
694
695    #[test]
696    fn reserved_and_hostile_names_are_sanitised() {
697        let clip = Clip {
698            id: "deadbeef".to_string(),
699            title: "CON<>:\"/\\|?*.".to_string(),
700            display_name: "AUX".to_string(),
701            ..Clip::default()
702        };
703
704        let rendered = render_own(&clip, &NamingConfig::default());
705        let path = rendered.relative_path.to_string_lossy();
706        assert!(path.starts_with("AUX_/CON_/"), "path was {path}");
707        assert!(rendered.base_name.contains("[deadbeef]"));
708    }
709
710    #[test]
711    fn default_template_always_embeds_id8() {
712        let clip = test_clip("abcdef1234567890", "Any Title");
713        let rendered = render_own(&clip, &NamingConfig::default());
714        assert!(
715            rendered.base_name.contains("[abcdef12]"),
716            "base_name was {}",
717            rendered.base_name
718        );
719    }
720
721    #[test]
722    fn custom_template_replaces_all_known_placeholders_once() {
723        let clip = Clip {
724            id: "abcdef12-full".to_string(),
725            title: "Song".to_string(),
726            display_name: "Creator".to_string(),
727            handle: "handle".to_string(),
728            ..Clip::default()
729        };
730        let lineage = LineageContext {
731            root_id: "rootxyz9-extra".to_string(),
732            root_title: "Album".to_string(),
733            root_date: String::new(),
734            parent_id: "rootxyz9-extra".to_string(),
735            edge_type: Some(EdgeType::Cover),
736            status: ResolveStatus::Resolved,
737        };
738        let config = NamingConfig {
739            template: "{creator}-{handle}-{album}-{title}-{root_id8}-{id8}-{id}-{unknown}"
740                .to_string(),
741            ..NamingConfig::default()
742        };
743
744        let rendered = render_clip_name(
745            NamingRequest {
746                clip: &clip,
747                lineage: &lineage,
748            },
749            &config,
750        );
751
752        assert_eq!(
753            rendered.relative_path.to_string_lossy(),
754            "Creator-handle-Album-Song-rootxyz9-abcdef12-abcdef12-full-{unknown}"
755        );
756    }
757
758    #[test]
759    fn blank_titles_use_a_stable_suffix() {
760        let clip = test_clip("12345678-clip", "   ");
761
762        let rendered = render_own(&clip, &NamingConfig::default());
763        assert_eq!(rendered.base_name, "München-Untitled [12345678]");
764        assert_eq!(
765            rendered.relative_path.to_string_lossy(),
766            "München/Untitled/München-Untitled [12345678]"
767        );
768    }
769
770    #[test]
771    fn very_long_titles_are_trimmed() {
772        let clip = test_clip("abcdef12", &"a".repeat(120));
773        let rendered = render_own(
774            &clip,
775            &NamingConfig {
776                max_component_len: 24,
777                ..NamingConfig::default()
778            },
779        );
780
781        for component in rendered.relative_path.components() {
782            let text = component.as_os_str().to_string_lossy();
783            assert!(
784                text.chars().count() <= 24,
785                "component {text:?} exceeds 24 chars"
786            );
787        }
788        // The trailing [id8] must survive the truncation intact (#120).
789        assert!(
790            rendered.base_name.ends_with(" [abcdef12]"),
791            "id8 disambiguator was sliced; base_name was {:?}",
792            rendered.base_name
793        );
794    }
795
796    #[test]
797    fn long_names_keep_the_full_id8_disambiguator() {
798        // A creator+title long enough to overflow the cap keeps the whole
799        // trailing [id8]: the title is shortened, not the id, so the name stays
800        // complete and the bracket stays balanced (#120).
801        let clip = test_clip("1234abcd-tail", &"a".repeat(120));
802        let config = NamingConfig {
803            max_component_len: 40,
804            ..NamingConfig::default()
805        };
806        let rendered = render_own(&clip, &config);
807
808        assert!(
809            rendered.base_name.ends_with(" [1234abcd]"),
810            "base_name must end with the full disambiguator, was {:?}",
811            rendered.base_name
812        );
813        assert_eq!(rendered.base_name.chars().count(), 40);
814    }
815
816    #[test]
817    fn long_titled_siblings_stay_distinct_with_balanced_brackets() {
818        // Two same-(long-)titled clips sharing a root must remain distinct: only
819        // the title is shortened, so their [id8] suffixes differ and neither name
820        // ends up with an unbalanced bracket (#120).
821        let lineage = LineageContext {
822            root_id: "root-42".to_string(),
823            root_title: "Origin".to_string(),
824            root_date: String::new(),
825            parent_id: "root-42".to_string(),
826            edge_type: Some(EdgeType::Cover),
827            status: ResolveStatus::Resolved,
828        };
829        let title = "z".repeat(200);
830        let first = test_clip("aaaa1111-x", &title);
831        let second = test_clip("bbbb2222-y", &title);
832        let requests = [
833            NamingRequest {
834                clip: &first,
835                lineage: &lineage,
836            },
837            NamingRequest {
838                clip: &second,
839                lineage: &lineage,
840            },
841        ];
842
843        let names = render_clip_names(&requests, &NamingConfig::default(), &BTreeSet::new());
844
845        assert!(names[0].base_name.ends_with(" [aaaa1111]"));
846        assert!(names[1].base_name.ends_with(" [bbbb2222]"));
847        assert_ne!(names[0].relative_path, names[1].relative_path);
848        for name in &names {
849            assert!(name.base_name.chars().count() <= 80);
850            assert_eq!(name.base_name.matches('[').count(), 1, "unbalanced '['");
851            assert_eq!(name.base_name.matches(']').count(), 1, "unbalanced ']'");
852        }
853    }
854
855    #[test]
856    fn long_colliding_album_keeps_its_root_id8() {
857        // The album [root_id8] disambiguator is preserved when a long album title
858        // must be truncated, mirroring the file-name fix (#120).
859        let long = "Break Through ".repeat(20);
860        let title = long.trim().to_string();
861        let clip = Clip {
862            id: "aaaa1111-x".to_string(),
863            title: title.clone(),
864            display_name: "München".to_string(),
865            ..Clip::default()
866        };
867        let colliding: BTreeSet<String> = [title].into_iter().collect();
868        let names = render_all_own(&[clip], &NamingConfig::default(), &colliding);
869
870        let album = names[0]
871            .relative_path
872            .components()
873            .nth(1)
874            .map(|component| component.as_os_str().to_string_lossy().into_owned())
875            .unwrap_or_default();
876        assert!(album.ends_with(" [aaaa1111]"), "album was {album:?}");
877        assert!(album.chars().count() <= 80);
878    }
879
880    #[test]
881    fn ascii_expanding_chars_do_not_slice_the_disambiguator() {
882        // A literal expanding character (`ß` -> `ss` under ascii) in a custom
883        // template, right before the trailing ` [{id8}]`, must not grow back over
884        // the suffix and slice it: the base is sized after expansion (#120).
885        let clip = test_clip("1234abcd", "Title");
886        let config = NamingConfig {
887            template: format!("{}{{title}} [{{id8}}]", "ß".repeat(80)),
888            character_set: CharacterSet::Ascii,
889            max_component_len: 40,
890        };
891        let rendered = render_own(&clip, &config);
892
893        assert!(
894            rendered.base_name.ends_with(" [1234abcd]"),
895            "expansion sliced the id8; base_name was {:?}",
896            rendered.base_name
897        );
898        assert!(rendered.base_name.chars().count() <= 40);
899    }
900
901    #[test]
902    fn same_title_siblings_stay_distinct_via_id8() {
903        // Two clips sharing a root (same album folder) and the same title must
904        // still land on distinct files; the default template's {id8} does that.
905        let lineage = LineageContext {
906            root_id: "root-9".to_string(),
907            root_title: "Origin".to_string(),
908            root_date: String::new(),
909            parent_id: "root-9".to_string(),
910            edge_type: Some(EdgeType::Cover),
911            status: ResolveStatus::Resolved,
912        };
913        let first = test_clip("11111111-alpha", "Shared");
914        let second = test_clip("22222222-beta", "Shared");
915        let requests = [
916            NamingRequest {
917                clip: &first,
918                lineage: &lineage,
919            },
920            NamingRequest {
921                clip: &second,
922                lineage: &lineage,
923            },
924        ];
925
926        let names = render_clip_names(&requests, &NamingConfig::default(), &BTreeSet::new());
927
928        assert_eq!(
929            names[0].relative_path.to_string_lossy(),
930            "München/Origin/München-Shared [11111111]"
931        );
932        assert_eq!(
933            names[1].relative_path.to_string_lossy(),
934            "München/Origin/München-Shared [22222222]"
935        );
936    }
937
938    #[test]
939    fn id8_prefix_collision_falls_back_to_full_id() {
940        // Custom template without {id8} so identical titles collide and the
941        // filename fallback (full id) has to keep them distinct.
942        let config = NamingConfig {
943            template: "{creator}/{title}".to_string(),
944            ..NamingConfig::default()
945        };
946        let first = test_clip("abcd1234-first", "Untitled");
947        let second = test_clip("abcd1234-second", "Untitled");
948
949        let names = render_all_own(&[first.clone(), second.clone()], &config, &BTreeSet::new());
950        let swapped = render_all_own(&[second.clone(), first.clone()], &config, &BTreeSet::new());
951
952        assert_ne!(
953            names[0].relative_path.to_string_lossy(),
954            names[1].relative_path.to_string_lossy()
955        );
956
957        let ordered = |rendered: &[RenderedName], clips: &[Clip]| {
958            clips
959                .iter()
960                .zip(rendered)
961                .map(|(clip, name)| {
962                    (
963                        clip.id.clone(),
964                        name.relative_path.to_string_lossy().into_owned(),
965                    )
966                })
967                .collect::<BTreeMap<_, _>>()
968        };
969        assert_eq!(
970            ordered(&names, &[first.clone(), second.clone()]),
971            ordered(&swapped, &[second, first])
972        );
973    }
974
975    #[test]
976    fn album_is_root_title_for_a_remix() {
977        let clip = Clip {
978            id: "child".to_string(),
979            title: "Remix".to_string(),
980            display_name: "München".to_string(),
981            ..Clip::default()
982        };
983        let lineage = LineageContext {
984            root_id: "root-1".to_string(),
985            root_title: "Original".to_string(),
986            root_date: String::new(),
987            parent_id: "root-1".to_string(),
988            edge_type: Some(EdgeType::Cover),
989            status: ResolveStatus::Resolved,
990        };
991
992        let rendered = render_clip_name(
993            NamingRequest {
994                clip: &clip,
995                lineage: &lineage,
996            },
997            &NamingConfig::default(),
998        );
999        assert_eq!(
1000            rendered.relative_path.to_string_lossy(),
1001            "München/Original/München-Remix [child]"
1002        );
1003    }
1004
1005    #[test]
1006    fn album_is_own_title_for_a_root() {
1007        let clip = Clip {
1008            id: "root-1".to_string(),
1009            title: "Original".to_string(),
1010            display_name: "München".to_string(),
1011            ..Clip::default()
1012        };
1013
1014        let rendered = render_own(&clip, &NamingConfig::default());
1015        assert_eq!(
1016            rendered.relative_path.to_string_lossy(),
1017            "München/Original/München-Original [root-1]"
1018        );
1019    }
1020
1021    #[test]
1022    fn shared_album_title_from_distinct_roots_is_disambiguated() {
1023        let first = Clip {
1024            id: "aaaa1111-x".to_string(),
1025            title: "Break Through".to_string(),
1026            display_name: "München".to_string(),
1027            ..Clip::default()
1028        };
1029        let second = Clip {
1030            id: "bbbb2222-y".to_string(),
1031            title: "Break Through".to_string(),
1032            display_name: "München".to_string(),
1033            ..Clip::default()
1034        };
1035
1036        // The colliding set is authoritative (store-driven), so disambiguation
1037        // does not depend on both roots appearing in the same batch.
1038        let colliding: BTreeSet<String> = ["Break Through".to_string()].into_iter().collect();
1039        let names = render_all_own(
1040            &[first.clone(), second.clone()],
1041            &NamingConfig::default(),
1042            &colliding,
1043        );
1044        let swapped = render_all_own(
1045            &[second.clone(), first.clone()],
1046            &NamingConfig::default(),
1047            &colliding,
1048        );
1049
1050        let album_of = |rendered: &RenderedName| {
1051            rendered
1052                .relative_path
1053                .components()
1054                .nth(1)
1055                .map(|component| component.as_os_str().to_string_lossy().into_owned())
1056                .unwrap_or_default()
1057        };
1058
1059        assert_eq!(album_of(&names[0]), "Break Through [aaaa1111]");
1060        assert_eq!(album_of(&names[1]), "Break Through [bbbb2222]");
1061        // Deterministic regardless of input order.
1062        assert_eq!(album_of(&swapped[0]), "Break Through [bbbb2222]");
1063        assert_eq!(album_of(&swapped[1]), "Break Through [aaaa1111]");
1064
1065        // The MEDIUM fix: a narrowed run showing only one of the two roots
1066        // still gets the suffixed folder, so folders never oscillate.
1067        let alone = render_all_own(
1068            std::slice::from_ref(&first),
1069            &NamingConfig::default(),
1070            &colliding,
1071        );
1072        assert_eq!(album_of(&alone[0]), "Break Through [aaaa1111]");
1073    }
1074
1075    #[test]
1076    fn unique_root_title_stays_a_bare_album() {
1077        // A title absent from the colliding set keeps its bare folder even when
1078        // the batch happens to hold a same-titled sibling of the same root.
1079        let clip = Clip {
1080            id: "solo-1".to_string(),
1081            title: "Solo".to_string(),
1082            display_name: "München".to_string(),
1083            ..Clip::default()
1084        };
1085        let names = render_all_own(&[clip], &NamingConfig::default(), &BTreeSet::new());
1086        assert_eq!(
1087            names[0].relative_path.to_string_lossy(),
1088            "München/Solo/München-Solo [solo-1]"
1089        );
1090    }
1091
1092    #[test]
1093    fn sanitise_name_strips_separators_and_falls_back_when_empty() {
1094        assert_eq!(sanitise_name("Road/Trip: 2024"), "Road Trip 2024");
1095        assert_eq!(sanitise_name(""), "playlist");
1096        // A name made only of illegal characters strips to nothing, so the
1097        // caller still gets a usable, non-empty stem.
1098        assert_eq!(sanitise_name("///"), "playlist");
1099    }
1100
1101    #[test]
1102    fn stems_folder_is_a_sibling_suffix_of_the_song_base() {
1103        assert_eq!(
1104            stems_folder("Creator/Album/Creator-Song [abcd1234]"),
1105            "Creator/Album/Creator-Song [abcd1234].stems"
1106        );
1107    }
1108
1109    #[test]
1110    fn stem_file_path_combines_song_stem_label_and_disambiguator() {
1111        let path = stem_file_path(
1112            "Creator/Album/Creator-Song [abcd1234]",
1113            "Vocals",
1114            "stem-vocals-9f8e7d6c",
1115            "mp3",
1116            CharacterSet::Unicode,
1117        );
1118        assert_eq!(
1119            path,
1120            "Creator/Album/Creator-Song [abcd1234].stems/Creator-Song [abcd1234] - Vocals [stem-voc].mp3"
1121        );
1122    }
1123
1124    #[test]
1125    fn stem_file_path_disambiguates_blank_and_duplicate_labels_by_id() {
1126        // Two stems with the SAME (blank) label must not collide: the stem-id
1127        // disambiguator keeps them distinct even with no usable label.
1128        let a = stem_file_path("song", "", "id-aaaaaaaa", "wav", CharacterSet::Unicode);
1129        let b = stem_file_path("song", "", "id-bbbbbbbb", "wav", CharacterSet::Unicode);
1130        assert_eq!(a, "song.stems/song [id-aaaaa].wav");
1131        assert_eq!(b, "song.stems/song [id-bbbbb].wav");
1132        assert_ne!(a, b);
1133    }
1134
1135    #[test]
1136    fn stem_file_path_sanitises_label_and_extension_and_honours_ascii() {
1137        // Illegal path characters in the label are stripped, the extension is
1138        // reduced to a safe lowercase token, and ASCII folding applies.
1139        let path = stem_file_path(
1140            "song",
1141            "Lead/Vocal: Æ",
1142            "STEMID12",
1143            ".FLAC",
1144            CharacterSet::Ascii,
1145        );
1146        assert_eq!(path, "song.stems/song - Lead Vocal AE [STEMID12].flac");
1147        // A junk extension falls back to mp3 (defensive; callers pass wav/mp3).
1148        let fallback = stem_file_path("s", "Bass", "x", "??", CharacterSet::Unicode);
1149        assert_eq!(fallback, "s.stems/s - Bass [x].mp3");
1150    }
1151
1152    #[test]
1153    fn case_only_path_difference_is_a_canonical_collision() {
1154        // A custom template without {id8}: clips whose titles differ only in
1155        // case produce different exact paths but the same canonical path and
1156        // must be disambiguated to avoid clobbering on case-insensitive FSes.
1157        let config = NamingConfig {
1158            template: "{creator}/{title}".to_string(),
1159            ..NamingConfig::default()
1160        };
1161        let first = test_clip("aaaa1111-x", "sunrise");
1162        let second = test_clip("bbbb2222-y", "SUNRISE");
1163
1164        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1165
1166        assert_ne!(
1167            names[0].relative_path.to_string_lossy(),
1168            names[1].relative_path.to_string_lossy(),
1169            "canonical collision was not disambiguated"
1170        );
1171    }
1172
1173    #[test]
1174    fn nfc_nfd_path_difference_is_a_canonical_collision() {
1175        // The same character encoded as NFC vs NFD produces different byte
1176        // strings but the same file on NFC-normalising filesystems (macOS APFS).
1177        let config = NamingConfig {
1178            template: "{creator}/{title}".to_string(),
1179            ..NamingConfig::default()
1180        };
1181        // "é" as NFC (U+00E9) vs NFD (e + U+0301).
1182        let nfc_title = "\u{00e9}toile";
1183        let nfd_title = "e\u{0301}toile";
1184        let first = test_clip("aaaa1111-x", nfc_title);
1185        let second = test_clip("bbbb2222-y", nfd_title);
1186
1187        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1188
1189        assert_ne!(
1190            names[0].relative_path.to_string_lossy(),
1191            names[1].relative_path.to_string_lossy(),
1192            "NFC/NFD canonical collision was not disambiguated"
1193        );
1194    }
1195
1196    #[test]
1197    fn genuinely_distinct_paths_are_never_wrongly_disambiguated() {
1198        // Clips with distinct titles (not even canonically equivalent) must not
1199        // receive unnecessary suffixes — the canonical check must not produce
1200        // false positives.
1201        let config = NamingConfig {
1202            template: "{creator}/{title}".to_string(),
1203            ..NamingConfig::default()
1204        };
1205        let first = test_clip("aaaa1111-x", "Alpha");
1206        let second = test_clip("bbbb2222-y", "Beta");
1207
1208        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1209
1210        assert_eq!(
1211            names[0].relative_path.to_string_lossy(),
1212            "München/Alpha",
1213            "distinct path was wrongly suffixed"
1214        );
1215        assert_eq!(
1216            names[1].relative_path.to_string_lossy(),
1217            "München/Beta",
1218            "distinct path was wrongly suffixed"
1219        );
1220    }
1221}