Skip to main content

suno_core/
naming.rs

1//! Pure naming and relative path rendering for [`Clip`] values.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::fmt;
5use std::path::PathBuf;
6use std::str::FromStr;
7
8use serde::{Deserialize, Serialize};
9use unicode_normalization::UnicodeNormalization as _;
10
11use crate::Clip;
12use crate::error::{Error, Result};
13use crate::lineage::LineageContext;
14
/// The default relative path template.
///
/// Supported placeholders are `{creator}`, `{handle}`, `{album}`, `{title}`,
/// `{id}`, `{id8}` (first 8 characters of the clip id), and `{root_id8}`
/// (first 8 of the resolved lineage root id). Empty path segments are dropped
/// after rendering.
///
/// The default embeds `[{id8}]` in the file name so same-title clips never
/// collide, and folders under `{album}`, which resolves to the lineage root's
/// title (else the clip's own title).
pub const DEFAULT_TEMPLATE: &str = "{creator}/{album}/{creator}-{title} [{id8}]";
/// Default cap, counted in characters (not bytes), on each rendered path
/// component. Used by [`NamingConfig::default`] and by the playlist and stem
/// helpers, which take no explicit cap.
const DEFAULT_MAX_COMPONENT_LEN: usize = 80;

/// Minimum number of base-name characters budgeted in front of an appended
/// ` [suffix]`: when the configured cap is too small, the length budget is
/// raised to the suffix length plus this many characters.
const MIN_BASE_CHARS_WITH_SUFFIX: usize = 1;
29
/// Which characters are allowed to survive in rendered path components.
///
/// Serialised in lowercase (`"unicode"` / `"ascii"`); `Unicode` is the default.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CharacterSet {
    /// Keep non-ASCII characters; only path-hostile and control characters
    /// are replaced.
    #[default]
    Unicode,
    /// Fold the known accented Latin letters and ligatures to ASCII and
    /// replace every other non-ASCII character with a space.
    Ascii,
}
37
38impl FromStr for CharacterSet {
39    type Err = Error;
40
41    fn from_str(s: &str) -> Result<Self> {
42        match s.to_ascii_lowercase().as_str() {
43            "unicode" => Ok(Self::Unicode),
44            "ascii" => Ok(Self::Ascii),
45            other => Err(Error::Config(format!(
46                "unknown character_set '{other}'; expected 'unicode' or 'ascii'"
47            ))),
48        }
49    }
50}
51
52impl fmt::Display for CharacterSet {
53    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54        match self {
55            Self::Unicode => f.write_str("unicode"),
56            Self::Ascii => f.write_str("ascii"),
57        }
58    }
59}
60
/// Settings that control how clip paths are rendered.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NamingConfig {
    /// `/`-separated path template; see [`DEFAULT_TEMPLATE`] for the
    /// supported placeholders.
    pub template: String,
    /// Character set applied when sanitising each component.
    pub character_set: CharacterSet,
    /// Cap, in characters, applied to each path component.
    pub max_component_len: usize,
}
67
68impl Default for NamingConfig {
69    fn default() -> Self {
70        Self {
71            template: DEFAULT_TEMPLATE.to_string(),
72            character_set: CharacterSet::Unicode,
73            max_component_len: DEFAULT_MAX_COMPONENT_LEN,
74        }
75    }
76}
77
/// Borrowed inputs for rendering one clip's path.
#[derive(Debug, Clone, Copy)]
pub struct NamingRequest<'a> {
    /// The clip being named.
    pub clip: &'a Clip,
    /// The lineage context supplying the album title and the root id.
    pub lineage: &'a LineageContext,
}
83
/// The result of rendering one clip through the naming template.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RenderedName {
    /// The full relative path; its last component is `base_name`.
    pub relative_path: PathBuf,
    /// The final path component on its own.
    pub base_name: String,
}
89
90pub fn render_clip_name(request: NamingRequest<'_>, config: &NamingConfig) -> RenderedName {
91    let album = album_component(request, config);
92    render_with_album(request, config, &album)
93}
94
95pub fn render_clip_names(
96    requests: &[NamingRequest<'_>],
97    config: &NamingConfig,
98    colliding_albums: &BTreeSet<String>,
99) -> Vec<RenderedName> {
100    let albums = disambiguated_albums(requests, config, colliding_albums);
101    let mut rendered = requests
102        .iter()
103        .zip(&albums)
104        .map(|(request, album)| render_with_album(*request, config, album))
105        .collect::<Vec<_>>();
106
107    // Two passes to keep distinct clips from landing on one path.  The first
108    // pass keys on the exact rendered string; the second on the filesystem-
109    // canonical form (NFC + lowercase) so that paths differing only by case or
110    // Unicode normalisation (NFD vs NFC) are caught too — they would collide on
111    // case-insensitive or NFC-normalising filesystems (Windows, macOS default).
112    for apply_canonical in [false, true] {
113        let mut collisions = BTreeMap::<String, Vec<usize>>::new();
114        for (index, name) in rendered.iter().enumerate() {
115            let key = if apply_canonical {
116                canonical_path_key(&name.relative_path.to_string_lossy())
117            } else {
118                name.relative_path.to_string_lossy().into_owned()
119            };
120            collisions.entry(key).or_default().push(index);
121        }
122        for indexes in collisions.into_values().filter(|v| v.len() > 1) {
123            for index in indexes {
124                let suffix = &requests[index].clip.id;
125                rendered[index] = with_suffix(
126                    rendered[index].clone(),
127                    suffix,
128                    config.character_set,
129                    config.max_component_len,
130                );
131            }
132        }
133    }
134
135    rendered
136}
137
138/// Filesystem-canonical key: NFC-normalise then lowercase, so paths that differ
139/// only by case or by NFC/NFD encoding hash to the same bucket.
140fn canonical_path_key(path: &str) -> String {
141    path.nfc().flat_map(char::to_lowercase).collect()
142}
143
144/// The album path component for every request, with a clip whose root title
145/// collides across distinct roots disambiguated by `[{root_id8}]`.
146///
147/// Distinct roots must never share an album folder (two different upload roots
148/// titled "Break Through" exist). `colliding_albums` is the authoritative set
149/// of such shared root titles, computed once from the whole lineage store, so
150/// the decision is stable across runs and independent of which clips appear in
151/// this batch. A clip whose resolved album is in that set always gets its
152/// root's short id appended; every other clip keeps the bare album and groups
153/// with its same-root siblings.
154fn disambiguated_albums(
155    requests: &[NamingRequest<'_>],
156    config: &NamingConfig,
157    colliding_albums: &BTreeSet<String>,
158) -> Vec<String> {
159    requests
160        .iter()
161        .map(|request| album_for(*request, config, colliding_albums))
162        .collect()
163}
164
165/// The (possibly disambiguated) album component for one request.
166fn album_for(
167    request: NamingRequest<'_>,
168    config: &NamingConfig,
169    colliding_albums: &BTreeSet<String>,
170) -> String {
171    let raw_album = request.lineage.album(&title_name(request.clip));
172    let album = sanitise_component(&raw_album, config.character_set, config.max_component_len);
173    if colliding_albums.contains(raw_album.trim()) {
174        let suffix = truncate_chars(&request.lineage.root_id, 8);
175        append_suffix(
176            &album,
177            &suffix,
178            config.character_set,
179            config.max_component_len,
180        )
181    } else {
182        album
183    }
184}
185
186/// The sanitised album component: the resolved lineage album (root title, else
187/// the clip's own title).
188fn album_component(request: NamingRequest<'_>, config: &NamingConfig) -> String {
189    let album = request.lineage.album(&title_name(request.clip));
190    sanitise_component(&album, config.character_set, config.max_component_len)
191}
192
193/// Render one clip's path with an already-resolved album component.
194fn render_with_album(
195    request: NamingRequest<'_>,
196    config: &NamingConfig,
197    album: &str,
198) -> RenderedName {
199    let clip = request.clip;
200    let creator = sanitise_component(
201        &creator_name(clip),
202        config.character_set,
203        config.max_component_len,
204    );
205    let handle = sanitise_component(&clip.handle, config.character_set, config.max_component_len);
206    let title = sanitise_component(
207        &title_name(clip),
208        config.character_set,
209        config.max_component_len,
210    );
211    let id = sanitise_component(&clip.id, CharacterSet::Ascii, config.max_component_len);
212    let id8 = sanitise_component(
213        &truncate_chars(&clip.id, 8),
214        CharacterSet::Ascii,
215        config.max_component_len,
216    );
217    let root_id8 = sanitise_component(
218        &truncate_chars(&request.lineage.root_id, 8),
219        CharacterSet::Ascii,
220        config.max_component_len,
221    );
222    let substitutions = SegmentSubstitutions {
223        creator: &creator,
224        handle: &handle,
225        album,
226        title: &title,
227        root_id8: &root_id8,
228        id8: &id8,
229        id: &id,
230    };
231    let mut components = config
232        .template
233        .split('/')
234        .filter_map(|segment| {
235            let rendered = substitute_segment(segment, substitutions);
236            let sanitised = sanitise_segment(
237                &rendered,
238                config.character_set,
239                config.max_component_len,
240                [id8.as_str(), root_id8.as_str()],
241            );
242            (!sanitised.is_empty()).then_some(sanitised)
243        })
244        .collect::<Vec<_>>();
245
246    if components.is_empty() {
247        components.push(title.clone());
248    }
249
250    let mut base_name = components
251        .pop()
252        .filter(|value| !value.is_empty())
253        .unwrap_or_else(|| title.clone());
254    // Guarantee a non-empty file name even when every token sanitises away.
255    if base_name.is_empty() {
256        base_name = append_suffix(
257            &base_name,
258            &clip.id,
259            config.character_set,
260            config.max_component_len,
261        );
262    }
263
264    let mut relative_path = PathBuf::new();
265    for component in components {
266        relative_path.push(component);
267    }
268
269    relative_path.push(&base_name);
270    RenderedName {
271        relative_path,
272        base_name,
273    }
274}
275
/// The already-sanitised values substituted for each template placeholder.
/// Each field holds the text for the placeholder of the same name.
#[derive(Clone, Copy)]
struct SegmentSubstitutions<'a> {
    /// Replaces `{creator}`.
    creator: &'a str,
    /// Replaces `{handle}`.
    handle: &'a str,
    /// Replaces `{album}`.
    album: &'a str,
    /// Replaces `{title}`.
    title: &'a str,
    /// Replaces `{root_id8}`.
    root_id8: &'a str,
    /// Replaces `{id8}`.
    id8: &'a str,
    /// Replaces `{id}`.
    id: &'a str,
}
286
287fn substitute_segment(segment: &str, substitutions: SegmentSubstitutions<'_>) -> String {
288    let mut rendered = String::with_capacity(segment.len());
289    let mut remainder = segment;
290    while let Some(start) = remainder.find('{') {
291        rendered.push_str(&remainder[..start]);
292        remainder = &remainder[start..];
293        if let Some((token_len, value)) = placeholder_match(remainder, substitutions) {
294            rendered.push_str(value);
295            remainder = &remainder[token_len..];
296        } else {
297            rendered.push('{');
298            remainder = &remainder[1..];
299        }
300    }
301    rendered.push_str(remainder);
302    rendered
303}
304
305fn placeholder_match<'a>(
306    segment: &str,
307    substitutions: SegmentSubstitutions<'a>,
308) -> Option<(usize, &'a str)> {
309    if segment.starts_with("{creator}") {
310        Some(("{creator}".len(), substitutions.creator))
311    } else if segment.starts_with("{handle}") {
312        Some(("{handle}".len(), substitutions.handle))
313    } else if segment.starts_with("{album}") {
314        Some(("{album}".len(), substitutions.album))
315    } else if segment.starts_with("{title}") {
316        Some(("{title}".len(), substitutions.title))
317    } else if segment.starts_with("{root_id8}") {
318        Some(("{root_id8}".len(), substitutions.root_id8))
319    } else if segment.starts_with("{id8}") {
320        Some(("{id8}".len(), substitutions.id8))
321    } else if segment.starts_with("{id}") {
322        Some(("{id}".len(), substitutions.id))
323    } else {
324        None
325    }
326}
327
328fn with_suffix(
329    mut rendered: RenderedName,
330    suffix: &str,
331    character_set: CharacterSet,
332    max_component_len: usize,
333) -> RenderedName {
334    rendered.base_name = append_suffix(
335        &rendered.base_name,
336        suffix,
337        character_set,
338        max_component_len,
339    );
340    rendered.relative_path.set_file_name(&rendered.base_name);
341    rendered
342}
343
344fn creator_name(clip: &Clip) -> String {
345    non_blank(&clip.display_name)
346        .or_else(|| non_blank(&clip.handle))
347        .unwrap_or("Unknown Creator")
348        .to_string()
349}
350
351fn title_name(clip: &Clip) -> String {
352    let title = clip.title.trim();
353    if title.is_empty() || title.eq_ignore_ascii_case("untitled") {
354        "Untitled".to_string()
355    } else {
356        title.to_string()
357    }
358}
359
360fn append_suffix(
361    base: &str,
362    suffix: &str,
363    character_set: CharacterSet,
364    max_component_len: usize,
365) -> String {
366    let suffix_pattern = format!(" [{suffix}]");
367    if base.ends_with(&suffix_pattern) {
368        return sanitise_component(base, character_set, max_component_len);
369    }
370
371    let max_len =
372        max_component_len.max(suffix_pattern.chars().count() + MIN_BASE_CHARS_WITH_SUFFIX);
373    let allowed = max_len.saturating_sub(suffix_pattern.chars().count());
374    // Sanitise the base before measuring it. The character set can expand a
375    // character (ascii turns `ß` into `ss`), so budgeting the cut on the raw
376    // length could let the sanitised prefix grow back over the room reserved for
377    // the suffix and slice through it again (#120).
378    let base = sanitise_component(base, character_set, max_len);
379    let truncated = truncate_chars(base.trim_end(), allowed);
380    let combined = format!("{truncated}{suffix_pattern}");
381    sanitise_component(&combined, character_set, max_len)
382}
383
384/// Sanitise a rendered template segment, preserving a trailing ` [id]`
385/// disambiguator (the `[{id8}]` or `[{root_id8}]` the template embeds) when the
386/// segment would otherwise be truncated through it. Only the title portion is
387/// shortened, so two long-titled siblings keep their distinguishing id and the
388/// closing bracket is never left unbalanced (#120). A segment that does not end
389/// in a disambiguator is sanitised exactly as before.
390fn sanitise_segment(
391    rendered: &str,
392    character_set: CharacterSet,
393    max_component_len: usize,
394    disambiguators: [&str; 2],
395) -> String {
396    for suffix in disambiguators {
397        if suffix.is_empty() {
398            continue;
399        }
400        let pattern = format!(" [{suffix}]");
401        if let Some(prefix) = rendered.strip_suffix(&pattern) {
402            return append_suffix(prefix, suffix, character_set, max_component_len);
403        }
404    }
405    sanitise_component(rendered, character_set, max_component_len)
406}
407
408/// Sanitise a free-form playlist name into a single safe path component.
409///
410/// Applies the same Unicode filtering and length cap as clip path components
411/// (default [`CharacterSet::Unicode`], [`DEFAULT_MAX_COMPONENT_LEN`]), so a
412/// playlist file name obeys the same filesystem rules as the rest of the
413/// library. An empty or fully-stripped name falls back to `playlist` so the
414/// caller always has a non-empty stem to append `.m3u8` to.
415pub fn sanitise_name(name: &str) -> String {
416    let cleaned = sanitise_component(name, CharacterSet::Unicode, DEFAULT_MAX_COMPONENT_LEN);
417    if cleaned.is_empty() {
418        "playlist".to_string()
419    } else {
420        cleaned
421    }
422}
423
/// The `.stems` sub-folder that sits beside a song's audio file.
///
/// `base` is the song's extensionless relative path (the value the audio file
/// and its sidecars are built from); the folder is `{base}.stems`. The
/// distinct `.stems` suffix keeps it apart from the audio file
/// (`{base}.<ext>`) and from any `{base}.<sidecar>`.
pub fn stems_folder(base: &str) -> String {
    let mut folder = String::with_capacity(base.len() + ".stems".len());
    folder.push_str(base);
    folder.push_str(".stems");
    folder
}
433
434/// The relative path of one stem file inside a song's [`stems_folder`].
435///
436/// Named base+label+disambiguation rather than label-only, because Auto Split
437/// can mislabel stems and Advanced Split yields ~100 instruments, so blank or
438/// duplicate labels are expected. The file is
439/// `{song file name} - {label} [{stem id8}].{ext}`; the ` - {label}` piece is
440/// dropped when the label sanitises to empty, and the `[{stem id8}]`
441/// disambiguator (the first 8 characters of the stable stem id) keeps blank or
442/// duplicate labels collision-free. Every component is run through the same
443/// [`sanitise_component`] filter as the rest of the library, honouring
444/// `character_set`.
445pub fn stem_file_path(
446    base: &str,
447    label: &str,
448    stem_id: &str,
449    ext: &str,
450    character_set: CharacterSet,
451) -> String {
452    let folder = stems_folder(base);
453    // The song's own file-name stem (the last path component of `base`), reused
454    // so a stem stays identifiable even when viewed outside its `.stems` folder.
455    let song_stem = base.rsplit('/').next().unwrap_or(base);
456    let label = sanitise_component(label, character_set, DEFAULT_MAX_COMPONENT_LEN);
457    let id8 = sanitise_component(
458        &truncate_chars(stem_id, 8),
459        CharacterSet::Ascii,
460        DEFAULT_MAX_COMPONENT_LEN,
461    );
462
463    let mut name = song_stem.to_string();
464    if !label.is_empty() {
465        name.push_str(" - ");
466        name.push_str(&label);
467    }
468    if !id8.is_empty() {
469        name.push_str(" [");
470        name.push_str(&id8);
471        name.push(']');
472    }
473    // A degenerate base (empty song stem, blank label, empty id) must still
474    // yield a usable name rather than a hidden dotfile.
475    if name.trim().is_empty() {
476        name = "stem".to_string();
477    }
478    format!("{folder}/{name}.{}", sanitise_ext(ext))
479}
480
/// Reduce a candidate extension to a safe token: at most 8 lowercase ASCII
/// alphanumerics, with everything else (including dots) dropped. An
/// extension that is empty or fully stripped becomes `mp3`. The caller passes
/// the resolved stem format's extension (`wav` or `mp3`); stems are stored
/// RAW.
fn sanitise_ext(ext: &str) -> String {
    let mut cleaned = String::with_capacity(8);
    for c in ext.chars().filter(char::is_ascii_alphanumeric).take(8) {
        cleaned.push(c.to_ascii_lowercase());
    }
    if cleaned.is_empty() {
        return "mp3".to_string();
    }
    cleaned
}
498
499fn sanitise_component(
500    value: &str,
501    character_set: CharacterSet,
502    max_component_len: usize,
503) -> String {
504    // Single pass: map each char to its charset-safe form while collapsing runs
505    // of whitespace to one space and dropping leading/trailing whitespace. This
506    // fuses the old filter / split_whitespace / collect / join steps, which
507    // allocated several intermediate strings and a vector, into one buffer.
508    let mut collapsed = String::with_capacity(value.len());
509    let mut pending_space = false;
510    let push = |out: char, buf: &mut String, pending: &mut bool| {
511        if out.is_whitespace() {
512            *pending = !buf.is_empty();
513        } else {
514            if *pending {
515                buf.push(' ');
516            }
517            *pending = false;
518            buf.push(out);
519        }
520    };
521    match character_set {
522        CharacterSet::Unicode => {
523            for ch in value.chars() {
524                push(unicode_char(ch), &mut collapsed, &mut pending_space);
525            }
526        }
527        CharacterSet::Ascii => {
528            for ch in value.chars() {
529                for out in ascii_chars(ch) {
530                    push(out, &mut collapsed, &mut pending_space);
531                }
532            }
533        }
534    }
535
536    let trimmed = collapsed.trim_matches([' ', '.']);
537    if trimmed.is_empty() {
538        return String::new();
539    }
540
541    // Keep at most `max` characters, then trim any space or dot the cut exposed.
542    // Slicing at the char boundary avoids the extra String the old
543    // truncate-then-trim-then-to_string sequence built.
544    let max = max_component_len.max(1);
545    let end = trimmed
546        .char_indices()
547        .nth(max)
548        .map_or(trimmed.len(), |(index, _)| index);
549    let result = trimmed[..end].trim_matches([' ', '.']);
550    if result.is_empty() {
551        return String::new();
552    }
553    if result == "." || result == ".." {
554        return "item".to_string();
555    }
556    let mut result = result.to_string();
557    if !result.ends_with('_') && is_reserved_name(&result) {
558        result.push('_');
559    }
560    result
561}
562
/// Map one character to its path-safe form: control characters and the
/// characters `< > : " / \ | ? *` (plus NUL) become a space; everything else
/// is returned unchanged.
fn unicode_char(ch: char) -> char {
    const FORBIDDEN: [char; 10] = ['<', '>', ':', '"', '/', '\\', '|', '?', '*', '\0'];
    if ch.is_control() || FORBIDDEN.contains(&ch) {
        return ' ';
    }
    ch
}
574
575fn ascii_chars(ch: char) -> Vec<char> {
576    if ch.is_ascii() {
577        return vec![unicode_char(ch)];
578    }
579
580    match ch {
581        'À' | 'Á' | 'Â' | 'Ã' | 'Ä' | 'Å' => vec!['A'],
582        'à' | 'á' | 'â' | 'ã' | 'ä' | 'å' => vec!['a'],
583        'Ç' => vec!['C'],
584        'ç' => vec!['c'],
585        'È' | 'É' | 'Ê' | 'Ë' => vec!['E'],
586        'è' | 'é' | 'ê' | 'ë' => vec!['e'],
587        'Ì' | 'Í' | 'Î' | 'Ï' => vec!['I'],
588        'ì' | 'í' | 'î' | 'ï' => vec!['i'],
589        'Ñ' => vec!['N'],
590        'ñ' => vec!['n'],
591        'Ò' | 'Ó' | 'Ô' | 'Õ' | 'Ö' | 'Ø' => vec!['O'],
592        'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ø' => vec!['o'],
593        'Ù' | 'Ú' | 'Û' | 'Ü' => vec!['U'],
594        'ù' | 'ú' | 'û' | 'ü' => vec!['u'],
595        'Ý' | 'Ÿ' => vec!['Y'],
596        'ý' | 'ÿ' => vec!['y'],
597        'Æ' => vec!['A', 'E'],
598        'æ' => vec!['a', 'e'],
599        'Œ' => vec!['O', 'E'],
600        'œ' => vec!['o', 'e'],
601        'ß' => vec!['s', 's'],
602        _ => vec![' '],
603    }
604}
605
/// Return the first `max_len` characters (not bytes) of `value` as an owned
/// string; shorter input is returned whole.
fn truncate_chars(value: &str, max_len: usize) -> String {
    match value.char_indices().nth(max_len) {
        Some((cut, _)) => value[..cut].to_string(),
        None => value.to_string(),
    }
}
609
/// The trimmed text, or `None` when nothing but whitespace is left.
fn non_blank(value: &str) -> Option<&str> {
    match value.trim() {
        "" => None,
        trimmed => Some(trimmed),
    }
}
614
/// Whether the part of `value` before its first `.` is a reserved device
/// name: `CON`, `PRN`, `AUX`, `NUL`, `COM1`-`COM9` or `LPT1`-`LPT9`, compared
/// ASCII-case-insensitively.
fn is_reserved_name(value: &str) -> bool {
    let stem = value.find('.').map_or(value, |dot| &value[..dot]);
    // Every reserved name is exactly 3 or 4 ASCII bytes, so matching on the
    // byte slice rejects everything else without allocating.
    match stem.as_bytes() {
        &[a, b, c] => matches!(
            [
                a.to_ascii_uppercase(),
                b.to_ascii_uppercase(),
                c.to_ascii_uppercase(),
            ],
            [b'C', b'O', b'N'] | [b'P', b'R', b'N'] | [b'A', b'U', b'X'] | [b'N', b'U', b'L']
        ),
        &[a, b, c, digit] => {
            let prefix = [
                a.to_ascii_uppercase(),
                b.to_ascii_uppercase(),
                c.to_ascii_uppercase(),
            ];
            matches!(digit, b'1'..=b'9')
                && matches!(prefix, [b'C', b'O', b'M'] | [b'L', b'P', b'T'])
        }
        _ => false,
    }
}
628
629#[cfg(test)]
630mod tests {
631    use super::*;
632    use crate::lineage::{EdgeType, ResolveStatus};
633    use std::collections::{BTreeMap, BTreeSet};
634
635    fn test_clip(id: &str, title: &str) -> Clip {
636        Clip {
637            id: id.to_string(),
638            title: title.to_string(),
639            display_name: "München".to_string(),
640            handle: "munchen".to_string(),
641            ..Clip::default()
642        }
643    }
644
645    fn render_own(clip: &Clip, config: &NamingConfig) -> RenderedName {
646        let lineage = LineageContext::own_root(clip);
647        render_clip_name(
648            NamingRequest {
649                clip,
650                lineage: &lineage,
651            },
652            config,
653        )
654    }
655
656    fn render_all_own(
657        clips: &[Clip],
658        config: &NamingConfig,
659        colliding: &BTreeSet<String>,
660    ) -> Vec<RenderedName> {
661        let lineages: Vec<LineageContext> = clips.iter().map(LineageContext::own_root).collect();
662        let requests: Vec<NamingRequest> = clips
663            .iter()
664            .zip(&lineages)
665            .map(|(clip, lineage)| NamingRequest { clip, lineage })
666            .collect();
667        render_clip_names(&requests, config, colliding)
668    }
669
    /// Unicode mode keeps non-ASCII text and turns `/` into a space; ASCII
    /// mode folds accented letters and drops the CJK characters entirely.
    #[test]
    fn unicode_names_are_preserved_and_ascii_falls_back() {
        let clip = test_clip("abc12345", "Beyoncé/東京");

        let unicode = render_own(&clip, &NamingConfig::default());
        assert_eq!(
            unicode.relative_path.to_string_lossy(),
            "München/Beyoncé 東京/München-Beyoncé 東京 [abc12345]"
        );

        let ascii = render_own(
            &clip,
            &NamingConfig {
                character_set: CharacterSet::Ascii,
                ..NamingConfig::default()
            },
        );
        assert_eq!(
            ascii.relative_path.to_string_lossy(),
            "Munchen/Beyonce/Munchen-Beyonce [abc12345]"
        );
    }
692
    /// A reserved display name (`AUX`) and a title that reduces to `CON` once
    /// its hostile characters are stripped both get a trailing underscore as
    /// folder names, and the file name still carries the id.
    #[test]
    fn reserved_and_hostile_names_are_sanitised() {
        let clip = Clip {
            id: "deadbeef".to_string(),
            title: "CON<>:\"/\\|?*.".to_string(),
            display_name: "AUX".to_string(),
            ..Clip::default()
        };

        let rendered = render_own(&clip, &NamingConfig::default());
        let path = rendered.relative_path.to_string_lossy();
        assert!(path.starts_with("AUX_/CON_/"), "path was {path}");
        assert!(rendered.base_name.contains("[deadbeef]"));
    }
707
    /// With the default template the file name contains the first 8
    /// characters of the clip id in brackets.
    #[test]
    fn default_template_always_embeds_id8() {
        let clip = test_clip("abcdef1234567890", "Any Title");
        let rendered = render_own(&clip, &NamingConfig::default());
        assert!(
            rendered.base_name.contains("[abcdef12]"),
            "base_name was {}",
            rendered.base_name
        );
    }
718
    /// Every supported placeholder is substituted in a single-segment custom
    /// template, and an unknown `{unknown}` token is left in place verbatim.
    #[test]
    fn custom_template_replaces_all_known_placeholders_once() {
        let clip = Clip {
            id: "abcdef12-full".to_string(),
            title: "Song".to_string(),
            display_name: "Creator".to_string(),
            handle: "handle".to_string(),
            ..Clip::default()
        };
        let lineage = LineageContext {
            root_id: "rootxyz9-extra".to_string(),
            root_title: "Album".to_string(),
            root_date: String::new(),
            parent_id: "rootxyz9-extra".to_string(),
            edge_type: Some(EdgeType::Cover),
            status: ResolveStatus::Resolved,
        };
        let config = NamingConfig {
            template: "{creator}-{handle}-{album}-{title}-{root_id8}-{id8}-{id}-{unknown}"
                .to_string(),
            ..NamingConfig::default()
        };

        let rendered = render_clip_name(
            NamingRequest {
                clip: &clip,
                lineage: &lineage,
            },
            &config,
        );

        assert_eq!(
            rendered.relative_path.to_string_lossy(),
            "Creator-handle-Album-Song-rootxyz9-abcdef12-abcdef12-full-{unknown}"
        );
    }
755
    /// A whitespace-only title renders as `Untitled` in both the album folder
    /// and the file name, with the id8 suffix keeping the file name unique.
    #[test]
    fn blank_titles_use_a_stable_suffix() {
        let clip = test_clip("12345678-clip", "   ");

        let rendered = render_own(&clip, &NamingConfig::default());
        assert_eq!(rendered.base_name, "München-Untitled [12345678]");
        assert_eq!(
            rendered.relative_path.to_string_lossy(),
            "München/Untitled/München-Untitled [12345678]"
        );
    }
767
    /// With a 24-character cap, every path component of a 120-character
    /// title stays within the cap and the file name still ends with the
    /// complete ` [id8]`.
    #[test]
    fn very_long_titles_are_trimmed() {
        let clip = test_clip("abcdef12", &"a".repeat(120));
        let rendered = render_own(
            &clip,
            &NamingConfig {
                max_component_len: 24,
                ..NamingConfig::default()
            },
        );

        for component in rendered.relative_path.components() {
            let text = component.as_os_str().to_string_lossy();
            assert!(
                text.chars().count() <= 24,
                "component {text:?} exceeds 24 chars"
            );
        }
        // The trailing [id8] must survive the truncation intact (#120).
        assert!(
            rendered.base_name.ends_with(" [abcdef12]"),
            "id8 disambiguator was sliced; base_name was {:?}",
            rendered.base_name
        );
    }
793
    /// An overflowing file name is cut to exactly the 40-character cap by
    /// shortening the title, leaving the trailing ` [id8]` whole.
    #[test]
    fn long_names_keep_the_full_id8_disambiguator() {
        // A creator+title long enough to overflow the cap keeps the whole
        // trailing [id8]: the title is shortened, not the id, so the name stays
        // complete and the bracket stays balanced (#120).
        let clip = test_clip("1234abcd-tail", &"a".repeat(120));
        let config = NamingConfig {
            max_component_len: 40,
            ..NamingConfig::default()
        };
        let rendered = render_own(&clip, &config);

        assert!(
            rendered.base_name.ends_with(" [1234abcd]"),
            "base_name must end with the full disambiguator, was {:?}",
            rendered.base_name
        );
        assert_eq!(rendered.base_name.chars().count(), 40);
    }
813
    /// Two clips with the same 200-character title under one root render to
    /// different paths, each within 80 characters and each with exactly one
    /// `[` and one `]`.
    #[test]
    fn long_titled_siblings_stay_distinct_with_balanced_brackets() {
        // Two same-(long-)titled clips sharing a root must remain distinct: only
        // the title is shortened, so their [id8] suffixes differ and neither name
        // ends up with an unbalanced bracket (#120).
        let lineage = LineageContext {
            root_id: "root-42".to_string(),
            root_title: "Origin".to_string(),
            root_date: String::new(),
            parent_id: "root-42".to_string(),
            edge_type: Some(EdgeType::Cover),
            status: ResolveStatus::Resolved,
        };
        let title = "z".repeat(200);
        let first = test_clip("aaaa1111-x", &title);
        let second = test_clip("bbbb2222-y", &title);
        let requests = [
            NamingRequest {
                clip: &first,
                lineage: &lineage,
            },
            NamingRequest {
                clip: &second,
                lineage: &lineage,
            },
        ];

        let names = render_clip_names(&requests, &NamingConfig::default(), &BTreeSet::new());

        assert!(names[0].base_name.ends_with(" [aaaa1111]"));
        assert!(names[1].base_name.ends_with(" [bbbb2222]"));
        assert_ne!(names[0].relative_path, names[1].relative_path);
        for name in &names {
            assert!(name.base_name.chars().count() <= 80);
            assert_eq!(name.base_name.matches('[').count(), 1, "unbalanced '['");
            assert_eq!(name.base_name.matches(']').count(), 1, "unbalanced ']'");
        }
    }
852
    /// When a long album title is listed as colliding, the album folder (the
    /// second path component) ends with ` [root_id8]` and stays within 80
    /// characters.
    #[test]
    fn long_colliding_album_keeps_its_root_id8() {
        // The album [root_id8] disambiguator is preserved when a long album title
        // must be truncated, mirroring the file-name fix (#120).
        let long = "Break Through ".repeat(20);
        let title = long.trim().to_string();
        let clip = Clip {
            id: "aaaa1111-x".to_string(),
            title: title.clone(),
            display_name: "München".to_string(),
            ..Clip::default()
        };
        let colliding: BTreeSet<String> = [title].into_iter().collect();
        let names = render_all_own(&[clip], &NamingConfig::default(), &colliding);

        let album = names[0]
            .relative_path
            .components()
            .nth(1)
            .map(|component| component.as_os_str().to_string_lossy().into_owned())
            .unwrap_or_default();
        assert!(album.ends_with(" [aaaa1111]"), "album was {album:?}");
        assert!(album.chars().count() <= 80);
    }
877
878    #[test]
879    fn ascii_expanding_chars_do_not_slice_the_disambiguator() {
880        // A literal expanding character (`ß` -> `ss` under ascii) in a custom
881        // template, right before the trailing ` [{id8}]`, must not grow back over
882        // the suffix and slice it: the base is sized after expansion (#120).
883        let clip = test_clip("1234abcd", "Title");
884        let config = NamingConfig {
885            template: format!("{}{{title}} [{{id8}}]", "ß".repeat(80)),
886            character_set: CharacterSet::Ascii,
887            max_component_len: 40,
888        };
889        let rendered = render_own(&clip, &config);
890
891        assert!(
892            rendered.base_name.ends_with(" [1234abcd]"),
893            "expansion sliced the id8; base_name was {:?}",
894            rendered.base_name
895        );
896        assert!(rendered.base_name.chars().count() <= 40);
897    }
898
899    #[test]
900    fn same_title_siblings_stay_distinct_via_id8() {
901        // Two clips sharing a root (same album folder) and the same title must
902        // still land on distinct files; the default template's {id8} does that.
903        let lineage = LineageContext {
904            root_id: "root-9".to_string(),
905            root_title: "Origin".to_string(),
906            root_date: String::new(),
907            parent_id: "root-9".to_string(),
908            edge_type: Some(EdgeType::Cover),
909            status: ResolveStatus::Resolved,
910        };
911        let first = test_clip("11111111-alpha", "Shared");
912        let second = test_clip("22222222-beta", "Shared");
913        let requests = [
914            NamingRequest {
915                clip: &first,
916                lineage: &lineage,
917            },
918            NamingRequest {
919                clip: &second,
920                lineage: &lineage,
921            },
922        ];
923
924        let names = render_clip_names(&requests, &NamingConfig::default(), &BTreeSet::new());
925
926        assert_eq!(
927            names[0].relative_path.to_string_lossy(),
928            "München/Origin/München-Shared [11111111]"
929        );
930        assert_eq!(
931            names[1].relative_path.to_string_lossy(),
932            "München/Origin/München-Shared [22222222]"
933        );
934    }
935
936    #[test]
937    fn id8_prefix_collision_falls_back_to_full_id() {
938        // Custom template without {id8} so identical titles collide and the
939        // filename fallback (full id) has to keep them distinct.
940        let config = NamingConfig {
941            template: "{creator}/{title}".to_string(),
942            ..NamingConfig::default()
943        };
944        let first = test_clip("abcd1234-first", "Untitled");
945        let second = test_clip("abcd1234-second", "Untitled");
946
947        let names = render_all_own(&[first.clone(), second.clone()], &config, &BTreeSet::new());
948        let swapped = render_all_own(&[second.clone(), first.clone()], &config, &BTreeSet::new());
949
950        assert_ne!(
951            names[0].relative_path.to_string_lossy(),
952            names[1].relative_path.to_string_lossy()
953        );
954
955        let ordered = |rendered: &[RenderedName], clips: &[Clip]| {
956            clips
957                .iter()
958                .zip(rendered)
959                .map(|(clip, name)| {
960                    (
961                        clip.id.clone(),
962                        name.relative_path.to_string_lossy().into_owned(),
963                    )
964                })
965                .collect::<BTreeMap<_, _>>()
966        };
967        assert_eq!(
968            ordered(&names, &[first.clone(), second.clone()]),
969            ordered(&swapped, &[second, first])
970        );
971    }
972
973    #[test]
974    fn album_is_root_title_for_a_remix() {
975        let clip = Clip {
976            id: "child".to_string(),
977            title: "Remix".to_string(),
978            display_name: "München".to_string(),
979            ..Clip::default()
980        };
981        let lineage = LineageContext {
982            root_id: "root-1".to_string(),
983            root_title: "Original".to_string(),
984            root_date: String::new(),
985            parent_id: "root-1".to_string(),
986            edge_type: Some(EdgeType::Cover),
987            status: ResolveStatus::Resolved,
988        };
989
990        let rendered = render_clip_name(
991            NamingRequest {
992                clip: &clip,
993                lineage: &lineage,
994            },
995            &NamingConfig::default(),
996        );
997        assert_eq!(
998            rendered.relative_path.to_string_lossy(),
999            "München/Original/München-Remix [child]"
1000        );
1001    }
1002
1003    #[test]
1004    fn album_is_own_title_for_a_root() {
1005        let clip = Clip {
1006            id: "root-1".to_string(),
1007            title: "Original".to_string(),
1008            display_name: "München".to_string(),
1009            ..Clip::default()
1010        };
1011
1012        let rendered = render_own(&clip, &NamingConfig::default());
1013        assert_eq!(
1014            rendered.relative_path.to_string_lossy(),
1015            "München/Original/München-Original [root-1]"
1016        );
1017    }
1018
1019    #[test]
1020    fn shared_album_title_from_distinct_roots_is_disambiguated() {
1021        let first = Clip {
1022            id: "aaaa1111-x".to_string(),
1023            title: "Break Through".to_string(),
1024            display_name: "München".to_string(),
1025            ..Clip::default()
1026        };
1027        let second = Clip {
1028            id: "bbbb2222-y".to_string(),
1029            title: "Break Through".to_string(),
1030            display_name: "München".to_string(),
1031            ..Clip::default()
1032        };
1033
1034        // The colliding set is authoritative (store-driven), so disambiguation
1035        // does not depend on both roots appearing in the same batch.
1036        let colliding: BTreeSet<String> = ["Break Through".to_string()].into_iter().collect();
1037        let names = render_all_own(
1038            &[first.clone(), second.clone()],
1039            &NamingConfig::default(),
1040            &colliding,
1041        );
1042        let swapped = render_all_own(
1043            &[second.clone(), first.clone()],
1044            &NamingConfig::default(),
1045            &colliding,
1046        );
1047
1048        let album_of = |rendered: &RenderedName| {
1049            rendered
1050                .relative_path
1051                .components()
1052                .nth(1)
1053                .map(|component| component.as_os_str().to_string_lossy().into_owned())
1054                .unwrap_or_default()
1055        };
1056
1057        assert_eq!(album_of(&names[0]), "Break Through [aaaa1111]");
1058        assert_eq!(album_of(&names[1]), "Break Through [bbbb2222]");
1059        // Deterministic regardless of input order.
1060        assert_eq!(album_of(&swapped[0]), "Break Through [bbbb2222]");
1061        assert_eq!(album_of(&swapped[1]), "Break Through [aaaa1111]");
1062
1063        // The MEDIUM fix: a narrowed run showing only one of the two roots
1064        // still gets the suffixed folder, so folders never oscillate.
1065        let alone = render_all_own(
1066            std::slice::from_ref(&first),
1067            &NamingConfig::default(),
1068            &colliding,
1069        );
1070        assert_eq!(album_of(&alone[0]), "Break Through [aaaa1111]");
1071    }
1072
1073    #[test]
1074    fn unique_root_title_stays_a_bare_album() {
1075        // A title absent from the colliding set keeps its bare folder even when
1076        // the batch happens to hold a same-titled sibling of the same root.
1077        let clip = Clip {
1078            id: "solo-1".to_string(),
1079            title: "Solo".to_string(),
1080            display_name: "München".to_string(),
1081            ..Clip::default()
1082        };
1083        let names = render_all_own(&[clip], &NamingConfig::default(), &BTreeSet::new());
1084        assert_eq!(
1085            names[0].relative_path.to_string_lossy(),
1086            "München/Solo/München-Solo [solo-1]"
1087        );
1088    }
1089
1090    #[test]
1091    fn sanitise_name_strips_separators_and_falls_back_when_empty() {
1092        assert_eq!(sanitise_name("Road/Trip: 2024"), "Road Trip 2024");
1093        assert_eq!(sanitise_name(""), "playlist");
1094        // A name made only of illegal characters strips to nothing, so the
1095        // caller still gets a usable, non-empty stem.
1096        assert_eq!(sanitise_name("///"), "playlist");
1097    }
1098
1099    #[test]
1100    fn stems_folder_is_a_sibling_suffix_of_the_song_base() {
1101        assert_eq!(
1102            stems_folder("Creator/Album/Creator-Song [abcd1234]"),
1103            "Creator/Album/Creator-Song [abcd1234].stems"
1104        );
1105    }
1106
1107    #[test]
1108    fn stem_file_path_combines_song_stem_label_and_disambiguator() {
1109        let path = stem_file_path(
1110            "Creator/Album/Creator-Song [abcd1234]",
1111            "Vocals",
1112            "stem-vocals-9f8e7d6c",
1113            "mp3",
1114            CharacterSet::Unicode,
1115        );
1116        assert_eq!(
1117            path,
1118            "Creator/Album/Creator-Song [abcd1234].stems/Creator-Song [abcd1234] - Vocals [stem-voc].mp3"
1119        );
1120    }
1121
1122    #[test]
1123    fn stem_file_path_disambiguates_blank_and_duplicate_labels_by_id() {
1124        // Two stems with the SAME (blank) label must not collide: the stem-id
1125        // disambiguator keeps them distinct even with no usable label.
1126        let a = stem_file_path("song", "", "id-aaaaaaaa", "wav", CharacterSet::Unicode);
1127        let b = stem_file_path("song", "", "id-bbbbbbbb", "wav", CharacterSet::Unicode);
1128        assert_eq!(a, "song.stems/song [id-aaaaa].wav");
1129        assert_eq!(b, "song.stems/song [id-bbbbb].wav");
1130        assert_ne!(a, b);
1131    }
1132
1133    #[test]
1134    fn stem_file_path_sanitises_label_and_extension_and_honours_ascii() {
1135        // Illegal path characters in the label are stripped, the extension is
1136        // reduced to a safe lowercase token, and ASCII folding applies.
1137        let path = stem_file_path(
1138            "song",
1139            "Lead/Vocal: Æ",
1140            "STEMID12",
1141            ".FLAC",
1142            CharacterSet::Ascii,
1143        );
1144        assert_eq!(path, "song.stems/song - Lead Vocal AE [STEMID12].flac");
1145        // A junk extension falls back to mp3 (defensive; callers pass wav/mp3).
1146        let fallback = stem_file_path("s", "Bass", "x", "??", CharacterSet::Unicode);
1147        assert_eq!(fallback, "s.stems/s - Bass [x].mp3");
1148    }
1149
1150    #[test]
1151    fn case_only_path_difference_is_a_canonical_collision() {
1152        // A custom template without {id8}: clips whose titles differ only in
1153        // case produce different exact paths but the same canonical path and
1154        // must be disambiguated to avoid clobbering on case-insensitive FSes.
1155        let config = NamingConfig {
1156            template: "{creator}/{title}".to_string(),
1157            ..NamingConfig::default()
1158        };
1159        let first = test_clip("aaaa1111-x", "sunrise");
1160        let second = test_clip("bbbb2222-y", "SUNRISE");
1161
1162        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1163
1164        assert_ne!(
1165            names[0].relative_path.to_string_lossy(),
1166            names[1].relative_path.to_string_lossy(),
1167            "canonical collision was not disambiguated"
1168        );
1169    }
1170
1171    #[test]
1172    fn nfc_nfd_path_difference_is_a_canonical_collision() {
1173        // The same character encoded as NFC vs NFD produces different byte
1174        // strings but the same file on NFC-normalising filesystems (macOS APFS).
1175        let config = NamingConfig {
1176            template: "{creator}/{title}".to_string(),
1177            ..NamingConfig::default()
1178        };
1179        // "é" as NFC (U+00E9) vs NFD (e + U+0301).
1180        let nfc_title = "\u{00e9}toile";
1181        let nfd_title = "e\u{0301}toile";
1182        let first = test_clip("aaaa1111-x", nfc_title);
1183        let second = test_clip("bbbb2222-y", nfd_title);
1184
1185        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1186
1187        assert_ne!(
1188            names[0].relative_path.to_string_lossy(),
1189            names[1].relative_path.to_string_lossy(),
1190            "NFC/NFD canonical collision was not disambiguated"
1191        );
1192    }
1193
1194    #[test]
1195    fn genuinely_distinct_paths_are_never_wrongly_disambiguated() {
1196        // Clips with distinct titles (not even canonically equivalent) must not
1197        // receive unnecessary suffixes — the canonical check must not produce
1198        // false positives.
1199        let config = NamingConfig {
1200            template: "{creator}/{title}".to_string(),
1201            ..NamingConfig::default()
1202        };
1203        let first = test_clip("aaaa1111-x", "Alpha");
1204        let second = test_clip("bbbb2222-y", "Beta");
1205
1206        let names = render_all_own(&[first, second], &config, &BTreeSet::new());
1207
1208        assert_eq!(
1209            names[0].relative_path.to_string_lossy(),
1210            "München/Alpha",
1211            "distinct path was wrongly suffixed"
1212        );
1213        assert_eq!(
1214            names[1].relative_path.to_string_lossy(),
1215            "München/Beta",
1216            "distinct path was wrongly suffixed"
1217        );
1218    }
1219}