Skip to main content

suno_core/
naming.rs

1//! Pure naming and relative path rendering for [`Clip`] values.
2
3use std::collections::{BTreeMap, BTreeSet};
4use std::path::PathBuf;
5
6use crate::Clip;
7use crate::lineage::LineageContext;
8
/// The default relative path template.
///
/// Recognised placeholders:
///
/// * `{creator}`, `{handle}`, `{album}`, `{title}`
/// * `{id}` and `{id8}` (the first 8 characters of the clip id)
/// * `{root_id8}` (the first 8 characters of the resolved lineage root id)
///
/// The template is split on `/`; any segment that renders to nothing is
/// dropped.
///
/// The default puts `[{id8}]` in the file name so that clips sharing a title
/// still get distinct names, and groups files in an `{album}` folder, which
/// resolves to the lineage root's title (else the clip's own title).
pub const DEFAULT_TEMPLATE: &str = "{creator}/{album}/{creator}-{title} [{id8}]";

/// Default cap, in characters, applied to each rendered path component.
const DEFAULT_MAX_COMPONENT_LEN: usize = 80;

/// Number of base-name characters that must still fit in front of an appended
/// ` [suffix]`; the length cap is raised when it would leave less room.
const MIN_BASE_CHARS_WITH_SUFFIX: usize = 1;
23
/// The repertoire of characters allowed in rendered path components.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum CharacterSet {
    /// Keep Unicode text as is; only path-hostile and control characters are
    /// replaced with spaces.
    #[default]
    Unicode,
    /// Restrict output to ASCII: a fixed table of accented Latin letters and
    /// ligatures is transliterated, any other non-ASCII character becomes a
    /// space.
    Ascii,
}
30
31#[derive(Debug, Clone, PartialEq, Eq)]
32pub struct NamingConfig {
33    pub template: String,
34    pub character_set: CharacterSet,
35    pub max_component_len: usize,
36}
37
38impl Default for NamingConfig {
39    fn default() -> Self {
40        Self {
41            template: DEFAULT_TEMPLATE.to_string(),
42            character_set: CharacterSet::Unicode,
43            max_component_len: DEFAULT_MAX_COMPONENT_LEN,
44        }
45    }
46}
47
48#[derive(Debug, Clone, Copy)]
49pub struct NamingRequest<'a> {
50    pub clip: &'a Clip,
51    pub lineage: &'a LineageContext,
52}
53
/// The outcome of rendering one clip through a naming template.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RenderedName {
    /// Folder components followed by the file name; always relative.
    pub relative_path: PathBuf,
    /// The final component of `relative_path`, i.e. the file name.
    pub base_name: String,
}
59
60pub fn render_clip_name(request: NamingRequest<'_>, config: &NamingConfig) -> RenderedName {
61    let album = album_component(request, config);
62    render_with_album(request, config, &album)
63}
64
65pub fn render_clip_names(
66    requests: &[NamingRequest<'_>],
67    config: &NamingConfig,
68    colliding_albums: &BTreeSet<String>,
69) -> Vec<RenderedName> {
70    let albums = disambiguated_albums(requests, config, colliding_albums);
71    let mut rendered = requests
72        .iter()
73        .zip(&albums)
74        .map(|(request, album)| render_with_album(*request, config, album))
75        .collect::<Vec<_>>();
76
77    // Filename fallback: any distinct clips that still render to one path (a
78    // custom template lacking `{id8}`) are separated by their full clip id.
79    let mut collisions = BTreeMap::<String, Vec<usize>>::new();
80    for (index, name) in rendered.iter().enumerate() {
81        collisions
82            .entry(name.relative_path.to_string_lossy().into_owned())
83            .or_default()
84            .push(index);
85    }
86
87    for indexes in collisions.into_values().filter(|indexes| indexes.len() > 1) {
88        for index in indexes {
89            let suffix = &requests[index].clip.id;
90            rendered[index] =
91                with_suffix(rendered[index].clone(), suffix, config.max_component_len);
92        }
93    }
94
95    rendered
96}
97
98/// The album path component for every request, with a clip whose root title
99/// collides across distinct roots disambiguated by `[{root_id8}]`.
100///
101/// Distinct roots must never share an album folder (two different upload roots
102/// titled "Break Through" exist). `colliding_albums` is the authoritative set
103/// of such shared root titles, computed once from the whole lineage store, so
104/// the decision is stable across runs and independent of which clips appear in
105/// this batch. A clip whose resolved album is in that set always gets its
106/// root's short id appended; every other clip keeps the bare album and groups
107/// with its same-root siblings.
108fn disambiguated_albums(
109    requests: &[NamingRequest<'_>],
110    config: &NamingConfig,
111    colliding_albums: &BTreeSet<String>,
112) -> Vec<String> {
113    requests
114        .iter()
115        .map(|request| album_for(*request, config, colliding_albums))
116        .collect()
117}
118
119/// The (possibly disambiguated) album component for one request.
120fn album_for(
121    request: NamingRequest<'_>,
122    config: &NamingConfig,
123    colliding_albums: &BTreeSet<String>,
124) -> String {
125    let raw_album = request.lineage.album(&title_name(request.clip));
126    let album = sanitise_component(&raw_album, config.character_set, config.max_component_len);
127    if colliding_albums.contains(raw_album.trim()) {
128        let suffix = truncate_chars(&request.lineage.root_id, 8);
129        sanitise_component(
130            &format!("{album} [{suffix}]"),
131            config.character_set,
132            config.max_component_len,
133        )
134    } else {
135        album
136    }
137}
138
139/// The sanitised album component: the resolved lineage album (root title, else
140/// the clip's own title).
141fn album_component(request: NamingRequest<'_>, config: &NamingConfig) -> String {
142    let album = request.lineage.album(&title_name(request.clip));
143    sanitise_component(&album, config.character_set, config.max_component_len)
144}
145
146/// Render one clip's path with an already-resolved album component.
147fn render_with_album(
148    request: NamingRequest<'_>,
149    config: &NamingConfig,
150    album: &str,
151) -> RenderedName {
152    let clip = request.clip;
153    let creator = sanitise_component(
154        &creator_name(clip),
155        config.character_set,
156        config.max_component_len,
157    );
158    let handle = sanitise_component(&clip.handle, config.character_set, config.max_component_len);
159    let title = sanitise_component(
160        &title_name(clip),
161        config.character_set,
162        config.max_component_len,
163    );
164    let id = sanitise_component(&clip.id, CharacterSet::Ascii, config.max_component_len);
165    let id8 = sanitise_component(
166        &truncate_chars(&clip.id, 8),
167        CharacterSet::Ascii,
168        config.max_component_len,
169    );
170    let root_id8 = sanitise_component(
171        &truncate_chars(&request.lineage.root_id, 8),
172        CharacterSet::Ascii,
173        config.max_component_len,
174    );
175    let mut components = config
176        .template
177        .split('/')
178        .filter_map(|segment| {
179            let rendered = segment
180                .replace("{creator}", &creator)
181                .replace("{handle}", &handle)
182                .replace("{album}", album)
183                .replace("{title}", &title)
184                .replace("{root_id8}", &root_id8)
185                .replace("{id8}", &id8)
186                .replace("{id}", &id);
187            let sanitised =
188                sanitise_component(&rendered, config.character_set, config.max_component_len);
189            (!sanitised.is_empty()).then_some(sanitised)
190        })
191        .collect::<Vec<_>>();
192
193    if components.is_empty() {
194        components.push(title.clone());
195    }
196
197    let mut base_name = components
198        .pop()
199        .filter(|value| !value.is_empty())
200        .unwrap_or_else(|| title.clone());
201    // Guarantee a non-empty file name even when every token sanitises away.
202    if base_name.is_empty() {
203        base_name = append_suffix(&base_name, &clip.id, config.max_component_len);
204    }
205
206    let mut relative_path = PathBuf::new();
207    for component in components {
208        relative_path.push(component);
209    }
210
211    relative_path.push(&base_name);
212    RenderedName {
213        relative_path,
214        base_name,
215    }
216}
217
218fn with_suffix(mut rendered: RenderedName, suffix: &str, max_component_len: usize) -> RenderedName {
219    rendered.base_name = append_suffix(&rendered.base_name, suffix, max_component_len);
220    rendered.relative_path.set_file_name(&rendered.base_name);
221    rendered
222}
223
224fn creator_name(clip: &Clip) -> String {
225    non_blank(&clip.display_name)
226        .or_else(|| non_blank(&clip.handle))
227        .unwrap_or("Unknown Creator")
228        .to_string()
229}
230
231fn title_name(clip: &Clip) -> String {
232    let title = clip.title.trim();
233    if title.is_empty() || title.eq_ignore_ascii_case("untitled") {
234        "Untitled".to_string()
235    } else {
236        title.to_string()
237    }
238}
239
240fn append_suffix(base: &str, suffix: &str, max_component_len: usize) -> String {
241    let suffix_pattern = format!(" [{suffix}]");
242    if base.ends_with(&suffix_pattern) {
243        return sanitise_component(base, CharacterSet::Unicode, max_component_len);
244    }
245
246    let max_len =
247        max_component_len.max(suffix_pattern.chars().count() + MIN_BASE_CHARS_WITH_SUFFIX);
248    let allowed = max_len.saturating_sub(suffix_pattern.chars().count());
249    let truncated = truncate_chars(base.trim_end(), allowed);
250    let combined = format!("{truncated}{suffix_pattern}");
251    sanitise_component(&combined, CharacterSet::Unicode, max_len)
252}
253
254/// Sanitise a free-form playlist name into a single safe path component.
255///
256/// Applies the same Unicode filtering and length cap as clip path components
257/// (default [`CharacterSet::Unicode`], [`DEFAULT_MAX_COMPONENT_LEN`]), so a
258/// playlist file name obeys the same filesystem rules as the rest of the
259/// library. An empty or fully-stripped name falls back to `playlist` so the
260/// caller always has a non-empty stem to append `.m3u8` to.
261pub fn sanitise_name(name: &str) -> String {
262    let cleaned = sanitise_component(name, CharacterSet::Unicode, DEFAULT_MAX_COMPONENT_LEN);
263    if cleaned.is_empty() {
264        "playlist".to_string()
265    } else {
266        cleaned
267    }
268}
269
270fn sanitise_component(
271    value: &str,
272    character_set: CharacterSet,
273    max_component_len: usize,
274) -> String {
275    let filtered = match character_set {
276        CharacterSet::Unicode => value.chars().map(unicode_char).collect::<String>(),
277        CharacterSet::Ascii => value.chars().flat_map(ascii_chars).collect::<String>(),
278    };
279    let collapsed = filtered.split_whitespace().collect::<Vec<_>>().join(" ");
280    let trimmed = collapsed.trim_matches([' ', '.']);
281    if trimmed.is_empty() {
282        return String::new();
283    }
284
285    let mut result = truncate_chars(trimmed, max_component_len.max(1));
286    result = result.trim_matches([' ', '.']).to_string();
287    if result.is_empty() {
288        return String::new();
289    }
290    if result == "." || result == ".." {
291        return "item".to_string();
292    }
293    if !result.ends_with('_') && is_reserved_name(&result) {
294        result.push('_');
295    }
296    result
297}
298
/// Replace a character that is unsafe in a path component with a space.
///
/// Unsafe means one of `< > : " / \ | ? *`, NUL, or any control character;
/// every other character is returned unchanged.
fn unicode_char(ch: char) -> char {
    const FORBIDDEN: [char; 10] = ['<', '>', ':', '"', '/', '\\', '|', '?', '*', '\0'];
    if ch.is_control() || FORBIDDEN.contains(&ch) {
        return ' ';
    }
    ch
}
310
311fn ascii_chars(ch: char) -> Vec<char> {
312    if ch.is_ascii() {
313        return vec![unicode_char(ch)];
314    }
315
316    match ch {
317        'À' | 'Á' | 'Â' | 'Ã' | 'Ä' | 'Å' => vec!['A'],
318        'à' | 'á' | 'â' | 'ã' | 'ä' | 'å' => vec!['a'],
319        'Ç' => vec!['C'],
320        'ç' => vec!['c'],
321        'È' | 'É' | 'Ê' | 'Ë' => vec!['E'],
322        'è' | 'é' | 'ê' | 'ë' => vec!['e'],
323        'Ì' | 'Í' | 'Î' | 'Ï' => vec!['I'],
324        'ì' | 'í' | 'î' | 'ï' => vec!['i'],
325        'Ñ' => vec!['N'],
326        'ñ' => vec!['n'],
327        'Ò' | 'Ó' | 'Ô' | 'Õ' | 'Ö' | 'Ø' => vec!['O'],
328        'ò' | 'ó' | 'ô' | 'õ' | 'ö' | 'ø' => vec!['o'],
329        'Ù' | 'Ú' | 'Û' | 'Ü' => vec!['U'],
330        'ù' | 'ú' | 'û' | 'ü' => vec!['u'],
331        'Ý' | 'Ÿ' => vec!['Y'],
332        'ý' | 'ÿ' => vec!['y'],
333        'Æ' => vec!['A', 'E'],
334        'æ' => vec!['a', 'e'],
335        'Œ' => vec!['O', 'E'],
336        'œ' => vec!['o', 'e'],
337        'ß' => vec!['s', 's'],
338        _ => vec![' '],
339    }
340}
341
/// Return at most the first `max_len` characters (not bytes) of `value`.
fn truncate_chars(value: &str, max_len: usize) -> String {
    // The byte offset of character number `max_len` is where the cut goes;
    // when there is no such character the whole string already fits.
    match value.char_indices().nth(max_len) {
        Some((cut, _)) => value[..cut].to_string(),
        None => value.to_string(),
    }
}
345
/// The trimmed `value`, or `None` when it is empty or whitespace only.
fn non_blank(value: &str) -> Option<&str> {
    match value.trim() {
        "" => None,
        kept => Some(kept),
    }
}
350
/// Whether `value` names a reserved Windows device.
///
/// Only the stem (the text before the first `.`) is examined, compared
/// ASCII-case-insensitively: `CON`, `PRN`, `AUX`, `NUL`, and `COM`/`LPT`
/// followed by exactly one digit `0`-`9` or one superscript digit `¹`, `²`,
/// `³`. An extension does not make a name safe, so `nul.tar.gz` is reserved.
fn is_reserved_name(value: &str) -> bool {
    let stem = value
        .split('.')
        .next()
        .unwrap_or(value)
        .to_ascii_uppercase();
    if matches!(stem.as_str(), "CON" | "PRN" | "AUX" | "NUL") {
        return true;
    }

    let port = match stem.strip_prefix("COM").or_else(|| stem.strip_prefix("LPT")) {
        Some(port) => port,
        None => return false,
    };
    // Exactly one port character: an ASCII digit or a superscript 1/2/3.
    let mut port_chars = port.chars();
    matches!(
        (port_chars.next(), port_chars.next()),
        (Some('0'..='9' | '\u{b9}' | '\u{b2}' | '\u{b3}'), None)
    )
}
379
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lineage::{EdgeType, ResolveStatus};
    use std::collections::{BTreeMap, BTreeSet};

    /// A clip by "München" (`munchen`) with the given id and title.
    fn test_clip(id: &str, title: &str) -> Clip {
        Clip {
            id: id.to_owned(),
            title: title.to_owned(),
            display_name: "München".to_owned(),
            handle: "munchen".to_owned(),
            album_title: String::new(),
            root_ancestor_id: String::new(),
            ..Clip::default()
        }
    }

    /// Render one clip that is its own lineage root.
    fn render_own(clip: &Clip, config: &NamingConfig) -> RenderedName {
        let own_root = LineageContext::own_root(clip);
        let request = NamingRequest {
            clip,
            lineage: &own_root,
        };
        render_clip_name(request, config)
    }

    /// Batch-render clips that are each their own lineage root.
    fn render_all_own(
        clips: &[Clip],
        config: &NamingConfig,
        colliding: &BTreeSet<String>,
    ) -> Vec<RenderedName> {
        let own_roots: Vec<LineageContext> = clips.iter().map(LineageContext::own_root).collect();
        let mut requests: Vec<NamingRequest<'_>> = Vec::with_capacity(clips.len());
        for (clip, lineage) in clips.iter().zip(&own_roots) {
            requests.push(NamingRequest { clip, lineage });
        }
        render_clip_names(&requests, config, colliding)
    }

    /// The second path component, which the default template uses for the
    /// album folder (empty when the path is shorter).
    fn album_of(rendered: &RenderedName) -> String {
        match rendered.relative_path.components().nth(1) {
            Some(component) => component.as_os_str().to_string_lossy().into_owned(),
            None => String::new(),
        }
    }

    /// Clip id -> rendered path, so results can be compared whatever the
    /// request order was.
    fn paths_by_id(rendered: &[RenderedName], clips: &[Clip]) -> BTreeMap<String, String> {
        let mut paths = BTreeMap::new();
        for (clip, name) in clips.iter().zip(rendered) {
            paths.insert(
                clip.id.clone(),
                name.relative_path.to_string_lossy().into_owned(),
            );
        }
        paths
    }

    #[test]
    fn unicode_names_are_preserved_and_ascii_falls_back() {
        let clip = test_clip("abc12345", "Beyoncé/東京");

        let unicode = render_own(&clip, &NamingConfig::default());
        assert_eq!(
            unicode.relative_path.to_string_lossy(),
            "München/Beyoncé 東京/München-Beyoncé 東京 [abc12345]"
        );

        let ascii_config = NamingConfig {
            character_set: CharacterSet::Ascii,
            ..NamingConfig::default()
        };
        let ascii = render_own(&clip, &ascii_config);
        assert_eq!(
            ascii.relative_path.to_string_lossy(),
            "Munchen/Beyonce/Munchen-Beyonce [abc12345]"
        );
    }

    #[test]
    fn reserved_and_hostile_names_are_sanitised() {
        let clip = Clip {
            id: "deadbeef".to_owned(),
            title: "CON<>:\"/\\|?*.".to_owned(),
            display_name: "AUX".to_owned(),
            ..Clip::default()
        };

        let rendered = render_own(&clip, &NamingConfig::default());
        let path = rendered.relative_path.to_string_lossy();
        assert!(path.starts_with("AUX_/CON_/"), "path was {path}");
        assert!(rendered.base_name.contains("[deadbeef]"));
    }

    #[test]
    fn default_template_always_embeds_id8() {
        let clip = test_clip("abcdef1234567890", "Any Title");
        let rendered = render_own(&clip, &NamingConfig::default());
        assert!(
            rendered.base_name.contains("[abcdef12]"),
            "base_name was {}",
            rendered.base_name
        );
    }

    #[test]
    fn blank_titles_use_a_stable_suffix() {
        let clip = test_clip("12345678-clip", "   ");

        let rendered = render_own(&clip, &NamingConfig::default());
        assert_eq!(rendered.base_name, "München-Untitled [12345678]");
        assert_eq!(
            rendered.relative_path.to_string_lossy(),
            "München/Untitled/München-Untitled [12345678]"
        );
    }

    #[test]
    fn very_long_titles_are_trimmed() {
        let clip = test_clip("abcdef12", &"a".repeat(120));
        let short_config = NamingConfig {
            max_component_len: 24,
            ..NamingConfig::default()
        };
        let rendered = render_own(&clip, &short_config);

        for component in rendered.relative_path.components() {
            let text = component.as_os_str().to_string_lossy();
            assert!(
                text.chars().count() <= 24,
                "component {text:?} exceeds 24 chars"
            );
        }
    }

    #[test]
    fn same_title_siblings_stay_distinct_via_id8() {
        // Two clips under one root share the album folder and the title; the
        // `{id8}` in the default template is what keeps their files apart.
        let shared_root = LineageContext {
            root_id: "root-9".to_owned(),
            root_title: "Origin".to_owned(),
            parent_id: "root-9".to_owned(),
            edge_type: Some(EdgeType::Cover),
            status: ResolveStatus::Resolved,
        };
        let first = test_clip("11111111-alpha", "Shared");
        let second = test_clip("22222222-beta", "Shared");
        let requests = [
            NamingRequest {
                clip: &first,
                lineage: &shared_root,
            },
            NamingRequest {
                clip: &second,
                lineage: &shared_root,
            },
        ];

        let names = render_clip_names(&requests, &NamingConfig::default(), &BTreeSet::new());

        assert_eq!(
            names[0].relative_path.to_string_lossy(),
            "München/Origin/München-Shared [11111111]"
        );
        assert_eq!(
            names[1].relative_path.to_string_lossy(),
            "München/Origin/München-Shared [22222222]"
        );
    }

    #[test]
    fn id8_prefix_collision_falls_back_to_full_id() {
        // With no `{id8}` in the template, identical titles render to the same
        // path, so the full-id filename fallback has to separate them.
        let config = NamingConfig {
            template: "{creator}/{title}".to_owned(),
            ..NamingConfig::default()
        };
        let first = test_clip("abcd1234-first", "Untitled");
        let second = test_clip("abcd1234-second", "Untitled");
        let no_collisions = BTreeSet::new();

        let forward = [first.clone(), second.clone()];
        let backward = [second, first];
        let names = render_all_own(&forward, &config, &no_collisions);
        let swapped = render_all_own(&backward, &config, &no_collisions);

        assert_ne!(
            names[0].relative_path.to_string_lossy(),
            names[1].relative_path.to_string_lossy()
        );

        // Each clip gets the same path whatever the request order.
        assert_eq!(
            paths_by_id(&names, &forward),
            paths_by_id(&swapped, &backward)
        );
    }

    #[test]
    fn album_is_root_title_for_a_remix() {
        let clip = Clip {
            id: "child".to_owned(),
            title: "Remix".to_owned(),
            display_name: "München".to_owned(),
            ..Clip::default()
        };
        let lineage = LineageContext {
            root_id: "root-1".to_owned(),
            root_title: "Original".to_owned(),
            parent_id: "root-1".to_owned(),
            edge_type: Some(EdgeType::Cover),
            status: ResolveStatus::Resolved,
        };

        let request = NamingRequest {
            clip: &clip,
            lineage: &lineage,
        };
        let rendered = render_clip_name(request, &NamingConfig::default());
        assert_eq!(
            rendered.relative_path.to_string_lossy(),
            "München/Original/München-Remix [child]"
        );
    }

    #[test]
    fn album_is_own_title_for_a_root() {
        let clip = Clip {
            id: "root-1".to_owned(),
            title: "Original".to_owned(),
            display_name: "München".to_owned(),
            ..Clip::default()
        };

        let rendered = render_own(&clip, &NamingConfig::default());
        assert_eq!(
            rendered.relative_path.to_string_lossy(),
            "München/Original/München-Original [root-1]"
        );
    }

    #[test]
    fn shared_album_title_from_distinct_roots_is_disambiguated() {
        let first = Clip {
            id: "aaaa1111-x".to_owned(),
            title: "Break Through".to_owned(),
            display_name: "München".to_owned(),
            ..Clip::default()
        };
        let second = Clip {
            id: "bbbb2222-y".to_owned(),
            title: "Break Through".to_owned(),
            display_name: "München".to_owned(),
            ..Clip::default()
        };

        // The colliding set is supplied by the caller, so disambiguation does
        // not rely on both roots being part of the same batch.
        let colliding: BTreeSet<String> = BTreeSet::from(["Break Through".to_owned()]);
        let config = NamingConfig::default();
        let names = render_all_own(&[first.clone(), second.clone()], &config, &colliding);
        let swapped = render_all_own(&[second.clone(), first.clone()], &config, &colliding);

        assert_eq!(album_of(&names[0]), "Break Through [aaaa1111]");
        assert_eq!(album_of(&names[1]), "Break Through [bbbb2222]");
        // Deterministic regardless of input order.
        assert_eq!(album_of(&swapped[0]), "Break Through [bbbb2222]");
        assert_eq!(album_of(&swapped[1]), "Break Through [aaaa1111]");

        // A narrowed run that contains only one of the two roots still gets
        // the suffixed folder, so folders never flip between runs.
        let alone = render_all_own(std::slice::from_ref(&first), &config, &colliding);
        assert_eq!(album_of(&alone[0]), "Break Through [aaaa1111]");
    }

    #[test]
    fn unique_root_title_stays_a_bare_album() {
        // A title that is not in the colliding set keeps its bare folder.
        let clip = Clip {
            id: "solo-1".to_owned(),
            title: "Solo".to_owned(),
            display_name: "München".to_owned(),
            ..Clip::default()
        };
        let names = render_all_own(&[clip], &NamingConfig::default(), &BTreeSet::new());
        assert_eq!(
            names[0].relative_path.to_string_lossy(),
            "München/Solo/München-Solo [solo-1]"
        );
    }

    #[test]
    fn sanitise_name_strips_separators_and_falls_back_when_empty() {
        assert_eq!(sanitise_name("Road/Trip: 2024"), "Road Trip 2024");
        assert_eq!(sanitise_name(""), "playlist");
        // Only illegal characters: everything is stripped, yet the caller
        // still receives a usable, non-empty stem.
        assert_eq!(sanitise_name("///"), "playlist");
    }
}