suno_core/hash.rs
1//! Stable content sentinels for change detection.
2//!
3//! Reconcile compares a clip's current [`meta_hash`]/[`art_hash`] against the
4//! manifest to decide whether a file needs re-tagging. The hashes must be stable
5//! across runs, versions, and platforms, so they use FNV-1a over a fixed field
6//! encoding rather than the standard library's deliberately unspecified hasher.
7//!
8//! The hash inputs are the exact fields the tag writer embeds: [`meta_hash`]
9//! hashes the resolved [`TrackMetadata`], and [`art_hash`] tracks the chosen art
10//! URL. Anything embedded in the file is therefore in a hash, so an upstream
11//! change to it triggers a retag; anything not embedded (path-affecting or
12//! sidecar-only fields such as the animated-cover URL) is excluded.
13
14use std::hash::{Hash, Hasher};
15
16use crate::lineage::LineageContext;
17use crate::model::Clip;
18use crate::tag::TrackMetadata;
19
20/// A short, stable hex digest of `bytes` (FNV-1a, 64-bit).
21fn digest(bytes: &[u8]) -> String {
22 let mut hasher = fnv::FnvHasher::default();
23 hasher.write(bytes);
24 format!("{:016x}", hasher.finish())
25}
26
27/// A stable sentinel over an arbitrary generated text artefact.
28///
29/// Used for playlists, whose `.m3u8` body is generated rather than fetched: the
30/// hash is taken over the **full rendered text**, so the playlist name, the
31/// member order, and every member's relative path, title, and duration all feed
32/// it (HARDENING B1: a change to anything that ends up in the file changes the
33/// hash and so triggers a rewrite). Because the render is deterministic, the
34/// hash is stable across runs and platforms.
35pub fn content_hash(text: &str) -> String {
36 digest(text.as_bytes())
37}
38
39/// A sentinel for the clip's embedded tag set.
40///
41/// Hashes the resolved [`TrackMetadata`] that is actually written into the file
42/// (title, artist, album, date/year, lyrics, prompt, model, handle, and the
43/// resolved lineage tags), so a change to any embedded tag — including the
44/// artist (`display_name`) and model label, which the old hand-listed field set
45/// omitted — is detected as a needed retag, while a change to a field that is
46/// *not* embedded in the audio (e.g. the animated-cover URL) is not. Taking
47/// [`TrackMetadata`] directly keeps this in lock-step with the tag writer
48/// (HARDENING B1: if a value is embedded, it is in the change hash), so a
49/// retitle, artist rename, re-point, album move, or year correction all trigger
50/// a retag. Chosen art is tracked separately by [`art_hash`].
51///
52/// A pure path change (e.g. one driven only by a field that renames but does
53/// not embed) is still handled as a rename, by comparing the rendered path with
54/// the stored one, not by this hash.
55pub fn meta_hash(clip: &Clip, lineage: &LineageContext) -> String {
56 let mut hasher = fnv::FnvHasher::default();
57 TrackMetadata::from_clip(clip, lineage).hash(&mut hasher);
58 format!("{:016x}", hasher.finish())
59}
60
61/// A stable digest of an artifact source URL (FNV-1a), or the empty string when
62/// `url` is empty.
63///
64/// Shared by [`art_hash`] (the embedded static cover) and the external animated
65/// cover sidecar, whose rewrite detection keys on the clip's `video_cover_url`
66/// rather than the selected image. Keeping both on the one helper means an empty
67/// URL always maps to the empty sentinel, the value reconcile reads as "no such
68/// artifact this run".
69pub fn art_url_hash(url: &str) -> String {
70 if url.is_empty() {
71 String::new()
72 } else {
73 digest(url.as_bytes())
74 }
75}
76
77/// The change-detection version for the synced `.lrc` body. Bump this when the
78/// rendered `.lrc` format changes so existing sidecars are rewritten on the next
79/// run (their stored hash then no longer matches, exactly as edited content
80/// would move a [`content_hash`]).
81pub const SYNCED_LRC_VERSION: u32 = 2;
82
83/// A stable per-clip source sentinel for the synced `.lrc` sidecar.
84///
85/// Suno's forced alignment for a given clip is immutable (the audio and its
86/// lyrics are fixed once generated), so the sidecar's rewrite detection keys on
87/// the clip id plus the render [`SYNCED_LRC_VERSION`] rather than the fetched
88/// body. This lets reconcile skip an unchanged clip WITHOUT a network fetch (the
89/// timed body is resolved only when a write is actually planned), while a
90/// version bump rewrites every sidecar. It mirrors how the cover sidecars key on
91/// their source URL rather than the fetched bytes ("the hash tracks the source").
92pub fn synced_lrc_source_hash(clip_id: &str) -> String {
93 content_hash(&format!("synced-lrc/v{SYNCED_LRC_VERSION}/{clip_id}"))
94}
95
96/// A sentinel for the embedded cover art: a digest of the selected art URL, or
97/// the empty string when the clip carries no art. A mismatch against the
98/// manifest means the file on disk holds stale art even if its tags are current.
99pub fn art_hash(clip: &Clip) -> String {
100 art_url_hash(clip.selected_image_url().unwrap_or(""))
101}
102
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lineage::{EdgeType, ResolveStatus};

    /// A clip with every field these tests touch populated; the animated
    /// cover URL is deliberately left empty.
    fn sample() -> Clip {
        Clip {
            title: String::from("Electric Storm"),
            tags: String::from("ambient, cinematic"),
            image_large_url: String::from("https://cdn1.suno.ai/image_large_abc.jpeg"),
            image_url: String::from("https://cdn1.suno.ai/image_abc.jpeg"),
            video_cover_url: String::new(),
            root_ancestor_id: String::from("root-1"),
            lineage_status: String::from("continuation"),
            album_title: String::from("Weather Series"),
            prompt: String::from("an orchestral storm"),
            lyrics: String::from("thunder rolls\nover the plains"),
            gpt_description_prompt: String::from("stormy"),
            handle: String::from("alice"),
            display_name: String::from("Alice"),
            ..Default::default()
        }
    }

    /// The resolved lineage that accompanies [`sample`]: an extension of a
    /// parent under the "Weather Series" root, created in 2023.
    fn sample_lineage() -> LineageContext {
        LineageContext {
            root_id: String::from("root-1"),
            root_title: String::from("Weather Series"),
            root_date: String::from("2023-05-01T00:00:00Z"),
            parent_id: String::from("parent-1"),
            edge_type: Some(EdgeType::Extend),
            status: ResolveStatus::Resolved,
        }
    }

    #[test]
    fn meta_hash_is_stable() {
        // Golden value: if it moves, the sentinel encoding changed and every
        // existing manifest would see a spurious retag. Change with care.
        let (clip, lineage) = (sample(), sample_lineage());
        let first = meta_hash(&clip, &lineage);
        assert_eq!(first, "c247d31f60378b86");
        assert_eq!(first.len(), 16);
        assert_eq!(first, meta_hash(&sample(), &sample_lineage()));
    }

    #[test]
    fn art_hash_is_stable_and_empty_without_art() {
        let with_art = art_hash(&sample());
        assert_eq!(with_art.len(), 16);
        assert_eq!(with_art, art_hash(&sample()));

        // With every candidate URL blank there is nothing to select.
        let bare = Clip {
            image_large_url: String::new(),
            image_url: String::new(),
            video_cover_url: String::new(),
            ..sample()
        };
        assert_eq!(art_hash(&bare), "");
    }

    #[test]
    fn art_url_hash_is_stable_and_empty_for_empty_url() {
        const COVER: &str = "https://cdn1.suno.ai/video_cover.mp4";

        assert_eq!(art_url_hash(""), "");
        let cover = art_url_hash(COVER);
        assert_eq!(cover.len(), 16);
        assert_eq!(cover, art_url_hash(COVER));
        assert_ne!(cover, art_url_hash("https://cdn1.suno.ai/other.mp4"));
        // art_hash routes the selected image URL through the same helper.
        let clip = sample();
        let selected = clip.selected_image_url().unwrap();
        assert_eq!(art_hash(&clip), art_url_hash(selected));
    }

    #[test]
    fn meta_hash_tracks_the_artist_and_model_but_not_sidecar_only_fields() {
        let lineage = sample_lineage();
        let baseline = meta_hash(&sample(), &lineage);

        // The artist (`display_name`) and model label are embedded tags, so an
        // upstream change to either must retag (#135) -- the old hand-listed
        // hash omitted both, leaving stale tags on re-sync.
        let renamed_artist = Clip {
            display_name: String::from("Someone Else"),
            ..sample()
        };
        assert_ne!(meta_hash(&renamed_artist, &lineage), baseline);
        let other_model = Clip {
            model_name: String::from("chirp-v9"),
            ..sample()
        };
        assert_ne!(meta_hash(&other_model, &lineage), baseline);

        // The animated-cover URL is a sidecar source, not an audio tag: it has
        // its own hash and must not force a needless audio retag (#136).
        let new_cover = Clip {
            video_cover_url: String::from("https://cdn1.suno.ai/new_cover.mp4"),
            ..sample()
        };
        assert_eq!(meta_hash(&new_cover, &lineage), baseline);
    }

    #[test]
    fn meta_hash_changes_when_a_content_field_changes() {
        let lineage = sample_lineage();
        let baseline = meta_hash(&sample(), &lineage);

        // Clip-side content fields. (Art lives in `art_hash`, not here.)
        let clip_edits: [fn(&mut Clip); 4] = [
            |c: &mut Clip| c.title = String::from("Different"),
            |c: &mut Clip| c.tags = String::from("lofi"),
            |c: &mut Clip| c.handle = String::from("bob"),
            |c: &mut Clip| c.lyrics = String::from("new words"),
        ];
        for edit in clip_edits {
            let mut edited = sample();
            edit(&mut edited);
            assert_ne!(meta_hash(&edited, &lineage), baseline);
        }

        // Resolved-lineage values that get embedded must also move the hash.
        let lineage_edits: [fn(&mut LineageContext); 5] = [
            |l: &mut LineageContext| l.parent_id = String::from("other-parent"),
            |l: &mut LineageContext| l.root_id = String::from("other-root"),
            |l: &mut LineageContext| l.root_title = String::from("Other Album"),
            |l: &mut LineageContext| l.edge_type = Some(EdgeType::Cover),
            |l: &mut LineageContext| l.root_date = String::from("2099-01-01T00:00:00Z"),
        ];
        for edit in lineage_edits {
            let mut edited = sample_lineage();
            edit(&mut edited);
            assert_ne!(meta_hash(&sample(), &edited), baseline);
        }
    }

    #[test]
    fn art_hash_tracks_the_selected_url_in_preference_order() {
        let mut clip = sample();
        let from_large = art_hash(&clip);

        // Dropping the large image changes the selected URL.
        clip.image_large_url = String::new();
        let from_standard = art_hash(&clip);
        assert_ne!(from_large, from_standard);

        // With the standard image gone too, only the video cover is left.
        clip.image_url = String::new();
        clip.video_cover_url = String::from("https://cdn1.suno.ai/video_cover.jpeg");
        let from_video = art_hash(&clip);
        assert_ne!(from_standard, from_video);
    }

    #[test]
    fn content_hash_is_stable_and_tracks_any_change() {
        let playlist = "#EXTM3U\n#PLAYLIST:Mix\n#EXTINF:60,One\nA/One.flac\n";
        let original = content_hash(playlist);
        assert_eq!(original.len(), 16);
        assert_eq!(original, content_hash(playlist), "same text hashes the same");

        // A different name, order, path, title, or duration changes the digest.
        let renamed = content_hash("#EXTM3U\n#PLAYLIST:Other\n#EXTINF:60,One\nA/One.flac\n");
        assert_ne!(original, renamed);
        let retimed = content_hash("#EXTM3U\n#PLAYLIST:Mix\n#EXTINF:61,One\nA/One.flac\n");
        assert_ne!(original, retimed);
    }

    #[test]
    fn synced_lrc_source_hash_is_stable_per_clip_and_never_empty() {
        let clip_a = synced_lrc_source_hash("clip-a");
        assert_eq!(clip_a.len(), 16);
        assert_eq!(clip_a, synced_lrc_source_hash("clip-a"), "stable per clip id");

        // Distinct clips get distinct sentinels; none is the empty ("absent")
        // value, so a desired synced `.lrc` is never mistaken for "no artifact".
        let clip_b = synced_lrc_source_hash("clip-b");
        assert_ne!(clip_a, clip_b);
        assert!(!clip_a.is_empty());
    }
}