1use std::fmt::Write as _;
16
17use serde_json::Value;
18
/// One entry of the `aligned_words` array: a word plus its alignment result.
///
/// Values are filled in leniently by `parse_word`; every field falls back to an empty or
/// zero value when its JSON key is missing or has the wrong type.
#[derive(Debug, Clone, PartialEq)]
pub struct AlignedWord {
    /// Text of the word (`word` key); empty when absent.
    pub word: String,
    /// The `success` flag reported for this word; `false` when absent.
    pub success: bool,
    /// Start of the word (`start_s` key); presumably seconds, like the line-level timings.
    pub start_s: f64,
    /// End of the word (`end_s` key); presumably seconds, like the line-level timings.
    pub end_s: f64,
    /// The `p_align` value; presumably an alignment confidence — TODO confirm with the API.
    pub p_align: f64,
}
33
/// A word nested inside an aligned line (one element of a line's `words` array).
#[derive(Debug, Clone, PartialEq)]
pub struct AlignedLineWord {
    /// Text of the word (`text` key); empty when absent.
    pub text: String,
    /// Start time in seconds (`start_s` key); `0.0` when absent.
    pub start_s: f64,
    /// End time in seconds (`end_s` key); `0.0` when absent.
    pub end_s: f64,
}
45
46#[derive(Debug, Clone, PartialEq)]
48pub struct AlignedLine {
49 pub text: String,
50 pub start_s: f64,
51 pub end_s: f64,
52 pub section: String,
54 pub words: Vec<AlignedLineWord>,
55}
56
57#[derive(Debug, Clone, Default, PartialEq)]
62pub struct AlignedLyrics {
63 pub words: Vec<AlignedWord>,
64 pub lines: Vec<AlignedLine>,
65 pub waveform_data: Vec<f64>,
69 pub hoot_cer: Option<f64>,
72 pub is_streamed: Option<bool>,
74}
75
76impl AlignedLyrics {
77 pub fn from_json(raw: &Value) -> AlignedLyrics {
83 let words = raw
84 .get("aligned_words")
85 .and_then(Value::as_array)
86 .map(|items| items.iter().map(parse_word).collect())
87 .unwrap_or_default();
88 let lines = raw
89 .get("aligned_lyrics")
90 .and_then(Value::as_array)
91 .map(|items| items.iter().map(parse_line).collect())
92 .unwrap_or_default();
93 let waveform_data = raw
94 .get("waveform_data")
95 .and_then(Value::as_array)
96 .map(|items| items.iter().filter_map(Value::as_f64).collect())
97 .unwrap_or_default();
98 let hoot_cer = raw.get("hoot_cer").and_then(Value::as_f64);
99 let is_streamed = raw.get("is_streamed").and_then(Value::as_bool);
100 AlignedLyrics {
101 words,
102 lines,
103 waveform_data,
104 hoot_cer,
105 is_streamed,
106 }
107 }
108
109 pub fn from_bytes(body: &[u8]) -> AlignedLyrics {
112 serde_json::from_slice::<Value>(body)
113 .map(|value| Self::from_json(&value))
114 .unwrap_or_default()
115 }
116
117 pub fn is_empty(&self) -> bool {
120 self.lines.is_empty() && self.words.is_empty()
121 }
122
123 pub fn plain_text(&self) -> String {
128 if !self.lines.is_empty() {
129 return self
130 .lines
131 .iter()
132 .map(|line| line.text.trim_end())
133 .collect::<Vec<_>>()
134 .join("\n");
135 }
136 self.words
137 .iter()
138 .map(|word| word.word.as_str())
139 .collect::<Vec<_>>()
140 .join(" ")
141 }
142
143 pub fn lrc_body(&self) -> String {
154 let mut out = String::new();
155 for line in &self.lines {
156 let text = if line.text.trim().is_empty() {
157 line.words
158 .iter()
159 .map(|w| w.text.trim())
160 .filter(|t| !t.is_empty())
161 .collect::<Vec<_>>()
162 .join(" ")
163 } else {
164 line.text.trim().to_owned()
165 };
166 let _ = writeln!(out, "[{}]{text}", lrc_stamp(line.start_s));
167 }
168 out
169 }
170
171 pub fn sylt_entries(&self) -> Vec<(u32, String)> {
177 let mut entries = Vec::new();
178 for (line_index, line) in self.lines.iter().enumerate() {
179 let words: Vec<&AlignedLineWord> = line
180 .words
181 .iter()
182 .filter(|w| !w.text.trim().is_empty())
183 .collect();
184 let prefix = if line_index == 0 { "" } else { "\n" };
185 if words.is_empty() {
186 let text = line.text.trim();
187 if !text.is_empty() {
188 entries.push((to_ms(line.start_s), format!("{prefix}{text}")));
189 }
190 continue;
191 }
192 for (word_index, word) in words.iter().enumerate() {
193 let text = word.text.trim();
194 let segment = if word_index == 0 {
195 format!("{prefix}{text}")
196 } else {
197 format!(" {text}")
198 };
199 entries.push((to_ms(word.start_s), segment));
200 }
201 }
202 entries
203 }
204}
205
206fn parse_word(raw: &Value) -> AlignedWord {
207 AlignedWord {
208 word: string(raw, "word"),
209 success: raw.get("success").and_then(Value::as_bool).unwrap_or(false),
210 start_s: f64_field(raw, "start_s"),
211 end_s: f64_field(raw, "end_s"),
212 p_align: f64_field(raw, "p_align"),
213 }
214}
215
216fn parse_line(raw: &Value) -> AlignedLine {
217 let words = raw
218 .get("words")
219 .and_then(Value::as_array)
220 .map(|items| {
221 items
222 .iter()
223 .map(|word| AlignedLineWord {
224 text: string(word, "text"),
225 start_s: f64_field(word, "start_s"),
226 end_s: f64_field(word, "end_s"),
227 })
228 .collect()
229 })
230 .unwrap_or_default();
231 AlignedLine {
232 text: string(raw, "text"),
233 start_s: f64_field(raw, "start_s"),
234 end_s: f64_field(raw, "end_s"),
235 section: string(raw, "section"),
236 words,
237 }
238}
239
240fn string(value: &Value, key: &str) -> String {
241 value
242 .get(key)
243 .and_then(Value::as_str)
244 .unwrap_or("")
245 .to_string()
246}
247
248fn f64_field(value: &Value, key: &str) -> f64 {
249 value.get(key).and_then(Value::as_f64).unwrap_or(0.0)
250}
251
/// Converts seconds to whole milliseconds, rounding to the nearest millisecond.
///
/// Non-finite, zero and negative inputs map to `0`.
fn to_ms(secs: f64) -> u32 {
    let is_positive_time = secs.is_finite() && secs > 0.0;
    if is_positive_time {
        // Float-to-integer `as` casts saturate, so oversized values clamp to `u32::MAX`.
        (secs * 1000.0).round() as u32
    } else {
        0
    }
}
259
260fn lrc_stamp(secs: f64) -> String {
263 let cs = centiseconds(secs);
264 format!("{:02}:{:02}.{:02}", cs / 6000, (cs / 100) % 60, cs % 100)
265}
266
/// Converts seconds to whole hundredths of a second, rounding to the nearest one.
///
/// Non-finite, zero and negative inputs map to `0`.
fn centiseconds(secs: f64) -> u64 {
    let is_positive_time = secs.is_finite() && secs > 0.0;
    if is_positive_time {
        // Float-to-integer `as` casts saturate, so oversized values clamp to `u64::MAX`.
        (secs * 100.0).round() as u64
    } else {
        0
    }
}
273
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture with three aligned words and three lines: a two-word verse line, a word-less
    /// `[Chorus]` marker line, and a one-word chorus line that starts past the minute mark.
    fn sample_json() -> Value {
        serde_json::json!({
            "aligned_words": [
                {"word": "Hello", "success": true, "start_s": 0.5, "end_s": 0.9, "p_align": 0.99},
                {"word": "world", "success": true, "start_s": 1.0, "end_s": 1.4, "p_align": 0.98},
                {"word": "again", "success": true, "start_s": 61.2, "end_s": 61.8, "p_align": 0.97}
            ],
            "aligned_lyrics": [
                {
                    "text": "Hello world",
                    "start_s": 0.5,
                    "end_s": 1.4,
                    "section": "Verse 1",
                    "words": [
                        {"text": "Hello", "start_s": 0.5, "end_s": 0.9},
                        {"text": "world", "start_s": 1.0, "end_s": 1.4}
                    ]
                },
                {
                    "text": "[Chorus]",
                    "start_s": 60.0,
                    "end_s": 60.0,
                    "section": "Chorus",
                    "words": []
                },
                {
                    "text": "again",
                    "start_s": 61.2,
                    "end_s": 61.8,
                    "section": "Chorus",
                    "words": [{"text": "again", "start_s": 61.2, "end_s": 61.8}]
                }
            ],
            "hoot_cer": 0.22,
            "is_streamed": false
        })
    }

    #[test]
    fn parses_words_and_lines() {
        let parsed = AlignedLyrics::from_json(&sample_json());
        assert_eq!(parsed.words.len(), 3);
        assert_eq!(parsed.lines.len(), 3);

        let first_word = &parsed.words[0];
        assert_eq!(first_word.word, "Hello");
        assert!(first_word.success);
        assert!((first_word.p_align - 0.99).abs() < 1e-9);

        let first_line = &parsed.lines[0];
        assert_eq!(first_line.section, "Verse 1");
        assert_eq!(first_line.words.len(), 2);
        assert_eq!(first_line.words[1].text, "world");

        assert!(!parsed.is_empty());
    }

    #[test]
    fn empty_arrays_are_empty() {
        let document = serde_json::json!({
            "aligned_words": [],
            "aligned_lyrics": [],
            "hoot_cer": 1.0,
            "is_streamed": false
        });
        let parsed = AlignedLyrics::from_json(&document);
        assert!(parsed.is_empty());
        assert_eq!(parsed.plain_text(), "");
        assert_eq!(parsed.lrc_body(), "");
        assert!(parsed.sylt_entries().is_empty());
    }

    #[test]
    fn missing_keys_map_to_empty() {
        // Neither an empty object nor a non-object value may produce content.
        for document in &[serde_json::json!({}), Value::Null] {
            assert!(AlignedLyrics::from_json(document).is_empty());
        }
        // Undecodable bytes fall back to the empty default as well.
        assert!(AlignedLyrics::from_bytes(b"not json").is_empty());
    }

    #[test]
    fn captures_waveform_hoot_cer_and_is_streamed_absent_safe() {
        // All three metadata keys present and well-typed.
        let full = AlignedLyrics::from_json(&serde_json::json!({
            "aligned_words": [],
            "aligned_lyrics": [],
            "waveform_data": [0.00044, 0.0, 0.00014, 0.0008, 0.00146],
            "hoot_cer": 0.22907083716651333_f64,
            "is_streamed": false
        }));
        assert_eq!(full.waveform_data.len(), 5);
        assert!((full.waveform_data[3] - 0.0008).abs() < 1e-9);
        assert!(matches!(
            full.hoot_cer,
            Some(cer) if (cer - 0.229_070_837).abs() < 1e-6
        ));
        assert_eq!(full.is_streamed, Some(false));
        // Metadata alone does not make the lyrics non-empty.
        assert!(full.is_empty());

        // Keys absent altogether.
        let bare = AlignedLyrics::from_json(&serde_json::json!({}));
        assert!(bare.waveform_data.is_empty());
        assert_eq!(bare.hoot_cer, None);
        assert_eq!(bare.is_streamed, None);

        // Keys present but with the wrong JSON types.
        let odd = AlignedLyrics::from_json(&serde_json::json!({
            "waveform_data": "nope",
            "hoot_cer": "high",
            "is_streamed": 1
        }));
        assert!(odd.waveform_data.is_empty());
        assert_eq!(odd.hoot_cer, None);
        assert_eq!(odd.is_streamed, None);
    }

    #[test]
    fn lrc_body_has_line_level_stamps() {
        let rendered = AlignedLyrics::from_json(&sample_json()).lrc_body();
        let expected = concat!(
            "[00:00.50]Hello world\n",
            "[01:00.00][Chorus]\n",
            "[01:01.20]again\n",
        );
        assert_eq!(rendered, expected);
    }

    #[test]
    fn plain_text_joins_line_text() {
        let rendered = AlignedLyrics::from_json(&sample_json()).plain_text();
        assert_eq!(rendered, "Hello world\n[Chorus]\nagain");
    }

    #[test]
    fn sylt_entries_are_word_level_with_line_breaks() {
        let expected: Vec<(u32, String)> = [
            (500, "Hello"),
            (1000, " world"),
            (60000, "\n[Chorus]"),
            (61200, "\nagain"),
        ]
        .iter()
        .map(|&(ms, text)| (ms, text.to_owned()))
        .collect();
        let actual = AlignedLyrics::from_json(&sample_json()).sylt_entries();
        assert_eq!(actual, expected);
    }

    #[test]
    fn stamps_round_and_do_not_wrap_minutes() {
        for &(secs, stamp) in &[(61.2, "01:01.20"), (3661.0, "61:01.00")] {
            assert_eq!(lrc_stamp(secs), stamp);
        }
        for &(secs, millis) in &[(1.2346, 1235_u32), (-1.0, 0)] {
            assert_eq!(to_ms(secs), millis);
        }
    }
}