use std::fmt::Write as _;
use serde_json::Value;
/// One element of the top-level `aligned_words` JSON array.
///
/// Every field is filled leniently by `parse_word`: a missing or wrongly
/// typed key falls back to `""`, `false` or `0.0` instead of failing.
#[derive(Debug, Clone, PartialEq)]
pub struct AlignedWord {
    /// Value of the `word` key.
    pub word: String,
    /// Value of the `success` key (`false` when absent).
    // NOTE(review): presumably "the aligner managed to place this word" — confirm with the API docs.
    pub success: bool,
    /// Value of the `start_s` key; presumably seconds, like the line-level timestamps.
    pub start_s: f64,
    /// Value of the `end_s` key; presumably seconds, like the line-level timestamps.
    pub end_s: f64,
    /// Value of the `p_align` key.
    // NOTE(review): looks like an alignment confidence in 0..=1 — confirm with the API docs.
    pub p_align: f64,
}
/// A single timed word nested inside an [`AlignedLine`] (the line's `words` array).
#[derive(Debug, Clone, PartialEq)]
pub struct AlignedLineWord {
    /// Value of the `text` key (empty when absent).
    pub text: String,
    /// Start time in seconds; converted to milliseconds by `sylt_entries`.
    pub start_s: f64,
    /// Value of the `end_s` key; presumably the end time in seconds.
    pub end_s: f64,
}
/// One element of the top-level `aligned_lyrics` JSON array: a lyric line
/// together with its optional per-word timing.
#[derive(Debug, Clone, PartialEq)]
pub struct AlignedLine {
    /// Value of the `text` key (empty when absent).
    pub text: String,
    /// Start time in seconds; used for the LRC stamp and the line-level SYLT fallback.
    pub start_s: f64,
    /// Value of the `end_s` key; presumably the end time in seconds.
    pub end_s: f64,
    /// Value of the `section` key, e.g. `"Verse 1"` or `"Chorus"`.
    pub section: String,
    /// Entries of the `words` array; empty when the key is missing or not an array.
    pub words: Vec<AlignedLineWord>,
}
/// Parsed alignment result: a flat word list plus a line-oriented view.
///
/// The `Default` value has both lists empty, which is also what the lenient
/// constructors return for unusable input.
#[derive(Debug, Clone, Default, PartialEq)]
pub struct AlignedLyrics {
    /// Entries of the `aligned_words` array.
    pub words: Vec<AlignedWord>,
    /// Entries of the `aligned_lyrics` array.
    pub lines: Vec<AlignedLine>,
}
impl AlignedLyrics {
    /// Builds an [`AlignedLyrics`] from an already-parsed JSON document.
    ///
    /// Reads the `aligned_words` and `aligned_lyrics` arrays. A key that is
    /// missing, or whose value is not an array, yields an empty list, so this
    /// constructor never fails.
    pub fn from_json(raw: &Value) -> AlignedLyrics {
        let words = raw
            .get("aligned_words")
            .and_then(Value::as_array)
            .map(|items| items.iter().map(parse_word).collect())
            .unwrap_or_default();
        let lines = raw
            .get("aligned_lyrics")
            .and_then(Value::as_array)
            .map(|items| items.iter().map(parse_line).collect())
            .unwrap_or_default();
        AlignedLyrics { words, lines }
    }

    /// Parses a raw JSON body and delegates to [`AlignedLyrics::from_json`].
    ///
    /// Best effort by design: a body that is not valid JSON produces the
    /// empty default value rather than an error.
    pub fn from_bytes(body: &[u8]) -> AlignedLyrics {
        serde_json::from_slice::<Value>(body)
            .map(|value| Self::from_json(&value))
            .unwrap_or_default()
    }

    /// Returns `true` when there are neither lines nor words.
    pub fn is_empty(&self) -> bool {
        self.lines.is_empty() && self.words.is_empty()
    }

    /// Renders the lyrics without any timing information.
    ///
    /// When lines are present, each line's text (trailing whitespace removed)
    /// is joined with `\n`. Otherwise the flat word list is joined with
    /// single spaces.
    pub fn plain_text(&self) -> String {
        if !self.lines.is_empty() {
            return self
                .lines
                .iter()
                .map(|line| line.text.trim_end())
                .collect::<Vec<_>>()
                .join("\n");
        }
        self.words
            .iter()
            .map(|word| word.word.as_str())
            .collect::<Vec<_>>()
            .join(" ")
    }

    /// Renders one `[mm:ss.xx]text` row per line, each terminated by `\n`.
    ///
    /// The row text is the trimmed line text. When that is blank, it is
    /// rebuilt from the line's non-blank words joined with single spaces.
    pub fn lrc_body(&self) -> String {
        let mut out = String::new();
        for line in &self.lines {
            let text = if line.text.trim().is_empty() {
                line.words
                    .iter()
                    .map(|w| w.text.trim())
                    .filter(|t| !t.is_empty())
                    .collect::<Vec<_>>()
                    .join(" ")
            } else {
                line.text.trim().to_owned()
            };
            // Writing into a `String` cannot fail, so the result is discarded.
            let _ = writeln!(out, "[{}]{text}", lrc_stamp(line.start_s));
        }
        out
    }

    /// Produces `(timestamp in milliseconds, text)` pairs with word-level timing.
    ///
    /// * The first segment of every emitted line except the very first one is
    ///   prefixed with `\n`; the remaining words of a line are prefixed with
    ///   a single space.
    /// * A line without any non-blank word falls back to one entry carrying
    ///   the trimmed line text at the line's start time.
    /// * A line with neither words nor text is skipped entirely.
    // NOTE(review): the name suggests these feed an ID3 SYLT frame — confirm at the call site.
    pub fn sylt_entries(&self) -> Vec<(u32, String)> {
        let mut entries = Vec::new();
        for line in &self.lines {
            let words: Vec<&AlignedLineWord> = line
                .words
                .iter()
                .filter(|w| !w.text.trim().is_empty())
                .collect();
            // Decide the line break from what has actually been emitted, not
            // from the line index: if leading lines were skipped as empty, the
            // first emitted entry must not start with a stray newline.
            let prefix = if entries.is_empty() { "" } else { "\n" };
            if words.is_empty() {
                let text = line.text.trim();
                if !text.is_empty() {
                    entries.push((to_ms(line.start_s), format!("{prefix}{text}")));
                }
                continue;
            }
            for (word_index, word) in words.iter().enumerate() {
                let text = word.text.trim();
                let segment = if word_index == 0 {
                    format!("{prefix}{text}")
                } else {
                    format!(" {text}")
                };
                entries.push((to_ms(word.start_s), segment));
            }
        }
        entries
    }
}
/// Converts one element of the `aligned_words` array into an [`AlignedWord`].
///
/// Lenient: a missing or wrongly typed key becomes `""`, `false` or `0.0`.
fn parse_word(raw: &Value) -> AlignedWord {
    // Only a genuine JSON boolean counts; anything else is treated as `false`.
    let success = match raw.get("success") {
        Some(Value::Bool(flag)) => *flag,
        _ => false,
    };
    let word = string(raw, "word");
    let start_s = f64_field(raw, "start_s");
    let end_s = f64_field(raw, "end_s");
    let p_align = f64_field(raw, "p_align");
    AlignedWord {
        word,
        success,
        start_s,
        end_s,
        p_align,
    }
}
/// Converts one element of the `aligned_lyrics` array into an [`AlignedLine`].
///
/// The nested `words` array is optional: when it is missing or not an array
/// the line simply has no words. Scalar fields fall back to `""` / `0.0`.
fn parse_line(raw: &Value) -> AlignedLine {
    let mut words = Vec::new();
    if let Some(items) = raw.get("words").and_then(Value::as_array) {
        for item in items {
            words.push(AlignedLineWord {
                text: string(item, "text"),
                start_s: f64_field(item, "start_s"),
                end_s: f64_field(item, "end_s"),
            });
        }
    }

    let text = string(raw, "text");
    let section = string(raw, "section");
    let start_s = f64_field(raw, "start_s");
    let end_s = f64_field(raw, "end_s");
    AlignedLine {
        text,
        start_s,
        end_s,
        section,
        words,
    }
}
/// Returns an owned copy of the string stored under `key`.
///
/// Yields an empty string when the key is absent or its value is not a JSON string.
fn string(value: &Value, key: &str) -> String {
    match value.get(key) {
        Some(Value::String(text)) => text.clone(),
        _ => String::new(),
    }
}
/// Returns the number stored under `key` as an `f64`.
///
/// Yields `0.0` when the key is absent or `Value::as_f64` has no value for it.
fn f64_field(value: &Value, key: &str) -> f64 {
    let number = value.get(key).and_then(Value::as_f64);
    number.unwrap_or_default()
}
/// Converts seconds to whole milliseconds, rounding to the nearest value.
///
/// Non-finite (NaN, ±infinity), zero and negative inputs all map to `0`.
/// Values too large for `u32` saturate at `u32::MAX` via the `as` cast.
fn to_ms(secs: f64) -> u32 {
    let usable = secs.is_finite() && secs > 0.0;
    if usable {
        (secs * 1000.0).round() as u32
    } else {
        0
    }
}
/// Formats a time in seconds as an LRC stamp body, `mm:ss.xx`.
///
/// The value is rounded to whole centiseconds first, so a carry propagates
/// cleanly into seconds and minutes. Minutes are not wrapped at 60 and are
/// zero-padded to at least two digits.
fn lrc_stamp(secs: f64) -> String {
    let total_cs = centiseconds(secs);
    let minutes = total_cs / 6000;
    let seconds = (total_cs / 100) % 60;
    let hundredths = total_cs % 100;
    format!("{:02}:{:02}.{:02}", minutes, seconds, hundredths)
}
/// Converts seconds to whole centiseconds, rounding to the nearest value.
///
/// Non-finite (NaN, ±infinity), zero and negative inputs all map to `0`.
fn centiseconds(secs: f64) -> u64 {
    let usable = secs.is_finite() && secs > 0.0;
    if usable {
        (secs * 100.0).round() as u64
    } else {
        0
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture with three aligned words and three lines; the middle line is a
    /// section marker that carries text but no word-level timing.
    fn sample_json() -> Value {
        serde_json::json!({
            "aligned_words": [
                {"word": "Hello", "success": true, "start_s": 0.5, "end_s": 0.9, "p_align": 0.99},
                {"word": "world", "success": true, "start_s": 1.0, "end_s": 1.4, "p_align": 0.98},
                {"word": "again", "success": true, "start_s": 61.2, "end_s": 61.8, "p_align": 0.97}
            ],
            "aligned_lyrics": [
                {"text": "Hello world", "start_s": 0.5, "end_s": 1.4, "section": "Verse 1",
                 "words": [
                    {"text": "Hello", "start_s": 0.5, "end_s": 0.9},
                    {"text": "world", "start_s": 1.0, "end_s": 1.4}
                 ]},
                {"text": "[Chorus]", "start_s": 60.0, "end_s": 60.0, "section": "Chorus", "words": []},
                {"text": "again", "start_s": 61.2, "end_s": 61.8, "section": "Chorus",
                 "words": [{"text": "again", "start_s": 61.2, "end_s": 61.8}]}
            ],
            "hoot_cer": 0.22,
            "is_streamed": false
        })
    }

    /// Both arrays are decoded and nested fields land in the right places.
    #[test]
    fn parses_words_and_lines() {
        let lyrics = AlignedLyrics::from_json(&sample_json());
        assert_eq!(lyrics.words.len(), 3);
        assert_eq!(lyrics.lines.len(), 3);

        let first_word = &lyrics.words[0];
        assert_eq!(first_word.word, "Hello");
        assert!(first_word.success);
        assert!((first_word.p_align - 0.99).abs() < 1e-9);

        let first_line = &lyrics.lines[0];
        assert_eq!(first_line.section, "Verse 1");
        assert_eq!(first_line.words.len(), 2);
        assert_eq!(first_line.words[1].text, "world");

        assert!(!lyrics.is_empty());
    }

    /// Present-but-empty arrays give an empty value and empty renderings.
    #[test]
    fn empty_arrays_are_empty() {
        let payload = serde_json::json!({
            "aligned_words": [], "aligned_lyrics": [], "hoot_cer": 1.0, "is_streamed": false
        });
        let lyrics = AlignedLyrics::from_json(&payload);
        assert!(lyrics.is_empty());
        assert_eq!(lyrics.plain_text(), "");
        assert_eq!(lyrics.lrc_body(), "");
        assert!(lyrics.sylt_entries().is_empty());
    }

    /// Missing keys, a non-object document and invalid JSON all degrade to empty.
    #[test]
    fn missing_keys_map_to_empty() {
        let from_empty_object = AlignedLyrics::from_json(&serde_json::json!({}));
        assert!(from_empty_object.is_empty());

        let from_null = AlignedLyrics::from_json(&Value::Null);
        assert!(from_null.is_empty());

        let from_garbage = AlignedLyrics::from_bytes(b"not json");
        assert!(from_garbage.is_empty());
    }

    /// One stamped row per line, using the line's start time.
    #[test]
    fn lrc_body_has_line_level_stamps() {
        let rendered = AlignedLyrics::from_json(&sample_json()).lrc_body();
        let expected = concat!(
            "[00:00.50]Hello world\n",
            "[01:00.00][Chorus]\n",
            "[01:01.20]again\n",
        );
        assert_eq!(rendered, expected);
    }

    /// Plain text is the line texts separated by newlines.
    #[test]
    fn plain_text_joins_line_text() {
        let rendered = AlignedLyrics::from_json(&sample_json()).plain_text();
        assert_eq!(rendered, "Hello world\n[Chorus]\nagain");
    }

    /// Word-level entries, with a newline opening every line after the first.
    #[test]
    fn sylt_entries_are_word_level_with_line_breaks() {
        let actual = AlignedLyrics::from_json(&sample_json()).sylt_entries();
        let expected: Vec<(u32, String)> = [
            (500, "Hello"),
            (1000, " world"),
            (60000, "\n[Chorus]"),
            (61200, "\nagain"),
        ]
        .iter()
        .map(|&(ms, text)| (ms, text.to_owned()))
        .collect();
        assert_eq!(actual, expected);
    }

    /// Stamps keep counting minutes past 59; millisecond conversion rounds and clamps.
    #[test]
    fn stamps_round_and_do_not_wrap_minutes() {
        assert_eq!(lrc_stamp(61.2), "01:01.20");
        assert_eq!(lrc_stamp(3661.0), "61:01.00");
        assert_eq!(to_ms(1.2346), 1235);
        assert_eq!(to_ms(-1.0), 0);
    }
}