use std::collections::HashMap;
use std::path::{Path, PathBuf};
/// Stream coding-type byte that `parse_program_info_sequences` treats as a
/// subtitle stream carrying a 3-byte language code.
const PGS_CODING_TYPE: u8 = 0x90;
/// Second coding-type byte accepted by `parse_program_info_sequences`, handled
/// exactly like [`PGS_CODING_TYPE`].
// NOTE(review): Blu-ray coding-type tables usually list 0x91 as Interactive
// Graphics and 0x92 as text subtitles — confirm this value is intended.
const PGS_TEXT_CODING_TYPE: u8 = 0x91;
/// The first four bytes every accepted clip-info buffer must start with.
const CLPI_MAGIC: &[u8; 4] = b"HDMV";
/// Byte position in the header of the big-endian `u32` that gives the start of
/// the section read by `parse_sequence_info_times`.
const SEQUENCE_INFO_OFFSET_POS: usize = 8;
/// Byte position in the header of the big-endian `u32` that gives the start of
/// the section read by `parse_clpi_file`.
const PROGRAM_INFO_OFFSET_POS: usize = 12;
/// Buffers shorter than this many bytes are rejected before any parsing.
const MIN_CLPI_SIZE: usize = 40;
/// Looks up the clip-info file that belongs to `m2ts_path` and returns the
/// PID → language map parsed from it.
///
/// This is best-effort: if the clip-info file cannot be located, cannot be
/// read, or fails to parse, an empty map is returned instead of an error.
pub(crate) fn clpi_language_map(m2ts_path: &Path) -> HashMap<u16, String> {
    resolve_clpi_path(m2ts_path)
        .and_then(|clpi| std::fs::read(clpi).ok())
        .and_then(|bytes| parse_clpi_file(&bytes).ok())
        .unwrap_or_default()
}
/// Looks up the clip-info file that belongs to `m2ts_path` and returns the
/// `(start, end)` pair extracted by `parse_sequence_info_times`.
///
/// Returns `None` when the clip-info file cannot be located or read, or when
/// the parser rejects its contents.
pub(crate) fn clpi_presentation_times(m2ts_path: &Path) -> Option<(u64, u64)> {
    resolve_clpi_path(m2ts_path)
        .and_then(|clpi| std::fs::read(clpi).ok())
        .and_then(|bytes| parse_sequence_info_times(&bytes))
}
/// Extracts two big-endian 32-bit values (returned as `(start, end)`) from the
/// section whose offset is stored at `SEQUENCE_INFO_OFFSET_POS`.
///
/// Layout as read here, relative to the start of the section:
/// * byte 5  — ATC sequence count (must be non-zero),
/// * byte 6  — start of the first ATC entry (6 bytes); its fifth byte is the
///   STC sequence count (must be non-zero),
/// * byte 12 — start of the first STC entry (14 bytes); `start` is the `u32`
///   at entry offset 6 and `end` the `u32` at entry offset 10.
///
/// Only the first ATC entry and its first STC entry are inspected.
///
/// Returns `None` when the buffer is shorter than `MIN_CLPI_SIZE`, does not
/// begin with `CLPI_MAGIC`, the section offset is zero or out of range, the
/// section is shorter than 26 bytes, or either count is zero.
fn parse_sequence_info_times(data: &[u8]) -> Option<(u64, u64)> {
    const ATC_COUNT_AT: usize = 5;
    const FIRST_ATC_AT: usize = 6;
    const ATC_ENTRY_LEN: usize = 6;
    const STC_ENTRY_LEN: usize = 14;
    // 26 bytes: everything up to the end of the first STC entry.
    const MIN_SECTION_LEN: usize = FIRST_ATC_AT + ATC_ENTRY_LEN + STC_ENTRY_LEN;

    let header_ok = data.len() >= MIN_CLPI_SIZE && data.starts_with(CLPI_MAGIC);
    if !header_ok {
        return None;
    }

    let section_start = read_u32_be(data, SEQUENCE_INFO_OFFSET_POS)? as usize;
    if section_start == 0 {
        return None;
    }
    // The length filter covers every fixed index used below, so no further
    // bounds checks are required.
    let section = data
        .get(section_start..)
        .filter(|tail| tail.len() >= MIN_SECTION_LEN)?;

    let atc_count = section[ATC_COUNT_AT];
    let stc_count = section[FIRST_ATC_AT + 4];
    if atc_count == 0 || stc_count == 0 {
        return None;
    }

    let first_stc = FIRST_ATC_AT + ATC_ENTRY_LEN;
    let start = read_u32_be(section, first_stc + 6)?;
    let end = read_u32_be(section, first_stc + 10)?;
    Some((u64::from(start), u64::from(end)))
}
/// Maps `<root>/STREAM/<name>.m2ts` to the sibling clip-info file
/// `<root>/CLIPINF/<name>.clpi`.
///
/// The parent directory name must equal `STREAM` (ASCII case-insensitive).
/// Both `CLIPINF` and `clipinf` are probed, in that order, and the first
/// candidate that exists on disk is returned. Returns `None` when the path
/// does not have the expected shape, a component is not valid UTF-8, or no
/// candidate exists.
fn resolve_clpi_path(m2ts_path: &Path) -> Option<PathBuf> {
    let clip_name = m2ts_path.file_stem().and_then(|stem| stem.to_str())?;
    let parent = m2ts_path.parent()?;
    let parent_name = parent.file_name().and_then(|name| name.to_str())?;
    if !parent_name.eq_ignore_ascii_case("STREAM") {
        return None;
    }

    let root = parent.parent()?;
    let file_name = format!("{}.clpi", clip_name);
    ["CLIPINF", "clipinf"]
        .iter()
        .map(|dir| root.join(dir).join(&file_name))
        .find(|candidate| candidate.exists())
}
/// Reads a big-endian `u16` from `data` starting at `offset`.
///
/// Returns `None` when fewer than two bytes are available at `offset`,
/// including when `offset + 2` would overflow `usize`, so the function never
/// panics regardless of the offset it is given.
fn read_u16_be(data: &[u8], offset: usize) -> Option<u16> {
    let end = offset.checked_add(2)?;
    let bytes = data.get(offset..end)?;
    Some(u16::from_be_bytes([bytes[0], bytes[1]]))
}
/// Reads a big-endian `u32` from `data` starting at `offset`.
///
/// Returns `None` when fewer than four bytes are available at `offset`,
/// including when `offset + 4` would overflow `usize`, so the function never
/// panics regardless of the offset it is given.
fn read_u32_be(data: &[u8], offset: usize) -> Option<u32> {
    let end = offset.checked_add(4)?;
    let bytes = data.get(offset..end)?;
    Some(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]))
}
/// Parses a clip-info buffer into a PID → language map.
///
/// The buffer must be at least `MIN_CLPI_SIZE` bytes long and start with
/// `CLPI_MAGIC`. The section to parse begins at the offset stored at
/// `PROGRAM_INFO_OFFSET_POS`; that offset must be non-zero, inside the buffer,
/// and leave at least six bytes. Any violation yields `Err(())`.
///
/// The sequence count is looked for at section byte 5 first and, if that
/// attempt yields nothing, at byte 4. When neither attempt produces a map the
/// result is `Ok` with an empty map.
fn parse_clpi_file(data: &[u8]) -> Result<HashMap<u16, String>, ()> {
    const MIN_SECTION_LEN: usize = 6;
    // Candidate positions of the sequence-count byte, in priority order.
    const COUNT_OFFSETS: [usize; 2] = [5, 4];

    if data.len() < MIN_CLPI_SIZE || !data.starts_with(CLPI_MAGIC) {
        return Err(());
    }

    let section_start = read_u32_be(data, PROGRAM_INFO_OFFSET_POS).ok_or(())? as usize;
    if section_start == 0 {
        return Err(());
    }
    let section = data
        .get(section_start..)
        .filter(|tail| tail.len() >= MIN_SECTION_LEN)
        .ok_or(())?;

    let languages = COUNT_OFFSETS
        .iter()
        .find_map(|&count_offset| parse_program_info_sequences(section, count_offset))
        .unwrap_or_default();
    Ok(languages)
}
/// Walks the program sequences of a program-info section and collects the
/// language of every stream whose coding type is `PGS_CODING_TYPE` or
/// `PGS_TEXT_CODING_TYPE`.
///
/// `count_offset` is the index of the byte holding the number of sequences;
/// the first sequence starts immediately after it. Layout as read here:
///
/// * per sequence — 8 bytes: 4 bytes and a 2-byte value that are skipped,
///   then the stream count (1 byte) and the group count (1 byte);
/// * per stream — a big-endian `u16` PID, a 1-byte coding-info length, then
///   that many coding-info bytes whose first byte is the coding type and,
///   for the two accepted types, whose next three bytes are the language;
/// * per group — one skipped byte, a big-endian `u16` length, then that many
///   bytes to skip.
///
/// Languages that are empty after stripping trailing NULs, equal to `"und"`,
/// or not valid UTF-8 are ignored. For a PID seen more than once the first
/// language wins.
///
/// Returns `None` when `count_offset` is out of range, the sequence count is
/// zero or above 100, the data is truncated in a sequence or stream entry,
/// or no language was collected at all.
fn parse_program_info_sequences(
    section: &[u8],
    count_offset: usize,
) -> Option<HashMap<u16, String>> {
    const SEQUENCE_HEADER_LEN: usize = 8;
    const STREAM_HEADER_LEN: usize = 3;
    const GROUP_HEADER_LEN: usize = 3;
    // Sanity cap: a larger count means `count_offset` is probably wrong.
    const MAX_SEQUENCES: usize = 100;

    let num_sequences = usize::from(*section.get(count_offset)?);
    if num_sequences == 0 || num_sequences > MAX_SEQUENCES {
        return None;
    }

    let mut map = HashMap::new();
    let mut pos = count_offset + 1;

    for _ in 0..num_sequences {
        if pos + SEQUENCE_HEADER_LEN > section.len() {
            return None;
        }
        // Bytes 0..6 of the header are not needed here.
        let num_streams = usize::from(section[pos + 6]);
        let num_groups = usize::from(section[pos + 7]);
        pos += SEQUENCE_HEADER_LEN;

        for _ in 0..num_streams {
            if pos + STREAM_HEADER_LEN > section.len() {
                return None;
            }
            let stream_pid = read_u16_be(section, pos)?;
            let coding_info_len = usize::from(section[pos + 2]);
            pos += STREAM_HEADER_LEN;

            if coding_info_len == 0 {
                // Nothing follows the header for this stream.
                continue;
            }
            let coding_info = section.get(pos..pos + coding_info_len)?;
            let coding_type = coding_info[0];
            let is_subtitle =
                coding_type == PGS_CODING_TYPE || coding_type == PGS_TEXT_CODING_TYPE;

            if is_subtitle && coding_info_len >= 4 {
                let lang_bytes = &coding_info[1..4];
                let lang = std::str::from_utf8(lang_bytes)
                    .ok()
                    .map(|s| s.trim_end_matches('\0').to_string())
                    .filter(|s| !s.is_empty() && s != "und")
                    .map(|s| crate::lang::normalize_language(&s));
                if let Some(lang) = lang {
                    map.entry(stream_pid).or_insert(lang);
                }
            }
            pos += coding_info_len;
        }

        for _ in 0..num_groups {
            // A truncated group list is tolerated; the next sequence header
            // check decides whether parsing can continue.
            if pos + GROUP_HEADER_LEN > section.len() {
                break;
            }
            let group_len = usize::from(read_u16_be(section, pos + 1)?);
            pos += GROUP_HEADER_LEN + group_len;
        }
    }

    if map.is_empty() {
        None
    } else {
        Some(map)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the 40-byte header shared by the fixtures: `"HDMV0300"`, the
    /// sequence-info offset at byte 8, the program-info offset at byte 12,
    /// and zero padding up to `MIN_CLPI_SIZE`.
    fn clpi_header(sequence_info_offset: u32, program_info_offset: u32) -> Vec<u8> {
        let mut data = Vec::with_capacity(MIN_CLPI_SIZE);
        data.extend_from_slice(b"HDMV0300");
        data.extend_from_slice(&sequence_info_offset.to_be_bytes());
        data.extend_from_slice(&program_info_offset.to_be_bytes());
        data.resize(MIN_CLPI_SIZE, 0);
        assert_eq!(data.len(), 40);
        data
    }

    /// Checks the path arithmetic `resolve_clpi_path` relies on. The function
    /// itself is not called because it requires the file to exist on disk.
    #[test]
    fn test_resolve_clpi_path_valid_bdmv() {
        let path = Path::new("/media/disc/BDMV/STREAM/00001.m2ts");
        let stream_dir = path.parent().unwrap();
        let stream_dir_name = stream_dir.file_name().unwrap().to_str().unwrap();
        assert!(stream_dir_name.eq_ignore_ascii_case("STREAM"));
        let stem = path.file_stem().unwrap().to_str().unwrap();
        assert_eq!(stem, "00001");
        let bdmv_dir = stream_dir.parent().unwrap();
        let expected = bdmv_dir.join("CLIPINF").join("00001.clpi");
        assert_eq!(expected, Path::new("/media/disc/BDMV/CLIPINF/00001.clpi"));
    }

    #[test]
    fn test_resolve_clpi_path_not_bdmv() {
        let path = Path::new("/home/user/videos/movie.m2ts");
        assert!(resolve_clpi_path(path).is_none());
    }

    #[test]
    fn test_resolve_clpi_path_wrong_parent() {
        let path = Path::new("/media/disc/BDMV/BACKUP/00001.m2ts");
        assert!(resolve_clpi_path(path).is_none());
    }

    #[test]
    fn test_parse_clpi_bad_magic() {
        let mut data = vec![0u8; 64];
        data[0..4].copy_from_slice(b"XXXX");
        assert!(parse_clpi_file(&data).is_err());
    }

    #[test]
    fn test_parse_clpi_too_small() {
        let data = vec![0u8; 10];
        assert!(parse_clpi_file(&data).is_err());
    }

    /// Builds a clip-info buffer whose program-info section holds a single
    /// sequence with a single `PGS_CODING_TYPE` stream.
    fn build_clpi_with_pgs(pid: u16, lang: &[u8; 3]) -> Vec<u8> {
        let mut data = clpi_header(0, 40);

        // Bytes after the length field: reserved + count (2), sequence
        // header (8), PID (2), coding-info length (1), coding info (4; the
        // coding type plus the three language bytes).
        let section_content_len: u32 = 2 + 8 + 2 + 1 + 4;
        data.extend_from_slice(&section_content_len.to_be_bytes());
        data.push(0x00);
        data.push(0x01); // one sequence

        // Sequence header: 4 skipped bytes, 2-byte value, stream count, group count.
        data.extend_from_slice(&0u32.to_be_bytes());
        data.extend_from_slice(&0x0100u16.to_be_bytes());
        data.push(0x01);
        data.push(0x00);

        // Stream entry.
        data.extend_from_slice(&pid.to_be_bytes());
        data.push(0x04);
        data.push(PGS_CODING_TYPE);
        data.extend_from_slice(lang);
        data
    }

    #[test]
    fn test_parse_clpi_pgs_stream() {
        let data = build_clpi_with_pgs(0x1200, b"eng");
        let map = parse_clpi_file(&data).unwrap();
        assert_eq!(map.len(), 1);
        assert_eq!(map.get(&0x1200).unwrap(), "en");
    }

    #[test]
    fn test_parse_clpi_mixed_streams() {
        let mut data = clpi_header(0, 40);

        // Three stream entries of 2 + 1 + 4 bytes each.
        let section_content_len: u32 = 2 + 8 + 3 * (2 + 1 + 4);
        data.extend_from_slice(&section_content_len.to_be_bytes());
        data.push(0x00);
        data.push(0x01); // one sequence

        data.extend_from_slice(&0u32.to_be_bytes());
        data.extend_from_slice(&0x0100u16.to_be_bytes());
        data.push(0x03); // three streams
        data.push(0x00);

        // Coding type 0x02: must be ignored.
        data.extend_from_slice(&0x1011u16.to_be_bytes());
        data.push(0x04);
        data.push(0x02);
        data.extend_from_slice(b"\0\0\0");

        // Coding type 0x80: must be ignored even though it carries a language.
        data.extend_from_slice(&0x1100u16.to_be_bytes());
        data.push(0x04);
        data.push(0x80);
        data.extend_from_slice(b"jpn");

        // The only stream that should be reported.
        data.extend_from_slice(&0x1200u16.to_be_bytes());
        data.push(0x04);
        data.push(PGS_CODING_TYPE);
        data.extend_from_slice(b"fra");

        let map = parse_clpi_file(&data).unwrap();
        assert_eq!(map.len(), 1, "should only contain PGS stream");
        assert_eq!(map.get(&0x1200).unwrap(), "fr");
        assert!(!map.contains_key(&0x1011), "should not contain video");
        assert!(!map.contains_key(&0x1100), "should not contain audio");
    }

    #[test]
    fn test_parse_clpi_und_language_filtered() {
        let data = build_clpi_with_pgs(0x1200, b"und");
        // Either outcome is acceptable as long as no language is reported.
        if let Ok(map) = parse_clpi_file(&data) {
            assert!(map.is_empty());
        }
    }

    #[test]
    fn test_clpi_language_map_nonexistent() {
        let map = clpi_language_map(Path::new("/nonexistent/BDMV/STREAM/00001.m2ts"));
        assert!(map.is_empty());
    }

    /// Builds a clip-info buffer whose sequence-info section holds one ATC
    /// entry with one STC entry carrying the given start and end values.
    fn build_clpi_with_sequence_times(
        presentation_start_time: u32,
        presentation_end_time: u32,
    ) -> Vec<u8> {
        let mut data = clpi_header(40, 0);

        // Bytes after the length field: reserved + ATC count (2), ATC entry
        // (6), STC entry (14).
        let section_len: u32 = 2 + 6 + 14;
        data.extend_from_slice(&section_len.to_be_bytes());
        data.push(0x00);
        data.push(0x01); // one ATC sequence

        // ATC entry: 4 skipped bytes, STC count, one more byte.
        data.extend_from_slice(&0u32.to_be_bytes());
        data.push(0x01);
        data.push(0x00);

        // STC entry: 2-byte value, 4 skipped bytes, start, end.
        data.extend_from_slice(&0x1001u16.to_be_bytes());
        data.extend_from_slice(&0u32.to_be_bytes());
        data.extend_from_slice(&presentation_start_time.to_be_bytes());
        data.extend_from_slice(&presentation_end_time.to_be_bytes());
        data
    }

    fn build_clpi_with_sequence_info(presentation_start_time: u32) -> Vec<u8> {
        build_clpi_with_sequence_times(presentation_start_time, 0)
    }

    #[test]
    fn test_parse_sequence_info_start_time() {
        let data = build_clpi_with_sequence_info(54_000_000);
        let result = parse_sequence_info_times(&data);
        assert_eq!(result, Some((54_000_000, 0)));
    }

    #[test]
    fn test_parse_sequence_info_zero_offset() {
        let data = build_clpi_with_sequence_info(0);
        let result = parse_sequence_info_times(&data);
        assert_eq!(result, Some((0, 0)));
    }

    #[test]
    fn test_parse_sequence_info_times() {
        let start = 54_000_000u32;
        let end = 594_000_000u32;
        let data = build_clpi_with_sequence_times(start, end);
        let result = parse_sequence_info_times(&data);
        assert_eq!(result, Some((start as u64, end as u64)));
    }

    #[test]
    fn test_parse_sequence_info_no_section() {
        // A sequence-info offset of zero means there is no section to read.
        let data = clpi_header(0, 0);
        assert_eq!(parse_sequence_info_times(&data), None);
    }

    #[test]
    fn test_parse_sequence_info_bad_magic() {
        let mut data = build_clpi_with_sequence_info(90000);
        data[0..4].copy_from_slice(b"XXXX");
        assert_eq!(parse_sequence_info_times(&data), None);
    }

    #[test]
    fn test_parse_sequence_info_no_atc_sequences() {
        let mut data = build_clpi_with_sequence_info(90000);
        // Byte 45 is section byte 5, the ATC sequence count.
        data[45] = 0;
        assert_eq!(parse_sequence_info_times(&data), None);
    }
}