use regex::Regex;
use crate::matcher::regex_utils::{BoundarySpec, CharClass, check_boundary};
use crate::matcher::span::{MatchSpan, Property};
use std::sync::LazyLock;
/// Compiled pattern for a bit-rate token: a number with an optional one- or
/// two-digit fraction (`num`), optional whitespace, a `K`/`M` prefix in either
/// case (`unit`), and a `bps`, `bit`, or `bits` suffix in either case.
///
/// Digits are spelled `[0-9]` rather than `\d`: in the `regex` crate `\d` is
/// Unicode-aware and would accept multi-byte decimal digits, which makes
/// byte-offset arithmetic on match positions unsafe and leaks non-ASCII
/// digits into the captured number.
///
/// The unit and suffix are matched with Unicode mode switched off
/// (`(?-u:...)`), so `(?i)` applies ASCII case folding only. With Unicode
/// folding, `k` would also match U+212A KELVIN SIGN and `s` would match
/// U+017F LATIN SMALL LETTER LONG S; the `unit` capture is now always exactly
/// one of `k`, `K`, `m`, `M`.
static BIT_RATE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    Regex::new(
        r"(?i)(?P<num>[0-9]+(?:\.[0-9]{1,2})?)\s*(?-u:(?P<unit>[KkMm])(?:bps|bits?))",
    )
    .expect("BIT_RATE regex is valid")
});
/// Boundary constraints passed to `check_boundary` for every candidate
/// bit-rate match.
///
/// NOTE(review): `check_boundary` is defined in `regex_utils` and is not
/// visible here. Presumably `left` names the character class that must NOT
/// immediately precede the match (letters and digits) and `right` the class
/// that must NOT immediately follow it (letters) — confirm against
/// `check_boundary` before relying on this description.
static BIT_RATE_BOUNDARY: BoundarySpec = BoundarySpec {
left: Some(CharClass::AlphaDigit),
right: Some(CharClass::Alpha),
};
/// Scans `input` for bit-rate tokens and returns one [`MatchSpan`] per
/// accepted occurrence, in left-to-right order.
///
/// Each candidate found by `BIT_RATE_PATTERN` is checked with
/// `check_boundary` against `BIT_RATE_BOUNDARY`. A rejected candidate is
/// retried starting one character further right; an accepted candidate
/// resumes the scan at its end, so accepted spans never overlap.
///
/// The span covers the byte range of the whole match in `input`. Its value is
/// the captured number followed by the normalized unit: a `K` prefix yields
/// `"<num>Kbps"` with [`Property::AudioBitRate`], an `M` prefix yields
/// `"<num>Mbps"` with [`Property::VideoBitRate`]. Every span is given
/// `crate::priority::VOCABULARY` priority.
///
/// Returns an empty vector when nothing matches.
pub fn find_matches(input: &str) -> Vec<MatchSpan> {
    let bytes = input.as_bytes();
    let mut matches = Vec::new();
    let mut search_start = 0;

    while search_start < input.len() {
        let Some(cap) = BIT_RATE_PATTERN.captures(&input[search_start..]) else {
            break;
        };
        let full = cap.get(0).expect("group 0 always present in a regex match");
        // Match offsets are relative to the slice; convert them to offsets in `input`.
        let abs_start = search_start + full.start();
        let abs_end = search_start + full.end();

        if !check_boundary(bytes, abs_start, abs_end, &BIT_RATE_BOUNDARY) {
            // Retry from the next *character*, not the next byte: the match may
            // begin with a multi-byte character (the regex crate's `\d` is
            // Unicode-aware), and slicing `input` in the middle of one panics.
            let step = full.as_str().chars().next().map_or(1, char::len_utf8);
            search_start = abs_start + step;
            continue;
        }

        let num = cap
            .name("num")
            .expect("num group always present in BIT_RATE")
            .as_str();
        let unit = cap
            .name("unit")
            .expect("unit group always present in BIT_RATE")
            .as_str();

        // Unicode case-insensitive matching lets `k` also match U+212A KELVIN
        // SIGN, which ASCII lowercasing does not fold; accept it explicitly as
        // a `K` prefix instead of panicking on caller-supplied text.
        let (normalized_unit, property) = match unit {
            "k" | "K" | "\u{212A}" => ("Kbps", Property::AudioBitRate),
            "m" | "M" => ("Mbps", Property::VideoBitRate),
            _ => {
                // Not expected for this pattern; skip the token rather than
                // abort the whole scan.
                search_start = abs_end;
                continue;
            }
        };

        let value = format!("{num}{normalized_unit}");
        matches.push(
            MatchSpan::new(abs_start, abs_end, property, &value)
                .with_priority(crate::priority::VOCABULARY),
        );
        // A match is never empty, so this always makes progress.
        search_start = abs_end;
    }

    matches
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` produces exactly one match carrying the given
    /// normalized `value` and `property`.
    fn assert_single(input: &str, value: &str, property: Property) {
        let found = find_matches(input);
        assert_eq!(found.len(), 1);
        assert_eq!(found[0].value, value);
        assert_eq!(found[0].property, property);
    }

    // A `K` prefix directly after the number is reported as an audio bit rate.
    #[test]
    fn test_kbps() {
        assert_single("Music.Track.320Kbps.mp3", "320Kbps", Property::AudioBitRate);
    }

    // Whitespace between number and unit is dropped from the normalized value.
    #[test]
    fn test_kbps_with_space() {
        assert_single("Music [320 Kbps].mp3", "320Kbps", Property::AudioBitRate);
    }

    // An `M` prefix with a fractional number is reported as a video bit rate.
    #[test]
    fn test_mbps() {
        assert_single("Show.Name.19.1Mbps.mkv", "19.1Mbps", Property::VideoBitRate);
    }

    // The fractional part is optional.
    #[test]
    fn test_mbps_integer() {
        assert_single("Show.Name.20Mbps.mkv", "20Mbps", Property::VideoBitRate);
    }

    // Only the bit-rate bracket matches among several bracketed tokens.
    #[test]
    fn test_bracketed_mbps() {
        assert_single(
            "Title Name [480p][1.5Mbps][.mp4]",
            "1.5Mbps",
            Property::VideoBitRate,
        );
    }

    // A preceding codec token separated by a dot does not block the match.
    #[test]
    fn test_after_codec() {
        assert_single("H264.384Kbps.mkv", "384Kbps", Property::AudioBitRate);
    }

    // A name without any bit-rate token yields no matches.
    #[test]
    fn test_no_false_positive() {
        assert!(find_matches("Movie.2024.1080p.BluRay.mkv").is_empty());
    }
}