use regex::Regex;
use serde::Deserialize;
use std::borrow::Cow;
use std::collections::HashMap;
/// Outcome of a successful [`RuleSet::match_token`] lookup.
///
/// `value` borrows from the rule set when a stored string is returned as-is
/// and is owned when it was assembled from regex capture groups. For hits on
/// the exact tables every other field is empty / `None` / `false`; for
/// pattern hits they are cloned from the rule that matched.
///
/// NOTE(review): the constraint fields are only carried through by the code
/// visible here; how they are enforced is up to the consumer — confirm the
/// exact semantics there.
#[derive(Debug, Clone)]
pub struct TokenMatch<'a> {
/// Resolved value for the matched token.
pub value: Cow<'a, str>,
/// Extra property/value pairs declared by the matching pattern rule.
pub side_effects: Vec<SideEffect>,
/// The rule's `not_before` list (presumably neighbouring tokens that veto
/// the match — TODO confirm against the consumer).
pub not_before: Option<Vec<String>>,
/// The rule's `not_after` list (presumably the mirror of `not_before` —
/// TODO confirm).
pub not_after: Option<Vec<String>>,
/// The rule's `requires_after` list (presumably neighbouring tokens that
/// must be present — TODO confirm).
pub requires_after: Option<Vec<String>>,
/// The rule's `requires_before` list (presumably the mirror of
/// `requires_after` — TODO confirm).
pub requires_before: Option<Vec<String>>,
/// The rule's `requires_context` flag; meaning is defined by the consumer.
pub requires_context: bool,
/// The rule's `reclaimable` flag; meaning is defined by the consumer.
pub reclaimable: bool,
/// The rule's `requires_nearby` list; meaning is defined by the consumer.
pub requires_nearby: Option<Vec<String>>,
}
/// A property/value pair attached to a pattern rule and handed back on the
/// [`TokenMatch`] produced by that rule.
///
/// NOTE(review): presumably an additional property the caller should set
/// when the rule matches — confirm against the consumer.
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
pub struct SideEffect {
/// Name of the property the side effect refers to.
pub property: String,
/// Value associated with that property.
pub value: String,
}
/// A compiled `[[patterns]]` entry as stored inside a [`RuleSet`].
#[derive(Debug)]
struct PatternRule {
/// Regex compiled from the entry's `match` key.
regex: Regex,
/// The entry's `value` string; either a literal result or a template with
/// `{N}` capture-group placeholders.
template: String,
/// `true` when `template` contains a `{`, i.e. it is run through
/// `substitute_captures` instead of being returned verbatim.
is_dynamic: bool,
/// Side effects copied onto every match produced by this rule.
side_effects: Vec<SideEffect>,
// The remaining fields are copied unchanged onto the resulting
// `TokenMatch`; they are not evaluated anywhere in the visible code.
not_before: Option<Vec<String>>,
not_after: Option<Vec<String>>,
requires_after: Option<Vec<String>>,
requires_before: Option<Vec<String>>,
requires_context: bool,
reclaimable: bool,
requires_nearby: Option<Vec<String>>,
}
/// Scope setting read from a rule file's optional `zone_scope` key.
///
/// NOTE(review): the variants are only parsed and stored in the visible
/// code; what each scope restricts is decided by the consumer — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum ZoneScope {
/// Used when `zone_scope` is absent or set to `"unrestricted"`.
#[default]
Unrestricted,
/// Selected by `zone_scope = "tech_only"`.
TechOnly,
/// Selected by `zone_scope = "after_anchor"`.
AfterAnchor,
}
/// All matching rules for one property, loaded from a TOML rule file via
/// [`RuleSet::from_toml`].
#[derive(Debug)]
pub struct RuleSet {
/// Property name taken from the rule file's `property` key.
// NOTE(review): within the visible code this field is only read by tests,
// which is presumably why the lint is silenced — confirm before removing.
#[allow(dead_code)]
pub property: String,
/// Scope parsed from the rule file's `zone_scope` key.
pub zone_scope: ZoneScope,
/// Case-insensitive exact table; keys are lowercased when loaded.
exact: HashMap<String, String>,
/// Case-sensitive exact table; keys are kept exactly as written.
exact_sensitive: HashMap<String, String>,
/// Regex rules in rule-file order; `match_token` returns the first hit.
patterns: Vec<PatternRule>,
}
/// Serde shape of a whole TOML rule file before validation and regex
/// compilation. Only `property` is mandatory; every other key falls back to
/// its default when missing.
#[derive(Deserialize)]
struct RawRuleFile {
/// Required `property` key.
property: String,
/// Optional `zone_scope` string, validated in `RuleSet::from_toml`.
#[serde(default)]
zone_scope: Option<String>,
/// Optional `[exact]` table.
#[serde(default)]
exact: HashMap<String, String>,
/// Optional `[exact_sensitive]` table.
#[serde(default)]
exact_sensitive: HashMap<String, String>,
/// Optional `[[patterns]]` array.
#[serde(default)]
patterns: Vec<RawPattern>,
}
/// Serde shape of one `[[patterns]]` entry. `match` and `value` are
/// required; all remaining keys are optional.
#[derive(Deserialize)]
struct RawPattern {
/// Regex source, written as `match` in the TOML (`match` is a Rust keyword,
/// hence the rename).
#[serde(rename = "match")]
pattern: String,
/// Literal result or `{N}` capture template.
value: String,
/// Optional list of side effects; empty when the key is absent.
#[serde(default)]
side_effects: Vec<RawSideEffect>,
// The keys below are stored as-is on the compiled rule; they are not
// interpreted in the visible code.
#[serde(default)]
not_before: Option<Vec<String>>,
#[serde(default)]
not_after: Option<Vec<String>>,
#[serde(default)]
requires_after: Option<Vec<String>>,
#[serde(default)]
requires_before: Option<Vec<String>>,
#[serde(default)]
requires_context: bool,
#[serde(default)]
reclaimable: bool,
#[serde(default)]
requires_nearby: Option<Vec<String>>,
}
/// Serde shape of one entry in a pattern's `side_effects` array; converted
/// field-for-field into a [`SideEffect`] by `RuleSet::from_toml`.
#[derive(Deserialize)]
struct RawSideEffect {
/// Required `property` key.
property: String,
/// Required `value` key.
value: String,
}
impl RuleSet {
pub fn from_toml(toml_str: &str) -> Self {
let raw: RawRuleFile =
toml::from_str(toml_str).unwrap_or_else(|e| panic!("Bad TOML rule file: {e}"));
let zone_scope = match raw.zone_scope.as_deref() {
None | Some("unrestricted") => ZoneScope::Unrestricted,
Some("tech_only") => ZoneScope::TechOnly,
Some("after_anchor") => ZoneScope::AfterAnchor,
Some(other) => panic!(
"Unknown zone_scope '{}' in {} rules. Valid: unrestricted, tech_only, after_anchor",
other, raw.property
),
};
let exact: HashMap<String, String> = raw
.exact
.into_iter()
.map(|(k, v)| (k.to_lowercase(), v))
.collect();
let patterns: Vec<PatternRule> = raw
.patterns
.into_iter()
.map(|p| {
let regex = Regex::new(&p.pattern).unwrap_or_else(|e| {
panic!("Bad regex in {} rules: `{}`: {e}", raw.property, p.pattern)
});
let is_dynamic = p.value.contains('{');
let side_effects = p
.side_effects
.into_iter()
.map(|s| SideEffect {
property: s.property,
value: s.value,
})
.collect();
PatternRule {
regex,
template: p.value,
is_dynamic,
side_effects,
not_before: p.not_before,
not_after: p.not_after,
requires_after: p.requires_after,
requires_before: p.requires_before,
requires_context: p.requires_context,
reclaimable: p.reclaimable,
requires_nearby: p.requires_nearby,
}
})
.collect();
Self {
property: raw.property,
zone_scope,
exact,
exact_sensitive: raw.exact_sensitive,
patterns,
}
}
pub fn match_token(&self, token: &str) -> Option<TokenMatch<'_>> {
if let Some(value) = self.exact_sensitive.get(token) {
return Some(TokenMatch::exact(Cow::Borrowed(value.as_str())));
}
let lower = token.to_lowercase();
if let Some(value) = self.exact.get(&lower) {
return Some(TokenMatch::exact(Cow::Borrowed(value.as_str())));
}
for rule in &self.patterns {
if !rule.is_dynamic {
if rule.regex.is_match(token) {
return Some(TokenMatch::from_pattern(
Cow::Borrowed(rule.template.as_str()),
rule,
));
}
} else {
if let Some(caps) = rule.regex.captures(token) {
let value = substitute_captures(&rule.template, &caps);
return Some(TokenMatch::from_pattern(Cow::Owned(value), rule));
}
}
}
None
}
#[cfg(test)]
pub fn exact_count(&self) -> usize {
self.exact.len()
}
#[cfg(test)]
pub fn pattern_count(&self) -> usize {
self.patterns.len()
}
}
impl<'a> TokenMatch<'a> {
fn exact(value: Cow<'a, str>) -> Self {
Self {
value,
side_effects: Vec::new(),
not_before: None,
not_after: None,
requires_after: None,
requires_before: None,
requires_context: false,
reclaimable: false,
requires_nearby: None,
}
}
fn from_pattern(value: Cow<'a, str>, rule: &PatternRule) -> Self {
Self {
value,
side_effects: rule.side_effects.clone(),
not_before: rule.not_before.clone(),
not_after: rule.not_after.clone(),
requires_after: rule.requires_after.clone(),
requires_before: rule.requires_before.clone(),
requires_context: rule.requires_context,
reclaimable: rule.reclaimable,
requires_nearby: rule.requires_nearby.clone(),
}
}
}
fn substitute_captures(template: &str, caps: ®ex::Captures<'_>) -> String {
let mut result = String::with_capacity(template.len());
let mut chars = template.chars().peekable();
while let Some(ch) = chars.next() {
if ch == '{' {
let mut digits = String::new();
while let Some(&d) = chars.peek() {
if d.is_ascii_digit() {
digits.push(d);
chars.next();
} else {
break;
}
}
if chars.peek() == Some(&'}') {
chars.next();
}
if let Ok(idx) = digits.parse::<usize>()
&& let Some(m) = caps.get(idx)
{
result.push_str(m.as_str());
}
} else {
result.push(ch);
}
}
result
}
// Unit tests for rule-file loading and token matching.
#[cfg(test)]
mod tests {
use super::*;
// Minimal rule file shared by most tests: four exact entries and two
// case-insensitive regex patterns with literal (non-template) values.
const TEST_TOML: &str = r#"
property = "video_codec"
[exact]
x264 = "H.264"
h264 = "H.264"
hevc = "H.265"
xvid = "Xvid"
[[patterns]]
match = '(?i)^[xh][.-]?265$'
value = "H.265"
[[patterns]]
match = '(?i)^rv\d{2}$'
value = "RealVideo"
"#;
// Reduces a match result to its owned value string for easy comparison.
fn val(m: Option<TokenMatch<'_>>) -> Option<String> {
m.map(|t| t.value.into_owned())
}
// The property name and both rule collections are populated from the TOML.
#[test]
fn test_parse_rule_file() {
let rules = RuleSet::from_toml(TEST_TOML);
assert_eq!(rules.property, "video_codec");
assert_eq!(rules.exact_count(), 4);
assert_eq!(rules.pattern_count(), 2);
}
// `[exact]` lookups ignore the token's letter case.
#[test]
fn test_exact_match() {
let rules = RuleSet::from_toml(TEST_TOML);
assert_eq!(val(rules.match_token("x264")), Some("H.264".into()));
assert_eq!(val(rules.match_token("X264")), Some("H.264".into()));
assert_eq!(val(rules.match_token("HEVC")), Some("H.265".into()));
assert_eq!(val(rules.match_token("XviD")), Some("Xvid".into()));
}
// Tokens missing from the exact table fall through to the regex patterns.
#[test]
fn test_regex_match() {
let rules = RuleSet::from_toml(TEST_TOML);
assert_eq!(val(rules.match_token("x.265")), Some("H.265".into()));
assert_eq!(val(rules.match_token("H-265")), Some("H.265".into()));
assert_eq!(val(rules.match_token("Rv20")), Some("RealVideo".into()));
}
// Tokens matching neither table nor any pattern yield `None`.
#[test]
fn test_no_match() {
let rules = RuleSet::from_toml(TEST_TOML);
assert!(rules.match_token("Movie").is_none());
assert!(rules.match_token("720p").is_none());
}
// NOTE(review): `hevc` matches neither pattern in TEST_TOML, so this only
// exercises the exact table; it does not show exact winning over a regex
// that would also match.
#[test]
fn test_exact_preferred_over_regex() {
let rules = RuleSet::from_toml(TEST_TOML);
assert_eq!(val(rules.match_token("hevc")), Some("H.265".into()));
}
// Loads the rule file at `../rules/video_codec.toml` (relative to this
// source file) and spot-checks a few tokens against it.
#[test]
fn test_load_video_codec_toml() {
let toml_str = include_str!("../rules/video_codec.toml");
let rules = RuleSet::from_toml(toml_str);
assert_eq!(rules.property, "video_codec");
assert!(rules.exact_count() >= 10);
assert!(rules.pattern_count() >= 5);
assert_eq!(val(rules.match_token("x264")), Some("H.264".into()));
assert_eq!(val(rules.match_token("HEVC")), Some("H.265".into()));
assert_eq!(val(rules.match_token("h.265")), Some("H.265".into()));
assert_eq!(val(rules.match_token("XviD")), Some("Xvid".into()));
assert_eq!(val(rules.match_token("AV1")), Some("AV1".into()));
assert_eq!(val(rules.match_token("Rv10")), Some("RealVideo".into()));
}
// `{N}` placeholders in `value` are replaced by capture group N.
#[test]
fn test_capture_group_template() {
let toml = r#"
property = "screen_size"
[exact]
[[patterns]]
match = '(?i)^(\d{3,4})x(\d{3,4})$'
value = "{2}p"
[[patterns]]
match = '(?i)^(\d{3,4})p(\d{2,3})$'
value = "{1}p"
"#;
let rules = RuleSet::from_toml(toml);
assert_eq!(val(rules.match_token("1920x1080")), Some("1080p".into()));
assert_eq!(val(rules.match_token("1280x720")), Some("720p".into()));
assert_eq!(val(rules.match_token("720p60")), Some("720p".into()));
assert_eq!(val(rules.match_token("1080p25")), Some("1080p".into()));
}
// Exact-table hits come back without side effects or neighbour constraints.
#[test]
fn test_exact_match_has_no_side_effects_or_constraints() {
let rules = RuleSet::from_toml(TEST_TOML);
let m = rules.match_token("x264").expect("should match");
assert!(m.side_effects.is_empty());
assert!(m.not_before.is_none());
assert!(m.not_after.is_none());
assert!(m.requires_after.is_none());
}
// A pattern's `side_effects` array is carried onto the resulting match.
#[test]
fn test_side_effects_from_toml() {
let toml = r#"
property = "source"
[exact]
[[patterns]]
match = '(?i)^dvd[-. ]?rip$'
value = "DVD"
side_effects = [
{ property = "other", value = "Rip" }
]
"#;
let rules = RuleSet::from_toml(toml);
let m = rules.match_token("DVDRip").expect("should match");
assert_eq!(m.value, "DVD");
assert_eq!(m.side_effects.len(), 1);
assert_eq!(m.side_effects[0].property, "other");
assert_eq!(m.side_effects[0].value, "Rip");
}
// `not_before`, `requires_after` and `not_after` lists are passed through
// from the rule to the match unchanged; unset ones stay `None`.
#[test]
fn test_neighbor_constraints_from_toml() {
let toml = r#"
property = "streaming_service"
[exact]
[[patterns]]
match = '(?i)^hd$'
value = "HD"
not_before = ["tv", "dvd"]
[[patterns]]
match = '(?i)^ae$'
value = "A&E"
requires_after = ["web"]
[[patterns]]
match = '(?i)^cam$'
value = "Camera"
not_after = ["web"]
"#;
let rules = RuleSet::from_toml(toml);
let hd = rules.match_token("HD").expect("should match");
assert_eq!(hd.value, "HD");
assert_eq!(
hd.not_before.as_deref(),
Some(&["tv".to_string(), "dvd".to_string()][..])
);
assert!(hd.not_after.is_none());
assert!(hd.requires_after.is_none());
let ae = rules.match_token("AE").expect("should match");
assert_eq!(ae.value, "A&E");
assert_eq!(ae.requires_after.as_deref(), Some(&["web".to_string()][..]));
assert!(ae.not_before.is_none());
let cam = rules.match_token("cam").expect("should match");
assert_eq!(cam.value, "Camera");
assert_eq!(cam.not_after.as_deref(), Some(&["web".to_string()][..]));
}
// A pattern that declares no side effects yields an empty list, not an error.
#[test]
fn test_pattern_without_side_effects_has_empty_vec() {
let rules = RuleSet::from_toml(TEST_TOML);
let m = rules.match_token("x.265").expect("should match regex");
assert_eq!(m.value, "H.265");
assert!(m.side_effects.is_empty());
assert!(m.not_before.is_none());
}
}