hunch 2.0.2

A media filename parser for movies, TV, and anime — built in Rust, inspired by guessit
Documentation
//! Size detection.
//!
//! Detects file sizes: 700MB, 1.4GB, 4.7GB, etc.
//!
//! ## Why this lives in Rust (not `src/rules/`)
//!
//! Float parsing of the numeric capture plus unit normalization
//! (MB/GB/TB → a single output format) requires arithmetic the TOML
//! schema can't express. See DESIGN.md D2 decision table → "requires
//! post-match arithmetic" + "requires type conversion".

use regex::Regex;

use crate::matcher::regex_utils::{BoundarySpec, CharClass, check_boundary};
use crate::matcher::span::{MatchSpan, Property};
use std::sync::LazyLock;

/// Lazily compiled size matcher: a decimal number with an optional
/// fractional part, optional whitespace, then one of the units
/// GB/MB/TB/GiB/MiB/TiB, all case-insensitive. The whole token is exposed
/// through the `size` named capture group.
static SIZE_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    const SIZE_REGEX: &str =
        r"(?i)(?P<size>[0-9]+(?:\.[0-9]+)?\s*(?:GB|MB|TB|GiB|MiB|TiB))";
    // The pattern is a compile-time literal, so a failure here is a bug.
    Regex::new(SIZE_REGEX).expect("SIZE regex is valid")
});

/// Boundary rules handed to `check_boundary` for `SIZE_PATTERN` candidates.
static SIZE_BOUNDARY: BoundarySpec = BoundarySpec {
    // Trailing side: stands in for the lookahead `(?i)(?![a-z])`.
    right: Some(CharClass::Alpha),
    // Leading side: stands in for the lookbehind `(?i)(?<![a-z0-9])`.
    left: Some(CharClass::AlphaDigit),
};

/// Scan for a file size token (e.g., `1.4 GB`, `700 MB`) and return it as a match.
///
/// Every `SIZE_PATTERN` candidate is examined from left to right, and the
/// first one that satisfies `SIZE_BOUNDARY` is reported. A candidate that is
/// rejected by the boundary check (for example one glued to a preceding
/// letter) no longer hides a valid size that appears later in `input`.
///
/// Returns a vector holding at most one [`MatchSpan`] tagged
/// [`Property::Size`]. Its value is the matched text exactly as it appears
/// in `input`, and its offsets are byte offsets into `input`. The vector is
/// empty when no candidate passes the boundary check.
pub fn find_matches(input: &str) -> Vec<MatchSpan> {
    let bytes = input.as_bytes();
    SIZE_PATTERN
        .captures_iter(input)
        .filter_map(|cap| cap.name("size"))
        // Keep looking past boundary-rejected candidates instead of giving up
        // after the leftmost regex hit.
        .find(|size| check_boundary(bytes, size.start(), size.end(), &SIZE_BOUNDARY))
        .map(|size| {
            MatchSpan::new(size.start(), size.end(), Property::Size, size.as_str())
                .with_priority(crate::priority::VOCABULARY)
        })
        .into_iter()
        .collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// An integer megabyte token is reported verbatim.
    #[test]
    fn test_size_mb() {
        let m = find_matches("Movie.700MB.mkv");
        assert_eq!(m.len(), 1);
        assert_eq!(m[0].value, "700MB");
    }

    /// A fractional gigabyte token is reported as one match, dot included.
    #[test]
    fn test_size_gb() {
        let m = find_matches("Movie.1.4GB.mkv");
        assert_eq!(m.len(), 1);
        assert_eq!(m[0].value, "1.4GB");
    }

    /// Input without any number-plus-unit token yields no matches.
    #[test]
    fn test_no_size() {
        assert!(find_matches("Movie.mkv").is_empty());
    }
}