1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#[macro_use] extern crate lazy_static;
extern crate regex;
extern crate itertools;

mod rope;

use rope::Rope;
use regex::Regex;
use itertools::Itertools;

/// Metadata extracted from a media file name.
///
/// Every field is optional: a field is `None` when the corresponding pattern
/// was not found in the name (or, for the numeric fields, when the matched
/// text could not be parsed as a `u32`).
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct MediaInfo {
    /// Title of the show or movie, with surrounding brackets, spaces and
    /// punctuation trimmed.
    pub title: Option<String>,
    /// Release group, e.g. `HorribleSubs` or `ETRG`.
    pub group: Option<String>,
    /// Video resolution as written in the name, e.g. `720p` or `1280x720`.
    pub resolution: Option<String>,
    /// Season number.
    pub season: Option<u32>,
    /// Episode number.
    pub episode: Option<u32>,
    /// Release source as written in the name, e.g. `BD`, `HDRip`, `Web Dl`.
    pub source: Option<String>,
    /// Four-digit year.
    pub year: Option<u32>,
    /// Video codec as written in the name, e.g. `x264` or `XViD`.
    pub codec: Option<String>,
    /// Audio format as written in the name, e.g. `AAC` or `AC3`.
    pub audio: Option<String>,
    /// File extension without the leading dot, e.g. `mkv`.
    pub extension: Option<String>,
    /// Eight-character uppercase hexadecimal checksum found in square
    /// brackets, e.g. `6860573F`.
    pub checksum: Option<String>,
}

/// Finds the first part of `rope` that `regex` matches, marks the matched
/// span on that part, and returns the captured text.
///
/// The returned text is the first capture group (after group 0) that took
/// part in the match. If the pattern has no capture groups, or none of them
/// participated, the text of the whole match is returned instead of
/// panicking.
///
/// Returns `None`, leaving `rope` untouched, when no part matches.
fn parse_pattern(rope: &mut Rope, regex: &Regex) -> Option<String> {
    // Search first and mark afterwards: the search only needs a shared borrow
    // of `rope`, and it must have ended before `mark_part_range` can borrow
    // the rope mutably. Everything kept in `hit` is owned for that reason.
    let hit = rope.iter().enumerate().filter_map(|(index, part)| {
        let captures = match regex.captures(part.string) {
            Some(captures) => captures,
            None => return None,
        };
        let (start, end) = match captures.pos(0) {
            Some(range) => range,
            None => return None,
        };

        let value = captures.iter().skip(1)
            .filter_map(|group| group)
            .next()
            // No participating group: fall back to the whole match.
            .unwrap_or(&part.string[start..end])
            .to_owned();

        Some((index, value, (start, end)))
    }).next();

    hit.map(|(index, value, (start, end))| {
        rope.mark_part_range(index, start..end);
        value
    })
}

/// Extracts release metadata (group, episode, resolution, ...) from a media
/// file name.
///
/// Each field is looked up with its own regular expression via
/// `parse_pattern`, which marks the matched span on a `Rope` built over
/// `filename`. Whatever is still unmarked once all patterns have run is the
/// candidate text for the title: the longest unmarked fragment is taken and
/// trimmed of brackets, spaces, dashes, underscores and dots.
///
/// Fields whose pattern is not found are `None`. The numeric fields are also
/// `None` when the matched digits do not parse as a `u32`.
pub fn parse_filename(filename: &str) -> MediaInfo {
    lazy_static! {
        static ref EXTENSION_REGEX: Regex = Regex::new("\\.([A-Za-z0-9]{2,4})$").unwrap();
        static ref RESOLUTION_REGEX: Regex = Regex::new("([0-9]{3,4}p|[0-9]{3,4}x[0-9]{3,4})").unwrap();
        static ref GROUP_REGEX: Regex = Regex::new("(?:^\\[([^]]+)\\]|- ?([^-]+)$)").unwrap();
        static ref EPISODE_REGEX: Regex = Regex::new("(?:[eE]([0-9]{2,3})|[^0-9A-Za-z]([0-9]{2,3})(?:v[0-9])?[^0-9A-Za-z])").unwrap();
        static ref SEASON_REGEX: Regex = Regex::new("[sS]([0-9]{1,2})").unwrap();
        static ref SOURCE_REGEX: Regex = Regex::new("((?i)(?:PPV.)?[HP]DTV|(?:HD)?CAM|BRRIP|[^a-z]TS[^a-z]|(?:PPV )?WEB.?DL(?: DVDRip)?|HDRip|DVDRip|CamRip|W[EB]BRip|BluRay|BD|DVD|DvDScr|hdtv)").unwrap();
        // Years 1900-2099. The previous pattern stopped at 2019, so releases
        // from 2020 onwards never had their year recognised.
        static ref YEAR_REGEX: Regex = Regex::new("((19|20)[0-9]{2})").unwrap();
        static ref CODEC_REGEX: Regex = Regex::new("((?i)xvid|x264|h\\.?264)").unwrap();
        static ref AUDIO_REGEX: Regex = Regex::new("((?i)MP3|DD5\\.?1|Dual[- ]Audio|LiNE|DTS|AAC(?:\\.?2\\.0)?|AC3(?:\\.5\\.1)?)").unwrap();
        static ref CRC_REGEX: Regex = Regex::new("\\[([0-9A-F]{8})\\]").unwrap();
    }

    let mut rope = Rope::new(&filename);

    // The order of these calls is significant: every successful match marks
    // its span on the shared rope, so the most specific patterns run first
    // and the loosest ones (season, year, episode) run last.
    let extension = parse_pattern(&mut rope, &EXTENSION_REGEX);
    let checksum = parse_pattern(&mut rope, &CRC_REGEX);
    let source = parse_pattern(&mut rope, &SOURCE_REGEX);
    let codec = parse_pattern(&mut rope, &CODEC_REGEX);
    let audio = parse_pattern(&mut rope, &AUDIO_REGEX);
    let resolution = parse_pattern(&mut rope, &RESOLUTION_REGEX);
    let group = parse_pattern(&mut rope, &GROUP_REGEX);
    let season = parse_pattern(&mut rope, &SEASON_REGEX)
        .and_then(|s| s.parse::<u32>().ok());
    let year = parse_pattern(&mut rope, &YEAR_REGEX)
        .and_then(|s| s.parse::<u32>().ok());
    let episode = parse_pattern(&mut rope, &EPISODE_REGEX)
        .and_then(|s| s.parse::<u32>().ok());

    // The title is the longest fragment that no pattern claimed, stripped of
    // the separators and brackets that usually surround it.
    let separators: &[_] = &['(', ')', '[', ']', ' ', '-', '_', '.'];
    let title = rope.unmarked()
        .map(|p| p.string)
        .sorted_by(|a, b| Ord::cmp(&a.len(), &b.len()))
        .pop()
        .map(|s| s.trim_matches(separators).to_owned());

    MediaInfo {
        title: title,
        group: group,
        resolution: resolution,
        season: season,
        episode: episode,
        source: source,
        year: year,
        codec: codec,
        audio: audio,
        extension: extension,
        checksum: checksum,
    }
}

#[cfg(test)]
mod tests {
    use super::parse_filename;

    // Parses `$str` and checks that every listed field equals `Some(value)`.
    // Fields that are not listed are not checked at all.
    //
    // `assert_eq!` is used so that a failure prints both the actual and the
    // expected value; the parsed `MediaInfo` is also printed up front, which
    // the test harness shows for failing tests.
    macro_rules! assert_parse {
        ( $str:expr, {
            $($field:ident : $value:expr),*
        } ) => {
            {
                let info = parse_filename($str);
                println!("{:?}", info);
                $(assert_eq!(info.$field, Some(($value).into()));)*
            }
        }
    }

    #[test]
    fn parse() {
        // Fansub style: leading [group], " - NN" episode, bracketed resolution.
        assert_parse!("[HorribleSubs] Mayoiga - 03 [720p].mkv", {
            group: "HorribleSubs",
            episode: 3u32,
            resolution: "720p",
            title: "Mayoiga"
        });

        // Scene style: SxxEyy marker followed by resolution, source and codec.
        assert_parse!("Game of Thrones Season 6 S06E05 720p Web Dl x264 Mrlss", {
            title: "Game of Thrones Season 6",
            season: 6u32,
            episode: 5u32,
            resolution: "720p",
            source: "Web Dl",
            codec: "x264"
        });

        // Movie with a year and a trailing "-GROUP" suffix.
        assert_parse!("The Ones Below 2015 HDRip XViD-ETRG", {
            title: "The Ones Below",
            source: "HDRip",
            group: "ETRG",
            year: 2015u32,
            codec: "XViD"
        });

        // Parenthesised details with a WIDTHxHEIGHT resolution.
        assert_parse!("Mega Movie (BD 1280x720 10bit)", {
            title: "Mega Movie",
            source: "BD",
            resolution: "1280x720"
        });

        // Hyphenated title plus a bracketed checksum and a file extension.
        assert_parse!("[RightShiftBy2] Akagami no Shirayuki-hime - 15 [720p][6860573F].mp4", {
            title: "Akagami no Shirayuki-hime",
            group: "RightShiftBy2",
            episode: 15u32,
            resolution: "720p",
            checksum: "6860573F",
            extension: "mp4"
        });
    }
}