opus_parse/
opensubtitles.rs

1use std::collections::{HashSet};
2use std::path::{Component, Path, PathBuf};
3use walkdir::{DirEntry, WalkDir};
4
5fn entry_is_subtitle(entry: &DirEntry) -> bool {
6    entry.file_type().is_file() &&
7        entry.file_name().to_str().map(|s| s.ends_with(".xml.gz")).unwrap_or(false)
8}
9
/// Extracts the numeric id encoded in the name of the directory that
/// directly contains `path`.
///
/// Returns `None` when `path` has fewer than two components, when the
/// second-to-last component is not a normal name (e.g. a root or `..`),
/// when that name is not valid UTF-8, or when it does not parse as a `u64`.
fn id_of_path(path: &Path) -> Option<u64> {
    // Walking the components backwards, index 0 is the last component
    // and index 1 is the one just before it.
    let dir_name = match path.components().rev().nth(1) {
        Some(Component::Normal(name)) => name,
        _ => return None,
    };
    match dir_name.to_str() {
        Some(text) => text.parse::<u64>().ok(),
        None => None,
    }
}
21
22/// Walk the tree of all subtitles for single language
23pub fn walk<P: AsRef<Path>>(path: P) -> Box<Iterator<Item=(u64, PathBuf)>> {
24    // XXX: Returns a boxed iterator. Change to impl Iterator when in stable.
25    let walker = WalkDir::new(path).into_iter();
26    let mut seen = HashSet::new();
27    Box::new(walker
28        .filter_map(|entry_result| entry_result.ok().and_then(
29            |entry| {
30                if entry_is_subtitle(&entry) { Some(entry) } else { None }
31            }))
32        .filter_map(move |subtitle_entry| {
33            let subtitle_path = subtitle_entry.path();
34            let movie_id = id_of_path(subtitle_path).unwrap();
35            if seen.contains(&movie_id) {
36                None
37            } else {
38                seen.insert(movie_id);
39                Some((movie_id, subtitle_path.to_owned()))
40            }
41        }))
42}