opus_parse/
opensubtitles.rs1use std::collections::{HashSet};
2use std::path::{Component, Path, PathBuf};
3use walkdir::{DirEntry, WalkDir};
4
5fn entry_is_subtitle(entry: &DirEntry) -> bool {
6 entry.file_type().is_file() &&
7 entry.file_name().to_str().map(|s| s.ends_with(".xml.gz")).unwrap_or(false)
8}
9
/// Extracts a numeric ID from the name of the directory that directly
/// contains `path`.
///
/// The second-to-last path component is inspected. Returns `None` when that
/// component is missing, is not a normal name (for example a root or `..`),
/// is not valid UTF-8, or does not parse as a `u64`.
fn id_of_path(path: &Path) -> Option<u64> {
    // Counting from the end: index 0 is the final component, index 1 is its parent directory.
    let dir_name = match path.components().rev().nth(1) {
        Some(Component::Normal(name)) => name,
        _ => return None,
    };
    dir_name
        .to_str()
        .and_then(|text| text.parse::<u64>().ok())
}
21
22pub fn walk<P: AsRef<Path>>(path: P) -> Box<Iterator<Item=(u64, PathBuf)>> {
24 let walker = WalkDir::new(path).into_iter();
26 let mut seen = HashSet::new();
27 Box::new(walker
28 .filter_map(|entry_result| entry_result.ok().and_then(
29 |entry| {
30 if entry_is_subtitle(&entry) { Some(entry) } else { None }
31 }))
32 .filter_map(move |subtitle_entry| {
33 let subtitle_path = subtitle_entry.path();
34 let movie_id = id_of_path(subtitle_path).unwrap();
35 if seen.contains(&movie_id) {
36 None
37 } else {
38 seen.insert(movie_id);
39 Some((movie_id, subtitle_path.to_owned()))
40 }
41 }))
42}