1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
//! SRT-format subtitle support.

use std::fs::File;
use std::io::Read;
use std::path::Path;

use errors::*;
use decode::smart_decode;
use clean::{clean_subtitle_file, strip_formatting};
use grammar;
use lang::Lang;
use time::Period;

/// Format seconds using the standard SRT time format (`HH:MM:SS,mmm`).
///
/// The time is rounded to the nearest millisecond *before* it is split
/// into fields, so a value such as `59.9996` carries into the minutes
/// (`00:01:00,000`) rather than producing an out-of-range seconds field
/// (`00:00:60,000`).  Negative and NaN inputs have no SRT representation
/// and are formatted as `00:00:00,000`.  Hours are zero-padded to at least
/// two digits and grow as needed.
pub fn format_time(time: f32) -> String {
    // Widen to `f64` so scaling to milliseconds adds no extra rounding
    // error; `max` maps both negative values and NaN to zero before the
    // integer conversion.
    let total_ms = ((time as f64) * 1000.0).round().max(0.0) as u64;
    let (total_secs, ms) = (total_ms / 1000, total_ms % 1000);
    let (total_mins, s) = (total_secs / 60, total_secs % 60);
    let (h, m) = (total_mins / 60, total_mins % 60);
    format!("{:02}:{:02}:{:02},{:03}", h, m, s, ms)
}

/// One entry of an SRT file: its index, the period during which it is
/// displayed, and its lines of text.  Some optional fields used by various
/// versions of the file format are not represented here.
#[derive(Clone, Debug, PartialEq, RustcEncodable)]
pub struct Subtitle {
    /// Index of this subtitle within its file.  These should be
    /// normalized to start at 1 when written out.
    pub index: usize,

    /// Time period during which this subtitle is shown.
    pub period: Period,

    /// Text of this subtitle, one entry per line.
    pub lines: Vec<String>,
}

impl Subtitle {
    /// Render this subtitle as a block of SRT text: the index on its own
    /// line, then the `begin --> end` timing line, then the text lines
    /// joined by newlines, followed by a final newline.
    pub fn to_string(&self) -> String {
        let begin = format_time(self.period.begin());
        let end = format_time(self.period.end());
        let text = self.lines.join("\n");
        format!("{}\n{} --> {}\n{}\n", self.index, begin, end, text)
    }

    /// Return the text of this subtitle as a single line: the lines are
    /// joined with spaces and the result is passed through
    /// `strip_formatting`.
    pub fn plain_text(&self) -> String {
        let joined = self.lines.join(" ");
        strip_formatting(&joined)
    }
}

/// An SRT-format subtitle file, held in memory as a list of subtitles.
#[derive(PartialEq, Debug)]
pub struct SubtitleFile {
    /// Every subtitle contained in this file.
    pub subtitles: Vec<Subtitle>,
}

impl SubtitleFile {
    /// Parse raw subtitle text using `grammar::subtitle_file`, converting
    /// any parse failure into this crate's error type.
    pub fn from_str(data: &str) -> Result<SubtitleFile> {
        let parsed = try!(grammar::subtitle_file(data));
        Ok(parsed)
    }

    /// Read the whole file at `path`, decode its bytes with
    /// `smart_decode`, and parse the resulting text.
    pub fn from_path(path: &Path) -> Result<SubtitleFile> {
        let mut input = try!(File::open(path));
        let mut raw = Vec::new();
        try!(input.read_to_end(&mut raw));
        let text = try!(smart_decode(&raw));
        SubtitleFile::from_str(&text)
    }

    /// Parse the file at `path` and then run the result through
    /// `clean_subtitle_file`.
    pub fn cleaned_from_path(path: &Path) -> Result<SubtitleFile> {
        let parsed = try!(SubtitleFile::from_path(path));
        let cleaned = try!(clean_subtitle_file(&parsed));
        Ok(cleaned)
    }

    /// Serialize every subtitle back to SRT text, separating consecutive
    /// subtitles with a blank line and prefixing the output with a BOM.
    pub fn to_string(&self) -> String {
        // A BOM (byte-order mark) is generally discouraged on Linux, but
        // is sometimes needed for good results under Windows.  It is
        // emitted here because, according to Wikipedia, SRT files default
        // to various legacy encodings, and the BOM can be used to mark the
        // file as Unicode.
        let mut out = String::from("\u{FEFF}");
        for (i, sub) in self.subtitles.iter().enumerate() {
            if i > 0 {
                out.push('\n');
            }
            out.push_str(&sub.to_string());
        }
        out
    }

    /// Return the first subtitle whose `index` field equals `index`, or
    /// `None` if there is no such subtitle.
    pub fn find(&self, index: usize) -> Option<&Subtitle> {
        for sub in &self.subtitles {
            if sub.index == index {
                return Some(sub);
            }
        }
        None
    }

    /// Detect the language of these subtitles by handing the plain text
    /// of every subtitle, one per line, to `Lang::for_text`.
    pub fn detect_language(&self) -> Option<Lang> {
        let mut text = String::new();
        for (i, sub) in self.subtitles.iter().enumerate() {
            if i > 0 {
                text.push('\n');
            }
            text.push_str(&sub.plain_text());
        }
        Lang::for_text(&text)
    }
}

#[cfg(test)]
mod test {
    use std::path::Path;
    use srt::{SubtitleFile,Subtitle};
    use lang::Lang;
    use time::Period;

    /// Parse the subtitle file at `path`, panicking if it cannot be loaded.
    fn load(path: &str) -> SubtitleFile {
        SubtitleFile::from_path(Path::new(path)).unwrap()
    }

    // Parsing a fixture yields the expected count, indices, times and text.
    #[test]
    fn subtitle_file_from_path() {
        let file = load("fixtures/sample.es.srt");
        assert_eq!(5, file.subtitles.len());

        let first = &file.subtitles[0];
        assert_eq!(16, first.index);
        assert_eq!(62.328, first.period.begin());
        assert_eq!(64.664, first.period.end());
        let first_lines = vec!["¡Si! ¡Aang ha vuelto!".to_string()];
        assert_eq!(first_lines, first.lines);

        // A subtitle spanning two lines keeps them as separate entries.
        let third = &file.subtitles[2];
        let third_lines = vec!["Tu diste la señal a la armada".to_string(),
                               "del fuego con la bengala,".to_string()];
        assert_eq!(third_lines, third.lines);
    }

    // A single subtitle serializes to index, timing line and text lines.
    #[test]
    fn subtitle_to_string() {
        let lines = vec!["Line 1".to_string(), "<i>Line 2</i>".to_string()];
        let period = Period::new(61.5, 63.75).unwrap();
        let sub = Subtitle { index: 4, period: period, lines: lines };
        let expected = r"4
00:01:01,500 --> 00:01:03,750
Line 1
<i>Line 2</i>
".to_string();
        assert_eq!(expected, sub.to_string());
    }

    // Parsing and re-serializing a file reproduces the input exactly.
    #[test]
    fn subtitle_file_to_string() {
        let data = "\u{FEFF}16
00:01:02,328 --> 00:01:04,664
Line 1.1

17
00:01:12,839 --> 00:01:13,839
Line 2.1
";
        let parsed = SubtitleFile::from_str(data).unwrap();
        assert_eq!(data, &parsed.to_string());
    }

    // Each sample fixture is detected as the language named in its path.
    #[test]
    fn detect_language() {
        let cases = [("fixtures/sample.es.srt", "es"),
                     ("fixtures/sample.en.srt", "en")];
        for &(path, code) in cases.iter() {
            let file = load(path);
            assert_eq!(Some(Lang::iso639(code).unwrap()),
                       file.detect_language());
        }
    }
}