srt_parser/
lib.rs

1#![warn(missing_docs)]
2#![warn(clippy::missing_docs_in_private_items)]
3
4//! A simple SubRip file parser. Example usage -
5//!
6//! ```rust
7//! use std::path::PathBuf;
8//!
9//! use srt_parser::SubRipFile;
10//!
11//! fn main() -> Result<(), Box<dyn std::error::Error>> {
12//!     let subs = SubRipFile::new(PathBuf::from("test.srt"))?;
13//!
14//!     for sub in subs.subtitles() {
15//!         println!("{:#?}", sub);
16//!     }
17//!
18//!     Ok(())
19//! }
20//! ```
21
22use std::path::PathBuf;
23
24use anyhow::{
25    anyhow,
26    Context,
27    Result,
28};
29use itertools::Itertools;
30use time::Time;
31
32/// Includes `peg` parsers used to parse subtitles
33pub mod parser;
34
35/// A struct representing a subtitle file
36#[derive(Debug)]
37pub struct SubRipFile {
38    /// The path to the subtitle file
39    path:      PathBuf,
40    /// The source text of the subtitle file
41    source:    String,
42    /// a vector of subtitles parsed from the file
43    subtitles: Vec<Subtitle>,
44}
45
46impl SubRipFile {
47    /// Creates a new `SubTitleFile` from a path to a subtitle file
48    pub fn new(path: PathBuf) -> Result<Self> {
49        let source = std::fs::read_to_string(&path)
50            .context(format!("Failed to read file as string {}", &path.display()))?;
51        let subtitles = source
52            .lines()
53            .into_iter()
54            .map(String::from)
55            .map(|line| line.replace('\u{feff}', ""))
56            .coalesce(|prev, next| {
57                if next.trim().is_empty() {
58                    Err((prev, next))
59                } else {
60                    Ok(format!("{}\n{}", prev, next).trim().to_string())
61                }
62            })
63            .filter(|s| !s.trim().is_empty())
64            .map(Subtitle::from_string)
65            .collect::<Result<Vec<Subtitle>>>()?;
66
67        Ok(Self {
68            path,
69            source,
70            subtitles,
71        })
72    }
73
74    /// Returns the path to the subtitle file
75    pub fn path(&self) -> &PathBuf {
76        &self.path
77    }
78
79    /// Returns the source text of the subtitle file
80    pub fn source(&self) -> &str {
81        self.source.as_ref()
82    }
83
84    /// Returns a vector of subtitles parsed from the file
85    pub fn subtitles(&self) -> &[Subtitle] {
86        self.subtitles.as_ref()
87    }
88}
89
90/// A struct representing a subtitle
91#[derive(Debug, Clone)]
92pub struct Subtitle {
93    /// The sequence number of the subtitle
94    sequence_number: u32,
95    /// The start timecode of the subtitle
96    start:           Time,
97    /// The end timecode of the subtitle
98    end:             Time,
99    /// The text of the subtitle
100    text:            String,
101}
102
103impl Subtitle {
104    /// Creates a new `SubTitle`
105    pub fn new(sn: u32, s: Time, e: Time, t: String) -> Self {
106        Self {
107            sequence_number: sn,
108            start:           s,
109            end:             e,
110            text:            t,
111        }
112    }
113
114    /// Creates a new `SubTitle` from a string
115    pub fn from_string(source: String) -> Result<Self> {
116        let lines = source.lines().collect::<Vec<&str>>();
117        if lines.len() < 3 {
118            return Err(anyhow!(
119                "Invalid subtitle (length is {}): {:?}",
120                lines.len(),
121                lines
122            ));
123        }
124        let sequence_number = parser::srt::sequence_number(lines[0].trim()).context(format!(
125            "Could not parse a sequence number in the SRT file: {:?}",
126            lines[0]
127        ))?;
128        let (start, end) = parser::srt::sub_duration(lines[1].trim())
129            .context("Could not parse a timecode in the SRT file")??;
130        let text = lines[2..].join("\n");
131
132        Ok(Self {
133            sequence_number,
134            start,
135            end,
136            text,
137        })
138    }
139
140    /// Returns the sequence number of the subtitle
141    pub fn sequence_number(&self) -> u32 {
142        self.sequence_number
143    }
144
145    /// Returns the start timecode of the subtitle
146    pub fn start(&self) -> Time {
147        self.start
148    }
149
150    /// Returns the end timecode of the subtitle
151    pub fn end(&self) -> Time {
152        self.end
153    }
154
155    /// Returns the text of the subtitle
156    pub fn text(&self) -> &str {
157        self.text.as_ref()
158    }
159}