1#![doc = include_str!("../README.md")]
2
3mod ocr;
4mod opt;
5mod preprocessor;
6
7pub use crate::{ocr::process, ocr::Error as OcrError, ocr::OcrOpt, opt::Opt};
8
9use image::{GrayImage, LumaA};
10use log::warn;
11use preprocessor::rgb_palette_to_luminance;
12use rayon::{
13 iter::{IntoParallelRefIterator, ParallelIterator},
14 ThreadPoolBuildError,
15};
16use std::{
17 ffi::OsStr,
18 fs::File,
19 io::{self, BufReader, BufWriter},
20 path::PathBuf,
21};
22use subtile::{
23 image::{dump_images, luma_a_to_luma, ToImage, ToOcrImage, ToOcrImageOpt},
24 pgs::{self, DecodeTimeImage, RleToImage},
25 srt,
26 time::TimeSpan,
27 vobsub::{self, conv_to_rgba, VobSubError, VobSubIndexedImage, VobSubOcrImage, VobSubToImage},
28 SubtileError,
29};
30use thiserror::Error;
31
32#[allow(missing_docs)]
34#[derive(Error, Debug)]
35pub enum Error {
36 #[error("Failed to create a rayon ThreadPool.")]
37 RayonThreadPool(#[from] ThreadPoolBuildError),
38
39 #[error("The file extension '{extension}' is not managed.")]
40 InvalidFileExtension { extension: String },
41
42 #[error("The file doesn't have a valid extension, can't choose a parser.")]
43 NoFileExtension,
44
45 #[error("Failed to open Index file.")]
46 IndexOpen(#[source] VobSubError),
47
48 #[error("Failed to create PgsParser from file")]
49 PgsParserFromFile(#[source] pgs::PgsError),
50
51 #[error("Failed to parse Pgs")]
52 PgsParsing(#[source] pgs::PgsError),
53
54 #[error("Failed to dump subtitles images")]
55 DumpImage(#[source] SubtileError),
56
57 #[error("Could not perform OCR on subtitles.")]
58 Ocr(#[from] ocr::Error),
59
60 #[error("Error happen during OCR on {0} subtitles images")]
61 OcrFails(u32),
62
63 #[error("Could not generate SRT file: {message}")]
64 GenerateSrt { message: String },
65
66 #[error("Could not write SRT file {}", path.display())]
67 WriteSrtFile { path: PathBuf, source: io::Error },
68
69 #[error("Could not write SRT on stdout.")]
70 WriteSrtStdout { source: io::Error },
71}
72
73#[profiling::function]
83pub fn run(opt: &Opt) -> Result<(), Error> {
84 rayon::ThreadPoolBuilder::new()
85 .thread_name(|idx| format!("Rayon_{idx}"))
86 .build_global()
87 .map_err(Error::RayonThreadPool)?;
88
89 let (times, images) = match opt.input.extension().and_then(OsStr::to_str) {
90 Some(ext) => match ext {
91 "sup" => process_pgs(opt),
92 "idx" => process_vobsub(opt),
93 ext => Err(Error::InvalidFileExtension {
94 extension: ext.into(),
95 }),
96 },
97 None => Err(Error::NoFileExtension),
98 }?;
99
100 if opt.dump {
102 dump_images("dumps", &images).map_err(Error::DumpImage)?;
103 }
104
105 let ocr_opt = OcrOpt::new(&opt.tessdata_dir, opt.lang.as_str(), &opt.config, opt.dpi);
106 let texts = ocr::process(images, &ocr_opt)?;
107 let subtitles = check_subtitles(times.into_iter().zip(texts))?;
108
109 write_srt(&opt.output, &subtitles)?;
111
112 Ok(())
113}
114
115#[profiling::function]
123pub fn process_pgs(opt: &Opt) -> Result<(Vec<TimeSpan>, Vec<GrayImage>), Error> {
124 let parser = {
125 profiling::scope!("Create PGS parser");
126 subtile::pgs::SupParser::<BufReader<File>, DecodeTimeImage>::from_file(&opt.input)
127 .map_err(Error::PgsParserFromFile)?
128 };
129
130 let (times, rle_images) = {
131 profiling::scope!("Parse PGS file");
132 parser
133 .collect::<Result<(Vec<_>, Vec<_>), _>>()
134 .map_err(Error::PgsParsing)?
135 };
136
137 if opt.dump_raw {
138 let images = rle_images
139 .iter()
140 .map(|rle_img| RleToImage::new(rle_img, |pix: LumaA<u8>| pix).to_image());
141 dump_images("dumps_raw", images).map_err(Error::DumpImage)?;
142 }
143
144 let conv_fn = luma_a_to_luma::<_, _, 100, 100>; let images = {
147 profiling::scope!("Convert images for OCR");
148 let ocr_opt = ocr_opt(opt);
149 rle_images
150 .par_iter()
151 .map(|rle_img| RleToImage::new(rle_img, &conv_fn).image(&ocr_opt))
152 .collect::<Vec<_>>()
153 };
154
155 Ok((times, images))
156}
157
158#[profiling::function]
165pub fn process_vobsub(opt: &Opt) -> Result<(Vec<TimeSpan>, Vec<GrayImage>), Error> {
166 let idx = {
167 profiling::scope!("Open idx");
168 vobsub::Index::open(&opt.input).map_err(Error::IndexOpen)?
169 };
170 let (times, images): (Vec<_>, Vec<_>) = {
171 profiling::scope!("Parse subtitles");
172 idx.subtitles::<(TimeSpan, VobSubIndexedImage)>()
173 .filter_map(|sub| match sub {
174 Ok(sub) => Some(sub),
175 Err(e) => {
176 warn!(
177 "warning: unable to read subtitle: {e}. (This can usually be safely ignored.)"
178 );
179 None
180 }
181 })
182 .unzip()
183 };
184
185 if opt.dump_raw {
186 let images = images.iter().map(|rle_img| {
187 let image: image::RgbaImage =
188 VobSubToImage::new(rle_img, idx.palette(), conv_to_rgba).to_image();
189 image
190 });
191 dump_images("dumps_raw", images).map_err(Error::DumpImage)?;
192 }
193
194 let images_for_ocr = {
195 profiling::scope!("Convert images for OCR");
196
197 let ocr_opt = ocr_opt(opt);
198 let palette = rgb_palette_to_luminance(idx.palette());
199 images
200 .par_iter()
201 .map(|vobsub_img| {
202 let converter = VobSubOcrImage::new(vobsub_img, &palette);
203 converter.image(&ocr_opt)
204 })
205 .collect::<Vec<_>>()
206 };
207
208 Ok((times, images_for_ocr))
209}
210
211fn ocr_opt(opt: &Opt) -> ToOcrImageOpt {
213 ToOcrImageOpt {
214 border: opt.border,
215 ..Default::default()
216 }
217}
218
219#[profiling::function]
224pub fn check_subtitles<In>(subtitles: In) -> Result<Vec<(TimeSpan, String)>, Error>
225where
226 In: IntoIterator<Item = (TimeSpan, Result<String, ocr::Error>)>,
227{
228 let mut ocr_error_count = 0;
229 let subtitles = subtitles
230 .into_iter()
231 .enumerate()
232 .filter_map(|(idx, (time, maybe_text))| match maybe_text {
233 Ok(text) => Some((time, text)),
234 Err(e) => {
235 let err = anyhow::Error::new(e); warn!(
237 "Error while running OCR on subtitle image ({} - {time:?}):\n\t {err:#}",
238 idx + 1,
239 );
240 ocr_error_count += 1;
241 None
242 }
243 })
244 .collect::<Vec<_>>();
245
246 if ocr_error_count > 0 {
247 Err(Error::OcrFails(ocr_error_count))
248 } else {
249 Ok(subtitles)
250 }
251}
252
253#[profiling::function]
254fn write_srt(path: &Option<PathBuf>, subtitles: &[(TimeSpan, String)]) -> Result<(), Error> {
255 match &path {
256 Some(path) => {
257 let mkerr = |source| Error::WriteSrtFile {
258 path: path.to_path_buf(),
259 source,
260 };
261
262 let subtitle_file = File::create(path).map_err(mkerr)?;
264 let mut stream = BufWriter::new(subtitle_file);
265 srt::write_srt(&mut stream, subtitles).map_err(mkerr)?;
266 }
267 None => {
268 let mut stdout = io::stdout();
270 srt::write_srt(&mut stdout, subtitles)
271 .map_err(|source| Error::WriteSrtStdout { source })?;
272 }
273 }
274 Ok(())
275}