1use camino::Utf8Path;
4use chardetng::EncodingDetector;
5use std::{collections::HashSet, fs, io};
6
7pub fn read_text_auto(path: &Utf8Path) -> io::Result<String> {
9 let bytes = fs::read(path)?;
10 let mut det = EncodingDetector::new();
11 det.feed(&bytes, true);
12 Ok(det.guess(None, true).decode(&bytes).0.into_owned())
13}
14
/// The part of an ASS/SSA script the parser is currently inside.
#[derive(Debug, Default, PartialEq, Eq, Clone)]
enum Section {
    /// A style definition section (`[V4 Styles]`, `[V4+ Styles]`, `[V4++ Styles]`).
    Styles,
    /// The `[Events]` section.
    Events,
    /// Any other section, and the state before the first header is seen.
    #[default]
    Other,
}

impl Section {
    /// Maps a complete section header line (brackets included, already
    /// trimmed) to the section it opens.
    ///
    /// The comparison ignores ASCII case, so `[events]` and `[V4+ STYLES]`
    /// are recognised as well. Unknown headers map to [`Section::Other`].
    fn from_header(s: &str) -> Self {
        const STYLE_HEADERS: [&str; 3] = ["[V4+ Styles]", "[V4 Styles]", "[V4++ Styles]"];

        if STYLE_HEADERS.iter().any(|header| s.eq_ignore_ascii_case(header)) {
            Self::Styles
        } else if s.eq_ignore_ascii_case("[Events]") {
            Self::Events
        } else {
            Self::Other
        }
    }
}
36
/// Column layout described by a section's `Format:` line.
///
/// Tracks which comma-separated column holds the value of interest and how
/// many columns the line declares in total.
#[derive(Debug, PartialEq, Eq, Clone)]
struct ColFormat {
    /// Zero-based position of the wanted column.
    target_index: usize,
    /// Number of columns declared by the format line.
    columns_count: usize,
}

impl ColFormat {
    /// Layout used for style lines before any `Format:` line has been seen:
    /// the wanted column is the second one and the column count is unbounded.
    fn style_default() -> Self {
        let (target_index, columns_count) = (1, usize::MAX);
        Self {
            target_index,
            columns_count,
        }
    }

    /// Layout used for event lines before any `Format:` line has been seen:
    /// ten columns with the wanted one in last position.
    fn event_default() -> Self {
        let (target_index, columns_count) = (9, 10);
        Self {
            target_index,
            columns_count,
        }
    }

    /// Recomputes the layout from the text following `Format:`.
    ///
    /// `format_str` is split on commas and each column name is trimmed before
    /// being compared to `needle`, ignoring ASCII case. The first matching
    /// column becomes the target; when none matches, the last column is used.
    fn update_from_format_line(&mut self, format_str: &str, needle: &str) {
        let mut total = 0usize;
        let mut hit: Option<usize> = None;

        for (idx, column) in format_str.split(',').enumerate() {
            total = idx + 1;
            if hit.is_none() && column.trim().eq_ignore_ascii_case(needle) {
                hit = Some(idx);
            }
        }

        self.columns_count = total;
        self.target_index = match hit {
            Some(idx) => idx,
            None => total.saturating_sub(1),
        };
    }
}
77
78pub fn extract_fonts(ass: &str) -> Vec<String> {
82 let mut fonts = HashSet::<String>::new();
83 let mut section = Section::default();
84 let mut style_fmt = ColFormat::style_default();
85 let mut event_fmt = ColFormat::event_default();
86
87 for line in ass.lines().map(str::trim).filter(|l| !l.is_empty()) {
88 if line.starts_with('[') && line.ends_with(']') {
89 section = Section::from_header(line);
90 continue;
91 }
92
93 match section {
94 Section::Styles => handle_style(line, &mut style_fmt, &mut fonts),
95 Section::Events => handle_event(line, &mut event_fmt, &mut fonts),
96 Section::Other => {}
97 }
98 }
99
100 let mut out: Vec<_> = fonts.into_iter().collect();
101 out.sort();
102 out
103}
104
105fn handle_style(line: &str, fmt: &mut ColFormat, fonts: &mut HashSet<String>) {
109 if let Some(rest) = line.strip_prefix("Format:") {
110 fmt.update_from_format_line(rest, "fontname");
111 } else if let Some(rest) = line.strip_prefix("Style:")
112 && let Some(name) = rest.split(',').nth(fmt.target_index)
113 {
114 add_font(fonts, name);
115 }
116}
117
118fn handle_event(line: &str, fmt: &mut ColFormat, fonts: &mut HashSet<String>) {
120 if let Some(rest) = line.strip_prefix("Format:") {
121 fmt.update_from_format_line(rest, "text");
122 }
123 else if matches!(line.split_once(':'), Some(("Dialogue", _))) {
125 let content = line[line.find(':').unwrap() + 1..].trim_start();
126 if let Some(text) = content.splitn(fmt.columns_count, ',').nth(fmt.target_index) {
127 scan_inline_tags(text, fonts);
128 }
129 }
130}
131
132fn scan_inline_tags(text: &str, fonts: &mut HashSet<String>) {
136 let mut rest = text;
138 while let Some(open) = rest.find('{') {
139 rest = &rest[open + 1..];
140 let Some(close) = rest.find('}') else { break };
141
142 rest[..close]
143 .split('\\')
144 .filter_map(|tag| tag.strip_prefix("fn"))
145 .for_each(|name| add_font(fonts, name));
146
147 rest = &rest[close + 1..];
148 }
149}
150
/// Inserts a font name into `fonts` after normalising it.
///
/// Surrounding whitespace is trimmed and a single leading `@` is removed.
/// Names that end up empty are not inserted.
fn add_font(fonts: &mut HashSet<String>, raw: &str) {
    let trimmed = raw.trim();
    let name = match trimmed.strip_prefix('@') {
        Some(without_at) => without_at,
        None => trimmed,
    };

    if name.is_empty() {
        return;
    }
    fonts.insert(name.to_owned());
}
160
161#[cfg(test)]
162mod tests {
163 use super::*;
164
165 #[test]
166 fn test_extract_fonts_lucky_star() {
167 let ass = r"
168 [Script Info]
169 Title:lucky star
170 Original Script:CASO&I.G
171 Synch Point:0
172 ScriptType:v4.00+
173 Collisions:Normal
174 PlayResX:704
175 PlayResY:396
176 Timer:100.0000
177
178 [V4+ Styles]
179 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
180 Style: zhengwen,方正准圆_GBK,26,&H00FFFFFF,&H004080FF,&H20B71082,&H99B71082,-1,0,0,0,100,100,0,0.00,0,3,1,2,30,30,8,134
181 Style: zhushi,方正准圆_GBK,19,&H00FFFFFF,&H004080FF,&H20E22E1B,&H99E22E1B,-1,0,0,0,100,100,0,0.00,0,3,1,8,30,30,8,134
182 Style: jinggao,方正准圆_GBK,17,&H00FFFFFF,&H00FFFFFF,&H00000000,&HFF000000,-1,0,0,0,100,100,0,0.00,1,3,1,8,15,15,5,134
183 Style: staff,@DFGKanTeiRyu-XB,18,&H00FFFFFF,&H00000000,&H407A0748,&HA0FFFFFF,0,0,0,0,105,105,1,0.00,1,3,0,7,30,30,10,128
184 Style: OPJ,DFGKanTeiRyu-XB,18,&H00FFFFFF,&H00000000,&H208B0C66,&H66666666,0,0,0,0,100,105,2,0.00,1,3,1,8,30,30,10,128
185 Style: OPC,方正少儿_GBK,24,&H00FFFFFF,&H00000000,&H208B0C66,&H66666666,0,0,0,0,100,100,0,0.00,1,3,1,2,30,30,7,134
186 Style: EDJ,MS Gothic,21,&H00EEEEEE,&H90FFFFFF,&H12333333,&H20000000,-1,0,0,0,100,100,1,0.00,0,3,0,8,30,30,8,128
187 Style: EDC,方正黑体_GBK,19,&H00EEEEEE,&HFF000000,&H12333333,&H20000000,0,0,0,0,100,100,2,0.00,0,3,0,8,30,30,12,134
188
189 [Events]
190 Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text
191 Dialogue: 0,0:00:06.08,0:00:08.78,OPJ,NTP,0000,0000,0000,,曖昧3センチ そりゃぷにってコトかい? ちょっ!
192 Dialogue: 0,0:00:08.95,0:00:11.97,OPJ,NTP,0000,0000,0000,,らっぴんぐが制服…だぁぁ不利ってことない ぷ。
193 Dialogue: 0,0:00:12.20,0:00:13.55,OPJ,NTP,0000,0000,0000,,がんばっちゃ やっちゃっちゃ
194 ";
195 let fonts_extracted = extract_fonts(ass);
196 let mut fonts_expected = vec![
197 "DFGKanTeiRyu-XB",
198 "MS Gothic",
199 "方正少儿_GBK",
200 "方正准圆_GBK",
201 "方正黑体_GBK",
202 ];
203 fonts_expected.sort();
204 assert_eq!(fonts_extracted, fonts_expected);
205 }
206
207 #[test]
208 fn test_extract_fonts_make_heroine_ga_oosugiru() {
209 let ass = r"
210 [Script Info]
211 ; Script generated by Aegisub 9706-cibuilds-20caaabc0
212 ; http://www.aegisub.org/
213 Title: [KitaujiSub] Make Heroine ga Oosugiru! - 12
214 ScriptType: v4.00+
215 WrapStyle: 2
216 ScaledBorderAndShadow: yes
217 YCbCr Matrix: TV.709
218 PlayResX: 1920
219 PlayResY: 1080
220 LayoutResX: 1920
221 LayoutResY: 1080
222
223 [V4+ Styles]
224 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
225 Style: Text - CN,Source Han Sans TC Medium,80,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.5,0,2,10,10,52,1
226 Style: Text - JP,Source Han Sans Medium,52,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.2,0,2,10,10,10,1
227 Style: Text - CN - UP,Source Han Sans TC Medium,80,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.5,0,8,10,10,10,1
228 Style: Text - JP - UP,Source Han Sans Medium,52,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.2,0,8,10,10,80,1
229 Style: Screen,Source Han Sans TC Medium,80,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.5,0,2,10,10,53,1
230 Style: Title,华康翩翩体W5-A,72,&H00B9B9B9,&H000000FF,&H00B99A57,&H00B99A57,0,0,0,0,100,100,5,0,1,0,0,2,10,10,285,1
231 Style: Ruby,Source Han Sans TC Medium,45,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,2,0,1,2.2,0,2,10,10,115,1
232 Style: Staff,华康翩翩体W5-A,60,&H00B9B9B9,&H000000FF,&H00B99A57,&H00B99A57,0,0,0,0,100,100,0,0,1,0,0,8,10,10,30,1
233
234 [Events]
235 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
236 Comment: 0,0:00:00.00,0:00:00.00,Note,,0,0,0,,--------------- Subtitle Staff&Title ---------------
237 Comment: 0,0:05:41.45,0:05:47.37,Screen,,0,0,0,,# 參見這裡 https://www.nonhoi.jp/amusement/
238 Dialogue: 0,0:05:41.44,0:05:41.48,Screen,,0,0,0,,{=0}{\alpha&H00&\an4\fs70\bord0\c&H312C33&\pos(455,330)}兒童列車
239 Dialogue: 0,0:05:41.48,0:05:41.52,Screen,,0,0,0,,{=0}{\alpha&H00&\an4\fs70\bord0\c&H312C33&\pos(457.06,310)}兒童列車
240 Dialogue: 0,0:05:41.52,0:05:41.57,Screen,,0,0,0,,{=0}{\alpha&H00&\an4\fs70\bord0\c&H312C33&\pos(459.13,290.16)}兒童列車
241 Dialogue: 0,0:05:42.78,0:05:42.82,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(175.71,1125.35)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
242 Dialogue: 0,0:05:42.82,0:05:42.86,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(177.77,1105.37)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
243 Dialogue: 0,0:05:42.86,0:05:42.90,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(179.85,1085.56)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
244 Dialogue: 0,0:05:42.90,0:05:42.94,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(181.98,1065.73)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
245 Dialogue: 0,0:05:42.94,0:05:42.98,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(184.07,1045.73)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
246 Dialogue: 0,0:05:42.98,0:05:43.03,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(186.17,1025.88)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
247 Dialogue: 0,0:05:43.03,0:05:43.07,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(188.31,1006.03)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
248 Dialogue: 0,0:05:43.07,0:05:43.11,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(190.43,986.13)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
249";
250 let fonts_extracted = extract_fonts(ass);
251 let mut fonts_expected = vec![
252 "Source Han Sans TC",
253 "Source Han Sans Medium",
254 "华康翩翩体W5-A",
255 "Source Han Sans TC Medium",
256 ];
257 fonts_expected.sort();
258 assert_eq!(fonts_extracted, fonts_expected);
259 }
260
261 #[test]
262 fn test_scan_inline_tags() {
263 let mut fonts = HashSet::new();
264 scan_inline_tags("{\\", &mut fonts);
265 scan_inline_tags("{fn", &mut fonts);
266 scan_inline_tags("{\\fn}", &mut fonts);
267 scan_inline_tags("{\\fn\\}", &mut fonts);
268 assert!(fonts.is_empty());
269
270 let mut fonts = HashSet::new();
271 scan_inline_tags("{\\fnSource Han Sans TC\\b2e}", &mut fonts);
272 assert_eq!(fonts, HashSet::from(["Source Han Sans TC".to_string()]));
273
274 let mut fonts = HashSet::new();
275 scan_inline_tags("{{\\fnArial}text}", &mut fonts);
276 assert_eq!(fonts, HashSet::from(["Arial".to_string()]));
277 }
278}