Skip to main content

ass_font/
lib.rs

1//! A library for extracting font names used in ASS (Advanced SubStation Alpha) subtitle files.
2//! 
3//! This crate provides functionality to detect the text encoding of an ASS file, read it, and 
4//! extract all required fonts correctly by parsing both the Styles section and inline font 
5//! overrides within Dialogue events (e.g. `\fnFontName`).
6use camino::Utf8Path;
7use chardetng::EncodingDetector;
8use std::{collections::HashSet, fs, io};
9
10/// Reads a file into a `String`, automatically detecting and decoding its character encoding.
11pub fn read_text_auto(path: &Utf8Path) -> io::Result<String> {
12    let bytes = fs::read(path)?;
13    let mut det = EncodingDetector::new();
14    det.feed(&bytes, true);
15    Ok(det.guess(None, true).decode(&bytes).0.into_owned())
16}
17
18// ── ASS Section Types ────────────────────────────────────────────────────────
19
/// Identifies which part of an ASS file the parser is currently inside.
///
/// Only the sections handled by the font extractor are distinguished; every
/// other section collapses into [`Section::Other`], which is also the default.
#[derive(Debug, Default, PartialEq, Eq, Clone)]
enum Section {
    /// A styles section (`[V4 Styles]`, `[V4+ Styles]` or `[V4++ Styles]`).
    Styles,
    /// The `[Events]` section.
    Events,
    /// Any section not listed above.
    #[default]
    Other,
}

impl Section {
    /// Maps a section header line (brackets included, already trimmed) to a `Section`.
    ///
    /// Headers are compared ASCII case-insensitively so that files written with a
    /// non-canonical casing such as `[events]` are still recognised. Unknown
    /// headers yield [`Section::Other`].
    fn from_header(s: &str) -> Self {
        const STYLE_HEADERS: [&str; 3] = ["[V4+ Styles]", "[V4 Styles]", "[V4++ Styles]"];

        if STYLE_HEADERS.iter().any(|header| s.eq_ignore_ascii_case(header)) {
            Self::Styles
        } else if s.eq_ignore_ascii_case("[Events]") {
            Self::Events
        } else {
            Self::Other
        }
    }
}
39
40// ── Column Formatting (Lazy parsing, updated only when a "Format:" line is encountered) ──
41
/// Describes where the column of interest sits in a comma-separated line of one section.
#[derive(Debug, PartialEq, Eq, Clone)]
struct ColFormat {
    /// Zero-based position of the wanted column once the line is split on commas.
    target_index: usize,
    /// Number of columns expected in a line (intended as the `splitn` limit, which
    /// matters for Events).
    columns_count: usize,
}

impl ColFormat {
    /// Layout assumed for the Styles section until a `Format:` line says otherwise.
    fn style_default() -> Self {
        // Style Format: Name, Fontname, ... — the font name is the second column.
        let (target_index, columns_count) = (1, usize::MAX);
        Self {
            target_index,
            columns_count,
        }
    }

    /// Layout assumed for the Events section until a `Format:` line says otherwise.
    fn event_default() -> Self {
        // Event Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
        let (target_index, columns_count) = (9, 10);
        Self {
            target_index,
            columns_count,
        }
    }

    /// Recomputes the layout from the column list of a `Format:` line.
    ///
    /// `format_str` is the text after `Format:`. `needle` is the wanted column
    /// name, compared ASCII case-insensitively against each trimmed column. The
    /// first match wins; when no column matches, the last column is selected.
    fn update_from_format_line(&mut self, format_str: &str, needle: &str) {
        let mut total = 0;
        let mut first_match = None;

        // One pass both counts the columns and remembers the first matching one.
        for (idx, column) in format_str.split(',').enumerate() {
            total = idx + 1;
            if first_match.is_none() && column.trim().eq_ignore_ascii_case(needle) {
                first_match = Some(idx);
            }
        }

        self.columns_count = total;
        self.target_index = match first_match {
            Some(idx) => idx,
            None => total.saturating_sub(1),
        };
    }
}
80
81// ── Core Extraction ──────────────────────────────────────────────────────────
82
83/// Extracts a unique, sorted list of font names used in the provided ASS subtitle content.
84pub fn extract_fonts(ass: &str) -> Vec<String> {
85    let mut fonts = HashSet::<String>::new();
86    let mut section = Section::default();
87    let mut style_fmt = ColFormat::style_default();
88    let mut event_fmt = ColFormat::event_default();
89
90    for line in ass.lines().map(str::trim).filter(|l| !l.is_empty()) {
91        if line.starts_with('[') && line.ends_with(']') {
92            section = Section::from_header(line);
93            continue;
94        }
95
96        match section {
97            Section::Styles => handle_style(line, &mut style_fmt, &mut fonts),
98            Section::Events => handle_event(line, &mut event_fmt, &mut fonts),
99            Section::Other => {}
100        }
101    }
102
103    let mut out: Vec<_> = fonts.into_iter().collect();
104    out.sort();
105    out
106}
107
108// ── Section Processing ───────────────────────────────────────────────────────
109
110/// Handles a line in the Styles section, updating the format or extracting a font name.
111fn handle_style(line: &str, fmt: &mut ColFormat, fonts: &mut HashSet<String>) {
112    if let Some(rest) = line.strip_prefix("Format:") {
113        fmt.update_from_format_line(rest, "fontname");
114    } else if let Some(rest) = line.strip_prefix("Style:")
115        && let Some(name) = rest.split(',').nth(fmt.target_index)
116    {
117        add_font(fonts, name);
118    }
119}
120
121/// Handles a line in the Events section, updating the format or extracting font tags from dialogue text.
122fn handle_event(line: &str, fmt: &mut ColFormat, fonts: &mut HashSet<String>) {
123    if let Some(rest) = line.strip_prefix("Format:") {
124        fmt.update_from_format_line(rest, "text");
125    }
126    // Only process Dialogue lines (ignore Comments)
127    else if matches!(line.split_once(':'), Some(("Dialogue", _))) {
128        let content = line[line.find(':').unwrap() + 1..].trim_start();
129        if let Some(text) = content.splitn(fmt.columns_count, ',').nth(fmt.target_index) {
130            scan_inline_tags(text, fonts);
131        }
132    }
133}
134
135// ── Inline Tag Scanning `{\fnName\b1...}` ────────────────────────────────────
136
137/// Scans for inline font tags (e.g., `\fnFontName`) within dialogue text and extracts their values.
138fn scan_inline_tags(text: &str, fonts: &mut HashSet<String>) {
139    // Treat text as a slice, processing each `{...}` block and advancing
140    let mut rest = text;
141    while let Some(open) = rest.find('{') {
142        rest = &rest[open + 1..];
143        let Some(close) = rest.find('}') else { break };
144
145        rest[..close]
146            .split('\\')
147            .filter_map(|tag| tag.strip_prefix("fn"))
148            .for_each(|name| add_font(fonts, name));
149
150        rest = &rest[close + 1..];
151    }
152}
153
154// ── Utility Functions ────────────────────────────────────────────────────────
155
/// Records a font name in `fonts` after normalising it.
///
/// Surrounding whitespace is trimmed and a single leading `@` (the marker used
/// for vertical typography) is dropped. Names that end up empty are not stored.
fn add_font(fonts: &mut HashSet<String>, raw: &str) {
    let candidate = raw.trim();
    let candidate = match candidate.strip_prefix('@') {
        Some(without_marker) => without_marker,
        None => candidate,
    };

    if candidate.is_empty() {
        return;
    }
    fonts.insert(candidate.to_owned());
}
164
#[cfg(test)]
mod tests {
    use super::*;

    /// Fonts come from the style definitions only; the `@`-prefixed entry must
    /// collapse onto its unprefixed name.
    #[test]
    fn test_extract_fonts_lucky_star() {
        let ass = r"
            [Script Info]
            Title:lucky star
            Original Script:CASO&I.G
            Synch Point:0
            ScriptType:v4.00+
            Collisions:Normal
            PlayResX:704
            PlayResY:396
            Timer:100.0000

            [V4+ Styles]
            Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
            Style: zhengwen,方正准圆_GBK,26,&H00FFFFFF,&H004080FF,&H20B71082,&H99B71082,-1,0,0,0,100,100,0,0.00,0,3,1,2,30,30,8,134
            Style: zhushi,方正准圆_GBK,19,&H00FFFFFF,&H004080FF,&H20E22E1B,&H99E22E1B,-1,0,0,0,100,100,0,0.00,0,3,1,8,30,30,8,134
            Style: jinggao,方正准圆_GBK,17,&H00FFFFFF,&H00FFFFFF,&H00000000,&HFF000000,-1,0,0,0,100,100,0,0.00,1,3,1,8,15,15,5,134
            Style: staff,@DFGKanTeiRyu-XB,18,&H00FFFFFF,&H00000000,&H407A0748,&HA0FFFFFF,0,0,0,0,105,105,1,0.00,1,3,0,7,30,30,10,128
            Style: OPJ,DFGKanTeiRyu-XB,18,&H00FFFFFF,&H00000000,&H208B0C66,&H66666666,0,0,0,0,100,105,2,0.00,1,3,1,8,30,30,10,128
            Style: OPC,方正少儿_GBK,24,&H00FFFFFF,&H00000000,&H208B0C66,&H66666666,0,0,0,0,100,100,0,0.00,1,3,1,2,30,30,7,134
            Style: EDJ,MS Gothic,21,&H00EEEEEE,&H90FFFFFF,&H12333333,&H20000000,-1,0,0,0,100,100,1,0.00,0,3,0,8,30,30,8,128
            Style: EDC,方正黑体_GBK,19,&H00EEEEEE,&HFF000000,&H12333333,&H20000000,0,0,0,0,100,100,2,0.00,0,3,0,8,30,30,12,134

            [Events]
            Format: Layer, Start, End, Style, Actor, MarginL, MarginR, MarginV, Effect, Text
            Dialogue: 0,0:00:06.08,0:00:08.78,OPJ,NTP,0000,0000,0000,,曖昧3センチ そりゃぷにってコトかい? ちょっ!
            Dialogue: 0,0:00:08.95,0:00:11.97,OPJ,NTP,0000,0000,0000,,らっぴんぐが制服…だぁぁ不利ってことない ぷ。
            Dialogue: 0,0:00:12.20,0:00:13.55,OPJ,NTP,0000,0000,0000,,がんばっちゃ やっちゃっちゃ
            ";

        let mut expected = vec![
            "DFGKanTeiRyu-XB",
            "MS Gothic",
            "方正少儿_GBK",
            "方正准圆_GBK",
            "方正黑体_GBK",
        ];
        expected.sort();

        let actual = extract_fonts(ass);
        assert_eq!(actual, expected);
    }

    /// Fonts come from both the style definitions and inline `\fn` overrides in
    /// dialogue lines; comment lines are present in the fixture as well.
    #[test]
    fn test_extract_fonts_make_heroine_ga_oosugiru() {
        let ass = r"
            [Script Info]
            ; Script generated by Aegisub 9706-cibuilds-20caaabc0
            ; http://www.aegisub.org/
            Title: [KitaujiSub] Make Heroine ga Oosugiru! - 12
            ScriptType: v4.00+
            WrapStyle: 2
            ScaledBorderAndShadow: yes
            YCbCr Matrix: TV.709
            PlayResX: 1920
            PlayResY: 1080
            LayoutResX: 1920
            LayoutResY: 1080

            [V4+ Styles]
            Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
            Style: Text - CN,Source Han Sans TC Medium,80,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.5,0,2,10,10,52,1
            Style: Text - JP,Source Han Sans Medium,52,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.2,0,2,10,10,10,1
            Style: Text - CN - UP,Source Han Sans TC Medium,80,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.5,0,8,10,10,10,1
            Style: Text - JP - UP,Source Han Sans Medium,52,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.2,0,8,10,10,80,1
            Style: Screen,Source Han Sans TC Medium,80,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,1,0,1,2.5,0,2,10,10,53,1
            Style: Title,华康翩翩体W5-A,72,&H00B9B9B9,&H000000FF,&H00B99A57,&H00B99A57,0,0,0,0,100,100,5,0,1,0,0,2,10,10,285,1
            Style: Ruby,Source Han Sans TC Medium,45,&H00F5F5F5,&H000000FF,&H00252525,&H00000000,0,0,0,0,100,100,2,0,1,2.2,0,2,10,10,115,1
            Style: Staff,华康翩翩体W5-A,60,&H00B9B9B9,&H000000FF,&H00B99A57,&H00B99A57,0,0,0,0,100,100,0,0,1,0,0,8,10,10,30,1

            [Events]
            Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
            Comment: 0,0:00:00.00,0:00:00.00,Note,,0,0,0,,--------------- Subtitle Staff&Title ---------------
            Comment: 0,0:05:41.45,0:05:47.37,Screen,,0,0,0,,# 參見這裡 https://www.nonhoi.jp/amusement/
            Dialogue: 0,0:05:41.44,0:05:41.48,Screen,,0,0,0,,{=0}{\alpha&H00&\an4\fs70\bord0\c&H312C33&\pos(455,330)}兒童列車
            Dialogue: 0,0:05:41.48,0:05:41.52,Screen,,0,0,0,,{=0}{\alpha&H00&\an4\fs70\bord0\c&H312C33&\pos(457.06,310)}兒童列車
            Dialogue: 0,0:05:41.52,0:05:41.57,Screen,,0,0,0,,{=0}{\alpha&H00&\an4\fs70\bord0\c&H312C33&\pos(459.13,290.16)}兒童列車
            Dialogue: 0,0:05:42.78,0:05:42.82,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(175.71,1125.35)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
            Dialogue: 0,0:05:42.82,0:05:42.86,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(177.77,1105.37)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
            Dialogue: 0,0:05:42.86,0:05:42.90,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(179.85,1085.56)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
            Dialogue: 0,0:05:42.90,0:05:42.94,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(181.98,1065.73)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
            Dialogue: 0,0:05:42.94,0:05:42.98,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(184.07,1045.73)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
            Dialogue: 0,0:05:42.98,0:05:43.03,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(186.17,1025.88)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
            Dialogue: 0,0:05:43.03,0:05:43.07,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(188.31,1006.03)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
            Dialogue: 0,0:05:43.07,0:05:43.11,Screen,,0,0,0,,{=2}{\alpha&H00&\an4\fnSource Han Sans TC\fs40\bord0\fsp0\c&H38434A&\pos(190.43,986.13)}隨著「嘟嘟—」的汽笛聲前進!乘上列車沿著遊樂場內鋪設的軌道周遊,好好探險吧。
";

        let mut expected = vec![
            "Source Han Sans TC",
            "Source Han Sans Medium",
            "华康翩翩体W5-A",
            "Source Han Sans TC Medium",
        ];
        expected.sort();

        let actual = extract_fonts(ass);
        assert_eq!(actual, expected);
    }

    /// Exercises the inline tag scanner on degenerate, ordinary and nested-brace input.
    #[test]
    fn test_scan_inline_tags() {
        // Runs the scanner on a fresh set and hands the set back.
        let scan = |input: &str| {
            let mut found = HashSet::new();
            scan_inline_tags(input, &mut found);
            found
        };

        // Unterminated blocks and empty `\fn` tags must not produce a font.
        let mut fonts = HashSet::new();
        for input in ["{\\", "{fn", "{\\fn}", "{\\fn\\}"] {
            scan_inline_tags(input, &mut fonts);
        }
        assert!(fonts.is_empty());

        // The name runs up to the next backslash and may contain spaces.
        assert_eq!(
            scan("{\\fnSource Han Sans TC\\b2e}"),
            HashSet::from(["Source Han Sans TC".to_string()])
        );

        // A doubled opening brace does not hide the tag.
        assert_eq!(
            scan("{{\\fnArial}text}"),
            HashSet::from(["Arial".to_string()])
        );
    }
}