ass_core/analysis/events/
unicode_wrap.rs

//! Unicode-aware soft-wrap opportunities (UAX #14).
//!
//! Feature-gated behind `unicode-wrap`. Provides the analysis-side equivalent
//! of libass 0.17.4's `ASS_FEATURE_WRAP_UNICODE`: it identifies valid line
//! break positions in plain text, including breaks between CJK/Kana/Hangul
//! characters that are not separated by spaces.
//!
//! The input is expected to be *plain* text — that is, with override tag blocks
//! removed and explicit `\N`/`\n` breaks already resolved (see
//! [`TextWithLineBreaks`](super::line_breaks::TextWithLineBreaks)). Each
//! returned offset is the byte index of the character that would start the new
//! line if a wrap is taken there.
//!
//! # Examples
//!
//! ```rust
//! use ass_core::analysis::events::unicode_wrap::soft_wrap_offsets;
//!
//! // A space provides a soft-wrap opportunity before the next word.
//! let offsets = soft_wrap_offsets("Hello world");
//! assert!(offsets.contains(&6));
//! ```
23
24use alloc::vec::Vec;
25
/// One candidate line-wrap position inside a run of plain text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct WrapOpportunity {
    /// Byte offset of the character that would begin the new line if the
    /// wrap is taken at this position.
    pub offset: usize,
    /// `true` when the break is forced rather than optional — for example
    /// after a hard line feed or at the end of the text.
    pub mandatory: bool,
}
34
35/// Compute all Unicode line-break opportunities for `text` per UAX #14.
36///
37/// Includes the mandatory break at the end of the text. Use
38/// [`soft_wrap_offsets`] for only the optional intra-text break points.
39#[must_use]
40pub fn wrap_opportunities(text: &str) -> Vec<WrapOpportunity> {
41    unicode_linebreak::linebreaks(text)
42        .map(|(offset, opportunity)| WrapOpportunity {
43            offset,
44            mandatory: matches!(opportunity, unicode_linebreak::BreakOpportunity::Mandatory),
45        })
46        .collect()
47}
48
49/// Return only the byte offsets at which an optional (soft) wrap may occur.
50///
51/// Mandatory breaks — including the implicit one at the end of the string — are
52/// excluded, leaving the positions a renderer may choose to wrap at when a line
53/// exceeds the available width.
54#[must_use]
55pub fn soft_wrap_offsets(text: &str) -> Vec<usize> {
56    wrap_opportunities(text)
57        .into_iter()
58        .filter(|opportunity| !opportunity.mandatory)
59        .map(|opportunity| opportunity.offset)
60        .collect()
61}
62
#[cfg(test)]
mod tests {
    use super::{soft_wrap_offsets, wrap_opportunities};

    #[test]
    fn breaks_after_space() {
        // The only soft break sits right before "world", i.e. at the 'w'.
        assert_eq!(soft_wrap_offsets("Hello world"), [6]);
    }

    #[test]
    fn multiple_words() {
        // One soft break in front of each word after the first.
        assert_eq!(soft_wrap_offsets("one two three"), [4, 8]);
    }

    #[test]
    fn breaks_between_cjk_without_spaces() {
        // Japanese text carries no spaces, yet wrapping between characters
        // is permitted.
        let text = "日本語字幕";
        let offsets = soft_wrap_offsets(text);

        // Some break opportunity must exist even without any whitespace.
        assert!(!offsets.is_empty());

        // No offset may land in the middle of a UTF-8 sequence.
        for &offset in &offsets {
            assert!(text.is_char_boundary(offset));
        }
    }

    #[test]
    fn no_soft_break_in_single_token() {
        // One unbroken word has no interior position to wrap at.
        let offsets = soft_wrap_offsets("indivisible");
        assert!(offsets.is_empty());
    }

    #[test]
    fn final_break_is_mandatory() {
        let text = "hi there";
        let opportunities = wrap_opportunities(text);

        // The last reported opportunity is the end-of-text break.
        let tail = opportunities.last().expect("at least one opportunity");
        assert_eq!(tail.offset, text.len());
        assert!(tail.mandatory);
    }
}
ass_core/analysis/events/unicode_wrap.rs

ass_core/analysis/events/
unicode_wrap.rs