Skip to main content

oximedia_caption_gen/
line_breaking.rs

//! Caption line-breaking algorithms: greedy, optimal (Knuth-Plass-inspired DP),
//! and CJK-aware breaking, plus reading-speed helpers (audience CPS limits and a
//! CPS cache) and line-balance optimisation.
3
4use std::collections::HashMap;
5
/// Tunable limits that govern how caption text is split into lines.
#[derive(Debug, Clone, PartialEq)]
pub struct LineBreakConfig {
    /// Soft limit on the number of characters in one line.
    pub max_chars_per_line: u8,
    /// Upper bound on reading speed, in characters per second.
    pub max_cps: f32,
    /// Upper bound on the number of lines in one caption block.
    pub max_lines: u8,
    /// Smallest allowed gap between consecutive caption blocks, in milliseconds.
    pub min_gap_ms: u32,
    /// Optional hard cap on characters per line.  When present, the smaller of
    /// this value and `max_chars_per_line` applies; `None` adds no extra limit.
    pub hard_max_chars: Option<u8>,
}

impl LineBreakConfig {
    /// Broadcast-style defaults: 42 chars/line, 17 CPS, 2 lines, 80 ms gap and
    /// no hard cap.
    pub fn default_broadcast() -> Self {
        LineBreakConfig {
            max_chars_per_line: 42,
            max_cps: 17.0,
            max_lines: 2,
            min_gap_ms: 80,
            hard_max_chars: None,
        }
    }

    /// The per-line character limit actually in force: `max_chars_per_line`,
    /// lowered to `hard_max_chars` when that cap is set and smaller.
    pub fn effective_max_chars(&self) -> u8 {
        let soft = self.max_chars_per_line;
        self.hard_max_chars.map_or(soft, |cap| cap.min(soft))
    }
}
42
43// ─── Target audience reading speed ────────────────────────────────────────────
44
/// Who the captions are written for; selects reading-speed and minimum
/// display-time limits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudienceProfile {
    /// Young children (ages 4–7): very slow readers.
    YoungChildren,
    /// Older children (ages 8–12): moderate readers.
    OlderChildren,
    /// General adult audience: standard broadcast speed.
    Adults,
    /// Specialised/technical audience: faster reading expected.
    TechnicalAdults,
}

impl AudienceProfile {
    /// Highest recommended reading speed for this audience, in characters
    /// per second.
    pub fn max_cps(self) -> f32 {
        match self {
            Self::YoungChildren => 5.0,
            Self::OlderChildren => 10.0,
            Self::Adults => 17.0,
            Self::TechnicalAdults => 22.0,
        }
    }

    /// Shortest recommended on-screen time for this audience, in milliseconds.
    pub fn min_display_ms(self) -> u32 {
        match self {
            Self::YoungChildren => 3000,
            Self::OlderChildren => 1500,
            Self::Adults => 1000,
            Self::TechnicalAdults => 700,
        }
    }
}
79
80/// Validate reading speed for a specific audience profile.
81///
82/// Returns `true` if the CPS is within acceptable range for the audience.
83pub fn reading_speed_ok_for_audience(
84    text: &str,
85    duration_ms: u64,
86    audience: AudienceProfile,
87) -> bool {
88    reading_speed_ok(text, duration_ms, audience.max_cps())
89}
90
91// ─── CPS cache ────────────────────────────────────────────────────────────────
92
93/// A cache for CPS (characters-per-second) computations.
94///
95/// This avoids recomputing CPS for the same `(text, duration_ms)` pairs when
96/// captions are re-broken multiple times (e.g., during layout refinement).
97#[derive(Debug, Default)]
98pub struct CpsCache {
99    cache: HashMap<(u64, u64), f32>, // key: (text_hash, duration_ms)
100}
101
102impl CpsCache {
103    /// Create a new empty cache.
104    pub fn new() -> Self {
105        Self::default()
106    }
107
108    /// Compute or retrieve cached CPS for `(text, duration_ms)`.
109    pub fn compute_cps(&mut self, text: &str, duration_ms: u64) -> f32 {
110        let key = (hash_str(text), duration_ms);
111        *self
112            .cache
113            .entry(key)
114            .or_insert_with(|| compute_cps(text, duration_ms))
115    }
116
117    /// Return the number of entries currently in the cache.
118    pub fn len(&self) -> usize {
119        self.cache.len()
120    }
121
122    /// Return `true` if the cache is empty.
123    pub fn is_empty(&self) -> bool {
124        self.cache.is_empty()
125    }
126
127    /// Clear all cached entries.
128    pub fn clear(&mut self) {
129        self.cache.clear();
130    }
131}
132
/// FNV-1a 64-bit hash over the UTF-8 bytes of `s`.
fn hash_str(s: &str) -> u64 {
    const FNV_OFFSET: u64 = 14695981039346656037;
    const FNV_PRIME: u64 = 1099511628211;

    let mut hash = FNV_OFFSET;
    for byte in s.bytes() {
        // FNV-1a order: xor the byte in first, then multiply by the prime.
        hash ^= u64::from(byte);
        hash = hash.wrapping_mul(FNV_PRIME);
    }
    hash
}
141
142// ─── CJK line breaking ────────────────────────────────────────────────────────
143
/// Returns `true` when `ch` lies in one of the CJK blocks this module treats
/// as "no spaces between words" script: Han ideographs, kana, or Hangul
/// syllables.
fn is_cjk_char(ch: char) -> bool {
    matches!(
        ch,
        // CJK Unified Ideographs, Extension A, and Compatibility Ideographs.
        '\u{4E00}'..='\u{9FFF}'
            | '\u{3400}'..='\u{4DBF}'
            | '\u{F900}'..='\u{FAFF}'
            // Hiragana and Katakana (Japanese syllabic scripts).
            | '\u{3040}'..='\u{309F}'
            | '\u{30A0}'..='\u{30FF}'
            // Hangul syllables (Korean).
            | '\u{AC00}'..='\u{D7AF}'
    )
}
156
/// Returns `true` if `ch` must not be the first character of a line.
///
/// These are the "line-start prohibited" characters of CJK typesetting
/// (compare the closing-punctuation and non-starter classes of UAX #14):
/// closing brackets, clause/sentence punctuation, ellipses, the
/// prolonged-sound mark and kana iteration marks.  Both the ASCII and the
/// fullwidth forms of the common punctuation marks are recognised, because
/// CJK text normally uses the fullwidth forms.
fn is_cjk_no_start(ch: char) -> bool {
    matches!(
        ch,
        '、' | '。'
            | ','
            | '.'
            | ':'
            | ';'
            | '?'
            | '!'
            | ')'
            // Fullwidth counterparts of the ASCII marks above:
            // comma, full stop, colon, semicolon, question mark,
            // exclamation mark, right parenthesis.
            | '\u{FF0C}'
            | '\u{FF0E}'
            | '\u{FF1A}'
            | '\u{FF1B}'
            | '\u{FF1F}'
            | '\u{FF01}'
            | '\u{FF09}'
            | '」'
            | '』'
            | '】'
            | '〕'
            | '〉'
            | '》'
            | '·'
            // Katakana middle dot.
            | '\u{30FB}'
            | '‥'
            | '…'
            | 'ー'
            | 'ヽ'
            | 'ヾ'
            | 'ゝ'
            | 'ゞ'
    )
}
188
189/// Break `text` into lines for CJK scripts (no spaces between words).
190///
191/// CJK text is broken at character boundaries with the following rules:
192/// - No line ends with a leading bracket / punctuation character that should
193///   not start a line (`is_cjk_no_start`).
194/// - Lines do not exceed `max_width` characters.
195pub fn cjk_break(text: &str, max_width: u8) -> Vec<String> {
196    let max = max_width.max(1) as usize;
197    let chars: Vec<char> = text.chars().collect();
198    let n = chars.len();
199
200    if n <= max {
201        return vec![text.to_string()];
202    }
203
204    let mut lines: Vec<String> = Vec::new();
205    let mut start = 0;
206
207    while start < n {
208        // Ideal end is `start + max`.
209        let ideal_end = (start + max).min(n);
210
211        if ideal_end >= n {
212            lines.push(chars[start..].iter().collect());
213            break;
214        }
215
216        // Adjust end if the character *after* the cut cannot start a line.
217        let mut end = ideal_end;
218        while end > start + 1 && is_cjk_no_start(chars[end]) {
219            end -= 1;
220        }
221
222        lines.push(chars[start..end].iter().collect());
223        start = end;
224    }
225
226    if lines.is_empty() {
227        lines.push(String::new());
228    }
229    lines
230}
231
232/// Language-aware line breaking.
233///
234/// For CJK text, delegates to [`cjk_break`].  For all other scripts,
235/// delegates to [`greedy_break`].
236///
237/// The heuristic for detecting CJK: if > 30% of non-whitespace characters
238/// are CJK/Hiragana/Katakana/Hangul, the text is treated as CJK.
239pub fn language_aware_break(text: &str, max_width: u8) -> Vec<String> {
240    let non_ws: Vec<char> = text.chars().filter(|c| !c.is_whitespace()).collect();
241    if non_ws.is_empty() {
242        return vec![String::new()];
243    }
244
245    let cjk_count = non_ws.iter().filter(|&&c| is_cjk_char(c)).count();
246    let cjk_fraction = cjk_count as f32 / non_ws.len() as f32;
247
248    if cjk_fraction > 0.30 {
249        cjk_break(text, max_width)
250    } else {
251        greedy_break(text, max_width)
252    }
253}
254
/// Selects the algorithm used to break caption text into lines.
#[derive(Debug, Clone, PartialEq)]
pub enum LineBreakAlgorithm {
    /// Fill each line with as many whole words as fit within `max_width`
    /// (see [`greedy_break`]).
    Greedy,
    /// Dynamic-programming algorithm that minimises raggedness (Knuth-Plass
    /// inspired): `cost(line) = (max_width - used_width)^2`
    /// (see [`optimal_break`]).
    Optimal,
    /// Fixed-width wrapping; the payload is the line width in characters.
    /// Described as a hard wrap that does not split words.
    // NOTE(review): no code consuming this variant is visible in this file —
    // confirm the exact wrapping semantics against the caller.
    Fixed(u8),
}
266
267// ─── Greedy break ─────────────────────────────────────────────────────────────
268
/// Break `text` greedily: each line takes as many whitespace-separated words
/// as fit within `max_width` characters (joined by single spaces).
///
/// - Widths are measured in `char`s, not bytes.
/// - Runs of whitespace are collapsed; leading/trailing whitespace is dropped.
/// - A word longer than `max_width` is placed on its own line unchanged.
/// - A `max_width` of 0 is treated as 1.
/// - Empty or whitespace-only input yields a single empty line.
pub fn greedy_break(text: &str, max_width: u8) -> Vec<String> {
    let max = usize::from(max_width.max(1));
    let mut lines: Vec<String> = Vec::new();
    let mut current = String::new();
    // Character width of `current`, tracked incrementally so the line is not
    // re-scanned for every word.
    let mut current_width = 0usize;

    for word in text.split_whitespace() {
        let word_width = word.chars().count();
        if current.is_empty() {
            current.push_str(word);
            current_width = word_width;
        } else if current_width + 1 + word_width <= max {
            current.push(' ');
            current.push_str(word);
            current_width += 1 + word_width;
        } else {
            // Move the finished line out (no clone) and start the next one.
            lines.push(std::mem::replace(&mut current, word.to_string()));
            current_width = word_width;
        }
    }

    if !current.is_empty() {
        lines.push(current);
    }
    if lines.is_empty() {
        lines.push(String::new());
    }
    lines
}
296
297// ─── Optimal break (Knuth-Plass DP) ──────────────────────────────────────────
298
/// Break `text` into lines by dynamic programming, minimising the total of
/// squared slack over all lines: `cost(line) = (max_width - line_width)^2`.
///
/// Compared with the greedy approach this tends to give more even lines.
/// Every line, including the last, is charged for its slack.  A word wider
/// than `max_width` is forced onto a line of its own at no cost.  A
/// `max_width` of 0 is treated as 1, and empty input yields one empty line.
pub fn optimal_break(text: &str, max_width: u8) -> Vec<String> {
    // Marker for "no layout found yet" in the cost table.
    const UNREACHED: u64 = u64::MAX;

    let limit = usize::from(max_width.max(1));
    let words: Vec<&str> = text.split_whitespace().collect();
    if words.is_empty() {
        return vec![String::new()];
    }
    let count = words.len();

    // Width of every word in characters.
    let widths: Vec<usize> = words.iter().map(|w| w.chars().count()).collect();

    // best_cost[i]: cheapest layout of words[i..].
    // line_end[i]:  exclusive end of the first line in that layout.
    let mut best_cost = vec![UNREACHED; count + 1];
    let mut line_end: Vec<usize> = vec![count; count + 1];
    best_cost[count] = 0;

    for first in (0..count).rev() {
        let mut used = 0usize;
        for last in first..count {
            // Every word after the first on a line costs one extra space.
            used += if last == first {
                widths[last]
            } else {
                widths[last] + 1
            };
            if used > limit {
                break;
            }

            let tail = best_cost[last + 1];
            if tail == UNREACHED {
                continue;
            }

            let slack = limit - used;
            let candidate = ((slack * slack) as u64).saturating_add(tail);
            // Strict comparison: on ties the shorter first line wins.
            if candidate < best_cost[first] {
                best_cost[first] = candidate;
                line_end[first] = last + 1;
            }
        }

        // Not even the first word fits: give it a line of its own.
        if best_cost[first] == UNREACHED {
            best_cost[first] = 0;
            line_end[first] = first + 1;
        }
    }

    // Follow the recorded line ends from the first word onwards.
    let mut lines: Vec<String> = Vec::new();
    let mut cursor = 0;
    while cursor < count {
        // Clamp defensively so every step consumes at least one word.
        let stop = line_end[cursor].min(count).max(cursor + 1);
        lines.push(words[cursor..stop].join(" "));
        cursor = stop;
    }
    lines
}
361
362// ─── Reading-speed helpers ────────────────────────────────────────────────────
363
/// Reading speed of `text` shown for `duration_ms`, in characters per second.
///
/// Characters are counted as `char`s.  A zero duration yields `0.0` rather
/// than dividing by zero.
pub fn compute_cps(text: &str, duration_ms: u64) -> f32 {
    match duration_ms {
        0 => 0.0,
        ms => {
            let seconds = ms as f32 / 1000.0;
            text.chars().count() as f32 / seconds
        }
    }
}
374
375/// Returns `true` when the reading speed of `text` over `duration_ms` does not
376/// exceed `max_cps`.
377pub fn reading_speed_ok(text: &str, duration_ms: u64, max_cps: f32) -> bool {
378    compute_cps(text, duration_ms) <= max_cps
379}
380
/// Shortest display time, in milliseconds, that lets `text` be read at
/// `max_cps`, never going below `min_ms`.
///
/// Formula: `max(min_ms, ceil(char_count * 1000 / max_cps))`.  When `max_cps`
/// is not positive the formula is skipped and `min_ms` is returned.
pub fn adjust_duration_for_reading(text: &str, min_ms: u32, max_cps: f32) -> u32 {
    if max_cps > 0.0 {
        let chars = text.chars().count() as f32;
        let needed_ms = (chars * 1000.0 / max_cps).ceil() as u32;
        min_ms.max(needed_ms)
    } else {
        min_ms
    }
}
393
394// ─── Line balance ─────────────────────────────────────────────────────────────
395
/// Namespace for caption line-balance scoring.
pub struct LineBalance;

impl LineBalance {
    /// Score how uneven the line lengths are, in `[0.0, 1.0]`:
    /// - `0.0` means every line has the same length.
    /// - `1.0` is the cap for very uneven lines.
    ///
    /// The score is the population standard deviation of the line lengths
    /// (in `char`s) divided by their mean, capped at `1.0`.  Zero or one line,
    /// or lines that are all empty, score `0.0`.
    pub fn balance_factor(lines: &[String]) -> f32 {
        if lines.len() < 2 {
            return 0.0;
        }

        let lengths: Vec<f32> = lines
            .iter()
            .map(|line| line.chars().count() as f32)
            .collect();
        let n = lengths.len() as f32;

        let mean = lengths.iter().sum::<f32>() / n;
        if mean < 1e-6 {
            // All lines are empty; avoid dividing by (almost) zero.
            return 0.0;
        }

        let squared_deviations: f32 = lengths.iter().map(|&len| (len - mean).powi(2)).sum();
        let std_dev = (squared_deviations / n).sqrt();

        // Dividing by the mean makes the score independent of line length.
        (std_dev / mean).min(1.0)
    }
}
422
423/// Redistribute words across lines to minimise [`LineBalance::balance_factor`].
424///
425/// Internally calls [`optimal_break`] with a `max_width` derived from the
426/// average line length, then returns the result if it is better balanced than
427/// the input, otherwise returns the input unchanged.
428pub fn rebalance_lines(lines: Vec<String>, max_width: u8) -> Vec<String> {
429    if lines.len() <= 1 {
430        return lines;
431    }
432
433    let original_factor = LineBalance::balance_factor(&lines);
434    let combined = lines.join(" ");
435    let rebroken = optimal_break(&combined, max_width);
436    let new_factor = LineBalance::balance_factor(&rebroken);
437
438    if new_factor < original_factor {
439        rebroken
440    } else {
441        lines
442    }
443}
444
445// ─── Tests ────────────────────────────────────────────────────────────────────
446
#[cfg(test)]
mod tests {
    //! Unit tests for the line-breaking, reading-speed, balance, cache and
    //! CJK helpers defined in the parent module.

    use super::*;

    // --- greedy_break ---

    #[test]
    fn greedy_break_empty_string() {
        let result = greedy_break("", 40);
        assert_eq!(result, vec![""]);
    }

    #[test]
    fn greedy_break_single_word_fits() {
        let result = greedy_break("Hello", 40);
        assert_eq!(result, vec!["Hello"]);
    }

    #[test]
    fn greedy_break_two_words_fit_on_one_line() {
        let result = greedy_break("Hello world", 20);
        assert_eq!(result, vec!["Hello world"]);
    }

    #[test]
    fn greedy_break_wraps_at_limit() {
        let result = greedy_break("Hello world", 8);
        assert_eq!(result, vec!["Hello", "world"]);
    }

    #[test]
    fn greedy_break_multiple_lines() {
        let result = greedy_break("one two three four five", 9);
        // Greedy wrap at width 9: "one two" (7), "three" (5), "four five" (9).
        assert!(result.len() >= 2);
        for line in &result {
            assert!(line.chars().count() <= 9, "line '{line}' exceeds max width");
        }
    }

    #[test]
    fn greedy_break_long_word_gets_own_line() {
        let result = greedy_break("A superlongwordthatexceedslimit B", 10);
        // The over-long word must survive unsplit; this only checks that some
        // line contains it, not that the line holds nothing else.
        assert!(result.iter().any(|l| l.contains("superlongword")));
    }

    #[test]
    fn greedy_break_preserves_all_words() {
        let text = "one two three four five six seven";
        let result = greedy_break(text, 15);
        let rejoined = result.join(" ");
        assert_eq!(rejoined, text);
    }

    // --- optimal_break ---

    #[test]
    fn optimal_break_empty_string() {
        let result = optimal_break("", 40);
        assert_eq!(result, vec![""]);
    }

    #[test]
    fn optimal_break_single_line() {
        let result = optimal_break("Hello world", 20);
        assert_eq!(result, vec!["Hello world"]);
    }

    #[test]
    fn optimal_break_more_balanced_than_greedy() {
        // "one two three four" greedy at width 10:
        //   "one two" (7) + "three four" (10)  → slack: 3, 0
        // optimal must be at least as well balanced (0.01 tolerance).
        let text = "one two three four";
        let optimal = optimal_break(text, 10);
        let greedy = greedy_break(text, 10);
        let opt_balance = LineBalance::balance_factor(&optimal);
        let greed_balance = LineBalance::balance_factor(&greedy);
        // Optimal should be at least as balanced.
        assert!(
            opt_balance <= greed_balance + 0.01,
            "optimal balance {opt_balance} worse than greedy {greed_balance}"
        );
    }

    #[test]
    fn optimal_break_preserves_all_words() {
        let text = "alpha beta gamma delta epsilon zeta";
        let result = optimal_break(text, 15);
        let rejoined = result.join(" ");
        assert_eq!(rejoined, text);
    }

    #[test]
    fn optimal_break_no_line_exceeds_max_width() {
        let text = "short lines should be wrapped correctly by algorithm";
        let result = optimal_break(text, 20);
        for line in &result {
            assert!(
                line.chars().count() <= 20,
                "line '{line}' exceeds max width"
            );
        }
    }

    // --- compute_cps ---

    #[test]
    fn compute_cps_basic() {
        // 10 chars over 2000ms = 5 cps.
        let cps = compute_cps("Hello wrld", 2000);
        assert!((cps - 5.0).abs() < 0.01, "expected ~5.0, got {cps}");
    }

    #[test]
    fn compute_cps_zero_duration_returns_zero() {
        assert_eq!(compute_cps("Hello", 0), 0.0);
    }

    #[test]
    fn compute_cps_empty_text() {
        assert_eq!(compute_cps("", 1000), 0.0);
    }

    // --- reading_speed_ok ---

    #[test]
    fn reading_speed_ok_slow_enough() {
        // 5 chars at 1 second = 5 cps < 17 → ok.
        assert!(reading_speed_ok("Hello", 1000, 17.0));
    }

    #[test]
    fn reading_speed_ok_too_fast() {
        // 50 chars at 1 second = 50 cps > 17.
        let long_text = "A".repeat(50);
        assert!(!reading_speed_ok(&long_text, 1000, 17.0));
    }

    // --- adjust_duration_for_reading ---

    #[test]
    fn adjust_duration_respects_min() {
        // 5 chars at 17 cps needs ~295ms, but min is 1000ms.
        let d = adjust_duration_for_reading("Hello", 1000, 17.0);
        assert_eq!(d, 1000);
    }

    #[test]
    fn adjust_duration_extends_for_long_text() {
        // 170 chars at 17 cps needs 10000ms; min is 1000ms.
        let text = "A".repeat(170);
        let d = adjust_duration_for_reading(&text, 1000, 17.0);
        assert_eq!(d, 10000);
    }

    #[test]
    fn adjust_duration_zero_max_cps_returns_min() {
        let d = adjust_duration_for_reading("Hello world", 500, 0.0);
        assert_eq!(d, 500);
    }

    // --- LineBalance ---

    #[test]
    fn balance_factor_single_line_is_zero() {
        let lines = vec!["Hello world".to_string()];
        assert_eq!(LineBalance::balance_factor(&lines), 0.0);
    }

    #[test]
    fn balance_factor_equal_lines_is_zero() {
        let lines = vec!["Hello".to_string(), "World".to_string()];
        assert!((LineBalance::balance_factor(&lines)).abs() < 1e-5);
    }

    #[test]
    fn balance_factor_unequal_lines_nonzero() {
        let lines = vec!["A".to_string(), "A much longer line here".to_string()];
        assert!(LineBalance::balance_factor(&lines) > 0.0);
    }

    #[test]
    fn balance_factor_empty_lines_is_zero() {
        assert_eq!(LineBalance::balance_factor(&[]), 0.0);
    }

    // --- rebalance_lines ---

    #[test]
    fn rebalance_lines_single_line_unchanged() {
        let lines = vec!["Hello world".to_string()];
        let result = rebalance_lines(lines.clone(), 40);
        assert_eq!(result, lines);
    }

    #[test]
    fn rebalance_lines_produces_at_most_same_balance_factor() {
        let lines = vec![
            "Hi".to_string(),
            "This is a much longer second line here".to_string(),
        ];
        let original_factor = LineBalance::balance_factor(&lines);
        let result = rebalance_lines(lines, 40);
        let new_factor = LineBalance::balance_factor(&result);
        assert!(new_factor <= original_factor + 0.01);
    }

    #[test]
    fn rebalance_lines_preserves_all_words() {
        // Compares word *sets*, so this checks presence, not order or counts.
        let lines = vec!["one two".to_string(), "three four five six".to_string()];
        let original_words: std::collections::HashSet<String> = lines
            .iter()
            .flat_map(|l| l.split_whitespace())
            .map(|w| w.to_string())
            .collect();
        let result = rebalance_lines(lines, 20);
        let result_words: std::collections::HashSet<String> = result
            .iter()
            .flat_map(|l| l.split_whitespace())
            .map(|w| w.to_string())
            .collect();
        assert_eq!(original_words, result_words);
    }

    // --- LineBreakConfig defaults ---

    #[test]
    fn line_break_config_default_broadcast_values() {
        let cfg = LineBreakConfig::default_broadcast();
        assert_eq!(cfg.max_chars_per_line, 42);
        assert_eq!(cfg.max_lines, 2);
        assert_eq!(cfg.min_gap_ms, 80);
        assert_eq!(cfg.hard_max_chars, None);
    }

    // --- LineBreakConfig.hard_max_chars ---

    #[test]
    fn line_break_config_hard_max_chars_constrains_effective() {
        let mut cfg = LineBreakConfig::default_broadcast();
        cfg.hard_max_chars = Some(30);
        assert_eq!(cfg.effective_max_chars(), 30); // hard cap wins
        cfg.hard_max_chars = Some(50);
        assert_eq!(cfg.effective_max_chars(), 42); // max_chars_per_line wins
    }

    // --- AudienceProfile ---

    #[test]
    fn audience_profile_children_have_lower_cps() {
        assert!(AudienceProfile::YoungChildren.max_cps() < AudienceProfile::Adults.max_cps());
        assert!(AudienceProfile::OlderChildren.max_cps() < AudienceProfile::Adults.max_cps());
    }

    #[test]
    fn audience_profile_children_have_longer_min_display() {
        assert!(
            AudienceProfile::YoungChildren.min_display_ms()
                > AudienceProfile::Adults.min_display_ms()
        );
    }

    #[test]
    fn reading_speed_ok_for_audience_children() {
        // 11 chars at 3 seconds ≈ 3.7 cps < 5 cps (YoungChildren threshold)
        assert!(reading_speed_ok_for_audience(
            "Hello world",
            3000,
            AudienceProfile::YoungChildren
        ));
    }

    #[test]
    fn reading_speed_too_fast_for_children() {
        // 100 chars at 2 seconds = 50 cps > 5 cps
        let text = "A".repeat(100);
        assert!(!reading_speed_ok_for_audience(
            &text,
            2000,
            AudienceProfile::YoungChildren
        ));
    }

    // --- CpsCache ---

    #[test]
    fn cps_cache_returns_same_value_twice() {
        let mut cache = CpsCache::new();
        let v1 = cache.compute_cps("Hello world", 2000);
        let v2 = cache.compute_cps("Hello world", 2000);
        assert!((v1 - v2).abs() < 1e-6);
    }

    #[test]
    fn cps_cache_stores_entry() {
        let mut cache = CpsCache::new();
        assert_eq!(cache.len(), 0);
        cache.compute_cps("Hello", 1000);
        assert_eq!(cache.len(), 1);
        // Same key → no new entry.
        cache.compute_cps("Hello", 1000);
        assert_eq!(cache.len(), 1);
        // Different text → new entry.
        cache.compute_cps("World", 1000);
        assert_eq!(cache.len(), 2);
    }

    #[test]
    fn cps_cache_clear_removes_all_entries() {
        let mut cache = CpsCache::new();
        cache.compute_cps("Hello", 1000);
        cache.clear();
        assert!(cache.is_empty());
    }

    // --- CJK breaking ---

    #[test]
    fn cjk_break_short_text_unchanged() {
        let text = "日本語";
        let result = cjk_break(text, 10);
        assert_eq!(result.len(), 1);
        assert_eq!(result[0], text);
    }

    #[test]
    fn cjk_break_long_text_splits_at_char_boundary() {
        let text = "これは日本語のテキストサンプルです"; // 17 chars
        let result = cjk_break(text, 5);
        assert!(result.len() > 1, "expected split");
        for line in &result {
            let count = line.chars().count();
            assert!(count <= 5, "line '{line}' has {count} chars > 5");
        }
        // All characters should be preserved.
        let combined: String = result.concat();
        assert_eq!(combined.chars().count(), text.chars().count());
    }

    #[test]
    fn language_aware_break_latin_uses_greedy() {
        let text = "Hello there how are you doing";
        let result = language_aware_break(text, 12);
        let rejoined = result.join(" ");
        assert_eq!(rejoined, text);
    }

    #[test]
    fn language_aware_break_cjk_detected() {
        let text = "これは日本語のテキストです"; // all CJK
        let result = language_aware_break(text, 5);
        assert!(result.len() > 1, "expected multi-line CJK break");
    }

    // --- optimal_break reference output test ---

    #[test]
    fn optimal_break_reference_output_known_case() {
        // Reference: "one two three four five" at width 11.
        // Only the invariants are checked here (words preserved, width
        // respected), not the exact line split.
        let text = "one two three four five";
        let result = optimal_break(text, 11);
        // All words must be present.
        let rejoined = result.join(" ");
        assert_eq!(rejoined, text);
        // No line exceeds max width.
        for line in &result {
            assert!(
                line.chars().count() <= 11,
                "line '{line}' exceeds max width"
            );
        }
    }

    #[test]
    fn greedy_and_optimal_produce_identical_single_line() {
        // When all text fits on one line, both algorithms must produce one line.
        let text = "Hello";
        let g = greedy_break(text, 20);
        let o = optimal_break(text, 20);
        assert_eq!(g, o);
    }

    #[test]
    fn greedy_and_optimal_identical_for_single_word_per_line() {
        // Each word fits on one line individually: both algorithms agree.
        let text = "a b c";
        let g = greedy_break(text, 1);
        let o = optimal_break(text, 1);
        // Both produce 3 lines of 1 character each.
        assert_eq!(g.len(), o.len(), "g={:?} o={:?}", g, o);
    }
}