batch_mode_tts/
chunk.rs

1crate::ix!();
2
3impl BatchModeTtsJob {
4    /// Character‑safe splitter (≤ `max_len` chars) with a soft preference for newline
5    /// boundaries. Long single lines are split at exact character boundaries.
6    pub fn chunk_text(text: &str, max_len: usize) -> Vec<String> {
7        let limit = max_len.min(4096);
8        if limit == 0 {
9            warn!("chunk_text called with max_len=0; returning empty chunk list");
10            return Vec::new();
11        }
12
13        let mut res = Vec::<String>::new();
14        let mut buf = String::new();
15        let mut buf_chars: usize = 0;
16
17        // Helper: flush current buffer if non‑empty
18        let flush = |res: &mut Vec<String>, buf: &mut String, buf_chars: &mut usize| {
19            if !buf.is_empty() {
20                trace!("Flushing chunk of {} chars", *buf_chars);
21                res.push(std::mem::take(buf));
22                *buf_chars = 0;
23            }
24        };
25
26        for line in text.lines() {
27            let mut remaining = line;
28            loop {
29                let rem_chars = remaining.chars().count();
30                // Will we need to insert a newline before appending `remaining`?
31                let sep = if buf.is_empty() { 0 } else { 1 };
32
33                if buf_chars + sep + rem_chars <= limit {
34                    if sep == 1 {
35                        buf.push('\n');
36                        buf_chars += 1;
37                    }
38                    buf.push_str(remaining);
39                    buf_chars += rem_chars;
40                    break; // Done with this line
41                }
42
43                // Not enough room to add whole `remaining`
44                // 1) If the buffer has something, flush it first so we start fresh
45                if buf_chars > 0 {
46                    flush(&mut res, &mut buf, &mut buf_chars);
47                    // Continue the loop — we will re‑evaluate with an empty buffer
48                    continue;
49                }
50
51                // 2) Buffer is empty but `remaining` itself is too large;
52                // split `remaining` into `limit`‑sized char chunk
53                let take_n = limit; // fill exactly to limit
54                // Take first `take_n` chars of `remaining`
55                let taken: String = remaining.chars().take(take_n).collect();
56                let taken_count = taken.chars().count();
57                debug!(
58                    "Splitting an over‑long line into a full chunk of {} chars (limit {})",
59                    taken_count, limit
60                );
61                res.push(taken);
62
63                // Advance `remaining`
64                let mut it = remaining.chars();
65                for _ in 0..take_n {
66                    it.next();
67                }
68                let rest: String = it.collect();
69
70                if rest.is_empty() {
71                    break; // finished this line
72                } else {
73                    remaining = &rest; // loop continues on rest
74                    // We must store `rest` somewhere stable; allocate new String and use it
75                    // To avoid lifetime issue, rebind `remaining` to a new owned string and iterate again
76                    let owned = rest; // already owned
77                    // Reassign `remaining` to a &'_ str from owned for next iteration
78                    // But we cannot keep `owned` alive across iterations without storage.
79                    // Workaround: move ownership into `remaining_owned` and shadow `remaining`.
80                    // Implement via block scope below.
81                    let mut cursor = owned;
82                    loop {
83                        // inner splitting loop replicates the top logic with `cursor`
84                        // 1) Determine chars left
85                        let c_rem = cursor.chars().count();
86                        let sep2 = if buf.is_empty() { 0 } else { 1 };
87                        if buf_chars + sep2 + c_rem <= limit {
88                            if sep2 == 1 {
89                                buf.push('\n');
90                                buf_chars += 1;
91                            }
92                            buf.push_str(&cursor);
93                            buf_chars += c_rem;
94                            break;
95                        }
96                        if buf_chars > 0 {
97                            flush(&mut res, &mut buf, &mut buf_chars);
98                            continue;
99                        }
100                        // Take another full chunk from cursor
101                        let take_full: String = cursor.chars().take(limit).collect();
102                        let taken_full_count = take_full.chars().count();
103                        debug!(
104                            "Splitting continuation into full chunk of {} chars (limit {})",
105                            taken_full_count, limit
106                        );
107                        res.push(take_full);
108                        let mut it2 = cursor.chars();
109                        for _ in 0..limit {
110                            it2.next();
111                        }
112                        let tmp: String = it2.collect();
113                        if tmp.is_empty() {
114                            break;
115                        }
116                        cursor = tmp;
117                        // continue inner loop
118                    }
119                    break; // done with original `line`
120                }
121            }
122        }
123
124        if !buf.is_empty() {
125            trace!("Flushing final chunk of {} chars", buf_chars);
126            res.push(buf);
127        }
128
129        res
130    }
131}