1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
//! Sentence-length budgeting.
//!
//! When a rendered sentence exceeds the engine's configured budget,
//! split it at a natural boundary (subordinate clause, list prefix,
//! em-dash) and wrap the tail as its own follow-up sentence. The tail
//! gets a lightweight transform so it stands on its own grammatically
//! (`, which impacts 6 consumers` → `This impacts 6 consumers.`).
#[cfg(not(feature = "std"))]
use alloc::format;
#[cfg(not(feature = "std"))]
use alloc::string::{String, ToString};
/// Break an over-long `sentence` into shorter follow-up sentences.
///
/// This is the owning convenience wrapper around [`split_long_in_place`]:
/// the input is copied into a fresh `String`, budgeted in place, and the
/// resulting buffer is handed back to the caller.
///
/// The text comes back unchanged when it has at most `max_chars`
/// characters, or when none of the known boundary markers is found.
/// When a split happens, the pieces are joined into one string with a
/// `". "` between the head and its follow-up sentence.
pub fn split_long(sentence: &str, max_chars: usize) -> String {
    let mut budgeted = String::from(sentence);
    split_long_in_place(&mut budgeted, max_chars);
    budgeted
}
/// In-place version of [`split_long`]: rewrites `output` as a head sentence
/// followed by one or more stand-alone follow-up sentences.
///
/// The buffer is left untouched when:
/// - it has at most `max_chars` characters,
/// - none of the known boundary markers occurs inside the search window
///   (a marker at byte 0 does not count), or
/// - cutting at the chosen marker would leave an empty head or an empty
///   tail (e.g. the marker is the last thing in the buffer).
///
/// Otherwise the buffer is cut at the right-most marker inside the window.
/// Trailing commas/spaces are trimmed from the head, the tail is rewritten
/// by `rewrite_tail` so it reads as its own sentence, the tail is split
/// again recursively, and the result is appended after `". "`.
///
/// The search window spans the first `max_chars + 40` characters, so the
/// head may overshoot the budget slightly instead of being cut very short.
/// Both the budget and the window are measured in characters, never raw
/// bytes: slicing at a byte offset derived from `max_chars` would panic
/// whenever it landed inside a multi-byte character (the em-dash marker
/// itself is one).
pub(crate) fn split_long_in_place(output: &mut String, max_chars: usize) {
    /// Extra characters past the budget that are still searched for a marker.
    const OVERFLOW_SLACK: usize = 40;
    /// Recognised boundary markers and how the text after each is rewritten.
    /// The right-most match wins; list order only breaks ties.
    const MARKERS: [(&str, ContinuationKind); 7] = [
        (", which ", ContinuationKind::Which),
        (", affecting ", ContinuationKind::Affecting),
        (", impacting ", ContinuationKind::Impacting),
        (", requiring ", ContinuationKind::Requiring),
        (" including ", ContinuationKind::Including),
        (" — ", ContinuationKind::Dash),
        (". ", ContinuationKind::Sentence),
    ];

    if output.chars().count() <= max_chars {
        return;
    }

    // Translate the character-based window into a byte offset that is
    // guaranteed to sit on a char boundary. `nth` returns `None` when the
    // buffer is shorter than the window, in which case we search all of it.
    let window_chars = max_chars.saturating_add(OVERFLOW_SLACK);
    let search_end = output
        .char_indices()
        .nth(window_chars)
        .map_or(output.len(), |(byte_idx, _)| byte_idx);
    let window = &output[..search_end];

    // Pick the latest split point (highest byte index) so the first
    // sentence stays as substantive as possible.
    let mut best: Option<(usize, usize, ContinuationKind)> = None;
    for &(marker, kind) in MARKERS.iter() {
        if let Some(idx) = window.rfind(marker) {
            // A marker at the very start would leave the head empty.
            if idx == 0 {
                continue;
            }
            let is_later = match best {
                Some((prev_idx, _, _)) => idx > prev_idx,
                None => true,
            };
            if is_later {
                best = Some((idx, idx + marker.len(), kind));
            }
        }
    }
    let (split_at, tail_start, kind) = match best {
        Some(found) => found,
        None => return,
    };

    // Detach the tail as an owned string before the buffer is truncated.
    let tail_raw = output[tail_start..].trim_start().to_string();
    // The head ends before the marker, minus any trailing separators.
    let head_end = output[..split_at].trim_end_matches([',', ' ']).len();

    // Splitting here would emit a dangling fragment such as "This " or a
    // sentence that starts with ". " — keep the original text instead.
    if tail_raw.is_empty() || head_end == 0 {
        return;
    }

    output.truncate(head_end);

    // Rewrite the tail so it stands alone grammatically, then budget it too.
    let mut tail_buf = rewrite_tail(&tail_raw, kind);
    split_long_in_place(&mut tail_buf, max_chars);

    // Terminate the head and append the follow-up sentence(s).
    output.push_str(". ");
    output.push_str(&tail_buf);
}
/// Which boundary marker a sentence was cut at; decides how the text after
/// the marker is turned into a sentence of its own.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ContinuationKind {
    /// Cut at `", which "`: the follow-up sentence is the tail prefixed
    /// with `"This "`, so the verb after "which" is kept as written.
    Which,
    /// Cut at `", affecting "`: the follow-up sentence opens with
    /// `"This affects "`.
    Affecting,
    /// Cut at `", impacting "`: the follow-up sentence opens with
    /// `"This impacts "`.
    Impacting,
    /// Cut at `", requiring "`: the follow-up sentence opens with
    /// `"This requires "`.
    Requiring,
    /// Cut at `" including "`: the marker word is kept, capitalized, as
    /// the head of the follow-up sentence (`"Including "`).
    Including,
    /// Cut at an em-dash (`" — "`): the tail is only capitalized.
    Dash,
    /// Cut at an existing sentence end (`". "`): the tail is taken as the
    /// next sentence and only capitalized.
    Sentence,
}
/// Turn the text that followed a split marker into a stand-alone sentence.
///
/// `tail` is the text after the marker (the marker itself already removed)
/// and `kind` says which marker it was. Dash and sentence splits only get
/// their first character upper-cased; every other kind gets a fixed lead-in
/// followed by a single space and the tail verbatim.
fn rewrite_tail(tail: &str, kind: ContinuationKind) -> String {
    let lead_in = match kind {
        // Nothing to prepend — the tail already reads as a sentence.
        ContinuationKind::Dash | ContinuationKind::Sentence => {
            return capitalize_first(tail);
        }
        // "which impacts 6 consumers" → "This impacts 6 consumers": the
        // relative pronoun is swapped for "This", the verb stays as-is.
        ContinuationKind::Which => "This",
        ContinuationKind::Affecting => "This affects",
        ContinuationKind::Impacting => "This impacts",
        ContinuationKind::Requiring => "This requires",
        // The marker word stays as the sentence head so a list such as
        // "ProfileComponent, SettingsComponent, …" still reads naturally.
        ContinuationKind::Including => "Including",
    };
    format!("{lead_in} {tail}")
}
/// Return a copy of `s` with its first character upper-cased.
///
/// An empty input yields an empty string. Everything after the first
/// character is copied verbatim. The upper-case form of a character may be
/// more than one character long, in which case all of them are emitted.
fn capitalize_first(s: &str) -> String {
    s.chars().next().map_or_else(String::new, |first| {
        let mut capitalized = String::with_capacity(s.len());
        capitalized.extend(first.to_uppercase());
        // Skip past the first character's bytes and keep the rest untouched.
        capitalized.push_str(&s[first.len_utf8()..]);
        capitalized
    })
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Text that already fits the budget must come back untouched.
    #[test]
    fn under_budget_returns_unchanged() {
        let s = "short sentence";
        let out = split_long(s, 80);
        assert_eq!(out, s);
    }

    /// A `, which ` clause becomes a follow-up sentence starting with "This".
    #[test]
    fn splits_on_which_marker() {
        let s = "The class UserService was renamed to AccountService, \
                 which impacts 6 consumers";
        let out = split_long(s, 60);
        assert!(
            out.starts_with("The class UserService was renamed"),
            "got: {out}"
        );
        assert!(out.contains("This impacts 6 consumers"), "got: {out}");
    }

    /// An ` including ` list is moved into its own "Including …" sentence.
    #[test]
    fn splits_on_including_marker() {
        let s = "The method processOrder was modified, which may affect \
                 5 consumers including CartComponent, CheckoutFlow, \
                 OrderHistory, ProfilePage, AdminView";
        let out = split_long(s, 80);
        assert!(out.contains("Including"), "got: {out}");
    }

    /// A `, affecting ` clause becomes "This affects …".
    #[test]
    fn splits_on_affecting_marker() {
        let s = "AuthGuard was modified, affecting 3 routes Dashboard, \
                 Settings, Admin";
        let out = split_long(s, 35);
        assert!(out.contains("This affects 3 routes"), "got: {out}");
    }

    /// With no marker to cut at, the text is returned as-is rather than
    /// being chopped mid-word.
    #[test]
    fn no_split_when_no_natural_boundary() {
        let s = "Averyverylongrunningstringwithnospacesandnowordsseparated";
        let out = split_long(s, 20);
        assert_eq!(out, s);
    }

    /// A sentence with several markers is split at least once at a natural
    /// boundary.
    #[test]
    fn recursive_split_handles_multi_long_sentence() {
        let s = "The class UserService was renamed to AccountService, \
                 which impacts 12 consumers including Alpha, Bravo, \
                 Charlie, Delta, Echo, Foxtrot, Golf, Hotel, India, Juliet";
        let out = split_long(s, 60);
        assert!(out.contains(". "), "got: {out}");
    }
}