Skip to main content

prosaic_core/
length.rs

1//! Sentence-length budgeting.
2//!
3//! When a rendered sentence exceeds the engine's configured budget,
4//! split it at a natural boundary (subordinate clause, list prefix,
5//! em-dash) and wrap the tail as its own follow-up sentence. The tail
6//! gets a lightweight transform so it stands on its own grammatically
7//! (`, which impacts 6 consumers` → `This impacts 6 consumers.`).
8
9#[cfg(not(feature = "std"))]
10use alloc::format;
11#[cfg(not(feature = "std"))]
12use alloc::string::{String, ToString};
13
14/// Try to split `sentence` so each piece fits within `max_chars`.
15///
16/// Returns the input unchanged if it's already short enough, or if no
17/// natural split point exists inside the budget. The returned string
18/// joins fragments with `" "`, so downstream sentence-termination and
19/// cleanup still work.
20pub fn split_long(sentence: &str, max_chars: usize) -> String {
21    let mut s = sentence.to_string();
22    split_long_in_place(&mut s, max_chars);
23    s
24}
25
26/// In-place version of [`split_long`]. Mutates `output` so each piece fits
27/// within `max_chars`, splicing tail sentences back onto the buffer.
28pub(crate) fn split_long_in_place(output: &mut String, max_chars: usize) {
29    if output.chars().count() <= max_chars {
30        return;
31    }
32
33    // Upper bound on where we'll look for a split — allow slight
34    // overflow rather than aggressively shrinking below budget.
35    let search_end = (max_chars + 40).min(output.len());
36    let window = &output[..search_end];
37
38    // Ordered by priority: longer/more-specific markers first.
39    let candidates: &[(&str, ContinuationKind)] = &[
40        (", which ", ContinuationKind::Which),
41        (", affecting ", ContinuationKind::Affecting),
42        (", impacting ", ContinuationKind::Impacting),
43        (", requiring ", ContinuationKind::Requiring),
44        (" including ", ContinuationKind::Including),
45        (" — ", ContinuationKind::Dash),
46        (". ", ContinuationKind::Sentence),
47    ];
48
49    // Find the latest acceptable split point (highest byte index) so we
50    // keep the first sentence as substantive as possible.
51    let mut best: Option<(usize, usize, ContinuationKind)> = None;
52    for (marker, kind) in candidates {
53        if let Some(idx) = window.rfind(marker) {
54            // Don't split if the marker is at the very start — that
55            // would leave the first half empty.
56            if idx == 0 {
57                continue;
58            }
59            let end = idx + marker.len();
60            match best {
61                Some((prev_idx, _, _)) if prev_idx >= idx => {}
62                _ => best = Some((idx, end, *kind)),
63            }
64        }
65    }
66
67    let (split_at, tail_start, kind) = match best {
68        Some(b) => b,
69        None => return,
70    };
71
72    // Split the tail off the buffer.
73    let tail_raw = output[tail_start..].trim_start().to_string();
74    // Trim the head.
75    let head_end = output[..split_at].trim_end_matches([',', ' ']).len();
76    output.truncate(head_end);
77
78    // Rewrite the tail so it stands alone grammatically.
79    let mut tail_buf = rewrite_tail(&tail_raw, kind);
80
81    // Recursively split the tail in case it's still too long.
82    split_long_in_place(&mut tail_buf, max_chars);
83
84    // Append ". " + tail onto the head.
85    output.push('.');
86    output.push(' ');
87    output.push_str(&tail_buf);
88}
89
/// Which boundary marker a sentence was cut at.
///
/// The variant tells the tail-rewriting step what lead-in the split-off
/// remainder needs in order to read as a sentence of its own.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ContinuationKind {
    /// Cut at `", which "`: the remainder is prefixed with "This " and the
    /// verb that followed "which" is kept as written.
    Which,
    /// Cut at `", affecting "`: the remainder is prefixed with
    /// "This affects ".
    Affecting,
    /// Cut at `", impacting "`: the remainder is prefixed with
    /// "This impacts ".
    Impacting,
    /// Cut at `", requiring "`: the remainder is prefixed with
    /// "This requires ".
    Requiring,
    /// Cut at `" including "`: the remainder is prefixed with a capitalized
    /// "Including ".
    Including,
    /// Cut at an em-dash (`" — "`): the remainder only gets its first
    /// letter capitalized.
    Dash,
    /// Cut at an existing `". "`: the remainder is already the next
    /// sentence and only gets its first letter capitalized.
    Sentence,
}
107
108fn rewrite_tail(tail: &str, kind: ContinuationKind) -> String {
109    match kind {
110        ContinuationKind::Which => {
111            // "which impacts 6 consumers" → "This impacts 6 consumers"
112            // The verb right after "which" stays as-is; we just replace
113            // the relative pronoun with "This".
114            format!("This {tail}")
115        }
116        ContinuationKind::Affecting => format!("This affects {tail}"),
117        ContinuationKind::Impacting => format!("This impacts {tail}"),
118        ContinuationKind::Requiring => format!("This requires {tail}"),
119        ContinuationKind::Including => {
120            // "ProfileComponent, SettingsComponent, …" → "Including
121            // ProfileComponent, …." Keeps the marker word as the
122            // sentence head so the list still reads naturally.
123            format!("Including {tail}")
124        }
125        ContinuationKind::Dash | ContinuationKind::Sentence => capitalize_first(tail),
126    }
127}
128
/// Return `s` with its first character upper-cased.
///
/// Only the leading character is changed; everything after it is copied
/// through verbatim. An empty input yields an empty `String`. The
/// upper-case form of a character may be more than one character long
/// (for example `ß` becomes `SS`), and all of it is emitted.
fn capitalize_first(s: &str) -> String {
    let mut rest = s.chars();
    let first = match rest.next() {
        Some(ch) => ch,
        None => return String::new(),
    };

    let mut capitalized = String::with_capacity(s.len());
    capitalized.extend(first.to_uppercase());
    // `rest` has consumed exactly one char, so this is the untouched remainder.
    capitalized.push_str(rest.as_str());
    capitalized
}
143
#[cfg(test)]
mod tests {
    use super::*;

    /// Input that already fits the budget comes back untouched.
    #[test]
    fn under_budget_returns_unchanged() {
        let input = "short sentence";
        let result = split_long(input, 80);
        assert_eq!(result, input);
    }

    /// A `", which "` clause is split off and re-headed with "This".
    #[test]
    fn splits_on_which_marker() {
        let input = "The class UserService was renamed to AccountService, \
                     which impacts 6 consumers";
        let out = split_long(input, 60);
        assert!(
            out.starts_with("The class UserService was renamed"),
            "got: {out}"
        );
        assert!(out.contains("This impacts 6 consumers"), "got: {out}");
    }

    /// A long `" including "` list becomes its own "Including …" sentence.
    #[test]
    fn splits_on_including_marker() {
        let input = "The method processOrder was modified, which may affect \
                     5 consumers including CartComponent, CheckoutFlow, \
                     OrderHistory, ProfilePage, AdminView";
        let out = split_long(input, 80);
        assert!(out.contains("Including"), "got: {out}");
    }

    /// A `", affecting "` clause is rewritten to "This affects …".
    #[test]
    fn splits_on_affecting_marker() {
        let input = "AuthGuard was modified, affecting 3 routes Dashboard, \
                     Settings, Admin";
        let out = split_long(input, 35);
        assert!(out.contains("This affects 3 routes"), "got: {out}");
    }

    /// With nowhere natural to split, the text is returned unchanged
    /// rather than chopped mid-word.
    #[test]
    fn no_split_when_no_natural_boundary() {
        let input = "Averyverylongrunningstringwithnospacesandnowordsseparated";
        let out = split_long(input, 20);
        assert_eq!(out, input);
    }

    /// A sentence with several markers is split at least once at a
    /// natural boundary.
    #[test]
    fn recursive_split_handles_multi_long_sentence() {
        let input = "The class UserService was renamed to AccountService, \
                     which impacts 12 consumers including Alpha, Bravo, \
                     Charlie, Delta, Echo, Foxtrot, Golf, Hotel, India, Juliet";
        let out = split_long(input, 60);
        let boundaries = out.matches(". ").count();
        assert!(boundaries >= 1, "got: {out}");
    }
}