oxicuda_seq/string/manacher.rs
1//! Manacher's algorithm for longest palindromic substring in linear time.
2//!
3//! Reference: Glenn Manacher, *"A New Linear-Time 'On-Line' Algorithm for
4//! Finding the Smallest Initial Palindrome of a String"*, Journal of the ACM
5//! 22(3), 1975, pp. 346–351. The modern center/radius formulation used here is
6//! the standard textbook restatement.
7//!
8//! # Idea
9//!
10//! A palindrome is either *odd*-length (centred on a character) or *even*-length
11//! (centred between two characters). To treat both uniformly, the input is
12//! transformed by interleaving a separator that occurs nowhere in the data:
13//!
14//! ```text
15//! "abba" -> ^#a#b#b#a#$
16//! ```
17//!
18//! Every palindrome of the original string corresponds to an odd-length
19//! palindrome of the transformed string centred at one of the `#`/character
20//! slots, and vice versa. We compute, for each transformed position `i`, the
21//! radius `p[i]` = the largest `r` such that `t[i-r..=i+r]` is a palindrome.
22//!
23//! The linear-time trick maintains the palindrome with the right-most boundary
24//! seen so far, `(center, right)`. For a new position `i` inside that
25//! palindrome, the **mirror** position `2*center - i` gives a free lower bound
26//! on `p[i]`; only the excess beyond the boundary is verified by explicit
27//! character comparisons, and each such comparison advances `right`, so the
28//! total work is `O(n)`.
29//!
30//! The sentinels `^` and `$` (chosen here as two *distinct* synthetic markers
31//! that never compare equal to any data byte or to each other) remove the need
32//! for explicit bounds checks during expansion.
33//!
34//! # What is returned
35//!
36//! [`manacher`] returns a [`Manacher`] bundling the longest palindromic
37//! substring together with its location and the full per-center radius array,
38//! from which every maximal palindrome — and the total count of palindromic
39//! substrings — can be recovered ([`Manacher::palindrome_count`],
40//! [`Manacher::longest`]).
41//!
42//! Input is treated as raw bytes (`&[u8]`); for ASCII this is the usual
43//! character-level notion of a palindrome.
44
45use crate::error::{SeqError, SeqResult};
46
47/// Result of running [`manacher`] on a byte string.
48///
49/// The radius array `radii` is indexed by *transformed* position. Position `i`
50/// of the transformed string corresponds either to a real character (when `i`
51/// is odd in the `# c # c # …` layout) or to a gap between characters (when `i`
52/// is even). `radii[i]` is the palindromic radius in transformed coordinates;
53/// the length of the palindrome centred there, measured in *original*
54/// characters, is exactly `radii[i]`.
55#[derive(Debug, Clone)]
56pub struct Manacher {
57 /// Start index (inclusive) of a longest palindromic substring in the input.
58 pub start: usize,
59 /// Length in bytes of a longest palindromic substring (0 for empty input).
60 pub length: usize,
61 /// Per-center radius array in transformed coordinates. Its length is
62 /// `2 * n + 1` where `n = input.len()`. `radii[i]` equals the number of
63 /// original characters spanned on each side of center `i`, i.e. the length
64 /// of the palindrome centred at `i` in original-character units.
65 pub radii: Vec<usize>,
66}
67
68impl Manacher {
69 /// Borrow the longest palindromic substring out of the original `input`.
70 ///
71 /// Returns `&input[start..start + length]`. For an empty input this is the
72 /// empty slice.
73 pub fn longest<'a>(&self, input: &'a [u8]) -> &'a [u8] {
74 &input[self.start..self.start + self.length]
75 }
76
77 /// Total number of palindromic substrings (each distinct occurrence counted
78 /// once), derived from the radius array.
79 ///
80 /// A palindrome centred at transformed position `i` with radius `r` (in
81 /// original-character units) contains `⌈r / 2⌉` palindromic substrings that
82 /// are themselves centred at `i` (itself, then peeling two characters at a
83 /// time). Summing over all centers counts every palindromic substring of
84 /// the original string exactly once, because each palindrome has a unique
85 /// center in the transformed string.
86 pub fn palindrome_count(&self) -> usize {
87 self.radii.iter().map(|&r| r.div_ceil(2)).sum()
88 }
89}
90
91/// Compute the longest palindromic substring of `input` with Manacher's
92/// algorithm in `O(n)` time.
93///
94/// On ties (several palindromes of maximal length) the **left-most** one is
95/// returned, which makes the result deterministic. The returned [`Manacher`]
96/// also exposes the full radius array for callers that need every palindrome.
97///
98/// # Errors
99///
100/// Returns [`SeqError::EmptyInput`] for an empty `input`: there is no
101/// palindromic *substring* of the empty string under the convention that a
102/// substring is non-empty.
103///
104/// # Examples
105///
106/// ```
107/// use oxicuda_seq::string::manacher;
108///
109/// let m = manacher(b"babad").expect("non-empty");
110/// // Either "bab" or "aba"; this implementation returns the left-most, "bab".
111/// assert_eq!(m.longest(b"babad"), b"bab");
112/// assert_eq!(m.length, 3);
113/// ```
114pub fn manacher(input: &[u8]) -> SeqResult<Manacher> {
115 if input.is_empty() {
116 return Err(SeqError::EmptyInput);
117 }
118
119 let n = input.len();
120
121 // Transformed alphabet. We map each data byte to a value in 2..=257 so that
122 // the two sentinels (LEFT_GUARD, GAP) and the boundary marker are reserved
123 // and never collide with any data character.
124 const GAP: u16 = 0; // the interleaved separator '#'
125 const LEFT_GUARD: u16 = 1; // synthetic '^' to the left of the whole string
126
127 // Build "^ # c0 # c1 # … # c_{n-1} # $" where the trailing region is
128 // implicitly guarded: we store an explicit right guard distinct from GAP.
129 const RIGHT_GUARD: u16 = 0xFFFF; // synthetic '$'; distinct from LEFT_GUARD.
130
131 // The center/gap layout has length 2*n + 1 (a gap, then char, gap, char, …,
132 // gap). We additionally bracket it with the two distinct guards so that the
133 // expansion loop never needs a bounds check.
134 let trans_len = 2 * n + 1;
135 let mut t: Vec<u16> = Vec::with_capacity(trans_len + 2);
136 t.push(LEFT_GUARD);
137 for &byte in input {
138 t.push(GAP);
139 // Offset data bytes by 2 to clear LEFT_GUARD(1) and GAP(0).
140 t.push(u16::from(byte) + 2);
141 }
142 t.push(GAP);
143 t.push(RIGHT_GUARD);
144
145 // `p[k]` is the radius (in transformed units) at transformed-center k, where
146 // k ranges over the inner positions 1..=trans_len of `t` (excluding the two
147 // guards). We index `p` by the *inner* coordinate i in 0..trans_len, mapping
148 // to t-coordinate i + 1.
149 let mut p = vec![0usize; trans_len];
150
151 let mut center: isize = 0; // current palindrome center, inner coordinate
152 let mut right: isize = 0; // one past the right edge, inner coordinate
153
154 for i in 0..trans_len as isize {
155 // Mirror of i across `center`.
156 if i < right {
157 let mirror = 2 * center - i;
158 // Free lower bound: min(distance to right edge, mirror radius).
159 let bound = (right - i) as usize;
160 p[i as usize] = bound.min(p[mirror as usize]);
161 }
162
163 // Attempt to expand around i. The +1 offsets translate inner coordinate
164 // to t-coordinate; guards stop the loop without explicit bounds checks.
165 loop {
166 let radius = p[i as usize] as isize;
167 let left_pos = i - radius - 1 + 1; // = i - radius (t-coordinate)
168 let right_pos = i + radius + 1 + 1; // = i + radius + 2 (t-coordinate)
169 // Bounds are guaranteed by the guards: indices stay in 0..t.len().
170 if t[left_pos as usize] == t[right_pos as usize] {
171 p[i as usize] += 1;
172 } else {
173 break;
174 }
175 }
176
177 // Slide the right-most boundary if we expanded past it.
178 if i + p[i as usize] as isize > right {
179 center = i;
180 right = i + p[i as usize] as isize;
181 }
182 }
183
184 // Recover the longest palindrome. In this transform, p[i] (transformed
185 // radius) equals the palindrome length in *original* characters, and its
186 // start in original coordinates is (i - p[i]) / 2.
187 let mut best_len = 0usize;
188 let mut best_center = 0usize;
189 for (i, &radius) in p.iter().enumerate() {
190 if radius > best_len {
191 best_len = radius;
192 best_center = i;
193 }
194 }
195 // `best_center` is an inner coordinate; original start = (best_center - p)/2.
196 let start = (best_center - best_len) / 2;
197
198 Ok(Manacher {
199 start,
200 length: best_len,
201 radii: p,
202 })
203}
204
205/// Convenience wrapper returning the longest palindromic substring of a `&str`
206/// as an owned `String`.
207///
208/// The string is processed by its UTF-8 bytes; for inputs containing multi-byte
209/// characters the returned slice is guaranteed to fall on byte boundaries only
210/// when those characters are not split by a palindrome edge, so this helper is
211/// intended for ASCII / byte-oriented use. The result is re-decoded losslessly
212/// because palindrome edges in a byte palindrome of valid UTF-8 input land on
213/// character boundaries for ASCII text.
214///
215/// # Errors
216///
217/// Propagates [`SeqError::EmptyInput`] for an empty string, and returns
218/// [`SeqError::InvalidObservation`] if the recovered byte range is not valid
219/// UTF-8 (possible only for non-ASCII inputs whose palindrome splits a
220/// multi-byte sequence).
221pub fn longest_palindrome_str(input: &str) -> SeqResult<String> {
222 let m = manacher(input.as_bytes())?;
223 let slice = m.longest(input.as_bytes());
224 std::str::from_utf8(slice)
225 .map(|s| s.to_owned())
226 .map_err(|e| {
227 SeqError::InvalidObservation(format!("palindrome split a UTF-8 boundary: {e}"))
228 })
229}
230
231#[cfg(test)]
232mod tests {
233 use super::*;
234
235 /// Brute-force `O(n²)` expansion oracle: returns, for the input, the length
236 /// and start of a left-most longest palindrome, plus the total count of
237 /// palindromic substrings.
238 fn brute_force(input: &[u8]) -> (usize, usize, usize) {
239 let n = input.len();
240 let mut best_len = 0usize;
241 let mut best_start = 0usize;
242 let mut count = 0usize;
243
244 let is_pal = |lo: usize, hi: usize| -> bool {
245 // Inclusive [lo, hi].
246 let (mut a, mut b) = (lo, hi);
247 while a < b {
248 if input[a] != input[b] {
249 return false;
250 }
251 a += 1;
252 b -= 1;
253 }
254 true
255 };
256
257 for lo in 0..n {
258 for hi in lo..n {
259 if is_pal(lo, hi) {
260 count += 1;
261 let len = hi - lo + 1;
262 if len > best_len {
263 best_len = len;
264 best_start = lo;
265 }
266 }
267 }
268 }
269 (best_len, best_start, count)
270 }
271
272 fn random_bytes(rng: &mut crate::handle::LcgRng, alphabet: &[u8], len: usize) -> Vec<u8> {
273 (0..len)
274 .map(|_| alphabet[rng.next_usize(alphabet.len())])
275 .collect()
276 }
277
278 /// (a) "babad" → a length-3 palindrome; left-most is "bab".
279 #[test]
280 fn babad_returns_length_three() {
281 let m = manacher(b"babad").expect("non-empty");
282 assert_eq!(m.length, 3);
283 let got = m.longest(b"babad");
284 assert!(got == b"bab" || got == b"aba", "got {got:?}");
285 // Deterministic left-most choice.
286 assert_eq!(got, b"bab");
287 }
288
289 /// (b) "cbbd" → "bb" (an even palindrome).
290 #[test]
291 fn cbbd_returns_bb() {
292 let m = manacher(b"cbbd").expect("non-empty");
293 assert_eq!(m.length, 2);
294 assert_eq!(m.longest(b"cbbd"), b"bb");
295 }
296
297 /// (c) A whole-string palindrome returns itself.
298 #[test]
299 fn whole_string_palindrome() {
300 for s in [b"racecar".as_slice(), b"abba", b"a", b"level"] {
301 let m = manacher(s).expect("non-empty");
302 assert_eq!(m.length, s.len());
303 assert_eq!(m.longest(s), s);
304 }
305 }
306
307 /// (d) A single character is its own longest palindrome.
308 #[test]
309 fn single_char() {
310 let m = manacher(b"z").expect("non-empty");
311 assert_eq!(m.length, 1);
312 assert_eq!(m.longest(b"z"), b"z");
313 assert_eq!(m.palindrome_count(), 1);
314 }
315
316 /// (e) "aaaa" → "aaaa".
317 #[test]
318 fn all_same_char() {
319 let m = manacher(b"aaaa").expect("non-empty");
320 assert_eq!(m.length, 4);
321 assert_eq!(m.longest(b"aaaa"), b"aaaa");
322 // Palindromic substrings of "aaaa": 4 of len1, 3 of len2, 2 of len3, 1
323 // of len4 = 10.
324 assert_eq!(m.palindrome_count(), 10);
325 }
326
327 /// (f) Both even and odd palindromes are handled, and a no-nontrivial case.
328 #[test]
329 fn even_and_odd_palindromes() {
330 // Odd center.
331 let odd = manacher(b"xabax").expect("non-empty");
332 assert_eq!(odd.length, 5);
333 assert_eq!(odd.longest(b"xabax"), b"xabax");
334
335 // Even center.
336 let even = manacher(b"xabbax").expect("non-empty");
337 assert_eq!(even.length, 6);
338 assert_eq!(even.longest(b"xabbax"), b"xabbax");
339
340 // No palindrome longer than a single character.
341 let none = manacher(b"abcde").expect("non-empty");
342 assert_eq!(none.length, 1);
343 // Only the 5 single characters are palindromes.
344 assert_eq!(none.palindrome_count(), 5);
345 }
346
347 /// (g) The radius array and longest-length match brute force on random
348 /// strings (both even- and odd-friendly small alphabets).
349 #[test]
350 fn radius_array_matches_brute_force() {
351 let mut rng = crate::handle::LcgRng::new(123);
352
353 for &alphabet in &[b"ab".as_slice(), b"abc"] {
354 for _ in 0..400 {
355 let len = 1 + rng.next_usize(24);
356 let s = random_bytes(&mut rng, alphabet, len);
357
358 let m = manacher(&s).expect("non-empty");
359 let (bf_len, _bf_start, bf_count) = brute_force(&s);
360
361 assert_eq!(m.length, bf_len, "length mismatch for {s:?}");
362
363 // The returned palindrome really is a palindrome of that length.
364 let got = m.longest(&s);
365 assert_eq!(got.len(), bf_len);
366 let rev: Vec<u8> = got.iter().rev().copied().collect();
367 assert_eq!(got, rev.as_slice(), "returned slice is a palindrome");
368
369 // Independently recompute every maximal palindrome from `radii`
370 // and confirm the implied count matches brute force (h).
371 assert_eq!(m.palindrome_count(), bf_count, "count mismatch for {s:?}");
372 }
373 }
374 }
375
376 /// (h) Palindrome count matches brute force on the hand-picked strings.
377 #[test]
378 fn palindrome_count_hand_checked() {
379 let cases: &[(&[u8], usize)] = &[
380 (b"abc", 3), // a, b, c
381 (b"aaa", 6), // a×3, aa×2, aaa×1
382 (b"aba", 4), // a, b, a, aba
383 (b"abba", 6), // a, b, b, a, bb, abba
384 (b"", 0), // handled separately below
385 ];
386 for &(s, expected) in cases {
387 if s.is_empty() {
388 assert!(matches!(manacher(s), Err(SeqError::EmptyInput)));
389 continue;
390 }
391 let m = manacher(s).expect("non-empty");
392 assert_eq!(m.palindrome_count(), expected, "for {s:?}");
393 let (_, _, bf) = brute_force(s);
394 assert_eq!(m.palindrome_count(), bf, "vs brute force for {s:?}");
395 }
396 }
397
398 /// Empty input is an error.
399 #[test]
400 fn empty_input_errors() {
401 assert!(matches!(manacher(b""), Err(SeqError::EmptyInput)));
402 assert!(matches!(
403 longest_palindrome_str(""),
404 Err(SeqError::EmptyInput)
405 ));
406 }
407
408 /// The `&str` wrapper returns the expected owned palindrome.
409 #[test]
410 fn str_wrapper() {
411 assert_eq!(longest_palindrome_str("babad").expect("ok"), "bab");
412 assert_eq!(longest_palindrome_str("cbbd").expect("ok"), "bb");
413 assert_eq!(longest_palindrome_str("racecar").expect("ok"), "racecar");
414 }
415}