Skip to main content

oxicuda_seq/string/
manacher.rs

1//! Manacher's algorithm for longest palindromic substring in linear time.
2//!
3//! Reference: Glenn Manacher, *"A New Linear-Time 'On-Line' Algorithm for
4//! Finding the Smallest Initial Palindrome of a String"*, Journal of the ACM
5//! 22(3), 1975, pp. 346–351. The modern center/radius formulation used here is
6//! the standard textbook restatement.
7//!
8//! # Idea
9//!
10//! A palindrome is either *odd*-length (centred on a character) or *even*-length
11//! (centred between two characters). To treat both uniformly, the input is
12//! transformed by interleaving a separator that occurs nowhere in the data:
13//!
14//! ```text
15//! "abba"  ->  ^#a#b#b#a#$
16//! ```
17//!
18//! Every palindrome of the original string corresponds to an odd-length
19//! palindrome of the transformed string centred at one of the `#`/character
20//! slots, and vice versa. We compute, for each transformed position `i`, the
21//! radius `p[i]` = the largest `r` such that `t[i-r..=i+r]` is a palindrome.
22//!
23//! The linear-time trick maintains the palindrome with the right-most boundary
24//! seen so far, `(center, right)`. For a new position `i` inside that
25//! palindrome, the **mirror** position `2*center - i` gives a free lower bound
26//! on `p[i]`; only the excess beyond the boundary is verified by explicit
27//! character comparisons, and each such comparison advances `right`, so the
28//! total work is `O(n)`.
29//!
30//! The sentinels `^` and `$` (chosen here as two *distinct* synthetic markers
31//! that never compare equal to any data byte or to each other) remove the need
32//! for explicit bounds checks during expansion.
33//!
34//! # What is returned
35//!
36//! [`manacher`] returns a [`Manacher`] bundling the longest palindromic
37//! substring together with its location and the full per-center radius array,
38//! from which every maximal palindrome — and the total count of palindromic
39//! substrings — can be recovered ([`Manacher::palindrome_count`],
40//! [`Manacher::longest`]).
41//!
42//! Input is treated as raw bytes (`&[u8]`); for ASCII this is the usual
43//! character-level notion of a palindrome.
44
45use crate::error::{SeqError, SeqResult};
46
47/// Result of running [`manacher`] on a byte string.
48///
49/// The radius array `radii` is indexed by *transformed* position. Position `i`
50/// of the transformed string corresponds either to a real character (when `i`
51/// is odd in the `# c # c # …` layout) or to a gap between characters (when `i`
52/// is even). `radii[i]` is the palindromic radius in transformed coordinates;
53/// the length of the palindrome centred there, measured in *original*
54/// characters, is exactly `radii[i]`.
55#[derive(Debug, Clone)]
56pub struct Manacher {
57    /// Start index (inclusive) of a longest palindromic substring in the input.
58    pub start: usize,
59    /// Length in bytes of a longest palindromic substring (0 for empty input).
60    pub length: usize,
61    /// Per-center radius array in transformed coordinates. Its length is
62    /// `2 * n + 1` where `n = input.len()`. `radii[i]` equals the number of
63    /// original characters spanned on each side of center `i`, i.e. the length
64    /// of the palindrome centred at `i` in original-character units.
65    pub radii: Vec<usize>,
66}
67
68impl Manacher {
69    /// Borrow the longest palindromic substring out of the original `input`.
70    ///
71    /// Returns `&input[start..start + length]`. For an empty input this is the
72    /// empty slice.
73    pub fn longest<'a>(&self, input: &'a [u8]) -> &'a [u8] {
74        &input[self.start..self.start + self.length]
75    }
76
77    /// Total number of palindromic substrings (each distinct occurrence counted
78    /// once), derived from the radius array.
79    ///
80    /// A palindrome centred at transformed position `i` with radius `r` (in
81    /// original-character units) contains `⌈r / 2⌉` palindromic substrings that
82    /// are themselves centred at `i` (itself, then peeling two characters at a
83    /// time). Summing over all centers counts every palindromic substring of
84    /// the original string exactly once, because each palindrome has a unique
85    /// center in the transformed string.
86    pub fn palindrome_count(&self) -> usize {
87        self.radii.iter().map(|&r| r.div_ceil(2)).sum()
88    }
89}
90
91/// Compute the longest palindromic substring of `input` with Manacher's
92/// algorithm in `O(n)` time.
93///
94/// On ties (several palindromes of maximal length) the **left-most** one is
95/// returned, which makes the result deterministic. The returned [`Manacher`]
96/// also exposes the full radius array for callers that need every palindrome.
97///
98/// # Errors
99///
100/// Returns [`SeqError::EmptyInput`] for an empty `input`: there is no
101/// palindromic *substring* of the empty string under the convention that a
102/// substring is non-empty.
103///
104/// # Examples
105///
106/// ```
107/// use oxicuda_seq::string::manacher;
108///
109/// let m = manacher(b"babad").expect("non-empty");
110/// // Either "bab" or "aba"; this implementation returns the left-most, "bab".
111/// assert_eq!(m.longest(b"babad"), b"bab");
112/// assert_eq!(m.length, 3);
113/// ```
114pub fn manacher(input: &[u8]) -> SeqResult<Manacher> {
115    if input.is_empty() {
116        return Err(SeqError::EmptyInput);
117    }
118
119    let n = input.len();
120
121    // Transformed alphabet. We map each data byte to a value in 2..=257 so that
122    // the two sentinels (LEFT_GUARD, GAP) and the boundary marker are reserved
123    // and never collide with any data character.
124    const GAP: u16 = 0; // the interleaved separator '#'
125    const LEFT_GUARD: u16 = 1; // synthetic '^' to the left of the whole string
126
127    // Build "^ # c0 # c1 # … # c_{n-1} # $" where the trailing region is
128    // implicitly guarded: we store an explicit right guard distinct from GAP.
129    const RIGHT_GUARD: u16 = 0xFFFF; // synthetic '$'; distinct from LEFT_GUARD.
130
131    // The center/gap layout has length 2*n + 1 (a gap, then char, gap, char, …,
132    // gap). We additionally bracket it with the two distinct guards so that the
133    // expansion loop never needs a bounds check.
134    let trans_len = 2 * n + 1;
135    let mut t: Vec<u16> = Vec::with_capacity(trans_len + 2);
136    t.push(LEFT_GUARD);
137    for &byte in input {
138        t.push(GAP);
139        // Offset data bytes by 2 to clear LEFT_GUARD(1) and GAP(0).
140        t.push(u16::from(byte) + 2);
141    }
142    t.push(GAP);
143    t.push(RIGHT_GUARD);
144
145    // `p[k]` is the radius (in transformed units) at transformed-center k, where
146    // k ranges over the inner positions 1..=trans_len of `t` (excluding the two
147    // guards). We index `p` by the *inner* coordinate i in 0..trans_len, mapping
148    // to t-coordinate i + 1.
149    let mut p = vec![0usize; trans_len];
150
151    let mut center: isize = 0; // current palindrome center, inner coordinate
152    let mut right: isize = 0; // one past the right edge, inner coordinate
153
154    for i in 0..trans_len as isize {
155        // Mirror of i across `center`.
156        if i < right {
157            let mirror = 2 * center - i;
158            // Free lower bound: min(distance to right edge, mirror radius).
159            let bound = (right - i) as usize;
160            p[i as usize] = bound.min(p[mirror as usize]);
161        }
162
163        // Attempt to expand around i. The +1 offsets translate inner coordinate
164        // to t-coordinate; guards stop the loop without explicit bounds checks.
165        loop {
166            let radius = p[i as usize] as isize;
167            let left_pos = i - radius - 1 + 1; // = i - radius  (t-coordinate)
168            let right_pos = i + radius + 1 + 1; // = i + radius + 2 (t-coordinate)
169            // Bounds are guaranteed by the guards: indices stay in 0..t.len().
170            if t[left_pos as usize] == t[right_pos as usize] {
171                p[i as usize] += 1;
172            } else {
173                break;
174            }
175        }
176
177        // Slide the right-most boundary if we expanded past it.
178        if i + p[i as usize] as isize > right {
179            center = i;
180            right = i + p[i as usize] as isize;
181        }
182    }
183
184    // Recover the longest palindrome. In this transform, p[i] (transformed
185    // radius) equals the palindrome length in *original* characters, and its
186    // start in original coordinates is (i - p[i]) / 2.
187    let mut best_len = 0usize;
188    let mut best_center = 0usize;
189    for (i, &radius) in p.iter().enumerate() {
190        if radius > best_len {
191            best_len = radius;
192            best_center = i;
193        }
194    }
195    // `best_center` is an inner coordinate; original start = (best_center - p)/2.
196    let start = (best_center - best_len) / 2;
197
198    Ok(Manacher {
199        start,
200        length: best_len,
201        radii: p,
202    })
203}
204
205/// Convenience wrapper returning the longest palindromic substring of a `&str`
206/// as an owned `String`.
207///
208/// The string is processed by its UTF-8 bytes; for inputs containing multi-byte
209/// characters the returned slice is guaranteed to fall on byte boundaries only
210/// when those characters are not split by a palindrome edge, so this helper is
211/// intended for ASCII / byte-oriented use. The result is re-decoded losslessly
212/// because palindrome edges in a byte palindrome of valid UTF-8 input land on
213/// character boundaries for ASCII text.
214///
215/// # Errors
216///
217/// Propagates [`SeqError::EmptyInput`] for an empty string, and returns
218/// [`SeqError::InvalidObservation`] if the recovered byte range is not valid
219/// UTF-8 (possible only for non-ASCII inputs whose palindrome splits a
220/// multi-byte sequence).
221pub fn longest_palindrome_str(input: &str) -> SeqResult<String> {
222    let m = manacher(input.as_bytes())?;
223    let slice = m.longest(input.as_bytes());
224    std::str::from_utf8(slice)
225        .map(|s| s.to_owned())
226        .map_err(|e| {
227            SeqError::InvalidObservation(format!("palindrome split a UTF-8 boundary: {e}"))
228        })
229}
230
231#[cfg(test)]
232mod tests {
233    use super::*;
234
235    /// Brute-force `O(n²)` expansion oracle: returns, for the input, the length
236    /// and start of a left-most longest palindrome, plus the total count of
237    /// palindromic substrings.
238    fn brute_force(input: &[u8]) -> (usize, usize, usize) {
239        let n = input.len();
240        let mut best_len = 0usize;
241        let mut best_start = 0usize;
242        let mut count = 0usize;
243
244        let is_pal = |lo: usize, hi: usize| -> bool {
245            // Inclusive [lo, hi].
246            let (mut a, mut b) = (lo, hi);
247            while a < b {
248                if input[a] != input[b] {
249                    return false;
250                }
251                a += 1;
252                b -= 1;
253            }
254            true
255        };
256
257        for lo in 0..n {
258            for hi in lo..n {
259                if is_pal(lo, hi) {
260                    count += 1;
261                    let len = hi - lo + 1;
262                    if len > best_len {
263                        best_len = len;
264                        best_start = lo;
265                    }
266                }
267            }
268        }
269        (best_len, best_start, count)
270    }
271
272    fn random_bytes(rng: &mut crate::handle::LcgRng, alphabet: &[u8], len: usize) -> Vec<u8> {
273        (0..len)
274            .map(|_| alphabet[rng.next_usize(alphabet.len())])
275            .collect()
276    }
277
278    /// (a) "babad" → a length-3 palindrome; left-most is "bab".
279    #[test]
280    fn babad_returns_length_three() {
281        let m = manacher(b"babad").expect("non-empty");
282        assert_eq!(m.length, 3);
283        let got = m.longest(b"babad");
284        assert!(got == b"bab" || got == b"aba", "got {got:?}");
285        // Deterministic left-most choice.
286        assert_eq!(got, b"bab");
287    }
288
289    /// (b) "cbbd" → "bb" (an even palindrome).
290    #[test]
291    fn cbbd_returns_bb() {
292        let m = manacher(b"cbbd").expect("non-empty");
293        assert_eq!(m.length, 2);
294        assert_eq!(m.longest(b"cbbd"), b"bb");
295    }
296
297    /// (c) A whole-string palindrome returns itself.
298    #[test]
299    fn whole_string_palindrome() {
300        for s in [b"racecar".as_slice(), b"abba", b"a", b"level"] {
301            let m = manacher(s).expect("non-empty");
302            assert_eq!(m.length, s.len());
303            assert_eq!(m.longest(s), s);
304        }
305    }
306
307    /// (d) A single character is its own longest palindrome.
308    #[test]
309    fn single_char() {
310        let m = manacher(b"z").expect("non-empty");
311        assert_eq!(m.length, 1);
312        assert_eq!(m.longest(b"z"), b"z");
313        assert_eq!(m.palindrome_count(), 1);
314    }
315
316    /// (e) "aaaa" → "aaaa".
317    #[test]
318    fn all_same_char() {
319        let m = manacher(b"aaaa").expect("non-empty");
320        assert_eq!(m.length, 4);
321        assert_eq!(m.longest(b"aaaa"), b"aaaa");
322        // Palindromic substrings of "aaaa": 4 of len1, 3 of len2, 2 of len3, 1
323        // of len4 = 10.
324        assert_eq!(m.palindrome_count(), 10);
325    }
326
327    /// (f) Both even and odd palindromes are handled, and a no-nontrivial case.
328    #[test]
329    fn even_and_odd_palindromes() {
330        // Odd center.
331        let odd = manacher(b"xabax").expect("non-empty");
332        assert_eq!(odd.length, 5);
333        assert_eq!(odd.longest(b"xabax"), b"xabax");
334
335        // Even center.
336        let even = manacher(b"xabbax").expect("non-empty");
337        assert_eq!(even.length, 6);
338        assert_eq!(even.longest(b"xabbax"), b"xabbax");
339
340        // No palindrome longer than a single character.
341        let none = manacher(b"abcde").expect("non-empty");
342        assert_eq!(none.length, 1);
343        // Only the 5 single characters are palindromes.
344        assert_eq!(none.palindrome_count(), 5);
345    }
346
347    /// (g) The radius array and longest-length match brute force on random
348    /// strings (both even- and odd-friendly small alphabets).
349    #[test]
350    fn radius_array_matches_brute_force() {
351        let mut rng = crate::handle::LcgRng::new(123);
352
353        for &alphabet in &[b"ab".as_slice(), b"abc"] {
354            for _ in 0..400 {
355                let len = 1 + rng.next_usize(24);
356                let s = random_bytes(&mut rng, alphabet, len);
357
358                let m = manacher(&s).expect("non-empty");
359                let (bf_len, _bf_start, bf_count) = brute_force(&s);
360
361                assert_eq!(m.length, bf_len, "length mismatch for {s:?}");
362
363                // The returned palindrome really is a palindrome of that length.
364                let got = m.longest(&s);
365                assert_eq!(got.len(), bf_len);
366                let rev: Vec<u8> = got.iter().rev().copied().collect();
367                assert_eq!(got, rev.as_slice(), "returned slice is a palindrome");
368
369                // Independently recompute every maximal palindrome from `radii`
370                // and confirm the implied count matches brute force (h).
371                assert_eq!(m.palindrome_count(), bf_count, "count mismatch for {s:?}");
372            }
373        }
374    }
375
376    /// (h) Palindrome count matches brute force on the hand-picked strings.
377    #[test]
378    fn palindrome_count_hand_checked() {
379        let cases: &[(&[u8], usize)] = &[
380            (b"abc", 3),  // a, b, c
381            (b"aaa", 6),  // a×3, aa×2, aaa×1
382            (b"aba", 4),  // a, b, a, aba
383            (b"abba", 6), // a, b, b, a, bb, abba
384            (b"", 0),     // handled separately below
385        ];
386        for &(s, expected) in cases {
387            if s.is_empty() {
388                assert!(matches!(manacher(s), Err(SeqError::EmptyInput)));
389                continue;
390            }
391            let m = manacher(s).expect("non-empty");
392            assert_eq!(m.palindrome_count(), expected, "for {s:?}");
393            let (_, _, bf) = brute_force(s);
394            assert_eq!(m.palindrome_count(), bf, "vs brute force for {s:?}");
395        }
396    }
397
398    /// Empty input is an error.
399    #[test]
400    fn empty_input_errors() {
401        assert!(matches!(manacher(b""), Err(SeqError::EmptyInput)));
402        assert!(matches!(
403            longest_palindrome_str(""),
404            Err(SeqError::EmptyInput)
405        ));
406    }
407
408    /// The `&str` wrapper returns the expected owned palindrome.
409    #[test]
410    fn str_wrapper() {
411        assert_eq!(longest_palindrome_str("babad").expect("ok"), "bab");
412        assert_eq!(longest_palindrome_str("cbbd").expect("ok"), "bb");
413        assert_eq!(longest_palindrome_str("racecar").expect("ok"), "racecar");
414    }
415}