xsd_schema/xpath/timsort.rs
1/*
2 * Copyright 2009 Google Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26//! A stable, adaptive, iterative mergesort that requires far fewer than
27//! n lg(n) comparisons when running on partially sorted arrays, while
28//! offering performance comparable to a traditional mergesort when run
29//! on random arrays. Like all proper mergesorts, this sort is stable and
30//! runs O(n log n) time (worst case). In the worst case, this sort requires
31//! temporary storage space for n/2 object references; in the best case,
32//! it requires only a small constant amount of space.
33//!
34//! This implementation was adapted from Tim Peters's list sort for Python,
35//! which is described in detail here:
36//! http://svn.python.org/projects/python/trunk/Objects/listsort.txt
37//!
38//! Tim's C code may be found here:
39//! http://svn.python.org/projects/python/trunk/Objects/listobject.c
40//!
41//! The underlying techniques are described in this paper (and may have
42//! even earlier origins):
43//! "Optimistic Sorting and Information Theoretic Complexity"
44//! Peter McIlroy
45//! SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms),
46//! pp 467-474, Austin, Texas, 25-27 January 1993.
47//!
48//! Ported from Java/C# implementation by Josh Bloch.
49
50use std::cmp::Ordering;
51use std::marker::PhantomData;
52
53// ============================================================================
54// IComparer Trait - Similar to C#'s IComparer<T>
55// ============================================================================
56
57/// Trait for comparing two values, similar to C#'s `IComparer<T>`.
58///
59/// This trait allows creating stateful or stateless comparers that can be
60/// passed to sorting functions.
61pub trait IComparer<T> {
62 /// Compares two values and returns their ordering.
63 ///
64 /// Returns:
65 /// - `Ordering::Less` if `x < y`
66 /// - `Ordering::Equal` if `x == y`
67 /// - `Ordering::Greater` if `x > y`
68 fn compare(&self, x: &T, y: &T) -> Ordering;
69}
70
71/// A comparer that uses the default `Ord` implementation.
72#[derive(Debug, Clone, Copy, Default)]
73pub struct OrdComparer<T>(PhantomData<T>);
74
75impl<T> OrdComparer<T> {
76 /// Creates a new `OrdComparer`.
77 pub fn new() -> Self {
78 OrdComparer(PhantomData)
79 }
80}
81
82impl<T: Ord> IComparer<T> for OrdComparer<T> {
83 fn compare(&self, x: &T, y: &T) -> Ordering {
84 x.cmp(y)
85 }
86}
87
88/// A comparer that reverses the ordering of another comparer.
89#[derive(Debug, Clone, Copy)]
90pub struct ReverseComparer<C>(pub C);
91
92impl<C> ReverseComparer<C> {
93 /// Creates a new `ReverseComparer` wrapping another comparer.
94 pub fn new(comparer: C) -> Self {
95 ReverseComparer(comparer)
96 }
97}
98
99impl<T, C: IComparer<T>> IComparer<T> for ReverseComparer<C> {
100 fn compare(&self, x: &T, y: &T) -> Ordering {
101 self.0.compare(y, x)
102 }
103}
104
105/// A comparer that wraps a closure.
106pub struct FnComparer<F>(pub F);
107
108impl<F> FnComparer<F> {
109 /// Creates a new `FnComparer` wrapping a closure.
110 pub fn new(f: F) -> Self {
111 FnComparer(f)
112 }
113}
114
115impl<T, F> IComparer<T> for FnComparer<F>
116where
117 F: Fn(&T, &T) -> Ordering,
118{
119 fn compare(&self, x: &T, y: &T) -> Ordering {
120 (self.0)(x, y)
121 }
122}
123
124// ============================================================================
125// TimSort Constants
126// ============================================================================
127
128/// This is the minimum sized sequence that will be merged. Shorter
129/// sequences will be lengthened by calling binarySort. If the entire
130/// array is less than this length, no merges will be performed.
131///
132/// This constant should be a power of two. It was 64 in Tim Peter's C
133/// implementation, but 32 was empirically determined to work better in
134/// this implementation.
135const MIN_MERGE: usize = 32;
136
137/// When we get into galloping mode, we stay there until both runs win less
138/// often than MIN_GALLOP consecutive times.
139const MIN_GALLOP: usize = 7;
140
141/// Maximum initial size of tmp array, which is used for merging.
142/// The array can grow to accommodate demand.
143const INITIAL_TMP_STORAGE_LENGTH: usize = 256;
144
145/// TimSort state for an ongoing sort operation.
146pub struct TimSort<T, F>
147where
148 F: FnMut(&T, &T) -> Ordering,
149{
150 /// The array being sorted
151 a: Vec<T>,
152 /// The comparator for this sort
153 c: F,
154 /// This controls when we get *into* galloping mode.
155 /// It is initialized to MIN_GALLOP. The mergeLo and
156 /// mergeHi methods nudge it higher for random data,
157 /// and lower for highly structured data.
158 min_gallop: usize,
159 /// Temp storage for merges
160 tmp: Vec<T>,
161 /// Number of pending runs on stack
162 stack_size: usize,
163 /// Stack of pending run bases
164 run_base: Vec<usize>,
165 /// Stack of pending run lengths
166 run_len: Vec<usize>,
167}
168
169impl<T, F> TimSort<T, F>
170where
171 T: Clone,
172 F: FnMut(&T, &T) -> Ordering,
173{
174 /// Creates a TimSort instance to maintain the state of an ongoing sort.
175 fn new(a: Vec<T>, c: F) -> Self {
176 let len = a.len();
177
178 // Allocate temp storage (which may be increased later if necessary)
179 let tmp_len = if len < 2 * INITIAL_TMP_STORAGE_LENGTH {
180 len / 2
181 } else {
182 INITIAL_TMP_STORAGE_LENGTH
183 };
184
185 // Allocate runs-to-be-merged stack (which cannot be expanded). The
186 // stack length requirements are described in listsort.txt. The C
187 // version always uses the same stack length (85), but this was
188 // measured to be too expensive when sorting "mid-sized" arrays (e.g.,
189 // 100 elements) in Java. Therefore, we use smaller (but sufficiently
190 // large) stack lengths for smaller arrays.
191 let stack_len = if len < 120 {
192 5
193 } else if len < 1542 {
194 10
195 } else if len < 119151 {
196 19
197 } else {
198 40
199 };
200
201 TimSort {
202 a,
203 c,
204 min_gallop: MIN_GALLOP,
205 tmp: Vec::with_capacity(tmp_len),
206 stack_size: 0,
207 run_base: vec![0; stack_len],
208 run_len: vec![0; stack_len],
209 }
210 }
211
212 /// Sort the array in place using TimSort algorithm.
213 pub fn sort(mut a: Vec<T>, mut c: F) -> Vec<T> {
214 let len = a.len();
215 if len < 2 {
216 return a;
217 }
218
219 // If array is small, do a "mini-TimSort" with no merges
220 if len < MIN_MERGE {
221 let init_run_len = count_run_and_make_ascending(&mut a, 0, len, &mut c);
222 binary_sort(&mut a, 0, len, init_run_len, &mut c);
223 return a;
224 }
225
226 // March over the array once, left to right, finding natural runs,
227 // extending short natural runs to minRun elements, and merging runs
228 // to maintain stack invariant.
229 let mut ts = TimSort::new(a, c);
230 let min_run = min_run_length(len);
231 let mut lo = 0;
232 let mut n_remaining = len;
233
234 loop {
235 // Identify next run
236 let mut run_len = count_run_and_make_ascending(&mut ts.a, lo, len, &mut ts.c);
237
238 // If run is short, extend to min(minRun, nRemaining)
239 if run_len < min_run {
240 let force = if n_remaining <= min_run {
241 n_remaining
242 } else {
243 min_run
244 };
245 binary_sort(&mut ts.a, lo, lo + force, lo + run_len, &mut ts.c);
246 run_len = force;
247 }
248
249 // Push run onto pending-run stack, and maybe merge
250 ts.push_run(lo, run_len);
251 ts.merge_collapse();
252
253 // Advance to find next run
254 lo += run_len;
255 n_remaining -= run_len;
256
257 if n_remaining == 0 {
258 break;
259 }
260 }
261
262 // Merge all remaining runs to complete sort
263 ts.merge_force_collapse();
264 ts.a
265 }
266
267 /// Pushes the specified run onto the pending-run stack.
268 fn push_run(&mut self, run_base: usize, run_len: usize) {
269 self.run_base[self.stack_size] = run_base;
270 self.run_len[self.stack_size] = run_len;
271 self.stack_size += 1;
272 }
273
274 /// Examines the stack of runs waiting to be merged and merges adjacent runs
275 /// until the stack invariants are reestablished:
276 ///
277 /// 1. `runLen[i - 3] > runLen[i - 2] + runLen[i - 1]`
278 /// 2. `runLen[i - 2] > runLen[i - 1]`
279 ///
280 /// This method is called each time a new run is pushed onto the stack,
281 /// so the invariants are guaranteed to hold for i < stackSize upon
282 /// entry to the method.
283 fn merge_collapse(&mut self) {
284 while self.stack_size > 1 {
285 let mut n = self.stack_size - 2;
286 if n > 0 && self.run_len[n - 1] <= self.run_len[n] + self.run_len[n + 1] {
287 if self.run_len[n - 1] < self.run_len[n + 1] {
288 n -= 1;
289 }
290 self.merge_at(n);
291 } else if self.run_len[n] <= self.run_len[n + 1] {
292 self.merge_at(n);
293 } else {
294 break; // Invariant is established
295 }
296 }
297 }
298
299 /// Merges all runs on the stack until only one remains.
300 /// This method is called once, to complete the sort.
301 fn merge_force_collapse(&mut self) {
302 while self.stack_size > 1 {
303 let mut n = self.stack_size - 2;
304 if n > 0 && self.run_len[n - 1] < self.run_len[n + 1] {
305 n -= 1;
306 }
307 self.merge_at(n);
308 }
309 }
310
311 /// Merges the two runs at stack indices i and i+1. Run i must be
312 /// the penultimate or antepenultimate run on the stack. In other words,
313 /// i must be equal to stackSize-2 or stackSize-3.
314 fn merge_at(&mut self, i: usize) {
315 debug_assert!(self.stack_size >= 2);
316 debug_assert!(i == self.stack_size - 2 || i == self.stack_size - 3);
317
318 let mut base1 = self.run_base[i];
319 let mut len1 = self.run_len[i];
320 let base2 = self.run_base[i + 1];
321 let mut len2 = self.run_len[i + 1];
322 debug_assert!(len1 > 0 && len2 > 0);
323 debug_assert!(base1 + len1 == base2);
324
325 // Record the length of the combined runs; if i is the 3rd-last
326 // run now, also slide over the last run (which isn't involved
327 // in this merge). The current run (i+1) goes away in any case.
328 self.run_len[i] = len1 + len2;
329 if i + 3 == self.stack_size {
330 self.run_base[i + 1] = self.run_base[i + 2];
331 self.run_len[i + 1] = self.run_len[i + 2];
332 }
333 self.stack_size -= 1;
334
335 // Find where the first element of run2 goes in run1. Prior elements
336 // in run1 can be ignored (because they're already in place).
337 let k = gallop_right(&self.a[base2], &self.a, base1, len1, 0, &mut self.c);
338 base1 += k;
339 len1 -= k;
340 if len1 == 0 {
341 return;
342 }
343
344 // Find where the last element of run1 goes in run2. Subsequent elements
345 // in run2 can be ignored (because they're already in place).
346 len2 = gallop_left(
347 &self.a[base1 + len1 - 1],
348 &self.a,
349 base2,
350 len2,
351 len2 - 1,
352 &mut self.c,
353 );
354 if len2 == 0 {
355 return;
356 }
357
358 // Merge remaining runs, using tmp array with min(len1, len2) elements
359 if len1 <= len2 {
360 self.merge_lo(base1, len1, base2, len2);
361 } else {
362 self.merge_hi(base1, len1, base2, len2);
363 }
364 }
365
366 /// Ensures that the external array tmp has at least the specified
367 /// number of elements, increasing its size if necessary. The size
368 /// increases exponentially to ensure amortized linear time complexity.
369 fn ensure_capacity(&mut self, min_capacity: usize) {
370 if self.tmp.capacity() < min_capacity {
371 // Compute smallest power of 2 > minCapacity
372 let mut new_size = min_capacity;
373 new_size |= new_size >> 1;
374 new_size |= new_size >> 2;
375 new_size |= new_size >> 4;
376 new_size |= new_size >> 8;
377 new_size |= new_size >> 16;
378 new_size = new_size.wrapping_add(1);
379
380 if new_size == 0 {
381 // overflow
382 new_size = min_capacity;
383 } else {
384 new_size = new_size.min(self.a.len() / 2);
385 }
386
387 self.tmp = Vec::with_capacity(new_size);
388 }
389 self.tmp.clear();
390 }
391
392 /// Merges two adjacent runs in place, in a stable fashion. The first
393 /// element of the first run must be greater than the first element of the
394 /// second run (a[base1] > a[base2]), and the last element of the first run
395 /// (a[base1 + len1-1]) must be greater than all elements of the second run.
396 ///
397 /// For performance, this method should be called only when len1 <= len2;
398 /// its twin, merge_hi should be called if len1 >= len2. (Either method
399 /// may be called if len1 == len2.)
400 fn merge_lo(&mut self, base1: usize, mut len1: usize, base2: usize, mut len2: usize) {
401 debug_assert!(len1 > 0 && len2 > 0 && base1 + len1 == base2);
402
403 // Copy first run into temp array
404 self.ensure_capacity(len1);
405 for i in 0..len1 {
406 self.tmp.push(self.a[base1 + i].clone());
407 }
408
409 let mut cursor1 = 0usize; // Indexes into tmp array
410 let mut cursor2 = base2; // Indexes into a
411 let mut dest = base1; // Indexes into a
412
413 // Move first element of second run and deal with degenerate cases
414 self.a[dest] = self.a[cursor2].clone();
415 dest += 1;
416 cursor2 += 1;
417 len2 -= 1;
418 if len2 == 0 {
419 for i in 0..len1 {
420 self.a[dest + i] = self.tmp[cursor1 + i].clone();
421 }
422 return;
423 }
424 if len1 == 1 {
425 for i in 0..len2 {
426 self.a[dest + i] = self.a[cursor2 + i].clone();
427 }
428 self.a[dest + len2] = self.tmp[cursor1].clone();
429 return;
430 }
431
432 let mut min_gallop = self.min_gallop;
433
434 'outer: loop {
435 let mut count1 = 0usize; // Number of times in a row that first run won
436 let mut count2 = 0usize; // Number of times in a row that second run won
437
438 // Do the straightforward thing until (if ever) one run starts
439 // winning consistently.
440 loop {
441 debug_assert!(len1 > 1 && len2 > 0);
442 if (self.c)(&self.a[cursor2], &self.tmp[cursor1]) == Ordering::Less {
443 self.a[dest] = self.a[cursor2].clone();
444 dest += 1;
445 cursor2 += 1;
446 count2 += 1;
447 count1 = 0;
448 len2 -= 1;
449 if len2 == 0 {
450 break 'outer;
451 }
452 } else {
453 self.a[dest] = self.tmp[cursor1].clone();
454 dest += 1;
455 cursor1 += 1;
456 count1 += 1;
457 count2 = 0;
458 len1 -= 1;
459 if len1 == 1 {
460 break 'outer;
461 }
462 }
463 if (count1 | count2) >= min_gallop {
464 break;
465 }
466 }
467
468 // One run is winning so consistently that galloping may be a
469 // huge win. So try that, and continue galloping until (if ever)
470 // neither run appears to be winning consistently anymore.
471 loop {
472 debug_assert!(len1 > 1 && len2 > 0);
473 count1 = gallop_right(&self.a[cursor2], &self.tmp, cursor1, len1, 0, &mut self.c);
474 if count1 != 0 {
475 for i in 0..count1 {
476 self.a[dest + i] = self.tmp[cursor1 + i].clone();
477 }
478 dest += count1;
479 cursor1 += count1;
480 len1 -= count1;
481 if len1 <= 1 {
482 break 'outer;
483 }
484 }
485 self.a[dest] = self.a[cursor2].clone();
486 dest += 1;
487 cursor2 += 1;
488 len2 -= 1;
489 if len2 == 0 {
490 break 'outer;
491 }
492
493 count2 = gallop_left(&self.tmp[cursor1], &self.a, cursor2, len2, 0, &mut self.c);
494 if count2 != 0 {
495 for i in 0..count2 {
496 self.a[dest + i] = self.a[cursor2 + i].clone();
497 }
498 dest += count2;
499 cursor2 += count2;
500 len2 -= count2;
501 if len2 == 0 {
502 break 'outer;
503 }
504 }
505 self.a[dest] = self.tmp[cursor1].clone();
506 dest += 1;
507 cursor1 += 1;
508 len1 -= 1;
509 if len1 == 1 {
510 break 'outer;
511 }
512 min_gallop = min_gallop.saturating_sub(1);
513 if count1 < MIN_GALLOP && count2 < MIN_GALLOP {
514 break;
515 }
516 }
517 min_gallop += 2; // Penalize for leaving gallop mode
518 }
519
520 self.min_gallop = if min_gallop < 1 { 1 } else { min_gallop };
521
522 if len1 == 1 {
523 debug_assert!(len2 > 0);
524 for i in 0..len2 {
525 self.a[dest + i] = self.a[cursor2 + i].clone();
526 }
527 self.a[dest + len2] = self.tmp[cursor1].clone();
528 } else if len1 == 0 {
529 panic!("Comparison method violates its general contract!");
530 } else {
531 debug_assert!(len2 == 0);
532 debug_assert!(len1 > 1);
533 for i in 0..len1 {
534 self.a[dest + i] = self.tmp[cursor1 + i].clone();
535 }
536 }
537 }
538
539 /// Like merge_lo, except that this method should be called only if
540 /// len1 >= len2; merge_lo should be called if len1 <= len2. (Either method
541 /// may be called if len1 == len2.)
542 fn merge_hi(&mut self, base1: usize, mut len1: usize, base2: usize, mut len2: usize) {
543 debug_assert!(len1 > 0 && len2 > 0 && base1 + len1 == base2);
544
545 // Copy second run into temp array
546 self.ensure_capacity(len2);
547 for i in 0..len2 {
548 self.tmp.push(self.a[base2 + i].clone());
549 }
550
551 let mut cursor1 = base1 + len1 - 1; // Indexes into a
552 let mut cursor2 = len2 - 1; // Indexes into tmp array (use isize for potential underflow)
553 let mut dest = base2 + len2 - 1; // Indexes into a
554
555 // Move last element of first run and deal with degenerate cases
556 self.a[dest] = self.a[cursor1].clone();
557 dest -= 1;
558 cursor1 -= 1;
559 len1 -= 1;
560 if len1 == 0 {
561 let start = dest - (len2 - 1);
562 for i in 0..len2 {
563 self.a[start + i] = self.tmp[i].clone();
564 }
565 return;
566 }
567 if len2 == 1 {
568 dest -= len1;
569 cursor1 -= len1;
570 for i in (0..len1).rev() {
571 self.a[dest + 1 + i] = self.a[cursor1 + 1 + i].clone();
572 }
573 self.a[dest] = self.tmp[cursor2].clone();
574 return;
575 }
576
577 let mut min_gallop = self.min_gallop;
578
579 'outer: loop {
580 let mut count1 = 0usize;
581 let mut count2 = 0usize;
582
583 // Do the straightforward thing until (if ever) one run
584 // appears to win consistently.
585 loop {
586 debug_assert!(len1 > 0 && len2 > 1);
587 if (self.c)(&self.tmp[cursor2], &self.a[cursor1]) == Ordering::Less {
588 self.a[dest] = self.a[cursor1].clone();
589 dest -= 1;
590 cursor1 -= 1;
591 count1 += 1;
592 count2 = 0;
593 len1 -= 1;
594 if len1 == 0 {
595 break 'outer;
596 }
597 } else {
598 self.a[dest] = self.tmp[cursor2].clone();
599 dest -= 1;
600 cursor2 = cursor2.wrapping_sub(1);
601 count2 += 1;
602 count1 = 0;
603 len2 -= 1;
604 if len2 == 1 {
605 break 'outer;
606 }
607 }
608 if (count1 | count2) >= min_gallop {
609 break;
610 }
611 }
612
613 // One run is winning so consistently that galloping may be a
614 // huge win. So try that, and continue galloping until (if ever)
615 // neither run appears to be winning consistently anymore.
616 loop {
617 debug_assert!(len1 > 0 && len2 > 1);
618 count1 = len1
619 - gallop_right(
620 &self.tmp[cursor2],
621 &self.a,
622 base1,
623 len1,
624 len1 - 1,
625 &mut self.c,
626 );
627 if count1 != 0 {
628 dest -= count1;
629 cursor1 -= count1;
630 len1 -= count1;
631 for i in (0..count1).rev() {
632 self.a[dest + 1 + i] = self.a[cursor1 + 1 + i].clone();
633 }
634 if len1 == 0 {
635 break 'outer;
636 }
637 }
638 self.a[dest] = self.tmp[cursor2].clone();
639 dest -= 1;
640 cursor2 = cursor2.wrapping_sub(1);
641 len2 -= 1;
642 if len2 == 1 {
643 break 'outer;
644 }
645
646 count2 =
647 len2 - gallop_left(&self.a[cursor1], &self.tmp, 0, len2, len2 - 1, &mut self.c);
648 if count2 != 0 {
649 dest -= count2;
650 cursor2 = cursor2.wrapping_sub(count2);
651 len2 -= count2;
652 for i in 0..count2 {
653 self.a[dest + 1 + i] = self.tmp[cursor2.wrapping_add(1) + i].clone();
654 }
655 if len2 <= 1 {
656 break 'outer;
657 }
658 }
659 self.a[dest] = self.a[cursor1].clone();
660 dest -= 1;
661 cursor1 -= 1;
662 len1 -= 1;
663 if len1 == 0 {
664 break 'outer;
665 }
666 min_gallop = min_gallop.saturating_sub(1);
667 if count1 < MIN_GALLOP && count2 < MIN_GALLOP {
668 break;
669 }
670 }
671 min_gallop += 2; // Penalize for leaving gallop mode
672 }
673
674 self.min_gallop = if min_gallop < 1 { 1 } else { min_gallop };
675
676 if len2 == 1 {
677 debug_assert!(len1 > 0);
678 dest -= len1;
679 cursor1 -= len1;
680 for i in (0..len1).rev() {
681 self.a[dest + 1 + i] = self.a[cursor1 + 1 + i].clone();
682 }
683 self.a[dest] = self.tmp[cursor2].clone();
684 } else if len2 == 0 {
685 panic!("Comparison method violates its general contract!");
686 } else {
687 debug_assert!(len1 == 0);
688 debug_assert!(len2 > 0);
689 let start = dest - (len2 - 1);
690 for i in 0..len2 {
691 self.a[start + i] = self.tmp[i].clone();
692 }
693 }
694 }
695}
696
697/// Sorts the specified portion of the specified array using a binary
698/// insertion sort. This is the best method for sorting small numbers
699/// of elements. It requires O(n log n) compares, but O(n^2) data
700/// movement (worst case).
701///
702/// If the initial part of the specified range is already sorted,
703/// this method can take advantage of it: the method assumes that the
704/// elements from index `lo`, inclusive, to `start`, exclusive are already sorted.
705fn binary_sort<T, F>(a: &mut [T], lo: usize, hi: usize, mut start: usize, c: &mut F)
706where
707 T: Clone,
708 F: FnMut(&T, &T) -> Ordering,
709{
710 debug_assert!(lo <= start && start <= hi);
711 if start == lo {
712 start += 1;
713 }
714 while start < hi {
715 let pivot = a[start].clone();
716
717 // Set left (and right) to the index where a[start] (pivot) belongs
718 let mut left = lo;
719 let mut right = start;
720
721 // Invariants:
722 // pivot >= all in [lo, left).
723 // pivot < all in [right, start).
724 while left < right {
725 let mid = (left + right) / 2;
726 if c(&pivot, &a[mid]) == Ordering::Less {
727 right = mid;
728 } else {
729 left = mid + 1;
730 }
731 }
732
733 // The invariants still hold: pivot >= all in [lo, left) and
734 // pivot < all in [left, start), so pivot belongs at left. Note
735 // that if there are elements equal to pivot, left points to the
736 // first slot after them -- that's why this sort is stable.
737 // Slide elements over to make room for pivot.
738 let n = start - left;
739 match n {
740 2 => {
741 a[left + 2] = a[left + 1].clone();
742 a[left + 1] = a[left].clone();
743 }
744 1 => {
745 a[left + 1] = a[left].clone();
746 }
747 _ if n > 0 => {
748 for i in (0..n).rev() {
749 a[left + i + 1] = a[left + i].clone();
750 }
751 }
752 _ => {}
753 }
754 a[left] = pivot;
755 start += 1;
756 }
757}
758
759/// Returns the length of the run beginning at the specified position in
760/// the specified array and reverses the run if it is descending (ensuring
761/// that the run will always be ascending when the method returns).
762///
763/// A run is the longest ascending sequence with:
764/// a[lo] <= a[lo + 1] <= a[lo + 2] <= ...
765///
766/// or the longest descending sequence with:
767/// a[lo] > a[lo + 1] > a[lo + 2] > ...
768///
769/// For its intended use in a stable mergesort, the strictness of the
770/// definition of "descending" is needed so that the call can safely
771/// reverse a descending sequence without violating stability.
772fn count_run_and_make_ascending<T, F>(a: &mut [T], lo: usize, hi: usize, c: &mut F) -> usize
773where
774 T: Clone,
775 F: FnMut(&T, &T) -> Ordering,
776{
777 debug_assert!(lo < hi);
778 let mut run_hi = lo + 1;
779 if run_hi == hi {
780 return 1;
781 }
782
783 // Find end of run, and reverse range if descending
784 if c(&a[run_hi], &a[lo]) == Ordering::Less {
785 // Descending
786 run_hi += 1;
787 while run_hi < hi && c(&a[run_hi], &a[run_hi - 1]) == Ordering::Less {
788 run_hi += 1;
789 }
790 reverse_range(a, lo, run_hi);
791 } else {
792 // Ascending
793 run_hi += 1;
794 while run_hi < hi && c(&a[run_hi], &a[run_hi - 1]) != Ordering::Less {
795 run_hi += 1;
796 }
797 }
798
799 run_hi - lo
800}
801
802/// Reverse the specified range of the specified array.
803fn reverse_range<T>(a: &mut [T], mut lo: usize, mut hi: usize) {
804 hi -= 1;
805 while lo < hi {
806 a.swap(lo, hi);
807 lo += 1;
808 hi -= 1;
809 }
810}
811
812/// Returns the minimum acceptable run length for an array of the specified
813/// length. Natural runs shorter than this will be extended with binary_sort.
814///
815/// Roughly speaking, the computation is:
816/// If n < MIN_MERGE, return n (it's too small to bother with fancy stuff).
817/// Else if n is an exact power of 2, return MIN_MERGE/2.
818/// Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k
819/// is close to, but strictly less than, an exact power of 2.
820fn min_run_length(mut n: usize) -> usize {
821 let mut r = 0; // Becomes 1 if any 1 bits are shifted off
822 while n >= MIN_MERGE {
823 r |= n & 1;
824 n >>= 1;
825 }
826 n + r
827}
828
829/// Locates the position at which to insert the specified key into the
830/// specified sorted range; if the range contains an element equal to key,
831/// returns the index of the leftmost equal element.
832fn gallop_left<T, F>(key: &T, a: &[T], base: usize, len: usize, hint: usize, c: &mut F) -> usize
833where
834 F: FnMut(&T, &T) -> Ordering,
835{
836 debug_assert!(len > 0 && hint < len);
837 let mut last_ofs = 0usize;
838 let mut ofs = 1usize;
839
840 if c(key, &a[base + hint]) == Ordering::Greater {
841 // Gallop right until a[base+hint+lastOfs] < key <= a[base+hint+ofs]
842 let max_ofs = len - hint;
843 while ofs < max_ofs && c(key, &a[base + hint + ofs]) == Ordering::Greater {
844 last_ofs = ofs;
845 ofs = (ofs << 1) + 1;
846 if ofs == 0 {
847 // int overflow
848 ofs = max_ofs;
849 }
850 }
851 if ofs > max_ofs {
852 ofs = max_ofs;
853 }
854
855 // Make offsets relative to base (and pre-increment last_ofs)
856 last_ofs += hint + 1;
857 ofs += hint;
858 } else {
859 // key <= a[base + hint]
860 // Gallop left until a[base+hint-ofs] < key <= a[base+hint-lastOfs]
861 let max_ofs = hint + 1;
862 while ofs < max_ofs && c(key, &a[base + hint - ofs]) != Ordering::Greater {
863 last_ofs = ofs;
864 ofs = (ofs << 1) + 1;
865 if ofs == 0 {
866 // int overflow
867 ofs = max_ofs;
868 }
869 }
870 if ofs > max_ofs {
871 ofs = max_ofs;
872 }
873
874 // Make offsets relative to base (and pre-increment last_ofs)
875 // Note: ofs can be hint+1 so hint-ofs would underflow for usize;
876 // folding the +1 avoids this since (hint+1)-ofs >= 0
877 let tmp = last_ofs;
878 last_ofs = (hint + 1) - ofs;
879 ofs = hint - tmp;
880 }
881 debug_assert!(last_ofs <= ofs && ofs <= len);
882
883 // Now a[base+lastOfs-1] < key <= a[base+ofs], so key belongs somewhere
884 // to the right of lastOfs but no farther right than ofs. Do a binary
885 // search, with invariant a[base + lastOfs - 1] < key <= a[base + ofs].
886 while last_ofs < ofs {
887 let m = last_ofs + ((ofs - last_ofs) / 2);
888
889 if c(key, &a[base + m]) == Ordering::Greater {
890 last_ofs = m + 1; // a[base + m] < key
891 } else {
892 ofs = m; // key <= a[base + m]
893 }
894 }
895 debug_assert!(last_ofs == ofs);
896 ofs
897}
898
899/// Like gallop_left, except that if the range contains an element equal to
900/// key, gallop_right returns the index after the rightmost equal element.
901fn gallop_right<T, F>(key: &T, a: &[T], base: usize, len: usize, hint: usize, c: &mut F) -> usize
902where
903 F: FnMut(&T, &T) -> Ordering,
904{
905 debug_assert!(len > 0 && hint < len);
906
907 let mut ofs = 1usize;
908 let mut last_ofs = 0usize;
909
910 if c(key, &a[base + hint]) == Ordering::Less {
911 // Gallop left until a[b+hint - ofs] <= key < a[b+hint - lastOfs]
912 let max_ofs = hint + 1;
913 while ofs < max_ofs && c(key, &a[base + hint - ofs]) == Ordering::Less {
914 last_ofs = ofs;
915 ofs = (ofs << 1) + 1;
916 if ofs == 0 {
917 // int overflow
918 ofs = max_ofs;
919 }
920 }
921 if ofs > max_ofs {
922 ofs = max_ofs;
923 }
924
925 // Make offsets relative to b (and pre-increment last_ofs)
926 // Note: ofs can be hint+1 so hint-ofs would underflow for usize;
927 // folding the +1 avoids this since (hint+1)-ofs >= 0
928 let tmp = last_ofs;
929 last_ofs = (hint + 1) - ofs;
930 ofs = hint - tmp;
931 } else {
932 // a[b + hint] <= key
933 // Gallop right until a[b+hint + lastOfs] <= key < a[b+hint + ofs]
934 let max_ofs = len - hint;
935 while ofs < max_ofs && c(key, &a[base + hint + ofs]) != Ordering::Less {
936 last_ofs = ofs;
937 ofs = (ofs << 1) + 1;
938 if ofs == 0 {
939 // int overflow
940 ofs = max_ofs;
941 }
942 }
943 if ofs > max_ofs {
944 ofs = max_ofs;
945 }
946
947 // Make offsets relative to b (and pre-increment last_ofs)
948 last_ofs += hint + 1;
949 ofs += hint;
950 }
951 debug_assert!(last_ofs <= ofs && ofs <= len);
952
953 // Now a[b + lastOfs - 1] <= key < a[b + ofs], so key belongs somewhere to
954 // the right of lastOfs but no farther right than ofs. Do a binary
955 // search, with invariant a[b + lastOfs - 1] <= key < a[b + ofs].
956 while last_ofs < ofs {
957 let m = last_ofs + ((ofs - last_ofs) / 2);
958
959 if c(key, &a[base + m]) == Ordering::Less {
960 ofs = m; // key < a[b + m]
961 } else {
962 last_ofs = m + 1; // a[b + m] <= key
963 }
964 }
965 debug_assert!(last_ofs == ofs);
966 ofs
967}
968
969/// Sort a vector using the TimSort algorithm with a custom comparator.
970pub fn timsort_by<T, F>(vec: Vec<T>, compare: F) -> Vec<T>
971where
972 T: Clone,
973 F: FnMut(&T, &T) -> Ordering,
974{
975 TimSort::sort(vec, compare)
976}
977
978/// Sort a vector using the TimSort algorithm with the default ordering.
979pub fn timsort<T>(vec: Vec<T>) -> Vec<T>
980where
981 T: Clone + Ord,
982{
983 TimSort::sort(vec, |a, b| a.cmp(b))
984}
985
986/// Sort a slice in place using the TimSort algorithm with a custom comparator.
987pub fn timsort_slice_by<T, F>(slice: &mut [T], compare: F)
988where
989 T: Clone,
990 F: FnMut(&T, &T) -> Ordering,
991{
992 let vec: Vec<T> = slice.to_vec();
993 let sorted = TimSort::sort(vec, compare);
994 for (i, item) in sorted.into_iter().enumerate() {
995 slice[i] = item;
996 }
997}
998
999/// Sort a slice in place using the TimSort algorithm with the default ordering.
1000pub fn timsort_slice<T>(slice: &mut [T])
1001where
1002 T: Clone + Ord,
1003{
1004 timsort_slice_by(slice, |a, b| a.cmp(b))
1005}
1006
1007// ============================================================================
1008// IComparer-based Sorting Functions
1009// ============================================================================
1010
1011/// Sort a vector using the TimSort algorithm with an IComparer.
1012pub fn timsort_with_comparer<T, C>(vec: Vec<T>, comparer: &C) -> Vec<T>
1013where
1014 T: Clone,
1015 C: IComparer<T>,
1016{
1017 TimSort::sort(vec, |a, b| comparer.compare(a, b))
1018}
1019
1020/// Sort a slice in place using the TimSort algorithm with an IComparer.
1021pub fn timsort_slice_with_comparer<T, C>(slice: &mut [T], comparer: &C)
1022where
1023 T: Clone,
1024 C: IComparer<T>,
1025{
1026 timsort_slice_by(slice, |a, b| comparer.compare(a, b))
1027}
1028
1029#[cfg(test)]
1030mod tests {
1031 use super::*;
1032
1033 #[test]
1034 fn test_empty_array() {
1035 let arr: Vec<i32> = vec![];
1036 let result = timsort(arr);
1037 assert!(result.is_empty());
1038 }
1039
1040 #[test]
1041 fn test_single_element() {
1042 let arr = vec![42];
1043 let result = timsort(arr);
1044 assert_eq!(result, vec![42]);
1045 }
1046
1047 #[test]
1048 fn test_already_sorted() {
1049 let arr = vec![1, 2, 3, 4, 5];
1050 let result = timsort(arr);
1051 assert_eq!(result, vec![1, 2, 3, 4, 5]);
1052 }
1053
1054 #[test]
1055 fn test_reverse_sorted() {
1056 let arr = vec![5, 4, 3, 2, 1];
1057 let result = timsort(arr);
1058 assert_eq!(result, vec![1, 2, 3, 4, 5]);
1059 }
1060
1061 #[test]
1062 fn test_random_order() {
1063 let arr = vec![3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5];
1064 let result = timsort(arr);
1065 assert_eq!(result, vec![1, 1, 2, 3, 3, 4, 5, 5, 5, 6, 9]);
1066 }
1067
1068 #[test]
1069 fn test_duplicates() {
1070 let arr = vec![3, 3, 3, 1, 1, 2, 2];
1071 let result = timsort(arr);
1072 assert_eq!(result, vec![1, 1, 2, 2, 3, 3, 3]);
1073 }
1074
1075 #[test]
1076 fn test_custom_comparator_reverse() {
1077 let arr = vec![1, 2, 3, 4, 5];
1078 let result = timsort_by(arr, |a, b| b.cmp(a));
1079 assert_eq!(result, vec![5, 4, 3, 2, 1]);
1080 }
1081
1082 #[test]
1083 fn test_stability() {
1084 // Test that equal elements maintain their relative order
1085 #[derive(Clone, Debug)]
1086 struct Item {
1087 key: i32,
1088 index: usize,
1089 }
1090
1091 let arr = vec![
1092 Item { key: 1, index: 0 },
1093 Item { key: 2, index: 1 },
1094 Item { key: 1, index: 2 },
1095 Item { key: 2, index: 3 },
1096 Item { key: 1, index: 4 },
1097 ];
1098
1099 let result = timsort_by(arr, |a, b| a.key.cmp(&b.key));
1100
1101 // Check that items with same key maintain original order
1102 let ones: Vec<_> = result.iter().filter(|x| x.key == 1).collect();
1103 assert_eq!(ones[0].index, 0);
1104 assert_eq!(ones[1].index, 2);
1105 assert_eq!(ones[2].index, 4);
1106
1107 let twos: Vec<_> = result.iter().filter(|x| x.key == 2).collect();
1108 assert_eq!(twos[0].index, 1);
1109 assert_eq!(twos[1].index, 3);
1110 }
1111
1112 #[test]
1113 fn test_large_array() {
1114 let arr: Vec<i32> = (0..1000).rev().collect();
1115 let expected: Vec<i32> = (0..1000).collect();
1116 let result = timsort(arr);
1117 assert_eq!(result, expected);
1118 }
1119
1120 #[test]
1121 fn test_strings() {
1122 let arr = vec!["banana", "apple", "cherry", "date"];
1123 let result = timsort(arr);
1124 assert_eq!(result, vec!["apple", "banana", "cherry", "date"]);
1125 }
1126
1127 #[test]
1128 fn test_slice_sort() {
1129 let mut arr = [5, 2, 8, 1, 9];
1130 timsort_slice(&mut arr);
1131 assert_eq!(arr, [1, 2, 5, 8, 9]);
1132 }
1133
1134 // IComparer tests
1135
1136 #[test]
1137 fn test_ord_comparer() {
1138 let arr = vec![3, 1, 4, 1, 5, 9, 2, 6];
1139 let comparer = OrdComparer::<i32>::new();
1140 let result = timsort_with_comparer(arr, &comparer);
1141 assert_eq!(result, vec![1, 1, 2, 3, 4, 5, 6, 9]);
1142 }
1143
1144 #[test]
1145 fn test_reverse_comparer() {
1146 let arr = vec![1, 2, 3, 4, 5];
1147 let comparer = ReverseComparer::new(OrdComparer::<i32>::new());
1148 let result = timsort_with_comparer(arr, &comparer);
1149 assert_eq!(result, vec![5, 4, 3, 2, 1]);
1150 }
1151
1152 #[test]
1153 fn test_fn_comparer() {
1154 let arr = vec![3, 1, 4, 1, 5];
1155 let comparer = FnComparer::new(|a: &i32, b: &i32| a.cmp(b));
1156 let result = timsort_with_comparer(arr, &comparer);
1157 assert_eq!(result, vec![1, 1, 3, 4, 5]);
1158 }
1159
1160 #[test]
1161 fn test_fn_comparer_custom() {
1162 // Sort by absolute value
1163 let arr = vec![-3, 1, -4, 1, 5, -9, 2, -6];
1164 let comparer = FnComparer::new(|a: &i32, b: &i32| a.abs().cmp(&b.abs()));
1165 let result = timsort_with_comparer(arr, &comparer);
1166 assert_eq!(result, vec![1, 1, 2, -3, -4, 5, -6, -9]);
1167 }
1168
1169 #[test]
1170 fn test_slice_with_comparer() {
1171 let mut arr = [5, 2, 8, 1, 9];
1172 let comparer = OrdComparer::<i32>::new();
1173 timsort_slice_with_comparer(&mut arr, &comparer);
1174 assert_eq!(arr, [1, 2, 5, 8, 9]);
1175 }
1176}