xsd_schema/xpath/
timsort.rs

1/*
2 * Copyright 2009 Google Inc.  All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26//! A stable, adaptive, iterative mergesort that requires far fewer than
27//! n lg(n) comparisons when running on partially sorted arrays, while
28//! offering performance comparable to a traditional mergesort when run
29//! on random arrays. Like all proper mergesorts, this sort is stable and
30//! runs O(n log n) time (worst case). In the worst case, this sort requires
31//! temporary storage space for n/2 object references; in the best case,
32//! it requires only a small constant amount of space.
33//!
34//! This implementation was adapted from Tim Peters's list sort for Python,
35//! which is described in detail here:
36//!   http://svn.python.org/projects/python/trunk/Objects/listsort.txt
37//!
38//! Tim's C code may be found here:
39//!   http://svn.python.org/projects/python/trunk/Objects/listobject.c
40//!
41//! The underlying techniques are described in this paper (and may have
42//! even earlier origins):
43//!   "Optimistic Sorting and Information Theoretic Complexity"
44//!   Peter McIlroy
45//!   SODA (Fourth Annual ACM-SIAM Symposium on Discrete Algorithms),
46//!   pp 467-474, Austin, Texas, 25-27 January 1993.
47//!
48//! Ported from Java/C# implementation by Josh Bloch.
49
50use std::cmp::Ordering;
51use std::marker::PhantomData;
52
53// ============================================================================
54// IComparer Trait - Similar to C#'s IComparer<T>
55// ============================================================================
56
57/// Trait for comparing two values, similar to C#'s `IComparer<T>`.
58///
59/// This trait allows creating stateful or stateless comparers that can be
60/// passed to sorting functions.
61pub trait IComparer<T> {
62    /// Compares two values and returns their ordering.
63    ///
64    /// Returns:
65    /// - `Ordering::Less` if `x < y`
66    /// - `Ordering::Equal` if `x == y`
67    /// - `Ordering::Greater` if `x > y`
68    fn compare(&self, x: &T, y: &T) -> Ordering;
69}
70
71/// A comparer that uses the default `Ord` implementation.
72#[derive(Debug, Clone, Copy, Default)]
73pub struct OrdComparer<T>(PhantomData<T>);
74
75impl<T> OrdComparer<T> {
76    /// Creates a new `OrdComparer`.
77    pub fn new() -> Self {
78        OrdComparer(PhantomData)
79    }
80}
81
82impl<T: Ord> IComparer<T> for OrdComparer<T> {
83    fn compare(&self, x: &T, y: &T) -> Ordering {
84        x.cmp(y)
85    }
86}
87
88/// A comparer that reverses the ordering of another comparer.
89#[derive(Debug, Clone, Copy)]
90pub struct ReverseComparer<C>(pub C);
91
92impl<C> ReverseComparer<C> {
93    /// Creates a new `ReverseComparer` wrapping another comparer.
94    pub fn new(comparer: C) -> Self {
95        ReverseComparer(comparer)
96    }
97}
98
99impl<T, C: IComparer<T>> IComparer<T> for ReverseComparer<C> {
100    fn compare(&self, x: &T, y: &T) -> Ordering {
101        self.0.compare(y, x)
102    }
103}
104
105/// A comparer that wraps a closure.
106pub struct FnComparer<F>(pub F);
107
108impl<F> FnComparer<F> {
109    /// Creates a new `FnComparer` wrapping a closure.
110    pub fn new(f: F) -> Self {
111        FnComparer(f)
112    }
113}
114
115impl<T, F> IComparer<T> for FnComparer<F>
116where
117    F: Fn(&T, &T) -> Ordering,
118{
119    fn compare(&self, x: &T, y: &T) -> Ordering {
120        (self.0)(x, y)
121    }
122}
123
124// ============================================================================
125// TimSort Constants
126// ============================================================================
127
128/// This is the minimum sized sequence that will be merged. Shorter
129/// sequences will be lengthened by calling binarySort. If the entire
130/// array is less than this length, no merges will be performed.
131///
132/// This constant should be a power of two. It was 64 in Tim Peter's C
133/// implementation, but 32 was empirically determined to work better in
134/// this implementation.
135const MIN_MERGE: usize = 32;
136
137/// When we get into galloping mode, we stay there until both runs win less
138/// often than MIN_GALLOP consecutive times.
139const MIN_GALLOP: usize = 7;
140
141/// Maximum initial size of tmp array, which is used for merging.
142/// The array can grow to accommodate demand.
143const INITIAL_TMP_STORAGE_LENGTH: usize = 256;
144
145/// TimSort state for an ongoing sort operation.
146pub struct TimSort<T, F>
147where
148    F: FnMut(&T, &T) -> Ordering,
149{
150    /// The array being sorted
151    a: Vec<T>,
152    /// The comparator for this sort
153    c: F,
154    /// This controls when we get *into* galloping mode.
155    /// It is initialized to MIN_GALLOP. The mergeLo and
156    /// mergeHi methods nudge it higher for random data,
157    /// and lower for highly structured data.
158    min_gallop: usize,
159    /// Temp storage for merges
160    tmp: Vec<T>,
161    /// Number of pending runs on stack
162    stack_size: usize,
163    /// Stack of pending run bases
164    run_base: Vec<usize>,
165    /// Stack of pending run lengths
166    run_len: Vec<usize>,
167}
168
169impl<T, F> TimSort<T, F>
170where
171    T: Clone,
172    F: FnMut(&T, &T) -> Ordering,
173{
174    /// Creates a TimSort instance to maintain the state of an ongoing sort.
175    fn new(a: Vec<T>, c: F) -> Self {
176        let len = a.len();
177
178        // Allocate temp storage (which may be increased later if necessary)
179        let tmp_len = if len < 2 * INITIAL_TMP_STORAGE_LENGTH {
180            len / 2
181        } else {
182            INITIAL_TMP_STORAGE_LENGTH
183        };
184
185        // Allocate runs-to-be-merged stack (which cannot be expanded). The
186        // stack length requirements are described in listsort.txt. The C
187        // version always uses the same stack length (85), but this was
188        // measured to be too expensive when sorting "mid-sized" arrays (e.g.,
189        // 100 elements) in Java. Therefore, we use smaller (but sufficiently
190        // large) stack lengths for smaller arrays.
191        let stack_len = if len < 120 {
192            5
193        } else if len < 1542 {
194            10
195        } else if len < 119151 {
196            19
197        } else {
198            40
199        };
200
201        TimSort {
202            a,
203            c,
204            min_gallop: MIN_GALLOP,
205            tmp: Vec::with_capacity(tmp_len),
206            stack_size: 0,
207            run_base: vec![0; stack_len],
208            run_len: vec![0; stack_len],
209        }
210    }
211
212    /// Sort the array in place using TimSort algorithm.
213    pub fn sort(mut a: Vec<T>, mut c: F) -> Vec<T> {
214        let len = a.len();
215        if len < 2 {
216            return a;
217        }
218
219        // If array is small, do a "mini-TimSort" with no merges
220        if len < MIN_MERGE {
221            let init_run_len = count_run_and_make_ascending(&mut a, 0, len, &mut c);
222            binary_sort(&mut a, 0, len, init_run_len, &mut c);
223            return a;
224        }
225
226        // March over the array once, left to right, finding natural runs,
227        // extending short natural runs to minRun elements, and merging runs
228        // to maintain stack invariant.
229        let mut ts = TimSort::new(a, c);
230        let min_run = min_run_length(len);
231        let mut lo = 0;
232        let mut n_remaining = len;
233
234        loop {
235            // Identify next run
236            let mut run_len = count_run_and_make_ascending(&mut ts.a, lo, len, &mut ts.c);
237
238            // If run is short, extend to min(minRun, nRemaining)
239            if run_len < min_run {
240                let force = if n_remaining <= min_run {
241                    n_remaining
242                } else {
243                    min_run
244                };
245                binary_sort(&mut ts.a, lo, lo + force, lo + run_len, &mut ts.c);
246                run_len = force;
247            }
248
249            // Push run onto pending-run stack, and maybe merge
250            ts.push_run(lo, run_len);
251            ts.merge_collapse();
252
253            // Advance to find next run
254            lo += run_len;
255            n_remaining -= run_len;
256
257            if n_remaining == 0 {
258                break;
259            }
260        }
261
262        // Merge all remaining runs to complete sort
263        ts.merge_force_collapse();
264        ts.a
265    }
266
267    /// Pushes the specified run onto the pending-run stack.
268    fn push_run(&mut self, run_base: usize, run_len: usize) {
269        self.run_base[self.stack_size] = run_base;
270        self.run_len[self.stack_size] = run_len;
271        self.stack_size += 1;
272    }
273
274    /// Examines the stack of runs waiting to be merged and merges adjacent runs
275    /// until the stack invariants are reestablished:
276    ///
277    /// 1. `runLen[i - 3] > runLen[i - 2] + runLen[i - 1]`
278    /// 2. `runLen[i - 2] > runLen[i - 1]`
279    ///
280    /// This method is called each time a new run is pushed onto the stack,
281    /// so the invariants are guaranteed to hold for i < stackSize upon
282    /// entry to the method.
283    fn merge_collapse(&mut self) {
284        while self.stack_size > 1 {
285            let mut n = self.stack_size - 2;
286            if n > 0 && self.run_len[n - 1] <= self.run_len[n] + self.run_len[n + 1] {
287                if self.run_len[n - 1] < self.run_len[n + 1] {
288                    n -= 1;
289                }
290                self.merge_at(n);
291            } else if self.run_len[n] <= self.run_len[n + 1] {
292                self.merge_at(n);
293            } else {
294                break; // Invariant is established
295            }
296        }
297    }
298
299    /// Merges all runs on the stack until only one remains.
300    /// This method is called once, to complete the sort.
301    fn merge_force_collapse(&mut self) {
302        while self.stack_size > 1 {
303            let mut n = self.stack_size - 2;
304            if n > 0 && self.run_len[n - 1] < self.run_len[n + 1] {
305                n -= 1;
306            }
307            self.merge_at(n);
308        }
309    }
310
311    /// Merges the two runs at stack indices i and i+1. Run i must be
312    /// the penultimate or antepenultimate run on the stack. In other words,
313    /// i must be equal to stackSize-2 or stackSize-3.
314    fn merge_at(&mut self, i: usize) {
315        debug_assert!(self.stack_size >= 2);
316        debug_assert!(i == self.stack_size - 2 || i == self.stack_size - 3);
317
318        let mut base1 = self.run_base[i];
319        let mut len1 = self.run_len[i];
320        let base2 = self.run_base[i + 1];
321        let mut len2 = self.run_len[i + 1];
322        debug_assert!(len1 > 0 && len2 > 0);
323        debug_assert!(base1 + len1 == base2);
324
325        // Record the length of the combined runs; if i is the 3rd-last
326        // run now, also slide over the last run (which isn't involved
327        // in this merge). The current run (i+1) goes away in any case.
328        self.run_len[i] = len1 + len2;
329        if i + 3 == self.stack_size {
330            self.run_base[i + 1] = self.run_base[i + 2];
331            self.run_len[i + 1] = self.run_len[i + 2];
332        }
333        self.stack_size -= 1;
334
335        // Find where the first element of run2 goes in run1. Prior elements
336        // in run1 can be ignored (because they're already in place).
337        let k = gallop_right(&self.a[base2], &self.a, base1, len1, 0, &mut self.c);
338        base1 += k;
339        len1 -= k;
340        if len1 == 0 {
341            return;
342        }
343
344        // Find where the last element of run1 goes in run2. Subsequent elements
345        // in run2 can be ignored (because they're already in place).
346        len2 = gallop_left(
347            &self.a[base1 + len1 - 1],
348            &self.a,
349            base2,
350            len2,
351            len2 - 1,
352            &mut self.c,
353        );
354        if len2 == 0 {
355            return;
356        }
357
358        // Merge remaining runs, using tmp array with min(len1, len2) elements
359        if len1 <= len2 {
360            self.merge_lo(base1, len1, base2, len2);
361        } else {
362            self.merge_hi(base1, len1, base2, len2);
363        }
364    }
365
366    /// Ensures that the external array tmp has at least the specified
367    /// number of elements, increasing its size if necessary. The size
368    /// increases exponentially to ensure amortized linear time complexity.
369    fn ensure_capacity(&mut self, min_capacity: usize) {
370        if self.tmp.capacity() < min_capacity {
371            // Compute smallest power of 2 > minCapacity
372            let mut new_size = min_capacity;
373            new_size |= new_size >> 1;
374            new_size |= new_size >> 2;
375            new_size |= new_size >> 4;
376            new_size |= new_size >> 8;
377            new_size |= new_size >> 16;
378            new_size = new_size.wrapping_add(1);
379
380            if new_size == 0 {
381                // overflow
382                new_size = min_capacity;
383            } else {
384                new_size = new_size.min(self.a.len() / 2);
385            }
386
387            self.tmp = Vec::with_capacity(new_size);
388        }
389        self.tmp.clear();
390    }
391
392    /// Merges two adjacent runs in place, in a stable fashion. The first
393    /// element of the first run must be greater than the first element of the
394    /// second run (a[base1] > a[base2]), and the last element of the first run
395    /// (a[base1 + len1-1]) must be greater than all elements of the second run.
396    ///
397    /// For performance, this method should be called only when len1 <= len2;
398    /// its twin, merge_hi should be called if len1 >= len2. (Either method
399    /// may be called if len1 == len2.)
400    fn merge_lo(&mut self, base1: usize, mut len1: usize, base2: usize, mut len2: usize) {
401        debug_assert!(len1 > 0 && len2 > 0 && base1 + len1 == base2);
402
403        // Copy first run into temp array
404        self.ensure_capacity(len1);
405        for i in 0..len1 {
406            self.tmp.push(self.a[base1 + i].clone());
407        }
408
409        let mut cursor1 = 0usize; // Indexes into tmp array
410        let mut cursor2 = base2; // Indexes into a
411        let mut dest = base1; // Indexes into a
412
413        // Move first element of second run and deal with degenerate cases
414        self.a[dest] = self.a[cursor2].clone();
415        dest += 1;
416        cursor2 += 1;
417        len2 -= 1;
418        if len2 == 0 {
419            for i in 0..len1 {
420                self.a[dest + i] = self.tmp[cursor1 + i].clone();
421            }
422            return;
423        }
424        if len1 == 1 {
425            for i in 0..len2 {
426                self.a[dest + i] = self.a[cursor2 + i].clone();
427            }
428            self.a[dest + len2] = self.tmp[cursor1].clone();
429            return;
430        }
431
432        let mut min_gallop = self.min_gallop;
433
434        'outer: loop {
435            let mut count1 = 0usize; // Number of times in a row that first run won
436            let mut count2 = 0usize; // Number of times in a row that second run won
437
438            // Do the straightforward thing until (if ever) one run starts
439            // winning consistently.
440            loop {
441                debug_assert!(len1 > 1 && len2 > 0);
442                if (self.c)(&self.a[cursor2], &self.tmp[cursor1]) == Ordering::Less {
443                    self.a[dest] = self.a[cursor2].clone();
444                    dest += 1;
445                    cursor2 += 1;
446                    count2 += 1;
447                    count1 = 0;
448                    len2 -= 1;
449                    if len2 == 0 {
450                        break 'outer;
451                    }
452                } else {
453                    self.a[dest] = self.tmp[cursor1].clone();
454                    dest += 1;
455                    cursor1 += 1;
456                    count1 += 1;
457                    count2 = 0;
458                    len1 -= 1;
459                    if len1 == 1 {
460                        break 'outer;
461                    }
462                }
463                if (count1 | count2) >= min_gallop {
464                    break;
465                }
466            }
467
468            // One run is winning so consistently that galloping may be a
469            // huge win. So try that, and continue galloping until (if ever)
470            // neither run appears to be winning consistently anymore.
471            loop {
472                debug_assert!(len1 > 1 && len2 > 0);
473                count1 = gallop_right(&self.a[cursor2], &self.tmp, cursor1, len1, 0, &mut self.c);
474                if count1 != 0 {
475                    for i in 0..count1 {
476                        self.a[dest + i] = self.tmp[cursor1 + i].clone();
477                    }
478                    dest += count1;
479                    cursor1 += count1;
480                    len1 -= count1;
481                    if len1 <= 1 {
482                        break 'outer;
483                    }
484                }
485                self.a[dest] = self.a[cursor2].clone();
486                dest += 1;
487                cursor2 += 1;
488                len2 -= 1;
489                if len2 == 0 {
490                    break 'outer;
491                }
492
493                count2 = gallop_left(&self.tmp[cursor1], &self.a, cursor2, len2, 0, &mut self.c);
494                if count2 != 0 {
495                    for i in 0..count2 {
496                        self.a[dest + i] = self.a[cursor2 + i].clone();
497                    }
498                    dest += count2;
499                    cursor2 += count2;
500                    len2 -= count2;
501                    if len2 == 0 {
502                        break 'outer;
503                    }
504                }
505                self.a[dest] = self.tmp[cursor1].clone();
506                dest += 1;
507                cursor1 += 1;
508                len1 -= 1;
509                if len1 == 1 {
510                    break 'outer;
511                }
512                min_gallop = min_gallop.saturating_sub(1);
513                if count1 < MIN_GALLOP && count2 < MIN_GALLOP {
514                    break;
515                }
516            }
517            min_gallop += 2; // Penalize for leaving gallop mode
518        }
519
520        self.min_gallop = if min_gallop < 1 { 1 } else { min_gallop };
521
522        if len1 == 1 {
523            debug_assert!(len2 > 0);
524            for i in 0..len2 {
525                self.a[dest + i] = self.a[cursor2 + i].clone();
526            }
527            self.a[dest + len2] = self.tmp[cursor1].clone();
528        } else if len1 == 0 {
529            panic!("Comparison method violates its general contract!");
530        } else {
531            debug_assert!(len2 == 0);
532            debug_assert!(len1 > 1);
533            for i in 0..len1 {
534                self.a[dest + i] = self.tmp[cursor1 + i].clone();
535            }
536        }
537    }
538
539    /// Like merge_lo, except that this method should be called only if
540    /// len1 >= len2; merge_lo should be called if len1 <= len2. (Either method
541    /// may be called if len1 == len2.)
542    fn merge_hi(&mut self, base1: usize, mut len1: usize, base2: usize, mut len2: usize) {
543        debug_assert!(len1 > 0 && len2 > 0 && base1 + len1 == base2);
544
545        // Copy second run into temp array
546        self.ensure_capacity(len2);
547        for i in 0..len2 {
548            self.tmp.push(self.a[base2 + i].clone());
549        }
550
551        let mut cursor1 = base1 + len1 - 1; // Indexes into a
552        let mut cursor2 = len2 - 1; // Indexes into tmp array (use isize for potential underflow)
553        let mut dest = base2 + len2 - 1; // Indexes into a
554
555        // Move last element of first run and deal with degenerate cases
556        self.a[dest] = self.a[cursor1].clone();
557        dest -= 1;
558        cursor1 -= 1;
559        len1 -= 1;
560        if len1 == 0 {
561            let start = dest - (len2 - 1);
562            for i in 0..len2 {
563                self.a[start + i] = self.tmp[i].clone();
564            }
565            return;
566        }
567        if len2 == 1 {
568            dest -= len1;
569            cursor1 -= len1;
570            for i in (0..len1).rev() {
571                self.a[dest + 1 + i] = self.a[cursor1 + 1 + i].clone();
572            }
573            self.a[dest] = self.tmp[cursor2].clone();
574            return;
575        }
576
577        let mut min_gallop = self.min_gallop;
578
579        'outer: loop {
580            let mut count1 = 0usize;
581            let mut count2 = 0usize;
582
583            // Do the straightforward thing until (if ever) one run
584            // appears to win consistently.
585            loop {
586                debug_assert!(len1 > 0 && len2 > 1);
587                if (self.c)(&self.tmp[cursor2], &self.a[cursor1]) == Ordering::Less {
588                    self.a[dest] = self.a[cursor1].clone();
589                    dest -= 1;
590                    cursor1 -= 1;
591                    count1 += 1;
592                    count2 = 0;
593                    len1 -= 1;
594                    if len1 == 0 {
595                        break 'outer;
596                    }
597                } else {
598                    self.a[dest] = self.tmp[cursor2].clone();
599                    dest -= 1;
600                    cursor2 = cursor2.wrapping_sub(1);
601                    count2 += 1;
602                    count1 = 0;
603                    len2 -= 1;
604                    if len2 == 1 {
605                        break 'outer;
606                    }
607                }
608                if (count1 | count2) >= min_gallop {
609                    break;
610                }
611            }
612
613            // One run is winning so consistently that galloping may be a
614            // huge win. So try that, and continue galloping until (if ever)
615            // neither run appears to be winning consistently anymore.
616            loop {
617                debug_assert!(len1 > 0 && len2 > 1);
618                count1 = len1
619                    - gallop_right(
620                        &self.tmp[cursor2],
621                        &self.a,
622                        base1,
623                        len1,
624                        len1 - 1,
625                        &mut self.c,
626                    );
627                if count1 != 0 {
628                    dest -= count1;
629                    cursor1 -= count1;
630                    len1 -= count1;
631                    for i in (0..count1).rev() {
632                        self.a[dest + 1 + i] = self.a[cursor1 + 1 + i].clone();
633                    }
634                    if len1 == 0 {
635                        break 'outer;
636                    }
637                }
638                self.a[dest] = self.tmp[cursor2].clone();
639                dest -= 1;
640                cursor2 = cursor2.wrapping_sub(1);
641                len2 -= 1;
642                if len2 == 1 {
643                    break 'outer;
644                }
645
646                count2 =
647                    len2 - gallop_left(&self.a[cursor1], &self.tmp, 0, len2, len2 - 1, &mut self.c);
648                if count2 != 0 {
649                    dest -= count2;
650                    cursor2 = cursor2.wrapping_sub(count2);
651                    len2 -= count2;
652                    for i in 0..count2 {
653                        self.a[dest + 1 + i] = self.tmp[cursor2.wrapping_add(1) + i].clone();
654                    }
655                    if len2 <= 1 {
656                        break 'outer;
657                    }
658                }
659                self.a[dest] = self.a[cursor1].clone();
660                dest -= 1;
661                cursor1 -= 1;
662                len1 -= 1;
663                if len1 == 0 {
664                    break 'outer;
665                }
666                min_gallop = min_gallop.saturating_sub(1);
667                if count1 < MIN_GALLOP && count2 < MIN_GALLOP {
668                    break;
669                }
670            }
671            min_gallop += 2; // Penalize for leaving gallop mode
672        }
673
674        self.min_gallop = if min_gallop < 1 { 1 } else { min_gallop };
675
676        if len2 == 1 {
677            debug_assert!(len1 > 0);
678            dest -= len1;
679            cursor1 -= len1;
680            for i in (0..len1).rev() {
681                self.a[dest + 1 + i] = self.a[cursor1 + 1 + i].clone();
682            }
683            self.a[dest] = self.tmp[cursor2].clone();
684        } else if len2 == 0 {
685            panic!("Comparison method violates its general contract!");
686        } else {
687            debug_assert!(len1 == 0);
688            debug_assert!(len2 > 0);
689            let start = dest - (len2 - 1);
690            for i in 0..len2 {
691                self.a[start + i] = self.tmp[i].clone();
692            }
693        }
694    }
695}
696
697/// Sorts the specified portion of the specified array using a binary
698/// insertion sort. This is the best method for sorting small numbers
699/// of elements. It requires O(n log n) compares, but O(n^2) data
700/// movement (worst case).
701///
702/// If the initial part of the specified range is already sorted,
703/// this method can take advantage of it: the method assumes that the
704/// elements from index `lo`, inclusive, to `start`, exclusive are already sorted.
705fn binary_sort<T, F>(a: &mut [T], lo: usize, hi: usize, mut start: usize, c: &mut F)
706where
707    T: Clone,
708    F: FnMut(&T, &T) -> Ordering,
709{
710    debug_assert!(lo <= start && start <= hi);
711    if start == lo {
712        start += 1;
713    }
714    while start < hi {
715        let pivot = a[start].clone();
716
717        // Set left (and right) to the index where a[start] (pivot) belongs
718        let mut left = lo;
719        let mut right = start;
720
721        // Invariants:
722        //   pivot >= all in [lo, left).
723        //   pivot <  all in [right, start).
724        while left < right {
725            let mid = (left + right) / 2;
726            if c(&pivot, &a[mid]) == Ordering::Less {
727                right = mid;
728            } else {
729                left = mid + 1;
730            }
731        }
732
733        // The invariants still hold: pivot >= all in [lo, left) and
734        // pivot < all in [left, start), so pivot belongs at left. Note
735        // that if there are elements equal to pivot, left points to the
736        // first slot after them -- that's why this sort is stable.
737        // Slide elements over to make room for pivot.
738        let n = start - left;
739        match n {
740            2 => {
741                a[left + 2] = a[left + 1].clone();
742                a[left + 1] = a[left].clone();
743            }
744            1 => {
745                a[left + 1] = a[left].clone();
746            }
747            _ if n > 0 => {
748                for i in (0..n).rev() {
749                    a[left + i + 1] = a[left + i].clone();
750                }
751            }
752            _ => {}
753        }
754        a[left] = pivot;
755        start += 1;
756    }
757}
758
759/// Returns the length of the run beginning at the specified position in
760/// the specified array and reverses the run if it is descending (ensuring
761/// that the run will always be ascending when the method returns).
762///
763/// A run is the longest ascending sequence with:
764///    a[lo] <= a[lo + 1] <= a[lo + 2] <= ...
765///
766/// or the longest descending sequence with:
767///    a[lo] >  a[lo + 1] >  a[lo + 2] >  ...
768///
769/// For its intended use in a stable mergesort, the strictness of the
770/// definition of "descending" is needed so that the call can safely
771/// reverse a descending sequence without violating stability.
772fn count_run_and_make_ascending<T, F>(a: &mut [T], lo: usize, hi: usize, c: &mut F) -> usize
773where
774    T: Clone,
775    F: FnMut(&T, &T) -> Ordering,
776{
777    debug_assert!(lo < hi);
778    let mut run_hi = lo + 1;
779    if run_hi == hi {
780        return 1;
781    }
782
783    // Find end of run, and reverse range if descending
784    if c(&a[run_hi], &a[lo]) == Ordering::Less {
785        // Descending
786        run_hi += 1;
787        while run_hi < hi && c(&a[run_hi], &a[run_hi - 1]) == Ordering::Less {
788            run_hi += 1;
789        }
790        reverse_range(a, lo, run_hi);
791    } else {
792        // Ascending
793        run_hi += 1;
794        while run_hi < hi && c(&a[run_hi], &a[run_hi - 1]) != Ordering::Less {
795            run_hi += 1;
796        }
797    }
798
799    run_hi - lo
800}
801
802/// Reverse the specified range of the specified array.
803fn reverse_range<T>(a: &mut [T], mut lo: usize, mut hi: usize) {
804    hi -= 1;
805    while lo < hi {
806        a.swap(lo, hi);
807        lo += 1;
808        hi -= 1;
809    }
810}
811
812/// Returns the minimum acceptable run length for an array of the specified
813/// length. Natural runs shorter than this will be extended with binary_sort.
814///
815/// Roughly speaking, the computation is:
816///   If n < MIN_MERGE, return n (it's too small to bother with fancy stuff).
817///   Else if n is an exact power of 2, return MIN_MERGE/2.
818///   Else return an int k, MIN_MERGE/2 <= k <= MIN_MERGE, such that n/k
819///    is close to, but strictly less than, an exact power of 2.
820fn min_run_length(mut n: usize) -> usize {
821    let mut r = 0; // Becomes 1 if any 1 bits are shifted off
822    while n >= MIN_MERGE {
823        r |= n & 1;
824        n >>= 1;
825    }
826    n + r
827}
828
829/// Locates the position at which to insert the specified key into the
830/// specified sorted range; if the range contains an element equal to key,
831/// returns the index of the leftmost equal element.
832fn gallop_left<T, F>(key: &T, a: &[T], base: usize, len: usize, hint: usize, c: &mut F) -> usize
833where
834    F: FnMut(&T, &T) -> Ordering,
835{
836    debug_assert!(len > 0 && hint < len);
837    let mut last_ofs = 0usize;
838    let mut ofs = 1usize;
839
840    if c(key, &a[base + hint]) == Ordering::Greater {
841        // Gallop right until a[base+hint+lastOfs] < key <= a[base+hint+ofs]
842        let max_ofs = len - hint;
843        while ofs < max_ofs && c(key, &a[base + hint + ofs]) == Ordering::Greater {
844            last_ofs = ofs;
845            ofs = (ofs << 1) + 1;
846            if ofs == 0 {
847                // int overflow
848                ofs = max_ofs;
849            }
850        }
851        if ofs > max_ofs {
852            ofs = max_ofs;
853        }
854
855        // Make offsets relative to base (and pre-increment last_ofs)
856        last_ofs += hint + 1;
857        ofs += hint;
858    } else {
859        // key <= a[base + hint]
860        // Gallop left until a[base+hint-ofs] < key <= a[base+hint-lastOfs]
861        let max_ofs = hint + 1;
862        while ofs < max_ofs && c(key, &a[base + hint - ofs]) != Ordering::Greater {
863            last_ofs = ofs;
864            ofs = (ofs << 1) + 1;
865            if ofs == 0 {
866                // int overflow
867                ofs = max_ofs;
868            }
869        }
870        if ofs > max_ofs {
871            ofs = max_ofs;
872        }
873
874        // Make offsets relative to base (and pre-increment last_ofs)
875        // Note: ofs can be hint+1 so hint-ofs would underflow for usize;
876        // folding the +1 avoids this since (hint+1)-ofs >= 0
877        let tmp = last_ofs;
878        last_ofs = (hint + 1) - ofs;
879        ofs = hint - tmp;
880    }
881    debug_assert!(last_ofs <= ofs && ofs <= len);
882
883    // Now a[base+lastOfs-1] < key <= a[base+ofs], so key belongs somewhere
884    // to the right of lastOfs but no farther right than ofs. Do a binary
885    // search, with invariant a[base + lastOfs - 1] < key <= a[base + ofs].
886    while last_ofs < ofs {
887        let m = last_ofs + ((ofs - last_ofs) / 2);
888
889        if c(key, &a[base + m]) == Ordering::Greater {
890            last_ofs = m + 1; // a[base + m] < key
891        } else {
892            ofs = m; // key <= a[base + m]
893        }
894    }
895    debug_assert!(last_ofs == ofs);
896    ofs
897}
898
899/// Like gallop_left, except that if the range contains an element equal to
900/// key, gallop_right returns the index after the rightmost equal element.
901fn gallop_right<T, F>(key: &T, a: &[T], base: usize, len: usize, hint: usize, c: &mut F) -> usize
902where
903    F: FnMut(&T, &T) -> Ordering,
904{
905    debug_assert!(len > 0 && hint < len);
906
907    let mut ofs = 1usize;
908    let mut last_ofs = 0usize;
909
910    if c(key, &a[base + hint]) == Ordering::Less {
911        // Gallop left until a[b+hint - ofs] <= key < a[b+hint - lastOfs]
912        let max_ofs = hint + 1;
913        while ofs < max_ofs && c(key, &a[base + hint - ofs]) == Ordering::Less {
914            last_ofs = ofs;
915            ofs = (ofs << 1) + 1;
916            if ofs == 0 {
917                // int overflow
918                ofs = max_ofs;
919            }
920        }
921        if ofs > max_ofs {
922            ofs = max_ofs;
923        }
924
925        // Make offsets relative to b (and pre-increment last_ofs)
926        // Note: ofs can be hint+1 so hint-ofs would underflow for usize;
927        // folding the +1 avoids this since (hint+1)-ofs >= 0
928        let tmp = last_ofs;
929        last_ofs = (hint + 1) - ofs;
930        ofs = hint - tmp;
931    } else {
932        // a[b + hint] <= key
933        // Gallop right until a[b+hint + lastOfs] <= key < a[b+hint + ofs]
934        let max_ofs = len - hint;
935        while ofs < max_ofs && c(key, &a[base + hint + ofs]) != Ordering::Less {
936            last_ofs = ofs;
937            ofs = (ofs << 1) + 1;
938            if ofs == 0 {
939                // int overflow
940                ofs = max_ofs;
941            }
942        }
943        if ofs > max_ofs {
944            ofs = max_ofs;
945        }
946
947        // Make offsets relative to b (and pre-increment last_ofs)
948        last_ofs += hint + 1;
949        ofs += hint;
950    }
951    debug_assert!(last_ofs <= ofs && ofs <= len);
952
953    // Now a[b + lastOfs - 1] <= key < a[b + ofs], so key belongs somewhere to
954    // the right of lastOfs but no farther right than ofs. Do a binary
955    // search, with invariant a[b + lastOfs - 1] <= key < a[b + ofs].
956    while last_ofs < ofs {
957        let m = last_ofs + ((ofs - last_ofs) / 2);
958
959        if c(key, &a[base + m]) == Ordering::Less {
960            ofs = m; // key < a[b + m]
961        } else {
962            last_ofs = m + 1; // a[b + m] <= key
963        }
964    }
965    debug_assert!(last_ofs == ofs);
966    ofs
967}
968
969/// Sort a vector using the TimSort algorithm with a custom comparator.
970pub fn timsort_by<T, F>(vec: Vec<T>, compare: F) -> Vec<T>
971where
972    T: Clone,
973    F: FnMut(&T, &T) -> Ordering,
974{
975    TimSort::sort(vec, compare)
976}
977
978/// Sort a vector using the TimSort algorithm with the default ordering.
979pub fn timsort<T>(vec: Vec<T>) -> Vec<T>
980where
981    T: Clone + Ord,
982{
983    TimSort::sort(vec, |a, b| a.cmp(b))
984}
985
986/// Sort a slice in place using the TimSort algorithm with a custom comparator.
987pub fn timsort_slice_by<T, F>(slice: &mut [T], compare: F)
988where
989    T: Clone,
990    F: FnMut(&T, &T) -> Ordering,
991{
992    let vec: Vec<T> = slice.to_vec();
993    let sorted = TimSort::sort(vec, compare);
994    for (i, item) in sorted.into_iter().enumerate() {
995        slice[i] = item;
996    }
997}
998
999/// Sort a slice in place using the TimSort algorithm with the default ordering.
1000pub fn timsort_slice<T>(slice: &mut [T])
1001where
1002    T: Clone + Ord,
1003{
1004    timsort_slice_by(slice, |a, b| a.cmp(b))
1005}
1006
1007// ============================================================================
1008// IComparer-based Sorting Functions
1009// ============================================================================
1010
1011/// Sort a vector using the TimSort algorithm with an IComparer.
1012pub fn timsort_with_comparer<T, C>(vec: Vec<T>, comparer: &C) -> Vec<T>
1013where
1014    T: Clone,
1015    C: IComparer<T>,
1016{
1017    TimSort::sort(vec, |a, b| comparer.compare(a, b))
1018}
1019
1020/// Sort a slice in place using the TimSort algorithm with an IComparer.
1021pub fn timsort_slice_with_comparer<T, C>(slice: &mut [T], comparer: &C)
1022where
1023    T: Clone,
1024    C: IComparer<T>,
1025{
1026    timsort_slice_by(slice, |a, b| comparer.compare(a, b))
1027}
1028
1029#[cfg(test)]
1030mod tests {
1031    use super::*;
1032
1033    #[test]
1034    fn test_empty_array() {
1035        let arr: Vec<i32> = vec![];
1036        let result = timsort(arr);
1037        assert!(result.is_empty());
1038    }
1039
1040    #[test]
1041    fn test_single_element() {
1042        let arr = vec![42];
1043        let result = timsort(arr);
1044        assert_eq!(result, vec![42]);
1045    }
1046
1047    #[test]
1048    fn test_already_sorted() {
1049        let arr = vec![1, 2, 3, 4, 5];
1050        let result = timsort(arr);
1051        assert_eq!(result, vec![1, 2, 3, 4, 5]);
1052    }
1053
1054    #[test]
1055    fn test_reverse_sorted() {
1056        let arr = vec![5, 4, 3, 2, 1];
1057        let result = timsort(arr);
1058        assert_eq!(result, vec![1, 2, 3, 4, 5]);
1059    }
1060
1061    #[test]
1062    fn test_random_order() {
1063        let arr = vec![3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5];
1064        let result = timsort(arr);
1065        assert_eq!(result, vec![1, 1, 2, 3, 3, 4, 5, 5, 5, 6, 9]);
1066    }
1067
1068    #[test]
1069    fn test_duplicates() {
1070        let arr = vec![3, 3, 3, 1, 1, 2, 2];
1071        let result = timsort(arr);
1072        assert_eq!(result, vec![1, 1, 2, 2, 3, 3, 3]);
1073    }
1074
1075    #[test]
1076    fn test_custom_comparator_reverse() {
1077        let arr = vec![1, 2, 3, 4, 5];
1078        let result = timsort_by(arr, |a, b| b.cmp(a));
1079        assert_eq!(result, vec![5, 4, 3, 2, 1]);
1080    }
1081
1082    #[test]
1083    fn test_stability() {
1084        // Test that equal elements maintain their relative order
1085        #[derive(Clone, Debug)]
1086        struct Item {
1087            key: i32,
1088            index: usize,
1089        }
1090
1091        let arr = vec![
1092            Item { key: 1, index: 0 },
1093            Item { key: 2, index: 1 },
1094            Item { key: 1, index: 2 },
1095            Item { key: 2, index: 3 },
1096            Item { key: 1, index: 4 },
1097        ];
1098
1099        let result = timsort_by(arr, |a, b| a.key.cmp(&b.key));
1100
1101        // Check that items with same key maintain original order
1102        let ones: Vec<_> = result.iter().filter(|x| x.key == 1).collect();
1103        assert_eq!(ones[0].index, 0);
1104        assert_eq!(ones[1].index, 2);
1105        assert_eq!(ones[2].index, 4);
1106
1107        let twos: Vec<_> = result.iter().filter(|x| x.key == 2).collect();
1108        assert_eq!(twos[0].index, 1);
1109        assert_eq!(twos[1].index, 3);
1110    }
1111
1112    #[test]
1113    fn test_large_array() {
1114        let arr: Vec<i32> = (0..1000).rev().collect();
1115        let expected: Vec<i32> = (0..1000).collect();
1116        let result = timsort(arr);
1117        assert_eq!(result, expected);
1118    }
1119
1120    #[test]
1121    fn test_strings() {
1122        let arr = vec!["banana", "apple", "cherry", "date"];
1123        let result = timsort(arr);
1124        assert_eq!(result, vec!["apple", "banana", "cherry", "date"]);
1125    }
1126
1127    #[test]
1128    fn test_slice_sort() {
1129        let mut arr = [5, 2, 8, 1, 9];
1130        timsort_slice(&mut arr);
1131        assert_eq!(arr, [1, 2, 5, 8, 9]);
1132    }
1133
1134    // IComparer tests
1135
1136    #[test]
1137    fn test_ord_comparer() {
1138        let arr = vec![3, 1, 4, 1, 5, 9, 2, 6];
1139        let comparer = OrdComparer::<i32>::new();
1140        let result = timsort_with_comparer(arr, &comparer);
1141        assert_eq!(result, vec![1, 1, 2, 3, 4, 5, 6, 9]);
1142    }
1143
1144    #[test]
1145    fn test_reverse_comparer() {
1146        let arr = vec![1, 2, 3, 4, 5];
1147        let comparer = ReverseComparer::new(OrdComparer::<i32>::new());
1148        let result = timsort_with_comparer(arr, &comparer);
1149        assert_eq!(result, vec![5, 4, 3, 2, 1]);
1150    }
1151
1152    #[test]
1153    fn test_fn_comparer() {
1154        let arr = vec![3, 1, 4, 1, 5];
1155        let comparer = FnComparer::new(|a: &i32, b: &i32| a.cmp(b));
1156        let result = timsort_with_comparer(arr, &comparer);
1157        assert_eq!(result, vec![1, 1, 3, 4, 5]);
1158    }
1159
1160    #[test]
1161    fn test_fn_comparer_custom() {
1162        // Sort by absolute value
1163        let arr = vec![-3, 1, -4, 1, 5, -9, 2, -6];
1164        let comparer = FnComparer::new(|a: &i32, b: &i32| a.abs().cmp(&b.abs()));
1165        let result = timsort_with_comparer(arr, &comparer);
1166        assert_eq!(result, vec![1, 1, 2, -3, -4, 5, -6, -9]);
1167    }
1168
1169    #[test]
1170    fn test_slice_with_comparer() {
1171        let mut arr = [5, 2, 8, 1, 9];
1172        let comparer = OrdComparer::<i32>::new();
1173        timsort_slice_with_comparer(&mut arr, &comparer);
1174        assert_eq!(arr, [1, 2, 5, 8, 9]);
1175    }
1176}
xsd_schema/xpath/timsort.rs

xsd_schema/xpath/
timsort.rs