1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
use crate::Indices;
use crate::here;

/// Returns a reversed copy of the slice `s`.
///
/// The input is left untouched; a freshly allocated `Vec` is returned.
/// Provided for convenience only: reversing a sorted copy is a wasteful
/// way to obtain a descending sort.
pub fn revs<T>(s: &[T]) -> Vec<T> where T: Copy, {
    // Copy first, then flip the copy in place.
    let mut flipped = s.to_vec();
    flipped.reverse();
    flipped
}

/// Scans `v` once and returns `(min, min_index, max, max_index)`.
///
/// Comparisons are strict, so when the minimum or maximum occurs more than
/// once, the index of its first occurrence is reported.
///
/// # Panics
/// Panics when `v` is empty, because the first item seeds both extremes.
pub fn minmax<T>(v:&[T])  -> (T, usize, T, usize) where T: PartialOrd+Copy {
    let seed = v[0];
    let (mut lowest, mut lowest_at) = (seed, 0);
    let (mut highest, mut highest_at) = (seed, 0);
    for (pos, &item) in v.iter().enumerate().skip(1) {
        if item < lowest {
            lowest = item;
            lowest_at = pos;
        } else if item > highest {
            // `else` is safe: an item below the minimum cannot exceed the maximum
            highest = item;
            highest_at = pos;
        }
    }
    (lowest, lowest_at, highest, highest_at)
}

/// Binary search of a slice sorted in ascending order.
///
/// Returns the index of the first item that is strictly greater than `val`.
/// Runs of items equal to `val` are skipped over.
/// When no item is greater than `val` (which includes an empty `s`),
/// returns `s.len()`: not a valid index, but the logical answer.
/// Slices of any length are accepted.
///
/// Example use: looking up cumulative probability distribution functions.
pub fn binsearch<T>(s:&[T], val: T)  -> usize where T: PartialOrd, {
    // In an ascending slice the items not exceeding `val` form a prefix;
    // the length of that prefix is the index of the first greater item.
    s.partition_point(|item| *item <= val)
}

/// Merges two ascending sorted slices into one ascending sorted `Vec`.
///
/// Classical merge: the smaller of the two head items is copied to the
/// output each time; when the heads are equal, one item from each input is
/// copied. Whatever remains of the longer input is appended at the end.
/// Consider using merge_indexed instead, especially for non-primitive end types T.
pub fn merge<T>(v1: &[T], v2: &[T]) -> Vec<T> where T: PartialOrd+Copy, {
    let mut merged: Vec<T> = Vec::with_capacity(v1.len() + v2.len());
    let (mut at1, mut at2) = (0_usize, 0_usize);
    while at1 < v1.len() && at2 < v2.len() {
        let (head1, head2) = (v1[at1], v2[at2]);
        if head1 < head2 {
            merged.push(head1);
            at1 += 1;
        } else if head1 > head2 {
            merged.push(head2);
            at2 += 1;
        } else {
            // equal heads: consume one item from each input
            merged.push(head1);
            merged.push(head2);
            at1 += 1;
            at2 += 1;
        }
    }
    // At most one of these tails is non-empty.
    merged.extend_from_slice(&v1[at1..]);
    merged.extend_from_slice(&v2[at2..]);
    merged
}

/// Merges two ascending sort indices.
///
/// `idx1` is a sort index into `v1` and `idx2` a sort index into `v2`.
/// The data is not shuffled at all: `v2` is simply concatenated onto `v1`
/// in one go and both keep their original order.
/// Returns the concatenated vector and a new sort index into it.
///
/// The entries of `idx2` are shifted by `v1.len()`, the position at which
/// the items of `v2` start in the concatenation, so the result is correct
/// even when `idx1` does not cover every item of `v1`.
/// Behaviour for an empty `idx1` or `idx2` is whatever `merge_indices`
/// does with an empty index.
pub fn merge_indexed<T>(v1:&[T], idx1: &[usize], v2: &[T], idx2: &[usize]) -> ( Vec<T>,Vec<usize> )
    where T: PartialOrd+Copy, {
    let res = [v1,v2].concat(); // no individual shuffling, just one concatenation
    // items of v2 begin at this position of the concatenated vector
    let offset = v1.len();
    // shift up all items of idx2 so that they refer correctly
    // to the second part of the concatenated vector
    let idx2shifted:Vec<usize> = idx2.iter().map(|&x| offset+x ).collect();
    // now merge the indices
    let residx = merge_indices(&res,idx1,&idx2shifted);
    ( res, residx )
}

/// Merges two sort indices that both refer to the same data slice `s`.
///
/// `idx1` and `idx2` each list positions of `s` such that the indexed values
/// are in ascending order. The result lists all of those positions so that
/// the indexed values are again ascending. Data in `s` is not changed at
/// all, only consulted for the comparisons.
/// When the two head values are equal, one entry is taken from `idx1` and
/// then one from `idx2`.
/// Either index may be empty, in which case the other is returned as is.
/// This function is used by `mergesort` and `merge_indexed`.
///
/// # Panics
/// Panics when an index entry is out of bounds for `s`.
fn merge_indices<T>(s: &[T], idx1:&[usize], idx2:&[usize]) -> Vec<usize>
    where T: PartialOrd+Copy, {
    let l1 = idx1.len();
    let l2 = idx2.len();
    let mut residx:Vec<usize> = Vec::with_capacity(l1+l2);
    let mut i1 = 0;
    let mut i2 = 0;
    // compare the heads for as long as both indices have entries left
    while i1 < l1 && i2 < l2 {
        let head1 = s[idx1[i1]];
        let head2 = s[idx2[i2]];
        if head1 < head2 {
            residx.push(idx1[i1]);
            i1 += 1;
        } else if head1 > head2 {
            residx.push(idx2[i2]);
            i2 += 1;
        } else {
            // here the heads are equal, so consume both, idx1 first
            residx.push(idx1[i1]);
            residx.push(idx2[i2]);
            i1 += 1;
            i2 += 1;
        }
    }
    // copy out the rest; at most one of these tails is non-empty
    residx.extend_from_slice(&idx1[i1..]);
    residx.extend_from_slice(&idx2[i2..]);
    residx
}

/// Doubly recursive non-destructive merge sort.
///
/// Sorts the `n` items of `s` starting at position `i`. The data is not
/// moved or mutated; only index values are moved.
/// Returns a vector of indices into `s`, drawn from `i..i+n`, such that the
/// indexed values are in ascending order (a sort index).
/// An empty range (`n == 0`) yields an empty index.
///
/// # Panics
/// Panics when `i+n` exceeds `s.len()`.
fn mergesort<T>(s:&[T], i:usize, n:usize) -> Vec<usize>
    where T: PartialOrd+Copy {
    match n {
        // nothing to sort; without this case the halving below never terminates
        0 => Vec::new(),
        // recursion termination
        1 => vec![i],
        // also terminate with two sorted items (for efficiency)
        2 => if s[i+1] < s[i] { vec![i+1,i] } else { vec![i,i+1] },
        _ => {
            let n1 = n / 2;  // the first part (the parts do not have to be the same)
            let n2 = n - n1; // the remaining second part
            let sv1 = mergesort(s, i, n1); // recursively sort the first half
            let sv2 = mergesort(s, i+n1, n2); // recursively sort the second half
            // Now merge the two sorted indices into one and return it
            merge_indices(s,&sv1,&sv2)
        }
    }
}

/// Returns the ascending sort index of the whole slice `s`.
///
/// A thin wrapper that runs `mergesort` over the full range `0..s.len()`.
/// The data itself is not copied or reordered.
pub fn sortidx<T>(s:&[T]) -> Vec<usize> where T:PartialOrd+Copy {
    let count = s.len();
    mergesort(s, 0, count)
}

/// Immutable sort: returns a new sorted vector and leaves `s` untouched.
///
/// Builds the sort index of all of `s` with `mergesort`, which always
/// produces an ascending index, then hands `s` and the `ascending` flag to
/// `unindex` to produce the output values.
/// NOTE(review): `unindex` comes from the `Indices` trait; presumably it
/// gathers the values in index order, reversed when `ascending` is false.
/// Confirm against the trait.
pub fn sortm<T>(s:&[T], ascending:bool) -> Vec<T> where T: PartialOrd+Copy {
    let sort_index = mergesort(s, 0, s.len());
    sort_index.unindex(s, ascending)
}

/// Ranks the items of `s`: one merge sort followed by one linear pass.
///
/// Ranking is done by inverting the sort index.
/// The sort index is in sorted order and gives data positions;
/// the ranks are in data order and give sorted-order positions.
/// Ranks start at 0. With `ascending` true, the item that comes first in
/// the ascending sort index gets rank 0; otherwise the numbering is
/// reversed, so that item gets rank `n-1`.
pub fn rank<T>(s:&[T], ascending:bool) -> Vec<usize> where T:PartialOrd+Copy {
    let count = s.len();
    let order = mergesort(s, 0, count);
    let mut ranks: Vec<usize> = vec![0; count];
    for (sorted_pos, &data_pos) in order.iter().enumerate() {
        // sorted_pos < count, so the subtraction cannot underflow
        ranks[data_pos] = if ascending { sorted_pos } else { count - sorted_pos - 1 };
    }
    ranks
}