rstsr-common 0.7.3

An n-Dimension Rust Tensor Toolkit
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
//! Layout rearrangement.
//!
//! Purposes of rearrangement of layouts:
//! - Faster iteration to inplace-modify storage, and binary/ternary operations.
//! - Split layout to multiple layouts.

use crate::prelude_dev::*;

// type alias for this file
type Order = TensorIterOrder;

/* #region translate tensor order to col-major with TensorIterType */

/// This function will return a f-prefer layout that make minimal memory
/// accessing efforts (pointers will not frequently back-and-forth).
///
/// Note that this function should only be used for iteration.
///
/// # Parameter `keep_shape`
///
/// Keep size of output layout when input layout is broadcasted.
/// This option should be true if [`TensorIterOrder::K`] and false if
/// [`TensorIterOrder::G`].
///
/// For example of layout shape `[5, 1, 2, 1, 3, 6]` and stride `[1000, 10, 10,
/// 40, 0, 100]`,
/// - false: shape `[2, 6, 5, 1, 1, 1]` and stride `[10, 100, 1000, 0, 0, 0]`; meaning that
///   broadcasted shapes are eliminated and moved to last axes.
/// - true: shape `[1, 1, 3, 2, 6, 5]` and stride `[10, 40, 0, 10, 100, 1000]`; meaning that
///   broadcasted shapes are iterated with most priority (moved to first axes,
///   input order preserved among them).
///
/// # Returns
///
/// - `layout`: The output layout of greedy iteration.
/// - `index`: Transpose index from input layout to output layout.
pub fn greedy_layout<D>(layout: &Layout<D>, keep_shape: bool) -> (Layout<D>, Vec<isize>)
where
    D: DimDevAPI,
{
    let mut layout = layout.clone();

    // if no elements in layout, return itself
    if layout.size() == 0 {
        let ndim = layout.ndim();
        return (layout, (0..ndim as isize).collect_vec());
    }

    // flip axes with negative strides when the shape is to be kept, so that
    // iteration always moves pointers forward; when not keeping shape,
    // negative strides are preserved in the output
    if keep_shape {
        for n in 0..layout.ndim() {
            if layout.stride()[n] < 0 {
                // should not panic here
                layout = layout.dim_narrow(n as isize, slice!(None, None, -1)).unwrap();
            }
        }
    }

    let shape_old = layout.shape.as_ref();
    let stride_old = layout.stride.as_ref();

    let mut index = (0..layout.ndim() as isize).collect_vec();
    if keep_shape {
        // sort shape and strides if keep shape
        // - (shape = 1 / stride = 0) the smallest (pointer not moving for these cases)
        // - if (shape = 1 / stride = 0, broadcastable axes) preserve order
        // - (larger shape first) if not broadcastable axes, then compare stride size (smaller stride first)
        index.sort_by(|&i1, &i2| {
            let d1 = shape_old[i1 as usize];
            let d2 = shape_old[i2 as usize];
            let t1 = stride_old[i1 as usize];
            let t2 = stride_old[i2 as usize];
            match (d1 == 1 || t1 == 0, d2 == 1 || t2 == 0) {
                // both broadcastable: preserve the original axis order
                (true, true) => i1.cmp(&i2),
                // broadcastable axes come first (pointer does not move on them)
                (true, false) => core::cmp::Ordering::Less,
                (false, true) => core::cmp::Ordering::Greater,
                // otherwise ascending by absolute stride (f-prefer)
                (false, false) => t1.abs().cmp(&t2.abs()),
            }
        });
    } else {
        // sort shape and strides if not keep shape
        // everything is similar, though broadcastable axes should be moved to last
        index.sort_by(|&i1, &i2| {
            let d1 = shape_old[i1 as usize];
            let d2 = shape_old[i2 as usize];
            let t1 = stride_old[i1 as usize];
            let t2 = stride_old[i2 as usize];
            match (d1 == 1 || t1 == 0, d2 == 1 || t2 == 0) {
                // both broadcastable: preserve the original axis order
                (true, true) => i1.cmp(&i2),
                // broadcastable axes come last (squashed to shape 1 / stride 0 below)
                (true, false) => core::cmp::Ordering::Greater,
                (false, true) => core::cmp::Ordering::Less,
                // otherwise ascending by absolute stride (f-prefer)
                (false, false) => t1.abs().cmp(&t2.abs()),
            }
        });
    }

    let mut layout = layout.transpose(&index).unwrap();

    // for case of not keep shape, dimension of broadcastable axes will be set to 1,
    // strides will be set to 0.
    if !keep_shape {
        let mut shape = layout.shape().clone();
        let mut stride = layout.stride().clone();
        shape.as_mut().iter_mut().zip(stride.as_mut().iter_mut()).for_each(|(d, t)| {
            if *d == 1 || *t == 0 {
                *d = 1;
                *t = 0;
            }
        });
        layout = unsafe { Layout::new_unchecked(shape, stride, layout.offset()) };
    }

    return (layout, index);
}

/// Reversed permutation indices.
///
/// Given a permutation `indices`, returns its inverse `inv` such that
/// `inv[indices[k] as usize] == k as isize` for every position `k`.
pub fn reversed_permute(indices: &[isize]) -> Vec<isize> {
    let mut inverse = vec![0; indices.len()];
    indices.iter().enumerate().for_each(|(pos, &axis)| inverse[axis as usize] = pos as isize);
    inverse
}

/// Return a layout that is suitable for array copy.
///
/// - `C` / `F`: contiguous layout of the same shape in the requested order.
/// - `A`: follows contiguity of the input, falling back to the default tensor order.
/// - `K`: keeps the memory arrangement of the input as much as possible.
///
/// Any other iteration order is rejected with an error.
pub fn layout_for_array_copy<D>(layout: &Layout<D>, order: TensorIterOrder) -> Result<Layout<D>>
where
    D: DimDevAPI,
{
    // helpers producing contiguous layouts of the same shape
    let as_c = || layout.shape().c();
    let as_f = || layout.shape().f();
    let result = match order {
        Order::C => as_c(),
        Order::F => as_f(),
        Order::A => {
            if layout.c_contig() {
                as_c()
            } else if layout.f_contig() {
                as_f()
            } else {
                // neither contiguity applies: fall back to the default order
                match TensorOrder::default() {
                    RowMajor => as_c(),
                    ColMajor => as_f(),
                }
            }
        },
        Order::K => {
            // greedy layout keeps memory arrangement; undo its permutation so
            // the returned layout has the original axis order
            let (greedy, indices) = greedy_layout(layout, true);
            greedy.shape().f().transpose(&reversed_permute(&indices))?
        },
        _ => rstsr_invalid!(order, "Iter order for copy only accepts CFAK.")?,
    };
    Ok(result)
}

/// Translate one layout to column-major iteration.
///
/// For how parameter `it_type` works, we refer to definition of
/// [`TensorIterOrder`].
///
/// - C: reverse axes
/// - F: preserve axes
/// - A: B if contiguous, C if c-prefer, F if f-prefer; otherwise default
/// - K: greedy layout, keep shape
/// - G: greedy layout, eliminate broadcastable dimensions
/// - B: sequential memory; valid option if `size = bound_max - bound_min`, otherwise raise err
pub fn translate_to_col_major_unary<D>(layout: &Layout<D>, order: TensorIterOrder) -> Result<Layout<D>>
where
    D: DimDevAPI,
{
    // C: reversing axes turns a row-major scan into a column-major one
    let fn_c = |l: &Layout<D>| Ok(l.reverse_axes());
    // F: already column-major; pass through unchanged
    let fn_f = |l: &Layout<D>| Ok(l.clone());
    // B: flatten into a single sequential axis; only valid when the layout
    // covers one contiguous span of memory
    let fn_b = |l: &Layout<D>| {
        let (bounds_min, bounds_max) = l.bounds_index()?;
        rstsr_assert_eq!(
            bounds_max - bounds_min,
            l.size(),
            InvalidLayout,
            "Data in this layout could not be represented as sequential memory."
        )?;
        let mut shape = l.new_shape();
        let mut stride = l.new_stride();
        // first axis walks the whole buffer element-by-element
        shape[0] = l.size();
        stride[0] = 1;
        // remaining axes become size-1 dummies (their stride value is never
        // used since the dimension is 1)
        for i in 1..l.ndim() {
            shape[i] = 1;
            stride[i] = l.size() as isize;
        }
        Ok(unsafe { Layout::new_unchecked(shape, stride, l.offset()) })
    };
    match order {
        Order::C => fn_c(layout),
        Order::F => fn_f(layout),
        Order::A => {
            let c_contig = layout.c_contig();
            let f_contig = layout.f_contig();
            if c_contig || f_contig {
                // fully contiguous either way: sequential scan is optimal
                fn_b(layout)
            } else {
                let c_prefer = layout.c_prefer();
                let f_prefer = layout.f_prefer();
                match (c_prefer, f_prefer) {
                    (true, false) => fn_c(layout),
                    (false, true) => fn_f(layout),
                    // ambiguous or neither: fall back to the default order
                    (_, _) => match FlagOrder::default() {
                        RowMajor => fn_c(layout),
                        ColMajor => fn_f(layout),
                    },
                }
            }
        },
        Order::K => Ok(greedy_layout(layout, true).0),
        Order::G => Ok(greedy_layout(layout, false).0),
        Order::B => fn_b(layout),
    }
}

/// Translate multiple layouts to column-major iteration.
///
/// This function requires all layouts have the same shape.
///
/// For how parameter `it_type` works, we refer to definition of
/// [`TensorIterOrder`].
///
/// - C: reverse axes
/// - F: preserve axes
/// - A: B if contiguous, C if c-prefer, F if f-prefer; otherwise default
/// - K: greedy layout for the one which have the largest non-broadcast-size, otherwise left-most
///   layout (usually for mutable-assign/inplace-op)
/// - G: invalid option here
/// - B: sequential memory; valid option if `size = bound_max - bound_min`, otherwise raise err
///
/// This operation will not flip any strides.
pub fn translate_to_col_major<D>(layouts: &[&Layout<D>], order: TensorIterOrder) -> Result<Vec<Layout<D>>>
where
    D: DimAPI,
{
    if layouts.is_empty() {
        return Ok(vec![]);
    }

    // this function will map all layouts to column-major iteration by a single
    // iter-order.
    let fn_single = |ls: &[&Layout<D>], order| ls.iter().map(|l| translate_to_col_major_unary(l, order)).collect();

    // make sure all layouts have the same shape
    let is_same_shape = layouts.windows(2).all(|w| w[0].shape() == w[1].shape());
    rstsr_assert!(is_same_shape, InvalidLayout, "All shape of layout in this function must be the same.")?;

    match order {
        Order::C | Order::F | Order::B => fn_single(layouts, order),
        Order::A => {
            let c_contig = layouts.iter().all(|&l| l.c_contig());
            let f_contig = layouts.iter().all(|&l| l.f_contig());
            if c_contig || f_contig {
                fn_single(layouts, Order::B)
            } else {
                // FIX: probe `c_prefer`/`f_prefer` here. Previously this
                // re-checked `c_contig`/`f_contig`, which are already known to
                // be false on this branch, so the prefer arms were unreachable
                // and `A` always fell through to the default order. This now
                // mirrors `Order::A` in `translate_to_col_major_unary`.
                let c_prefer = layouts.iter().all(|&l| l.c_prefer());
                let f_prefer = layouts.iter().all(|&l| l.f_prefer());
                match (c_prefer, f_prefer) {
                    (true, false) => fn_single(layouts, Order::C),
                    (false, true) => fn_single(layouts, Order::F),
                    (_, _) => match FlagOrder::default() {
                        RowMajor => fn_single(layouts, Order::C),
                        ColMajor => fn_single(layouts, Order::F),
                    },
                }
            }
        },
        Order::K => {
            // find the layout with the largest non-broadcast-size
            let size_iter = layouts.iter().map(|l| l.size_non_broadcast()).collect_vec();
            let idx_layout = if size_iter.iter().max() == size_iter.iter().min() {
                // all sizes equal: prefer the left-most layout
                0
            } else {
                size_iter.into_iter().enumerate().max_by_key(|(_, v)| *v).unwrap_or((0, 0)).0
            };
            // make same permutation for all layouts
            let (_, permute_index) = greedy_layout(layouts[idx_layout], true);
            layouts.iter().map(|l| l.transpose(&permute_index)).collect()
        },
        Order::G => rstsr_invalid!(order, "This option is not valid for multiple layouts")?,
    }
}

/// This function will return minimal dimension layout, that the first axis is
/// f-contiguous.
///
/// For example, if shape [2, 4, 6, 8, 10] is contiguous in f-order for the
/// first three axes, then it will return shape [48, 8, 10], and the contiguous
/// size 48.
///
/// # Notes
///
/// - Should be used after [`translate_to_col_major`].
/// - Accepts multiple layouts to be compared.
/// - Due to that final dimension is not known to compiler, this function will return dynamic
///   layout.
pub fn translate_to_col_major_with_contig<D>(layouts: &[&Layout<D>]) -> (Vec<Layout<IxD>>, usize)
where
    D: DimAPI,
{
    if layouts.is_empty() {
        return (vec![], 0);
    }

    // number of leading f-contiguous axes shared by every layout
    let ndim_f_contig = layouts.iter().map(|l| l.ndim_of_f_contig()).min().unwrap();

    if ndim_f_contig == 0 {
        // worst case: no axes are contiguous in f-order; only convert dimensionality
        let converted = layouts.iter().map(|&l| l.clone().into_dim::<IxD>().unwrap()).collect();
        return (converted, 0);
    }

    // fold the shared contiguous prefix into a single scalar size
    let size_contig: usize = layouts[0].shape().as_ref()[..ndim_f_contig].iter().product();
    let reduced = layouts
        .iter()
        .map(|l| {
            let shape = l.shape().as_ref()[ndim_f_contig..].to_vec();
            let stride = l.stride().as_ref()[ndim_f_contig..].to_vec();
            // SAFETY: shape and stride are matching tails of an already-valid layout
            unsafe { Layout::new_unchecked(shape, stride, l.offset()) }
        })
        .collect();
    (reduced, size_contig)
}

/// Obtain composition of layout.
///
/// The returned indices are axes of input layout (with priority):
///
/// - shape = 1 part
/// - stride = 0 part
/// - (memory) contiguous part, col-major order
/// - (memory) discontiguous part, col-major order
///
/// Please note the function is mostly for operations of tensor, not for reshape (which should
/// categorize all contiguous parts, not only the memory-contiguous only).
pub fn get_axes_composition<D>(layout: &Layout<D>) -> (Vec<usize>, Vec<usize>, Vec<usize>, Vec<usize>)
where
    D: DimDevAPI,
{
    let mut comp_i = vec![]; // shape = 1 part, s[i]ngleton
    let mut comp_b = vec![]; // stride = 0 part, [b]roadcast
    let mut comp_c = vec![]; // [c]ontiguous part
    let mut remain = (0..layout.ndim()).collect_vec();

    // shape = 1 part takes precedence over stride = 0 part
    for i in 0..layout.ndim() {
        if layout.shape()[i] == 1 {
            comp_i.push(i);
        } else if layout.stride()[i] == 0 {
            comp_b.push(i);
        }
    }
    remain.retain(|&i| !comp_i.contains(&i) && !comp_b.contains(&i));

    // get axes sorted by stride size (ascending, i.e. col-major order)
    remain.sort_by(|&i1, &i2| layout.stride()[i1].abs().cmp(&layout.stride()[i2].abs()));

    // get contiguous part: strides must chain exactly as running shape products
    let mut current_stride = 1;
    for &i in &remain {
        let (s, t) = (layout.shape()[i], layout.stride()[i]);
        if t == current_stride {
            comp_c.push(i);
            current_stride *= s as isize;
        }
    }

    // get discontiguous part: whatever is left over
    // (declared here rather than up-front; the previous `let mut comp_d = vec![]`
    // initializer was dead, being overwritten before any read)
    let comp_d = remain.into_iter().filter(|i| !comp_c.contains(i)).collect_vec();

    (comp_i, comp_b, comp_c, comp_d)
}

/// Compute a layout for binary operation of two layouts.
///
/// The binary operation can be element-wise operation (add, mul, etc.).
///
/// ## Rules for output
///
/// - Before calling this function, the two layouts should have the same shape (i.e., already been
///   broadcasted).
/// - First, determine the axes with shape = 1 (`a1`), stride = 0 (`a0`), contiguous parts sorted
///   (ascending) by stride size (`ac`), and remaining axes (`ad`).
/// - For output, contiguous parts remains the same as input, remaining axes are re-assigned strides
///   by the input `order` (row-major or col-major).
/// - For axes with shape = 1, this function will assign stride by near non-zero stride, right-close
///   for row-major and left-close for col-major, otherwise 1.
/// - For axes with stride = 0, this function will still assign stride 0 (as broadcasted).
///
/// ## Error processing
///
/// - If the two layouts have different shapes, this function will raise error. Note that this
///   function should be called after broadcasting, so the shapes should be the same.
pub fn get_layout_for_binary_op<D>(la: &Layout<D>, lb: &Layout<D>, order: FlagOrder) -> Result<Layout<D>>
where
    D: DimDevAPI,
{
    // broadcast must be handled before this function
    rstsr_assert_eq!(
        la.shape(),
        lb.shape(),
        InvalidLayout,
        "Shape of two layouts must be the same for this function."
    )?;

    let ndim = la.ndim();
    // singleton axes (a1) only need computing once: shapes are equal, so both
    // layouts have the same shape-1 axes
    let (a1, a0_a, ac_a, _) = get_axes_composition(la);
    let (_, a0_b, ac_b, _) = get_axes_composition(lb);

    // a0_o = a0_a ∩ a0_b
    let a0_o = a0_a.iter().filter(|&&i| a0_b.contains(&i)).cloned().collect_vec();
    // ac_o = first same axes in ac_a and ac_b
    // (longest common prefix: only axes contiguous in both layouts, in the same
    // order, can keep their strides in the output)
    let mut ac_o = vec![];
    for (ic_a, ic_b) in ac_a.iter().zip(ac_b.iter()) {
        if *ic_a == *ic_b {
            ac_o.push(*ic_a);
        } else {
            break;
        }
    }
    // ad_o = remaining
    let mut ad_o = (0..ndim).filter(|&i| !a0_o.contains(&i) && !ac_o.contains(&i) && !a1.contains(&i)).collect_vec();
    // reverse ad_o if row-major
    // (row-major assigns the smallest strides to the last axes)
    if order == RowMajor {
        ad_o.reverse();
    }

    // generate strides
    // ac_o -> ad_o: contiguously increase stride
    let mut stride = la.new_stride();
    let mut current_stride = 1;
    for i in ac_o.iter().chain(ad_o.iter()) {
        stride[*i] = current_stride;
        current_stride *= la.shape()[*i] as isize;
    }
    // a1: refer first non-zero of stride[a1:] for row-major, last non-zero of stride[:a1] for
    // col-major, otherwise 1
    for i in a1 {
        let mut s = 1;
        match order {
            RowMajor => {
                if let Some(&x) = stride.as_ref()[i..].iter().find(|&&x| x != 0) {
                    s = x;
                }
            },
            ColMajor => {
                if let Some(&x) = stride.as_ref()[..i].iter().rev().find(|&&x| x != 0) {
                    s = x;
                }
            },
        }
        stride[i] = s;
    }
    // a0_o: stride = 0, but that has already initialized as zero.

    let shape = la.shape().clone();
    // output offset is 0: this layout describes a freshly-allocated result buffer
    let layout = unsafe { Layout::new_unchecked(shape, stride, 0) };
    Ok(layout)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn test_greedy_layout() {
        // `Layout::new_unchecked` requires unsafe; all layouts constructed here
        // are well-formed, so this block is sound.
        unsafe {
            // c-contiguous layout
            let layout = [2, 3, 4].c();
            let (greedy, _) = greedy_layout(&layout, false);
            assert_eq!(greedy, [4, 3, 2].f());
            let (greedy, _) = greedy_layout(&layout, true);
            assert_eq!(greedy, [4, 3, 2].f());
            // f-contiguous layout
            let layout = [2, 3, 4].f();
            let (greedy, _) = greedy_layout(&layout, false);
            assert_eq!(greedy, [2, 3, 4].f());
            let (greedy, _) = greedy_layout(&layout, true);
            assert_eq!(greedy, [2, 3, 4].f());
            // dimension-size 1 or stride-size 0
            let layout = Layout::new_unchecked([5, 1, 2, 1, 3, 6], [1000, 10, 10, 40, 0, 100], 0);
            let (greedy, _) = greedy_layout(&layout, false);
            let expect = Layout::new_unchecked([2, 6, 5, 1, 1, 1], [10, 100, 1000, 0, 0, 0], 0);
            assert_eq!(greedy, expect);
            let (greedy, _) = greedy_layout(&layout, true);
            let expect = Layout::new_unchecked([1, 1, 3, 2, 6, 5], [10, 40, 0, 10, 100, 1000], 0);
            assert_eq!(greedy, expect);
            // negative strides
            // keep_shape = true flips negative strides; false preserves them
            let layout = [2, 3, 4].f().dim_narrow(1, slice!(None, None, -1)).unwrap();
            let layout = layout.swapaxes(-1, -2).unwrap();
            let (greedy, _) = greedy_layout(&layout, true);
            assert_eq!(greedy, [2, 3, 4].f());
            let (greedy, _) = greedy_layout(&layout, false);
            assert_eq!(greedy, [2, 3, 4].f().dim_narrow(1, slice!(None, None, -1)).unwrap());
        }
    }
}