morok-schedule 0.1.0-alpha.2

Optimization passes and pattern engine for the Morok ML compiler
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
//! Comprehensive tests for reduction simplification optimizations.
//!
//! Tests verify the `reduce_unparented`, `reduce_collapse`, and `reduce_mul_chain` optimizations:
//! - reduce_unparented: Remove ranges that don't appear in source (2-10x speedup)
//! - reduce_collapse: Lift range-independent computations outside reductions
//! - reduce_mul_chain: Factor range-independent multiplicands out of a reduction
//! - Helper functions: no_range(), range_size_as_i64()

use std::{f32::consts::PI, sync::Arc};

use morok_dtype::DType;
use morok_ir::{AxisId, AxisType, BinaryOp, Op, ReduceOp, UOp, pattern::RewriteResult};

use crate::rangeify::transforms::reduce_collapse as reduce_collapse_inner;

/// Test helper: runs the `pm_reduce_simplify` pattern matcher once on `reduce`.
///
/// Returns `Some(rewritten)` when the matcher reports `RewriteResult::Rewritten`,
/// and `None` for every other outcome.
fn reduce_unparented(reduce: &Arc<UOp>) -> Option<Arc<UOp>> {
    if let RewriteResult::Rewritten(rewritten) =
        crate::rangeify::patterns::pm_reduce_simplify().rewrite(reduce, &mut ())
    {
        Some(rewritten)
    } else {
        None
    }
}

/// Test helper: unpacks the source and ranges of a REDUCE node and forwards them
/// to the real `reduce_collapse` transform.
///
/// Any node that is not `Op::Reduce` yields `None` without calling the transform.
fn reduce_collapse(reduce: &Arc<UOp>) -> Option<Arc<UOp>> {
    match reduce.op() {
        Op::Reduce { src, ranges, .. } => reduce_collapse_inner(src, ranges),
        _ => None,
    }
}

// ===== Test Helper Functions =====

/// Returns `true` when any node reachable from `uop` (per `toposort`) is a
/// `Reduce` or `ReduceAxis` operation.
fn has_reduce_op(uop: &Arc<UOp>) -> bool {
    let nodes = uop.toposort();
    nodes.iter().any(|node| matches!(node.op(), Op::Reduce { .. } | Op::ReduceAxis { .. }))
}

/// Returns `true` when any node reachable from `uop` (per `toposort`) is a
/// `Range` operation.
fn has_ranges_in_graph(uop: &Arc<UOp>) -> bool {
    let nodes = uop.toposort();
    nodes.iter().any(|node| matches!(node.op(), Op::Range { .. }))
}

// ===== reduce_unparented Tests =====

#[test]
fn test_reduce_unparented_add_basic() {
    // Summing a constant over an axis it never reads:
    //   REDUCE(CONST(5), [range(10)], ADD)
    // Intended rewrite: CONST(5) * 10
    let five = UOp::native_const(5i32);
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let sum_over_axis = five.reduce(vec![axis].into(), ReduceOp::Add);

    let rewritten = reduce_unparented(&sum_over_axis).expect("Should simplify");

    // Only the top-level operation is checked: it must be a MUL.
    assert!(matches!(rewritten.op(), Op::Binary(BinaryOp::Mul, _, _)));
}

#[test]
fn test_reduce_unparented_mul() {
    // Multiplying a constant over an axis it never reads:
    //   REDUCE(CONST(2), [range(3)], MUL)
    // Intended rewrite: CONST(2)^3
    let two = UOp::native_const(2i32);
    let axis = UOp::range_axis(UOp::index_const(3), AxisId::Renumbered(0), AxisType::Reduce);
    let product_over_axis = two.reduce(vec![axis].into(), ReduceOp::Mul);

    let rewritten = reduce_unparented(&product_over_axis).expect("Should simplify");

    // Only the top-level operation is checked: it must be a POW.
    assert!(matches!(rewritten.op(), Op::Binary(BinaryOp::Pow, _, _)));
}

#[test]
fn test_reduce_unparented_max() {
    // MAX of a constant over an axis it never reads:
    //   REDUCE(CONST(42), [range(5)], MAX)
    // Intended rewrite: CONST(42), i.e. the reduction disappears.
    let source = UOp::native_const(42i32);
    let axis = UOp::range_axis(UOp::index_const(5), AxisId::Renumbered(0), AxisType::Reduce);
    let max_over_axis = Arc::clone(&source).reduce(vec![axis].into(), ReduceOp::Max);

    let rewritten = reduce_unparented(&max_over_axis).expect("Should simplify");

    // Pointer identity: the very same node as the source has to come back.
    assert!(Arc::ptr_eq(&rewritten, &source));
}

#[test]
fn test_reduce_unparented_min() {
    // MIN of a constant over an axis it never reads:
    //   REDUCE(CONST(42), [range(5)], MIN)
    // Intended rewrite: CONST(42), i.e. the reduction disappears.
    let source = UOp::native_const(42i32);
    let axis = UOp::range_axis(UOp::index_const(5), AxisId::Renumbered(0), AxisType::Reduce);
    let min_over_axis = Arc::clone(&source).reduce(vec![axis].into(), ReduceOp::Min);

    let rewritten = reduce_unparented(&min_over_axis).expect("Should simplify");

    // Pointer identity: the very same node as the source has to come back.
    assert!(Arc::ptr_eq(&rewritten, &source));
}

#[test]
fn test_reduce_unparented_all_parented() {
    // The reduction source is the reduce axis itself: REDUCE(range, [range], ADD).
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let source = Arc::clone(&axis);
    let reduce = source.reduce(vec![axis].into(), ReduceOp::Add);

    // The only axis is referenced by the source (parented), so no rewrite is expected.
    let outcome = reduce_unparented(&reduce);
    assert!(outcome.is_none());
}

#[test]
fn test_reduce_unparented_mixed_ranges() {
    // REDUCE(x + range_0, [range_0, range_1], ADD)
    //   range_0 is read by the source  -> parented
    //   range_1 is never read          -> unparented
    // Intended rewrite: REDUCE(x + range_0, [range_0], ADD) * 10
    let kept_axis = UOp::range_axis(UOp::index_const(5), AxisId::Renumbered(0), AxisType::Reduce);
    let dropped_axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(1), AxisType::Reduce);

    let x = UOp::native_const(3i32);
    let kept_as_i32 = kept_axis.cast(DType::Int32);
    let source = x.try_add(&kept_as_i32).unwrap();

    let reduce = source.reduce(vec![kept_axis.clone(), dropped_axis].into(), ReduceOp::Add);

    let rewritten = reduce_unparented(&reduce).expect("Should simplify");

    // The outermost node has to be the multiplication: (... * 10).
    assert!(matches!(rewritten.op(), Op::Binary(BinaryOp::Mul, _, _)));

    // Its first operand must still be a REDUCE, now over the parented axis only.
    if let Op::Binary(_, lhs, _) = rewritten.op() {
        match lhs.op() {
            Op::Reduce { ranges, .. } => {
                assert_eq!(ranges.len(), 1);
                assert!(Arc::ptr_eq(&ranges[0], &kept_axis));
            }
            other => panic!("Expected REDUCE in inner op, got {:?}", other),
        }
    }
}

#[test]
fn test_reduce_unparented_multiple_unparented() {
    // Two axes, neither read by the source:
    //   REDUCE(CONST(5), [range(3), range(4)], ADD)
    // Intended rewrite: CONST(5) * 3 * 4
    let five = UOp::native_const(5i32);
    let first_axis = UOp::range_axis(UOp::index_const(3), AxisId::Renumbered(0), AxisType::Reduce);
    let second_axis = UOp::range_axis(UOp::index_const(4), AxisId::Renumbered(1), AxisType::Reduce);

    let reduce = five.reduce(vec![first_axis, second_axis].into(), ReduceOp::Add);

    let rewritten = reduce_unparented(&reduce).expect("Should simplify");

    // Expected shape is a nested product, (5 * 3) * 4: MUL at the top ...
    assert!(matches!(rewritten.op(), Op::Binary(BinaryOp::Mul, _, _)));

    // ... and MUL again as its first operand.
    if let Op::Binary(_, lhs, _) = rewritten.op() {
        assert!(matches!(lhs.op(), Op::Binary(BinaryOp::Mul, _, _)));
    }
}

#[test]
fn test_reduce_unparented_non_reduce_returns_none() {
    // A bare constant is not a REDUCE node, so the helper must report no rewrite.
    let scalar = UOp::native_const(1.0f32);

    let outcome = reduce_unparented(&scalar);
    assert!(outcome.is_none());
}

// ===== reduce_collapse Tests =====

#[test]
fn test_reduce_collapse_basic() {
    // Simplest collapse case: REDUCE(const, [range], ADD), where the source never
    // reads the axis. (reduce_unparented would handle this shape as well.)
    let source = UOp::native_const(5i32);
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let reduce = Arc::clone(&source).reduce(vec![axis].into(), ReduceOp::Add);

    let collapsed = reduce_collapse(&reduce).expect("reduce_collapse should succeed on constant");

    // The collapsed graph may contain no RANGE node ...
    assert!(!has_ranges_in_graph(&collapsed), "Result should have no range dependencies");

    // ... and no REDUCE node either.
    assert!(!has_reduce_op(&collapsed), "Result should not contain REDUCE operations");

    // The dtype of the source has to carry over to the result.
    assert_eq!(collapsed.dtype(), source.dtype(), "Should preserve dtype");
}

#[test]
fn test_reduce_collapse_with_range_dependency() {
    // Input: REDUCE(range + 1, [range], ADD)
    // The source genuinely reads the reduce axis, so this is a true dependency
    // on the range variable.
    // Expected: reduce_collapse returns None, because the dependency on the range
    // cannot be eliminated.
    let range = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let one = UOp::native_const(1i32);
    let range_int = range.cast(DType::Int32);
    let src = range_int.try_add(&one).unwrap();

    let reduce = src.reduce(vec![range].into(), ReduceOp::Add);

    let result = reduce_collapse(&reduce);

    // A `Some` here would mean a collapsed form was produced even though the
    // source still depends on the range.
    assert!(result.is_none(), "reduce_collapse should return None when range dependency can't be eliminated");
}

#[test]
fn test_reduce_collapse_non_reduce_returns_none() {
    // A bare constant is not a REDUCE node, so the wrapper must bail out with None.
    let scalar = UOp::native_const(1.0f32);

    let outcome = reduce_collapse(&scalar);
    assert!(outcome.is_none());
}

#[test]
fn test_reduce_collapse_empty_ranges() {
    // A REDUCE that carries no ranges at all must not be collapsed.
    let five = UOp::native_const(5i32);
    let rangeless_reduce = five.reduce(vec![].into(), ReduceOp::Add);

    let outcome = reduce_collapse(&rangeless_reduce);
    assert!(outcome.is_none(), "reduce_collapse should return None for empty ranges");
}

#[test]
fn test_reduce_collapse_multiple_ranges_all_independent() {
    // REDUCE(const, [range1, range2], ADD) where const doesn't depend on either range
    let const_val = UOp::native_const(5i32);
    let range1 = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let range2 = UOp::range_axis(UOp::index_const(20), AxisId::Renumbered(1), AxisType::Reduce);

    let reduce = const_val.reduce(vec![range1, range2].into(), ReduceOp::Add);

    // The constant has no range dependency, so the collapse must succeed.
    // `expect` fails the test on `None` and hands back the rewritten node, which
    // keeps the follow-up check unconditional (same style as the sibling tests).
    let result = reduce_collapse(&reduce)
        .expect("reduce_collapse should succeed with multiple independent ranges");

    // Result should have no range dependencies
    assert!(crate::rangeify::indexing::no_range(&result), "Result should have no range dependencies");
}

#[test]
fn test_reduce_collapse_algebraic_simplification() {
    // The source is an algebraic identity over constants:
    //   REDUCE(x + 0, [range], ADD) with x = 42
    let x = UOp::native_const(42i32);
    let zero = UOp::native_const(0i32);
    let source = x.try_add(&zero).unwrap();

    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);

    let reduce = source.reduce(vec![axis].into(), ReduceOp::Add);

    let collapsed = reduce_collapse(&reduce).expect("reduce_collapse should succeed after x+0 simplification");

    // Neither a RANGE nor a REDUCE node may survive in the result.
    assert!(!has_ranges_in_graph(&collapsed), "x+0 simplification should eliminate ranges");
    assert!(!has_reduce_op(&collapsed), "Result should not contain REDUCE");

    // The `x + 0` itself must be gone too: no ADD node anywhere in the result.
    let nodes = collapsed.toposort();
    let add_survived = nodes.iter().any(|node| matches!(node.op(), Op::Binary(BinaryOp::Add, _, _)));
    assert!(!add_survived, "x+0 should be simplified away");
}

#[test]
fn test_reduce_collapse_multiplication_by_one() {
    // The source is a multiplicative identity over constants:
    //   REDUCE(x * 1, [range], MUL) with x = PI
    let x = UOp::native_const(PI);
    let unit = UOp::native_const(1.0f32);
    let source = x.try_mul(&unit).unwrap();

    let axis = UOp::range_axis(UOp::index_const(5), AxisId::Renumbered(0), AxisType::Reduce);

    let reduce = source.reduce(vec![axis].into(), ReduceOp::Mul);

    let collapsed = reduce_collapse(&reduce).expect("reduce_collapse should succeed after x*1 simplification");

    // Neither a RANGE nor a REDUCE node may survive in the result.
    assert!(!has_ranges_in_graph(&collapsed), "x*1 simplification should eliminate ranges");
    assert!(!has_reduce_op(&collapsed), "Result should not contain REDUCE");

    // The `x * 1` itself must be gone too: no MUL node anywhere in the result.
    let nodes = collapsed.toposort();
    let mul_survived = nodes.iter().any(|node| matches!(node.op(), Op::Binary(BinaryOp::Mul, _, _)));
    assert!(!mul_survived, "x*1 should be simplified away");
}

#[test]
fn test_reduce_collapse_preserves_dtype() {
    // Verify that reduce_collapse preserves data types correctly
    let const_val = UOp::native_const(2.5f64);
    let range = UOp::range_axis(UOp::index_const(100), AxisId::Renumbered(0), AxisType::Reduce);
    let reduce = const_val.clone().reduce(vec![range].into(), ReduceOp::Add);

    // `expect` fails the test on `None` and hands back the rewritten node, which
    // keeps the dtype check unconditional (same style as the sibling tests).
    let result = reduce_collapse(&reduce).expect("reduce_collapse should succeed");

    // The result dtype should match the source, which is built from an f64 literal.
    assert_eq!(result.dtype(), const_val.dtype(), "reduce_collapse should preserve dtype");
}

#[test]
fn test_reduce_collapse_different_reduce_ops() {
    // The same range-independent constant is reduced with every ReduceOp in turn;
    // each reduction is expected to collapse.
    let ten = UOp::native_const(10i32);
    let axis = UOp::range_axis(UOp::index_const(5), AxisId::Renumbered(0), AxisType::Reduce);

    // (reduce op, failure message) pairs, checked in this order: ADD, MUL, MAX, MIN.
    let cases = [
        (ReduceOp::Add, "reduce_collapse should work with ReduceOp::Add"),
        (ReduceOp::Mul, "reduce_collapse should work with ReduceOp::Mul"),
        (ReduceOp::Max, "reduce_collapse should work with ReduceOp::Max"),
        (ReduceOp::Min, "reduce_collapse should work with ReduceOp::Min"),
    ];

    for (reduce_op, failure_message) in cases {
        let reduce = ten.clone().reduce(vec![axis.clone()].into(), reduce_op);
        assert!(reduce_collapse(&reduce).is_some(), "{}", failure_message);
    }
}

// ===== Helper Function Tests =====

#[test]
fn test_no_range_with_ranges() {
    use crate::rangeify::indexing::no_range;

    // Build `range + 5`, an expression that reads a RANGE node.
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let five = UOp::native_const(5i32);
    let axis_as_i32 = axis.cast(DType::Int32);
    let range_dependent = axis_as_i32.try_add(&five).unwrap();

    // no_range must report false for an expression that depends on a range.
    assert!(!no_range(&range_dependent));
}

#[test]
fn test_no_range_without_ranges() {
    use crate::rangeify::indexing::no_range;

    // A lone constant has no RANGE dependency.
    let lone_const = UOp::native_const(42i32);
    assert!(no_range(&lone_const));

    // Arithmetic over constants only has none either.
    let lhs = UOp::native_const(10i32);
    let rhs = UOp::native_const(20i32);
    let const_sum = lhs.try_add(&rhs).unwrap();
    assert!(no_range(&const_sum));
}

#[test]
fn test_range_size_extraction_constant() {
    use crate::rangeify::indexing::range_size_as_i64;

    // A RANGE whose bound is the constant 100 (Loop axis) reports Some(100).
    let loop_axis = UOp::range_axis(UOp::index_const(100), AxisId::Renumbered(0), AxisType::Loop);
    assert_eq!(range_size_as_i64(&loop_axis), Some(100));

    // Same check with a different bound, axis id and axis type.
    let reduce_axis = UOp::range_axis(UOp::index_const(42), AxisId::Renumbered(1), AxisType::Reduce);
    assert_eq!(range_size_as_i64(&reduce_axis), Some(42));
}

#[test]
fn test_range_size_extraction_symbolic() {
    use crate::rangeify::indexing::range_size_as_i64;

    // The bound is a defined variable "N" rather than a constant, so no size
    // can be extracted.
    let bound = UOp::define_var(String::from("N"), 0, 1000);
    let symbolic_axis = UOp::range_axis(bound, AxisId::Renumbered(0), AxisType::Loop);

    assert_eq!(range_size_as_i64(&symbolic_axis), None);
}

#[test]
fn test_range_size_extraction_non_range() {
    use crate::rangeify::indexing::range_size_as_i64;

    // A constant is not a RANGE node: no size.
    let plain_const = UOp::native_const(100i32);
    assert_eq!(range_size_as_i64(&plain_const), None);

    // Neither is a binary operation over constants.
    let lhs = UOp::native_const(10i32);
    let rhs = UOp::native_const(20i32);
    let const_sum = lhs.try_add(&rhs).unwrap();
    assert_eq!(range_size_as_i64(&const_sum), None);
}

// ===== reduce_mul_chain Tests =====

#[test]
fn test_reduce_mul_chain_simple_const() {
    // A constant multiplicand inside an ADD reduction:
    //   REDUCE(range * 3, [range], ADD) → REDUCE(range, [range], ADD) * 3
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let three = UOp::native_const(3i32);
    let axis_as_i32 = axis.cast(DType::Int32);
    let source = axis_as_i32.mul(&three);
    let reduce = source.reduce(vec![axis].into(), ReduceOp::Add);

    let rewritten = reduce_unparented(&reduce).expect("Should factor const out of reduce");

    // The multiplication has to end up on the outside (reduce * 3) ...
    assert!(matches!(rewritten.op(), Op::Binary(BinaryOp::Mul, _, _)));

    // ... with the REDUCE as its first operand.
    if let Op::Binary(BinaryOp::Mul, lhs, _rhs) = rewritten.op() {
        assert!(matches!(lhs.op(), Op::Reduce { .. }));
    }
}

#[test]
fn test_reduce_mul_chain_no_outside_factors() {
    // REDUCE(range * range, [range], ADD): every factor of the product reads the axis.
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let axis_as_i32 = axis.cast(DType::Int32);
    let squared = axis_as_i32.mul(&axis_as_i32);
    let reduce = squared.reduce(vec![axis].into(), ReduceOp::Add);

    // Smoke test only: nothing can be factored out by reduce_mul_chain because both
    // factors reference the range, though another rule (e.g. reduce_collapse) may
    // still rewrite the node. The outcome is deliberately not inspected; the call
    // just has to complete without panicking.
    let _outcome = reduce_unparented(&reduce);
}

#[test]
fn test_reduce_mul_chain_multiple_factors() {
    // Two constant multiplicands around the axis, a = 2 and b = 5:
    //   REDUCE(a * range * b, [range], ADD) → REDUCE(range, [range], ADD) * a * b
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let a = UOp::native_const(2i32);
    let b = UOp::native_const(5i32);
    let axis_as_i32 = axis.cast(DType::Int32);
    let a_times_axis = a.mul(&axis_as_i32);
    let source = a_times_axis.mul(&b);
    let reduce = source.reduce(vec![axis].into(), ReduceOp::Add);

    let rewritten = reduce_unparented(&reduce).expect("Should factor constants out");

    // Intended result is REDUCE(range_int, ...) * 2 * 5; only the outermost
    // operation is checked, and it must be a MUL.
    assert!(matches!(rewritten.op(), Op::Binary(BinaryOp::Mul, _, _)));
}

#[test]
fn test_reduce_mul_chain_max_positive_factor() {
    // A non-negative constant may be pulled out of a MAX reduction (3 >= 0):
    //   REDUCE(range * 3, [range], MAX) → REDUCE(range, [range], MAX) * 3
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let three = UOp::native_const(3i32);
    let axis_as_i32 = axis.cast(DType::Int32);
    let source = axis_as_i32.mul(&three);
    let reduce = source.reduce(vec![axis].into(), ReduceOp::Max);

    let rewritten = reduce_unparented(&reduce).expect("Should factor positive const out of MAX reduce");

    // The outermost operation must be the MUL.
    assert!(matches!(rewritten.op(), Op::Binary(BinaryOp::Mul, _, _)));
}

#[test]
fn test_reduce_mul_chain_max_negative_factor_stays() {
    // REDUCE(range * (-1), [range], MAX): the factor is negative (-1 < 0), so it
    // must NOT be moved outside the MAX reduction.
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let minus_one = UOp::native_const(-1i32);
    let axis_as_i32 = axis.cast(DType::Int32);
    let source = axis_as_i32.mul(&minus_one);
    let reduce = source.reduce(vec![axis].into(), ReduceOp::Max);

    // Either nothing is rewritten, or some other pattern rewrites the node. Both
    // are acceptable. The only forbidden outcome is a top-level MUL whose second
    // operand is the constant -1, i.e. the negative factor pulled out of the MAX.
    let outcome = reduce_unparented(&reduce);
    if let Some(rewritten) = &outcome {
        if let Op::Binary(BinaryOp::Mul, _lhs, rhs) = rewritten.op() {
            if let Op::Const(constant) = rhs.op() {
                assert!(
                    constant.0 != morok_ir::ConstValue::Int(-1),
                    "Negative factor should not be factored out of MAX reduce"
                );
            }
        }
    }
}

#[test]
fn test_reduce_mul_chain_single_factor_no_op() {
    // REDUCE(range, [range], ADD): the source is a single factor, not a product.
    let axis = UOp::range_axis(UOp::index_const(10), AxisId::Renumbered(0), AxisType::Reduce);
    let axis_as_i32 = axis.cast(DType::Int32);
    let reduce = axis_as_i32.reduce(vec![axis].into(), ReduceOp::Add);

    // Smoke test only: mul_chain needs a MUL source and so should not apply here,
    // although reduce_collapse might. The outcome is deliberately not inspected;
    // the call just has to complete without panicking.
    let _outcome = reduce_unparented(&reduce);
}