aprender-gpu 0.30.0

Pure Rust PTX generation for NVIDIA CUDA - no LLVM, no nvcc
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
//! PMAT-002: Token-Based Synchronization Tests (F066-F080)
//!
//! Falsification tests per FKR-004 specification.
//! Verifies token-based memory ordering eliminates redundant barriers.
//!
//! Citations:
//! - [Alglave et al. 2015] "GPU Concurrency: Weak Behaviours" DOI:10.1145/2694344.2694391
//! - [Lustig et al. 2019] "NVIDIA PTX Memory Consistency Model" DOI:10.1145/3297858.3304043
//! - [Sorensen & Donaldson 2016] "Cross-Platform OpenCL Development" DOI:10.1145/2909437.2909440
//!
//! Requires `cuda` feature: `cargo test -p trueno-gpu --test token_sync_f066 --features cuda`

#![cfg(feature = "cuda")]

use trueno_gpu::ptx::optimize::tko::{
    join_tokens, MemoryOrdering, MemoryScope, TkoAnalysis, Token, TokenGraph,
};

/// F066: Tokens eliminate redundant barriers
///
/// Hypothesis: Token-based ordering reduces barrier count.
/// Falsification: Token version has more barriers than original.
#[test]
fn f066_tokens_eliminate_barriers() {
    // Create token-based analysis
    let analysis = TkoAnalysis::new();

    // Empty analysis should be sound (no barriers needed)
    assert!(
        analysis.is_sound(),
        "F066 FALSIFIED: Empty TKO analysis should be sound"
    );

    // Fix: the original comment claimed the count "should be zero" but never
    // asserted it, so a regression would pass silently. Assert it explicitly.
    let initial_eliminable = analysis.eliminable_count();
    assert_eq!(
        initial_eliminable, 0,
        "F066 FALSIFIED: Fresh analysis should report 0 eliminable barriers, got {}",
        initial_eliminable
    );

    println!(
        "F066 PASSED: Token analysis initialized (eliminable={})",
        initial_eliminable
    );
}

/// F067: Token dependencies prevent data races
///
/// Hypothesis: Proper token chaining prevents RAW/WAR/WAW hazards.
/// Falsification: Any data race detected by TSan.
#[test]
fn f067_token_dependencies_prevent_races() {
    let mut deps = TokenGraph::new();

    // Three pipeline stages, each guarded by its own token.
    let load = Token::new();
    let compute = Token::new();
    let store = Token::new();

    for stage in [load, compute, store] {
        deps.create_token(stage);
    }

    // Order the pipeline: load happens-before compute happens-before store.
    deps.add_dependency(compute, load);
    deps.add_dependency(store, compute);

    // A proper DAG (cycle-free) means no ordering hazard can occur.
    assert!(
        !deps.has_cycle(),
        "F067 FALSIFIED: Properly chained tokens should not have cycles"
    );

    // Both downstream stages must record their upstream dependency.
    assert!(
        deps.has_dependencies(compute),
        "F067 FALSIFIED: Compute should depend on load"
    );
    assert!(
        deps.has_dependencies(store),
        "F067 FALSIFIED: Store should depend on compute"
    );

    println!("F067 PASSED: Token dependencies prevent data races");
}

/// F068: Memory ordering semantics are correct
///
/// Hypothesis: Memory ordering modifiers map to correct PTX.
/// Falsification: Incorrect PTX modifier generated.
#[test]
fn f068_memory_ordering_semantics() {
    // Each ordering must lower to its exact PTX modifier string.
    let expectations = [
        (MemoryOrdering::Weak, ".weak"),
        (MemoryOrdering::Relaxed, ".relaxed"),
        (MemoryOrdering::Acquire, ".acquire"),
        (MemoryOrdering::Release, ".release"),
    ];

    for &(ordering, expected) in &expectations {
        let modifier = ordering.to_ptx_modifier();
        assert_eq!(
            modifier, expected,
            "F068 FALSIFIED: {:?} should produce {}, got {}",
            ordering, expected, modifier
        );
    }

    // Acquire and release classification predicates must be mutually exclusive.
    assert!(
        MemoryOrdering::Acquire.is_acquire(),
        "F068 FALSIFIED: Acquire should be acquire"
    );
    assert!(
        !MemoryOrdering::Acquire.is_release(),
        "F068 FALSIFIED: Acquire should not be release"
    );
    assert!(
        MemoryOrdering::Release.is_release(),
        "F068 FALSIFIED: Release should be release"
    );
    assert!(
        !MemoryOrdering::Release.is_acquire(),
        "F068 FALSIFIED: Release should not be acquire"
    );

    println!("F068 PASSED: Memory ordering semantics correct");
}

/// F069: Memory scope semantics are correct
///
/// Hypothesis: Memory scope modifiers map to correct PTX.
/// Falsification: Incorrect PTX scope generated.
#[test]
fn f069_memory_scope_semantics() {
    // Note: Thread and Block both lower to ".cta" in this mapping.
    let expectations = [
        (MemoryScope::Thread, ".cta"),
        (MemoryScope::Block, ".cta"),
        (MemoryScope::Cluster, ".cluster"),
        (MemoryScope::Device, ".gpu"),
        (MemoryScope::System, ".sys"),
    ];

    for &(scope, expected) in &expectations {
        let ptx_scope = scope.to_ptx_scope();
        assert_eq!(
            ptx_scope, expected,
            "F069 FALSIFIED: {:?} should produce {}, got {}",
            scope, expected, ptx_scope
        );
    }

    println!("F069 PASSED: Memory scope semantics correct");
}

/// F071: Barrier elimination is sound
///
/// Hypothesis: Eliminating a barrier doesn't introduce races.
/// Falsification: Memory consistency violation detected.
#[test]
fn f071_barrier_elimination_soundness() {
    // A freshly constructed analysis has nothing that could make it unsound.
    let analysis = TkoAnalysis::new();
    assert!(
        analysis.is_sound(),
        "F071 FALSIFIED: Fresh analysis should be sound"
    );

    // Build a straight-line dependency chain: a -> b -> c.
    let (a, b, c) = (Token::new(), Token::new(), Token::new());
    let mut chain = TokenGraph::new();
    for t in [a, b, c] {
        chain.create_token(t);
    }
    chain.add_dependency(b, a);
    chain.add_dependency(c, b);

    // A linear chain can never contain a cycle, so it is always sound.
    assert!(
        !chain.has_cycle(),
        "F071 FALSIFIED: Linear chain should be cycle-free"
    );

    println!("F071 PASSED: Barrier elimination is sound");
}

/// F075: Token join creates proper dependency
///
/// Hypothesis: join_tokens creates token depending on all inputs.
/// Falsification: Joined token doesn't depend on all inputs.
#[test]
fn f075_token_join_dependencies() {
    let first = Token::new();
    let second = Token::new();
    let third = Token::new();

    let merged = join_tokens(&[first, second, third]);

    // The join result must be a fresh token, distinct from every input.
    assert_ne!(
        merged.id(),
        first.id(),
        "F075 FALSIFIED: Joined token must differ from t1"
    );
    assert_ne!(
        merged.id(),
        second.id(),
        "F075 FALSIFIED: Joined token must differ from t2"
    );
    assert_ne!(
        merged.id(),
        third.id(),
        "F075 FALSIFIED: Joined token must differ from t3"
    );

    // Repeat the join through an explicit graph to verify edge tracking.
    let mut graph = TokenGraph::new();
    for t in [first, second, third] {
        graph.create_token(t);
    }
    let join_result = Token::new();
    graph.join(join_result, &[first, second, third]);

    let edges = graph.get_dependencies(join_result);
    assert_eq!(
        edges.len(),
        3,
        "F075 FALSIFIED: Joined token should have 3 dependencies"
    );

    println!("F075 PASSED: Token join creates proper dependencies");
}

/// F079: Token cycles are detected and rejected
///
/// Hypothesis: Circular token dependencies are detected.
/// Falsification: Cycle not detected -> potential deadlock.
#[test]
fn f079_cycle_detection() {
    let (a, b, c) = (Token::new(), Token::new(), Token::new());

    let mut ring = TokenGraph::new();
    for t in [a, b, c] {
        ring.create_token(t);
    }

    // Deliberately close the loop: a -> b -> c -> a.
    ring.add_dependency(b, a);
    ring.add_dependency(c, b);
    ring.add_dependency(a, c);

    // The analysis must flag the cycle, otherwise a deadlock could slip through.
    assert!(
        ring.has_cycle(),
        "F079 FALSIFIED: Cycle should be detected (deadlock prevention)"
    );

    println!("F079 PASSED: Token cycles detected and rejected");
}

/// F080: Token IDs are unique and monotonic
///
/// Hypothesis: Every token has a unique ID, IDs increase monotonically.
/// Falsification: Duplicate ID or non-monotonic sequence.
#[test]
fn f080_token_id_uniqueness() {
    let mut seen = std::collections::HashSet::new();
    let mut last = 0u64;

    for iteration in 0..1000 {
        let current = Token::new().id();

        // HashSet::insert returns false on a duplicate — that would falsify.
        assert!(
            seen.insert(current),
            "F080 FALSIFIED: Duplicate token ID {} at iteration {}",
            current,
            iteration
        );

        // Skip the very first iteration (last == 0 sentinel), then require
        // strictly increasing IDs.
        if last > 0 {
            assert!(
                current > last,
                "F080 FALSIFIED: Token ID not monotonic: {} <= {}",
                current,
                last
            );
        }

        last = current;
    }

    println!("F080 PASSED: Token IDs are unique and monotonic");
}

/// Exercise the basic TokenGraph API: counting, dependency insertion,
/// dependency queries, and cycle checks on trivial graphs.
#[test]
fn test_token_graph_operations() {
    let mut g = TokenGraph::new();

    // A brand-new graph is trivially acyclic and empty.
    assert!(!g.has_cycle(), "Empty graph should be cycle-free");
    assert_eq!(g.token_count(), 0, "Empty graph should have 0 tokens");

    let producer = Token::new();
    let consumer = Token::new();
    g.create_token(producer);
    g.create_token(consumer);
    assert_eq!(g.token_count(), 2, "Should have 2 tokens");

    // consumer depends on producer; producer depends on nothing.
    g.add_dependency(consumer, producer);
    assert!(g.has_dependencies(consumer), "t2 should have dependencies");
    assert!(
        !g.has_dependencies(producer),
        "t1 should not have dependencies"
    );

    let edges = g.get_dependencies(consumer);
    assert_eq!(edges.len(), 1, "t2 should have 1 dependency");
    assert_eq!(edges[0], producer.id(), "t2 should depend on t1");

    println!("Token graph operations verified");
}

/// A two-token mutual dependency (t1 <-> t2) must render the whole
/// TKO analysis unsound.
#[test]
fn test_tko_analysis_with_cycle() {
    let mut analysis = TkoAnalysis::new();

    let (a, b) = (Token::new(), Token::new());
    analysis.graph.create_token(a);
    analysis.graph.create_token(b);

    // Mutual dependency forms the smallest possible cycle.
    analysis.graph.add_dependency(b, a);
    analysis.graph.add_dependency(a, b);

    assert!(
        !analysis.is_sound(),
        "Analysis with cycle should be unsound"
    );

    println!("TKO analysis cycle detection verified");
}

/// join_tokens edge cases: zero inputs, one input, and many inputs
/// must all yield a fresh, valid token.
#[test]
fn test_edge_case_joins() {
    // Joining nothing still mints a usable token.
    let empty_join = join_tokens(&[]);
    assert!(empty_join.id() > 0, "Empty join should produce valid token");

    // Joining one token must not return that same token.
    let single = Token::new();
    let single_join = join_tokens(&[single]);
    assert_ne!(
        single_join.id(),
        single.id(),
        "Single join should produce new token"
    );

    // A 100-way join must differ from every input.
    let tokens: Vec<Token> = (0..100).map(|_| Token::new()).collect();
    let large_join = join_tokens(&tokens);
    assert!(
        tokens.iter().all(|t| large_join.id() != t.id()),
        "Large join should produce new token"
    );

    println!("Edge case joins verified");
}

/// Round-trip a token through its raw ID: Token::from_id must rebuild
/// a token equal to the original.
#[test]
fn test_token_from_id() {
    let original = Token::new();
    let raw = original.id();

    let rebuilt = Token::from_id(raw);
    assert_eq!(rebuilt.id(), raw, "from_id should preserve ID");
    assert_eq!(
        original, rebuilt,
        "Tokens with same ID should be equal"
    );

    println!("Token from_id reconstruction verified");
}

/// Verify every Default impl in the tko module produces a sane value:
/// valid token ID, Weak ordering, Device scope, empty graph, sound analysis.
#[test]
fn test_defaults() {
    assert!(
        Token::default().id() > 0,
        "Default token should have valid ID"
    );

    assert_eq!(
        MemoryOrdering::default(),
        MemoryOrdering::Weak,
        "Default ordering should be Weak"
    );

    assert_eq!(
        MemoryScope::default(),
        MemoryScope::Device,
        "Default scope should be Device"
    );

    assert_eq!(
        TokenGraph::default().token_count(),
        0,
        "Default graph should be empty"
    );

    assert!(
        TkoAnalysis::default().is_sound(),
        "Default analysis should be sound"
    );

    println!("Default implementations verified");
}