Skip to main content

trueno_gpu/
lib.rs

1//! # trueno-gpu: Pure Rust PTX Generation for NVIDIA CUDA
2//!
3//! Generate PTX assembly directly from Rust - no LLVM, no nvcc, no external dependencies.
4//!
5//! ## Philosophy
6//!
7//! **Own the Stack** - Build everything from first principles for complete control,
8//! auditability, and reproducibility.
9//!
//! ## Quick Start
//!
//! The [`ptx`] module used below is only compiled when the `cuda` feature is enabled.
//!
12//! ```rust
13//! use trueno_gpu::ptx::{PtxModule, PtxKernel, PtxType};
14//!
15//! // Build a vector addition kernel
16//! let module = PtxModule::new()
17//!     .version(8, 0)
18//!     .target("sm_70")
19//!     .address_size(64);
20//!
21//! let ptx_source = module.emit();
22//! assert!(ptx_source.contains(".version 8.0"));
23//! ```
24//!
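//! ## Modules
//!
//! - [`ptx`] - PTX code generation (builder pattern; requires the `cuda` feature)
//! - [`driver`] - CUDA driver API (minimal FFI, optional; requires the `cuda` feature)
//! - [`kernels`] - Hand-optimized GPU kernels (requires the `cuda` feature)
//! - [`memory`] - GPU memory management (requires the `cuda` feature)
//! - [`backend`] - Multi-backend abstraction
//! - [`graph`] - Tensor compute graph for GPU inference
//! - [`monitor`] - CUDA device and memory monitoring queries
//! - [`error`] - Error types for trueno-gpu operations
//! - [`testing`] - E2E visual testing framework for GPU kernels
//! - `wasm` - WASM visual testing bindings (requires the `viz` feature)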
32
// Crate-wide lint policy: warn on undocumented public items and on Rust 2018 idiom
// violations, and reject unsafe operations inside an `unsafe fn` that are not wrapped
// in an explicit `unsafe` block.
#![warn(missing_docs)]
#![warn(rust_2018_idioms)]
#![deny(unsafe_op_in_unsafe_fn)]
// ============================================================================
// Development-phase lint allows - to be addressed incrementally
// ============================================================================
// Allow dead code during development - will be used as API expands
#![allow(dead_code)]
// Allow precision loss in non-critical floating point calculations
#![allow(clippy::cast_precision_loss)]
// Allow possible truncation - we handle 64-bit correctly
#![allow(clippy::cast_possible_truncation)]
// Allow format push string - not a critical performance path
#![allow(clippy::format_push_string)]
// Allow doc markdown for code references - these are placeholders
#![allow(clippy::doc_markdown)]
// Allow missing errors doc during initial development
#![allow(clippy::missing_errors_doc)]
// Allow unnecessary literal bound for backend trait
#![allow(clippy::unnecessary_literal_bound)]
// Allow manual div_ceil - std's unsigned `div_ceil` has been stable since Rust 1.73;
// NOTE(review): migrate the manual call sites once the crate's MSRV allows it
#![allow(clippy::manual_div_ceil)]
// Allow missing panics doc during initial development
#![allow(clippy::missing_panics_doc)]
// Allow cast_lossless - we intentionally use as for u32->u64
#![allow(clippy::cast_lossless)]
// Allow uninlined format args - stylistic preference
#![allow(clippy::uninlined_format_args)]
// Allow map_unwrap_or - more readable with map().unwrap_or()
#![allow(clippy::map_unwrap_or)]
// Allow redundant closure for method calls - clearer intent
#![allow(clippy::redundant_closure_for_method_calls)]
// Allow unused self - methods will use self as API expands
#![allow(clippy::unused_self)]
// Allow expect_used in tests and non-critical paths
#![allow(clippy::expect_used)]
// Allow too_many_lines during development - will be refactored
#![allow(clippy::too_many_lines)]
// Allow needless_range_loop - clearer intent in some algorithms
#![allow(clippy::needless_range_loop)]
// Allow float_cmp in tests where exact comparison is intended
#![allow(clippy::float_cmp)]
// Allow unused comparisons - some are defensive checks
#![allow(unused_comparisons)]
// Allow unwrap_used in tests
#![allow(clippy::unwrap_used)]
// Allow cast_sign_loss - we know values are positive
#![allow(clippy::cast_sign_loss)]
// Allow field_reassign_with_default - clearer test setup
#![allow(clippy::field_reassign_with_default)]
// Allow panic in tests
#![allow(clippy::panic)]
// Allow manual_range_contains - clearer in assertions
#![allow(clippy::manual_range_contains)]
// Allow default_constructed_unit_structs
#![allow(clippy::default_constructed_unit_structs)]
// Allow clone_on_copy - clearer intent
#![allow(clippy::clone_on_copy)]
// Allow absurd_extreme_comparisons - defensive checks
#![allow(clippy::absurd_extreme_comparisons)]
// Allow no_effect_underscore_binding - intentional in tests
#![allow(clippy::no_effect_underscore_binding)]
// Allow must_use_candidate - methods may return values not always needed
#![allow(clippy::must_use_candidate)]
// Allow manual_find - clearer intent in some cases
#![allow(clippy::manual_find)]
// Allow type_complexity - complex return types for tuples
#![allow(clippy::type_complexity)]
// Allow range_plus_one - clearer in some contexts
#![allow(clippy::range_plus_one)]
// Allow map_clone - clearer intent
#![allow(clippy::map_clone)]
// Allow manual_is_multiple_of - not yet stabilized
#![allow(clippy::manual_is_multiple_of)]
// Allow items_after_statements - const definitions in kernels
#![allow(clippy::items_after_statements)]
// Allow doc_lazy_continuation - doc formatting
#![allow(clippy::doc_lazy_continuation)]
// Allow useless_vec in tests - clearer intent
#![allow(clippy::useless_vec)]
// Allow similar names - k_h vs kt_h are semantically distinct (key vs key-transposed)
#![allow(clippy::similar_names)]
// Allow many single char names - standard matrix notation (a, b, m, n, k)
#![allow(clippy::many_single_char_names)]
// Allow doc nested refdefs - acceptable in list items
#![allow(clippy::doc_nested_refdefs)]
// Allow cloned instead of copied - semantic clarity
#![allow(clippy::cloned_instead_of_copied)]
// Allow too many arguments - GPU APIs require many parameters
#![allow(clippy::too_many_arguments)]
// Allow explicit lifetimes - clearer for complex lifetime relationships
#![allow(clippy::elidable_lifetime_names)]
// Allow manual slice size calculation - clearer intent
#![allow(clippy::manual_slice_size_calculation)]
// Allow large_stack_arrays - NOTE(review): rationale was not recorded here; confirm it is still needed
#![allow(clippy::large_stack_arrays)]
128
/// Multi-backend abstraction.
pub mod backend;
/// CUDA driver FFI — feature-gated behind `cuda`. Default build has zero unsafe.
/// Will be deleted entirely once memory/resident is migrated to wgpu (§26 Phase 3).
#[cfg(feature = "cuda")]
pub mod driver;
/// PMAT-291: Tensor compute graph for GPU inference (reduces 430 dispatches to ~15)
pub mod graph;
/// PTX kernel generators — feature-gated behind `cuda`. Safe Rust (no unsafe blocks)
/// but produces PTX text that requires CUDA driver to execute. Dead code without `cuda`.
#[cfg(feature = "cuda")]
pub mod kernels;
/// GPU memory management — feature-gated behind `cuda` (uses driver FFI).
#[cfg(feature = "cuda")]
pub mod memory;
/// Device monitoring; its CUDA device-count, availability, and device/memory info items
/// are re-exported at the crate root below.
pub mod monitor;
/// PTX instruction builder — feature-gated behind `cuda`. Safe Rust.
#[cfg(feature = "cuda")]
pub mod ptx;

/// Error types for trueno-gpu operations
pub mod error;

/// E2E visual testing framework for GPU kernels
pub mod testing;

/// WASM visual testing bindings (requires viz feature)
#[cfg(feature = "viz")]
pub mod wasm;

// Root-level re-exports so callers can write `trueno_gpu::GpuError` / `trueno_gpu::Result`
// and reach the monitoring items without naming the submodule. These are not feature-gated.
pub use error::{GpuError, Result};
pub use monitor::{cuda_device_count, cuda_monitoring_available, CudaDeviceInfo, CudaMemoryInfo};
160
161// NOTE: ComputeBrick is available from the trueno crate, not trueno-gpu
162// This is because trueno optionally depends on trueno-gpu (not vice versa)
163// Usage: `use trueno::brick::{ComputeBrick, ComputeBackend, TokenBudget};`
164// See: trueno/src/brick.rs for the full brick architecture
165
#[cfg(test)]
mod tests {
    use super::error::Result;

    /// Smoke test: passes whenever the crate builds and `error::Result<()>`
    /// can be constructed in its `Ok` state.
    #[test]
    fn test_crate_compiles() {
        let _: Result<()> = Ok(());
    }
}