constensor_core/
lib.rs

1#![feature(portable_simd)]
2
3//! Constensor is an experimental ML framework featuring a graph-based JIT compiler.
4//!
5//! It's designed with clarity and efficiency in mind, so besides compile-time checks for shape, dtype, and device,
6//! it is also based around a graph design.
7//!
8//! This means that all the code you write is entered into a graph and then analyzed when you call [`Graph::optimize`]!
9//! The optimization step fuses operations, allows for automatic and seamless inplacing, constant folding, and other features.
10//!
11//! Then, by precompiling the graph with [`Graph::compile`], we can make intelligent decisions about when we run certain operations.
12//! For instance, on CUDA, streams are automatically used where possible to parallelize execution.
13//!
14//! Currently, only CUDA and CPU are supported, but support for Metal and [cubecl](https://github.com/tracel-ai/cubecl) is coming very soon.
15//!
16//! ## A quick guide
17//! - First, create a [`Graph`]. This will hold all the operations and do optimization and compilation.
18//! - Tensors are modelled with a [`GraphTensor`]. These represent the operation but do not perform any computation.
19//! - Be sure to optimize the graph using [`Graph::optimize`] after all operations are complete!
20//! - Compile the graph using [`Graph::compile`], which will insert the device-specific optimizations. This returns a [`CompiledGraph`].
21//! - Run using [`CompiledGraph::run`]. This returns a concrete [`Tensor`].
22//!
23//! ## What can you do with it?
24//! ```
25//! use constensor_core::{Cpu, Graph, GraphTensor, Tensor, R1, R2};
26//!
27//! let mut graph: Graph<f32> = Graph::empty();
28//! let _arange = GraphTensor::<R1<10>, f32, Cpu>::arange(&mut graph, 0., 1.);
29//! let a = GraphTensor::<R2<3, 4>, f32, Cpu>::fill(&mut graph, 1.0);
30//! let b = GraphTensor::<R2<3, 4>, f32, Cpu>::fill(&mut graph, 2.0);
31//! let c = GraphTensor::<R2<3, 4>, f32, Cpu>::fill(&mut graph, 3.0);
32//! let d = GraphTensor::<R2<3, 4>, f32, Cpu>::fill(&mut graph, 4.0);
33//! let res = a * b + c;
34//! let _out = res + d;
35//!
36//! graph.optimize();
37//!
38//! let compiled: constensor_core::CompiledGraph<R2<3, 4>, f32, Cpu> = graph.compile().unwrap();
39//! let res = compiled.run().unwrap();
40//!
41//! let tensor: Tensor<R2<3, 4>, f32, Cpu> = res;
42//!
43//! assert_eq!(tensor.data().unwrap().to_vec(), vec![vec![9.0; 4]; 3],);
44//! ```
45
46mod cpu_storage;
47#[cfg(feature = "cuda")]
48mod cuda_backend;
49mod device;
50mod dtype;
51mod error;
52mod graph;
53mod shape;
54mod storage;
55mod tensor;
56
57#[cfg(feature = "cuda")]
58pub use device::Cuda;
59pub use device::{BestDevice, Cpu};
60pub use dtype::DType;
61pub use error::{Context, Error, Result};
62pub use graph::{CompiledGraph, Graph, GraphNode, Op};
63pub use shape::{Shape, R1, R2, R3, R4, R5, R6};
64pub use tensor::{GraphTensor, Tensor};