1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
//! # attnres
//!
//! First Rust implementation of Attention Residuals from the MoonshotAI/Kimi paper,
//! built on the [burn](https://burn.dev) deep learning framework.
//!
//! Attention Residuals replace standard fixed-weight residual connections in Transformers
//! with learned softmax attention over depth, enabling selective information routing
//! across layers.
//!
//! ## Quick Start
//!
//! ```rust
//! use attnres::{AttnResConfig, AttnResTransformer};
//! use burn::prelude::*;
//! use burn::backend::NdArray;
//!
//! type B = NdArray;
//!
//! let device = Default::default();
//! let config = AttnResConfig::new(128, 8, 2)
//!     .with_num_heads(4)
//!     .with_vocab_size(1000);
//!
//! let model: AttnResTransformer<B> = config.init_model(&device);
//! let input_ids = Tensor::<B, 2, Int>::zeros([1, 16], &device);
//! let logits = model.forward(input_ids, None);
//! assert_eq!(logits.dims(), [1, 16, 1000]);
//! ```
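// Public API re-exports: surface the crate's main types at the crate root so
// callers can write `use attnres::{AttnResConfig, AttnResTransformer};`.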
pub use ;
pub use AttnResOp;
pub use BlockState;
pub use AttnResConfig;
pub use ;
pub use AttnResLayer;
pub use AttnResTransformer;
pub use ;
pub use SerializationError;
pub use causal_mask;