Struct SparseAutoencoder

Source

pub struct SparseAutoencoder { /* private fields */ }

Expand description

A Sparse Autoencoder for mechanistic interpretability.

Loads SAE weights from SAELens-format safetensors + cfg.json, encodes model activations into sparse feature vectors, decodes back to activation space, and produces steering vectors for injection.

Each SAE targets a single hook point in the model (e.g., resid_post at layer 5). Multiple SAEs can be loaded independently for different hook points.

§Example

use candle_mi::sae::SparseAutoencoder;
use candle_core::Device;

let sae = SparseAutoencoder::from_pretrained(
    "jbloom/Gemma-2-2B-Residual-Stream-SAEs",
    "gemma-2-2b-res-jb/blocks.20.hook_resid_post",
    &Device::Cpu,
)?;
println!("SAE: d_in={}, d_sae={}", sae.d_in(), sae.d_sae());

Struct SparseAutoencoder

§Example

Implementations§

impl SparseAutoencoder

pub fn from_local(dir: &Path, device: &Device) -> Result<Self>

§Errors

pub fn from_npz( npz_path: &Path, hook_layer: usize, device: &Device, ) -> Result<Self>

§Arguments

§Errors

pub fn from_pretrained_npz( repo_id: &str, npz_path: &str, hook_layer: usize, device: &Device, ) -> Result<Self>

§Arguments

§Errors

pub fn from_pretrained( repo_id: &str, sae_id: &str, device: &Device, ) -> Result<Self>

§Arguments

§Errors

pub const fn config(&self) -> &SaeConfig

pub const fn hook_point(&self) -> &HookPoint

pub const fn d_sae(&self) -> usize

pub const fn d_in(&self) -> usize

pub fn encode(&self, x: &Tensor) -> Result<Tensor>

§Shapes

§Errors

pub fn encode_with_strategy( &self, x: &Tensor, strategy: &TopKStrategy, ) -> Result<Tensor>

§Shapes

§Errors

pub fn encode_sparse( &self, x: &Tensor, ) -> Result<SparseActivations<SaeFeatureId>>

§Shapes

§Errors

pub fn decode(&self, features: &Tensor) -> Result<Tensor>

§Shapes

§Errors

pub fn reconstruct(&self, x: &Tensor) -> Result<Tensor>

§Shapes

§Errors

pub fn reconstruction_error(&self, x: &Tensor) -> Result<f64>

§Shapes

§Errors

pub fn decoder_vector(&self, feature_idx: usize) -> Result<Tensor>

§Shapes

§Errors

pub fn prepare_hook_injection( &self, features: &[(usize, f32)], position: usize, seq_len: usize, device: &Device, ) -> Result<HookSpec>

§Shapes

§Arguments

§Errors

Auto Trait Implementations§

impl Freeze for SparseAutoencoder

impl !RefUnwindSafe for SparseAutoencoder

impl Send for SparseAutoencoder

impl Sync for SparseAutoencoder

impl Unpin for SparseAutoencoder

impl UnsafeUnpin for SparseAutoencoder

impl !UnwindSafe for SparseAutoencoder

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T> Instrument for T

fn instrument(self, span: Span) -> Instrumented<Self>

fn in_current_span(self) -> Instrumented<Self>

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> IntoEither for T

fn into_either(self, into_left: bool) -> Either<Self, Self>

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self> where F: FnOnce(&Self) -> bool,

impl<T> Pointable for T

const ALIGN: usize

type Init = T

unsafe fn init(init: <T as Pointable>::Init) -> usize

unsafe fn deref<'a>(ptr: usize) -> &'a T

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

unsafe fn drop(ptr: usize)

impl<T> PolicyExt for T where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P> where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P> where T: Policy<B, E>, P: Policy<B, E>,

impl<T> Same for T

Struct SparseAutoencoder

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

impl<T> PolicyExt for T
where T: ?Sized,

fn and<P, B, E>(self, other: P) -> And<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

fn or<P, B, E>(self, other: P) -> Or<T, P>
where T: Policy<B, E>, P: Policy<B, E>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

impl<T> ErasedDestructor for T
where T: 'static,