Struct InterpretabilityEngine

Source

pub struct InterpretabilityEngine {
    pub model_info: ModelInfo,
    /* private fields */
}

Expand description

The main interpretability engine

Fields§

§model_info: ModelInfo

Model dimensions

Implementations§

Source §

impl InterpretabilityEngine

Source

pub fn new(model_info: ModelInfo) -> Self

Create a new interpretability engine

Source

pub fn record_activations( &mut self, input: &str, layer_activations: HashMap<usize, Vec<f32>>, attention_patterns: HashMap<(usize, usize), Vec<f32>>, )

Record activations for analysis

Source

pub fn analyze_attention_head( &self, layer: usize, head: usize, ) -> Option<AttentionHead>

Analyze attention head patterns

Source

pub fn discover_circuits(&mut self) -> Vec<Circuit>

Discover computational circuits

Source

pub fn attribute_features(&self, tokens: Vec<String>) -> FeatureAttribution

Perform feature attribution for a decision

Source

pub fn patch_activation( &mut self, layer: usize, position: usize, new_value: f32, ) -> ActivationPatch

Apply activation patching

Source

pub fn probe_for_concept(&mut self, layer: usize, concept: &str) -> ProbeResult

Run probing classifier

Source

pub fn analyze_safety(&mut self, input: &str) -> SafetyAnalysis

Perform comprehensive safety analysis

Source

pub fn get_stats(&self) -> &InterpretabilityStats

Get engine statistics

Source

pub fn get_circuits(&self) -> &[Circuit]

Get all discovered circuits

Source

pub fn label_neuron( &mut self, layer: usize, position: usize, features: Vec<String>, )

Label a neuron with detected features

Source

pub fn find_concept_neurons(&self, concept: &str) -> Vec<(usize, usize)>

Find neurons that respond to a specific concept

Source

pub fn export_report(&self) -> InterpretabilityReport

Export interpretability report

Trait Implementations§

Source §

impl Debug for InterpretabilityEngine

Source §

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl UnwindSafe for InterpretabilityEngine

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> Same for T

Source §

type Output = T

Should always be Self

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

Struct InterpretabilityEngine Copy item path

Fields§

Implementations§

impl InterpretabilityEngine

pub fn new(model_info: ModelInfo) -> Self

pub fn record_activations( &mut self, input: &str, layer_activations: HashMap<usize, Vec<f32>>, attention_patterns: HashMap<(usize, usize), Vec<f32>>, )

pub fn analyze_attention_head( &self, layer: usize, head: usize, ) -> Option<AttentionHead>

pub fn discover_circuits(&mut self) -> Vec<Circuit>

pub fn attribute_features(&self, tokens: Vec<String>) -> FeatureAttribution

pub fn patch_activation( &mut self, layer: usize, position: usize, new_value: f32, ) -> ActivationPatch

pub fn probe_for_concept(&mut self, layer: usize, concept: &str) -> ProbeResult

pub fn analyze_safety(&mut self, input: &str) -> SafetyAnalysis

pub fn get_stats(&self) -> &InterpretabilityStats

pub fn get_circuits(&self) -> &[Circuit]

pub fn label_neuron( &mut self, layer: usize, position: usize, features: Vec<String>, )

pub fn find_concept_neurons(&self, concept: &str) -> Vec<(usize, usize)>

pub fn export_report(&self) -> InterpretabilityReport

Trait Implementations§

impl Debug for InterpretabilityEngine

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Auto Trait Implementations§

impl Freeze for InterpretabilityEngine

impl RefUnwindSafe for InterpretabilityEngine

impl Send for InterpretabilityEngine

impl Sync for InterpretabilityEngine

impl Unpin for InterpretabilityEngine

impl UnwindSafe for InterpretabilityEngine

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> Same for T

type Output = T

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

Struct InterpretabilityEngine

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,