Struct sif_embedding::sif::Sif

source ·
pub struct Sif<'w, 'u, V, T> { /* private fields */ }
Expand description

An implementation of Smooth Inverse Frequency (SIF), a simple but powerful embedding technique for sentences, described in the paper:

Sanjeev Arora, Yingyu Liang, and Tengyu Ma, A Simple but Tough-to-Beat Baseline for Sentence Embeddings, ICLR 2017.

Examples

use std::io::BufReader;

use finalfusion::compat::text::ReadText;
use finalfusion::embeddings::Embeddings;

use sif_embedding::{Sif, UnigramLM};

// Load word embeddings from a pretrained model.
let word_model = "las 0.0 1.0 2.0\nvegas -3.0 -4.0 -5.0\n";
let mut reader = BufReader::new(word_model.as_bytes());
let word_embeddings = Embeddings::read_text(&mut reader).unwrap();

// Create a unigram language model.
let word_weights = [("las", 10.), ("vegas", 20.)];
let unigram_lm = UnigramLM::new(word_weights);

// Compute sentence embeddings.
let sif = Sif::new(&word_embeddings, &unigram_lm);
let sent_embeddings = sif.embeddings(["go to las vegas", "mega vegas"]);
assert_eq!(sent_embeddings.shape(), &[2, 3]);

Implementations§

source§

impl<'w, 'u, V, T> Sif<'w, 'u, V, T> where V: Vocab, T: Storage,

source

pub const fn new( word_embeddings: &'w Embeddings<V, T>, unigram_lm: &'u UnigramLM ) -> Self

Creates a new instance.

source

pub const fn separator(self, separator: char) -> Self

Sets a separator for sentence segmentation (default: ASCII whitespace).

source

pub fn param_a(self, param_a: Float) -> Self

Sets a SIF-weighting parameter a (default: 1e-3).

source

pub fn clear_common_component(self) -> Self

Clears the common component retained by Self::embeddings_mut().

source

pub const fn is_common_component_retained(&self) -> bool

Checks if the common component is retained by Self::embeddings_mut().

source

pub fn embeddings<I, S>(&self, sentences: I) -> Array2<Float> where I: IntoIterator<Item = S>, S: AsRef<str>,

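Computes embeddings for input sentences, returning a 2D-array of shape (n_sentences, embedding_size), where n_sentences is the number of input sentences and embedding_size is the value returned by Self::embedding_size().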

Behaviors depending on the internal state

The behavior of this method varies depending on the internal state of the instance:

  • If the common component c_0 is retained by Self::embeddings_mut(), this method uses it to compute embeddings;
  • Otherwise, it computes c_0 from the input sentences and uses it to compute embeddings.
source

pub fn embeddings_mut<I, S>(&mut self, sentences: I) -> Array2<Float> where I: IntoIterator<Item = S>, S: AsRef<str>,

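Computes embeddings for input sentences, returning a 2D-array of shape (n_sentences, embedding_size), where n_sentences is the number of input sentences and embedding_size is the value returned by Self::embedding_size().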

It also retains the common component c_0 from the input sentences, allowing for its reuse in Self::embeddings().

If the input is empty, the common component will be cleared.

source

pub fn embedding_size(&self) -> usize

Returns the number of dimensions for sentence embeddings, which is equivalent to that of the input word embeddings.

Trait Implementations§

source§

impl<'w, 'u, V: Clone, T: Clone> Clone for Sif<'w, 'u, V, T>

source§

fn clone(&self) -> Sif<'w, 'u, V, T>

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl<'w, 'u, V: Debug, T: Debug> Debug for Sif<'w, 'u, V, T>

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

§

impl<'w, 'u, V, T> RefUnwindSafe for Sif<'w, 'u, V, T> where T: RefUnwindSafe, V: RefUnwindSafe,

§

impl<'w, 'u, V, T> Send for Sif<'w, 'u, V, T> where T: Sync, V: Sync,

§

impl<'w, 'u, V, T> Sync for Sif<'w, 'u, V, T> where T: Sync, V: Sync,

§

impl<'w, 'u, V, T> Unpin for Sif<'w, 'u, V, T>

§

impl<'w, 'u, V, T> UnwindSafe for Sif<'w, 'u, V, T> where T: RefUnwindSafe, V: RefUnwindSafe,

Blanket Implementations§

source§

impl<T> Any for T where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

§

impl<T> Pointable for T

§

const ALIGN: usize = mem::align_of::<T>()

The alignment of pointer.
§

type Init = T

The type for initializers.
§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a value with the given initializer. Read more
§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
source§

impl<T> ToOwned for T where T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for T where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
§

impl<V, T> VZip<V> for T where V: MultiLane<T>,

§

fn vzip(self) -> V