Struct analiticcl::VariantModel[][src]

pub struct VariantModel {
    pub decoder: VocabDecoder,
    pub encoder: VocabEncoder,
    pub alphabet: Alphabet,
    pub index: AnaIndex,
    pub sortedindex: BTreeMap<u16, Vec<AnaValue>>,
    pub have_freq: bool,
    pub freq_sum: usize,
    pub weights: Weights,
    pub lexicons: Vec<String>,
    pub confusables: Vec<Confusable>,
    pub confusables_before_pruning: bool,
    pub debug: bool,
}

decoder: VocabDecoder

encoder: VocabEncoder

alphabet: Alphabet

index: AnaIndex

The main index, mapping anagrams to instances

sortedindex: BTreeMap<u16, Vec<AnaValue>>

A secondary sorted index. Indices of the outer vector correspond to the length of an anagram (in chars) - 1. The inner vector is always sorted.

have_freq: bool

Does the model have frequency information?

freq_sum: usize

Total sum of all frequencies in the lexicon

weights: Weights

Weights used in scoring

lexicons: Vec<String>

Stores the names of the loaded lexicons; they will be referenced by index from individual items for provenance reasons.

confusables: Vec<Confusable>

confusables_before_pruning: bool

Process confusables before pruning by max_matches

debug: bool


impl VariantModel[src]

pub fn new(alphabet_file: &str, weights: Weights, debug: bool) -> VariantModel[src]

pub fn new_with_alphabet(
    alphabet: Alphabet,
    weights: Weights,
    debug: bool
) -> VariantModel

pub fn set_confusables_before_pruning(&mut self)[src]

pub fn alphabet_size(&self) -> CharIndexType[src]

pub fn get_or_create_node<'a, 'b>(
    &'a mut self,
    anahash: &'b AnaValue
) -> &'a mut AnaIndexNode

pub fn build(&mut self)[src]

pub fn contains_key(&self, key: &AnaValue) -> bool[src]

pub fn get_anagram_instances(&self, text: &str) -> Vec<&VocabValue>[src]

Get all anagram instances for a specific entry

pub fn get(&self, text: &str) -> Option<&VocabValue>[src]

Get an exact item in the lexicon (if it exists)

pub fn has(&self, text: &str) -> bool[src]

Tests if the lexicon has a specific entry, by text

pub fn get_vocab(&self, vocab_id: VocabId) -> Option<&VocabValue>[src]

Resolves a vocabulary ID

pub fn read_alphabet(&mut self, filename: &str) -> Result<(), Error>[src]

Read the alphabet from a TSV file. The file contains one alphabet entry per line, but a line may consist of multiple tab-separated alphabet entries, which will be treated as identical. The alphabet is not limited to single characters but may consist of longer strings; a greedy matching approach will be used, so order matters (but only for this).

pub fn read_confusablelist(&mut self, filename: &str) -> Result<(), Error>[src]

Read a confusable list from a TSV file. It contains edit scripts in the first column (formatted in sesdiff style) and optionally a weight in the second column. Favourable confusables have a weight > 1.0, unfavourable ones are < 1.0 (penalties). Weight values should be relatively close to 1.0 as they are applied to the entire score.

pub fn add_to_confusables(
    &mut self,
    editscript: &str,
    weight: f64
) -> Result<(), Error>

pub fn read_vocabulary(
    &mut self,
    filename: &str,
    params: &VocabParams,
    lexicon_weight: f32
) -> Result<(), Error>

Read vocabulary (a lexicon or corpus-derived lexicon) from a TSV file. May contain frequency information. The parameters define what value can be read from what column.

pub fn add_to_vocabulary(
    &mut self,
    text: &str,
    frequency: Option<u32>,
    lexicon_weight: Option<f32>,
    lexicon_index: u8

pub fn find_variants(
    input: &str,
    max_anagram_distance: u8,
    max_edit_distance: u8,
    max_matches: usize,
    score_threshold: f64,
    stop_criterion: StopCriterion,
    cache: Option<&mut Cache>
) -> Vec<(VocabId, f64)>

Find variants in the vocabulary for a given string (in its totality); returns a vector of vocabulary ID and score pairs. The resulting vocabulary IDs can be resolved through get_vocab().

pub fn find_nearest_anahashes<'a>(
    &'a self,
    focus: &AnaValue,
    normstring: &Vec<u8>,
    max_distance: u8,
    stop_criterion: StopCriterion,
    cache: Option<&mut HashSet<AnaValue>>
) -> HashSet<&'a AnaValue>

Find the nearest anahashes that exist in the model (computing anahashes in the neighbourhood if needed).

pub fn gather_instances(
    nearest_anagrams: &HashSet<&AnaValue>,
    querystring: &[u8],
    query: &str,
    max_edit_distance: u8
) -> Vec<(VocabId, Distance)>

Gather instances and their edit distances, given a search string (normalised to the alphabet) and anagram hashes

pub fn score_and_rank(
    instances: Vec<(VocabId, Distance)>,
    input: &str,
    max_matches: usize,
    score_threshold: f64
) -> Vec<(VocabId, f64)>

Rank and score all variants

pub fn rescore_confusables(
    results: &mut Vec<(VocabId, f64)>,
    input: &str

pub fn compute_confusable_weight(&self, input: &str, candidate: VocabId) -> f64[src]

Compute weight over known confusables. Should return 1.0 when there are no known confusables, < 1.0 when there are unfavourable confusables, and > 1.0 when there are favourable confusables.

pub fn add_to_reverse_index(
    reverseindex: &mut ReverseIndex,
    input: &str,
    matched_vocab_id: VocabId,
    score: f64

Adds the input item to the reverse index, as an instantiation of the given vocabulary ID.

Auto Trait Implementations

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized

impl<T> Borrow<T> for T where
    T: ?Sized

impl<T> BorrowMut<T> for T where
    T: ?Sized

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.