Struct analiticcl::VariantModel [−][src]
pub struct VariantModel { pub decoder: VocabDecoder, pub encoder: VocabEncoder, pub alphabet: Alphabet, pub index: AnaIndex, pub sortedindex: BTreeMap<u16, Vec<AnaValue>>, pub have_freq: bool, pub freq_sum: usize, pub weights: Weights, pub lexicons: Vec<String>, pub confusables: Vec<Confusable>, pub confusables_before_pruning: bool, pub debug: bool, }
Fields
decoder: VocabDecoder
encoder: VocabEncoder
alphabet: Alphabet
index: AnaIndex
The main index, mapping anagrams to instances
sortedindex: BTreeMap<u16, Vec<AnaValue>>
A secondary sorted index. Indices of the outer vector correspond to the length of an anagram (in chars) - 1. The inner vector is always sorted.
have_freq: bool
Does the model have frequency information?
freq_sum: usize
Total sum of all frequencies in the lexicon
weights: Weights
Weights used in scoring
lexicons: Vec<String>
Stores the names of the loaded lexicons; they will be referenced by index from individual items for provenance reasons.
confusables: Vec<Confusable>
confusables_before_pruning: bool
Process confusables before pruning by max_matches
debug: bool
Implementations
impl VariantModel
[src]
impl VariantModel
[src]pub fn new(alphabet_file: &str, weights: Weights, debug: bool) -> VariantModel
[src]
pub fn new_with_alphabet(
alphabet: Alphabet,
weights: Weights,
debug: bool
) -> VariantModel
[src]
alphabet: Alphabet,
weights: Weights,
debug: bool
) -> VariantModel
pub fn set_confusables_before_pruning(&mut self)
[src]
pub fn alphabet_size(&self) -> CharIndexType
[src]
pub fn get_or_create_node<'a, 'b>(
&'a mut self,
anahash: &'b AnaValue
) -> &'a mut AnaIndexNode
[src]
&'a mut self,
anahash: &'b AnaValue
) -> &'a mut AnaIndexNode
pub fn build(&mut self)
[src]
pub fn contains_key(&self, key: &AnaValue) -> bool
[src]
pub fn get_anagram_instances(&self, text: &str) -> Vec<&VocabValue>
[src]
Get all anagram instances for a specific entry
pub fn get(&self, text: &str) -> Option<&VocabValue>
[src]
Get an exact item in the lexicon (if it exists)
pub fn has(&self, text: &str) -> bool
[src]
Tests if the lexicon has a specific entry, by text
pub fn get_vocab(&self, vocab_id: VocabId) -> Option<&VocabValue>
[src]
Resolves a vocabulary ID
pub fn read_alphabet(&mut self, filename: &str) -> Result<(), Error>
[src]
Read the alphabet from a TSV file. The file contains one alphabet entry per line, but a line may consist of multiple tab-separated alphabet entries, which will be treated as identical. The alphabet is not limited to single characters but may consist of longer strings; a greedy matching approach will be used, so order matters (but only for this).
pub fn read_confusablelist(&mut self, filename: &str) -> Result<(), Error>
[src]
Read a confusable list from a TSV file. Contains edit scripts in the first column (formatted in sesdiff style) and optionally a weight in the second column. Favourable confusables have a weight > 1.0, unfavourable ones are < 1.0 (penalties). Weight values should be relatively close to 1.0 as they are applied to the entire score.
pub fn add_to_confusables(
&mut self,
editscript: &str,
weight: f64
) -> Result<(), Error>
[src]
&mut self,
editscript: &str,
weight: f64
) -> Result<(), Error>
pub fn read_vocabulary(
&mut self,
filename: &str,
params: &VocabParams,
lexicon_weight: f32
) -> Result<(), Error>
[src]
&mut self,
filename: &str,
params: &VocabParams,
lexicon_weight: f32
) -> Result<(), Error>
Read vocabulary (a lexicon or corpus-derived lexicon) from a TSV file. May contain frequency information. The parameters define what value can be read from what column.
pub fn add_to_vocabulary(
&mut self,
text: &str,
frequency: Option<u32>,
lexicon_weight: Option<f32>,
lexicon_index: u8
)
[src]
&mut self,
text: &str,
frequency: Option<u32>,
lexicon_weight: Option<f32>,
lexicon_index: u8
)
pub fn find_variants(
&self,
input: &str,
max_anagram_distance: u8,
max_edit_distance: u8,
max_matches: usize,
score_threshold: f64,
stop_criterion: StopCriterion,
cache: Option<&mut Cache>
) -> Vec<(VocabId, f64)>
[src]
&self,
input: &str,
max_anagram_distance: u8,
max_edit_distance: u8,
max_matches: usize,
score_threshold: f64,
stop_criterion: StopCriterion,
cache: Option<&mut Cache>
) -> Vec<(VocabId, f64)>
Find variants in the vocabulary for a given string (in its totality); returns a vector of vocabulary ID and score pairs.
The resulting vocabulary Ids can be resolved through get_vocab()
pub fn find_nearest_anahashes<'a>(
&'a self,
focus: &AnaValue,
normstring: &Vec<u8>,
max_distance: u8,
stop_criterion: StopCriterion,
cache: Option<&mut HashSet<AnaValue>>
) -> HashSet<&'a AnaValue>
[src]
&'a self,
focus: &AnaValue,
normstring: &Vec<u8>,
max_distance: u8,
stop_criterion: StopCriterion,
cache: Option<&mut HashSet<AnaValue>>
) -> HashSet<&'a AnaValue>
Find the nearest anahashes that exist in the model (computing anahashes in the neighbourhood if needed).
pub fn gather_instances(
&self,
nearest_anagrams: &HashSet<&AnaValue>,
querystring: &[u8],
query: &str,
max_edit_distance: u8
) -> Vec<(VocabId, Distance)>
[src]
&self,
nearest_anagrams: &HashSet<&AnaValue>,
querystring: &[u8],
query: &str,
max_edit_distance: u8
) -> Vec<(VocabId, Distance)>
Gather instances and their edit distances, given a search string (normalised to the alphabet) and anagram hashes
pub fn score_and_rank(
&self,
instances: Vec<(VocabId, Distance)>,
input: &str,
max_matches: usize,
score_threshold: f64
) -> Vec<(VocabId, f64)>
[src]
&self,
instances: Vec<(VocabId, Distance)>,
input: &str,
max_matches: usize,
score_threshold: f64
) -> Vec<(VocabId, f64)>
Rank and score all variants
pub fn rescore_confusables(
&self,
results: &mut Vec<(VocabId, f64)>,
input: &str
)
[src]
&self,
results: &mut Vec<(VocabId, f64)>,
input: &str
)
pub fn compute_confusable_weight(&self, input: &str, candidate: VocabId) -> f64
[src]
Compute the weight over known confusables. Should return 1.0 when there are no known confusables, < 1.0 when there are unfavourable confusables,
and > 1.0 when there are favourable confusables.
pub fn add_to_reverse_index(
&self,
reverseindex: &mut ReverseIndex,
input: &str,
matched_vocab_id: VocabId,
score: f64
)
[src]
&self,
reverseindex: &mut ReverseIndex,
input: &str,
matched_vocab_id: VocabId,
score: f64
)
Adds the input item to the reverse index, as an instantiation of the given vocabulary ID.
Auto Trait Implementations
impl RefUnwindSafe for VariantModel
impl RefUnwindSafe for VariantModel
impl Send for VariantModel
impl Send for VariantModel
impl Sync for VariantModel
impl Sync for VariantModel
impl Unpin for VariantModel
impl Unpin for VariantModel
impl UnwindSafe for VariantModel
impl UnwindSafe for VariantModel