WordNet

Struct WordNet 

Source
pub struct WordNet { /* private fields */ }
Expand description

In-memory view of a WordNet dictionary, backed either by memory-mapped files or by owned buffers.

Implementations§

Source§

impl WordNet

Source

pub fn load(dict_dir: impl AsRef<Path>) -> Result<Self>

Load WordNet from a directory containing data.* and index.* files.

Defaults to memory-mapping the source files. Use [`WordNet::load_with_mode`] to force owned buffers instead.

Source

pub fn load_with_mode( dict_dir: impl AsRef<Path>, mode: LoadMode, ) -> Result<Self>

Load WordNet, choosing between memory-mapped files and owned buffers at runtime via `mode`.

Examples found in repository?
examples/stats.rs (line 14)
8fn main() -> Result<()> {
9    let dict_dir = env::args()
10        .nth(1)
11        .map(PathBuf::from)
12        .context("usage: cargo run -p wordnet-db --example stats -- <path-to-wordnet-dir>")?;
13
14    let wn = WordNet::load_with_mode(&dict_dir, LoadMode::Mmap)
15        .with_context(|| format!("loading WordNet from {}", dict_dir.display()))?;
16
17    let mut word_count = 0usize;
18    let mut pointer_count = 0usize;
19    let mut gloss_example_count = 0usize;
20    let mut verb_frame_instances = 0usize;
21
22    for syn in wn.iter_synsets() {
23        word_count += syn.words.len();
24        pointer_count += syn.pointers.len();
25        gloss_example_count += syn.gloss.examples.len();
26        if syn.id.pos == Pos::Verb {
27            verb_frame_instances += syn.frames.len();
28        }
29    }
30
31    println!("Dictionary: {}", dict_dir.display());
32    println!("Index entries: {}", wn.index_count());
33    println!("Lemma keys   : {}", wn.lemma_count());
34    println!("Synsets      : {}", wn.synset_count());
35    println!("Words in synsets: {}", word_count);
36    println!("Pointers     : {}", pointer_count);
37    println!("Gloss examples: {}", gloss_example_count);
38    println!(
39        "Verb frame templates (frames.vrb): {}",
40        wn.verb_frame_templates_count()
41    );
42    println!("Verb frame instances in synsets: {}", verb_frame_instances);
43    println!("Sense-count entries: {}", wn.sense_count_entries());
44
45    // Spot-check a couple of lemmas to confirm lookup.
46    for (pos, lemma) in [(Pos::Noun, "dog"), (Pos::Verb, "run")] {
47        println!(
48            "Lemma '{}' ({:?}) exists? {}",
49            lemma,
50            pos,
51            wn.lemma_exists(pos, lemma)
52        );
53    }
54
55    Ok(())
56}
Source

pub fn lemma_exists(&self, pos: Pos, lemma: &str) -> bool

Check whether a lemma exists for the given part of speech according to the index files.

Examples found in repository?
examples/stats.rs (line 51)
8fn main() -> Result<()> {
9    let dict_dir = env::args()
10        .nth(1)
11        .map(PathBuf::from)
12        .context("usage: cargo run -p wordnet-db --example stats -- <path-to-wordnet-dir>")?;
13
14    let wn = WordNet::load_with_mode(&dict_dir, LoadMode::Mmap)
15        .with_context(|| format!("loading WordNet from {}", dict_dir.display()))?;
16
17    let mut word_count = 0usize;
18    let mut pointer_count = 0usize;
19    let mut gloss_example_count = 0usize;
20    let mut verb_frame_instances = 0usize;
21
22    for syn in wn.iter_synsets() {
23        word_count += syn.words.len();
24        pointer_count += syn.pointers.len();
25        gloss_example_count += syn.gloss.examples.len();
26        if syn.id.pos == Pos::Verb {
27            verb_frame_instances += syn.frames.len();
28        }
29    }
30
31    println!("Dictionary: {}", dict_dir.display());
32    println!("Index entries: {}", wn.index_count());
33    println!("Lemma keys   : {}", wn.lemma_count());
34    println!("Synsets      : {}", wn.synset_count());
35    println!("Words in synsets: {}", word_count);
36    println!("Pointers     : {}", pointer_count);
37    println!("Gloss examples: {}", gloss_example_count);
38    println!(
39        "Verb frame templates (frames.vrb): {}",
40        wn.verb_frame_templates_count()
41    );
42    println!("Verb frame instances in synsets: {}", verb_frame_instances);
43    println!("Sense-count entries: {}", wn.sense_count_entries());
44
45    // Spot-check a couple of lemmas to confirm lookup.
46    for (pos, lemma) in [(Pos::Noun, "dog"), (Pos::Verb, "run")] {
47        println!(
48            "Lemma '{}' ({:?}) exists? {}",
49            lemma,
50            pos,
51            wn.lemma_exists(pos, lemma)
52        );
53    }
54
55    Ok(())
56}
Source

pub fn index_entry(&self, pos: Pos, lemma: &str) -> Option<IndexEntry<'_>>

Fetch the raw `IndexEntry` for the given part of speech and lemma, if present.

Source

pub fn synsets_for_lemma(&self, pos: Pos, lemma: &str) -> &[SynsetId]

Return the ids of the synsets associated with a lemma for the given part of speech, or an empty slice when there are none.

Source

pub fn get_synset(&self, id: SynsetId) -> Option<Synset<'_>>

Fetch the `Synset` with the given id, if it has been loaded.

Source

pub fn iter_synsets(&self) -> impl Iterator<Item = Synset<'_>> + '_

Iterate over all synsets as borrowed views.

Examples found in repository?
examples/stats.rs (line 22)
8fn main() -> Result<()> {
9    let dict_dir = env::args()
10        .nth(1)
11        .map(PathBuf::from)
12        .context("usage: cargo run -p wordnet-db --example stats -- <path-to-wordnet-dir>")?;
13
14    let wn = WordNet::load_with_mode(&dict_dir, LoadMode::Mmap)
15        .with_context(|| format!("loading WordNet from {}", dict_dir.display()))?;
16
17    let mut word_count = 0usize;
18    let mut pointer_count = 0usize;
19    let mut gloss_example_count = 0usize;
20    let mut verb_frame_instances = 0usize;
21
22    for syn in wn.iter_synsets() {
23        word_count += syn.words.len();
24        pointer_count += syn.pointers.len();
25        gloss_example_count += syn.gloss.examples.len();
26        if syn.id.pos == Pos::Verb {
27            verb_frame_instances += syn.frames.len();
28        }
29    }
30
31    println!("Dictionary: {}", dict_dir.display());
32    println!("Index entries: {}", wn.index_count());
33    println!("Lemma keys   : {}", wn.lemma_count());
34    println!("Synsets      : {}", wn.synset_count());
35    println!("Words in synsets: {}", word_count);
36    println!("Pointers     : {}", pointer_count);
37    println!("Gloss examples: {}", gloss_example_count);
38    println!(
39        "Verb frame templates (frames.vrb): {}",
40        wn.verb_frame_templates_count()
41    );
42    println!("Verb frame instances in synsets: {}", verb_frame_instances);
43    println!("Sense-count entries: {}", wn.sense_count_entries());
44
45    // Spot-check a couple of lemmas to confirm lookup.
46    for (pos, lemma) in [(Pos::Noun, "dog"), (Pos::Verb, "run")] {
47        println!(
48            "Lemma '{}' ({:?}) exists? {}",
49            lemma,
50            pos,
51            wn.lemma_exists(pos, lemma)
52        );
53    }
54
55    Ok(())
56}
Source

pub fn index_count(&self) -> usize

Number of index entries.

Examples found in repository?
examples/stats.rs (line 32)
8fn main() -> Result<()> {
9    let dict_dir = env::args()
10        .nth(1)
11        .map(PathBuf::from)
12        .context("usage: cargo run -p wordnet-db --example stats -- <path-to-wordnet-dir>")?;
13
14    let wn = WordNet::load_with_mode(&dict_dir, LoadMode::Mmap)
15        .with_context(|| format!("loading WordNet from {}", dict_dir.display()))?;
16
17    let mut word_count = 0usize;
18    let mut pointer_count = 0usize;
19    let mut gloss_example_count = 0usize;
20    let mut verb_frame_instances = 0usize;
21
22    for syn in wn.iter_synsets() {
23        word_count += syn.words.len();
24        pointer_count += syn.pointers.len();
25        gloss_example_count += syn.gloss.examples.len();
26        if syn.id.pos == Pos::Verb {
27            verb_frame_instances += syn.frames.len();
28        }
29    }
30
31    println!("Dictionary: {}", dict_dir.display());
32    println!("Index entries: {}", wn.index_count());
33    println!("Lemma keys   : {}", wn.lemma_count());
34    println!("Synsets      : {}", wn.synset_count());
35    println!("Words in synsets: {}", word_count);
36    println!("Pointers     : {}", pointer_count);
37    println!("Gloss examples: {}", gloss_example_count);
38    println!(
39        "Verb frame templates (frames.vrb): {}",
40        wn.verb_frame_templates_count()
41    );
42    println!("Verb frame instances in synsets: {}", verb_frame_instances);
43    println!("Sense-count entries: {}", wn.sense_count_entries());
44
45    // Spot-check a couple of lemmas to confirm lookup.
46    for (pos, lemma) in [(Pos::Noun, "dog"), (Pos::Verb, "run")] {
47        println!(
48            "Lemma '{}' ({:?}) exists? {}",
49            lemma,
50            pos,
51            wn.lemma_exists(pos, lemma)
52        );
53    }
54
55    Ok(())
56}
Source

pub fn lemma_count(&self) -> usize

Number of lemmas tracked across all parts of speech.

Examples found in repository?
examples/stats.rs (line 33)
8fn main() -> Result<()> {
9    let dict_dir = env::args()
10        .nth(1)
11        .map(PathBuf::from)
12        .context("usage: cargo run -p wordnet-db --example stats -- <path-to-wordnet-dir>")?;
13
14    let wn = WordNet::load_with_mode(&dict_dir, LoadMode::Mmap)
15        .with_context(|| format!("loading WordNet from {}", dict_dir.display()))?;
16
17    let mut word_count = 0usize;
18    let mut pointer_count = 0usize;
19    let mut gloss_example_count = 0usize;
20    let mut verb_frame_instances = 0usize;
21
22    for syn in wn.iter_synsets() {
23        word_count += syn.words.len();
24        pointer_count += syn.pointers.len();
25        gloss_example_count += syn.gloss.examples.len();
26        if syn.id.pos == Pos::Verb {
27            verb_frame_instances += syn.frames.len();
28        }
29    }
30
31    println!("Dictionary: {}", dict_dir.display());
32    println!("Index entries: {}", wn.index_count());
33    println!("Lemma keys   : {}", wn.lemma_count());
34    println!("Synsets      : {}", wn.synset_count());
35    println!("Words in synsets: {}", word_count);
36    println!("Pointers     : {}", pointer_count);
37    println!("Gloss examples: {}", gloss_example_count);
38    println!(
39        "Verb frame templates (frames.vrb): {}",
40        wn.verb_frame_templates_count()
41    );
42    println!("Verb frame instances in synsets: {}", verb_frame_instances);
43    println!("Sense-count entries: {}", wn.sense_count_entries());
44
45    // Spot-check a couple of lemmas to confirm lookup.
46    for (pos, lemma) in [(Pos::Noun, "dog"), (Pos::Verb, "run")] {
47        println!(
48            "Lemma '{}' ({:?}) exists? {}",
49            lemma,
50            pos,
51            wn.lemma_exists(pos, lemma)
52        );
53    }
54
55    Ok(())
56}
Source

pub fn synset_count(&self) -> usize

Number of synsets.

Examples found in repository?
examples/stats.rs (line 34)
8fn main() -> Result<()> {
9    let dict_dir = env::args()
10        .nth(1)
11        .map(PathBuf::from)
12        .context("usage: cargo run -p wordnet-db --example stats -- <path-to-wordnet-dir>")?;
13
14    let wn = WordNet::load_with_mode(&dict_dir, LoadMode::Mmap)
15        .with_context(|| format!("loading WordNet from {}", dict_dir.display()))?;
16
17    let mut word_count = 0usize;
18    let mut pointer_count = 0usize;
19    let mut gloss_example_count = 0usize;
20    let mut verb_frame_instances = 0usize;
21
22    for syn in wn.iter_synsets() {
23        word_count += syn.words.len();
24        pointer_count += syn.pointers.len();
25        gloss_example_count += syn.gloss.examples.len();
26        if syn.id.pos == Pos::Verb {
27            verb_frame_instances += syn.frames.len();
28        }
29    }
30
31    println!("Dictionary: {}", dict_dir.display());
32    println!("Index entries: {}", wn.index_count());
33    println!("Lemma keys   : {}", wn.lemma_count());
34    println!("Synsets      : {}", wn.synset_count());
35    println!("Words in synsets: {}", word_count);
36    println!("Pointers     : {}", pointer_count);
37    println!("Gloss examples: {}", gloss_example_count);
38    println!(
39        "Verb frame templates (frames.vrb): {}",
40        wn.verb_frame_templates_count()
41    );
42    println!("Verb frame instances in synsets: {}", verb_frame_instances);
43    println!("Sense-count entries: {}", wn.sense_count_entries());
44
45    // Spot-check a couple of lemmas to confirm lookup.
46    for (pos, lemma) in [(Pos::Noun, "dog"), (Pos::Verb, "run")] {
47        println!(
48            "Lemma '{}' ({:?}) exists? {}",
49            lemma,
50            pos,
51            wn.lemma_exists(pos, lemma)
52        );
53    }
54
55    Ok(())
56}
Source

pub fn verb_frame_templates_count(&self) -> usize

Number of verb frame template strings loaded.

Examples found in repository?
examples/stats.rs (line 40)
8fn main() -> Result<()> {
9    let dict_dir = env::args()
10        .nth(1)
11        .map(PathBuf::from)
12        .context("usage: cargo run -p wordnet-db --example stats -- <path-to-wordnet-dir>")?;
13
14    let wn = WordNet::load_with_mode(&dict_dir, LoadMode::Mmap)
15        .with_context(|| format!("loading WordNet from {}", dict_dir.display()))?;
16
17    let mut word_count = 0usize;
18    let mut pointer_count = 0usize;
19    let mut gloss_example_count = 0usize;
20    let mut verb_frame_instances = 0usize;
21
22    for syn in wn.iter_synsets() {
23        word_count += syn.words.len();
24        pointer_count += syn.pointers.len();
25        gloss_example_count += syn.gloss.examples.len();
26        if syn.id.pos == Pos::Verb {
27            verb_frame_instances += syn.frames.len();
28        }
29    }
30
31    println!("Dictionary: {}", dict_dir.display());
32    println!("Index entries: {}", wn.index_count());
33    println!("Lemma keys   : {}", wn.lemma_count());
34    println!("Synsets      : {}", wn.synset_count());
35    println!("Words in synsets: {}", word_count);
36    println!("Pointers     : {}", pointer_count);
37    println!("Gloss examples: {}", gloss_example_count);
38    println!(
39        "Verb frame templates (frames.vrb): {}",
40        wn.verb_frame_templates_count()
41    );
42    println!("Verb frame instances in synsets: {}", verb_frame_instances);
43    println!("Sense-count entries: {}", wn.sense_count_entries());
44
45    // Spot-check a couple of lemmas to confirm lookup.
46    for (pos, lemma) in [(Pos::Noun, "dog"), (Pos::Verb, "run")] {
47        println!(
48            "Lemma '{}' ({:?}) exists? {}",
49            lemma,
50            pos,
51            wn.lemma_exists(pos, lemma)
52        );
53    }
54
55    Ok(())
56}
Source

pub fn sense_count_entries(&self) -> usize

Number of sense-count entries parsed from cntlist.

Examples found in repository?
examples/stats.rs (line 43)
8fn main() -> Result<()> {
9    let dict_dir = env::args()
10        .nth(1)
11        .map(PathBuf::from)
12        .context("usage: cargo run -p wordnet-db --example stats -- <path-to-wordnet-dir>")?;
13
14    let wn = WordNet::load_with_mode(&dict_dir, LoadMode::Mmap)
15        .with_context(|| format!("loading WordNet from {}", dict_dir.display()))?;
16
17    let mut word_count = 0usize;
18    let mut pointer_count = 0usize;
19    let mut gloss_example_count = 0usize;
20    let mut verb_frame_instances = 0usize;
21
22    for syn in wn.iter_synsets() {
23        word_count += syn.words.len();
24        pointer_count += syn.pointers.len();
25        gloss_example_count += syn.gloss.examples.len();
26        if syn.id.pos == Pos::Verb {
27            verb_frame_instances += syn.frames.len();
28        }
29    }
30
31    println!("Dictionary: {}", dict_dir.display());
32    println!("Index entries: {}", wn.index_count());
33    println!("Lemma keys   : {}", wn.lemma_count());
34    println!("Synsets      : {}", wn.synset_count());
35    println!("Words in synsets: {}", word_count);
36    println!("Pointers     : {}", pointer_count);
37    println!("Gloss examples: {}", gloss_example_count);
38    println!(
39        "Verb frame templates (frames.vrb): {}",
40        wn.verb_frame_templates_count()
41    );
42    println!("Verb frame instances in synsets: {}", verb_frame_instances);
43    println!("Sense-count entries: {}", wn.sense_count_entries());
44
45    // Spot-check a couple of lemmas to confirm lookup.
46    for (pos, lemma) in [(Pos::Noun, "dog"), (Pos::Verb, "run")] {
47        println!(
48            "Lemma '{}' ({:?}) exists? {}",
49            lemma,
50            pos,
51            wn.lemma_exists(pos, lemma)
52        );
53    }
54
55    Ok(())
56}
Source

pub fn sense_count( &self, pos: Pos, lemma: &str, synset_offset: u32, ) -> Option<u32>

Sense frequency for a given lemma/pos/synset, if present in cntlist.rev.

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.