Word2Vec

Struct Word2Vec 

Source
pub struct Word2Vec { /* private fields */ }
Expand description

Word2Vec model for training and using word embeddings

Word2Vec is an algorithm for learning vector representations of words, also known as word embeddings. These vectors capture semantic meanings of words, allowing operations like “king - man + woman” to result in a vector close to “queen”.

This implementation supports both Continuous Bag of Words (CBOW) and Skip-gram models, with negative sampling for efficient training.

Implementations§

Source§

impl Word2Vec

Source

pub fn new() -> Self

Create a new Word2Vec model with the default configuration; use the `with_*` builder methods to customize it before training

Examples found in repository?
examples/word2vec_example.rs (line 28)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn with_config(config: Word2VecConfig) -> Self

Create a new Word2Vec model with the specified configuration

Source

pub fn with_tokenizer(self, tokenizer: Box<dyn Tokenizer + Send + Sync>) -> Self

Set a custom tokenizer used to split input text into word tokens during training

Source

pub fn with_vector_size(self, vectorsize: usize) -> Self

Set the vector size, i.e. the dimensionality of the learned word embedding vectors

Examples found in repository?
examples/word2vec_example.rs (line 29)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn with_window_size(self, windowsize: usize) -> Self

Set the context window size — the maximum number of neighboring words on each side of the target word considered during training

Examples found in repository?
examples/word2vec_example.rs (line 30)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn with_min_count(self, mincount: usize) -> Self

Set the minimum count: words that occur fewer than this many times in the corpus are excluded from the vocabulary

Examples found in repository?
examples/word2vec_example.rs (line 31)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn with_epochs(self, epochs: usize) -> Self

Set the number of training epochs (full passes over the training corpus)

Examples found in repository?
examples/word2vec_example.rs (line 32)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn with_learning_rate(self, learning_rate: f64) -> Self

Set learning rate

Source

pub fn with_algorithm(self, algorithm: Word2VecAlgorithm) -> Self

Set algorithm (CBOW or Skip-gram)

Examples found in repository?
examples/word2vec_example.rs (line 33)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn with_negative_samples(self, negative_samples: usize) -> Self

Set number of negative samples

Examples found in repository?
examples/word2vec_example.rs (line 34)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn with_subsample(self, subsample: f64) -> Self

Set subsampling threshold

Source

pub fn with_batch_size(self, batch_size: usize) -> Self

Set batch size

Source

pub fn build_vocabulary(&mut self, texts: &[&str]) -> Result<()>

Build vocabulary from a corpus

Source

pub fn train(&mut self, texts: &[&str]) -> Result<()>

Train the Word2Vec model on a corpus

Examples found in repository?
examples/word2vec_example.rs (line 38)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn vector_size(&self) -> usize

Get the vector size

Source

pub fn get_word_vector(&self, word: &str) -> Result<Array1<f64>>

Get the embedding vector for a word

Examples found in repository?
examples/word2vec_example.rs (line 79)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn most_similar( &self, word: &str, top_n: usize, ) -> Result<Vec<(String, f64)>>

Get the most similar words to a given word

Examples found in repository?
examples/word2vec_example.rs (line 50)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn most_similar_by_vector( &self, vector: &Array1<f64>, top_n: usize, exclude_words: &[&str], ) -> Result<Vec<(String, f64)>>

Get the most similar words to a given vector

Examples found in repository?
examples/word2vec_example.rs (line 153)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn analogy( &self, a: &str, b: &str, c: &str, topn: usize, ) -> Result<Vec<(String, f64)>>

Compute the analogy: a is to b as c is to ?

Examples found in repository?
examples/word2vec_example.rs (line 69)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn save<P: AsRef<Path>>(&self, path: P) -> Result<()>

Save the Word2Vec model to a file

Examples found in repository?
examples/word2vec_example.rs (line 163)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn load<P: AsRef<Path>>(path: P) -> Result<Self>

Load a Word2Vec model from a file

Examples found in repository?
examples/word2vec_example.rs (line 167)
6fn main() {
7    println!("Word2Vec Example");
8    println!("================\n");
9
10    // Sample corpus for demonstration
11    let corpus = [
12        "the quick brown fox jumps over the lazy dog",
13        "a quick brown fox jumps over a lazy dog",
14        "the fox is quick and brown",
15        "the dog is lazy and sleepy",
16        "quick brown foxes jump over lazy dogs",
17        "the quick fox jumped over the lazy sleeping dog",
18        "a brown dog chased the quick fox",
19        "foxes and dogs are natural enemies",
20        "the quick brown cat jumps over the lazy fox",
21        "a quick brown cat jumps over a lazy fox",
22    ];
23
24    println!("Training Word2Vec model on a small corpus...");
25    let start = Instant::now();
26
27    // Create a Word2Vec model with Skip-gram algorithm
28    let mut skipgram_model = Word2Vec::new()
29        .with_vector_size(50)
30        .with_window_size(3)
31        .with_min_count(1)
32        .with_epochs(100)
33        .with_algorithm(Word2VecAlgorithm::SkipGram)
34        .with_negative_samples(5);
35
36    // Train the model
37    skipgram_model
38        .train(&corpus)
39        .expect("Failed to train Skip-gram model");
40    let elapsed = start.elapsed();
41
42    println!(
43        "Training completed in {:.2} seconds\n",
44        elapsed.as_secs_f32()
45    );
46
47    // Find similar words
48    println!("Finding words similar to 'fox':");
49    let similar_to_fox = skipgram_model
50        .most_similar("fox", 5)
51        .expect("Failed to find similar words");
52
53    for (word, similarity) in similar_to_fox {
54        println!("{word}: {similarity:.4}");
55    }
56
57    println!("\nFinding words similar to 'dog':");
58    let similar_to_dog = skipgram_model
59        .most_similar("dog", 5)
60        .expect("Failed to find similar words");
61
62    for (word, similarity) in similar_to_dog {
63        println!("{word}: {similarity:.4}");
64    }
65
66    // Compute analogies (e.g., fox is to dog as quick is to ?)
67    println!("\nAnalogy: fox is to dog as quick is to ?");
68    let analogy_result = skipgram_model
69        .analogy("fox", "dog", "quick", 3)
70        .expect("Failed to compute analogy");
71
72    for (word, similarity) in analogy_result {
73        println!("{word}: {similarity:.4}");
74    }
75
76    // Get word vectors and calculate cosine similarity manually
77    println!("\nComparing word vectors:");
78    let fox_vector = skipgram_model
79        .get_word_vector("fox")
80        .expect("Failed to get vector for 'fox'");
81    let dog_vector = skipgram_model
82        .get_word_vector("dog")
83        .expect("Failed to get vector for 'dog'");
84    let quick_vector = skipgram_model
85        .get_word_vector("quick")
86        .expect("Failed to get vector for 'quick'");
87
88    println!(
89        "Cosine similarity between 'fox' and 'dog': {:.4}",
90        cosine_similarity(&fox_vector, &dog_vector)
91    );
92    println!(
93        "Cosine similarity between 'fox' and 'quick': {:.4}",
94        cosine_similarity(&fox_vector, &quick_vector)
95    );
96    println!(
97        "Cosine similarity between 'quick' and 'dog': {:.4}",
98        cosine_similarity(&quick_vector, &dog_vector)
99    );
100
101    // Train a CBOW model on the same corpus
102    println!("\nTraining CBOW model on the same corpus...");
103    let start = Instant::now();
104
105    let mut cbow_model = Word2Vec::new()
106        .with_vector_size(50)
107        .with_window_size(3)
108        .with_min_count(1)
109        .with_epochs(100)
110        .with_algorithm(Word2VecAlgorithm::CBOW)
111        .with_negative_samples(5);
112
113    cbow_model
114        .train(&corpus)
115        .expect("Failed to train CBOW model");
116    let elapsed = start.elapsed();
117
118    println!(
119        "Training completed in {:.2} seconds\n",
120        elapsed.as_secs_f32()
121    );
122
123    // Compare results from CBOW model
124    println!("CBOW model - Words similar to 'fox':");
125    let similar_to_fox_cbow = cbow_model
126        .most_similar("fox", 5)
127        .expect("Failed to find similar words");
128
129    for (word, similarity) in similar_to_fox_cbow {
130        println!("{word}: {similarity:.4}");
131    }
132
133    // Vector arithmetic: fox - dog + cat = ?
134    println!("\nVector arithmetic: fox - dog + cat = ?");
135
136    // Manual vector arithmetic
137    let fox_vec = skipgram_model.get_word_vector("fox").unwrap();
138    let dog_vec = skipgram_model.get_word_vector("dog").unwrap();
139    let cat_vec = skipgram_model.get_word_vector("cat").unwrap();
140
141    // Compute the result vector
142    let mut result_vec = Array1::zeros(fox_vec.dim());
143    result_vec.assign(&fox_vec);
144    result_vec -= &dog_vec;
145    result_vec += &cat_vec;
146
147    // Normalize the vector
148    let norm = (result_vec.iter().fold(0.0, |sum, &val| sum + val * val)).sqrt();
149    result_vec.mapv_inplace(|val| val / norm);
150
151    // Find words similar to the result vector
152    let similar_to_result = skipgram_model
153        .most_similar_by_vector(&result_vec, 5, &["fox", "dog", "cat"])
154        .expect("Failed to find similar words");
155
156    for (word, similarity) in similar_to_result {
157        println!("{word}: {similarity:.4}");
158    }
159
160    // Save and load the model
161    println!("\nSaving and loading the model...");
162    skipgram_model
163        .save("word2vec_model.txt")
164        .expect("Failed to save model");
165    println!("Model saved to 'word2vec_model.txt'");
166
167    let loaded_model = Word2Vec::load("word2vec_model.txt").expect("Failed to load model");
168    println!("Model loaded successfully");
169
170    // Verify the loaded model works
171    let similar_words_loaded = loaded_model
172        .most_similar("fox", 3)
173        .expect("Failed to find similar words with loaded model");
174
175    println!("\nWords similar to 'fox' using loaded model:");
176    for (word, similarity) in similar_words_loaded {
177        println!("{word}: {similarity:.4}");
178    }
179}
Source

pub fn get_vocabulary(&self) -> Vec<String>

Get the vocabulary as a vector of strings

Source

pub fn get_vector_size(&self) -> usize

Get the vector size

Source

pub fn get_algorithm(&self) -> Word2VecAlgorithm

Get the algorithm

Source

pub fn get_window_size(&self) -> usize

Get the window size

Source

pub fn get_min_count(&self) -> usize

Get the minimum count

Source

pub fn get_embeddings_matrix(&self) -> Option<Array2<f64>>

Get the embeddings matrix (input embeddings)

Source

pub fn get_negative_samples(&self) -> usize

Get the number of negative samples

Source

pub fn get_learning_rate(&self) -> f64

Get the learning rate

Source

pub fn get_epochs(&self) -> usize

Get the number of epochs

Source

pub fn get_subsampling_threshold(&self) -> f64

Get the subsampling threshold

Trait Implementations§

Source§

impl Clone for Word2Vec

Source§

fn clone(&self) -> Self

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for Word2Vec

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Default for Word2Vec

Source§

fn default() -> Self

Returns the “default value” for a type. Read more
Source§

impl RegistrableModel for Word2Vec

Implementation of RegistrableModel for Word2Vec

Source§

fn serialize(&self) -> Result<SerializableModelData>

Serialize model to storable format
Source§

fn deserialize(data: &SerializableModelData) -> Result<Self>

Deserialize model from stored format
Source§

fn model_type(&self) -> ModelType

Get model type
Source§

fn get_config(&self) -> HashMap<String, String>

Get model configuration as string map

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> IntoEither for T

Source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
Source§

impl<T> Pointable for T

Source§

const ALIGN: usize

The alignment of the pointer.
Source§

type Init = T

The type for initializers.
Source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes a pointer with the given initializer. Read more
Source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
Source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
Source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
Source§

impl<T> Same for T

Source§

type Output = T

Should always be Self
Source§

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

Source§

fn to_subset(&self) -> Option<SS>

The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
Source§

fn is_in_subset(&self) -> bool

Checks if self is actually part of its subset T (and can be converted to it).
Source§

fn to_subset_unchecked(&self) -> SS

Use with care! Same as self.to_subset but without any property checks. Always succeeds.
Source§

fn from_subset(element: &SS) -> SP

The inclusion map: converts self to the equivalent element of its superset.
Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V