pub struct LdaBuilder { /* private fields */ }

Expand description
Builder for creating LDA models
Implementations§
Source§
impl LdaBuilder
impl LdaBuilder
Source§
pub fn new() -> Self
pub fn new() -> Self
Create a new builder with default configuration
Examples found in repository?
examples/topic_modeling_demo.rs (line 52)
9fn main() -> Result<(), Box<dyn std::error::Error>> {
10 println!("Topic Modeling with LDA Demo");
11 println!("===========================\n");
12
13 // Sample documents about different topics
14 let documents = vec![
15 // Technology documents
16 "Artificial intelligence and machine learning are transforming the tech industry",
17 "Deep learning neural networks require powerful GPUs for training",
18 "Computer vision algorithms can now recognize objects in real time",
19 "Natural language processing helps computers understand human language",
20 // Sports documents
21 "The basketball team won the championship after a thrilling final game",
22 "Football players need excellent physical conditioning and teamwork",
23 "Tennis requires both physical fitness and mental concentration",
24 "Swimming is an excellent full-body workout and competitive sport",
25 // Science documents
26 "Climate change is affecting global weather patterns and ecosystems",
27 "Quantum physics explores the behavior of matter at atomic scales",
28 "Genetic research is unlocking the secrets of human DNA",
29 "Space exploration continues to reveal mysteries of the universe",
30 ];
31
32 // Convert documents to document-term matrix
33 let mut vectorizer = CountVectorizer::default();
34 let doc_term_matrix = vectorizer.fit_transform(&documents)?;
35
36 println!("Document-Term Matrix:");
37 println!(
38 " Shape: ({}, {})",
39 doc_term_matrix.nrows(),
40 doc_term_matrix.ncols()
41 );
42 println!(" Vocabulary size: {}\n", vectorizer.vocabulary_size());
43
44 // Create vocabulary mapping
45 let vocabulary = vectorizer.vocabulary();
46 let mut word_index_map = HashMap::new();
47 for (word, &idx) in vocabulary.token_to_index().iter() {
48 word_index_map.insert(idx, word.clone());
49 }
50
51 // Train LDA model
52 let mut lda = LdaBuilder::new()
53 .ntopics(3)
54 .maxiter(100)
55 .random_seed(42)
56 .doc_topic_prior(0.1)
57 .topic_word_prior(0.01)
58 .learning_method(LdaLearningMethod::Batch)
59 .build();
60
61 println!("Training LDA model with 3 topics...");
62 let doc_topics = lda.fit_transform(&doc_term_matrix)?;
63 println!("Training completed!\n");
64
65 // Display document-topic assignments
66 println!("Document-Topic Assignments:");
67 for (doc_idx, topic_dist) in doc_topics.outer_iter().enumerate() {
68 let max_topic = topic_dist
69 .iter()
70 .enumerate()
71 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
72 .map(|(idx_, _)| idx_)
73 .unwrap();
74
75 println!(
76 "Document {}: Topic {} (probabilities: {:.3}, {:.3}, {:.3})",
77 doc_idx + 1,
78 max_topic,
79 topic_dist[0],
80 topic_dist[1],
81 topic_dist[2]
82 );
83 }
84 println!();
85
86 // Get topics with top words
87 let topics = lda.get_topics(10, &word_index_map)?;
88
89 println!("Discovered Topics:");
90 for topic in &topics {
91 println!("\nTopic {}:", topic.id);
92 println!("Top words:");
93 for (word, weight) in &topic.top_words {
94 println!(" {word} ({weight:.4})");
95 }
96 }
97
98 // Analyze a new document
99 println!("\n\nAnalyzing a new document:");
100 let new_doc = "Machine learning algorithms are revolutionizing artificial intelligence";
101 let new_doc_vec = vectorizer.transform(new_doc)?;
102 let new_doc_topics = lda.transform(&new_doc_vec.insert_axis(scirs2_core::ndarray::Axis(0)))?;
103
104 println!("Document: \"{new_doc}\"");
105 println!("Topic distribution:");
106 for (topic_idx, &prob) in new_doc_topics.row(0).iter().enumerate() {
107 println!(" Topic {topic_idx}: {prob:.3}");
108 }
109
110 // Create another LDA model with different configuration
111 println!("\n\nTrying different LDA configuration:");
112 let mut lda2 = LatentDirichletAllocation::with_ntopics(4);
113 lda2.fit(&doc_term_matrix)?;
114
115 let topics2 = lda2.get_topics(5, &word_index_map)?;
116 println!("Discovered {} topics with top 5 words each:", topics2.len());
117 for topic in &topics2 {
118 let words: Vec<String> = topic
119 .top_words
120 .iter()
121 .map(|(word_, _)| word_.clone())
122 .collect();
123 println!("Topic {}: {}", topic.id, words.join(", "));
124 }
125
126 Ok(())
127}

Source§
pub fn ntopics(self, ntopics: usize) -> Self
pub fn ntopics(self, ntopics: usize) -> Self
Set the number of topics
Examples found in repository?
examples/topic_modeling_demo.rs (line 53)
9fn main() -> Result<(), Box<dyn std::error::Error>> {
10 println!("Topic Modeling with LDA Demo");
11 println!("===========================\n");
12
13 // Sample documents about different topics
14 let documents = vec![
15 // Technology documents
16 "Artificial intelligence and machine learning are transforming the tech industry",
17 "Deep learning neural networks require powerful GPUs for training",
18 "Computer vision algorithms can now recognize objects in real time",
19 "Natural language processing helps computers understand human language",
20 // Sports documents
21 "The basketball team won the championship after a thrilling final game",
22 "Football players need excellent physical conditioning and teamwork",
23 "Tennis requires both physical fitness and mental concentration",
24 "Swimming is an excellent full-body workout and competitive sport",
25 // Science documents
26 "Climate change is affecting global weather patterns and ecosystems",
27 "Quantum physics explores the behavior of matter at atomic scales",
28 "Genetic research is unlocking the secrets of human DNA",
29 "Space exploration continues to reveal mysteries of the universe",
30 ];
31
32 // Convert documents to document-term matrix
33 let mut vectorizer = CountVectorizer::default();
34 let doc_term_matrix = vectorizer.fit_transform(&documents)?;
35
36 println!("Document-Term Matrix:");
37 println!(
38 " Shape: ({}, {})",
39 doc_term_matrix.nrows(),
40 doc_term_matrix.ncols()
41 );
42 println!(" Vocabulary size: {}\n", vectorizer.vocabulary_size());
43
44 // Create vocabulary mapping
45 let vocabulary = vectorizer.vocabulary();
46 let mut word_index_map = HashMap::new();
47 for (word, &idx) in vocabulary.token_to_index().iter() {
48 word_index_map.insert(idx, word.clone());
49 }
50
51 // Train LDA model
52 let mut lda = LdaBuilder::new()
53 .ntopics(3)
54 .maxiter(100)
55 .random_seed(42)
56 .doc_topic_prior(0.1)
57 .topic_word_prior(0.01)
58 .learning_method(LdaLearningMethod::Batch)
59 .build();
60
61 println!("Training LDA model with 3 topics...");
62 let doc_topics = lda.fit_transform(&doc_term_matrix)?;
63 println!("Training completed!\n");
64
65 // Display document-topic assignments
66 println!("Document-Topic Assignments:");
67 for (doc_idx, topic_dist) in doc_topics.outer_iter().enumerate() {
68 let max_topic = topic_dist
69 .iter()
70 .enumerate()
71 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
72 .map(|(idx_, _)| idx_)
73 .unwrap();
74
75 println!(
76 "Document {}: Topic {} (probabilities: {:.3}, {:.3}, {:.3})",
77 doc_idx + 1,
78 max_topic,
79 topic_dist[0],
80 topic_dist[1],
81 topic_dist[2]
82 );
83 }
84 println!();
85
86 // Get topics with top words
87 let topics = lda.get_topics(10, &word_index_map)?;
88
89 println!("Discovered Topics:");
90 for topic in &topics {
91 println!("\nTopic {}:", topic.id);
92 println!("Top words:");
93 for (word, weight) in &topic.top_words {
94 println!(" {word} ({weight:.4})");
95 }
96 }
97
98 // Analyze a new document
99 println!("\n\nAnalyzing a new document:");
100 let new_doc = "Machine learning algorithms are revolutionizing artificial intelligence";
101 let new_doc_vec = vectorizer.transform(new_doc)?;
102 let new_doc_topics = lda.transform(&new_doc_vec.insert_axis(scirs2_core::ndarray::Axis(0)))?;
103
104 println!("Document: \"{new_doc}\"");
105 println!("Topic distribution:");
106 for (topic_idx, &prob) in new_doc_topics.row(0).iter().enumerate() {
107 println!(" Topic {topic_idx}: {prob:.3}");
108 }
109
110 // Create another LDA model with different configuration
111 println!("\n\nTrying different LDA configuration:");
112 let mut lda2 = LatentDirichletAllocation::with_ntopics(4);
113 lda2.fit(&doc_term_matrix)?;
114
115 let topics2 = lda2.get_topics(5, &word_index_map)?;
116 println!("Discovered {} topics with top 5 words each:", topics2.len());
117 for topic in &topics2 {
118 let words: Vec<String> = topic
119 .top_words
120 .iter()
121 .map(|(word_, _)| word_.clone())
122 .collect();
123 println!("Topic {}: {}", topic.id, words.join(", "));
124 }
125
126 Ok(())
127}

Source§
pub fn doc_topic_prior(self, prior: f64) -> Self
pub fn doc_topic_prior(self, prior: f64) -> Self
Set the document-topic prior (alpha)
Examples found in repository?
examples/topic_modeling_demo.rs (line 56)
9fn main() -> Result<(), Box<dyn std::error::Error>> {
10 println!("Topic Modeling with LDA Demo");
11 println!("===========================\n");
12
13 // Sample documents about different topics
14 let documents = vec![
15 // Technology documents
16 "Artificial intelligence and machine learning are transforming the tech industry",
17 "Deep learning neural networks require powerful GPUs for training",
18 "Computer vision algorithms can now recognize objects in real time",
19 "Natural language processing helps computers understand human language",
20 // Sports documents
21 "The basketball team won the championship after a thrilling final game",
22 "Football players need excellent physical conditioning and teamwork",
23 "Tennis requires both physical fitness and mental concentration",
24 "Swimming is an excellent full-body workout and competitive sport",
25 // Science documents
26 "Climate change is affecting global weather patterns and ecosystems",
27 "Quantum physics explores the behavior of matter at atomic scales",
28 "Genetic research is unlocking the secrets of human DNA",
29 "Space exploration continues to reveal mysteries of the universe",
30 ];
31
32 // Convert documents to document-term matrix
33 let mut vectorizer = CountVectorizer::default();
34 let doc_term_matrix = vectorizer.fit_transform(&documents)?;
35
36 println!("Document-Term Matrix:");
37 println!(
38 " Shape: ({}, {})",
39 doc_term_matrix.nrows(),
40 doc_term_matrix.ncols()
41 );
42 println!(" Vocabulary size: {}\n", vectorizer.vocabulary_size());
43
44 // Create vocabulary mapping
45 let vocabulary = vectorizer.vocabulary();
46 let mut word_index_map = HashMap::new();
47 for (word, &idx) in vocabulary.token_to_index().iter() {
48 word_index_map.insert(idx, word.clone());
49 }
50
51 // Train LDA model
52 let mut lda = LdaBuilder::new()
53 .ntopics(3)
54 .maxiter(100)
55 .random_seed(42)
56 .doc_topic_prior(0.1)
57 .topic_word_prior(0.01)
58 .learning_method(LdaLearningMethod::Batch)
59 .build();
60
61 println!("Training LDA model with 3 topics...");
62 let doc_topics = lda.fit_transform(&doc_term_matrix)?;
63 println!("Training completed!\n");
64
65 // Display document-topic assignments
66 println!("Document-Topic Assignments:");
67 for (doc_idx, topic_dist) in doc_topics.outer_iter().enumerate() {
68 let max_topic = topic_dist
69 .iter()
70 .enumerate()
71 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
72 .map(|(idx_, _)| idx_)
73 .unwrap();
74
75 println!(
76 "Document {}: Topic {} (probabilities: {:.3}, {:.3}, {:.3})",
77 doc_idx + 1,
78 max_topic,
79 topic_dist[0],
80 topic_dist[1],
81 topic_dist[2]
82 );
83 }
84 println!();
85
86 // Get topics with top words
87 let topics = lda.get_topics(10, &word_index_map)?;
88
89 println!("Discovered Topics:");
90 for topic in &topics {
91 println!("\nTopic {}:", topic.id);
92 println!("Top words:");
93 for (word, weight) in &topic.top_words {
94 println!(" {word} ({weight:.4})");
95 }
96 }
97
98 // Analyze a new document
99 println!("\n\nAnalyzing a new document:");
100 let new_doc = "Machine learning algorithms are revolutionizing artificial intelligence";
101 let new_doc_vec = vectorizer.transform(new_doc)?;
102 let new_doc_topics = lda.transform(&new_doc_vec.insert_axis(scirs2_core::ndarray::Axis(0)))?;
103
104 println!("Document: \"{new_doc}\"");
105 println!("Topic distribution:");
106 for (topic_idx, &prob) in new_doc_topics.row(0).iter().enumerate() {
107 println!(" Topic {topic_idx}: {prob:.3}");
108 }
109
110 // Create another LDA model with different configuration
111 println!("\n\nTrying different LDA configuration:");
112 let mut lda2 = LatentDirichletAllocation::with_ntopics(4);
113 lda2.fit(&doc_term_matrix)?;
114
115 let topics2 = lda2.get_topics(5, &word_index_map)?;
116 println!("Discovered {} topics with top 5 words each:", topics2.len());
117 for topic in &topics2 {
118 let words: Vec<String> = topic
119 .top_words
120 .iter()
121 .map(|(word_, _)| word_.clone())
122 .collect();
123 println!("Topic {}: {}", topic.id, words.join(", "));
124 }
125
126 Ok(())
127}

Source§
pub fn topic_word_prior(self, prior: f64) -> Self
pub fn topic_word_prior(self, prior: f64) -> Self
Set the topic-word prior (eta)
Examples found in repository?
examples/topic_modeling_demo.rs (line 57)
9fn main() -> Result<(), Box<dyn std::error::Error>> {
10 println!("Topic Modeling with LDA Demo");
11 println!("===========================\n");
12
13 // Sample documents about different topics
14 let documents = vec![
15 // Technology documents
16 "Artificial intelligence and machine learning are transforming the tech industry",
17 "Deep learning neural networks require powerful GPUs for training",
18 "Computer vision algorithms can now recognize objects in real time",
19 "Natural language processing helps computers understand human language",
20 // Sports documents
21 "The basketball team won the championship after a thrilling final game",
22 "Football players need excellent physical conditioning and teamwork",
23 "Tennis requires both physical fitness and mental concentration",
24 "Swimming is an excellent full-body workout and competitive sport",
25 // Science documents
26 "Climate change is affecting global weather patterns and ecosystems",
27 "Quantum physics explores the behavior of matter at atomic scales",
28 "Genetic research is unlocking the secrets of human DNA",
29 "Space exploration continues to reveal mysteries of the universe",
30 ];
31
32 // Convert documents to document-term matrix
33 let mut vectorizer = CountVectorizer::default();
34 let doc_term_matrix = vectorizer.fit_transform(&documents)?;
35
36 println!("Document-Term Matrix:");
37 println!(
38 " Shape: ({}, {})",
39 doc_term_matrix.nrows(),
40 doc_term_matrix.ncols()
41 );
42 println!(" Vocabulary size: {}\n", vectorizer.vocabulary_size());
43
44 // Create vocabulary mapping
45 let vocabulary = vectorizer.vocabulary();
46 let mut word_index_map = HashMap::new();
47 for (word, &idx) in vocabulary.token_to_index().iter() {
48 word_index_map.insert(idx, word.clone());
49 }
50
51 // Train LDA model
52 let mut lda = LdaBuilder::new()
53 .ntopics(3)
54 .maxiter(100)
55 .random_seed(42)
56 .doc_topic_prior(0.1)
57 .topic_word_prior(0.01)
58 .learning_method(LdaLearningMethod::Batch)
59 .build();
60
61 println!("Training LDA model with 3 topics...");
62 let doc_topics = lda.fit_transform(&doc_term_matrix)?;
63 println!("Training completed!\n");
64
65 // Display document-topic assignments
66 println!("Document-Topic Assignments:");
67 for (doc_idx, topic_dist) in doc_topics.outer_iter().enumerate() {
68 let max_topic = topic_dist
69 .iter()
70 .enumerate()
71 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
72 .map(|(idx_, _)| idx_)
73 .unwrap();
74
75 println!(
76 "Document {}: Topic {} (probabilities: {:.3}, {:.3}, {:.3})",
77 doc_idx + 1,
78 max_topic,
79 topic_dist[0],
80 topic_dist[1],
81 topic_dist[2]
82 );
83 }
84 println!();
85
86 // Get topics with top words
87 let topics = lda.get_topics(10, &word_index_map)?;
88
89 println!("Discovered Topics:");
90 for topic in &topics {
91 println!("\nTopic {}:", topic.id);
92 println!("Top words:");
93 for (word, weight) in &topic.top_words {
94 println!(" {word} ({weight:.4})");
95 }
96 }
97
98 // Analyze a new document
99 println!("\n\nAnalyzing a new document:");
100 let new_doc = "Machine learning algorithms are revolutionizing artificial intelligence";
101 let new_doc_vec = vectorizer.transform(new_doc)?;
102 let new_doc_topics = lda.transform(&new_doc_vec.insert_axis(scirs2_core::ndarray::Axis(0)))?;
103
104 println!("Document: \"{new_doc}\"");
105 println!("Topic distribution:");
106 for (topic_idx, &prob) in new_doc_topics.row(0).iter().enumerate() {
107 println!(" Topic {topic_idx}: {prob:.3}");
108 }
109
110 // Create another LDA model with different configuration
111 println!("\n\nTrying different LDA configuration:");
112 let mut lda2 = LatentDirichletAllocation::with_ntopics(4);
113 lda2.fit(&doc_term_matrix)?;
114
115 let topics2 = lda2.get_topics(5, &word_index_map)?;
116 println!("Discovered {} topics with top 5 words each:", topics2.len());
117 for topic in &topics2 {
118 let words: Vec<String> = topic
119 .top_words
120 .iter()
121 .map(|(word_, _)| word_.clone())
122 .collect();
123 println!("Topic {}: {}", topic.id, words.join(", "));
124 }
125
126 Ok(())
127}

Source§
pub fn learning_method(self, method: LdaLearningMethod) -> Self
pub fn learning_method(self, method: LdaLearningMethod) -> Self
Set the learning method
Examples found in repository?
examples/topic_modeling_demo.rs (line 58)
9fn main() -> Result<(), Box<dyn std::error::Error>> {
10 println!("Topic Modeling with LDA Demo");
11 println!("===========================\n");
12
13 // Sample documents about different topics
14 let documents = vec![
15 // Technology documents
16 "Artificial intelligence and machine learning are transforming the tech industry",
17 "Deep learning neural networks require powerful GPUs for training",
18 "Computer vision algorithms can now recognize objects in real time",
19 "Natural language processing helps computers understand human language",
20 // Sports documents
21 "The basketball team won the championship after a thrilling final game",
22 "Football players need excellent physical conditioning and teamwork",
23 "Tennis requires both physical fitness and mental concentration",
24 "Swimming is an excellent full-body workout and competitive sport",
25 // Science documents
26 "Climate change is affecting global weather patterns and ecosystems",
27 "Quantum physics explores the behavior of matter at atomic scales",
28 "Genetic research is unlocking the secrets of human DNA",
29 "Space exploration continues to reveal mysteries of the universe",
30 ];
31
32 // Convert documents to document-term matrix
33 let mut vectorizer = CountVectorizer::default();
34 let doc_term_matrix = vectorizer.fit_transform(&documents)?;
35
36 println!("Document-Term Matrix:");
37 println!(
38 " Shape: ({}, {})",
39 doc_term_matrix.nrows(),
40 doc_term_matrix.ncols()
41 );
42 println!(" Vocabulary size: {}\n", vectorizer.vocabulary_size());
43
44 // Create vocabulary mapping
45 let vocabulary = vectorizer.vocabulary();
46 let mut word_index_map = HashMap::new();
47 for (word, &idx) in vocabulary.token_to_index().iter() {
48 word_index_map.insert(idx, word.clone());
49 }
50
51 // Train LDA model
52 let mut lda = LdaBuilder::new()
53 .ntopics(3)
54 .maxiter(100)
55 .random_seed(42)
56 .doc_topic_prior(0.1)
57 .topic_word_prior(0.01)
58 .learning_method(LdaLearningMethod::Batch)
59 .build();
60
61 println!("Training LDA model with 3 topics...");
62 let doc_topics = lda.fit_transform(&doc_term_matrix)?;
63 println!("Training completed!\n");
64
65 // Display document-topic assignments
66 println!("Document-Topic Assignments:");
67 for (doc_idx, topic_dist) in doc_topics.outer_iter().enumerate() {
68 let max_topic = topic_dist
69 .iter()
70 .enumerate()
71 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
72 .map(|(idx_, _)| idx_)
73 .unwrap();
74
75 println!(
76 "Document {}: Topic {} (probabilities: {:.3}, {:.3}, {:.3})",
77 doc_idx + 1,
78 max_topic,
79 topic_dist[0],
80 topic_dist[1],
81 topic_dist[2]
82 );
83 }
84 println!();
85
86 // Get topics with top words
87 let topics = lda.get_topics(10, &word_index_map)?;
88
89 println!("Discovered Topics:");
90 for topic in &topics {
91 println!("\nTopic {}:", topic.id);
92 println!("Top words:");
93 for (word, weight) in &topic.top_words {
94 println!(" {word} ({weight:.4})");
95 }
96 }
97
98 // Analyze a new document
99 println!("\n\nAnalyzing a new document:");
100 let new_doc = "Machine learning algorithms are revolutionizing artificial intelligence";
101 let new_doc_vec = vectorizer.transform(new_doc)?;
102 let new_doc_topics = lda.transform(&new_doc_vec.insert_axis(scirs2_core::ndarray::Axis(0)))?;
103
104 println!("Document: \"{new_doc}\"");
105 println!("Topic distribution:");
106 for (topic_idx, &prob) in new_doc_topics.row(0).iter().enumerate() {
107 println!(" Topic {topic_idx}: {prob:.3}");
108 }
109
110 // Create another LDA model with different configuration
111 println!("\n\nTrying different LDA configuration:");
112 let mut lda2 = LatentDirichletAllocation::with_ntopics(4);
113 lda2.fit(&doc_term_matrix)?;
114
115 let topics2 = lda2.get_topics(5, &word_index_map)?;
116 println!("Discovered {} topics with top 5 words each:", topics2.len());
117 for topic in &topics2 {
118 let words: Vec<String> = topic
119 .top_words
120 .iter()
121 .map(|(word_, _)| word_.clone())
122 .collect();
123 println!("Topic {}: {}", topic.id, words.join(", "));
124 }
125
126 Ok(())
127}

Source§
pub fn maxiter(self, maxiter: usize) -> Self
pub fn maxiter(self, maxiter: usize) -> Self
Set the maximum iterations
Examples found in repository?
examples/topic_modeling_demo.rs (line 54)
9fn main() -> Result<(), Box<dyn std::error::Error>> {
10 println!("Topic Modeling with LDA Demo");
11 println!("===========================\n");
12
13 // Sample documents about different topics
14 let documents = vec![
15 // Technology documents
16 "Artificial intelligence and machine learning are transforming the tech industry",
17 "Deep learning neural networks require powerful GPUs for training",
18 "Computer vision algorithms can now recognize objects in real time",
19 "Natural language processing helps computers understand human language",
20 // Sports documents
21 "The basketball team won the championship after a thrilling final game",
22 "Football players need excellent physical conditioning and teamwork",
23 "Tennis requires both physical fitness and mental concentration",
24 "Swimming is an excellent full-body workout and competitive sport",
25 // Science documents
26 "Climate change is affecting global weather patterns and ecosystems",
27 "Quantum physics explores the behavior of matter at atomic scales",
28 "Genetic research is unlocking the secrets of human DNA",
29 "Space exploration continues to reveal mysteries of the universe",
30 ];
31
32 // Convert documents to document-term matrix
33 let mut vectorizer = CountVectorizer::default();
34 let doc_term_matrix = vectorizer.fit_transform(&documents)?;
35
36 println!("Document-Term Matrix:");
37 println!(
38 " Shape: ({}, {})",
39 doc_term_matrix.nrows(),
40 doc_term_matrix.ncols()
41 );
42 println!(" Vocabulary size: {}\n", vectorizer.vocabulary_size());
43
44 // Create vocabulary mapping
45 let vocabulary = vectorizer.vocabulary();
46 let mut word_index_map = HashMap::new();
47 for (word, &idx) in vocabulary.token_to_index().iter() {
48 word_index_map.insert(idx, word.clone());
49 }
50
51 // Train LDA model
52 let mut lda = LdaBuilder::new()
53 .ntopics(3)
54 .maxiter(100)
55 .random_seed(42)
56 .doc_topic_prior(0.1)
57 .topic_word_prior(0.01)
58 .learning_method(LdaLearningMethod::Batch)
59 .build();
60
61 println!("Training LDA model with 3 topics...");
62 let doc_topics = lda.fit_transform(&doc_term_matrix)?;
63 println!("Training completed!\n");
64
65 // Display document-topic assignments
66 println!("Document-Topic Assignments:");
67 for (doc_idx, topic_dist) in doc_topics.outer_iter().enumerate() {
68 let max_topic = topic_dist
69 .iter()
70 .enumerate()
71 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
72 .map(|(idx_, _)| idx_)
73 .unwrap();
74
75 println!(
76 "Document {}: Topic {} (probabilities: {:.3}, {:.3}, {:.3})",
77 doc_idx + 1,
78 max_topic,
79 topic_dist[0],
80 topic_dist[1],
81 topic_dist[2]
82 );
83 }
84 println!();
85
86 // Get topics with top words
87 let topics = lda.get_topics(10, &word_index_map)?;
88
89 println!("Discovered Topics:");
90 for topic in &topics {
91 println!("\nTopic {}:", topic.id);
92 println!("Top words:");
93 for (word, weight) in &topic.top_words {
94 println!(" {word} ({weight:.4})");
95 }
96 }
97
98 // Analyze a new document
99 println!("\n\nAnalyzing a new document:");
100 let new_doc = "Machine learning algorithms are revolutionizing artificial intelligence";
101 let new_doc_vec = vectorizer.transform(new_doc)?;
102 let new_doc_topics = lda.transform(&new_doc_vec.insert_axis(scirs2_core::ndarray::Axis(0)))?;
103
104 println!("Document: \"{new_doc}\"");
105 println!("Topic distribution:");
106 for (topic_idx, &prob) in new_doc_topics.row(0).iter().enumerate() {
107 println!(" Topic {topic_idx}: {prob:.3}");
108 }
109
110 // Create another LDA model with different configuration
111 println!("\n\nTrying different LDA configuration:");
112 let mut lda2 = LatentDirichletAllocation::with_ntopics(4);
113 lda2.fit(&doc_term_matrix)?;
114
115 let topics2 = lda2.get_topics(5, &word_index_map)?;
116 println!("Discovered {} topics with top 5 words each:", topics2.len());
117 for topic in &topics2 {
118 let words: Vec<String> = topic
119 .top_words
120 .iter()
121 .map(|(word_, _)| word_.clone())
122 .collect();
123 println!("Topic {}: {}", topic.id, words.join(", "));
124 }
125
126 Ok(())
127}

Source§
pub fn random_seed(self, seed: u64) -> Self
pub fn random_seed(self, seed: u64) -> Self
Set the random seed
Examples found in repository?
examples/topic_modeling_demo.rs (line 55)
9fn main() -> Result<(), Box<dyn std::error::Error>> {
10 println!("Topic Modeling with LDA Demo");
11 println!("===========================\n");
12
13 // Sample documents about different topics
14 let documents = vec![
15 // Technology documents
16 "Artificial intelligence and machine learning are transforming the tech industry",
17 "Deep learning neural networks require powerful GPUs for training",
18 "Computer vision algorithms can now recognize objects in real time",
19 "Natural language processing helps computers understand human language",
20 // Sports documents
21 "The basketball team won the championship after a thrilling final game",
22 "Football players need excellent physical conditioning and teamwork",
23 "Tennis requires both physical fitness and mental concentration",
24 "Swimming is an excellent full-body workout and competitive sport",
25 // Science documents
26 "Climate change is affecting global weather patterns and ecosystems",
27 "Quantum physics explores the behavior of matter at atomic scales",
28 "Genetic research is unlocking the secrets of human DNA",
29 "Space exploration continues to reveal mysteries of the universe",
30 ];
31
32 // Convert documents to document-term matrix
33 let mut vectorizer = CountVectorizer::default();
34 let doc_term_matrix = vectorizer.fit_transform(&documents)?;
35
36 println!("Document-Term Matrix:");
37 println!(
38 " Shape: ({}, {})",
39 doc_term_matrix.nrows(),
40 doc_term_matrix.ncols()
41 );
42 println!(" Vocabulary size: {}\n", vectorizer.vocabulary_size());
43
44 // Create vocabulary mapping
45 let vocabulary = vectorizer.vocabulary();
46 let mut word_index_map = HashMap::new();
47 for (word, &idx) in vocabulary.token_to_index().iter() {
48 word_index_map.insert(idx, word.clone());
49 }
50
51 // Train LDA model
52 let mut lda = LdaBuilder::new()
53 .ntopics(3)
54 .maxiter(100)
55 .random_seed(42)
56 .doc_topic_prior(0.1)
57 .topic_word_prior(0.01)
58 .learning_method(LdaLearningMethod::Batch)
59 .build();
60
61 println!("Training LDA model with 3 topics...");
62 let doc_topics = lda.fit_transform(&doc_term_matrix)?;
63 println!("Training completed!\n");
64
65 // Display document-topic assignments
66 println!("Document-Topic Assignments:");
67 for (doc_idx, topic_dist) in doc_topics.outer_iter().enumerate() {
68 let max_topic = topic_dist
69 .iter()
70 .enumerate()
71 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
72 .map(|(idx_, _)| idx_)
73 .unwrap();
74
75 println!(
76 "Document {}: Topic {} (probabilities: {:.3}, {:.3}, {:.3})",
77 doc_idx + 1,
78 max_topic,
79 topic_dist[0],
80 topic_dist[1],
81 topic_dist[2]
82 );
83 }
84 println!();
85
86 // Get topics with top words
87 let topics = lda.get_topics(10, &word_index_map)?;
88
89 println!("Discovered Topics:");
90 for topic in &topics {
91 println!("\nTopic {}:", topic.id);
92 println!("Top words:");
93 for (word, weight) in &topic.top_words {
94 println!(" {word} ({weight:.4})");
95 }
96 }
97
98 // Analyze a new document
99 println!("\n\nAnalyzing a new document:");
100 let new_doc = "Machine learning algorithms are revolutionizing artificial intelligence";
101 let new_doc_vec = vectorizer.transform(new_doc)?;
102 let new_doc_topics = lda.transform(&new_doc_vec.insert_axis(scirs2_core::ndarray::Axis(0)))?;
103
104 println!("Document: \"{new_doc}\"");
105 println!("Topic distribution:");
106 for (topic_idx, &prob) in new_doc_topics.row(0).iter().enumerate() {
107 println!(" Topic {topic_idx}: {prob:.3}");
108 }
109
110 // Create another LDA model with different configuration
111 println!("\n\nTrying different LDA configuration:");
112 let mut lda2 = LatentDirichletAllocation::with_ntopics(4);
113 lda2.fit(&doc_term_matrix)?;
114
115 let topics2 = lda2.get_topics(5, &word_index_map)?;
116 println!("Discovered {} topics with top 5 words each:", topics2.len());
117 for topic in &topics2 {
118 let words: Vec<String> = topic
119 .top_words
120 .iter()
121 .map(|(word_, _)| word_.clone())
122 .collect();
123 println!("Topic {}: {}", topic.id, words.join(", "));
124 }
125
126 Ok(())
127}

Source§
pub fn build(self) -> LatentDirichletAllocation
pub fn build(self) -> LatentDirichletAllocation
Build the LDA model
Examples found in repository?
examples/topic_modeling_demo.rs (line 59)
9fn main() -> Result<(), Box<dyn std::error::Error>> {
10 println!("Topic Modeling with LDA Demo");
11 println!("===========================\n");
12
13 // Sample documents about different topics
14 let documents = vec![
15 // Technology documents
16 "Artificial intelligence and machine learning are transforming the tech industry",
17 "Deep learning neural networks require powerful GPUs for training",
18 "Computer vision algorithms can now recognize objects in real time",
19 "Natural language processing helps computers understand human language",
20 // Sports documents
21 "The basketball team won the championship after a thrilling final game",
22 "Football players need excellent physical conditioning and teamwork",
23 "Tennis requires both physical fitness and mental concentration",
24 "Swimming is an excellent full-body workout and competitive sport",
25 // Science documents
26 "Climate change is affecting global weather patterns and ecosystems",
27 "Quantum physics explores the behavior of matter at atomic scales",
28 "Genetic research is unlocking the secrets of human DNA",
29 "Space exploration continues to reveal mysteries of the universe",
30 ];
31
32 // Convert documents to document-term matrix
33 let mut vectorizer = CountVectorizer::default();
34 let doc_term_matrix = vectorizer.fit_transform(&documents)?;
35
36 println!("Document-Term Matrix:");
37 println!(
38 " Shape: ({}, {})",
39 doc_term_matrix.nrows(),
40 doc_term_matrix.ncols()
41 );
42 println!(" Vocabulary size: {}\n", vectorizer.vocabulary_size());
43
44 // Create vocabulary mapping
45 let vocabulary = vectorizer.vocabulary();
46 let mut word_index_map = HashMap::new();
47 for (word, &idx) in vocabulary.token_to_index().iter() {
48 word_index_map.insert(idx, word.clone());
49 }
50
51 // Train LDA model
52 let mut lda = LdaBuilder::new()
53 .ntopics(3)
54 .maxiter(100)
55 .random_seed(42)
56 .doc_topic_prior(0.1)
57 .topic_word_prior(0.01)
58 .learning_method(LdaLearningMethod::Batch)
59 .build();
60
61 println!("Training LDA model with 3 topics...");
62 let doc_topics = lda.fit_transform(&doc_term_matrix)?;
63 println!("Training completed!\n");
64
65 // Display document-topic assignments
66 println!("Document-Topic Assignments:");
67 for (doc_idx, topic_dist) in doc_topics.outer_iter().enumerate() {
68 let max_topic = topic_dist
69 .iter()
70 .enumerate()
71 .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
72 .map(|(idx_, _)| idx_)
73 .unwrap();
74
75 println!(
76 "Document {}: Topic {} (probabilities: {:.3}, {:.3}, {:.3})",
77 doc_idx + 1,
78 max_topic,
79 topic_dist[0],
80 topic_dist[1],
81 topic_dist[2]
82 );
83 }
84 println!();
85
86 // Get topics with top words
87 let topics = lda.get_topics(10, &word_index_map)?;
88
89 println!("Discovered Topics:");
90 for topic in &topics {
91 println!("\nTopic {}:", topic.id);
92 println!("Top words:");
93 for (word, weight) in &topic.top_words {
94 println!(" {word} ({weight:.4})");
95 }
96 }
97
98 // Analyze a new document
99 println!("\n\nAnalyzing a new document:");
100 let new_doc = "Machine learning algorithms are revolutionizing artificial intelligence";
101 let new_doc_vec = vectorizer.transform(new_doc)?;
102 let new_doc_topics = lda.transform(&new_doc_vec.insert_axis(scirs2_core::ndarray::Axis(0)))?;
103
104 println!("Document: \"{new_doc}\"");
105 println!("Topic distribution:");
106 for (topic_idx, &prob) in new_doc_topics.row(0).iter().enumerate() {
107 println!(" Topic {topic_idx}: {prob:.3}");
108 }
109
110 // Create another LDA model with different configuration
111 println!("\n\nTrying different LDA configuration:");
112 let mut lda2 = LatentDirichletAllocation::with_ntopics(4);
113 lda2.fit(&doc_term_matrix)?;
114
115 let topics2 = lda2.get_topics(5, &word_index_map)?;
116 println!("Discovered {} topics with top 5 words each:", topics2.len());
117 for topic in &topics2 {
118 let words: Vec<String> = topic
119 .top_words
120 .iter()
121 .map(|(word_, _)| word_.clone())
122 .collect();
123 println!("Topic {}: {}", topic.id, words.join(", "));
124 }
125
126 Ok(())
127}

Trait Implementations§
Auto Trait Implementations§
impl Freeze for LdaBuilder
impl RefUnwindSafe for LdaBuilder
impl Send for LdaBuilder
impl Sync for LdaBuilder
impl Unpin for LdaBuilder
impl UnwindSafe for LdaBuilder
Blanket Implementations§
Source§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§
fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more
Source§
impl<T> IntoEither for T
impl<T> IntoEither for T
Source§
fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left is true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source§
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true.
Converts self into a Right variant of Either<Self, Self> otherwise. Read more

Source§
impl<T> Pointable for T
impl<T> Pointable for T
Source§
impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SP
where
    SS: SubsetOf<SP>,
Source§
fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more

Source§
fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
Checks if self is actually part of its subset T (and can be converted to it).

Source§
fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
Use with care! Same as self.to_subset but without any property checks. Always succeeds.

Source§
fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
The inclusion map: converts self to the equivalent element of its superset.