1use crate::error::{Result, TextError};
13use std::collections::{HashMap, HashSet};
14
/// Returns the built-in English stop-word set.
///
/// Every entry is lowercase. The words are grouped by word class below purely
/// for readability; the returned set carries no ordering.
pub fn english_stop_words() -> HashSet<String> {
    // Each word appears once: the result is a set, so repeated entries would
    // contribute nothing.
    const GROUPS: &[&[&str]] = &[
        // Articles and coordinating conjunctions.
        &["a", "an", "the", "and", "or", "but", "nor", "for", "yet", "so"],
        // Prepositions and directional adverbs.
        &[
            "in", "on", "at", "to", "of", "with", "by", "from", "as", "into", "through",
            "during", "before", "after", "above", "below", "between", "out", "off", "over",
            "under", "again", "further", "then", "once", "about", "against", "along",
            "around", "up", "down",
        ],
        // Pronouns.
        &[
            "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your",
            "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her",
            "hers", "herself", "it", "its", "itself", "they", "them", "their", "theirs",
            "themselves",
        ],
        // Interrogatives and demonstratives.
        &["what", "which", "who", "whom", "this", "that", "these", "those"],
        // Auxiliary and modal verbs.
        &[
            "is", "am", "are", "was", "were", "be", "been", "being", "have", "has", "had",
            "having", "do", "does", "did", "doing", "will", "would", "shall", "should",
            "may", "might", "must", "can", "could",
        ],
        // Negations, adverbs, quantifiers and subordinators.
        &[
            "not", "no", "very", "just", "here", "there", "when", "where", "why", "how",
            "all", "each", "every", "both", "few", "more", "most", "other", "some", "such",
            "only", "own", "same", "than", "too", "also", "any", "because", "if", "while",
        ],
        // Number words.
        &[
            "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
        ],
    ];

    GROUPS
        .iter()
        .flat_map(|group| group.iter().copied())
        .map(String::from)
        .collect()
}
176
/// Tokenises `text` into lowercase words.
///
/// Every non-alphanumeric character acts as a separator; empty pieces (from
/// consecutive separators) are dropped. Words are returned in order of
/// appearance.
fn words_lower(text: &str) -> Vec<String> {
    let mut lowered = Vec::new();
    for piece in text.split(|ch: char| !ch.is_alphanumeric()) {
        if piece.is_empty() {
            continue;
        }
        lowered.push(piece.to_lowercase());
    }
    lowered
}
187
/// Splits `text` into sentences at `.`, `!` and `?`.
///
/// Each terminator stays attached to the sentence it ends. Pieces are trimmed
/// of surrounding whitespace and pieces that end up empty are dropped. Any
/// trailing text without a terminator becomes the last sentence. Repeated
/// terminators each form their own one-character piece (`"a..."` gives
/// `"a."`, `"."`, `"."`).
fn split_sentences(text: &str) -> Vec<String> {
    text.split_inclusive(|ch: char| matches!(ch, '.' | '!' | '?'))
        .map(str::trim)
        .filter(|piece| !piece.is_empty())
        .map(String::from)
        .collect()
}
208
/// RAKE-style keyword extractor.
///
/// Text is cut into candidate phrases at stop words, short tokens and
/// punctuation; phrases are then ranked by the summed degree/frequency ratio
/// of their words.
#[derive(Debug, Clone)]
pub struct Rake {
    /// Words that terminate a candidate phrase instead of joining it.
    stop_words: HashSet<String>,
    /// Tokens shorter than this are treated as phrase delimiters.
    pub min_word_len: usize,
    /// Candidate phrases with more words than this are discarded.
    pub max_phrase_words: usize,
}
234
235impl Default for Rake {
236 fn default() -> Self {
237 Self::new()
238 }
239}
240
241impl Rake {
242 pub fn new() -> Self {
244 Self {
245 stop_words: english_stop_words(),
246 min_word_len: 3,
247 max_phrase_words: 5,
248 }
249 }
250
251 pub fn with_stop_words(words: Vec<String>) -> Self {
253 Self {
254 stop_words: words.into_iter().collect(),
255 min_word_len: 3,
256 max_phrase_words: 5,
257 }
258 }
259
260 pub fn extract(&self, text: &str, top_n: usize) -> Result<Vec<(String, f64)>> {
264 if top_n == 0 {
265 return Err(TextError::InvalidInput("top_n must be > 0".to_string()));
266 }
267 if text.trim().is_empty() {
268 return Ok(Vec::new());
269 }
270
271 let candidates = self.generate_candidates(text);
272 if candidates.is_empty() {
273 return Ok(Vec::new());
274 }
275
276 let mut word_freq: HashMap<String, f64> = HashMap::new();
277 let mut word_degree: HashMap<String, f64> = HashMap::new();
278
279 for phrase in &candidates {
280 let words = self.phrase_words(phrase);
281 let degree = words.len() as f64;
282 for word in &words {
283 *word_freq.entry(word.clone()).or_insert(0.0) += 1.0;
284 *word_degree.entry(word.clone()).or_insert(0.0) += degree;
285 }
286 }
287
288 let word_score: HashMap<String, f64> = word_freq
289 .iter()
290 .map(|(w, freq)| {
291 let deg = word_degree.get(w).copied().unwrap_or(0.0);
292 (w.clone(), if *freq > 0.0 { deg / freq } else { 0.0 })
293 })
294 .collect();
295
296 let mut seen: HashSet<String> = HashSet::new();
297 let mut scored: Vec<(String, f64)> = Vec::new();
298
299 for phrase in &candidates {
300 let key = phrase.to_lowercase();
301 if seen.contains(&key) {
302 continue;
303 }
304 seen.insert(key);
305
306 let words = self.phrase_words(phrase);
307 if words.is_empty() {
308 continue;
309 }
310 let score: f64 = words
311 .iter()
312 .map(|w| word_score.get(w).copied().unwrap_or(0.0))
313 .sum();
314 scored.push((phrase.clone(), score));
315 }
316
317 scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
318 scored.truncate(top_n);
319 Ok(scored)
320 }
321
322 fn is_delimiter(&self, word: &str) -> bool {
323 word.is_empty()
324 || self.stop_words.contains(&word.to_lowercase())
325 || word.len() < self.min_word_len
326 || word.chars().all(|c| !c.is_alphanumeric())
327 }
328
329 fn phrase_words(&self, phrase: &str) -> Vec<String> {
330 phrase
331 .split_whitespace()
332 .filter(|w| w.len() >= self.min_word_len)
333 .map(|w| w.to_lowercase())
334 .collect()
335 }
336
337 fn generate_candidates(&self, text: &str) -> Vec<String> {
338 let mut candidates: Vec<String> = Vec::new();
339 let mut current: Vec<String> = Vec::new();
340
341 for raw_token in text.split(|c: char| !c.is_alphanumeric() && c != '\'') {
342 let token = raw_token.trim().to_lowercase();
343 if self.is_delimiter(&token) {
344 if !current.is_empty() {
345 if current.len() <= self.max_phrase_words {
346 candidates.push(current.join(" "));
347 }
348 current.clear();
349 }
350 } else {
351 current.push(token);
352 }
353 }
354 if !current.is_empty() && current.len() <= self.max_phrase_words {
355 candidates.push(current.join(" "));
356 }
357 candidates
358 }
359}
360
/// YAKE-style keyword extractor: scores n-grams from per-word statistics
/// (frequency, first position, capitalisation, context diversity). Lower raw
/// scores rank first.
#[derive(Debug, Clone, PartialEq)]
pub struct Yake {
    /// Language tag; `Yake::new` sets it to `"en"`.
    // NOTE(review): extraction in this file always uses the English stop-word
    // list and does not appear to read this field — confirm before relying on it.
    pub language: String,
    /// Largest n-gram size (in words) generated as a candidate.
    pub max_ngram_size: usize,
    /// Token-set Jaccard similarity at or above which a candidate is dropped
    /// as a duplicate of a better-ranked one.
    pub dedup_threshold: f64,
    /// Number of following words recorded as a word's right-hand context.
    pub window_size: usize,
}
390
391impl Default for Yake {
392 fn default() -> Self {
393 Self::new(2)
394 }
395}
396
397impl Yake {
398 pub fn new(max_ngram: usize) -> Self {
400 Self {
401 language: "en".to_string(),
402 max_ngram_size: max_ngram.max(1),
403 dedup_threshold: 0.9,
404 window_size: 2,
405 }
406 }
407
408 pub fn extract(&self, text: &str, top_n: usize) -> Result<Vec<(String, f64)>> {
412 if top_n == 0 {
413 return Err(TextError::InvalidInput("top_n must be > 0".to_string()));
414 }
415 if text.trim().is_empty() {
416 return Ok(Vec::new());
417 }
418
419 let stop_words = english_stop_words();
420 let total_words: Vec<String> = words_lower(text);
421 let n = total_words.len();
422
423 if n == 0 {
424 return Ok(Vec::new());
425 }
426
427 let mut tf: HashMap<String, usize> = HashMap::new();
429 let mut first_pos: HashMap<String, usize> = HashMap::new();
430 let mut capitalized: HashMap<String, bool> = HashMap::new();
431 let mut left_ctx: HashMap<String, HashSet<String>> = HashMap::new();
432 let mut right_ctx: HashMap<String, HashSet<String>> = HashMap::new();
433
434 let orig_words: Vec<&str> = text
435 .split(|c: char| !c.is_alphanumeric())
436 .filter(|t| !t.is_empty())
437 .collect();
438
439 for (i, ow) in orig_words.iter().enumerate() {
440 let lower = ow.to_lowercase();
441 *tf.entry(lower.clone()).or_insert(0) += 1;
442 first_pos.entry(lower.clone()).or_insert(i);
443 let is_cap = ow.chars().next().is_some_and(|c| c.is_uppercase());
444 capitalized.entry(lower.clone()).or_insert(is_cap);
445 }
446
447 for i in 0..n {
448 let word = &total_words[i];
449 for delta in 1..=self.window_size {
450 if i + delta < n {
451 let right = total_words[i + delta].clone();
452 right_ctx
453 .entry(word.clone())
454 .or_default()
455 .insert(right.clone());
456 left_ctx.entry(right).or_default().insert(word.clone());
457 }
458 }
459 }
460
461 let sigma = 1.0_f64;
462 let tf_max = tf.values().copied().max().unwrap_or(1) as f64;
463
464 let mut word_scores: HashMap<String, f64> = HashMap::new();
465
466 for (word, &freq) in &tf {
467 if stop_words.contains(word) || word.len() < 2 {
468 continue;
469 }
470 let tf_norm = freq as f64 / tf_max;
471 let pos = first_pos.get(word).copied().unwrap_or(0) as f64;
472 let rel_pos = 1.0 - pos / n.max(1) as f64;
473 let left_div = left_ctx.get(word).map_or(0, |s| s.len()) as f64;
474 let right_div = right_ctx.get(word).map_or(0, |s| s.len()) as f64;
475 let disp = (left_div + right_div + sigma) / (2.0 * freq as f64 + sigma);
476 let cap_bonus = if *capitalized.get(word).unwrap_or(&false) {
477 0.1
478 } else {
479 0.0
480 };
481 let score = (tf_norm * disp) / (rel_pos + cap_bonus + sigma);
482 word_scores.insert(word.clone(), score);
483 }
484
485 let mut ngram_scores: Vec<(String, f64)> = Vec::new();
487
488 for n_size in 1..=self.max_ngram_size {
489 let candidates = self.generate_ngrams(&total_words, n_size, &stop_words);
490 for ngram in candidates {
491 let words: Vec<&str> = ngram.split_whitespace().collect();
492 if words.is_empty() {
493 continue;
494 }
495 if n_size > 1 {
496 let first = words[0];
497 let last = words[words.len() - 1];
498 if stop_words.contains(first) || stop_words.contains(last) {
499 continue;
500 }
501 }
502
503 let prod: f64 = words
504 .iter()
505 .map(|w| word_scores.get(*w).copied().unwrap_or(1.0))
506 .product();
507
508 let coherence: f64 = if n_size > 1 {
509 let pairs = n_size - 1;
510 let pair_count: f64 = (0..pairs)
511 .map(|p| {
512 let left = words[p];
513 let right = words[p + 1];
514 right_ctx
515 .get(left)
516 .map_or(0, |s| if s.contains(right) { 1 } else { 0 })
517 as f64
518 })
519 .sum();
520 (pair_count / pairs as f64).max(0.01)
521 } else {
522 1.0
523 };
524
525 let score = prod / (n_size as f64 * coherence + sigma);
526 ngram_scores.push((ngram, score));
527 }
528 }
529
530 ngram_scores.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
531 let deduped = self.deduplicate(ngram_scores);
532 let mut result: Vec<(String, f64)> = deduped.into_iter().take(top_n).collect();
533
534 if let Some(max_s) = result.iter().map(|(_, s)| *s).reduce(f64::max) {
535 if max_s > 0.0 {
536 for (_, s) in result.iter_mut() {
537 *s /= max_s;
538 }
539 }
540 }
541
542 Ok(result)
543 }
544
545 fn generate_ngrams(
546 &self,
547 words: &[String],
548 n: usize,
549 stop_words: &HashSet<String>,
550 ) -> Vec<String> {
551 if words.len() < n {
552 return Vec::new();
553 }
554 let mut ngrams: HashSet<String> = HashSet::new();
555
556 for window in words.windows(n) {
557 if window.iter().all(|w| stop_words.contains(w.as_str())) {
558 continue;
559 }
560 if window.iter().any(|w| w.len() < 2) {
561 continue;
562 }
563 ngrams.insert(window.join(" "));
564 }
565 ngrams.into_iter().collect()
566 }
567
568 fn deduplicate(&self, sorted: Vec<(String, f64)>) -> Vec<(String, f64)> {
569 let mut result: Vec<(String, f64)> = Vec::new();
570
571 for candidate in sorted {
572 let tokens_c: HashSet<&str> = candidate.0.split_whitespace().collect();
573 let is_dup = result.iter().any(|(existing, _)| {
574 let tokens_e: HashSet<&str> = existing.split_whitespace().collect();
575 let inter = tokens_c.intersection(&tokens_e).count();
576 let union = tokens_c.union(&tokens_e).count();
577 if union == 0 {
578 return false;
579 }
580 (inter as f64 / union as f64) >= self.dedup_threshold
581 });
582 if !is_dup {
583 result.push(candidate);
584 }
585 }
586 result
587 }
588}
589
590pub fn textrank_keywords(text: &str, top_n: usize, window: usize) -> Result<Vec<(String, f64)>> {
602 if top_n == 0 {
603 return Err(TextError::InvalidInput("top_n must be > 0".to_string()));
604 }
605 if window < 2 {
606 return Err(TextError::InvalidInput("window must be >= 2".to_string()));
607 }
608 if text.trim().is_empty() {
609 return Ok(Vec::new());
610 }
611
612 let stop_words = english_stop_words();
613 let words: Vec<String> = words_lower(text);
614 let filtered: Vec<String> = words
615 .iter()
616 .filter(|w| w.len() >= 3 && !stop_words.contains(*w))
617 .cloned()
618 .collect();
619
620 if filtered.is_empty() {
621 return Ok(Vec::new());
622 }
623
624 let mut graph: HashMap<String, HashMap<String, f64>> = HashMap::new();
625
626 for win in filtered.windows(window) {
627 for i in 0..win.len() {
628 for j in (i + 1)..win.len() {
629 let a = &win[i];
630 let b = &win[j];
631 *graph
632 .entry(a.clone())
633 .or_default()
634 .entry(b.clone())
635 .or_insert(0.0) += 1.0;
636 *graph
637 .entry(b.clone())
638 .or_default()
639 .entry(a.clone())
640 .or_insert(0.0) += 1.0;
641 }
642 }
643 }
644
645 let nodes: Vec<String> = graph.keys().cloned().collect();
646 let n = nodes.len();
647 if n == 0 {
648 return Ok(Vec::new());
649 }
650
651 let node_idx: HashMap<&str, usize> = nodes
652 .iter()
653 .enumerate()
654 .map(|(i, w)| (w.as_str(), i))
655 .collect();
656
657 const DAMPING: f64 = 0.85;
658 const MAX_ITER: usize = 100;
659 const EPS: f64 = 1e-5;
660
661 let mut scores = vec![1.0_f64 / n as f64; n];
662
663 let out_sums: Vec<f64> = nodes
664 .iter()
665 .map(|node| {
666 graph
667 .get(node)
668 .map(|nbrs| nbrs.values().sum())
669 .unwrap_or(0.0)
670 })
671 .collect();
672
673 for _ in 0..MAX_ITER {
674 let mut new_scores = vec![(1.0 - DAMPING) / n as f64; n];
675 for (j, node_j) in nodes.iter().enumerate() {
676 if out_sums[j] <= 0.0 {
677 continue;
678 }
679 if let Some(nbrs) = graph.get(node_j) {
680 for (nbr, &weight) in nbrs {
681 if let Some(&i) = node_idx.get(nbr.as_str()) {
682 new_scores[i] += DAMPING * (weight / out_sums[j]) * scores[j];
683 }
684 }
685 }
686 }
687 let diff: f64 = scores
688 .iter()
689 .zip(&new_scores)
690 .map(|(a, b)| (a - b).abs())
691 .sum();
692 scores = new_scores;
693 if diff < EPS {
694 break;
695 }
696 }
697
698 let word_scores: HashMap<String, f64> = nodes.iter().cloned().zip(scores).collect();
699
700 let all_words: Vec<String> = words_lower(text);
701 let mut phrases: Vec<(String, f64)> = Vec::new();
702 let mut phrase_buf: Vec<String> = Vec::new();
703 let mut phrase_score = 0.0_f64;
704
705 for w in &all_words {
706 if let Some(&sc) = word_scores.get(w) {
707 phrase_buf.push(w.clone());
708 phrase_score += sc;
709 } else {
710 if !phrase_buf.is_empty() {
711 phrases.push((phrase_buf.join(" "), phrase_score));
712 phrase_buf.clear();
713 phrase_score = 0.0;
714 }
715 }
716 }
717 if !phrase_buf.is_empty() {
718 phrases.push((phrase_buf.join(" "), phrase_score));
719 }
720
721 let mut seen: HashSet<String> = HashSet::new();
722 let mut unique: Vec<(String, f64)> = Vec::new();
723 for (phrase, score) in phrases {
724 if !seen.contains(&phrase) {
725 seen.insert(phrase.clone());
726 unique.push((phrase, score));
727 }
728 }
729
730 unique.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
731 unique.truncate(top_n);
732 Ok(unique)
733}
734
#[cfg(test)]
mod tests {
    use super::*;

    /// Three-sentence passage shared by most tests in this module.
    const SAMPLE_TEXT: &str = "Rust is a systems programming language that runs blazingly fast, \
        prevents segfaults, and guarantees thread safety. \
        Rust programming combines low-level control with high-level ergonomics. \
        Many developers choose Rust for building reliable and efficient software.";

    // ---- RAKE ----

    #[test]
    fn test_rake_returns_results() {
        let found = Rake::new()
            .extract(SAMPLE_TEXT, 5)
            .expect("RAKE should succeed");
        assert!(!found.is_empty(), "RAKE should return keywords");
        assert!(found.len() <= 5);
    }

    #[test]
    fn test_rake_descending_scores() {
        let ranked = Rake::new().extract(SAMPLE_TEXT, 10).expect("ok");
        for (earlier, later) in ranked.iter().zip(ranked.iter().skip(1)) {
            assert!(
                earlier.1 >= later.1,
                "Scores should be descending: {:?}",
                ranked
            );
        }
    }

    #[test]
    fn test_rake_empty_text() {
        let extractor = Rake::new();
        assert!(extractor.extract("", 5).expect("ok").is_empty());
    }

    #[test]
    fn test_rake_top_n_zero_errors() {
        let outcome = Rake::new().extract(SAMPLE_TEXT, 0);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_rake_custom_stop_words() {
        let custom = vec!["rust".to_string(), "is".to_string()];
        let found = Rake::with_stop_words(custom)
            .extract("Rust is a systems language. Rust is fast.", 5)
            .expect("ok");
        for (phrase, _) in &found {
            assert!(
                !phrase.contains("rust"),
                "Stop-word 'rust' appeared in results: {}",
                phrase
            );
        }
    }

    #[test]
    fn test_rake_no_phrases_longer_than_max() {
        let mut extractor = Rake::new();
        extractor.max_phrase_words = 2;
        let found = extractor.extract(SAMPLE_TEXT, 10).expect("ok");
        for (phrase, _) in &found {
            let word_count = phrase.split_whitespace().count();
            assert!(word_count <= 2, "Phrase '{}' exceeds max length", phrase);
        }
    }

    #[test]
    fn test_rake_phrase_scores_positive() {
        let found = Rake::new().extract(SAMPLE_TEXT, 5).expect("ok");
        for (_, value) in &found {
            assert!(*value >= 0.0, "Score should be non-negative");
        }
    }

    // ---- YAKE ----

    #[test]
    fn test_yake_returns_results() {
        let found = Yake::new(2)
            .extract(SAMPLE_TEXT, 5)
            .expect("YAKE should succeed");
        assert!(!found.is_empty(), "YAKE should return keywords");
        assert!(found.len() <= 5);
    }

    #[test]
    fn test_yake_scores_ascending() {
        let ranked = Yake::new(2).extract(SAMPLE_TEXT, 10).expect("ok");
        for (earlier, later) in ranked.iter().zip(ranked.iter().skip(1)) {
            assert!(
                earlier.1 <= later.1,
                "YAKE scores should be ascending: {:?}",
                ranked
            );
        }
    }

    #[test]
    fn test_yake_empty_text() {
        let extractor = Yake::new(2);
        assert!(extractor.extract("", 5).expect("ok").is_empty());
    }

    #[test]
    fn test_yake_top_n_zero_errors() {
        let outcome = Yake::new(2).extract(SAMPLE_TEXT, 0);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_yake_unigram_mode() {
        let found = Yake::new(1).extract(SAMPLE_TEXT, 5).expect("ok");
        for (phrase, _) in &found {
            let word_count = phrase.split_whitespace().count();
            assert_eq!(
                word_count, 1,
                "Unigram mode should return single words, got: {}",
                phrase
            );
        }
    }

    #[test]
    fn test_yake_bigram_mode() {
        let found = Yake::new(2).extract(SAMPLE_TEXT, 10).expect("ok");
        let bigram_count = found
            .iter()
            .filter(|(phrase, _)| phrase.split_whitespace().count() == 2)
            .count();
        assert!(bigram_count > 0, "Bigram mode should include 2-word phrases");
    }

    #[test]
    fn test_yake_scores_normalized() {
        let found = Yake::new(2).extract(SAMPLE_TEXT, 10).expect("ok");
        for (phrase, value) in &found {
            assert!(
                *value >= 0.0 && *value <= 1.0,
                "Score {} for '{}' out of [0,1] range",
                value,
                phrase
            );
        }
    }

    // ---- TextRank ----

    #[test]
    fn test_textrank_returns_results() {
        let found = textrank_keywords(SAMPLE_TEXT, 5, 3).expect("ok");
        assert!(!found.is_empty());
        assert!(found.len() <= 5);
    }

    #[test]
    fn test_textrank_scores_descending() {
        let ranked = textrank_keywords(SAMPLE_TEXT, 10, 3).expect("ok");
        for (earlier, later) in ranked.iter().zip(ranked.iter().skip(1)) {
            assert!(earlier.1 >= later.1, "Scores should be descending");
        }
    }

    #[test]
    fn test_textrank_empty_text() {
        assert!(textrank_keywords("", 5, 3).expect("ok").is_empty());
    }

    #[test]
    fn test_textrank_zero_top_n_errors() {
        let outcome = textrank_keywords(SAMPLE_TEXT, 0, 3);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_textrank_small_window_errors() {
        let outcome = textrank_keywords(SAMPLE_TEXT, 5, 1);
        assert!(outcome.is_err());
    }

    #[test]
    fn test_textrank_window_size_2() {
        let found = textrank_keywords(SAMPLE_TEXT, 5, 2).expect("ok");
        assert!(!found.is_empty());
    }

    #[test]
    fn test_textrank_larger_window() {
        let found = textrank_keywords(SAMPLE_TEXT, 5, 5).expect("ok");
        assert!(!found.is_empty());
    }

    // ---- Cross-method checks ----

    #[test]
    fn test_all_methods_non_empty_for_real_text() {
        let from_rake = Rake::new().extract(SAMPLE_TEXT, 5).expect("RAKE ok");
        let from_yake = Yake::new(2).extract(SAMPLE_TEXT, 5).expect("YAKE ok");
        let from_textrank = textrank_keywords(SAMPLE_TEXT, 5, 3).expect("TextRank ok");

        assert!(!from_rake.is_empty(), "RAKE returned empty");
        assert!(!from_yake.is_empty(), "YAKE returned empty");
        assert!(!from_textrank.is_empty(), "TextRank returned empty");
    }

    #[test]
    fn test_all_methods_handle_short_text() {
        // Only checks that none of the extractors fails on a tiny input.
        let tiny = "Quick brown fox.";
        Rake::new().extract(tiny, 3).expect("RAKE ok");
        Yake::new(1).extract(tiny, 3).expect("YAKE ok");
        textrank_keywords(tiny, 3, 2).expect("TextRank ok");
    }

    #[test]
    fn test_stop_word_list_not_empty() {
        let stop_words = english_stop_words();
        assert!(!stop_words.is_empty());
        for expected in ["the", "and", "is"] {
            assert!(stop_words.contains(expected));
        }
    }
}