pub struct KeyExtractor { /* private fields */ }
Expand description
Thai keyword extractor using TF × inverse-corpus-frequency scoring.
Backed by the built-in 62k-word tokenizer, the TNC frequency table (~106k entries), and the Thai stopword list (~1 029 entries).
Construction is O(n) in the TNC table size — reuse the returned instance
rather than calling builtin() on every query.
§Filtering rules
A token is eligible as a keyword when all of the following hold:
- Kind is Thai, Latin, Number, or Named (whitespace, punctuation, emoji, and unknown tokens are always skipped)
- Character length ≥ 2 (single-char tokens are too coarse to be keywords)
- Not in the built-in Thai stopword list
§Examples
use kham_core::keyword::KeyExtractor;
let kex = KeyExtractor::builtin();
// Rare domain-specific word outranks a common word
// "ซอฟต์แวร์" (software) is rare in TNC and should appear as a top keyword
let kws = kex.extract("นักพัฒนาซอฟต์แวร์เขียนซอฟต์แวร์ทุกวัน", 5);
assert!(kws.iter().any(|k| k.word == "ซอฟต์แวร์"));
Implementations§
Source§impl KeyExtractor
impl KeyExtractor
Source pub fn builtin() -> Self
pub fn builtin() -> Self
Create a keyword extractor backed by the built-in tokenizer, TNC frequency table, and Thai stopword list.
§Examples
use kham_core::keyword::KeyExtractor;
let kex = KeyExtractor::builtin();
assert!(!kex.extract("กินข้าวกับปลา", 5).is_empty());
Source pub fn extract(&self, text: &str, max_n: usize) -> Vec<Keyword>
pub fn extract(&self, text: &str, max_n: usize) -> Vec<Keyword>
Extract up to max_n keywords from text, ranked by TF-IDF score (term frequency × inverse corpus frequency).
Returns an empty Vec when text is empty, contains no eligible
content words, or max_n is zero.
Ties in score are broken alphabetically so results are deterministic.
§Examples
use kham_core::keyword::KeyExtractor;
let kex = KeyExtractor::builtin();
// Edge cases
assert!(kex.extract("", 5).is_empty());
assert!(kex.extract("กินข้าวกับปลา", 0).is_empty());
// Score order is non-increasing
let kws = kex.extract("การเรียนภาษาโปรแกรมมิ่งเป็นทักษะสำคัญสำหรับนักพัฒนา", 10);
for pair in kws.windows(2) {
assert!(
pair[0].score >= pair[1].score,
"out-of-order: {:?} before {:?}", pair[0], pair[1]
);
}
Auto Trait Implementations§
impl Freeze for KeyExtractor
impl RefUnwindSafe for KeyExtractor
impl Send for KeyExtractor
impl Sync for KeyExtractor
impl Unpin for KeyExtractor
impl UnsafeUnpin for KeyExtractor
impl UnwindSafe for KeyExtractor
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more