1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
extern crate libc;
use self::libc::*;
use std::ffi::CStr;
use std::ffi::CString;
use std::str;
/// Owning handle to a stemmer instance created by the C stemming library.
///
/// The wrapped pointer is produced by `sb_stemmer_new` and handed back to
/// `sb_stemmer_delete` when the value is dropped.
pub struct Stemmer {
    /// Opaque pointer to the library-side stemmer object.
    stemmer: *mut c_void,
}
// Raw bindings to the C stemming library. The ABI is spelled out explicitly;
// "C" is what a bare `extern` block defaults to.
extern "C" {
    /// Returns an array of C-string pointers naming the available algorithms.
    /// The wrapper in this file treats a null entry as the end of the array.
    fn sb_stemmer_list() -> *const *const c_char;

    /// Creates a stemmer for `algorithm` using the character encoding
    /// `charenc`. The wrapper in this file treats a null result as failure.
    fn sb_stemmer_new(algorithm: *const c_char, charenc: *const c_char) -> *mut c_void;

    /// Releases a stemmer previously obtained from `sb_stemmer_new`.
    fn sb_stemmer_delete(stemmer: *mut c_void);

    /// Stems the `size` bytes starting at `word` and returns a pointer to the
    /// result, which the wrapper in this file reads as a C string.
    fn sb_stemmer_stem(stemmer: *mut c_void, word: *const c_char, size: c_int) -> *const c_char;
}
impl Drop for Stemmer {
    /// Hands the underlying stemmer back to the C library.
    ///
    /// A null handle is skipped, so nothing is passed to `sb_stemmer_delete`
    /// in that case.
    fn drop(&mut self) {
        let handle = self.stemmer;
        if handle.is_null() {
            return;
        }
        // SAFETY: `handle` is non-null; presumably it is the pointer obtained
        // from `sb_stemmer_new` and has not been freed elsewhere — confirm no
        // other code path deletes it.
        unsafe {
            sb_stemmer_delete(handle);
        }
    }
}
impl Stemmer {
pub fn list() -> Vec<&'static str> {
let mut i = 0;
unsafe {
let list: *const *const c_char = sb_stemmer_list();
let mut res = vec![];
loop {
let string_ptr: *const c_char = *list.offset(i);
if string_ptr.is_null() {
return res;
} else {
let bytes: &[u8] = CStr::from_ptr(string_ptr).to_bytes();
let s: &str = str::from_utf8_unchecked(bytes);
res.push(s);
i += 1;
}
}
}
}
pub fn new(algorithm: &str) -> Option<Stemmer> {
let algo = CString::new(algorithm).unwrap();
let enc = CString::new("UTF_8").unwrap();
unsafe {
let stemmer = sb_stemmer_new(algo.as_ptr(), enc.as_ptr());
if stemmer.is_null() {
return None;
} else {
return Some(Stemmer { stemmer: stemmer });
}
}
}
pub fn stem(&mut self, word: &str) -> String {
String::from(self.stem_str(word))
}
pub fn stem_str(&mut self, word: &str) -> &str {
unsafe {
let word = CString::new(word).unwrap();
let res = sb_stemmer_stem(self.stemmer, word.as_ptr(), word.to_bytes().len() as i32);
let bytes: &[u8] = CStr::from_ptr(res).to_bytes();
str::from_utf8_unchecked(bytes)
}
}
}