hfst_sys/
lib.rs

1#![allow(non_upper_case_globals)]
2#![allow(non_camel_case_types)]
3#![allow(non_snake_case)]
4
5//#![cfg(docsrs)]
6//#![feature(builtin_syntax)]
7
8//include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
9include!("bindings.rs");
10
11#[cfg(test)]
12mod tests {
13    use super::*;
14    use std::{os::raw::c_char, str::Utf8Error};
15
/// Returns the length in bytes of the NUL-terminated C string at `s`,
/// not counting the terminator.
///
/// The signature stays a plain (non-`unsafe`) `fn` so existing call sites keep
/// compiling, but the caller is still responsible for passing a pointer to
/// readable memory that contains a NUL terminator.
///
/// # Panics
///
/// Panics if `s` is null, instead of dereferencing it.
fn strlen(s: *const c_char) -> usize {
    assert!(!s.is_null(), "strlen called with a null pointer");
    // SAFETY: `s` is non-null (checked above); the caller guarantees that it
    // points to a NUL-terminated string that stays alive during this call.
    unsafe { std::ffi::CStr::from_ptr(s) }.to_bytes().len()
}
24
25    fn c_charptr_as_str_unchecked<'a>(s: *const c_char) -> &'a str {
26        unsafe {
27            std::str::from_utf8_unchecked(
28                std::slice::from_raw_parts(s as *const u8, strlen(s))
29            )
30        }
31    }
32
33    trait RemoveAts {
34        fn remove_ats(self) -> String;
35    }
36
37    impl RemoveAts for &str {
38        fn remove_ats(self) -> String {
39            remove_ats(self)
40        }
41    }
42
/// Remove everything between pairs of '@' from `s`, delimiters included.
///
/// The input is cut at every '@'. Pieces at even positions (0, 2, 4, ...)
/// lie outside an `@...@` pair and are kept; pieces at odd positions lie
/// inside a pair and are dropped. If the number of '@' is odd, the text after
/// the last, unpaired '@' is dropped as well.
///
/// E.g.
/// "@_____@HEY@____@THERE" => "HEYTHERE"
fn remove_ats(s: &str) -> String {
    // `split` yields the pieces between consecutive '@' characters, so keeping
    // every second piece is exactly "everything outside the pairs" -- no
    // sentinel index or integer casts needed.
    s.split('@').step_by(2).collect()
}
70
/// Reinterprets a NUL-terminated Rust string as a C string pointer.
///
/// No copy is made: the returned pointer is the address of `s`'s own bytes,
/// so it is only usable while the string behind `s` is alive.
///
/// # Panics
///
/// Panics unless `s` ends with `'\0'` and contains no other NUL byte. An
/// interior NUL would make C code see a silently truncated string, so it is
/// rejected rather than passed through.
fn str_as_cstr(s: &str) -> *const c_char {
    match std::ffi::CStr::from_bytes_with_nul(s.as_bytes()) {
        Ok(c_str) => c_str.as_ptr(),
        Err(err) => panic!("{s:?} is not a valid C string: {err}"),
    }
}
75
/// End-to-end check against an analyser file on disk: open an input stream,
/// read a transducer from it, look up "viessu", and verify that the set of
/// analyses produced (with `@...@` spans stripped) is exactly the expected
/// one.
///
/// Ignored by default because it depends on the file at the hard-coded path.
#[test]
#[ignore = "requires /usr/share/giella/sme/analyser-dict-gt-desc.hfstol"]
fn open_read_close() -> Result<(), String> {
    let path = "/usr/share/giella/sme/analyser-dict-gt-desc.hfstol\0";
    let input_stream = unsafe { hfst_input_stream(str_as_cstr(path)) };
    if input_stream.is_null() {
        return Err("input_stream was NULL".to_string());
    }
    assert!(unsafe { !hfst_input_stream_is_bad(input_stream) });

    let tr = unsafe { hfst_transducer_from_stream(input_stream) };
    assert!(!tr.is_null());

    // Maps each expected analysis to "has the iterator produced it yet?".
    let mut expected_analyses: std::collections::HashMap<&str, bool> = [
        "viessat+V+IV+Imprt+Du1",
        "viessut+V+IV+Imprt+Du1",
        "viessut+V+IV+Imprt+Du2",
        "viessut+V+IV+Ind+Prs+Sg3",
        "viessut+V+IV+PrsPrc",
        "viessu+N+Sg+Nom",
    ]
    .iter()
    .map(|&analysis| (analysis, false))
    .collect();

    let lookup_str = str_as_cstr("viessu\0");
    let lookup = unsafe { hfst_lookup(tr, lookup_str) };
    let iter = unsafe { hfst_lookup_iterator(lookup) };

    unsafe {
        let mut w = 0.0f32;
        let mut s: *mut c_char = std::ptr::null_mut();
        while !hfst_lookup_iterator_done(iter) {
            hfst_lookup_iterator_value(iter, &raw mut s, &mut w);
            // Never hand a null pointer to the string helper: fail the test
            // with a message instead of reading through it.
            assert!(!s.is_null(), "lookup iterator produced a NULL analysis string");

            let seen_analysis = c_charptr_as_str_unchecked(s).remove_ats();
            println!("{seen_analysis}");
            match expected_analyses.get_mut(seen_analysis.as_str()) {
                Some(seen) => *seen = true,
                None => panic!("got an analysis we did not expect: {}", seen_analysis),
            }

            hfst_lookup_iterator_next(iter);
        }
    }

    // Close the stream before the final assertion so that a mismatch in the
    // analyses does not skip the close call.
    unsafe { hfst_input_stream_close(input_stream) };

    let mut missing: Vec<&str> = expected_analyses
        .into_iter()
        .filter_map(|(analysis, seen)| (!seen).then_some(analysis))
        .collect();
    missing.sort_unstable();
    assert!(
        missing.is_empty(),
        "expected analyses were never produced: {missing:?}"
    );

    Ok(())
}
128
/// Opening an input stream on a path that does not exist is expected to
/// yield a NULL stream pointer.
#[test]
fn input_stream_nonexistant_fails() -> Result<(), String> {
    let missing_path = str_as_cstr("/this/does/not/exist\0");
    let stream = unsafe { hfst_input_stream(missing_path) };
    assert!(stream.is_null());
    Ok(())
}
136
137    //#[test]
138    //fn tokenizer_open_nonexistant_fails_with_error_1() -> Result<(), String> {
139    //    let path = c"/non/existant/path";
140    //    let mut error: std::ffi::c_int = 0;
141    //    let tokenizer = unsafe {
142    //        hfst_tokenizer_open(path.as_ptr(), &raw mut error)
143    //    };
144    //    assert!(tokenizer.is_null());
145    //    assert_eq!(error, 1);
146    //    Ok(())
147    //}
148
149    //#[test]
150    //fn test_tokenize() -> Result<(), String> {
151    //    let path = c"/usr/share/giella/sme/tokeniser-disamb-gt-desc.pmhfst";
152
153    //    let mut error: std::ffi::c_int = 0;
154    //    let tokenizer = unsafe { hfst_tokenizer_open(path.as_ptr(), &raw mut error) };
155
156    //    assert!(!tokenizer.is_null());
157
158    //    let input = c"Mun lean Anders\n".to_owned();
159    //    //let input = c"Mun lean Anders, ja mun barggan universitehtas\n\n".to_owned();
160    //    let len = strlen(input.as_ptr());
161
162    //    let output = unsafe {
163    //        hfst_tokenizer_tokenize(tokenizer, input.as_ptr(), len)
164    //    };
165
166    //    let output = unsafe { std::ffi::CStr::from_ptr(output) };
167    //    println!("we got {} bytes of output: '{:?}'", strlen(output.as_ptr()), output);
168    //    Ok(())
169    //}
170}