cld2_sys/
lib.rs

1//! Unsafe, low-level wrapper around cld2, the "compact language detector"
2//! based on Chromium's code, plus a very thin C wrapper layer.  Normally
3//! you won't want to use this library directly unless you're writing
4//! your own cld2 wrapper library.
5//!
6//! If you need access to APIs which are not currently wrapped, please feel
7//! free to send pull requests!
8
9#![allow(non_camel_case_types)]
10#![allow(non_upper_case_globals)]
11
12extern crate libc;
13
14pub use encodings::*;
15pub use languages::*;
16pub use flags::*;
17pub use wrapper::*;
18
19mod encodings;
20mod languages;
21mod flags;
22mod wrapper;
23
24// Just a single placeholder test in case somebody runs 'cargo test' in
25// this library's directory, and not in the main library's directory.  This
26// is not intended to be comprehensive, but please add regression tests for
27// any bugs.
// Smoke test: run the basic detection entry point over a short English
// poem and check that it is reliably identified as English.
#[test]
fn test_detection() {
    let english = "
It is an ancient Mariner,
And he stoppeth one of three.
'By thy long grey beard and glittering eye,
Now wherefore stopp'st thou me?

'The Bridegroom's doors are opened wide,
And I am next of kin;
The guests are met, the feast is set:
May'st hear the merry din.'

He holds him with his skinny hand,
'There was a ship,' quoth he.
'Hold off! unhand me, grey-beard loon!'
Eftsoons his hand dropt he.
";

    let mut is_reliable = false;
    // SAFETY: `english` is a live string slice, so its pointer is valid for
    // `english.len()` bytes for the duration of the call, and `is_reliable`
    // is a valid, writable `bool`.
    let language = unsafe {
        CLD2_DetectLanguage(
            // C `char` is `i8` on some targets and `u8` on others, so cast
            // to `c_char` rather than hard-coding `i8`.
            // NOTE(review): assumes the binding declares this parameter as
            // `*const c_char` -- confirm against the wrapper module.
            english.as_ptr() as *const libc::c_char,
            // The literal above is tiny, so this cast cannot truncate.
            english.len() as libc::c_int,
            true,
            &mut is_reliable,
        )
    };
    assert_eq!(Language::ENGLISH, language);
    assert!(is_reliable);
}
56
57// This particular API has extra wrapper code, so we want to test it.
// Runs the extended detection API over text that switches from English to
// French, then walks the returned result chunks and checks that at least
// one English chunk covers "ancient Mariner" and at least one French chunk
// covers "comme ça".
#[test]
fn test_result_chunks() {
    use libc::{c_char, c_double, c_int};
    use std::slice::from_raw_parts;

    let mixed = "
It is an ancient Mariner,
And he stoppeth one of three.
'By thy long grey beard and glittering eye,
Now wherefore stopp'st thou me?

Sur le pont d'Avignon,
L'on y danse, l'on y danse,
Sur le pont d'Avignon
L'on y danse tous en rond.

Les belles dames font comme ça
Et puis encore comme ça.
Les messieurs font comme ça
Et puis encore comme ça.
";

    let hints = CLDHints {
        content_language_hint: std::ptr::null(),
        tld_hint: std::ptr::null(),
        encoding_hint: Encoding::UNKNOWN_ENCODING as c_int,
        language_hint: Language::UNKNOWN_LANGUAGE,
    };
    // Three output slots per array, matching the `3` in the variable names.
    // NOTE(review): presumably the C API writes exactly three entries to
    // each array -- confirm against the cld2 headers.
    let mut language3: Vec<Language> = vec![Language::UNKNOWN_LANGUAGE; 3];
    let mut percent3: Vec<c_int> = vec![0; 3];
    let mut normalized_score3: Vec<c_double> = vec![0.0; 3];
    let mut text_bytes: c_int = 0;
    let mut is_reliable: bool = false;

    let chunks = unsafe { CLD2_ResultChunkVector_new() };

    // SAFETY: the buffer pointer and length both describe `mixed`, which
    // outlives the call; every output pointer refers to a live, writable
    // local; `chunks` was just obtained from `CLD2_ResultChunkVector_new`.
    let language = unsafe {
        CLD2_ExtDetectLanguageSummary4(
            // C `char` is `i8` on some targets and `u8` on others, so cast
            // to `c_char` rather than hard-coding `i8`.
            // NOTE(review): assumes the binding declares this parameter as
            // `*const c_char` -- confirm against the wrapper module.
            mixed.as_ptr() as *const c_char,
            // The literal above is tiny, so this cast cannot truncate.
            mixed.len() as c_int,
            true,
            &hints,
            0,
            language3.as_mut_ptr(),
            percent3.as_mut_ptr(),
            normalized_score3.as_mut_ptr(),
            chunks,
            &mut text_bytes,
            &mut is_reliable,
        )
    };

    // Inspect the chunks first and only record what was found; assertions
    // are deferred until the vector has been freed so that a failing
    // assertion does not leak it.
    let (found_mariner, found_comme_ca) = unsafe {
        let data = CLD2_ResultChunkVector_data(chunks as *const ResultChunks);
        let size = CLD2_ResultChunkVector_size(chunks as *const ResultChunks);
        // `from_raw_parts` requires a non-null pointer even for an empty
        // slice, so treat a null data pointer as "no chunks".
        let slice: &[ResultChunk] = if data.is_null() {
            &[]
        } else {
            from_raw_parts(data, size as usize)
        };

        let mut found_mariner = false;
        let mut found_comme_ca = false;
        for chunk in slice {
            // Each chunk is used as a byte range into the original text.
            let start = chunk.offset as usize;
            let end = start + chunk.bytes as usize;
            let text = &mixed[start..end];

            if chunk.lang1 == Language::ENGLISH as u16
                && text.contains("ancient Mariner")
            {
                found_mariner = true;
            }

            if chunk.lang1 == Language::FRENCH as u16
                && text.contains("comme ça")
            {
                found_comme_ca = true;
            }
        }
        (found_mariner, found_comme_ca)
    };

    // The slice borrowed from `chunks` is out of scope here, so the vector
    // can be released.
    unsafe {
        CLD2_ResultChunkVector_delete(chunks);
    }

    assert_eq!(Language::FRENCH, language);
    assert!(found_mariner);
    assert!(found_comme_ca);
}
136
// Checks the language <-> name helpers in both directions: ENGLISH maps to
// the code "en", and the name "fr" maps back to FRENCH.
#[test]
fn test_language_names() {
    use std::ffi::{CStr, CString};

    let code_ptr = unsafe { CLD2_LanguageCode(Language::ENGLISH) };
    // `CStr::from_ptr` on a null pointer is undefined behaviour, so fail
    // loudly instead.
    assert!(!code_ptr.is_null(), "CLD2_LanguageCode returned a null pointer");
    // SAFETY: the pointer was checked for null above.
    // NOTE(review): assumes it refers to a NUL-terminated string that stays
    // valid while `code` is in use -- confirm against the cld2 headers.
    let code = unsafe { CStr::from_ptr(code_ptr) }
        .to_str()
        .expect("language code is not valid UTF-8");
    assert_eq!("en", code);

    // Keep the `CString` bound to a local so its buffer outlives the call.
    let name = CString::new("fr").expect("literal contains no interior NUL");
    // SAFETY: `name` is a live, NUL-terminated C string for the whole call.
    let language = unsafe { CLD2_GetLanguageFromName(name.as_ptr()) };
    assert_eq!(Language::FRENCH, language);
}