#![recursion_limit = "1024"]
#![deny(missing_docs)]
#[macro_use]
extern crate error_chain;
extern crate libc;
extern crate uchardet_sys as ffi;
use libc::size_t;
use std::ffi::CStr;
use std::str::from_utf8;
use std::os::raw::c_char;
pub use errors::*;
// The lint is relaxed for this module only; presumably the items generated by
// `error_chain!` would otherwise trip the crate-wide `deny(missing_docs)`.
#[allow(missing_docs)]
mod errors {
    // Generates this crate's error types, including the `ErrorKind` and
    // `Result` names used throughout the rest of the file.
    error_chain! {
        errors {
            // Reported when the detector yields an empty charset name.
            UnrecognizableCharset {
                description("unrecognizable charset")
                display("uchardet was unable to recognize a charset")
            }
            // Reported for uchardet status code `1`.
            OutOfMemory {
                description("out of memory error")
                display("uchardet ran out of memory")
            }
            // Reported for any other non-zero status code; the raw value is
            // kept so that it can be shown in the message.
            Other(code: i32) {
                description("unknown error")
                display("uchardet returned unknown error {}", code)
            }
        }
    }
}
impl ErrorKind {
    /// Translates a non-zero uchardet status code into an `ErrorKind`.
    ///
    /// A status of `1` maps to `ErrorKind::OutOfMemory`; every other value is
    /// wrapped unchanged in `ErrorKind::Other`.
    ///
    /// # Panics
    ///
    /// Panics if `nsresult` is `0`; callers are expected to handle that value
    /// themselves (`handle_data` treats it as success) before asking for an
    /// error kind.
    fn from_nsresult(nsresult: ::ffi::nsresult) -> ErrorKind {
        assert!(nsresult != 0);
        if nsresult == 1 {
            ErrorKind::OutOfMemory
        } else {
            ErrorKind::Other(nsresult)
        }
    }
}
/// Owning wrapper around a single uchardet detector handle.
///
/// The handle is obtained from `ffi::uchardet_new` in `EncodingDetector::new`
/// and released with `ffi::uchardet_delete` when the value is dropped.
struct EncodingDetector {
    /// Raw detector handle passed to every `ffi::uchardet_*` call.
    ptr: ffi::uchardet_t,
}
/// Detects the character encoding of `data` and returns its name.
///
/// The whole slice is fed to a freshly created uchardet detector in a single
/// call; the charset name the detector reports afterwards is returned as an
/// owned `String`.
///
/// # Errors
///
/// * `ErrorKind::UnrecognizableCharset` if the detector reports an empty
///   charset name.
/// * `ErrorKind::OutOfMemory` if uchardet returns status code `1` while
///   consuming `data`.
/// * `ErrorKind::Other` for any other non-zero status code.
///
/// # Panics
///
/// Panics if uchardet returns a null detector handle or a null charset name,
/// or if the charset name is not valid UTF-8.
pub fn detect_encoding_name(data: &[u8]) -> Result<String> {
    let mut detector = EncodingDetector::new();
    detector.handle_data(data)?;
    // The detector must be told that the input is complete before the
    // charset is queried.
    detector.data_end();
    detector.charset()
}
impl EncodingDetector {
    /// Creates a new detector backed by a fresh uchardet handle.
    ///
    /// # Panics
    ///
    /// Panics if `uchardet_new` returns a null pointer.
    fn new() -> EncodingDetector {
        // SAFETY: `uchardet_new` takes no arguments; its result is checked
        // for null before it is stored or used.
        let handle = unsafe { ffi::uchardet_new() };
        assert!(!handle.is_null());
        EncodingDetector { ptr: handle }
    }

    /// Feeds `data` to the detector.
    ///
    /// Returns `Ok(())` when uchardet reports status `0`; any other status is
    /// converted with `ErrorKind::from_nsresult` and returned as an error.
    fn handle_data(&mut self, data: &[u8]) -> Result<()> {
        let start = data.as_ptr() as *const c_char;
        let length = data.len() as size_t;
        // SAFETY: `start` and `length` describe the live `data` slice, and
        // `self.ptr` was asserted non-null in `new` (the only place in this
        // file that builds an `EncodingDetector`).
        let status = unsafe { ffi::uchardet_handle_data(self.ptr, start, length) };
        if status == 0 {
            Ok(())
        } else {
            Err(ErrorKind::from_nsresult(status).into())
        }
    }

    /// Signals to uchardet that no further input will follow.
    fn data_end(&mut self) {
        // SAFETY: `self.ptr` was asserted non-null in `new` and is not
        // released before `drop`.
        unsafe {
            ffi::uchardet_data_end(self.ptr);
        }
    }

    /// Returns the charset name currently reported by the detector.
    ///
    /// An empty name is reported as `ErrorKind::UnrecognizableCharset`.
    ///
    /// # Panics
    ///
    /// Panics if `uchardet_get_charset` returns a null pointer or a name that
    /// is not valid UTF-8.
    fn charset(&self) -> Result<String> {
        // SAFETY: `self.ptr` was asserted non-null in `new` and is not
        // released before `drop`.
        let raw_name = unsafe { ffi::uchardet_get_charset(self.ptr) };
        assert!(!raw_name.is_null());
        // SAFETY: `raw_name` is non-null (checked above); it is assumed to
        // point at a NUL-terminated string owned by the detector. The bytes
        // are copied into a `String` before this method returns.
        let name_bytes = unsafe { CStr::from_ptr(raw_name) }.to_bytes();
        let name = match from_utf8(name_bytes) {
            Ok(text) => text,
            Err(_) => panic!("uchardet_get_charset returned a charset name \
                              containing invalid characters"),
        };
        if name.is_empty() {
            Err(ErrorKind::UnrecognizableCharset.into())
        } else {
            Ok(name.to_string())
        }
    }
}
impl Drop for EncodingDetector {
    /// Releases the underlying uchardet handle.
    fn drop(&mut self) {
        // SAFETY: `self.ptr` was asserted non-null in `new`, and this is the
        // only place in this file where it is passed to `uchardet_delete`.
        unsafe {
            ffi::uchardet_delete(self.ptr);
        }
    }
}