igo-rs 0.2.2

Pure Rust port of Igo, a POS (part-of-speech) tagger for Japanese (日本語 形態素解析).
Documentation
use std::io::{self, BufReader, BufRead};
use std::fs::{self, File};
use std::path::Path;
use std::error::Error;
use std::mem;
use std::convert::From;
use byteorder::{NativeEndian as NE, ReadBytesExt, WriteBytesExt};
use Utf16Char;

pub trait InputUtil: io::Read {
    fn get_int(&mut self) -> io::Result<i32> {
        self.read_i32::<NE>()
    }

    fn get_int_array(&mut self, count: usize) -> io::Result<Box<[i32]>> {
        let mut v = vec![0i32; count];
        for i in 0..count {
            v[i] = self.read_i32::<NE>()?;
        }
        Ok(v.into_boxed_slice())
    }

    fn get_short_array(&mut self, count: usize) -> io::Result<Box<[i16]>> {
        let mut v = vec![0i16; count];
        for i in 0..count {
            v[i] = self.read_i16::<NE>()?;
        }
        Ok(v.into_boxed_slice())
    }

    fn get_char_array(&mut self, count: usize) -> io::Result<Box<[Utf16Char]>> {
        let mut v = vec![0u16; count];
        for i in 0..count {
            v[i] = self.read_u16::<NE>()?;
        }
        Ok(v.into_boxed_slice())
    }

    fn get_string(&mut self, count: usize) -> io::Result<Box<[Utf16Char]>> {
        let mut v = vec![0u16; count];
        for i in 0..count {
            v[i] = self.read_u16::<NE>()?;
        }
        Ok(v.into_boxed_slice())
    }
}

impl<R: io::Read + ? Sized> InputUtil for R {}


pub fn read_all_as_chars(file_path: &Path) -> io::Result<Box<[Utf16Char]>> {
    let metadata = fs::metadata(file_path)?;
    let file = File::open(file_path)?;
    let mut reader = BufReader::new(file);
    reader.get_string((metadata.len() as usize) / mem::size_of::<u16>())
}

pub fn read_all_as_int_array(file_path: &Path) -> io::Result<Box<[i32]>> {
    let metadata = fs::metadata(file_path)?;
    let file = File::open(file_path)?;
    let mut reader = BufReader::new(file);
    reader.get_int_array((metadata.len() as usize) / mem::size_of::<i32>())
}


pub trait OutputUtil: io::Write {
    fn put_string(&mut self, str: &[Utf16Char]) -> io::Result<()> {
        for i in 0..str.len() {
            self.write_u16::<NE>(str[i])?;
        }
        Ok(())
    }
}

impl<W: io::Write + ? Sized> OutputUtil for W {}


use encoding::{EncodingRef, DecoderTrap};
use encoding::label::encoding_from_whatwg_label;
use dictionary::build::*;

pub struct ReadLine<'a> {
    reader: BufReader<File>,
    line_number: i32,
    path: &'a Path,
    decoder: Option<EncodingRef>,
    encoded_buf: Vec<u8>,
}

impl<'a> ReadLine<'a> {
    pub fn new(file_path: &'a Path, encoding: &str) -> AppResult<ReadLine<'a>> {
        let file = File::open(file_path)?;
        let decoder = encoding_from_whatwg_label(encoding)
            .ok_or(format!("Unknown encoding; {}", encoding))?;
        //        debug!("decoder: {}", decoder.name());
        Ok(ReadLine {
            reader: BufReader::new(file),
            line_number: 0,
            path: file_path,
            decoder: if decoder.name() != "utf-8" { Some(decoder) } else { None },
            encoded_buf: Vec::new(),
        })
    }

    pub fn next(&mut self, read_buf: &mut String) -> AppResult<usize> {
        match self.decoder {
            Some(decoder) => self.next_with_decoder(decoder, read_buf),
            None => self.next_without_decoder(read_buf)
        }
    }

    fn next_without_decoder(&mut self, read_buf: &mut String) -> AppResult<usize> {
        read_buf.clear();
        let r = self.reader.read_line(read_buf);
        if r.as_ref().map(|len| *len > 0).unwrap_or(false) {
            self.line_number += 1;
        }
        r.map_err(|io_err| AppError::from(io_err))
    }

    fn next_with_decoder(&mut self, decoder: EncodingRef, decode_buf: &mut String) -> AppResult<usize> {
        self.encoded_buf.clear();
        decode_buf.clear();
        match self.reader.read_until(b'\n', self.encoded_buf.as_mut()) {
            Ok(len) => {
                if len > 0 {
                    decoder.decode_to(&self.encoded_buf, DecoderTrap::Strict, decode_buf)
                        .map_err(|e| AppError::from(e))
                        .map(|_| {
                            self.line_number += 1;
                            decode_buf.len()
                        })
                } else {
                    Ok(0)
                }
            },
            Err(io_err) => Err(AppError::from(io_err))
        }
    }

    pub fn parse_error<S: Into<String>>(&self, msg: S) -> AppError {
        AppError::Parse {
            message: msg.into(),
            path: self.path.to_path_buf(),
            line_number: self.line_number
        }
    }

    pub fn convert_error<E: Error>(&self, e: E) -> AppError {
        self.parse_error(e.description())
    }
}


#[allow(dead_code)]
pub mod debug {
    use std::fs::File;
    use std::io::Write;

    pub fn dump_string_list(list: &Vec<String>, path: &str) {
        let mut f = File::create(path).unwrap();
        for s in list {
            f.write_all(s.as_bytes()).unwrap();
            f.write_all("\n".as_bytes()).unwrap();
        }
    }
}