use super::ffi;
use super::{Error, Model, Node, NodeCursor};
use bitflags::bitflags;
use std::ffi::CStr;
use std::ffi::c_char;
use std::marker::PhantomData;
use std::ptr::NonNull;
/// Owning handle to a MeCab tagger created from a [`Model`].
///
/// The `'a` lifetime borrows the model, so a tagger cannot outlive the model
/// it was built from.
pub struct Tagger<'a> {
// Non-null raw tagger pointer; released with `mecab_destroy` when dropped.
inner: NonNull<ffi::mecab_t>,
// Carries the `&'a Model` borrow without storing an actual reference.
_marker: PhantomData<&'a Model>,
}
impl<'a> Tagger<'a> {
pub fn as_ptr(&self) -> *mut ffi::mecab_t {
self.inner.as_ptr()
}
pub fn new(model: &'a Model) -> Result<Self, Error> {
unsafe {
let inner = ffi::mecab_model_new_tagger(model.inner.as_ptr());
NonNull::new(inner)
.map(|inner| Self {
inner,
_marker: PhantomData,
})
.ok_or_else(Error::global)
}
}
pub fn parse(&self, lattice: &mut LatticeGuard<'_, '_, '_>) -> Result<(), Error> {
unsafe {
let result = ffi::mecab_parse_lattice(self.as_ptr(), lattice.lattice.as_ptr());
if result == 0 {
Err(Error::with_lattice(lattice.as_ref()))
} else {
Ok(())
}
}
}
}
impl Drop for Tagger<'_> {
    /// Releases the underlying MeCab tagger.
    fn drop(&mut self) {
        let raw = self.inner.as_ptr();
        // SAFETY: `raw` is the non-null pointer this handle owns, and it is
        // not used again after this call.
        unsafe {
            ffi::mecab_destroy(raw);
        }
    }
}
// SAFETY: NOTE(review) — this asserts that the raw tagger handle may be moved
// to another thread. Presumably a MeCab tagger has no thread affinity; confirm
// against the upstream documentation. No `Sync` impl appears in this section.
unsafe impl Send for Tagger<'_> {}
/// Owning handle to a MeCab lattice created from a [`Model`].
///
/// The `'a` lifetime borrows the model, so a lattice cannot outlive the model
/// it was built from.
pub struct Lattice<'a> {
// Non-null raw lattice pointer; released with `mecab_lattice_destroy` when dropped.
inner: NonNull<ffi::mecab_lattice_t>,
// Carries the `&'a Model` borrow without storing an actual reference.
_marker: PhantomData<&'a Model>,
}
impl<'a> Lattice<'a> {
    /// Returns the raw MeCab lattice pointer wrapped by this handle.
    ///
    /// Ownership stays with `self`; the pointer is destroyed when the lattice
    /// is dropped.
    pub fn as_ptr(&self) -> *mut ffi::mecab_lattice_t {
        self.inner.as_ptr()
    }

    /// Creates a new lattice from `model`.
    ///
    /// # Errors
    ///
    /// Returns the error produced by `Error::global` when MeCab hands back a
    /// null lattice pointer.
    pub fn new(model: &'a Model) -> Result<Self, Error> {
        // SAFETY: the `&Model` borrow keeps the model handle alive for the call.
        let raw = unsafe { ffi::mecab_model_new_lattice(model.inner.as_ptr()) };
        NonNull::new(raw)
            .map(|inner| Self {
                inner,
                _marker: PhantomData,
            })
            .ok_or_else(Error::global)
    }

    /// Attaches `s` to the lattice and returns a guard for working with it.
    ///
    /// MeCab receives a pointer into `s` together with its byte length. The
    /// guard carries the `'s` lifetime, so the text must stay borrowed for as
    /// long as the guard is used.
    pub fn set_sentence<'l, 's>(&'l mut self, s: &'s str) -> LatticeGuard<'a, 'l, 's> {
        let text = s.as_ptr() as *const c_char;
        // SAFETY: `text` points at `s.len()` readable bytes, and the returned
        // guard keeps `s` borrowed.
        unsafe {
            ffi::mecab_lattice_set_sentence2(self.as_ptr(), text, s.len());
        }
        LatticeGuard {
            lattice: self,
            _marker: PhantomData,
        }
    }

    /// Clears the lattice via `mecab_lattice_clear`.
    pub fn clear(&mut self) {
        // SAFETY: `self.inner` is a live lattice handle.
        unsafe { ffi::mecab_lattice_clear(self.as_ptr()) }
    }

    /// Returns `true` when `mecab_lattice_is_available` reports a non-zero value.
    pub fn is_available(&self) -> bool {
        // SAFETY: `self.inner` is a live lattice handle.
        let status = unsafe { ffi::mecab_lattice_is_available(self.as_ptr()) };
        status != 0
    }

    /// Returns the value MeCab stores as `Z` (the CRF normalization factor).
    pub fn crf_norm_factor(&self) -> f64 {
        // SAFETY: `self.inner` is a live lattice handle.
        unsafe { ffi::mecab_lattice_get_z(self.as_ptr()) }
    }

    /// Overwrites the value MeCab stores as `Z` (the CRF normalization factor).
    pub fn set_crf_norm_factor(&mut self, z: f64) {
        // SAFETY: `self.inner` is a live lattice handle.
        unsafe { ffi::mecab_lattice_set_z(self.as_ptr(), z) }
    }

    /// Returns the value MeCab stores as `theta` (the temperature parameter).
    pub fn temperature(&self) -> f64 {
        // SAFETY: `self.inner` is a live lattice handle.
        unsafe { ffi::mecab_lattice_get_theta(self.as_ptr()) }
    }

    /// Overwrites the value MeCab stores as `theta` (the temperature parameter).
    pub fn set_temperature(&mut self, theta: f64) {
        // SAFETY: `self.inner` is a live lattice handle.
        unsafe { ffi::mecab_lattice_set_theta(self.as_ptr(), theta) }
    }

    /// Misspelled alias of [`Lattice::temperature`], kept so existing callers
    /// keep compiling.
    pub fn temparature(&self) -> f64 {
        self.temperature()
    }

    /// Misspelled alias of [`Lattice::set_temperature`], kept so existing
    /// callers keep compiling.
    pub fn set_temparature(&mut self, theta: f64) {
        self.set_temperature(theta)
    }

    /// Returns the request flags currently set on the lattice.
    pub fn request_type(&self) -> RequestType {
        // SAFETY: `self.inner` is a live lattice handle.
        let raw = unsafe { ffi::mecab_lattice_get_request_type(self.as_ptr()) };
        RequestType::from_raw(raw)
    }

    /// Replaces the request flags on the lattice.
    pub fn set_request_type(&mut self, request_type: RequestType) {
        // SAFETY: `self.inner` is a live lattice handle.
        unsafe { ffi::mecab_lattice_set_request_type(self.as_ptr(), request_type.as_raw()) }
    }

    /// Returns the boundary constraint at byte offset `pos` of the sentence.
    ///
    /// Offsets past the end of the current sentence report
    /// [`BoundaryConstraintType::Any`] without calling into MeCab, because
    /// libmecab indexes its constraint table with `pos` unchecked.
    pub fn boundary_constraint(&self, pos: usize) -> BoundaryConstraintType {
        // SAFETY: `self.inner` is a live lattice handle.
        let len = unsafe { ffi::mecab_lattice_get_size(self.as_ptr()) };
        if pos > len {
            return BoundaryConstraintType::Any;
        }
        // SAFETY: `pos` was checked against the sentence size above.
        let raw = unsafe { ffi::mecab_lattice_get_boundary_constraint(self.as_ptr(), pos) };
        BoundaryConstraintType::from_raw(raw)
    }

    /// Sets the boundary constraint at byte offset `pos` of the sentence.
    ///
    /// # Panics
    ///
    /// Panics when `pos` is greater than the size of the sentence currently
    /// attached to the lattice. Forwarding such an offset would make libmecab
    /// write outside its constraint table.
    pub fn set_boundary_constraint(&mut self, pos: usize, constraint: BoundaryConstraintType) {
        // SAFETY: `self.inner` is a live lattice handle.
        let len = unsafe { ffi::mecab_lattice_get_size(self.as_ptr()) };
        assert!(
            pos <= len,
            "boundary constraint position {pos} is out of range for a sentence of {len} bytes"
        );
        // SAFETY: `pos` was checked against the sentence size above.
        unsafe {
            ffi::mecab_lattice_set_boundary_constraint(self.as_ptr(), pos, constraint.as_raw());
        }
    }
}
impl Drop for Lattice<'_> {
    /// Releases the underlying MeCab lattice.
    fn drop(&mut self) {
        let raw = self.inner.as_ptr();
        // SAFETY: `raw` is the non-null pointer this handle owns, and it is
        // not used again after this call.
        unsafe {
            ffi::mecab_lattice_destroy(raw);
        }
    }
}
// SAFETY: NOTE(review) — this asserts that the raw lattice handle may be moved
// to another thread. Presumably a MeCab lattice has no thread affinity; confirm
// against the upstream documentation. No `Sync` impl appears in this section.
unsafe impl Send for Lattice<'_> {}
/// A [`Lattice`] with a sentence attached, as returned by `Lattice::set_sentence`.
///
/// `'l` is the exclusive borrow of the lattice and `'s` is the borrow of the
/// sentence text that was handed to MeCab.
pub struct LatticeGuard<'a, 'l, 's> {
// Exclusive borrow of the lattice for as long as the guard lives.
lattice: &'l mut Lattice<'a>,
// Carries the sentence borrow without storing a reference to the text.
_marker: PhantomData<&'s c_char>,
}
impl<'a> AsRef<Lattice<'a>> for LatticeGuard<'a, '_, '_> {
    /// Borrows the guarded lattice immutably.
    fn as_ref(&self) -> &Lattice<'a> {
        &*self.lattice
    }
}
impl<'a> AsMut<Lattice<'a>> for LatticeGuard<'a, '_, '_> {
    /// Borrows the guarded lattice mutably.
    fn as_mut(&mut self) -> &mut Lattice<'a> {
        &mut *self.lattice
    }
}
impl<'a, 'l> LatticeGuard<'a, 'l, '_> {
    /// Consumes the guard and hands back the exclusive lattice borrow it held.
    pub fn into_inner(self) -> &'l mut Lattice<'a> {
        let Self { lattice, .. } = self;
        lattice
    }
}
impl LatticeGuard<'_, '_, '_> {
    /// Renders the lattice with `mecab_lattice_tostr` and returns the text.
    ///
    /// Returns an empty string when the lattice has no BOS node yet (nothing
    /// has been parsed) or when MeCab returns a null string.
    ///
    /// NOTE(review): the bytes are not validated as UTF-8; this presumably
    /// relies on the dictionary charset being UTF-8 — confirm.
    pub fn to_str(&mut self) -> &str {
        let lattice = self.lattice.as_ptr();
        // SAFETY: `lattice` is a live handle; every pointer MeCab returns is
        // null-checked before it is dereferenced.
        unsafe {
            // With no BOS node there is nothing to render, and libmecab's
            // writer would start walking from a null node.
            if ffi::mecab_lattice_get_bos_node(lattice).is_null() {
                return "";
            }
            let text = ffi::mecab_lattice_tostr(lattice);
            // `CStr::from_ptr` requires a non-null pointer.
            if text.is_null() {
                return "";
            }
            std::str::from_utf8_unchecked(CStr::from_ptr(text).to_bytes())
        }
    }

    /// Returns a cursor positioned at the lattice's beginning-of-sentence node.
    ///
    /// NOTE(review): the node pointer is passed to `Node::from_ptr` as-is;
    /// verify how that helper treats a null pointer from an unparsed lattice.
    pub fn bos_node(&self) -> NodeCursor<'_> {
        // SAFETY: the guarded lattice handle is live for the call.
        unsafe {
            let bos = ffi::mecab_lattice_get_bos_node(self.lattice.as_ptr());
            NodeCursor {
                curr: Node::from_ptr(bos),
            }
        }
    }

    /// Returns a cursor positioned at the lattice's end-of-sentence node.
    ///
    /// NOTE(review): the node pointer is passed to `Node::from_ptr` as-is;
    /// verify how that helper treats a null pointer from an unparsed lattice.
    pub fn eos_node(&self) -> NodeCursor<'_> {
        // SAFETY: the guarded lattice handle is live for the call.
        unsafe {
            let eos = ffi::mecab_lattice_get_eos_node(self.lattice.as_ptr());
            NodeCursor {
                curr: Node::from_ptr(eos),
            }
        }
    }
}
impl<'s> LatticeGuard<'_, '_, 's> {
    /// Returns the sentence currently attached to the lattice.
    ///
    /// Yields an empty string when MeCab reports a null sentence pointer.
    /// Otherwise the slice spans `sentence_len()` bytes from that pointer.
    pub fn sentence(&self) -> &str {
        // SAFETY: the guarded lattice handle is live, and the pointer is
        // null-checked before a slice is built from it.
        unsafe {
            let start = ffi::mecab_lattice_get_sentence(self.lattice.as_ptr());
            if start.is_null() {
                return "";
            }
            let bytes = std::slice::from_raw_parts(start as *const u8, self.sentence_len());
            std::str::from_utf8_unchecked(bytes)
        }
    }

    /// Returns the sentence size reported by `mecab_lattice_get_size`.
    pub fn sentence_len(&self) -> usize {
        let raw = self.lattice.as_ptr();
        // SAFETY: the guarded lattice handle is live for the call.
        unsafe { ffi::mecab_lattice_get_size(raw) }
    }
}
bitflags! {
    /// Bit flags describing what a lattice parse should produce.
    ///
    /// The bit values follow MeCab's `MECAB_*` request-type constants.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct RequestType: u8 {
        /// Obtain the single best result.
        const ONE_BEST = 1 << 0;
        /// Obtain N-best results.
        const NBEST = 1 << 1;
        /// Enable partial parsing mode.
        const PARTIAL = 1 << 2;
        /// Obtain marginal probabilities.
        const MARGINAL_PROB = 1 << 3;
        /// Obtain alternative results.
        const ALTERNATIVE = 1 << 4;
        /// Make the result list traverse all nodes in the lattice.
        const ALL_MORPHS = 1 << 5;
        /// Have MeCab copy the sentence into an internal buffer.
        const ALLOCATE_SENTENCE = 1 << 6;
    }
}
impl Default for RequestType {
fn default() -> Self {
Self::ONE_BEST
}
}
impl RequestType {
    /// Converts the flag set into the `i32` value passed across the FFI boundary.
    pub fn as_raw(self) -> i32 {
        i32::from(self.bits())
    }

    /// Builds a flag set from a raw `i32`.
    ///
    /// Only the low eight bits are considered, and bits that do not match a
    /// known flag are dropped.
    pub fn from_raw(request_type: i32) -> Self {
        let low_byte = request_type as u8;
        Self::from_bits_truncate(low_byte)
    }
}
/// Constraint that can be placed on a byte position of the sentence.
///
/// The discriminants are the raw integers exchanged with MeCab.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum BoundaryConstraintType {
    /// The token boundary is not specified.
    Any = 0,
    /// The position is a token boundary.
    Token = 1,
    /// The position is inside a token, i.e. not a boundary.
    InsideToken = 2,
}

impl BoundaryConstraintType {
    /// Returns the raw integer for this constraint: `0`, `1` or `2`.
    pub fn as_raw(self) -> i32 {
        self as i32
    }

    /// Maps a raw integer back to a constraint.
    ///
    /// Any value other than `1` or `2` is treated as [`BoundaryConstraintType::Any`].
    pub fn from_raw(constraint: i32) -> Self {
        match constraint {
            raw if raw == Self::Token as i32 => Self::Token,
            raw if raw == Self::InsideToken as i32 => Self::InsideToken,
            _ => Self::Any,
        }
    }
}