onig 1.3.0

Rust-Onig is a set of rust bindings for the oniguruma regular expression library.
Documentation
use std::ptr::null;
use std::mem::transmute;
use libc::c_int;
use onig_sys;

use super::CaptureTreeNode;
use super::flags::{TraverseCallbackAt, CALLBACK_AT_FIRST};

/// Represents a set of capture groups found in a search or match.
#[derive(Debug, Eq, PartialEq)]
pub struct Region {
    raw: onig_sys::OnigRegion,
}

impl Region {
    /// Create a new empty `Region`
    pub fn new() -> Region {
        Region {
            raw: onig_sys::OnigRegion {
                allocated: 0,
                num_regs: 0,
                beg: null(),
                end: null(),
                history_root: null(),
            },
        }
    }

    /// Create a new region with a given capacity. This function allocates
    /// a new region object as in `Region::new` and resizes it to
    /// contain at least `capacity` regions.
    ///
    /// # Arguments
    ///
    /// * `capacity` - the number of captures this region should be
    /// capable of storing without allocation.
    pub fn with_capacity(capacity: usize) -> Region {
        let mut region = Self::new();
        region.reserve(capacity);
        region
    }

    /// Clone From Raw
    ///
    /// Construct a new region based on an existing raw
    /// `*onig_sys::OnigRegion` pointer by copying.
    pub fn clone_from_raw(ptr: *const onig_sys::OnigRegion) -> Self {
        let mut region = Self::new();
        unsafe {
            onig_sys::onig_region_copy(&mut region.raw, ptr);
        }
        region
    }

    /// This can be used to clear out a region so it can be used
    /// again. See [`onig_sys::onig_region_clear`][region_clear]
    ///
    /// [region_clear]: ./onig_sys/fn.onig_region_clear.html
    pub fn clear(&mut self) {
        unsafe {
            onig_sys::onig_region_clear(&mut self.raw);
        }
    }

    /// Get the current capacity of the region.
    pub fn capacity(&self) -> usize {
        self.raw.allocated as usize
    }

    /// Updates the region to contain `new_capacity` slots. See
    /// [`onig_sys::onig_region_resize`][region_resize] for mor
    /// information.
    ///
    /// [region_resize]: ./onig_sys/fn.onig_region_resize.html
    ///
    /// # Arguments
    ///
    ///  * `new_capacity` - The new number of groups in the region.
    pub fn reserve(&mut self, new_capacity: usize) {
        let r = unsafe { onig_sys::onig_region_resize(&mut self.raw, new_capacity as c_int) };
        if r != onig_sys::ONIG_NORMAL {
            panic!("Onig: fail to memory allocation during region resize")
        }
    }

    /// Get the size of the region. Returns the number of registers in
    /// the region.
    pub fn len(&self) -> usize {
        self.raw.num_regs as usize
    }

    /// Returns the start and end positions of the Nth capture group. Returns
    /// `None` if `pos` is not a valid capture group or if the capture group did
    /// not match anything. The positions returned are always byte indices with
    /// respect to the original string matched.
    pub fn pos(&self, pos: usize) -> Option<(usize, usize)> {
        if pos >= self.len() {
            return None;
        }
        let (beg, end) =
            unsafe { (*self.raw.beg.offset(pos as isize), *self.raw.end.offset(pos as isize)) };
        if beg != onig_sys::ONIG_REGION_NOTPOS {
            Some((beg as usize, end as usize))
        } else {
            None
        }
    }

    /// Get Capture Tree
    ///
    /// Returns the capture tree for this region, if there is one.
    pub fn tree(&self) -> Option<&CaptureTreeNode> {
        let tree = unsafe { onig_sys::onig_get_capture_tree(&self.raw) };
        if tree.is_null() {
            None
        } else {
            Some(unsafe { transmute(tree) })
        }
    }

    /// Get an iterator over the captures in the region.
    pub fn iter<'a>(&'a self) -> RegionIter<'a> {
        RegionIter {
            region: self,
            pos: 0,
        }
    }

    /// Walk the Tree of Captures
    ///
    /// The given callback is invoked for each node in the capture
    /// tree. Each node is passed to the callback before any children.
    pub fn tree_traverse<F>(&self, callback: F) -> i32
        where F: Fn(u32, (usize, usize), u32) -> bool
    {
        self.tree_traverse_at(CALLBACK_AT_FIRST, callback)
    }

    /// Walk the Tree of Captures in a Given Order
    ///
    /// The given callback is invoked for each node in the capture
    /// tree. The order in which the callback is invoked can be
    /// chosen.
    pub fn tree_traverse_at<F>(&self, at: TraverseCallbackAt, mut callback: F) -> i32
        where F: Fn(u32, (usize, usize), u32) -> bool
    {
        use onig_sys::onig_capture_tree_traverse;
        use libc::{c_void, c_int};

        extern "C" fn traverse_cb<F>(group: c_int,
                                     beg: c_int,
                                     end: c_int,
                                     level: c_int,
                                     _at: c_int,
                                     ud: *mut c_void)
                                     -> c_int
            where F: Fn(u32, (usize, usize), u32) -> bool
        {
            let callback = unsafe { &*(ud as *mut F) };
            if callback(group as u32, (beg as usize, end as usize), level as u32) {
                0
            } else {
                -1
            }
        }

        unsafe {
            onig_capture_tree_traverse(&self.raw,
                                       at.bits(), // ONIG_TRAVERSE_CALLBACK_AT_FIRST,
                                       traverse_cb::<F>,
                                       &mut callback as *mut F as *mut c_void)
        }
    }
}

impl Drop for Region {
    fn drop(&mut self) {
        unsafe {
            onig_sys::onig_region_free(&mut self.raw, 0);
        }
    }
}

impl Clone for Region {
    fn clone(&self) -> Self {
        Self::clone_from_raw(&self.raw)
    }
}

impl<'a> IntoIterator for &'a Region {
    type Item = (usize, usize);
    type IntoIter = RegionIter<'a>;
    fn into_iter(self) -> Self::IntoIter {
        return self.iter();
    }
}

/// Region Iterator
///
/// This struct is responsible for holding iteration state over a
/// given region.
pub struct RegionIter<'a> {
    region: &'a Region,
    pos: usize,
}

impl<'a> Iterator for RegionIter<'a> {
    type Item = (usize, usize);

    fn next(&mut self) -> Option<Self::Item> {
        let next = self.region.pos(self.pos);
        self.pos += 1;
        next
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let len = self.region.len();
        (len, Some(len))
    }

    fn count(self) -> usize {
        self.region.len()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use super::super::{Regex, SEARCH_OPTION_NONE};

    #[test]
    fn test_region_create() {
        Region::new();
    }

    #[test]
    fn test_region_clear() {
        let mut region = Region::new();
        region.clear();
    }

    #[test]
    fn test_region_copy() {
        let region = Region::new();
        let new_region = region.clone();
        assert_eq!(new_region.len(), region.len());
    }

    #[test]
    fn test_region_resize() {
        {
            let mut region = Region::new();
            assert!(region.capacity() == 0);
            region.reserve(100);
            {
                // can still get the capacity without a mutable borrow
                let region_borrowed = &region;
                assert!(region_borrowed.capacity() == 100);
            }
        }

        {
            let region = Region::with_capacity(10);
            assert!(region.capacity() == 10);
        }
    }

    #[test]
    fn test_region_empty_iterate() {
        let region = Region::new();
        for _ in &region {
            panic!("region should not contain any elements");
        }
    }

    #[test]
    fn test_region_iter_returns_iterator() {
        let region = Region::new();
        let all = region.iter().collect::<Vec<_>>();
        assert_eq!(all, Vec::new());
    }

    #[test]
    fn test_region_iterate_with_captures() {
        let mut region = Region::new();
        let reg = Regex::new("(a+)(b+)(c+)").unwrap();
        let res = reg.search_with_options("aaaabbbbc", 0, 9, SEARCH_OPTION_NONE, Some(&mut region));
        assert!(res.is_some());
        let all = region.iter().collect::<Vec<_>>();
        assert_eq!(all, vec![(0, 9), (0, 4), (4, 8), (8, 9)]);
    }

    #[test]
    fn test_region_all_iteration_options() {
        let mut region = Region::new();
        let reg = Regex::new("a(b)").unwrap();
        let res = reg.search_with_options("habitat", 0, 7, SEARCH_OPTION_NONE, Some(&mut region));
        assert!(res.is_some());

        // collect into a vector by iterating with a for loop
        let mut a = Vec::<(usize, usize)>::new();
        for pos in &region {
            a.push(pos)
        }

        // collect into a vector by using `iter` and collec
        let b = region.iter().collect::<Vec<_>>();

        let expected = vec![(1, 3), (2, 3)];
        assert_eq!(expected, a);
        assert_eq!(expected, b);

        assert_eq!(2, region.iter().count());
    }
}