onig 1.3.0

Rust-Onig is a set of Rust bindings for the Oniguruma regular expression library.
Documentation
use std::marker::PhantomData;
use std::iter::Iterator;
use std::ptr::null;
use std::str::from_utf8_unchecked;
use std::slice::from_raw_parts;
use libc::{c_int, c_uint, c_void, c_uchar};

use onig_sys::{self, OnigUChar, OnigRegex};

use super::Regex;

impl Regex {
    /// Returns the number of named groups into regex.
    pub fn capture_names_len(&self) -> usize {
        unsafe { onig_sys::onig_number_of_names(self.raw) as usize }
    }

    /// Returns the iterator over named groups as a tuple with the group name
    /// and group indexes.
    pub fn capture_names<'r>(&'r self) -> CaptureNames<'r> {
        CaptureNames {
            table: unsafe { (*self.raw).name_table as *const StTable },
            bin_idx: -1,
            entry_ptr: null(),
            _phantom: PhantomData,
        }
    }

    /// Calls `callback` for each named group in the regex. Each callback gets the group name
    /// and group indices.
    pub fn foreach_name<F>(&self, mut callback: F) -> i32
        where F: FnMut(&str, &[u32]) -> bool
    {
        extern "C" fn foreach_cb<F>(name: *const OnigUChar,
                                    name_end: *const OnigUChar,
                                    ngroup_num: c_int,
                                    group_nums: *const c_int,
                                    _regex: OnigRegex,
                                    arg: *mut c_void)
                                    -> c_int
            where F: FnMut(&str, &[u32]) -> bool
        {
            let name = unsafe {
                from_utf8_unchecked(from_raw_parts(name, (name_end as usize - name as usize)))
            };

            let groups = unsafe { from_raw_parts(group_nums as *const u32, ngroup_num as usize) };

            let callback = unsafe { &mut *(arg as *mut F) };

            if callback(name, groups) { 0 } else { -1 }
        }

        unsafe {
            onig_sys::onig_foreach_name(self.raw,
                                        foreach_cb::<F>,
                                        &mut callback as *mut F as *mut c_void)
        }
    }
}

/// Record describing one group name, as read by `CaptureNames::next`.
///
/// The iterator obtains a pointer to this struct by casting
/// `StTableEntry::record`.
/// NOTE(review): the field layout presumably mirrors Oniguruma's internal
/// `NameEntry` struct — confirm against the C sources that are linked.
#[repr(C)]
#[derive(Debug)]
struct NameEntry {
    /// Start of the name bytes; read together with `name_len`.
    name: *const c_uchar,
    /// Length of the name in bytes.
    name_len: c_int,
    /// Number of groups that carry this name.
    back_num: c_int,
    /// NOTE(review): presumably the allocated capacity of `back_refs`; it is
    /// never read by the visible code — confirm.
    back_alloc: c_int,
    /// The group index, used by the iterator when `back_num` is not greater than 1.
    back_ref1: c_int,
    /// Array of `back_num` group indices, used by the iterator when `back_num > 1`.
    back_refs: *const c_int,
}

// Integer type of the `key` and `record` slots of `StTableEntry`.
// NOTE(review): presumably mirrors the C `st_data_t` typedef, whose width
// differs between Windows and other targets — confirm against the C headers.

// Windows: pointer-sized unsigned integer.
#[cfg(windows)]
type StDataT = ::libc::uintptr_t;

// Every other target: C `unsigned long`.
#[cfg(not(windows))]
type StDataT = ::libc::c_ulong;

/// One entry in a bin's linked chain of the name table.
///
/// NOTE(review): the layout presumably mirrors the C `st_table_entry` struct —
/// confirm against the C sources that are linked.
#[repr(C)]
#[derive(Debug)]
struct StTableEntry {
    /// NOTE(review): presumably the cached hash of `key`; not read by the visible code.
    hash: c_uint,
    /// NOTE(review): presumably the lookup key; not read by the visible code.
    key: StDataT,
    /// Stored value; `CaptureNames::next` casts it to `*const NameEntry`.
    record: StDataT,
    /// Next entry in the same bin; a null pointer ends the chain.
    next: *const StTableEntry,
}

/// Hash table header that `Regex::capture_names` obtains by casting the
/// regex's `name_table` pointer.
///
/// NOTE(review): the layout presumably mirrors the C `st_table` struct —
/// confirm against the C sources that are linked.
#[repr(C)]
#[derive(Debug)]
struct StTable {
    /// NOTE(review): presumably a pointer to the table's hash-function
    /// descriptor; never dereferenced by the visible code.
    type_: *const c_void,
    /// Number of bins; `CaptureNames::next` stops once its bin index reaches it.
    num_bins: c_int,
    /// NOTE(review): presumably the number of entries stored across all bins — confirm.
    num_entries: c_int,
    /// Array of `num_bins` chain heads, indexed by bin.
    bins: *const *const StTableEntry,
}

/// CaptureNames is an iterator over named groups as a tuple with the group name
/// and group indexes.
///
/// `'r` is the lifetime of the Regex object.
#[derive(Debug)]
pub struct CaptureNames<'r> {
    /// Name table of the regex; `next` yields nothing when this is null.
    table: *const StTable,
    /// Index of the bin currently being walked; starts at -1, i.e. before the
    /// first bin.
    bin_idx: c_int,
    /// Next entry to yield from the current bin's chain; null when the chain
    /// is exhausted or iteration has not started.
    entry_ptr: *const StTableEntry,
    /// Carries the `'r` borrow of the `Regex`, since the raw pointers above
    /// hold no lifetime of their own.
    _phantom: PhantomData<&'r Regex>,
}

impl<'r> Iterator for CaptureNames<'r> {
    type Item = (&'r str, &'r [u32]);

    /// Yields the next named group as `(name, group indices)`, or `None` once
    /// every bin of the name table has been walked (or the table is null).
    fn next(&mut self) -> Option<(&'r str, &'r [u32])> {
        // SAFETY: relies on `table` being either null or the name table of the
        // regex borrowed for `'r`, and on `StTable`, `StTableEntry` and
        // `NameEntry` matching the layout of the C structures they overlay.
        // NOTE(review): neither can be verified from this file alone.
        unsafe {
            // The current chain is exhausted (or iteration has not started):
            // move forward bin by bin until a non-empty chain is found.
            while self.entry_ptr.is_null() {
                if self.table.is_null() || self.bin_idx + 1 >= (*self.table).num_bins {
                    return None;
                }
                self.bin_idx += 1;
                self.entry_ptr = *(*self.table).bins.offset(self.bin_idx as isize);
            }
            let entry = (*self.entry_ptr).record as *const NameEntry;
            let name = from_utf8_unchecked(from_raw_parts((*entry).name,
                                                          (*entry).name_len as usize));
            let groups = if (*entry).back_num > 1 {
                // Several groups share the name: the indices live in `back_refs`.
                let ptr = (*entry).back_refs as *const u32;
                let len = (*entry).back_num as usize;
                from_raw_parts(ptr, len)
            } else {
                // Otherwise the single index is stored inline in `back_ref1`.
                let ptr = &(*entry).back_ref1 as *const i32 as *const u32;
                from_raw_parts(ptr, 1)
            };
            // Step along the chain so the next call resumes after this entry.
            self.entry_ptr = (*self.entry_ptr).next;
            Some((name, groups))
        }
    }

    /// Bounds the number of remaining items by the table's entry count.
    ///
    /// One item is yielded per table entry, so the entry count (not the number
    /// of bins) is the upper bound. It is also the exact lower bound as long as
    /// iteration has not started; afterwards the lower bound falls back to 0
    /// because the number of entries already consumed is not tracked.
    fn size_hint(&self) -> (usize, Option<usize>) {
        if self.table.is_null() {
            // `next` yields nothing for a null table.
            return (0, Some(0));
        }
        // SAFETY: `table` is non-null here; see the note in `next` for the
        // assumptions about what it points to.
        let total = unsafe { (*self.table).num_entries } as usize;
        // `bin_idx` is still negative only while no bin has been entered.
        let lower = if self.bin_idx < 0 { total } else { 0 };
        (lower, Some(total))
    }
}

#[cfg(test)]
mod tests {
    use super::super::*;

    /// `capture_names_len` counts distinct group names, not groups.
    #[test]
    fn test_regex_names_len() {
        // Unnamed groups contribute no names.
        let unnamed = Regex::new("(he)(l+)(o)").unwrap();
        assert_eq!(unnamed.capture_names_len(), 0);

        // `bar` names two groups but is counted once.
        let named = Regex::new("(?<foo>he)(?<bar>l+)(?<bar>o)").unwrap();
        assert_eq!(named.capture_names_len(), 2);
        assert_eq!(named.capture_histories_len(), 0);
    }

    /// `capture_names` yields each name together with all of its group indices.
    #[test]
    fn test_regex_names() {
        // Unnamed groups produce an empty iterator.
        let unnamed = Regex::new("(he)(l+)(o)").unwrap();
        let found: Vec<_> = unnamed.capture_names().collect();
        assert_eq!(found, vec![]);

        // A name shared by two groups is reported once with both indices.
        let named = Regex::new("(?<foo>he)(?<bar>l+)(?<bar>o)").unwrap();
        let found: Vec<_> = named.capture_names().collect();
        assert_eq!(found,
                   [("foo", &[1u32] as &[u32]), ("bar", &[2u32, 3] as &[u32])]);
    }
}