#![cfg_attr(feature = "std-pattern", feature(pattern))]
#![deny(missing_docs)]
#[macro_use]
extern crate bitflags;
#[macro_use]
extern crate lazy_static;
extern crate onig_sys;
#[cfg(windows)]
extern crate libc;
mod find;
mod flags;
mod region;
mod replace;
mod match_param;
mod names;
mod syntax;
mod tree;
mod utils;
mod buffers;
#[cfg(feature = "std-pattern")]
mod pattern;
pub use flags::*;
pub use match_param::MatchParam;
pub use names::CaptureNames;
pub use region::Region;
pub use find::{Captures, FindCaptures, FindMatches, RegexSplits, RegexSplitsN, SubCaptures,
SubCapturesPos};
pub use buffers::{EncodedBytes, EncodedChars};
pub use replace::Replacer;
pub use tree::{CaptureTreeNode, CaptureTreeNodeIter};
pub use syntax::{MetaChar, Syntax};
pub use utils::{copyright, define_user_property, version};
use std::{error, fmt, str};
use std::sync::Mutex;
use std::ptr::{null, null_mut};
use std::os::raw::c_int;
/// An error reported by the Oniguruma library.
///
/// Pairs the numeric status code returned by an `onig_sys` call with the
/// message text obtained for that code when the error value was built.
pub struct Error {
// Raw status code returned by the failing `onig_sys` call.
code: c_int,
// Message text produced by `onig_error_code_to_str` for `code`.
description: String,
}
/// A compiled Oniguruma regular expression.
///
/// Wraps the raw regex handle produced by `onig_new`; the handle is released
/// with `onig_free` when the value is dropped. The derived `Eq`/`PartialEq`
/// compare the raw handle itself, not the pattern it was compiled from.
#[derive(Debug, Eq, PartialEq)]
pub struct Regex {
// Raw handle passed to every `onig_sys` call made on behalf of this regex.
raw: onig_sys::OnigRegexMut,
}
// NOTE(review): these impls assert that the raw handle may be moved to and
// shared between threads. Nothing in this file shows why that is sound —
// presumably Oniguruma does not mutate a compiled regex while matching.
// Confirm against the Oniguruma documentation.
unsafe impl Send for Regex {}
unsafe impl Sync for Regex {}
impl Error {
fn from_code_and_info(code: c_int, info: &onig_sys::OnigErrorInfo) -> Error {
Error::new(code, info)
}
fn from_code(code: c_int) -> Error {
Error::new(code, null())
}
fn new(code: c_int, info: *const onig_sys::OnigErrorInfo) -> Error {
let buff = &mut [0; onig_sys::ONIG_MAX_ERROR_MESSAGE_LEN as usize];
let len = unsafe { onig_sys::onig_error_code_to_str(buff.as_mut_ptr(), code, info) };
let description = str::from_utf8(&buff[..len as usize]).unwrap();
Error {
code,
description: description.to_owned(),
}
}
pub fn code(&self) -> i32 {
self.code
}
pub fn description(&self) -> &str {
&self.description
}
}
impl error::Error for Error {
    /// Exposes the stored Oniguruma message through the standard error trait.
    fn description(&self) -> &str {
        self.description.as_str()
    }
}
impl fmt::Display for Error {
    /// Writes the message prefixed with `Oniguruma error: `.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let message = self.description();
        f.write_fmt(format_args!("Oniguruma error: {}", message))
    }
}
impl fmt::Debug for Error {
    /// Writes the error as `Error(<code>, <message>)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let code = self.code;
        let message = self.description();
        write!(f, "Error({}, {})", code, message)
    }
}
// Global lock held around the `onig_new` call in
// `Regex::with_options_and_encoding`, so that only one thread compiles a
// pattern at a time.
// NOTE(review): presumably needed because regex creation in Oniguruma is not
// safe to run concurrently — confirm against the Oniguruma documentation.
lazy_static! {
static ref REGEX_NEW_MUTEX: Mutex<()> = Mutex::new(());
}
impl Regex {
pub fn new(pattern: &str) -> Result<Self, Error> {
Regex::with_encoding(pattern)
}
pub fn with_encoding<T>(pattern: T) -> Result<Regex, Error>
where
T: EncodedChars,
{
Regex::with_options_and_encoding(
pattern,
RegexOptions::REGEX_OPTION_NONE,
Syntax::default(),
)
}
pub fn with_options(
pattern: &str,
option: RegexOptions,
syntax: &Syntax,
) -> Result<Regex, Error> {
Regex::with_options_and_encoding(pattern, option, syntax)
}
pub fn with_options_and_encoding<T>(
pattern: T,
option: RegexOptions,
syntax: &Syntax,
) -> Result<Self, Error>
where
T: EncodedChars,
{
let mut reg: onig_sys::OnigRegexMut = null_mut();
let reg_ptr = &mut reg as *mut onig_sys::OnigRegexMut;
let mut error = onig_sys::OnigErrorInfo {
enc: null(),
par: null(),
par_end: null(),
};
let err = unsafe {
let _guard = REGEX_NEW_MUTEX.lock().unwrap();
onig_sys::onig_new(
reg_ptr,
pattern.start_ptr(),
pattern.limit_ptr(),
option.bits(),
pattern.encoding(),
syntax as *const Syntax as *const onig_sys::OnigSyntaxType,
&mut error,
)
};
if err == onig_sys::ONIG_NORMAL {
Ok(Regex { raw: reg })
} else {
Err(Error::from_code_and_info(err, &error))
}
}
pub fn match_with_options(
&self,
str: &str,
at: usize,
options: SearchOptions,
region: Option<&mut Region>,
) -> Option<usize> {
self.match_with_encoding(str, at, options, region)
}
/// Matches this regex against `chars`, anchored at offset `at`, using a
/// default `MatchParam`.
///
/// Returns whatever `match_with_param` yields on success: `Some(n)` for a
/// match and `None` for a mismatch.
///
/// # Panics
///
/// Panics with `Onig: Regex match error: <message>` if `match_with_param`
/// reports an error.
pub fn match_with_encoding<T>(
    &self,
    chars: T,
    at: usize,
    options: SearchOptions,
    region: Option<&mut Region>,
) -> Option<usize>
where
    T: EncodedChars,
{
    self.match_with_param(chars, at, options, region, MatchParam::default())
        .unwrap_or_else(|e| panic!("Onig: Regex match error: {}", e.description()))
}
/// Matches this regex against `chars`, anchored at offset `at`, with explicit
/// match parameters.
///
/// `region`, when supplied, is handed to Oniguruma to be filled with capture
/// information.
///
/// # Returns
///
/// * `Ok(Some(n))` when `onig_match_with_param` returns a non-negative `n`.
/// * `Ok(None)` when it returns `ONIG_MISMATCH`.
///
/// # Errors
///
/// Any other negative status is converted into an `Error`.
///
/// # Panics
///
/// Panics if the encoding of `chars` differs from the regex's encoding, or if
/// `at` lies beyond the end of `chars`.
pub fn match_with_param<T>(
    &self,
    chars: T,
    at: usize,
    options: SearchOptions,
    region: Option<&mut Region>,
    match_param: MatchParam,
) -> Result<Option<usize>, Error>
where
    T: EncodedChars,
{
    assert_eq!(chars.encoding(), self.encoding());

    let (beg, end) = (chars.start_ptr(), chars.limit_ptr());

    // Validate `at` as a plain integer *before* forming a pointer from it.
    // Computing `beg.add(at)` first is undefined behaviour when the result
    // leaves the buffer, and a wrapped-around pointer would slip past a
    // pointer comparison and reach Oniguruma.
    // Assumes the text pointers are byte pointers (Oniguruma's `OnigUChar`).
    let text_len = (end as usize)
        .checked_sub(beg as usize)
        .expect("EncodedChars::limit_ptr must not precede start_ptr");
    assert!(
        at <= text_len,
        "match offset {} is past the end of the text ({} bytes)",
        at,
        text_len
    );

    let region_ptr = match region {
        Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
        None => std::ptr::null_mut(),
    };

    let r = unsafe {
        // SAFETY: `at <= text_len` was checked above, so `beg.add(at)` stays
        // within `beg..=end`.
        onig_sys::onig_match_with_param(
            self.raw,
            beg,
            end,
            beg.add(at),
            region_ptr,
            options.bits(),
            match_param.as_raw(),
        )
    };

    if r >= 0 {
        Ok(Some(r as usize))
    } else if r == onig_sys::ONIG_MISMATCH {
        Ok(None)
    } else {
        Err(Error::from_code(r))
    }
}
pub fn search_with_options(
&self,
str: &str,
from: usize,
to: usize,
options: SearchOptions,
region: Option<&mut Region>,
) -> Option<usize> {
self.search_with_encoding(str, from, to, options, region)
}
/// Searches `chars` for this regex between offsets `from` and `to`, using a
/// default `MatchParam`.
///
/// Returns whatever `search_with_param` yields on success: `Some(n)` when a
/// match is found and `None` otherwise.
///
/// # Panics
///
/// Panics with `Onig: Regex search error: <message>` if `search_with_param`
/// reports an error.
pub fn search_with_encoding<T>(
    &self,
    chars: T,
    from: usize,
    to: usize,
    options: SearchOptions,
    region: Option<&mut Region>,
) -> Option<usize>
where
    T: EncodedChars,
{
    self.search_with_param(chars, from, to, options, region, MatchParam::default())
        .unwrap_or_else(|e| panic!("Onig: Regex search error: {}", e.description))
}
/// Searches `chars` for this regex with explicit match parameters.
///
/// `from` is the offset at which the search starts and `to` the offset that
/// bounds it; both are turned into pointers into `chars` and handed to
/// `onig_search_with_param`. `region`, when supplied, is passed to Oniguruma
/// to be filled with capture information.
///
/// # Returns
///
/// * `Ok(Some(n))` when `onig_search_with_param` returns a non-negative `n`.
/// * `Ok(None)` when it returns `ONIG_MISMATCH`.
///
/// # Errors
///
/// Any other negative status is converted into an `Error`.
///
/// # Panics
///
/// Panics if the encoding of `chars` differs from the regex's encoding, or if
/// `from` or `to` lies beyond the end of `chars`.
pub fn search_with_param<T>(
    &self,
    chars: T,
    from: usize,
    to: usize,
    options: SearchOptions,
    region: Option<&mut Region>,
    match_param: MatchParam,
) -> Result<Option<usize>, Error>
where
    T: EncodedChars,
{
    let (beg, end) = (chars.start_ptr(), chars.limit_ptr());
    assert_eq!(self.encoding(), chars.encoding());

    // Validate both offsets as plain integers *before* forming pointers from
    // them. Computing `beg.add(..)` first is undefined behaviour when the
    // result leaves the buffer, and a wrapped-around pointer would slip past
    // a pointer comparison and reach Oniguruma.
    // Assumes the text pointers are byte pointers (Oniguruma's `OnigUChar`).
    let text_len = (end as usize)
        .checked_sub(beg as usize)
        .expect("EncodedChars::limit_ptr must not precede start_ptr");
    assert!(
        from <= text_len,
        "search start {} is past the end of the text ({} bytes)",
        from,
        text_len
    );
    assert!(
        to <= text_len,
        "search range {} is past the end of the text ({} bytes)",
        to,
        text_len
    );

    let region_ptr = match region {
        Some(region) => region as *mut Region as *mut onig_sys::OnigRegion,
        None => std::ptr::null_mut(),
    };

    let r = unsafe {
        // SAFETY: `from` and `to` were both checked against `text_len` above,
        // so the derived pointers stay within `beg..=end`.
        onig_sys::onig_search_with_param(
            self.raw,
            beg,
            end,
            beg.add(from),
            beg.add(to),
            region_ptr,
            options.bits(),
            match_param.as_raw(),
        )
    };

    if r >= 0 {
        Ok(Some(r as usize))
    } else if r == onig_sys::ONIG_MISMATCH {
        Ok(None)
    } else {
        Err(Error::from_code(r))
    }
}
/// Returns `true` when the regex matches the *entire* `text`.
///
/// The match is attempted at offset 0 and only counts when the value it
/// returns equals `text.len()`.
///
/// # Panics
///
/// Panics if the underlying match reports an error (see
/// `match_with_encoding`).
pub fn is_match(&self, text: &str) -> bool {
    let matched = self.match_with_options(text, 0, SearchOptions::SEARCH_OPTION_NONE, None);
    matched == Some(text.len())
}
pub fn find(&self, text: &str) -> Option<(usize, usize)> {
self.find_with_encoding(text)
}
/// Finds the first match of this regex in `text`, searching from offset 0 to
/// `text.len()`.
///
/// Returns position 0 of the region filled by the search (the whole match),
/// or `None` when the search finds nothing.
///
/// # Panics
///
/// Panics if the underlying search reports an error (see
/// `search_with_encoding`).
pub fn find_with_encoding<T>(&self, text: T) -> Option<(usize, usize)>
where
    T: EncodedChars,
{
    let mut region = Region::new();
    // Read the length up front: `text` is moved into the search call.
    let end = text.len();
    let options = SearchOptions::SEARCH_OPTION_NONE;

    self.search_with_encoding(text, 0, end, options, Some(&mut region))?;
    region.pos(0)
}
/// Returns the encoding Oniguruma reports for this regex
/// (`onig_get_encoding`).
pub fn encoding(&self) -> onig_sys::OnigEncoding {
    let handle = self.raw;
    unsafe { onig_sys::onig_get_encoding(handle) }
}
/// Returns the number of capture groups Oniguruma reports for this regex
/// (`onig_number_of_captures`).
pub fn captures_len(&self) -> usize {
    let count = unsafe { onig_sys::onig_number_of_captures(self.raw) };
    count as usize
}
/// Returns the number of capture histories Oniguruma reports for this regex
/// (`onig_number_of_capture_histories`).
pub fn capture_histories_len(&self) -> usize {
    let count = unsafe { onig_sys::onig_number_of_capture_histories(self.raw) };
    count as usize
}
}
impl Drop for Regex {
    /// Releases the underlying Oniguruma regex with `onig_free`.
    fn drop(&mut self) {
        let handle = self.raw;
        unsafe {
            onig_sys::onig_free(handle);
        }
    }
}
// Unit tests for regex construction, matching, searching and error reporting.
#[cfg(test)]
mod tests {
use super::*;
use std::panic;
// Both the explicit-options constructor and `Regex::new` accept valid patterns.
#[test]
fn test_regex_create() {
Regex::with_options(".*", RegexOptions::REGEX_OPTION_NONE, Syntax::default()).unwrap();
Regex::new(r#"a \w+ word"#).unwrap();
}
// An unknown character property is rejected with code -223 and a message
// that names the offending property.
#[test]
fn test_regex_invalid() {
let e = Regex::new("\\p{foo}").unwrap_err();
assert_eq!(e.code(), -223);
assert_eq!(e.description(), "invalid character property name {foo}");
}
// A mismatch is reported as `None`, not as an error.
#[test]
fn test_failed_match() {
let regex = Regex::new("foo").unwrap();
let res = regex.match_with_options("bar", 0, SearchOptions::SEARCH_OPTION_NONE, None);
assert!(res.is_none());
}
// Searching returns the start of the match and fills the region with the
// whole match and the capture group; a cloned region reports the same data.
#[test]
fn test_regex_search_with_options() {
let mut region = Region::new();
let regex = Regex::new("e(l+)").unwrap();
let r = regex.search_with_options(
"hello",
0,
5,
SearchOptions::SEARCH_OPTION_NONE,
Some(&mut region),
);
assert!(region.tree().is_none());
assert_eq!(r, Some(1));
assert_eq!(region.len(), 2);
let pos1 = region.pos(0).unwrap();
let pos2 = region.pos(1).unwrap();
assert_eq!(pos1, (1, 4));
assert_eq!(pos2, (2, 4));
let cloned_region = region.clone();
let pos1_clone = cloned_region.pos(0).unwrap();
assert_eq!(pos1_clone, pos1);
}
// An anchored match at offset 0 returns the matched length (4 for "hell")
// and fills the region in the same way as a search.
#[test]
fn test_regex_match_with_options() {
let mut region = Region::new();
let regex = Regex::new("he(l+)").unwrap();
let r = regex.match_with_options(
"hello",
0,
SearchOptions::SEARCH_OPTION_NONE,
Some(&mut region),
);
assert!(region.tree().is_none());
assert_eq!(r, Some(4));
assert_eq!(region.len(), 2);
let pos1 = region.pos(0).unwrap();
let pos2 = region.pos(1).unwrap();
assert_eq!(pos1, (0, 4));
assert_eq!(pos2, (2, 4));
}
// `is_match` requires the whole text to match; trailing text makes it false.
#[test]
fn test_regex_is_match() {
let regex = Regex::new("he(l+)o").unwrap();
assert!(regex.is_match("hello"));
assert!(!regex.is_match("hello 2.0"));
}
// `find` returns the (start, end) pair of the first match, or `None`.
#[test]
fn test_regex_find() {
let regex = Regex::new("he(l+)o").unwrap();
assert_eq!(regex.find("hey, hello!"), Some((5, 10)));
assert_eq!(regex.find("hey, honey!"), None);
}
// Three capture groups in the pattern are counted as three.
#[test]
fn test_regex_captures_len() {
let regex = Regex::new("(he)(l+)(o)").unwrap();
assert_eq!(regex.captures_len(), 3);
}
// With the default `MatchParam`, this pattern/input pair makes the match
// give up with a retry-limit error, which `match_with_param` returns as `Err`.
#[test]
fn test_regex_error_is_match() {
let regex = Regex::new("(a|b|ab)*bc").unwrap();
let result = regex.match_with_param(
"ababababababababababababababababababababababababababababacbc",
0, SearchOptions::SEARCH_OPTION_NONE, None, MatchParam::default());
let e = result.err().unwrap();
assert_eq!("retry-limit-in-match over", e.description());
}
// The same failure reached through `is_match` surfaces as a panic whose
// `String` payload carries the "Onig: Regex match error" prefix.
#[test]
fn test_regex_panic_is_match() {
let regex = Regex::new("(a|b|ab)*bc").unwrap();
let result = panic::catch_unwind(||
regex.is_match("ababababababababababababababababababababababababababababacbc")
);
let e = result.err().unwrap();
let message = e.downcast_ref::<String>().unwrap();
assert_eq!(message.as_str(),
"Onig: Regex match error: retry-limit-in-match over");
}
// The retry-limit failure is also returned as `Err` by `search_with_param`.
#[test]
fn test_regex_error_find() {
let regex = Regex::new("(a|b|ab)*bc").unwrap();
let s = "ababababababababababababababababababababababababababababacbc";
let result = regex.search_with_param(
s, 0, s.len(), SearchOptions::SEARCH_OPTION_NONE, None, MatchParam::default());
let e = result.err().unwrap();
assert_eq!("retry-limit-in-match over", e.description());
}
// The same failure reached through `find` surfaces as a panic whose
// `String` payload carries the "Onig: Regex search error" prefix.
#[test]
fn test_regex_panic_find() {
let regex = Regex::new("(a|b|ab)*bc").unwrap();
let result = panic::catch_unwind(||
regex.find("ababababababababababababababababababababababababababababacbc")
);
let e = result.err().unwrap();
let message = e.downcast_ref::<String>().unwrap();
assert_eq!(message.as_str(),
"Onig: Regex search error: retry-limit-in-match over");
}
}