#![allow(non_upper_case_globals)]
#![allow(unused_variables)]
#![allow(unused_assignments)]
#![allow(unused_mut)]
use std::sync::atomic::{AtomicU32, Ordering};
use crate::oniguruma::*;
use crate::regenc::*;
use crate::regint::*;
use crate::regparse_types::*;
static DEFAULT_CASE_FOLD_FLAG: AtomicU32 = AtomicU32::new(ONIGENC_CASE_FOLD_MIN);
#[cfg_attr(coverage_nightly, coverage(off))]
pub fn onig_get_default_case_fold_flag() -> OnigCaseFoldType {
DEFAULT_CASE_FOLD_FLAG.load(Ordering::Relaxed)
}
#[cfg_attr(coverage_nightly, coverage(off))]
pub fn onig_set_default_case_fold_flag(flag: OnigCaseFoldType) -> i32 {
DEFAULT_CASE_FOLD_FLAG.store(flag, Ordering::Relaxed);
0
}
fn enclen(enc: OnigEncoding, p: &[u8], _offset: usize) -> usize {
if p.is_empty() {
return 1;
}
enc.mbc_enc_len(p)
}
const SIZE_INC: i32 = 1;
const OPSIZE_ANYCHAR_STAR: i32 = 1;
const OPSIZE_ANYCHAR_STAR_PEEK_NEXT: i32 = 1;
const OPSIZE_JUMP: i32 = 1;
const OPSIZE_PUSH: i32 = 1;
const OPSIZE_PUSH_SUPER: i32 = 1;
const OPSIZE_POP: i32 = 1;
const OPSIZE_POP_TO_MARK: i32 = 1;
const OPSIZE_PUSH_OR_JUMP_EXACT1: i32 = 1;
const OPSIZE_PUSH_IF_PEEK_NEXT: i32 = 1;
const OPSIZE_REPEAT: i32 = 1;
const OPSIZE_REPEAT_INC: i32 = 1;
const OPSIZE_REPEAT_INC_NG: i32 = 1;
const OPSIZE_WORD_BOUNDARY: i32 = 1;
const OPSIZE_BACKREF: i32 = 1;
const OPSIZE_FAIL: i32 = 1;
const OPSIZE_MEM_START: i32 = 1;
const OPSIZE_MEM_START_PUSH: i32 = 1;
const OPSIZE_MEM_END_PUSH: i32 = 1;
const OPSIZE_MEM_END_PUSH_REC: i32 = 1;
const OPSIZE_MEM_END: i32 = 1;
const OPSIZE_MEM_END_REC: i32 = 1;
const OPSIZE_EMPTY_CHECK_START: i32 = 1;
const OPSIZE_EMPTY_CHECK_END: i32 = 1;
const OPSIZE_CHECK_POSITION: i32 = 1;
const OPSIZE_CALL: i32 = 1;
const OPSIZE_RETURN: i32 = 1;
const OPSIZE_MOVE: i32 = 1;
const OPSIZE_STEP_BACK_START: i32 = 1;
const OPSIZE_STEP_BACK_NEXT: i32 = 1;
const OPSIZE_CUT_TO_MARK: i32 = 1;
const OPSIZE_MARK: i32 = 1;
const OPSIZE_SAVE_VAL: i32 = 1;
const OPSIZE_UPDATE_VAR: i32 = 1;
fn add_op(reg: &mut RegexType, opcode: OpCode, payload: OperationPayload) -> i32 {
let idx = reg.ops.len();
reg.ops.push(Operation { opcode, payload });
idx as i32
}
#[cfg_attr(coverage_nightly, coverage(off))]
fn ops_curr_offset(reg: &RegexType) -> i32 {
(reg.ops.len() as i32) - 1
}
#[cfg_attr(coverage_nightly, coverage(off))]
fn len_multiply_cmp(a: OnigLen, b: i32, limit: OnigLen) -> bool {
if a == 0 || b == 0 {
return false;
}
if a > limit / (b as OnigLen) {
return true;
}
a * (b as OnigLen) > limit
}
pub fn distance_add(d1: OnigLen, d2: OnigLen) -> OnigLen {
if d1 == INFINITE_LEN || d2 == INFINITE_LEN {
INFINITE_LEN
} else if d1 <= INFINITE_LEN - d2 {
d1 + d2
} else {
INFINITE_LEN
}
}
fn distance_multiply(d: OnigLen, m: i32) -> OnigLen {
if m == 0 {
return 0;
}
if d >= INFINITE_LEN / (m as OnigLen) {
return INFINITE_LEN;
}
d * (m as OnigLen)
}
fn bitset_is_empty(bs: &BitSet) -> bool {
for i in 0..BITSET_REAL_SIZE {
if bs[i] != 0 {
return false;
}
}
true
}
fn is_strict_real_node(node: &Node) -> bool {
match &node.inner {
NodeInner::String(_) | NodeInner::CClass(_) | NodeInner::CType(_) => true,
_ => false,
}
}
fn get_tree_head_literal<'a>(node: &'a Node, exact: bool, reg: &RegexType) -> Option<&'a Node> {
match &node.inner {
NodeInner::BackRef(_) | NodeInner::Alt(_) | NodeInner::Call(_) => None,
NodeInner::CType(ct) => {
if ct.ctype == CTYPE_ANYCHAR {
return None;
}
if !exact {
Some(node)
} else {
None
}
}
NodeInner::CClass(_) => {
if !exact {
Some(node)
} else {
None
}
}
NodeInner::List(cons) => get_tree_head_literal(&cons.car, exact, reg),
NodeInner::String(sn) => {
if sn.s.is_empty() {
return None;
}
let is_real_ic = (node.status & ND_ST_IGNORECASE) != 0 && !sn.is_crude();
if !exact || !is_real_ic {
Some(node)
} else {
None
}
}
NodeInner::Quant(qn) => {
if qn.lower > 0 {
if let Some(he) = qn.head_exact {
None } else {
qn.body
.as_ref()
.and_then(|b| get_tree_head_literal(b, exact, reg))
}
} else {
None
}
}
NodeInner::Bag(bn) => match bn.bag_type {
BagType::Option | BagType::Memory | BagType::StopBacktrack => bn
.body
.as_ref()
.and_then(|b| get_tree_head_literal(b, exact, reg)),
_ => None,
},
NodeInner::Anchor(an) => {
if an.anchor_type == ANCR_PREC_READ {
an.body
.as_ref()
.and_then(|b| get_tree_head_literal(b, exact, reg))
} else {
None
}
}
_ => None,
}
}
fn get_head_literal_byte(node: &Node, exact: bool, reg: &RegexType) -> Option<u8> {
let n = get_tree_head_literal(node, exact, reg)?;
if let NodeInner::String(sn) = &n.inner {
if !sn.s.is_empty() && sn.s[0] != 0 {
return Some(sn.s[0]);
}
}
None
}
fn onig_is_code_in_cc(enc: OnigEncoding, code: OnigCodePoint, cc: &CClassNode) -> bool {
let in_bs = if (code as usize) < SINGLE_BYTE_SIZE {
bitset_at(&cc.bs, code as usize)
} else {
false
};
let in_mbuf = if let Some(ref mbuf) = cc.mbuf {
onig_is_in_code_range_bbuf(mbuf, code)
} else {
false
};
let result = in_bs || in_mbuf;
if cc.is_not() {
!result
} else {
result
}
}
fn onig_is_in_code_range_bbuf(mbuf: &BBuf, code: OnigCodePoint) -> bool {
let data = &mbuf.data;
if data.len() < 4 {
return false;
}
let read_u32 = |offset: usize| -> u32 {
if offset + 4 > data.len() {
return 0;
}
u32::from_ne_bytes([
data[offset],
data[offset + 1],
data[offset + 2],
data[offset + 3],
])
};
let n = read_u32(0) as usize;
let mut low = 0usize;
let mut high = n;
while low < high {
let mid = (low + high) / 2;
let from = read_u32((mid * 2 + 1) * 4);
let to = read_u32((mid * 2 + 2) * 4);
if code < from {
high = mid;
} else if code > to {
low = mid + 1;
} else {
return true;
}
}
false
}
fn is_exclusive(x: &Node, y: &Node, reg: &RegexType) -> bool {
match (&x.inner, &y.inner) {
(NodeInner::CType(xct), NodeInner::CType(yct)) => {
if xct.ctype == CTYPE_ANYCHAR || yct.ctype == CTYPE_ANYCHAR {
return false;
}
xct.ctype == yct.ctype && xct.not != yct.not && xct.ascii_mode == yct.ascii_mode
}
(NodeInner::CType(_), NodeInner::CClass(_)) => is_exclusive(y, x, reg),
(NodeInner::CType(_), NodeInner::String(_)) => is_exclusive(y, x, reg),
(NodeInner::CClass(xc), NodeInner::CType(yct)) => {
if yct.ctype == CTYPE_ANYCHAR {
return false;
}
if yct.ctype != ONIGENC_CTYPE_WORD as i32 {
return false;
}
if !yct.not {
if xc.mbuf.is_some() || xc.is_not() {
return false;
}
let range = if yct.ascii_mode {
128
} else {
SINGLE_BYTE_SIZE
};
for i in 0..range {
if bitset_at(&xc.bs, i) {
if is_code_word(reg.enc, i as OnigCodePoint) {
return false;
}
}
}
true
} else {
if xc.mbuf.is_some() || xc.is_not() {
return false;
}
let range = if yct.ascii_mode {
128
} else {
SINGLE_BYTE_SIZE
};
for i in 0..range {
if !is_code_word(reg.enc, i as OnigCodePoint) {
if bitset_at(&xc.bs, i) {
return false;
}
}
}
for i in range..SINGLE_BYTE_SIZE {
if bitset_at(&xc.bs, i) {
return false;
}
}
true
}
}
(NodeInner::CClass(xc), NodeInner::CClass(yc)) => {
for i in 0..SINGLE_BYTE_SIZE {
let xv = bitset_at(&xc.bs, i);
let x_in = if xc.is_not() { !xv } else { xv };
if x_in {
let yv = bitset_at(&yc.bs, i);
let y_in = if yc.is_not() { !yv } else { yv };
if y_in {
return false;
}
}
}
if (xc.mbuf.is_none() && !xc.is_not()) || (yc.mbuf.is_none() && !yc.is_not()) {
return true;
}
false
}
(NodeInner::CClass(_), NodeInner::String(_)) => is_exclusive(y, x, reg),
(NodeInner::String(xs), NodeInner::CType(yct)) => {
if xs.s.is_empty() {
return false;
}
if yct.ctype == CTYPE_ANYCHAR {
return false;
}
if yct.ctype == ONIGENC_CTYPE_WORD as i32 {
let is_word = if !yct.ascii_mode {
is_mbc_word(reg.enc, &xs.s)
} else {
is_mbc_word_ascii(reg.enc, &xs.s)
};
return if is_word { yct.not } else { !yct.not };
}
false
}
(NodeInner::String(xs), NodeInner::CClass(yc)) => {
if xs.s.is_empty() {
return false;
}
let code = reg.enc.mbc_to_code(&xs.s, xs.s.len());
!onig_is_code_in_cc(reg.enc, code, yc)
}
(NodeInner::String(xs), NodeInner::String(ys)) => {
if xs.s.is_empty() || ys.s.is_empty() {
return false;
}
let len = xs.s.len().min(ys.s.len());
for i in 0..len {
if xs.s[i] != ys.s[i] {
return true;
}
}
false
}
_ => false,
}
}
fn is_code_word(enc: OnigEncoding, code: OnigCodePoint) -> bool {
if code < 128 {
let c = code as u8;
c.is_ascii_alphanumeric() || c == b'_'
} else {
enc.is_code_ctype(code, ONIGENC_CTYPE_WORD)
}
}
fn is_mbc_word(enc: OnigEncoding, buf: &[u8]) -> bool {
if buf.is_empty() {
return false;
}
let code = enc.mbc_to_code(buf, buf.len());
is_code_word(enc, code)
}
fn is_mbc_word_ascii(_enc: OnigEncoding, buf: &[u8]) -> bool {
if buf.is_empty() {
return false;
}
let c = buf[0];
c.is_ascii_alphanumeric() || c == b'_'
}
fn tune_next(node: &mut Node, next_node: &Node, reg: &RegexType) -> i32 {
let mut cur = node as *mut Node;
let mut called = false;
loop {
let n = unsafe { &mut *cur };
match &mut n.inner {
NodeInner::Quant(ref mut qn) => {
if qn.greedy && is_infinite_repeat(qn.upper) {
if !called {
if let Some(byte) = get_head_literal_byte(next_node, true, reg) {
qn.next_head_exact = Some(byte);
}
}
if qn.lower <= 1 {
if let Some(ref body) = qn.body {
if is_strict_real_node(body) {
let x = get_tree_head_literal(body, false, reg);
if let Some(x_node) = x {
let y = get_tree_head_literal(next_node, false, reg);
if let Some(y_node) = y {
if is_exclusive(x_node, y_node, reg) {
let body_taken = qn.body.take().unwrap();
let quant_node = Node {
inner: NodeInner::Quant(QuantNode {
body: Some(body_taken),
lower: qn.lower,
upper: qn.upper,
greedy: qn.greedy,
emptiness: qn.emptiness,
head_exact: qn.head_exact.take(),
next_head_exact: qn.next_head_exact.take(),
include_referred: qn.include_referred,
empty_status_mem: qn.empty_status_mem,
}),
status: n.status,
parent: std::ptr::null_mut(),
};
n.inner = NodeInner::Bag(BagNode {
body: Some(Box::new(quant_node)),
bag_type: BagType::StopBacktrack,
bag_data: BagData::StopBacktrack,
min_len: 0,
max_len: INFINITE_LEN,
min_char_len: 0,
max_char_len: INFINITE_LEN,
opt_count: 0,
});
n.status |= ND_ST_STRICT_REAL_REPEAT;
}
}
}
}
}
}
}
return 0;
}
NodeInner::Bag(ref bn) => {
if bn.bag_type == BagType::Memory {
if (n.status & ND_ST_CALLED) != 0 {
called = true;
}
if let Some(ref body) = bn.body {
cur = body.as_ref() as *const Node as *mut Node;
continue;
}
}
return 0;
}
_ => return 0,
}
}
}
fn select_str_opcode(mb_len: i32, str_len: i32) -> OpCode {
if mb_len == 1 {
match str_len {
1 => OpCode::Str1,
2 => OpCode::Str2,
3 => OpCode::Str3,
4 => OpCode::Str4,
5 => OpCode::Str5,
_ => OpCode::StrN,
}
} else if mb_len == 2 {
match str_len {
1 => OpCode::StrMb2n1,
2 => OpCode::StrMb2n2,
3 => OpCode::StrMb2n3,
_ => OpCode::StrMb2n,
}
} else if mb_len == 3 {
OpCode::StrMb3n
} else {
OpCode::StrMbn
}
}
fn add_compile_string_length(_s: &[u8], mb_len: i32, str_len: i32) -> i32 {
SIZE_INC
}
fn add_compile_string(reg: &mut RegexType, s: &[u8], mb_len: i32, str_len: i32) -> i32 {
let op = select_str_opcode(mb_len, str_len);
let byte_len = mb_len * str_len;
let payload = if mb_len == 1 && str_len <= 5 {
let mut buf = [0u8; 16];
buf[..byte_len as usize].copy_from_slice(&s[..byte_len as usize]);
OperationPayload::Exact { s: buf }
} else if mb_len == 1 {
OperationPayload::ExactN {
s: s[..byte_len as usize].to_vec(),
n: str_len,
}
} else {
OperationPayload::ExactLenN {
s: s[..byte_len as usize].to_vec(),
n: byte_len, len: mb_len, }
};
add_op(reg, op, payload);
0
}
fn compile_length_string_node(node: &Node, reg: &RegexType) -> i32 {
let sn = node.as_str().unwrap();
let enc = reg.enc;
if sn.s.is_empty() {
return 0;
}
let mut len = 0i32;
let mut pos = 0usize;
let slen = sn.s.len();
let ambig = node.has_status(ND_ST_IGNORECASE);
while pos < slen {
let first_len = enc.mbc_enc_len(&sn.s[pos..]);
let mut run = 1;
let next = pos + first_len;
let mut p = next;
while p < slen {
let enc_len = enc.mbc_enc_len(&sn.s[p..]);
if enc_len != first_len {
break;
}
run += 1;
p += enc_len;
}
len += add_compile_string_length(&sn.s[pos..], first_len as i32, run);
pos = p;
}
len
}
fn compile_length_string_crude_node(node: &Node, reg: &RegexType) -> i32 {
let sn = node.as_str().unwrap();
if sn.s.is_empty() {
return 0;
}
SIZE_INC
}
fn compile_string_node(node: &Node, reg: &mut RegexType) -> i32 {
let sn = node.as_str().unwrap();
let enc = reg.enc;
if sn.s.is_empty() {
return 0;
}
let ambig = node.has_status(ND_ST_IGNORECASE);
let mut pos = 0usize;
let slen = sn.s.len();
while pos < slen {
let first_len = enc.mbc_enc_len(&sn.s[pos..]);
let mut run = 1;
let next = pos + first_len;
let mut p = next;
while p < slen {
let enc_len = enc.mbc_enc_len(&sn.s[p..]);
if enc_len != first_len {
break;
}
run += 1;
p += enc_len;
}
let r = add_compile_string(reg, &sn.s[pos..], first_len as i32, run);
if r != 0 {
return r;
}
pos = p;
}
0
}
fn compile_string_crude_node(node: &Node, reg: &mut RegexType) -> i32 {
let sn = node.as_str().unwrap();
if sn.s.is_empty() {
return 0;
}
let byte_len = sn.s.len();
let payload = if byte_len <= 16 {
let mut buf = [0u8; 16];
buf[..byte_len].copy_from_slice(&sn.s[..byte_len]);
OperationPayload::Exact { s: buf }
} else {
OperationPayload::ExactN {
s: sn.s.clone(),
n: byte_len as i32,
}
};
add_op(reg, select_str_opcode(1, byte_len as i32), payload);
0
}
fn compile_length_cclass_node(cc: &CClassNode, reg: &RegexType) -> i32 {
SIZE_INC
}
fn bbuf_to_u32_vec(data: &[u8]) -> Vec<u32> {
data.chunks_exact(4)
.map(|chunk| u32::from_ne_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
.collect()
}
fn compile_cclass_node(cc: &CClassNode, reg: &mut RegexType) -> i32 {
let has_mb = cc.mbuf.is_some();
let has_sb = !bitset_is_empty(&cc.bs);
if has_mb && has_sb {
let opcode = if cc.is_not() {
OpCode::CClassMixNot
} else {
OpCode::CClassMix
};
let mb_data = cc
.mbuf
.as_ref()
.map(|b| bbuf_to_u32_vec(&b.data))
.unwrap_or_default();
add_op(
reg,
opcode,
OperationPayload::CClassMix {
mb: mb_data,
bsp: Box::new(cc.bs),
},
);
} else if has_mb {
let opcode = if cc.is_not() {
OpCode::CClassMbNot
} else {
OpCode::CClassMb
};
let mb_data = cc
.mbuf
.as_ref()
.map(|b| bbuf_to_u32_vec(&b.data))
.unwrap_or_default();
add_op(reg, opcode, OperationPayload::CClassMb { mb: mb_data });
} else {
let opcode = if cc.is_not() {
OpCode::CClassNot
} else {
OpCode::CClass
};
add_op(
reg,
opcode,
OperationPayload::CClass {
bsp: Box::new(cc.bs),
},
);
}
0
}
fn entry_repeat_range(reg: &mut RegexType, lower: i32, upper: i32) -> Result<i32, i32> {
let id = reg.num_repeat;
reg.num_repeat += 1;
reg.repeat_range.push(RepeatRange {
lower,
upper,
u_offset: 0,
});
Ok(id)
}
fn collect_mem_status(node: &Node) -> u32 {
let mut status: u32 = 0;
match &node.inner {
NodeInner::List(_) | NodeInner::Alt(_) => {
let mut cur = node;
loop {
let (car, cdr) = match &cur.inner {
NodeInner::List(cons) => (&cons.car, &cons.cdr),
NodeInner::Alt(cons) => (&cons.car, &cons.cdr),
_ => break,
};
status |= collect_mem_status(car);
match cdr {
Some(next) => cur = next,
None => break,
}
}
}
NodeInner::Quant(qn) => {
if let Some(ref body) = qn.body {
status |= collect_mem_status(body);
}
}
NodeInner::Bag(bn) => {
if bn.bag_type == BagType::Memory {
if let BagData::Memory { regnum, .. } = bn.bag_data {
if regnum > 0 && regnum < 31 {
status |= 1u32 << regnum;
}
}
}
if let Some(ref body) = bn.body {
status |= collect_mem_status(body);
}
}
_ => {}
}
status
}
fn compile_quant_body_with_empty_check(
node: &Node,
reg: &mut RegexType,
env: &ParseEnv,
emptiness: BodyEmptyType,
qn_empty_status_mem: u32,
) -> i32 {
let is_empty = emptiness != BodyEmptyType::NotEmpty;
let saved_mem = reg.num_empty_check;
if is_empty {
reg.num_empty_check += 1;
add_op(
reg,
OpCode::EmptyCheckStart,
OperationPayload::EmptyCheckStart { mem: saved_mem },
);
}
let r = compile_tree(node, reg, env);
if r != 0 {
return r;
}
if is_empty {
let mem = saved_mem;
let empty_status_mem = if emptiness == BodyEmptyType::MayBeEmptyMem
|| emptiness == BodyEmptyType::MayBeEmptyRec
{
if qn_empty_status_mem != 0 {
qn_empty_status_mem
} else {
collect_mem_status(node)
}
} else {
0
};
let opcode = match emptiness {
BodyEmptyType::MayBeEmptyMem => {
if qn_empty_status_mem != 0 {
OpCode::EmptyCheckEndMemst
} else {
OpCode::EmptyCheckEnd
}
}
BodyEmptyType::MayBeEmptyRec => OpCode::EmptyCheckEndMemstPush,
_ => OpCode::EmptyCheckEnd,
};
add_op(
reg,
opcode,
OperationPayload::EmptyCheckEnd {
mem,
empty_status_mem,
},
);
}
0
}
fn compile_tree_n_times(node: &Node, n: i32, reg: &mut RegexType, env: &ParseEnv) -> i32 {
for _ in 0..n {
let r = compile_tree(node, reg, env);
if r != 0 {
return r;
}
}
0
}
fn is_anychar_infinite_greedy(qn: &QuantNode) -> bool {
if qn.greedy && is_infinite_repeat(qn.upper) && qn.lower <= 1 {
if let Some(body) = &qn.body {
return matches!(body.inner, NodeInner::CType(ref ct) if ct.ctype == CTYPE_ANYCHAR);
}
}
false
}
fn is_anychar_multiline(body: &Node) -> bool {
matches!(&body.inner, NodeInner::CType(_) if (body.status & ND_ST_MULTILINE) != 0)
}
fn compile_length_quantifier_node(qn: &QuantNode, reg: &RegexType, env: &ParseEnv) -> i32 {
let body = qn.body.as_ref().unwrap();
if qn.upper == 0 {
if qn.include_referred != 0 {
let tlen = compile_length_tree(body, reg, env);
return OPSIZE_JUMP + tlen;
}
if is_anychar_infinite_greedy(qn) {
return SIZE_INC;
}
return 0;
}
if is_anychar_infinite_greedy(qn) {
let tlen = compile_length_tree(body, reg, env);
if qn.next_head_exact.is_some() {
return OPSIZE_ANYCHAR_STAR_PEEK_NEXT + tlen * qn.lower;
}
return SIZE_INC + tlen * qn.lower;
}
let is_empty = qn.emptiness != BodyEmptyType::NotEmpty;
let body_len = compile_length_tree(body, reg, env);
if body_len < 0 {
return body_len;
}
let empty_len = if is_empty {
OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END
} else {
0
};
let mod_tlen = body_len + empty_len;
if is_infinite_repeat(qn.upper) {
if qn.lower <= 1 {
let push_size = if qn.greedy && qn.head_exact.is_some() {
OPSIZE_PUSH_OR_JUMP_EXACT1
} else if qn.greedy && qn.next_head_exact.is_some() {
OPSIZE_PUSH_IF_PEEK_NEXT
} else {
OPSIZE_PUSH
};
body_len * qn.lower + push_size + mod_tlen + OPSIZE_JUMP
} else {
let n_body_len = compile_length_tree_n_times(body, qn.lower, reg, env);
n_body_len + OPSIZE_PUSH + mod_tlen + OPSIZE_JUMP
}
} else if qn.upper == 0 {
0
} else if !is_infinite_repeat(qn.upper) && qn.lower == qn.upper {
if qn.lower == 1 {
body_len
} else {
let id_len = OPSIZE_REPEAT + mod_tlen + OPSIZE_REPEAT_INC;
id_len
}
} else if !qn.greedy && qn.upper == 1 && qn.lower == 0 {
OPSIZE_PUSH + OPSIZE_JUMP + body_len
} else if qn.greedy && !is_infinite_repeat(qn.upper) {
let n = qn.upper - qn.lower;
body_len * qn.lower + n * (OPSIZE_PUSH + body_len)
} else {
OPSIZE_REPEAT + mod_tlen + OPSIZE_REPEAT_INC
}
}
fn compile_length_tree_n_times(node: &Node, n: i32, reg: &RegexType, env: &ParseEnv) -> i32 {
let len = compile_length_tree(node, reg, env);
if len < 0 {
return len;
}
len * n
}
fn compile_quantifier_node(qn: &QuantNode, reg: &mut RegexType, env: &ParseEnv) -> i32 {
let body = qn.body.as_ref().unwrap();
if qn.upper == 0 {
if qn.include_referred != 0 {
let tlen = compile_length_tree(body, reg, env);
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump {
addr: tlen + SIZE_INC,
},
);
return compile_tree(body, reg, env);
}
return 0;
}
if is_anychar_infinite_greedy(qn) {
let r = compile_tree_n_times(body, qn.lower, reg, env);
if r != 0 {
return r;
}
if let Some(c) = qn.next_head_exact {
let opcode = if is_anychar_multiline(body) {
OpCode::AnyCharMlStarPeekNext
} else {
OpCode::AnyCharStarPeekNext
};
add_op(reg, opcode, OperationPayload::AnyCharStarPeekNext { c });
} else {
let opcode = if is_anychar_multiline(body) {
OpCode::AnyCharMlStar
} else {
OpCode::AnyCharStar
};
add_op(reg, opcode, OperationPayload::None);
}
return 0;
}
let is_empty = qn.emptiness != BodyEmptyType::NotEmpty;
let body_len = compile_length_tree(body, reg, env);
if body_len < 0 {
return body_len;
}
let empty_len = if is_empty {
OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END
} else {
0
};
let mod_tlen = body_len + empty_len;
if is_infinite_repeat(qn.upper) {
if qn.lower <= 1 {
if qn.greedy {
if qn.lower == 1 {
compile_tree_n_times(body, 1, reg, env);
}
let addr;
if let Some(c) = qn.head_exact {
add_op(
reg,
OpCode::PushOrJumpExact1,
OperationPayload::PushOrJumpExact1 {
addr: SIZE_INC + mod_tlen + OPSIZE_JUMP,
c,
},
);
let r = compile_quant_body_with_empty_check(
body,
reg,
env,
qn.emptiness,
qn.empty_status_mem,
);
if r != 0 {
return r;
}
addr = -(mod_tlen + OPSIZE_PUSH_OR_JUMP_EXACT1);
} else if let Some(c) = qn.next_head_exact {
add_op(
reg,
OpCode::PushIfPeekNext,
OperationPayload::PushIfPeekNext {
addr: SIZE_INC + mod_tlen + OPSIZE_JUMP,
c,
},
);
let r = compile_quant_body_with_empty_check(
body,
reg,
env,
qn.emptiness,
qn.empty_status_mem,
);
if r != 0 {
return r;
}
addr = -(mod_tlen + OPSIZE_PUSH_IF_PEEK_NEXT);
} else {
add_op(
reg,
OpCode::Push,
OperationPayload::Push {
addr: SIZE_INC + mod_tlen + OPSIZE_JUMP,
},
);
let r = compile_quant_body_with_empty_check(
body,
reg,
env,
qn.emptiness,
qn.empty_status_mem,
);
if r != 0 {
return r;
}
addr = -(mod_tlen + OPSIZE_PUSH);
}
add_op(reg, OpCode::Jump, OperationPayload::Jump { addr });
} else {
if qn.lower == 1 {
compile_tree_n_times(body, 1, reg, env);
}
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump {
addr: mod_tlen + SIZE_INC,
},
);
let r = compile_quant_body_with_empty_check(
body,
reg,
env,
qn.emptiness,
qn.empty_status_mem,
);
if r != 0 {
return r;
}
add_op(
reg,
OpCode::Push,
OperationPayload::Push { addr: -mod_tlen },
);
}
} else {
let r = compile_tree_n_times(body, qn.lower, reg, env);
if r != 0 {
return r;
}
if qn.greedy {
add_op(
reg,
OpCode::Push,
OperationPayload::Push {
addr: SIZE_INC + mod_tlen + OPSIZE_JUMP,
},
);
let r = compile_quant_body_with_empty_check(
body,
reg,
env,
qn.emptiness,
qn.empty_status_mem,
);
if r != 0 {
return r;
}
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump {
addr: -(mod_tlen + OPSIZE_PUSH),
},
);
} else {
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump {
addr: mod_tlen + SIZE_INC,
},
);
let r = compile_quant_body_with_empty_check(
body,
reg,
env,
qn.emptiness,
qn.empty_status_mem,
);
if r != 0 {
return r;
}
add_op(
reg,
OpCode::Push,
OperationPayload::Push { addr: -mod_tlen },
);
}
}
} else if qn.lower == qn.upper {
if qn.lower == 1 {
let r = compile_tree(body, reg, env);
return r;
}
let id = entry_repeat_range(reg, qn.lower, qn.upper);
if let Err(e) = id {
return e;
}
let id = id.unwrap();
add_op(
reg,
OpCode::Repeat,
OperationPayload::Repeat {
id,
addr: SIZE_INC + mod_tlen + OPSIZE_REPEAT_INC,
},
);
reg.repeat_range[id as usize].u_offset = reg.ops.len() as i32;
let r =
compile_quant_body_with_empty_check(body, reg, env, qn.emptiness, qn.empty_status_mem);
if r != 0 {
return r;
}
add_op(
reg,
if qn.greedy {
OpCode::RepeatInc
} else {
OpCode::RepeatIncNg
},
OperationPayload::RepeatInc { id },
);
} else if !qn.greedy && qn.upper == 1 && qn.lower == 0 {
add_op(
reg,
OpCode::Push,
OperationPayload::Push {
addr: SIZE_INC + OPSIZE_JUMP,
},
);
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump {
addr: body_len + SIZE_INC,
},
);
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
} else if qn.greedy && !is_infinite_repeat(qn.upper) {
let r = compile_tree_n_times(body, qn.lower, reg, env);
if r != 0 {
return r;
}
let n = qn.upper - qn.lower;
let goal = reg.ops.len() as i32 + n * (OPSIZE_PUSH + body_len);
for _i in 0..n {
let push_addr = goal - reg.ops.len() as i32;
add_op(
reg,
OpCode::Push,
OperationPayload::Push { addr: push_addr },
);
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
} else {
let id = entry_repeat_range(reg, qn.lower, qn.upper);
if let Err(e) = id {
return e;
}
let id = id.unwrap();
let opcode = if qn.greedy {
OpCode::Repeat
} else {
OpCode::RepeatNg
};
add_op(
reg,
opcode,
OperationPayload::Repeat {
id,
addr: SIZE_INC + mod_tlen + OPSIZE_REPEAT_INC,
},
);
reg.repeat_range[id as usize].u_offset = reg.ops.len() as i32;
let r =
compile_quant_body_with_empty_check(body, reg, env, qn.emptiness, qn.empty_status_mem);
if r != 0 {
return r;
}
add_op(
reg,
if qn.greedy {
OpCode::RepeatInc
} else {
OpCode::RepeatIncNg
},
OperationPayload::RepeatInc { id },
);
}
0
}
fn compile_length_bag_node(
bag: &BagNode,
node_status: u32,
reg: &RegexType,
env: &ParseEnv,
) -> i32 {
let body = bag.body.as_ref();
match bag.bag_type {
BagType::Memory => {
let body_len = if let Some(b) = body {
compile_length_tree(b, reg, env)
} else {
0
};
if body_len < 0 {
return body_len;
}
let regnum = match &bag.bag_data {
BagData::Memory { regnum, .. } => *regnum,
_ => 0,
};
if regnum == 0 && (node_status & ND_ST_CALLED) != 0 {
return OPSIZE_CALL + OPSIZE_JUMP + body_len + OPSIZE_RETURN;
}
let mut len = OPSIZE_MEM_START + body_len + OPSIZE_MEM_END;
if (node_status & ND_ST_CALLED) != 0 {
len += OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
}
len
}
BagType::StopBacktrack => {
let body_len = if let Some(b) = body {
compile_length_tree(b, reg, env)
} else {
0
};
if body_len < 0 {
return body_len;
}
OPSIZE_MARK + body_len + OPSIZE_CUT_TO_MARK
}
BagType::Option => {
let body_len = if let Some(b) = body {
compile_length_tree(b, reg, env)
} else {
0
};
if body_len < 0 {
return body_len;
}
body_len
}
BagType::IfElse => {
let cond_len = if let Some(b) = body {
compile_length_tree(b, reg, env)
} else {
0
};
if cond_len < 0 {
return cond_len;
}
let mut len = OPSIZE_PUSH + OPSIZE_MARK + cond_len + OPSIZE_CUT_TO_MARK;
if let BagData::IfElse {
ref then_node,
ref else_node,
} = bag.bag_data
{
if let Some(ref then_n) = then_node {
let tlen = compile_length_tree(then_n, reg, env);
if tlen < 0 {
return tlen;
}
len += tlen;
}
len += OPSIZE_JUMP + OPSIZE_CUT_TO_MARK;
if let Some(ref else_n) = else_node {
let elen = compile_length_tree(else_n, reg, env);
if elen < 0 {
return elen;
}
len += elen;
}
}
len
}
}
}
fn compile_bag_memory_node(
bag: &BagNode,
node_status: u32,
reg: &mut RegexType,
env: &ParseEnv,
) -> i32 {
let regnum = match &bag.bag_data {
BagData::Memory { regnum, .. } => *regnum,
_ => return ONIGERR_TYPE_BUG as i32,
};
let is_called = (node_status & ND_ST_CALLED) != 0;
if is_called {
let body_len = if let Some(body) = &bag.body {
compile_length_tree(body, reg, env)
} else {
0
};
if body_len < 0 {
return body_len;
}
if regnum == 0 {
let callable_len = body_len + OPSIZE_RETURN;
let call_idx = reg.ops.len();
let entry_addr = (call_idx + 2) as i32;
add_op(
reg,
OpCode::Call,
OperationPayload::Call { addr: entry_addr },
);
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump {
addr: callable_len + SIZE_INC,
},
);
let called_addr = reg.ops.len() as i32;
if reg.called_addrs.is_empty() {
reg.called_addrs.resize(1, -1);
}
reg.called_addrs[0] = called_addr;
if let Some(body) = &bag.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
add_op(reg, OpCode::Return, OperationPayload::Return);
return 0;
}
let callable_len = OPSIZE_MEM_START + body_len + OPSIZE_MEM_END + OPSIZE_RETURN;
let call_idx = reg.ops.len();
let entry_addr = (call_idx + 2) as i32;
add_op(
reg,
OpCode::Call,
OperationPayload::Call { addr: entry_addr },
);
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump {
addr: callable_len + SIZE_INC,
},
);
let called_addr = reg.ops.len() as i32;
if reg.called_addrs.len() <= regnum as usize {
reg.called_addrs.resize(regnum as usize + 1, -1);
}
reg.called_addrs[regnum as usize] = called_addr;
}
let need_push = mem_status_at(reg.push_mem_start, regnum as usize);
if need_push {
add_op(
reg,
OpCode::MemStartPush,
OperationPayload::MemoryStart { num: regnum },
);
} else {
add_op(
reg,
OpCode::MemStart,
OperationPayload::MemoryStart { num: regnum },
);
}
if let Some(body) = &bag.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
let need_push_end = mem_status_at(reg.push_mem_end, regnum as usize);
let is_recursion = (node_status & ND_ST_RECURSION) != 0;
if need_push_end {
let opcode = if is_recursion {
OpCode::MemEndPushRec
} else {
OpCode::MemEndPush
};
add_op(reg, opcode, OperationPayload::MemoryEnd { num: regnum });
} else {
let opcode = if is_recursion {
OpCode::MemEndRec
} else {
OpCode::MemEnd
};
add_op(reg, opcode, OperationPayload::MemoryEnd { num: regnum });
}
if is_called {
add_op(reg, OpCode::Return, OperationPayload::Return);
}
0
}
fn compile_bag_node(bag: &BagNode, node_status: u32, reg: &mut RegexType, env: &ParseEnv) -> i32 {
match bag.bag_type {
BagType::Memory => compile_bag_memory_node(bag, node_status, reg, env),
BagType::StopBacktrack => {
let id = reg.num_call; reg.num_call += 1;
add_op(
reg,
OpCode::Mark,
OperationPayload::Mark { id, save_pos: true },
);
if let Some(body) = &bag.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
add_op(
reg,
OpCode::CutToMark,
OperationPayload::CutToMark {
id,
restore_pos: false,
},
);
0
}
BagType::Option => {
if let Some(body) = &bag.body {
return compile_tree(body, reg, env);
}
0
}
BagType::IfElse => {
let id = reg.num_call;
reg.num_call += 1;
add_op(
reg,
OpCode::Mark,
OperationPayload::Mark {
id,
save_pos: false,
},
);
let cond_len = if let Some(body) = &bag.body {
compile_length_tree(body, reg, env)
} else {
0
};
if cond_len < 0 {
return cond_len;
}
let then_len = if let BagData::IfElse { ref then_node, .. } = bag.bag_data {
if let Some(ref then_n) = then_node {
compile_length_tree(then_n, reg, env)
} else {
0
}
} else {
0
};
if then_len < 0 {
return then_len;
}
let jump_len = cond_len + OPSIZE_CUT_TO_MARK + then_len + OPSIZE_JUMP;
add_op(
reg,
OpCode::Push,
OperationPayload::Push {
addr: SIZE_INC + jump_len,
},
);
if let Some(body) = &bag.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
add_op(
reg,
OpCode::CutToMark,
OperationPayload::CutToMark {
id,
restore_pos: false,
},
);
if let BagData::IfElse {
ref then_node,
ref else_node,
} = bag.bag_data
{
if let Some(ref then_n) = then_node {
let r = compile_tree(then_n, reg, env);
if r != 0 {
return r;
}
}
let else_len = if let Some(ref else_n) = else_node {
compile_length_tree(else_n, reg, env)
} else {
0
};
if else_len < 0 {
return else_len;
}
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump {
addr: OPSIZE_CUT_TO_MARK + else_len + SIZE_INC,
},
);
add_op(
reg,
OpCode::CutToMark,
OperationPayload::CutToMark {
id,
restore_pos: false,
},
);
if let Some(ref else_n) = else_node {
let r = compile_tree(else_n, reg, env);
if r != 0 {
return r;
}
}
}
0
}
}
}
fn compile_length_anchor_node(an: &AnchorNode, reg: &RegexType, env: &ParseEnv) -> i32 {
let at = an.anchor_type;
if at == ANCR_PREC_READ {
let body_len = if let Some(body) = &an.body {
compile_length_tree(body, reg, env)
} else {
0
};
if body_len < 0 {
return body_len;
}
OPSIZE_MARK + body_len + OPSIZE_CUT_TO_MARK
} else if at == ANCR_PREC_READ_NOT {
let body_len = if let Some(body) = &an.body {
compile_length_tree(body, reg, env)
} else {
0
};
if body_len < 0 {
return body_len;
}
OPSIZE_PUSH + OPSIZE_MARK + body_len + OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL
} else if at == ANCR_LOOK_BEHIND {
let body_len = if let Some(body) = &an.body {
compile_length_tree(body, reg, env)
} else {
0
};
if body_len < 0 {
return body_len;
}
if an.char_min_len == an.char_max_len {
OPSIZE_MARK + OPSIZE_STEP_BACK_START + body_len + OPSIZE_CUT_TO_MARK
} else {
let mut len = OPSIZE_SAVE_VAL
+ OPSIZE_UPDATE_VAR
+ OPSIZE_MARK
+ OPSIZE_PUSH
+ OPSIZE_JUMP
+ OPSIZE_UPDATE_VAR
+ OPSIZE_FAIL
+ OPSIZE_STEP_BACK_START
+ OPSIZE_STEP_BACK_NEXT
+ body_len
+ OPSIZE_CHECK_POSITION
+ OPSIZE_CUT_TO_MARK
+ OPSIZE_UPDATE_VAR;
if (env.flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0 {
len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
}
len
}
} else if at == ANCR_LOOK_BEHIND_NOT {
let body_len = if let Some(body) = &an.body {
compile_length_tree(body, reg, env)
} else {
0
};
if body_len < 0 {
return body_len;
}
if an.char_min_len == an.char_max_len {
OPSIZE_MARK
+ OPSIZE_PUSH
+ OPSIZE_STEP_BACK_START
+ body_len
+ OPSIZE_POP_TO_MARK
+ OPSIZE_FAIL
+ OPSIZE_POP
} else {
let mut len = OPSIZE_SAVE_VAL
+ OPSIZE_UPDATE_VAR
+ OPSIZE_MARK
+ OPSIZE_PUSH
+ OPSIZE_STEP_BACK_START
+ OPSIZE_STEP_BACK_NEXT
+ body_len
+ OPSIZE_CHECK_POSITION
+ OPSIZE_POP_TO_MARK
+ OPSIZE_UPDATE_VAR
+ OPSIZE_POP
+ OPSIZE_FAIL
+ OPSIZE_UPDATE_VAR
+ OPSIZE_POP
+ OPSIZE_POP;
if (env.flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0 {
len += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
}
len
}
} else {
SIZE_INC
}
}
fn compile_anchor_node(
an: &AnchorNode,
node_status: u32,
reg: &mut RegexType,
env: &ParseEnv,
) -> i32 {
let at = an.anchor_type;
if at == ANCR_PREC_READ {
let id = reg.num_call;
reg.num_call += 1;
add_op(
reg,
OpCode::Mark,
OperationPayload::Mark { id, save_pos: true },
);
if let Some(body) = &an.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
add_op(
reg,
OpCode::CutToMark,
OperationPayload::CutToMark {
id,
restore_pos: true,
},
);
return 0;
}
if at == ANCR_PREC_READ_NOT {
let body_len = if let Some(body) = &an.body {
compile_length_tree(body, reg, env)
} else {
0
};
let id = reg.num_call;
reg.num_call += 1;
let push_addr =
SIZE_INC + OPSIZE_MARK + body_len + OPSIZE_POP_TO_MARK + OPSIZE_POP + OPSIZE_FAIL;
add_op(
reg,
OpCode::Push,
OperationPayload::Push { addr: push_addr },
);
add_op(
reg,
OpCode::Mark,
OperationPayload::Mark {
id,
save_pos: false,
},
);
if let Some(body) = &an.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
add_op(reg, OpCode::PopToMark, OperationPayload::PopToMark { id });
add_op(reg, OpCode::Pop, OperationPayload::None);
add_op(reg, OpCode::Fail, OperationPayload::None);
return 0;
}
if at == ANCR_LOOK_BEHIND {
if an.char_min_len == an.char_max_len {
let id = reg.num_call;
reg.num_call += 1;
add_op(
reg,
OpCode::Mark,
OperationPayload::Mark { id, save_pos: true },
);
let char_len = an.char_min_len as i32;
add_op(
reg,
OpCode::StepBackStart,
OperationPayload::StepBackStart {
initial: char_len,
remaining: 0,
addr: 1,
},
);
if let Some(body) = &an.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
add_op(
reg,
OpCode::CutToMark,
OperationPayload::CutToMark {
id,
restore_pos: true,
},
);
} else {
let mid1 = reg.num_call;
reg.num_call += 1;
let mid2 = reg.num_call;
reg.num_call += 1;
add_op(
reg,
OpCode::SaveVal,
OperationPayload::SaveVal {
save_type: SaveType::RightRange,
id: mid1,
},
);
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type: UpdateVarType::RightRangeToS,
id: 0,
clear: false,
},
);
add_op(
reg,
OpCode::Mark,
OperationPayload::Mark {
id: mid2,
save_pos: false,
},
);
add_op(
reg,
OpCode::Push,
OperationPayload::Push {
addr: SIZE_INC + OPSIZE_JUMP,
},
);
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump {
addr: SIZE_INC + OPSIZE_UPDATE_VAR + OPSIZE_FAIL,
},
);
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type: UpdateVarType::RightRangeFromStack,
id: mid1,
clear: false,
},
);
add_op(reg, OpCode::Fail, OperationPayload::None);
let mid3 = if (env.flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0 {
let mid3 = reg.num_call;
reg.num_call += 1;
add_op(
reg,
OpCode::SaveVal,
OperationPayload::SaveVal {
save_type: SaveType::RightRange,
id: mid3,
},
);
mid3
} else {
0
};
let diff = if an.char_max_len != INFINITE_LEN {
(an.char_max_len - an.char_min_len) as i32
} else {
INFINITE_LEN as i32
};
add_op(
reg,
OpCode::StepBackStart,
OperationPayload::StepBackStart {
initial: an.char_min_len as i32,
remaining: diff,
addr: 2,
},
);
add_op(reg, OpCode::StepBackNext, OperationPayload::None);
if let Some(body) = &an.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
if (env.flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0 {
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type: UpdateVarType::RightRangeFromStack,
id: mid3,
clear: false,
},
);
}
add_op(
reg,
OpCode::CheckPosition,
OperationPayload::CheckPosition {
check_type: CheckPositionType::CurrentRightRange,
},
);
add_op(
reg,
OpCode::CutToMark,
OperationPayload::CutToMark {
id: mid2,
restore_pos: false,
},
);
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type: UpdateVarType::RightRangeFromStack,
id: mid1,
clear: true,
},
);
}
return 0;
}
if at == ANCR_LOOK_BEHIND_NOT {
let body_len = if let Some(body) = &an.body {
compile_length_tree(body, reg, env)
} else {
0
};
if an.char_min_len == an.char_max_len {
let id = reg.num_call;
reg.num_call += 1;
add_op(
reg,
OpCode::Mark,
OperationPayload::Mark {
id,
save_pos: false,
},
);
let push_addr =
SIZE_INC + OPSIZE_STEP_BACK_START + body_len + OPSIZE_POP_TO_MARK + OPSIZE_FAIL;
add_op(
reg,
OpCode::Push,
OperationPayload::Push { addr: push_addr },
);
let char_len = an.char_min_len as i32;
add_op(
reg,
OpCode::StepBackStart,
OperationPayload::StepBackStart {
initial: char_len,
remaining: 0,
addr: 1,
},
);
if let Some(body) = &an.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
add_op(reg, OpCode::PopToMark, OperationPayload::PopToMark { id });
add_op(reg, OpCode::Fail, OperationPayload::None);
add_op(reg, OpCode::Pop, OperationPayload::None);
} else {
let mid1 = reg.num_call;
reg.num_call += 1;
let mid2 = reg.num_call;
reg.num_call += 1;
add_op(
reg,
OpCode::SaveVal,
OperationPayload::SaveVal {
save_type: SaveType::RightRange,
id: mid1,
},
);
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type: UpdateVarType::RightRangeToS,
id: 0,
clear: false,
},
);
add_op(
reg,
OpCode::Mark,
OperationPayload::Mark {
id: mid2,
save_pos: false,
},
);
let mut push_addr = SIZE_INC
+ OPSIZE_STEP_BACK_START
+ OPSIZE_STEP_BACK_NEXT
+ body_len
+ OPSIZE_CHECK_POSITION
+ OPSIZE_POP_TO_MARK
+ OPSIZE_UPDATE_VAR
+ OPSIZE_POP
+ OPSIZE_FAIL;
if (env.flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0 {
push_addr += OPSIZE_SAVE_VAL + OPSIZE_UPDATE_VAR;
}
add_op(
reg,
OpCode::Push,
OperationPayload::Push { addr: push_addr },
);
let mid3 = if (env.flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0 {
let mid3 = reg.num_call;
reg.num_call += 1;
add_op(
reg,
OpCode::SaveVal,
OperationPayload::SaveVal {
save_type: SaveType::RightRange,
id: mid3,
},
);
mid3
} else {
0
};
let diff = if an.char_max_len != INFINITE_LEN {
(an.char_max_len - an.char_min_len) as i32
} else {
INFINITE_LEN as i32
};
add_op(
reg,
OpCode::StepBackStart,
OperationPayload::StepBackStart {
initial: an.char_min_len as i32,
remaining: diff,
addr: 2,
},
);
add_op(reg, OpCode::StepBackNext, OperationPayload::None);
if let Some(body) = &an.body {
let r = compile_tree(body, reg, env);
if r != 0 {
return r;
}
}
if (env.flags & PE_FLAG_HAS_ABSENT_STOPPER) != 0 {
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type: UpdateVarType::RightRangeFromStack,
id: mid3,
clear: false,
},
);
}
add_op(
reg,
OpCode::CheckPosition,
OperationPayload::CheckPosition {
check_type: CheckPositionType::CurrentRightRange,
},
);
add_op(
reg,
OpCode::PopToMark,
OperationPayload::PopToMark { id: mid2 },
);
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type: UpdateVarType::RightRangeFromStack,
id: mid1,
clear: false,
},
);
add_op(reg, OpCode::Pop, OperationPayload::None);
add_op(reg, OpCode::Fail, OperationPayload::None);
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type: UpdateVarType::RightRangeFromStack,
id: mid1,
clear: false,
},
);
add_op(reg, OpCode::Pop, OperationPayload::None);
add_op(reg, OpCode::Pop, OperationPayload::None);
}
return 0;
}
match at {
ANCR_BEGIN_BUF => {
add_op(reg, OpCode::BeginBuf, OperationPayload::None);
}
ANCR_END_BUF => {
add_op(reg, OpCode::EndBuf, OperationPayload::None);
}
ANCR_BEGIN_LINE => {
add_op(reg, OpCode::BeginLine, OperationPayload::None);
}
ANCR_END_LINE => {
add_op(reg, OpCode::EndLine, OperationPayload::None);
}
ANCR_SEMI_END_BUF => {
add_op(reg, OpCode::SemiEndBuf, OperationPayload::None);
}
ANCR_BEGIN_POSITION => {
add_op(
reg,
OpCode::CheckPosition,
OperationPayload::CheckPosition {
check_type: CheckPositionType::SearchStart,
},
);
}
ANCR_WORD_BOUNDARY => {
let mode = if an.ascii_mode { 1 } else { 0 };
add_op(
reg,
OpCode::WordBoundary,
OperationPayload::WordBoundary { mode },
);
}
ANCR_NO_WORD_BOUNDARY => {
let mode = if an.ascii_mode { 1 } else { 0 };
add_op(
reg,
OpCode::NoWordBoundary,
OperationPayload::WordBoundary { mode },
);
}
ANCR_WORD_BEGIN => {
let mode = if an.ascii_mode { 1 } else { 0 };
add_op(
reg,
OpCode::WordBegin,
OperationPayload::WordBoundary { mode },
);
}
ANCR_WORD_END => {
let mode = if an.ascii_mode { 1 } else { 0 };
add_op(
reg,
OpCode::WordEnd,
OperationPayload::WordBoundary { mode },
);
}
ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY => {
let boundary_type = if (node_status & ND_ST_TEXT_SEGMENT_WORD) != 0 {
TextSegmentBoundaryType::Word
} else {
TextSegmentBoundaryType::ExtendedGraphemeCluster
};
let not = at == ANCR_NO_TEXT_SEGMENT_BOUNDARY;
add_op(
reg,
OpCode::TextSegmentBoundary,
OperationPayload::TextSegmentBoundary { boundary_type, not },
);
}
_ => {
return ONIGERR_TYPE_BUG as i32;
}
}
0
}
fn compile_length_gimmick_node(gn: &GimmickNode) -> i32 {
match gn.gimmick_type {
GimmickType::Fail => SIZE_INC,
GimmickType::Save => OPSIZE_SAVE_VAL,
GimmickType::UpdateVar => OPSIZE_UPDATE_VAR,
GimmickType::Callout => {
if gn.detail_type == OnigCalloutOf::Name as i32 {
SIZE_INC } else {
SIZE_INC }
}
}
}
fn compile_gimmick_node(gn: &GimmickNode, reg: &mut RegexType, env: &ParseEnv) -> i32 {
match gn.gimmick_type {
GimmickType::Fail => {
add_op(reg, OpCode::Fail, OperationPayload::None);
}
GimmickType::Save => {
let save_type = match gn.detail_type {
1 => SaveType::S,
2 => SaveType::RightRange,
_ => SaveType::Keep,
};
add_op(
reg,
OpCode::SaveVal,
OperationPayload::SaveVal {
save_type,
id: gn.id,
},
);
}
GimmickType::UpdateVar => {
let var_type = match gn.detail_type {
1 => UpdateVarType::SFromStack,
2 => UpdateVarType::RightRangeFromStack,
3 => UpdateVarType::RightRangeFromSStack,
4 => UpdateVarType::RightRangeToS,
5 => UpdateVarType::RightRangeInit,
_ => UpdateVarType::KeepFromStackLast,
};
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type,
id: gn.id,
clear: false,
},
);
}
GimmickType::Callout => {
if gn.detail_type == OnigCalloutOf::Name as i32 {
add_op(
reg,
OpCode::CalloutName,
OperationPayload::CalloutName {
num: gn.num,
id: gn.id,
},
);
} else {
add_op(
reg,
OpCode::CalloutContents,
OperationPayload::CalloutContents { num: gn.num },
);
}
}
}
0
}
pub fn compile_length_tree(node: &Node, reg: &RegexType, env: &ParseEnv) -> i32 {
match &node.inner {
NodeInner::List(cons) => {
let mut len = 0i32;
len += compile_length_tree(&cons.car, reg, env);
let mut cur = cons.cdr.as_ref();
while let Some(next) = cur {
if let NodeInner::List(c) = &next.inner {
len += compile_length_tree(&c.car, reg, env);
cur = c.cdr.as_ref();
} else {
len += compile_length_tree(next, reg, env);
break;
}
}
len
}
NodeInner::Alt(cons) => {
let mut total = 0i32;
let mut n_alts = 0i32;
let first_len = compile_length_tree(&cons.car, reg, env);
total += first_len;
n_alts += 1;
let mut cur = cons.cdr.as_ref();
while let Some(next) = cur {
if let NodeInner::Alt(c) = &next.inner {
let branch_len = compile_length_tree(&c.car, reg, env);
total += branch_len;
n_alts += 1;
cur = c.cdr.as_ref();
} else {
let branch_len = compile_length_tree(next, reg, env);
total += branch_len;
n_alts += 1;
cur = None;
}
}
total += (n_alts - 1) * (OPSIZE_PUSH + OPSIZE_JUMP);
total
}
NodeInner::String(_) => {
let sn = node.as_str().unwrap();
if sn.is_crude() {
compile_length_string_crude_node(node, reg)
} else {
compile_length_string_node(node, reg)
}
}
NodeInner::CClass(cc) => compile_length_cclass_node(cc, reg),
NodeInner::CType(ct) => SIZE_INC,
NodeInner::BackRef(_br) => OPSIZE_BACKREF,
NodeInner::Quant(qn) => compile_length_quantifier_node(qn, reg, env),
NodeInner::Bag(bag) => compile_length_bag_node(bag, node.status, reg, env),
NodeInner::Anchor(an) => compile_length_anchor_node(an, reg, env),
NodeInner::Gimmick(gn) => compile_length_gimmick_node(gn),
NodeInner::Call(_) => OPSIZE_CALL,
}
}
pub fn compile_tree(node: &Node, reg: &mut RegexType, env: &ParseEnv) -> i32 {
match &node.inner {
NodeInner::List(cons) => {
let r = compile_tree(&cons.car, reg, env);
if r != 0 {
return r;
}
let mut cur = cons.cdr.as_ref();
while let Some(next) = cur {
if let NodeInner::List(c) = &next.inner {
let r = compile_tree(&c.car, reg, env);
if r != 0 {
return r;
}
cur = c.cdr.as_ref();
} else {
return compile_tree(next, reg, env);
}
}
0
}
NodeInner::Alt(cons) => {
let is_super = node.has_status(ND_ST_SUPER);
let push_opcode = if is_super {
OpCode::PushSuper
} else {
OpCode::Push
};
let mut branches: Vec<&Node> = Vec::new();
branches.push(&cons.car);
let mut cur = cons.cdr.as_ref();
while let Some(next) = cur {
if let NodeInner::Alt(c) = &next.inner {
branches.push(&c.car);
cur = c.cdr.as_ref();
} else {
branches.push(next);
cur = None;
}
}
let n = branches.len();
if n == 1 {
return compile_tree(branches[0], reg, env);
}
let mut branch_lens: Vec<i32> = Vec::with_capacity(n);
for b in &branches {
branch_lens.push(compile_length_tree(b, reg, env));
}
let mut total_len = 0i32;
for i in 0..n {
total_len += branch_lens[i];
if i < n - 1 {
total_len += OPSIZE_PUSH + OPSIZE_JUMP;
}
}
let goal = reg.ops.len() as i32 + total_len;
for i in 0..n {
if i < n - 1 {
let push_addr = SIZE_INC + branch_lens[i] + OPSIZE_JUMP;
add_op(reg, push_opcode, OperationPayload::Push { addr: push_addr });
}
let r = compile_tree(branches[i], reg, env);
if r != 0 {
return r;
}
if i < n - 1 {
let jump_addr = goal - reg.ops.len() as i32;
add_op(
reg,
OpCode::Jump,
OperationPayload::Jump { addr: jump_addr },
);
}
}
0
}
NodeInner::String(_) => {
let sn = node.as_str().unwrap();
if sn.is_crude() {
compile_string_crude_node(node, reg)
} else {
compile_string_node(node, reg)
}
}
NodeInner::CClass(cc) => compile_cclass_node(cc, reg),
NodeInner::CType(ct) => {
let opcode = match ct.ctype as u32 {
ONIGENC_CTYPE_WORD => {
if ct.not {
if ct.ascii_mode {
OpCode::NoWordAscii
} else {
OpCode::NoWord
}
} else {
if ct.ascii_mode {
OpCode::WordAscii
} else {
OpCode::Word
}
}
}
_ => {
if node.has_status(ND_ST_MULTILINE) {
OpCode::AnyCharMl
} else {
OpCode::AnyChar
}
}
};
add_op(reg, opcode, OperationPayload::None);
0
}
NodeInner::BackRef(br) => {
let refs = br.back_refs();
if node.has_status(ND_ST_CHECKER) {
let ns = refs.to_vec();
let opcode = if node.has_status(ND_ST_NEST_LEVEL) {
OpCode::BackRefCheckWithLevel
} else {
OpCode::BackRefCheck
};
add_op(
reg,
opcode,
OperationPayload::BackRefGeneral {
num: refs.len() as i32,
ns,
nest_level: br.nest_level,
},
);
} else if node.has_status(ND_ST_NEST_LEVEL) {
let ns = refs.to_vec();
let opcode = if node.has_status(ND_ST_IGNORECASE) {
OpCode::BackRefWithLevelIc
} else {
OpCode::BackRefWithLevel
};
add_op(
reg,
opcode,
OperationPayload::BackRefGeneral {
num: refs.len() as i32,
ns,
nest_level: br.nest_level,
},
);
} else if refs.len() == 1 {
let n = refs[0];
if node.has_status(ND_ST_IGNORECASE) {
add_op(
reg,
OpCode::BackRefNIc,
OperationPayload::BackRefN { n1: n },
);
} else {
match n {
1 => {
add_op(reg, OpCode::BackRef1, OperationPayload::None);
}
2 => {
add_op(reg, OpCode::BackRef2, OperationPayload::None);
}
_ => {
add_op(reg, OpCode::BackRefN, OperationPayload::BackRefN { n1: n });
}
}
}
} else {
let ns = refs.to_vec();
if node.has_status(ND_ST_IGNORECASE) {
add_op(
reg,
OpCode::BackRefMultiIc,
OperationPayload::BackRefGeneral {
num: refs.len() as i32,
ns,
nest_level: 0,
},
);
} else {
add_op(
reg,
OpCode::BackRefMulti,
OperationPayload::BackRefGeneral {
num: refs.len() as i32,
ns,
nest_level: 0,
},
);
}
}
0
}
NodeInner::Quant(qn) => compile_quantifier_node(qn, reg, env),
NodeInner::Bag(bag) => compile_bag_node(bag, node.status, reg, env),
NodeInner::Anchor(an) => compile_anchor_node(an, node.status, reg, env),
NodeInner::Gimmick(gn) => compile_gimmick_node(gn, reg, env),
NodeInner::Call(call) => {
let gnum = call.called_gnum as usize;
let addr = if gnum < reg.called_addrs.len() && reg.called_addrs[gnum] >= 0 {
reg.called_addrs[gnum]
} else {
0 };
add_op(reg, OpCode::Call, OperationPayload::Call { addr });
if gnum >= reg.called_addrs.len() || reg.called_addrs[gnum] < 0 {
let call_idx = reg.ops.len() - 1;
reg.unset_call_addrs.push((call_idx, gnum as i32));
}
0
}
}
}
#[inline]
fn mem_status_is_all_on(stats: MemStatusType) -> bool {
(stats & 1) != 0
}
const IN_ALT: i32 = 1 << 0;
const IN_NOT: i32 = 1 << 1;
const IN_REAL_REPEAT: i32 = 1 << 2;
const IN_VAR_REPEAT: i32 = 1 << 3;
const IN_MULTI_ENTRY: i32 = 1 << 5;
const IN_ZERO_REPEAT: i32 = 1 << 4;
const IN_PREC_READ: i32 = 1 << 6;
const IN_LOOK_BEHIND: i32 = 1 << 7;
const IN_PEEK: i32 = 1 << 8;
fn node_min_byte_len(node: &Node, env: &ParseEnv) -> OnigLen {
match &node.inner {
NodeInner::String(sn) => sn.s.len() as OnigLen,
NodeInner::CType(_) | NodeInner::CClass(_) => env.enc.min_enc_len() as OnigLen,
NodeInner::List(_) => {
let mut len: OnigLen = 0;
let mut cur = node;
loop {
if let NodeInner::List(cons) = &cur.inner {
let tmin = node_min_byte_len(&cons.car, env);
len = distance_add(len, tmin);
match &cons.cdr {
Some(next) => cur = next,
None => break,
}
} else {
break;
}
}
len
}
NodeInner::Alt(_) => {
let mut len: OnigLen = 0;
let mut first = true;
let mut cur = node;
loop {
if let NodeInner::Alt(cons) = &cur.inner {
let tmin = node_min_byte_len(&cons.car, env);
if first {
len = tmin;
first = false;
} else if len > tmin {
len = tmin;
}
match &cons.cdr {
Some(next) => cur = next,
None => break,
}
} else {
break;
}
}
len
}
NodeInner::Quant(qn) => {
if qn.lower > 0 {
if let Some(ref body) = qn.body {
let len = node_min_byte_len(body, env);
distance_multiply(len, qn.lower)
} else {
0
}
} else {
0
}
}
NodeInner::Bag(bn) => {
match bn.bag_type {
BagType::Option | BagType::StopBacktrack => {
if let Some(ref body) = bn.body {
node_min_byte_len(body, env)
} else {
0
}
}
BagType::Memory => {
if node.has_status(ND_ST_FIXED_MIN) {
bn.min_len
} else if node.has_status(ND_ST_MARK1) {
0 } else {
unsafe {
let node_ptr = node as *const Node as *mut Node;
(*node_ptr).status_add(ND_ST_MARK1);
let len = if let Some(ref body) = bn.body {
node_min_byte_len(body, env)
} else {
0
};
(*node_ptr).status_remove(ND_ST_MARK1);
if let NodeInner::Bag(ref mut bn_mut) = (*node_ptr).inner {
bn_mut.min_len = len;
}
(*node_ptr).status_add(ND_ST_FIXED_MIN);
len
}
}
}
BagType::IfElse => {
if let BagData::IfElse {
ref then_node,
ref else_node,
} = bn.bag_data
{
let mut len = if let Some(ref body) = bn.body {
node_min_byte_len(body, env)
} else {
0
};
if let Some(ref then_n) = then_node {
len += node_min_byte_len(then_n, env);
}
let elen = if let Some(ref else_n) = else_node {
node_min_byte_len(else_n, env)
} else {
0
};
if elen < len {
elen
} else {
len
}
} else {
0
}
}
}
}
NodeInner::BackRef(br) => {
if node.has_status(ND_ST_CHECKER) {
0
} else {
0
}
}
NodeInner::Call(ref cn) => {
if !cn.target_node.is_null() {
unsafe { node_min_byte_len(&*cn.target_node, env) }
} else {
0
}
}
NodeInner::Anchor(_) | NodeInner::Gimmick(_) => 0,
}
}
fn quantifiers_memory_node_info(node: &Node) -> BodyEmptyType {
let mut r = BodyEmptyType::MayBeEmpty;
match &node.inner {
NodeInner::List(_) | NodeInner::Alt(_) => {
let mut cur = node;
loop {
let (car, cdr) = match &cur.inner {
NodeInner::List(cons) => (&cons.car, &cons.cdr),
NodeInner::Alt(cons) => (&cons.car, &cons.cdr),
_ => break,
};
let v = quantifiers_memory_node_info(car);
if v as i32 > r as i32 {
r = v;
}
match cdr {
Some(next) => cur = next,
None => break,
}
}
}
NodeInner::Quant(qn) => {
if qn.upper != 0 {
if let Some(ref body) = qn.body {
r = quantifiers_memory_node_info(body);
}
}
}
NodeInner::Bag(bn) => match bn.bag_type {
BagType::Memory => {
return BodyEmptyType::MayBeEmptyMem;
}
BagType::Option | BagType::StopBacktrack => {
if let Some(ref body) = bn.body {
r = quantifiers_memory_node_info(body);
}
}
BagType::IfElse => {
if let Some(ref body) = bn.body {
r = quantifiers_memory_node_info(body);
}
if let BagData::IfElse {
ref then_node,
ref else_node,
} = bn.bag_data
{
if let Some(ref then_n) = then_node {
let v = quantifiers_memory_node_info(then_n);
if v as i32 > r as i32 {
r = v;
}
}
if let Some(ref else_n) = else_node {
let v = quantifiers_memory_node_info(else_n);
if v as i32 > r as i32 {
r = v;
}
}
}
}
},
_ => {}
}
r
}
fn get_min_max_byte_len_case_fold_items(
n: i32,
items: &[OnigCaseFoldCodeItem],
) -> (OnigLen, OnigLen) {
let mut min_len: OnigLen = INFINITE_LEN;
let mut max_len: OnigLen = 0;
for i in 0..(n as usize) {
let len = items[i].byte_len as OnigLen;
if len < min_len {
min_len = len;
}
if len > max_len {
max_len = len;
}
}
(min_len, max_len)
}
fn unravel_case_fold_string(node: &mut Node, reg: &mut RegexType, state: i32) -> i32 {
let enc = reg.enc;
let in_look_behind = (state & IN_LOOK_BEHIND) != 0;
let s_bytes = if let NodeInner::String(ref sn) = node.inner {
sn.s.clone()
} else {
return ONIG_NORMAL;
};
node.status_remove(ND_ST_IGNORECASE);
let mut items = vec![
OnigCaseFoldCodeItem {
byte_len: 0,
code_len: 0,
code: [0; ONIGENC_MAX_COMP_CASE_FOLD_CODE_LEN]
};
ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
let mut nodes: Vec<Box<Node>> = Vec::new();
let mut pending: Vec<u8> = Vec::new();
let mut pos = 0;
while pos < s_bytes.len() {
let one_len = enc.mbc_enc_len(&s_bytes[pos..]);
let mut n = enc.get_case_fold_codes_by_str(
reg.case_fold_flag,
&s_bytes[pos..],
s_bytes.len(),
&mut items,
);
if n > 0 {
if !pending.is_empty() {
nodes.push(node_new_str(&pending));
pending.clear();
}
if in_look_behind {
let q = pos + one_len;
if items[0].byte_len != one_len as i32 {
n = enc.get_case_fold_codes_by_str(
reg.case_fold_flag,
&s_bytes[pos..q],
q - pos,
&mut items,
);
}
let found = (0..n as usize)
.any(|i| items[i].byte_len == one_len as i32 && items[i].code_len == 1);
if !found {
pending.extend_from_slice(&s_bytes[pos..q]);
pos = q;
} else {
let mut cc_node = node_new_cclass();
let cc = cc_node.as_cclass_mut().unwrap();
let code = enc.mbc_to_code(&s_bytes[pos..], s_bytes.len() - pos);
crate::regparse::add_code_into_cc(cc, code, enc);
for i in 0..(n as usize) {
if items[i].byte_len == one_len as i32 && items[i].code_len == 1 {
crate::regparse::add_code_into_cc(cc, items[i].code[0], enc);
}
}
nodes.push(cc_node);
pos = q;
}
} else {
let all_single = (0..n as usize).all(|i| items[i].code_len == 1);
if all_single {
let mut cc_node = node_new_cclass();
let cc = cc_node.as_cclass_mut().unwrap();
let code = enc.mbc_to_code(&s_bytes[pos..], s_bytes.len() - pos);
crate::regparse::add_code_into_cc(cc, code, enc);
for i in 0..(n as usize) {
crate::regparse::add_code_into_cc(cc, items[i].code[0], enc);
}
nodes.push(cc_node);
pos += one_len;
} else {
let (min_byte_len, max_byte_len_val) =
get_min_max_byte_len_case_fold_items(n, &items);
if min_byte_len != max_byte_len_val {
return ONIGERR_PARSER_BUG;
}
let max_byte_len = max_byte_len_val as usize;
let orig_str = &s_bytes[pos..pos + max_byte_len];
let mut alt_node: Box<Node> = node_new_alt(node_new_str(orig_str), None);
let mut curr = &mut alt_node;
for i in 0..(n as usize) {
let item = &items[i];
let mut buf = Vec::new();
let mut tmp = [0u8; 6]; for ci in 0..(item.code_len as usize) {
let blen = enc.code_to_mbc(item.code[ci], &mut tmp);
buf.extend_from_slice(&tmp[..blen as usize]);
}
let new_alt = node_new_alt(node_new_str(&buf), None);
if let NodeInner::Alt(ref mut ca) = curr.inner {
ca.cdr = Some(new_alt);
curr = ca.cdr.as_mut().unwrap();
}
}
nodes.push(alt_node);
pos += max_byte_len;
}
}
} else {
pending.extend_from_slice(&s_bytes[pos..pos + one_len]);
pos += one_len;
}
}
if !pending.is_empty() {
nodes.push(node_new_str(&pending));
}
if nodes.is_empty() {
node.inner = NodeInner::String(StrNode {
s: Vec::new(),
flag: 0,
});
} else if nodes.len() == 1 {
let n = nodes.pop().unwrap();
*node = *n;
} else {
let mut list: Option<Box<Node>> = None;
for n in nodes.into_iter().rev() {
list = Some(node_new_list(n, list));
}
*node = *list.unwrap();
}
ONIG_NORMAL
}
enum CharLenResult {
Fixed(OnigLen),
Variable(OnigLen, OnigLen),
}
fn node_char_len(node: &Node, enc: OnigEncoding) -> CharLenResult {
match &node.inner {
NodeInner::String(sn) => {
let n = onigenc_strlen(enc, &sn.s, 0, sn.s.len());
CharLenResult::Fixed(n as OnigLen)
}
NodeInner::CType(_) | NodeInner::CClass(_) => CharLenResult::Fixed(1),
NodeInner::List(_) => {
let mut sum: OnigLen = 0;
let mut variable = false;
let mut min_sum: OnigLen = 0;
let mut max_sum: OnigLen = 0;
let mut cur = node;
loop {
if let NodeInner::List(cons) = &cur.inner {
match node_char_len(&cons.car, enc) {
CharLenResult::Fixed(n) => {
if variable {
min_sum = distance_add(min_sum, n);
max_sum = distance_add(max_sum, n);
} else {
sum = distance_add(sum, n);
}
}
CharLenResult::Variable(mn, mx) => {
if !variable {
min_sum = sum;
max_sum = sum;
variable = true;
}
min_sum = distance_add(min_sum, mn);
max_sum = distance_add(max_sum, mx);
}
}
match &cons.cdr {
Some(next) => cur = next,
None => break,
}
} else {
break;
}
}
if variable {
CharLenResult::Variable(min_sum, max_sum)
} else {
CharLenResult::Fixed(sum)
}
}
NodeInner::Alt(_) => {
let mut min: OnigLen = OnigLen::MAX;
let mut max: OnigLen = 0;
let mut cur = node;
loop {
if let NodeInner::Alt(cons) = &cur.inner {
let (mn, mx) = match node_char_len(&cons.car, enc) {
CharLenResult::Fixed(n) => (n, n),
CharLenResult::Variable(mn, mx) => (mn, mx),
};
if mn < min {
min = mn;
}
if mx > max {
max = mx;
}
match &cons.cdr {
Some(next) => cur = next,
None => break,
}
} else {
break;
}
}
if min == max {
CharLenResult::Fixed(min)
} else {
CharLenResult::Variable(min, max)
}
}
NodeInner::Quant(qn) => {
if let Some(ref body) = qn.body {
match node_char_len(body, enc) {
CharLenResult::Fixed(n) => {
let lo = distance_multiply(n, qn.lower);
let hi = if qn.upper == INFINITE_REPEAT {
INFINITE_LEN
} else {
distance_multiply(n, qn.upper)
};
if lo == hi {
CharLenResult::Fixed(lo)
} else {
CharLenResult::Variable(lo, hi)
}
}
CharLenResult::Variable(mn, mx) => {
let lo = distance_multiply(mn, qn.lower);
let hi = if qn.upper == INFINITE_REPEAT {
INFINITE_LEN
} else {
distance_multiply(mx, qn.upper)
};
CharLenResult::Variable(lo, hi)
}
}
} else {
CharLenResult::Fixed(0)
}
}
NodeInner::Bag(bn) => {
if let BagData::IfElse {
ref then_node,
ref else_node,
} = bn.bag_data
{
let cond_len = if let Some(ref body) = bn.body {
if body.has_status(ND_ST_CHECKER) {
(0 as OnigLen, 0 as OnigLen)
} else {
match node_char_len(body, enc) {
CharLenResult::Fixed(n) => (n, n),
CharLenResult::Variable(mn, mx) => (mn, mx),
}
}
} else {
(0, 0)
};
let then_len = if let Some(ref n) = then_node {
match node_char_len(n, enc) {
CharLenResult::Fixed(n) => (n, n),
CharLenResult::Variable(mn, mx) => (mn, mx),
}
} else {
(0, 0)
};
let else_len = if let Some(ref n) = else_node {
match node_char_len(n, enc) {
CharLenResult::Fixed(n) => (n, n),
CharLenResult::Variable(mn, mx) => (mn, mx),
}
} else {
(0, 0)
};
let success_min = distance_add(cond_len.0, then_len.0);
let success_max = distance_add(cond_len.1, then_len.1);
let min = std::cmp::min(success_min, else_len.0);
let max = std::cmp::max(success_max, else_len.1);
if min == max {
CharLenResult::Fixed(min)
} else {
CharLenResult::Variable(min, max)
}
} else if let Some(ref body) = bn.body {
node_char_len(body, enc)
} else {
CharLenResult::Fixed(0)
}
}
NodeInner::Anchor(_) => CharLenResult::Fixed(0),
NodeInner::BackRef(_) => CharLenResult::Variable(0, INFINITE_LEN),
NodeInner::Call(ref cn) => {
if !cn.target_node.is_null() {
let target = unsafe { &*cn.target_node };
node_char_len(target, enc)
} else {
CharLenResult::Fixed(0)
}
}
_ => CharLenResult::Fixed(0),
}
}
fn divide_look_behind_alt(node: &mut Node, anchor_type: i32, enc: OnigEncoding) -> i32 {
let (body, ascii_mode) = if let NodeInner::Anchor(ref mut an) = node.inner {
(an.body.take().unwrap(), an.ascii_mode)
} else {
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
};
let mut branches: Vec<Box<Node>> = Vec::new();
let mut cur = body;
loop {
if let NodeInner::Alt(cons) = cur.inner {
branches.push(cons.car);
match cons.cdr {
Some(next) => cur = next,
None => break,
}
} else {
branches.push(cur);
break;
}
}
let use_list = anchor_type == ANCR_LOOK_BEHIND_NOT;
let mut result: Option<Box<Node>> = None;
for branch in branches.into_iter().rev() {
let char_len = match node_char_len(&branch, enc) {
CharLenResult::Fixed(n) => n,
CharLenResult::Variable(_, _) => return ONIGERR_INVALID_LOOK_BEHIND_PATTERN,
};
let mut anchor = node_new_anchor(anchor_type);
if let NodeInner::Anchor(ref mut an) = anchor.inner {
an.body = Some(branch);
an.char_min_len = char_len;
an.char_max_len = char_len;
an.ascii_mode = ascii_mode;
}
if use_list {
result = Some(node_new_list(anchor, result));
} else {
result = Some(node_new_alt(anchor, result));
}
}
if let Some(new_node) = result {
*node = *new_node;
}
ONIG_NORMAL
}
fn is_alt_all_branches_fixed(node: &Node, enc: OnigEncoding) -> bool {
let mut cur = node;
loop {
if let NodeInner::Alt(cons) = &cur.inner {
match node_char_len(&cons.car, enc) {
CharLenResult::Fixed(_) => {}
CharLenResult::Variable(_, _) => return false,
}
match &cons.cdr {
Some(next) => cur = next,
None => return true,
}
} else {
return false;
}
}
}
const ALLOWED_TYPE_IN_LB: u32 = ND_BIT_LIST
| ND_BIT_ALT
| ND_BIT_STRING
| ND_BIT_CCLASS
| ND_BIT_CTYPE
| ND_BIT_ANCHOR
| ND_BIT_BAG
| ND_BIT_QUANT
| ND_BIT_CALL
| ND_BIT_BACKREF
| ND_BIT_GIMMICK;
const ALLOWED_BAG_IN_LB: u32 = (1 << BagType::Memory as u32)
| (1 << BagType::Option as u32)
| (1 << BagType::StopBacktrack as u32)
| (1 << BagType::IfElse as u32);
const ALLOWED_BAG_IN_LB_NOT: u32 = (1 << BagType::Option as u32)
| (1 << BagType::StopBacktrack as u32)
| (1 << BagType::IfElse as u32);
const ALLOWED_ANCHOR_IN_LB: i32 = ANCR_LOOK_BEHIND
| ANCR_BEGIN_LINE
| ANCR_END_LINE
| ANCR_BEGIN_BUF
| ANCR_BEGIN_POSITION
| ANCR_WORD_BOUNDARY
| ANCR_NO_WORD_BOUNDARY
| ANCR_WORD_BEGIN
| ANCR_WORD_END
| ANCR_TEXT_SEGMENT_BOUNDARY
| ANCR_NO_TEXT_SEGMENT_BOUNDARY;
const ALLOWED_ANCHOR_IN_LB_NOT: i32 = ANCR_LOOK_BEHIND
| ANCR_LOOK_BEHIND_NOT
| ANCR_BEGIN_LINE
| ANCR_END_LINE
| ANCR_BEGIN_BUF
| ANCR_BEGIN_POSITION
| ANCR_WORD_BOUNDARY
| ANCR_NO_WORD_BOUNDARY
| ANCR_WORD_BEGIN
| ANCR_WORD_END
| ANCR_TEXT_SEGMENT_BOUNDARY
| ANCR_NO_TEXT_SEGMENT_BOUNDARY;
fn check_called_node_in_look_behind(node: &Node, not: bool) -> i32 {
match &node.inner {
NodeInner::List(cons) | NodeInner::Alt(cons) => {
let mut r = check_called_node_in_look_behind(&cons.car, not);
if r == 0 {
if let Some(ref cdr) = cons.cdr {
r = check_called_node_in_look_behind(cdr, not);
}
}
r
}
NodeInner::Quant(qn) => {
if let Some(ref body) = qn.body {
check_called_node_in_look_behind(body, not)
} else {
0
}
}
NodeInner::Bag(en) => {
if en.bag_type == BagType::Memory {
if node.has_status(ND_ST_MARK1) {
return 0;
}
if let Some(ref body) = en.body {
return check_called_node_in_look_behind(body, not);
}
0
} else {
let mut r = 0;
if let Some(ref body) = en.body {
r = check_called_node_in_look_behind(body, not);
}
if r == 0 {
if let BagData::IfElse {
ref then_node,
ref else_node,
} = en.bag_data
{
if let Some(ref tn) = then_node {
r = check_called_node_in_look_behind(tn, not);
if r != 0 {
return r;
}
}
if let Some(ref en) = else_node {
r = check_called_node_in_look_behind(en, not);
}
}
}
r
}
}
NodeInner::Anchor(an) => {
if let Some(ref body) = an.body {
check_called_node_in_look_behind(body, not)
} else {
0
}
}
NodeInner::Gimmick(_) => {
if node.has_status(ND_ST_ABSENT_WITH_SIDE_EFFECTS) {
1
} else {
0
}
}
_ => 0,
}
}
fn check_node_in_look_behind(node: &Node, not: bool, used: &mut bool) -> i32 {
let type_bit = node.node_type_bit();
if (type_bit & ALLOWED_TYPE_IN_LB) == 0 {
return 1;
}
match &node.inner {
NodeInner::List(cons) | NodeInner::Alt(cons) => {
let mut r = check_node_in_look_behind(&cons.car, not, used);
if r == 0 {
if let Some(ref cdr) = cons.cdr {
r = check_node_in_look_behind(cdr, not, used);
}
}
r
}
NodeInner::Quant(qn) => {
if let Some(ref body) = qn.body {
check_node_in_look_behind(body, not, used)
} else {
0
}
}
NodeInner::Bag(en) => {
let bag_mask = if not {
ALLOWED_BAG_IN_LB_NOT
} else {
ALLOWED_BAG_IN_LB
};
if ((1 << en.bag_type as u32) & bag_mask) == 0 {
return 1;
}
let mut r = 0;
if let Some(ref body) = en.body {
r = check_node_in_look_behind(body, not, used);
if r != 0 {
return r;
}
}
if en.bag_type == BagType::Memory {
if node.has_status(ND_ST_BACKREF)
|| node.has_status(ND_ST_CALLED)
|| node.has_status(ND_ST_REFERENCED)
{
*used = true;
}
} else if let BagData::IfElse {
ref then_node,
ref else_node,
} = en.bag_data
{
if let Some(ref tn) = then_node {
r = check_node_in_look_behind(tn, not, used);
if r != 0 {
return r;
}
}
if let Some(ref en) = else_node {
r = check_node_in_look_behind(en, not, used);
}
}
r
}
NodeInner::Anchor(an) => {
let anchor_mask = if not {
ALLOWED_ANCHOR_IN_LB_NOT
} else {
ALLOWED_ANCHOR_IN_LB
};
if (an.anchor_type & anchor_mask) == 0 {
return 1;
}
if let Some(ref body) = an.body {
check_node_in_look_behind(body, not, used)
} else {
0
}
}
NodeInner::Call(ref cn) => {
if node.has_status(ND_ST_RECURSION) {
*used = true;
0
} else if !cn.target_node.is_null() {
let target = unsafe { &*cn.target_node };
check_called_node_in_look_behind(target, not)
} else {
0
}
}
NodeInner::Gimmick(ref gn) => {
if node.has_status(ND_ST_ABSENT_WITH_SIDE_EFFECTS) {
return 1;
}
if gn.gimmick_type == GimmickType::Save && gn.detail_type == SaveType::Keep as i32 {
*used = true;
}
0
}
_ => 0,
}
}
fn node_reduce_in_look_behind(node: &mut Node) -> bool {
if let NodeInner::Quant(ref mut qn) = node.inner {
if let Some(ref body) = qn.body {
let reducible = matches!(
body.inner,
NodeInner::String(_)
| NodeInner::CType(_)
| NodeInner::CClass(_)
| NodeInner::BackRef(_)
);
if reducible {
qn.upper = qn.lower;
return qn.upper == 0;
}
}
}
false
}
fn list_reduce_in_look_behind(node: &mut Node) {
match node.inner {
NodeInner::Quant(_) => {
node_reduce_in_look_behind(node);
}
NodeInner::List(_) => {
let mut cur = node as *mut Node;
loop {
unsafe {
if let NodeInner::List(ref mut cons) = (*cur).inner {
let removed = node_reduce_in_look_behind(&mut cons.car);
if !removed {
break;
}
if let Some(ref mut cdr) = cons.cdr {
cur = cdr.as_mut() as *mut Node;
} else {
break;
}
} else {
break;
}
}
}
}
_ => {}
}
}
fn alt_reduce_in_look_behind(node: &mut Node) {
match node.inner {
NodeInner::Alt(_) => {
let mut cur = node as *mut Node;
loop {
unsafe {
if let NodeInner::Alt(ref mut cons) = (*cur).inner {
list_reduce_in_look_behind(&mut cons.car);
if let Some(ref mut cdr) = cons.cdr {
cur = cdr.as_mut() as *mut Node;
} else {
break;
}
} else {
break;
}
}
}
}
_ => {
list_reduce_in_look_behind(node);
}
}
}
fn tune_look_behind(node: &mut Node, enc: OnigEncoding, syntax: &OnigSyntaxType) -> i32 {
let (anchor_type, has_body) = if let NodeInner::Anchor(ref an) = node.inner {
(an.anchor_type, an.body.is_some())
} else {
return 0;
};
if !has_body {
return 0;
}
let mut lb_used = false;
{
let is_not = anchor_type == ANCR_LOOK_BEHIND_NOT;
let body = if let NodeInner::Anchor(ref an) = node.inner {
an.body.as_ref().unwrap()
} else {
return 0;
};
let r = check_node_in_look_behind(body, is_not, &mut lb_used);
if r < 0 {
return r;
}
if r > 0 {
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
}
let body_char_len = {
let body = if let NodeInner::Anchor(ref an) = node.inner {
an.body.as_ref().unwrap()
} else {
return 0;
};
node_char_len(body, enc)
};
const LOOK_BEHIND_MAX_CHAR_LEN: OnigLen = 65535;
let (cmin, cmax) = match body_char_len {
CharLenResult::Fixed(n) => (n, n),
CharLenResult::Variable(mn, mx) => (mn, mx),
};
if (cmax != INFINITE_LEN && cmax > LOOK_BEHIND_MAX_CHAR_LEN) || cmin > LOOK_BEHIND_MAX_CHAR_LEN
{
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
match body_char_len {
CharLenResult::Fixed(len) => {
if let NodeInner::Anchor(ref mut an) = node.inner {
an.char_min_len = len;
an.char_max_len = len;
}
ONIG_NORMAL
}
CharLenResult::Variable(min, max) => {
let top_alt_fixed = if let NodeInner::Anchor(ref an) = node.inner {
if let Some(ref body) = an.body {
is_alt_all_branches_fixed(body, enc)
} else {
false
}
} else {
false
};
if top_alt_fixed {
if is_syntax_bv(syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND) {
let r = divide_look_behind_alt(node, anchor_type, enc);
if r == ONIG_NORMAL {
return r;
}
}
if is_syntax_bv(syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND) {
if min == INFINITE_LEN {
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
if let NodeInner::Anchor(ref mut an) = node.inner {
an.char_min_len = min;
an.char_max_len = max;
}
ONIG_NORMAL
} else {
ONIGERR_INVALID_LOOK_BEHIND_PATTERN
}
} else {
if !is_syntax_bv(syntax, ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND) {
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
if min == INFINITE_LEN {
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
if let NodeInner::Anchor(ref mut an) = node.inner {
an.char_min_len = min;
an.char_max_len = max;
}
ONIG_NORMAL
}
}
}
}
fn resolve_call_references(node: &mut Node, reg: &mut RegexType, env: &mut ParseEnv) -> i32 {
match &mut node.inner {
NodeInner::Call(call) => {
let mem_node_ptr;
if call.by_number {
let gnum = call.called_gnum;
if gnum > env.num_mem || gnum < 0 {
return ONIGERR_UNDEFINED_GROUP_REFERENCE;
}
mem_node_ptr = env.mem_env(gnum as usize).mem_node;
if !mem_node_ptr.is_null() {
unsafe {
(*mem_node_ptr).status_add(ND_ST_CALLED);
}
}
} else {
let name = call.name.clone();
if let Some(ref nt) = reg.name_table {
if let Some(nums) = nt.name_to_group_numbers(&name) {
if nums.len() != 1 {
return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
}
call.called_gnum = nums[0];
mem_node_ptr = env.mem_env(nums[0] as usize).mem_node;
if !mem_node_ptr.is_null() {
unsafe {
(*mem_node_ptr).status_add(ND_ST_CALLED);
}
}
} else {
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
} else {
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
}
if !mem_node_ptr.is_null() {
call.target_node = mem_node_ptr;
}
0
}
NodeInner::List(cons) | NodeInner::Alt(cons) => {
let r = resolve_call_references(&mut cons.car, reg, env);
if r != 0 {
return r;
}
if let Some(ref mut cdr) = cons.cdr {
resolve_call_references(cdr, reg, env)
} else {
0
}
}
NodeInner::Quant(qn) => {
if let Some(ref mut body) = qn.body {
resolve_call_references(body, reg, env)
} else {
0
}
}
NodeInner::Bag(bn) => {
if let Some(ref mut body) = bn.body {
let r = resolve_call_references(body, reg, env);
if r != 0 {
return r;
}
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut then_n) = then_node {
let r = resolve_call_references(then_n, reg, env);
if r != 0 {
return r;
}
}
if let Some(ref mut else_n) = else_node {
let r = resolve_call_references(else_n, reg, env);
if r != 0 {
return r;
}
}
}
0
}
NodeInner::Anchor(an) => {
if let Some(ref mut body) = an.body {
resolve_call_references(body, reg, env)
} else {
0
}
}
_ => 0,
}
}
fn recursive_call_check_inner(node: &mut Node) -> i32 {
let node_ptr = node as *mut Node;
let node_type = node.node_type();
match node_type {
NodeType::List | NodeType::Alt => {
let mut r = 0;
let mut cur: *mut Node = node;
unsafe {
loop {
let (car, cdr) = match &mut (*cur).inner {
NodeInner::List(cons) | NodeInner::Alt(cons) => (
&mut cons.car as *mut Box<Node>,
&mut cons.cdr as *mut Option<Box<Node>>,
),
_ => break,
};
r |= recursive_call_check_inner(&mut *(*car));
match &mut *cdr {
Some(ref mut next) => cur = &mut **next,
None => break,
}
}
}
r
}
NodeType::Anchor => {
if let NodeInner::Anchor(ref mut an) = node.inner {
if let Some(ref mut body) = an.body {
recursive_call_check_inner(body)
} else {
0
}
} else {
0
}
}
NodeType::Quant => {
if let NodeInner::Quant(ref mut qn) = node.inner {
if let Some(ref mut body) = qn.body {
recursive_call_check_inner(body)
} else {
0
}
} else {
0
}
}
NodeType::Call => {
unsafe {
if let NodeInner::Call(ref cn) = (*node_ptr).inner {
let target = cn.target_node;
if !target.is_null() {
let r = recursive_call_check_inner(&mut *target);
if r != 0 && (*target).has_status(ND_ST_MARK1) {
(*node_ptr).status_add(ND_ST_RECURSION);
}
r
} else {
0
}
} else {
0
}
}
}
NodeType::Bag => {
let is_memory = if let NodeInner::Bag(ref bn) = node.inner {
bn.bag_type == BagType::Memory
} else {
false
};
if is_memory {
unsafe {
if (*node_ptr).has_status(ND_ST_MARK2) {
return 0;
} else if (*node_ptr).has_status(ND_ST_MARK1) {
return 1; }
(*node_ptr).status_add(ND_ST_MARK2);
let r = if let NodeInner::Bag(ref mut bn) = (*node_ptr).inner {
if let Some(ref mut body) = bn.body {
recursive_call_check_inner(body)
} else {
0
}
} else {
0
};
(*node_ptr).status_remove(ND_ST_MARK2);
r
}
} else {
unsafe {
if let NodeInner::Bag(ref mut bn) = (*node_ptr).inner {
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
let mut r = 0;
if let Some(ref mut t) = then_node {
r |= recursive_call_check_inner(t);
}
if let Some(ref mut e) = else_node {
r |= recursive_call_check_inner(e);
}
if let Some(ref mut body) = bn.body {
r |= recursive_call_check_inner(body);
}
r
} else {
if let Some(ref mut body) = bn.body {
recursive_call_check_inner(body)
} else {
0
}
}
} else {
0
}
}
}
}
_ => 0,
}
}
const IN_RECURSION: i32 = 1;
const FOUND_CALLED_NODE: i32 = 1;
fn recursive_call_check_trav(node: &mut Node, env: &mut ParseEnv, state: i32) -> i32 {
let node_ptr = node as *mut Node;
let node_type = node.node_type();
match node_type {
NodeType::List | NodeType::Alt => {
let mut r = 0;
let mut cur: *mut Node = node;
unsafe {
loop {
let (car, cdr) = match &mut (*cur).inner {
NodeInner::List(cons) | NodeInner::Alt(cons) => (
&mut cons.car as *mut Box<Node>,
&mut cons.cdr as *mut Option<Box<Node>>,
),
_ => break,
};
let ret = recursive_call_check_trav(&mut *(*car), env, state);
if ret == FOUND_CALLED_NODE {
r = FOUND_CALLED_NODE;
}
match &mut *cdr {
Some(ref mut next) => cur = &mut **next,
None => break,
}
}
}
r
}
NodeType::Quant => unsafe {
let upper = if let NodeInner::Quant(ref qn) = (*node_ptr).inner {
qn.upper
} else {
0
};
if let NodeInner::Quant(ref mut qn) = (*node_ptr).inner {
if let Some(ref mut body) = qn.body {
let r = recursive_call_check_trav(body, env, state);
if upper == 0 && r == FOUND_CALLED_NODE {
qn.include_referred = 1;
}
r
} else {
0
}
} else {
0
}
},
NodeType::Anchor => {
if let NodeInner::Anchor(ref mut an) = node.inner {
if let Some(ref mut body) = an.body {
recursive_call_check_trav(body, env, state)
} else {
0
}
} else {
0
}
}
NodeType::Bag => {
let (is_memory, is_if_else, is_called, regnum) = unsafe {
if let NodeInner::Bag(ref bn) = (*node_ptr).inner {
let is_mem = bn.bag_type == BagType::Memory;
let is_ie = matches!(bn.bag_data, BagData::IfElse { .. });
let regnum = match bn.bag_data {
BagData::Memory { regnum, .. } => regnum,
_ => 0,
};
(is_mem, is_ie, (*node_ptr).has_status(ND_ST_CALLED), regnum)
} else {
return 0;
}
};
let mut r = 0;
if is_memory {
if is_called {
r = FOUND_CALLED_NODE;
}
if is_called || (state & IN_RECURSION) != 0 {
unsafe {
if !(*node_ptr).has_status(ND_ST_RECURSION) {
(*node_ptr).status_add(ND_ST_MARK1);
if let NodeInner::Bag(ref mut bn) = (*node_ptr).inner {
if let Some(ref mut body) = bn.body {
let ret = recursive_call_check_inner(body);
if ret != 0 {
(*node_ptr).status_add(ND_ST_RECURSION);
env.backtrack_mem |= 1u32 << (regnum as u32);
}
}
}
(*node_ptr).status_remove(ND_ST_MARK1);
}
}
}
}
let mut state1 = state;
unsafe {
if (*node_ptr).has_status(ND_ST_RECURSION) {
state1 |= IN_RECURSION;
}
}
unsafe {
if let NodeInner::Bag(ref mut bn) = (*node_ptr).inner {
if let Some(ref mut body) = bn.body {
let ret = recursive_call_check_trav(body, env, state1);
if ret == FOUND_CALLED_NODE {
r = FOUND_CALLED_NODE;
}
}
if is_if_else {
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut t) = then_node {
let ret = recursive_call_check_trav(t, env, state1);
if ret == FOUND_CALLED_NODE {
r = FOUND_CALLED_NODE;
}
}
if let Some(ref mut e) = else_node {
let ret = recursive_call_check_trav(e, env, state1);
if ret == FOUND_CALLED_NODE {
r = FOUND_CALLED_NODE;
}
}
}
}
}
}
r
}
_ => 0,
}
}
fn make_named_capture_number_map(
node: &mut Node,
map: &mut [GroupNumMap],
counter: &mut i32,
) -> i32 {
let node_type = node.node_type();
match node_type {
NodeType::List | NodeType::Alt => {
let cur = node as *mut Node;
unsafe {
let mut p = cur;
loop {
let (car_ptr, cdr_opt) = match &mut (*p).inner {
NodeInner::List(ref mut cons) | NodeInner::Alt(ref mut cons) => {
let car = &mut *cons.car as *mut Node;
let cdr = cons.cdr.as_mut().map(|c| &mut **c as *mut Node);
(car, cdr)
}
_ => break,
};
let r = make_named_capture_number_map(&mut *car_ptr, map, counter);
if r < 0 {
return r;
}
match cdr_opt {
Some(next) => p = next,
None => break,
}
}
}
0
}
NodeType::Quant => {
let body_ptr: Option<*mut Node> = if let NodeInner::Quant(ref mut qn) = node.inner {
qn.body.as_mut().map(|b| &mut **b as *mut Node)
} else {
None
};
if let Some(bp) = body_ptr {
let r = unsafe { make_named_capture_number_map(&mut *bp, map, counter) };
if r < 0 {
return r;
}
}
0
}
NodeType::Bag => {
let is_memory =
matches!(&node.inner, NodeInner::Bag(ref bn) if bn.bag_type == BagType::Memory);
let is_named = node.has_status(ND_ST_NAMED_GROUP);
if is_memory && !is_named {
let body = if let NodeInner::Bag(ref mut bn) = node.inner {
bn.body.take()
} else {
None
};
if let Some(body) = body {
let body = *body;
node.inner = body.inner;
node.status = body.status;
} else {
node.inner = NodeInner::String(StrNode {
s: Vec::new(),
flag: 0,
});
}
let r = make_named_capture_number_map(node, map, counter);
if r < 0 {
return r;
}
return 1;
}
if is_memory && is_named {
if let NodeInner::Bag(ref mut bn) = node.inner {
*counter += 1;
if let BagData::Memory { ref mut regnum, .. } = bn.bag_data {
map[*regnum as usize].new_val = *counter;
*regnum = *counter;
}
if let Some(ref mut body) = bn.body {
let r = make_named_capture_number_map(body, map, counter);
if r < 0 {
return r;
}
}
}
return 0;
}
unsafe {
let node_ptr = node as *mut Node;
if let NodeInner::Bag(ref mut bn) = (*node_ptr).inner {
if bn.bag_type == BagType::IfElse {
if let Some(ref mut body) = bn.body {
let r = make_named_capture_number_map(body, map, counter);
if r < 0 {
return r;
}
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut tn) = then_node {
let r = make_named_capture_number_map(tn, map, counter);
if r < 0 {
return r;
}
}
if let Some(ref mut en) = else_node {
let r = make_named_capture_number_map(en, map, counter);
if r < 0 {
return r;
}
}
}
} else {
if let Some(ref mut body) = bn.body {
let r = make_named_capture_number_map(body, map, counter);
if r < 0 {
return r;
}
}
}
}
}
0
}
NodeType::Anchor => {
if let NodeInner::Anchor(ref mut a) = node.inner {
if let Some(ref mut body) = a.body {
let r = make_named_capture_number_map(body, map, counter);
if r < 0 {
return r;
}
}
}
0
}
_ => 0,
}
}
fn renumber_backref_node(node: &mut Node, map: &[GroupNumMap]) -> i32 {
if !node.has_status(ND_ST_BY_NAME) {
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
}
if let NodeInner::BackRef(ref mut br) = node.inner {
let old_num = br.back_num as usize;
let mut pos = 0usize;
if let Some(ref mut dyn_refs) = br.back_dynamic {
for i in 0..old_num {
let n = map[dyn_refs[i] as usize].new_val;
if n > 0 {
dyn_refs[pos] = n;
pos += 1;
}
}
} else {
for i in 0..old_num {
let n = map[br.back_static[i] as usize].new_val;
if n > 0 {
br.back_static[pos] = n;
pos += 1;
}
}
}
br.back_num = pos as i32;
}
0
}
fn renumber_backref_traverse(node: &mut Node, map: &[GroupNumMap]) -> i32 {
match node.node_type() {
NodeType::List | NodeType::Alt => {
let cur = node as *mut Node;
unsafe {
let mut p = cur;
loop {
let (car_ptr, cdr_opt) = match &mut (*p).inner {
NodeInner::List(ref mut cons) | NodeInner::Alt(ref mut cons) => {
let car = &mut *cons.car as *mut Node;
let cdr = cons.cdr.as_mut().map(|c| &mut **c as *mut Node);
(car, cdr)
}
_ => break,
};
let r = renumber_backref_traverse(&mut *car_ptr, map);
if r != 0 {
return r;
}
match cdr_opt {
Some(next) => p = next,
None => break,
}
}
}
0
}
NodeType::Quant => {
if let NodeInner::Quant(ref mut qn) = node.inner {
if let Some(ref mut body) = qn.body {
return renumber_backref_traverse(body, map);
}
}
0
}
NodeType::Bag => {
unsafe {
let node_ptr = node as *mut Node;
if let NodeInner::Bag(ref mut bn) = (*node_ptr).inner {
if let Some(ref mut body) = bn.body {
let r = renumber_backref_traverse(body, map);
if r != 0 {
return r;
}
}
if bn.bag_type == BagType::IfElse {
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut tn) = then_node {
let r = renumber_backref_traverse(tn, map);
if r != 0 {
return r;
}
}
if let Some(ref mut en) = else_node {
let r = renumber_backref_traverse(en, map);
if r != 0 {
return r;
}
}
}
}
}
}
0
}
NodeType::BackRef => renumber_backref_node(node, map),
NodeType::Anchor => {
if let NodeInner::Anchor(ref mut a) = node.inner {
if let Some(ref mut body) = a.body {
return renumber_backref_traverse(body, map);
}
}
0
}
_ => 0,
}
}
fn numbered_ref_check(node: &Node) -> i32 {
match &node.inner {
NodeInner::List(cons) | NodeInner::Alt(cons) => {
let r = numbered_ref_check(&cons.car);
if r != 0 {
return r;
}
if let Some(ref next) = cons.cdr {
return numbered_ref_check(next);
}
0
}
NodeInner::Quant(ref qn) => {
if let Some(ref body) = qn.body {
numbered_ref_check(body)
} else {
0
}
}
NodeInner::Anchor(ref a) => {
if let Some(ref body) = a.body {
numbered_ref_check(body)
} else {
0
}
}
NodeInner::Bag(ref bn) => {
if let Some(ref body) = bn.body {
let r = numbered_ref_check(body);
if r != 0 {
return r;
}
}
if bn.bag_type == BagType::IfElse {
if let BagData::IfElse {
ref then_node,
ref else_node,
} = bn.bag_data
{
if let Some(ref tn) = then_node {
let r = numbered_ref_check(tn);
if r != 0 {
return r;
}
}
if let Some(ref en) = else_node {
let r = numbered_ref_check(en);
if r != 0 {
return r;
}
}
}
}
0
}
NodeInner::BackRef(_) => {
if !node.has_status(ND_ST_BY_NAME) {
ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED
} else {
0
}
}
_ => 0,
}
}
fn disable_noname_group_capture(root: &mut Node, reg: &mut RegexType, env: &mut ParseEnv) -> i32 {
let num_mem = env.num_mem as usize;
let mut map: Vec<GroupNumMap> = (0..=num_mem).map(|_| GroupNumMap { new_val: 0 }).collect();
let mut counter: i32 = 0;
let r = make_named_capture_number_map(root, &mut map, &mut counter);
if r < 0 {
return r;
}
let r = renumber_backref_traverse(root, &map);
if r != 0 {
return r;
}
let mut pos: usize = 1;
for i in 1..=num_mem {
if map[i].new_val > 0 {
if pos != i {
let src_node = env.mem_env(i).mem_node;
let src_empty = env.mem_env(i).empty_repeat_node;
let dst = env.mem_env_mut(pos);
dst.mem_node = src_node;
dst.empty_repeat_node = src_empty;
}
pos += 1;
}
}
let loc = env.cap_history;
env.cap_history = 0;
for i in 1..=std::cmp::min(num_mem, 31) {
if (loc & (1u32 << i)) != 0 {
let new_val = map[i].new_val;
if new_val > 0 && new_val <= 31 {
env.cap_history |= 1u32 << (new_val as u32);
}
}
}
env.num_mem = env.num_named;
reg.num_mem = env.num_named;
if let Some(ref mut nt) = reg.name_table {
for entry in nt.entries.values_mut() {
for back_ref in entry.back_refs.iter_mut() {
let idx = *back_ref as usize;
if idx < map.len() {
*back_ref = map[idx].new_val;
}
}
}
}
0
}
const RECURSION_EXIST: i32 = 1 << 0;
const RECURSION_MUST: i32 = 1 << 1;
const RECURSION_INFINITE: i32 = 1 << 2;
fn infinite_recursive_call_check(node: &mut Node, env: &ParseEnv, head: i32) -> i32 {
let mut r: i32 = 0;
match node.node_type() {
NodeType::List => {
let mut head = head;
let cur = node as *mut Node;
unsafe {
let mut p = cur;
loop {
match &mut (*p).inner {
NodeInner::List(ref mut cons) => {
let ret = infinite_recursive_call_check(&mut cons.car, env, head);
if ret < 0 || (ret & RECURSION_INFINITE) != 0 {
return ret;
}
r |= ret;
if head != 0 {
let min = node_min_byte_len(&cons.car, env);
if min != 0 {
head = 0;
}
}
match cons.cdr {
Some(ref mut next) => p = &mut **next,
None => break,
}
}
_ => break,
}
}
}
}
NodeType::Alt => {
let mut must = RECURSION_MUST;
let cur = node as *mut Node;
unsafe {
let mut p = cur;
loop {
match &mut (*p).inner {
NodeInner::Alt(ref mut cons) => {
let ret = infinite_recursive_call_check(&mut cons.car, env, head);
if ret < 0 || (ret & RECURSION_INFINITE) != 0 {
return ret;
}
r |= ret & RECURSION_EXIST;
must &= ret;
match cons.cdr {
Some(ref mut next) => p = &mut **next,
None => break,
}
}
_ => break,
}
}
}
r |= must;
}
NodeType::Quant => {
if let NodeInner::Quant(ref mut qn) = node.inner {
if qn.upper == 0 {
return 0;
}
if let Some(ref mut body) = qn.body {
r = infinite_recursive_call_check(body, env, head);
if r < 0 {
return r;
}
if (r & RECURSION_MUST) != 0 && qn.lower == 0 {
r &= !RECURSION_MUST;
}
}
}
}
NodeType::Anchor => {
if let NodeInner::Anchor(ref mut a) = node.inner {
if let Some(ref mut body) = a.body {
r = infinite_recursive_call_check(body, env, head);
}
}
}
NodeType::Call => {
if let NodeInner::Call(ref cn) = node.inner {
if !cn.target_node.is_null() {
r = unsafe { infinite_recursive_call_check(&mut *cn.target_node, env, head) };
}
}
}
NodeType::Bag => {
let bag_type = if let NodeInner::Bag(ref bn) = node.inner {
bn.bag_type
} else {
BagType::Option
};
match bag_type {
BagType::Memory => {
if node.has_status(ND_ST_MARK2) {
return 0;
} else if node.has_status(ND_ST_MARK1) {
return if head == 0 {
RECURSION_EXIST | RECURSION_MUST
} else {
RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE
};
} else {
node.status_add(ND_ST_MARK2);
if let NodeInner::Bag(ref mut bn) = node.inner {
if let Some(ref mut body) = bn.body {
r = infinite_recursive_call_check(body, env, head);
}
}
node.status_remove(ND_ST_MARK2);
}
}
BagType::IfElse => {
unsafe {
let node_ptr = node as *mut Node;
if let NodeInner::Bag(ref mut bn) = (*node_ptr).inner {
if let Some(ref mut body) = bn.body {
let ret = infinite_recursive_call_check(body, env, head);
if ret < 0 || (ret & RECURSION_INFINITE) != 0 {
return ret;
}
r |= ret;
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut tn) = then_node {
let min = if head != 0 {
if let Some(ref body) = bn.body {
node_min_byte_len(body, env)
} else {
0
}
} else {
0
};
let ret = infinite_recursive_call_check(
tn,
env,
if min != 0 { 0 } else { head },
);
if ret < 0 || (ret & RECURSION_INFINITE) != 0 {
return ret;
}
r |= ret;
}
if let Some(ref mut en) = else_node {
let eret = infinite_recursive_call_check(en, env, head);
if eret < 0 || (eret & RECURSION_INFINITE) != 0 {
return eret;
}
r |= eret & RECURSION_EXIST;
if (eret & RECURSION_MUST) == 0 {
r &= !RECURSION_MUST;
}
} else {
r &= !RECURSION_MUST;
}
}
}
}
}
_ => {
if let NodeInner::Bag(ref mut bn) = node.inner {
if let Some(ref mut body) = bn.body {
r = infinite_recursive_call_check(body, env, head);
}
}
}
}
}
_ => {}
}
r
}
fn infinite_recursive_call_check_trav(node: &mut Node, env: &ParseEnv) -> i32 {
let r;
match node.node_type() {
NodeType::List | NodeType::Alt => {
let cur = node as *mut Node;
unsafe {
let mut p = cur;
loop {
match &mut (*p).inner {
NodeInner::List(ref mut cons) | NodeInner::Alt(ref mut cons) => {
let ret = infinite_recursive_call_check_trav(&mut cons.car, env);
if ret != 0 {
return ret;
}
match cons.cdr {
Some(ref mut next) => p = &mut **next,
None => break,
}
}
_ => break,
}
}
}
r = 0;
}
NodeType::Anchor => {
if let NodeInner::Anchor(ref mut a) = node.inner {
if let Some(ref mut body) = a.body {
return infinite_recursive_call_check_trav(body, env);
}
}
r = 0;
}
NodeType::Quant => {
if let NodeInner::Quant(ref mut qn) = node.inner {
if let Some(ref mut body) = qn.body {
return infinite_recursive_call_check_trav(body, env);
}
}
r = 0;
}
NodeType::Bag => {
let bag_type = if let NodeInner::Bag(ref bn) = node.inner {
bn.bag_type
} else {
BagType::Option
};
if bag_type == BagType::Memory {
if node.has_status(ND_ST_RECURSION) && node.has_status(ND_ST_CALLED) {
node.status_add(ND_ST_MARK1);
let ret = if let NodeInner::Bag(ref mut bn) = node.inner {
if let Some(ref mut body) = bn.body {
infinite_recursive_call_check(body, env, 1)
} else {
0
}
} else {
0
};
if ret < 0 {
return ret;
}
if (ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0 {
return ONIGERR_NEVER_ENDING_RECURSION;
}
node.status_remove(ND_ST_MARK1);
}
}
if bag_type == BagType::IfElse {
unsafe {
let node_ptr = node as *mut Node;
if let NodeInner::Bag(ref mut bn) = (*node_ptr).inner {
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut tn) = then_node {
let ret = infinite_recursive_call_check_trav(tn, env);
if ret != 0 {
return ret;
}
}
if let Some(ref mut en) = else_node {
let ret = infinite_recursive_call_check_trav(en, env);
if ret != 0 {
return ret;
}
}
}
}
}
}
if let NodeInner::Bag(ref mut bn) = node.inner {
if let Some(ref mut body) = bn.body {
return infinite_recursive_call_check_trav(body, env);
}
}
r = 0;
}
_ => {
r = 0;
}
}
r
}
fn tune_call(node: &mut Node, state: i32) {
let np = node as *mut Node;
unsafe {
match &mut (*np).inner {
NodeInner::List(_) | NodeInner::Alt(_) => {
let mut cur = np;
loop {
match &mut (*cur).inner {
NodeInner::List(c) | NodeInner::Alt(c) => {
tune_call(&mut c.car, state);
match &mut c.cdr {
Some(ref mut next) => cur = next.as_mut() as *mut Node,
None => break,
}
}
_ => break,
}
}
}
NodeInner::Quant(qn) => {
let s = if qn.upper == 0 {
state | IN_ZERO_REPEAT
} else {
state
};
if let Some(ref mut body) = qn.body {
tune_call(body, s);
}
}
NodeInner::Anchor(an) => {
if let Some(ref mut body) = an.body {
tune_call(body, state);
}
}
NodeInner::Bag(bn) => {
let bt = bn.bag_type;
if bt == BagType::Memory {
if (state & IN_ZERO_REPEAT) != 0 {
(*np).status_add(ND_ST_IN_ZERO_REPEAT);
if let NodeInner::Bag(ref mut bn) = (*np).inner {
if let BagData::Memory {
ref mut entry_count,
..
} = bn.bag_data
{
*entry_count -= 1;
}
if let Some(ref mut body) = bn.body {
tune_call(body, state);
}
}
} else if let Some(ref mut body) = bn.body {
tune_call(body, state);
}
} else if bt == BagType::IfElse {
if let Some(ref mut body) = bn.body {
tune_call(body, state);
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut t) = then_node {
tune_call(t, state);
}
if let Some(ref mut e) = else_node {
tune_call(e, state);
}
}
} else if let Some(ref mut body) = bn.body {
tune_call(body, state);
}
}
NodeInner::Call(cn) => {
if (state & IN_ZERO_REPEAT) != 0 {
(*np).status_add(ND_ST_IN_ZERO_REPEAT);
cn.entry_count -= 1;
}
}
_ => {}
}
}
}
fn tune_call2_call(node: &mut Node) {
let np = node as *mut Node;
unsafe {
match &mut (*np).inner {
NodeInner::List(_) | NodeInner::Alt(_) => {
let mut cur = np;
loop {
match &mut (*cur).inner {
NodeInner::List(c) | NodeInner::Alt(c) => {
tune_call2_call(&mut c.car);
match &mut c.cdr {
Some(ref mut next) => cur = next.as_mut() as *mut Node,
None => break,
}
}
_ => break,
}
}
}
NodeInner::Quant(qn) => {
if let Some(ref mut body) = qn.body {
tune_call2_call(body);
}
}
NodeInner::Anchor(an) => {
if let Some(ref mut body) = an.body {
tune_call2_call(body);
}
}
NodeInner::Bag(bn) => {
let bt = bn.bag_type;
if bt == BagType::Memory {
if !(*np).has_status(ND_ST_MARK1) {
(*np).status_add(ND_ST_MARK1);
if let NodeInner::Bag(ref mut bn) = (*np).inner {
if let Some(ref mut body) = bn.body {
tune_call2_call(body);
}
}
(*np).status_remove(ND_ST_MARK1);
}
} else if bt == BagType::IfElse {
if let Some(ref mut body) = bn.body {
tune_call2_call(body);
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut t) = then_node {
tune_call2_call(t);
}
if let Some(ref mut e) = else_node {
tune_call2_call(e);
}
}
} else if let Some(ref mut body) = bn.body {
tune_call2_call(body);
}
}
NodeInner::Call(cn) => {
if !(*np).has_status(ND_ST_MARK1) {
(*np).status_add(ND_ST_MARK1);
cn.entry_count += 1;
let target = cn.target_node;
if !target.is_null() {
(*target).status_add(ND_ST_CALLED);
if let NodeInner::Bag(ref mut tbn) = (*target).inner {
if let BagData::Memory {
ref mut entry_count,
..
} = tbn.bag_data
{
*entry_count += 1;
}
}
tune_call2_call(&mut *target);
}
(*np).status_remove(ND_ST_MARK1);
}
}
_ => {}
}
}
}
fn tune_call2(node: &mut Node) -> i32 {
let np = node as *mut Node;
unsafe {
match &mut (*np).inner {
NodeInner::List(_) | NodeInner::Alt(_) => {
let mut cur = np;
loop {
match &mut (*cur).inner {
NodeInner::List(c) | NodeInner::Alt(c) => {
let r = tune_call2(&mut c.car);
if r != 0 {
return r;
}
match &mut c.cdr {
Some(ref mut next) => cur = next.as_mut() as *mut Node,
None => break,
}
}
_ => break,
}
}
}
NodeInner::Quant(qn) => {
if qn.upper != 0 {
if let Some(ref mut body) = qn.body {
return tune_call2(body);
}
}
}
NodeInner::Anchor(an) => {
if let Some(ref mut body) = an.body {
return tune_call2(body);
}
}
NodeInner::Bag(bn) => {
let in_zero = (*np).has_status(ND_ST_IN_ZERO_REPEAT);
if !in_zero {
if let Some(ref mut body) = bn.body {
let r = tune_call2(body);
if r != 0 {
return r;
}
}
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut t) = then_node {
let r = tune_call2(t);
if r != 0 {
return r;
}
}
if let Some(ref mut e) = else_node {
return tune_call2(e);
}
}
}
NodeInner::Call(_) => {
if !(*np).has_status(ND_ST_IN_ZERO_REPEAT) {
tune_call2_call(&mut *np);
}
}
_ => {}
}
}
0
}
fn tune_called_state_call(node: &mut Node, state: i32) {
let np = node as *mut Node;
unsafe {
match &mut (*np).inner {
NodeInner::Alt(_) => {
let s = state | IN_ALT;
let mut cur = np;
loop {
match &mut (*cur).inner {
NodeInner::Alt(c) | NodeInner::List(c) => {
tune_called_state_call(&mut c.car, s);
match &mut c.cdr {
Some(ref mut next) => cur = next.as_mut() as *mut Node,
None => break,
}
}
_ => break,
}
}
}
NodeInner::List(_) => {
let mut cur = np;
loop {
match &mut (*cur).inner {
NodeInner::List(c) | NodeInner::Alt(c) => {
tune_called_state_call(&mut c.car, state);
match &mut c.cdr {
Some(ref mut next) => cur = next.as_mut() as *mut Node,
None => break,
}
}
_ => break,
}
}
}
NodeInner::Quant(qn) => {
let mut s = state;
if is_infinite_repeat(qn.upper) || qn.upper >= 2 {
s |= IN_REAL_REPEAT;
}
if qn.lower != qn.upper {
s |= IN_VAR_REPEAT;
}
if (state & IN_PEEK) != 0 {
(*np).status_add(ND_ST_INPEEK);
}
if let Some(ref mut body) = qn.body {
tune_called_state_call(body, s);
}
}
NodeInner::Anchor(an) => match an.anchor_type {
ANCR_PREC_READ_NOT | ANCR_LOOK_BEHIND_NOT => {
if let Some(ref mut body) = an.body {
tune_called_state_call(body, state | IN_NOT | IN_PEEK);
}
}
ANCR_PREC_READ | ANCR_LOOK_BEHIND => {
if let Some(ref mut body) = an.body {
tune_called_state_call(body, state | IN_PEEK);
}
}
_ => {}
},
NodeInner::Bag(bn) => {
let bt = bn.bag_type;
if bt == BagType::Memory {
if (*np).has_status(ND_ST_MARK1) {
if let NodeInner::Bag(ref mut bn) = (*np).inner {
if let BagData::Memory {
ref mut called_state,
..
} = bn.bag_data
{
if (!*called_state & state) != 0 {
*called_state |= state;
if let Some(ref mut body) = bn.body {
tune_called_state_call(body, state);
}
}
}
}
} else {
(*np).status_add(ND_ST_MARK1);
if let NodeInner::Bag(ref mut bn) = (*np).inner {
if let BagData::Memory {
ref mut called_state,
..
} = bn.bag_data
{
*called_state |= state;
}
if let Some(ref mut body) = bn.body {
tune_called_state_call(body, state);
}
}
(*np).status_remove(ND_ST_MARK1);
}
} else if bt == BagType::IfElse {
let s = state | IN_ALT;
if let Some(ref mut body) = bn.body {
tune_called_state_call(body, s);
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut t) = then_node {
tune_called_state_call(t, s);
}
if let Some(ref mut e) = else_node {
tune_called_state_call(e, s);
}
}
} else if let Some(ref mut body) = bn.body {
tune_called_state_call(body, state);
}
}
NodeInner::Call(cn) => {
if (state & IN_PEEK) != 0 {
(*np).status_add(ND_ST_INPEEK);
}
if (state & IN_REAL_REPEAT) != 0 {
(*np).status_add(ND_ST_IN_REAL_REPEAT);
}
if let Some(ref mut body) = cn.body {
tune_called_state_call(body, state);
}
}
_ => {}
}
}
}
fn tune_called_state(node: &mut Node, state: i32) {
let np = node as *mut Node;
unsafe {
match &mut (*np).inner {
NodeInner::Alt(_) => {
let s = state | IN_ALT;
let mut cur = np;
loop {
match &mut (*cur).inner {
NodeInner::Alt(c) | NodeInner::List(c) => {
tune_called_state(&mut c.car, s);
match &mut c.cdr {
Some(ref mut next) => cur = next.as_mut() as *mut Node,
None => break,
}
}
_ => break,
}
}
}
NodeInner::List(_) => {
let mut cur = np;
loop {
match &mut (*cur).inner {
NodeInner::List(c) | NodeInner::Alt(c) => {
tune_called_state(&mut c.car, state);
match &mut c.cdr {
Some(ref mut next) => cur = next.as_mut() as *mut Node,
None => break,
}
}
_ => break,
}
}
}
NodeInner::Call(_) => {
if (state & IN_PEEK) != 0 {
(*np).status_add(ND_ST_INPEEK);
}
if (state & IN_REAL_REPEAT) != 0 {
(*np).status_add(ND_ST_IN_REAL_REPEAT);
}
tune_called_state_call(&mut *np, state);
}
NodeInner::Bag(bn) => {
let bt = bn.bag_type;
match bt {
BagType::Memory => {
let mut s = state;
if let BagData::Memory {
entry_count,
ref mut called_state,
..
} = bn.bag_data
{
if entry_count > 1 {
s |= IN_MULTI_ENTRY;
}
*called_state |= s;
}
if let Some(ref mut body) = bn.body {
tune_called_state(body, s);
}
}
BagType::Option | BagType::StopBacktrack => {
if let Some(ref mut body) = bn.body {
tune_called_state(body, state);
}
}
BagType::IfElse => {
let s = state | IN_ALT;
if let Some(ref mut body) = bn.body {
tune_called_state(body, s);
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut t) = then_node {
tune_called_state(t, s);
}
if let Some(ref mut e) = else_node {
tune_called_state(e, s);
}
}
}
}
}
NodeInner::Quant(qn) => {
let mut s = state;
if is_infinite_repeat(qn.upper) || qn.upper >= 2 {
s |= IN_REAL_REPEAT;
}
if qn.lower != qn.upper {
s |= IN_VAR_REPEAT;
}
if (state & IN_PEEK) != 0 {
(*np).status_add(ND_ST_INPEEK);
}
if let Some(ref mut body) = qn.body {
tune_called_state(body, s);
}
}
NodeInner::Anchor(an) => match an.anchor_type {
ANCR_PREC_READ_NOT | ANCR_LOOK_BEHIND_NOT => {
if let Some(ref mut body) = an.body {
tune_called_state(body, state | IN_NOT | IN_PEEK);
}
}
ANCR_PREC_READ | ANCR_LOOK_BEHIND => {
if let Some(ref mut body) = an.body {
tune_called_state(body, state | IN_PEEK);
}
}
_ => {}
},
_ => {}
}
}
}
pub fn tune_tree(node: &mut Node, reg: &mut RegexType, state: i32, env: &mut ParseEnv) -> i32 {
if let NodeInner::String(ref sn) = node.inner {
if node.has_status(ND_ST_IGNORECASE) && !sn.is_crude() {
let r = unravel_case_fold_string(node, reg, state);
if r != 0 {
return r;
}
return tune_tree(node, reg, state, env);
}
}
match &mut node.inner {
NodeInner::List(_) => {
let mut cur: *mut Node = node;
let mut prev: *mut Node = std::ptr::null_mut();
unsafe {
loop {
if let NodeInner::List(ref mut cons) = (*cur).inner {
let r = tune_tree(&mut cons.car, reg, state, env);
if r != 0 {
return r;
}
if !prev.is_null() {
let r = tune_next(&mut *prev, &cons.car, reg);
if r != 0 {
return r;
}
}
prev = &mut *cons.car;
match cons.cdr {
Some(ref mut next) => cur = &mut **next,
None => break,
}
} else {
break;
}
}
}
0
}
NodeInner::Alt(_) => {
let mut cur: *mut Node = node;
unsafe {
loop {
if let NodeInner::Alt(ref mut cons) = (*cur).inner {
let r = tune_tree(&mut cons.car, reg, state | IN_ALT, env);
if r != 0 {
return r;
}
match cons.cdr {
Some(ref mut next) => cur = &mut **next,
None => break,
}
} else {
break;
}
}
}
0
}
NodeInner::Quant(ref mut qn) => {
if (state & IN_REAL_REPEAT) != 0 {
node.status |= ND_ST_IN_REAL_REPEAT;
}
if (state & IN_MULTI_ENTRY) != 0 {
node.status |= ND_ST_IN_MULTI_ENTRY;
}
if is_infinite_repeat(qn.upper) || qn.upper >= 1 {
if let Some(ref body) = qn.body {
let d = node_min_byte_len(body, env);
if d == 0 {
qn.emptiness = quantifiers_memory_node_info(body);
}
}
}
let mut new_state = state;
if is_infinite_repeat(qn.upper) || qn.upper >= 2 {
new_state |= IN_REAL_REPEAT;
}
if qn.lower != qn.upper {
new_state |= IN_VAR_REPEAT;
}
if let Some(ref mut body) = qn.body {
let r = tune_tree(body, reg, new_state, env);
if r != 0 {
return r;
}
}
const EXPAND_STRING_MAX_LENGTH: i32 = 100;
if let Some(ref body) = qn.body {
if let NodeInner::String(ref sn) = body.inner {
if !is_infinite_repeat(qn.lower)
&& qn.lower == qn.upper
&& qn.lower > 1
&& qn.lower <= EXPAND_STRING_MAX_LENGTH
{
let len = sn.s.len() as i32;
if len * qn.lower <= EXPAND_STRING_MAX_LENGTH {
let n = qn.lower as usize;
let orig_bytes = sn.s.clone();
let flag = sn.flag;
let mut expanded = Vec::with_capacity(orig_bytes.len() * n);
for _ in 0..n {
expanded.extend_from_slice(&orig_bytes);
}
let mut str_node = node_new_str(&expanded);
if let NodeInner::String(ref mut esn) = str_node.inner {
esn.flag = flag;
}
str_node.status = node.status;
*node = *str_node;
return 0;
}
}
}
}
if qn.greedy && qn.emptiness == BodyEmptyType::NotEmpty {
if let Some(ref body) = qn.body {
if let NodeInner::Quant(ref tqn) = body.inner {
if tqn.head_exact.is_some() {
qn.head_exact = tqn.head_exact;
}
} else {
qn.head_exact = get_head_literal_byte(body, true, reg);
}
}
}
0
}
NodeInner::Bag(ref mut bn) => {
match bn.bag_type {
BagType::Option => {
let saved_options = reg.options;
if let BagData::Option { options } = bn.bag_data {
reg.options = options;
}
let r = if let Some(ref mut body) = bn.body {
tune_tree(body, reg, state, env)
} else {
0
};
reg.options = saved_options;
r
}
BagType::Memory => {
let mut state = state;
if let BagData::Memory { called_state, .. } = bn.bag_data {
state |= called_state;
}
if (state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
|| (node.status & ND_ST_RECURSION) != 0
{
if let BagData::Memory { regnum, .. } = bn.bag_data {
mem_status_on(&mut env.backtrack_mem, regnum as usize);
}
}
if let Some(ref mut body) = bn.body {
tune_tree(body, reg, state, env)
} else {
0
}
}
BagType::StopBacktrack => {
if let Some(ref mut body) = bn.body {
tune_tree(body, reg, state, env)
} else {
0
}
}
BagType::IfElse => {
if let Some(ref mut body) = bn.body {
let r = tune_tree(body, reg, state | IN_ALT, env);
if r != 0 {
return r;
}
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut then_n) = then_node {
let r = tune_tree(then_n, reg, state | IN_ALT, env);
if r != 0 {
return r;
}
}
if let Some(ref mut else_n) = else_node {
let r = tune_tree(else_n, reg, state | IN_ALT, env);
if r != 0 {
return r;
}
}
}
0
}
}
}
NodeInner::Anchor(ref mut an) => {
let at = an.anchor_type;
if at == ANCR_LOOK_BEHIND || at == ANCR_LOOK_BEHIND_NOT {
let enc = env.enc;
let r = tune_look_behind(node, enc, env.syntax);
if r != 0 {
return r;
}
if !matches!(node.inner, NodeInner::Anchor(_)) {
return tune_tree(node, reg, state, env);
}
}
if let NodeInner::Anchor(ref mut an) = node.inner {
let anchor_type = an.anchor_type;
if let Some(ref mut body) = an.body {
let new_state = if anchor_type == ANCR_PREC_READ {
state | IN_PREC_READ
} else if anchor_type == ANCR_PREC_READ_NOT {
state | IN_PREC_READ | IN_NOT
} else if anchor_type == ANCR_LOOK_BEHIND_NOT {
state | IN_NOT | IN_LOOK_BEHIND
} else if anchor_type == ANCR_LOOK_BEHIND {
state | IN_LOOK_BEHIND
} else {
state
};
let r = tune_tree(body, reg, new_state, env);
if r != 0 {
return r;
}
if anchor_type == ANCR_LOOK_BEHIND || anchor_type == ANCR_LOOK_BEHIND_NOT {
alt_reduce_in_look_behind(body);
}
0
} else {
0
}
} else {
0
}
}
NodeInner::BackRef(ref br) => {
for &back in br.back_refs() {
if back > 0 {
mem_status_on(&mut env.backrefed_mem, back as usize);
}
}
0
}
NodeInner::String(_)
| NodeInner::CType(_)
| NodeInner::CClass(_)
| NodeInner::Call(_)
| NodeInner::Gimmick(_) => 0,
}
}
fn mark_empty_repeat_node(node: &mut Node, env: &mut ParseEnv) {
let node_ptr = node as *mut Node;
match &mut node.inner {
NodeInner::Quant(ref mut qn) => {
let is_empty = qn.emptiness == BodyEmptyType::MayBeEmptyMem
|| qn.emptiness == BodyEmptyType::MayBeEmptyRec;
if is_empty {
if let Some(ref body) = qn.body {
set_empty_repeat_node_in_body(body, node_ptr as *const Node, env);
}
}
if let Some(ref mut body) = qn.body {
mark_empty_repeat_node(body, env);
}
}
NodeInner::List(_) | NodeInner::Alt(_) => {
let mut cur: *mut Node = node;
unsafe {
loop {
let (car, cdr) = match &mut (*cur).inner {
NodeInner::List(cons) | NodeInner::Alt(cons) => (
&mut cons.car as *mut Box<Node>,
&mut cons.cdr as *mut Option<Box<Node>>,
),
_ => break,
};
mark_empty_repeat_node(&mut *(*car), env);
match &mut *cdr {
Some(ref mut next) => cur = &mut **next,
None => break,
}
}
}
}
NodeInner::Bag(ref mut bn) => {
if let Some(ref mut body) = bn.body {
mark_empty_repeat_node(body, env);
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut t) = then_node {
mark_empty_repeat_node(t, env);
}
if let Some(ref mut e) = else_node {
mark_empty_repeat_node(e, env);
}
}
}
NodeInner::Anchor(ref mut an) => {
if let Some(ref mut body) = an.body {
mark_empty_repeat_node(body, env);
}
}
_ => {}
}
}
fn set_empty_repeat_node_in_body(node: &Node, quant_ptr: *const Node, env: &mut ParseEnv) {
match &node.inner {
NodeInner::Bag(bn) => {
if bn.bag_type == BagType::Memory {
if let BagData::Memory { regnum, .. } = bn.bag_data {
let regnum = regnum as usize;
let entry = env.mem_env_mut(regnum);
entry.empty_repeat_node = quant_ptr as *mut Node;
}
}
if let Some(ref body) = bn.body {
set_empty_repeat_node_in_body(body, quant_ptr, env);
}
if let BagData::IfElse {
ref then_node,
ref else_node,
} = bn.bag_data
{
if let Some(ref t) = then_node {
set_empty_repeat_node_in_body(t, quant_ptr, env);
}
if let Some(ref e) = else_node {
set_empty_repeat_node_in_body(e, quant_ptr, env);
}
}
}
NodeInner::List(_) | NodeInner::Alt(_) => {
let mut cur: &Node = node;
loop {
let (car, cdr) = match &cur.inner {
NodeInner::List(cons) | NodeInner::Alt(cons) => (&cons.car, &cons.cdr),
_ => break,
};
set_empty_repeat_node_in_body(car, quant_ptr, env);
match cdr {
Some(ref next) => cur = next,
None => break,
}
}
}
NodeInner::Quant(qn) => {
if let Some(ref body) = qn.body {
set_empty_repeat_node_in_body(body, quant_ptr, env);
}
}
NodeInner::Anchor(an) => {
if let Some(ref body) = an.body {
set_empty_repeat_node_in_body(body, quant_ptr, env);
}
}
_ => {}
}
}
fn resolve_empty_status_backrefs(
node: &mut Node,
enclosing_quants: &mut Vec<*const Node>,
env: &ParseEnv,
) {
let node_ptr = node as *const Node;
match &mut node.inner {
NodeInner::Quant(ref mut qn) => {
let is_empty_quant = qn.emptiness == BodyEmptyType::MayBeEmptyMem
|| qn.emptiness == BodyEmptyType::MayBeEmptyRec;
if is_empty_quant {
enclosing_quants.push(node_ptr);
}
if let Some(ref mut body) = qn.body {
resolve_empty_status_backrefs(body, enclosing_quants, env);
}
if is_empty_quant {
enclosing_quants.pop();
}
}
NodeInner::BackRef(ref br) => {
for &back in br.back_refs() {
if back <= 0 {
continue;
}
let back = back as usize;
let entry = env.mem_env(back);
let er_node = entry.empty_repeat_node;
if !er_node.is_null() {
if !enclosing_quants.contains(&(er_node as *const Node)) {
unsafe {
if let NodeInner::Quant(ref mut qn) = (*er_node).inner {
qn.empty_status_mem |= 1u32 << back;
(*er_node).status |= ND_ST_EMPTY_STATUS_CHECK;
}
}
}
}
}
}
NodeInner::List(_) | NodeInner::Alt(_) => {
let mut cur: *mut Node = node;
unsafe {
loop {
let (car, cdr) = match &mut (*cur).inner {
NodeInner::List(cons) | NodeInner::Alt(cons) => (
&mut cons.car as *mut Box<Node>,
&mut cons.cdr as *mut Option<Box<Node>>,
),
_ => break,
};
resolve_empty_status_backrefs(&mut *(*car), enclosing_quants, env);
match &mut *cdr {
Some(ref mut next) => cur = &mut **next,
None => break,
}
}
}
}
NodeInner::Bag(ref mut bn) => {
if let Some(ref mut body) = bn.body {
resolve_empty_status_backrefs(body, enclosing_quants, env);
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = bn.bag_data
{
if let Some(ref mut t) = then_node {
resolve_empty_status_backrefs(t, enclosing_quants, env);
}
if let Some(ref mut e) = else_node {
resolve_empty_status_backrefs(e, enclosing_quants, env);
}
}
}
NodeInner::Anchor(ref mut an) => {
if let Some(ref mut body) = an.body {
resolve_empty_status_backrefs(body, enclosing_quants, env);
}
}
_ => {}
}
}
fn setup_empty_status_mem(root: &mut Node, env: &mut ParseEnv) {
mark_empty_repeat_node(root, env);
let mut enclosing = Vec::new();
resolve_empty_status_backrefs(root, &mut enclosing, env);
}
fn flatten_list(mut node: Box<Node>) -> Vec<Box<Node>> {
let mut items = Vec::new();
loop {
match node.inner {
NodeInner::List(cons) => {
items.push(cons.car);
match cons.cdr {
Some(next) => node = next,
None => break,
}
}
_ => {
items.push(node);
break;
}
}
}
items
}
fn rebuild_list(items: Vec<Box<Node>>) -> Box<Node> {
let mut items = items;
assert!(!items.is_empty());
let mut result = Node {
status: 0,
parent: std::ptr::null_mut(),
inner: NodeInner::List(ConsAltNode {
car: items.pop().unwrap(),
cdr: None,
}),
};
while let Some(item) = items.pop() {
result = Node {
status: 0,
parent: std::ptr::null_mut(),
inner: NodeInner::List(ConsAltNode {
car: item,
cdr: Some(Box::new(result)),
}),
};
}
Box::new(result)
}
pub fn reduce_string_list(node: &mut Node, enc: OnigEncoding) -> i32 {
match &mut node.inner {
NodeInner::List(_) => {
let placeholder = NodeInner::String(StrNode {
s: Vec::new(),
flag: 0,
});
let old_inner = std::mem::replace(&mut node.inner, placeholder);
let list_node = Box::new(Node {
status: 0,
parent: std::ptr::null_mut(),
inner: old_inner,
});
let mut items = flatten_list(list_node);
for item in items.iter_mut() {
if item.node_type() != NodeType::String {
let r = reduce_string_list(item, enc);
if r != 0 {
node.inner = rebuild_list(items).inner;
return r;
}
}
}
let mut merged: Vec<Box<Node>> = Vec::new();
for item in items {
if item.node_type() == NodeType::String {
let can_merge = if let Some(last) = merged.last() {
if last.node_type() == NodeType::String {
let last_str = last.as_str().unwrap();
let curr_str = item.as_str().unwrap();
last_str.flag == curr_str.flag && last.status == item.status
} else {
false
}
} else {
false
};
if can_merge {
let curr_bytes = item.as_str().unwrap().s.clone();
let last = merged.last_mut().unwrap();
last.as_str_mut().unwrap().s.extend_from_slice(&curr_bytes);
} else {
merged.push(item);
}
} else {
merged.push(item);
}
}
if merged.len() == 1 {
let single = merged.into_iter().next().unwrap();
*node = *single;
} else {
node.inner = rebuild_list(merged).inner;
}
0
}
NodeInner::Alt(_) => {
let saved_status = node.status; let placeholder = NodeInner::String(StrNode {
s: Vec::new(),
flag: 0,
});
let old_inner = std::mem::replace(&mut node.inner, placeholder);
let alt_node = Box::new(Node {
status: 0,
parent: std::ptr::null_mut(),
inner: old_inner,
});
let mut items = Vec::new();
let mut current: Option<Box<Node>> = Some(alt_node);
while let Some(n) = current {
match n.inner {
NodeInner::Alt(cons) => {
items.push(cons.car);
current = cons.cdr;
}
_ => {
items.push(n);
current = None;
}
}
}
for item in items.iter_mut() {
let r = reduce_string_list(item, enc);
if r != 0 {
let mut result = Node {
status: 0,
parent: std::ptr::null_mut(),
inner: NodeInner::Alt(ConsAltNode {
car: items.pop().unwrap(),
cdr: None,
}),
};
while let Some(item) = items.pop() {
result = Node {
status: 0,
parent: std::ptr::null_mut(),
inner: NodeInner::Alt(ConsAltNode {
car: item,
cdr: Some(Box::new(result)),
}),
};
}
*node = result;
return r;
}
}
let mut items_rev: Vec<Box<Node>> = items;
let last = items_rev.pop().unwrap();
let mut result = Node {
status: 0,
parent: std::ptr::null_mut(),
inner: NodeInner::Alt(ConsAltNode {
car: last,
cdr: None,
}),
};
while let Some(item) = items_rev.pop() {
result = Node {
status: 0,
parent: std::ptr::null_mut(),
inner: NodeInner::Alt(ConsAltNode {
car: item,
cdr: Some(Box::new(result)),
}),
};
}
result.status = saved_status;
*node = result;
0
}
NodeInner::Quant(ref mut q) => {
if let Some(ref mut body) = q.body {
reduce_string_list(body, enc)
} else {
0
}
}
NodeInner::Anchor(ref mut a) => {
if let Some(ref mut body) = a.body {
let r = reduce_string_list(body, enc);
if r != 0 {
return r;
}
}
0
}
NodeInner::Bag(ref mut b) => {
if let Some(ref mut body) = b.body {
let r = reduce_string_list(body, enc);
if r != 0 {
return r;
}
}
if let BagData::IfElse {
ref mut then_node,
ref mut else_node,
} = b.bag_data
{
if let Some(ref mut then_n) = then_node {
let r = reduce_string_list(then_n, enc);
if r != 0 {
return r;
}
}
if let Some(ref mut else_n) = else_node {
let r = reduce_string_list(else_n, enc);
if r != 0 {
return r;
}
}
}
0
}
_ => 0,
}
}
#[cfg_attr(coverage_nightly, coverage(off))]
pub fn compile_from_tree(root: &Node, reg: &mut RegexType, env: &ParseEnv) -> i32 {
reg.ops.clear();
let r = compile_tree(root, reg, env);
if r != 0 {
return r;
}
add_op(reg, OpCode::End, OperationPayload::None);
0
}
const MAX_ND_OPT_INFO_REF_COUNT: i32 = 5;
fn map_position_value(enc: OnigEncoding, i: usize) -> i32 {
static VALS: [i16; 128] = [
5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5,
5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 5, 6, 5, 5, 5, 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 5, 5, 5, 5, 1,
];
if i < VALS.len() {
if i == 0 && enc.min_enc_len() > 1 {
20
} else {
VALS[i] as i32
}
} else {
4
}
}
fn distance_value(mm: &MinMaxLen) -> i32 {
static DIST_VALS: [i16; 100] = [
1000, 500, 333, 250, 200, 167, 143, 125, 111, 100, 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
48, 45, 43, 42, 40, 38, 37, 36, 34, 33, 32, 31, 30, 29, 29, 28, 27, 26, 26, 25, 24, 24, 23,
23, 22, 22, 21, 21, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15,
15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11,
11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10,
];
if mm.max == INFINITE_LEN {
return 0;
}
let d = (mm.max - mm.min) as usize;
if d < DIST_VALS.len() {
DIST_VALS[d] as i32
} else {
1
}
}
fn comp_distance_value(d1: &MinMaxLen, d2: &MinMaxLen, v1: i32, v2: i32) -> i32 {
if v2 <= 0 {
return -1;
}
if v1 <= 0 {
return 1;
}
let v1 = v1 * distance_value(d1);
let v2 = v2 * distance_value(d2);
if v2 > v1 {
return 1;
}
if v2 < v1 {
return -1;
}
if d2.min < d1.min {
return 1;
}
if d2.min > d1.min {
return -1;
}
0
}
fn concat_opt_anc_info(
to: &mut OptAnc,
left: &OptAnc,
right: &OptAnc,
left_len: OnigLen,
right_len: OnigLen,
) {
to.clear();
to.left = left.left;
if left_len == 0 {
to.left |= right.left;
}
to.right = right.right;
if right_len == 0 {
to.right |= left.right;
} else {
to.right |= left.right & ANCR_PREC_READ_NOT;
}
}
fn is_left_anchor(a: i32) -> bool {
!(a == ANCR_END_BUF
|| a == ANCR_SEMI_END_BUF
|| a == ANCR_END_LINE
|| a == ANCR_PREC_READ
|| a == ANCR_PREC_READ_NOT)
}
fn is_set_opt_anc_info(to: &OptAnc, anc: i32) -> bool {
(to.left & anc) != 0 || (to.right & anc) != 0
}
fn add_opt_anc_info(to: &mut OptAnc, anc: i32) {
if is_left_anchor(anc) {
to.left |= anc;
} else {
to.right |= anc;
}
}
fn remove_opt_anc_info(to: &mut OptAnc, anc: i32) {
if is_left_anchor(anc) {
to.left &= !anc;
} else {
to.right &= !anc;
}
}
fn alt_merge_opt_anc_info(to: &mut OptAnc, add: &OptAnc) {
to.left &= add.left;
to.right &= add.right;
}
fn concat_opt_exact(to: &mut OptStr, add: &OptStr, enc: OnigEncoding) -> i32 {
let mut r = 0;
let mut i = to.len;
let mut p = 0usize; let end = add.len;
while p < end {
let len = enclen(enc, &add.s[p..], p);
if i + len > OPT_EXACT_MAXLEN {
r = 1;
break;
}
for j in 0..len {
if p >= end {
break;
}
to.s[i] = add.s[p];
i += 1;
p += 1;
let _ = j;
}
}
to.len = i;
to.reach_end = if p == end { add.reach_end } else { 0 };
let mut tanc = OptAnc::new();
concat_opt_anc_info(&mut tanc, &to.anc, &add.anc, 1, 1);
if to.reach_end == 0 {
tanc.right = 0;
}
to.anc = tanc;
r
}
fn concat_opt_exact_str(to: &mut OptStr, s: &[u8], enc: OnigEncoding) {
let mut i = to.len;
let mut p = 0usize;
while p < s.len() && i < OPT_EXACT_MAXLEN {
let len = enclen(enc, &s[p..], p);
if i + len > OPT_EXACT_MAXLEN {
break;
}
for _ in 0..len {
if p >= s.len() {
break;
}
to.s[i] = s[p];
i += 1;
p += 1;
}
}
to.len = i;
if p >= s.len() {
to.reach_end = 1;
}
}
fn alt_merge_opt_exact(to: &mut OptStr, add: &OptStr, env_enc: OnigEncoding) {
if add.len == 0 || to.len == 0 {
to.clear();
return;
}
if !to.mm.is_equal(&add.mm) {
to.clear();
return;
}
let mut i = 0;
while i < to.len && i < add.len {
if to.s[i] != add.s[i] {
break;
}
let len = enclen(env_enc, &to.s[i..], i);
let mut ok = true;
for j in 1..len {
if i + j >= to.len || i + j >= add.len || to.s[i + j] != add.s[i + j] {
ok = false;
break;
}
}
if !ok {
break;
}
i += len;
}
if add.reach_end == 0 || i < add.len || i < to.len {
to.reach_end = 0;
}
to.len = i;
alt_merge_opt_anc_info(&mut to.anc, &add.anc);
if to.reach_end == 0 {
to.anc.right = 0;
}
}
fn select_opt_exact(enc: OnigEncoding, now: &mut OptStr, alt: &OptStr) {
let mut vn = now.len as i32;
let mut va = alt.len as i32;
if va == 0 {
return;
}
if vn == 0 {
*now = *alt;
return;
}
if vn <= 2 && va <= 2 {
va = map_position_value(enc, now.s[0] as usize);
vn = map_position_value(enc, alt.s[0] as usize);
if now.len > 1 {
vn += 5;
}
if alt.len > 1 {
va += 5;
}
}
vn *= 2;
va *= 2;
if comp_distance_value(&now.mm, &alt.mm, vn, va) > 0 {
*now = *alt;
}
}
fn add_char_opt_map(m: &mut OptMap, c: u8, enc: OnigEncoding) {
if m.map[c as usize] == 0 {
m.map[c as usize] = 1;
m.value += map_position_value(enc, c as usize);
}
}
fn select_opt_map(now: &mut OptMap, alt: &OptMap) {
let z: i32 = 1 << 15;
if alt.value == 0 {
return;
}
if now.value == 0 {
*now = *alt;
return;
}
let vn = z / now.value;
let va = z / alt.value;
if comp_distance_value(&now.mm, &alt.mm, vn, va) > 0 {
*now = *alt;
}
}
fn comp_opt_exact_or_map(e: &OptStr, m: &OptMap) -> i32 {
const COMP_EM_BASE: i32 = 20;
if m.value <= 0 {
return -1;
}
let case_value = 3;
let ae = COMP_EM_BASE * e.len as i32 * case_value;
let am = COMP_EM_BASE * 5 * 2 / m.value;
comp_distance_value(&e.mm, &m.mm, ae, am)
}
fn alt_merge_opt_map(enc: OnigEncoding, to: &mut OptMap, add: &OptMap) {
if to.value == 0 {
return;
}
if add.value == 0 || to.mm.max < add.mm.min {
to.clear();
return;
}
to.mm.alt_merge(&add.mm);
let mut val = 0;
for i in 0..CHAR_MAP_SIZE {
if add.map[i] != 0 {
to.map[i] = 1;
}
if to.map[i] != 0 {
val += map_position_value(enc, i);
}
}
to.value = val;
alt_merge_opt_anc_info(&mut to.anc, &add.anc);
}
fn set_bound_node_opt_info(opt: &mut OptNode, plen: &MinMaxLen) {
opt.sb.mm = *plen;
opt.spr.mm = *plen;
opt.map.mm = *plen;
}
fn concat_left_node_opt_info(enc: OnigEncoding, to: &mut OptNode, add: &mut OptNode) {
let mut tanc = OptAnc::new();
concat_opt_anc_info(&mut tanc, &to.anc, &add.anc, to.len.max, add.len.max);
to.anc = tanc;
if add.sb.len > 0 && to.len.max == 0 {
let mut tanc2 = OptAnc::new();
concat_opt_anc_info(&mut tanc2, &to.anc, &add.sb.anc, to.len.max, add.len.max);
add.sb.anc = tanc2;
}
if add.map.value > 0 && to.len.max == 0 {
if add.map.mm.max == 0 {
add.map.anc.left |= to.anc.left;
}
}
let sb_reach = to.sb.reach_end;
let sm_reach = to.sm.reach_end;
if add.len.max != 0 {
to.sb.reach_end = 0;
to.sm.reach_end = 0;
}
if add.sb.len > 0 {
if sb_reach != 0 {
concat_opt_exact(&mut to.sb, &add.sb, enc);
add.sb.clear();
} else if sm_reach != 0 {
concat_opt_exact(&mut to.sm, &add.sb, enc);
add.sb.clear();
}
}
select_opt_exact(enc, &mut to.sm, &add.sb);
select_opt_exact(enc, &mut to.sm, &add.sm);
if to.spr.len > 0 {
if add.len.max > 0 {
if to.spr.mm.max == 0 {
select_opt_exact(enc, &mut to.sb, &to.spr.clone());
} else {
select_opt_exact(enc, &mut to.sm, &to.spr.clone());
}
}
} else if add.spr.len > 0 {
to.spr = add.spr;
}
select_opt_map(&mut to.map, &add.map);
to.len.add(&add.len);
}
fn alt_merge_node_opt_info(to: &mut OptNode, add: &OptNode, env_enc: OnigEncoding) {
alt_merge_opt_anc_info(&mut to.anc, &add.anc);
alt_merge_opt_exact(&mut to.sb, &add.sb, env_enc);
alt_merge_opt_exact(&mut to.sm, &add.sm, env_enc);
alt_merge_opt_exact(&mut to.spr, &add.spr, env_enc);
alt_merge_opt_map(env_enc, &mut to.map, &add.map);
to.len.alt_merge(&add.len);
}
fn node_max_byte_len(node: &Node, env: &ParseEnv) -> OnigLen {
match &node.inner {
NodeInner::List(_) => {
let mut len: OnigLen = 0;
let mut cur = node;
loop {
if let NodeInner::List(cons) = &cur.inner {
let tmax = node_max_byte_len(&cons.car, env);
len = distance_add(len, tmax);
match &cons.cdr {
Some(next) => cur = next,
None => break,
}
} else {
break;
}
}
len
}
NodeInner::Alt(_) => {
let mut len: OnigLen = 0;
let mut cur = node;
loop {
if let NodeInner::Alt(cons) = &cur.inner {
let tmax = node_max_byte_len(&cons.car, env);
if len < tmax {
len = tmax;
}
match &cons.cdr {
Some(next) => cur = next,
None => break,
}
} else {
break;
}
}
len
}
NodeInner::String(sn) => sn.s.len() as OnigLen,
NodeInner::CType(_) | NodeInner::CClass(_) => env.enc.max_enc_len() as OnigLen,
NodeInner::BackRef(br) => {
if node.has_status(ND_ST_CHECKER) {
0
} else if node.has_status(ND_ST_RECURSION) {
if br.nest_level != 0 {
INFINITE_LEN
} else {
0
}
} else {
let mut len: OnigLen = 0;
for &back in br.back_refs() {
let me = env.mem_env(back as usize);
if !me.mem_node.is_null() {
let mem_node = unsafe { &*me.mem_node };
let tmax = node_max_byte_len(mem_node, env);
if len < tmax {
len = tmax;
}
}
}
len
}
}
NodeInner::Call(cn) => {
if node.has_status(ND_ST_RECURSION) {
INFINITE_LEN
} else if !cn.target_node.is_null() {
unsafe { node_max_byte_len(&*cn.target_node, env) }
} else {
0
}
}
NodeInner::Quant(qn) => {
if qn.upper == 0 {
0
} else if let Some(ref body) = qn.body {
let len = node_max_byte_len(body, env);
if len != 0 {
if !is_infinite_repeat(qn.upper) {
distance_multiply(len, qn.upper)
} else {
INFINITE_LEN
}
} else {
0
}
} else {
0
}
}
NodeInner::Bag(bn) => match bn.bag_type {
BagType::Memory => {
if node.has_status(ND_ST_FIXED_MAX) {
bn.max_len
} else if node.has_status(ND_ST_MARK1) {
INFINITE_LEN
} else {
unsafe {
let node_ptr = node as *const Node as *mut Node;
(*node_ptr).status_add(ND_ST_MARK1);
let len = if let Some(ref body) = bn.body {
node_max_byte_len(body, env)
} else {
0
};
(*node_ptr).status_remove(ND_ST_MARK1);
if let NodeInner::Bag(ref mut bm) = (*node_ptr).inner {
bm.max_len = len;
}
(*node_ptr).status_add(ND_ST_FIXED_MAX);
len
}
}
}
BagType::Option | BagType::StopBacktrack => {
if let Some(ref body) = bn.body {
node_max_byte_len(body, env)
} else {
0
}
}
BagType::IfElse => {
if let BagData::IfElse {
ref then_node,
ref else_node,
} = bn.bag_data
{
let mut len = if let Some(ref body) = bn.body {
node_max_byte_len(body, env)
} else {
0
};
if let Some(ref then_n) = then_node {
let tlen = node_max_byte_len(then_n, env);
len = distance_add(len, tlen);
}
let elen = if let Some(ref else_n) = else_node {
node_max_byte_len(else_n, env)
} else {
0
};
if elen > len {
elen
} else {
len
}
} else {
0
}
}
},
NodeInner::Anchor(_) | NodeInner::Gimmick(_) => 0,
}
}
fn optimize_nodes(
node: &Node,
opt: &mut OptNode,
env_enc: OnigEncoding,
env_case_fold_flag: OnigCaseFoldType,
env_mm: &mut MinMaxLen,
scan_env: &ParseEnv,
) -> i32 {
let enc = env_enc;
opt.clear();
set_bound_node_opt_info(opt, env_mm);
match &node.inner {
NodeInner::List(_) => {
let mut nenv_mm = *env_mm;
let mut cur = node;
loop {
if let NodeInner::List(cons) = &cur.inner {
let mut xo = OptNode::new();
let r = optimize_nodes(
&cons.car,
&mut xo,
enc,
env_case_fold_flag,
&mut nenv_mm,
scan_env,
);
if r != 0 {
return r;
}
nenv_mm.add(&xo.len);
concat_left_node_opt_info(enc, opt, &mut xo);
match &cons.cdr {
Some(next) => cur = next,
None => break,
}
} else {
break;
}
}
}
NodeInner::Alt(_) => {
let mut first = true;
let mut cur = node;
loop {
if let NodeInner::Alt(cons) = &cur.inner {
let mut xo = OptNode::new();
let r = optimize_nodes(
&cons.car,
&mut xo,
enc,
env_case_fold_flag,
env_mm,
scan_env,
);
if r != 0 {
return r;
}
if first {
*opt = xo;
first = false;
} else {
alt_merge_node_opt_info(opt, &xo, enc);
}
match &cons.cdr {
Some(next) => cur = next,
None => break,
}
} else {
break;
}
}
}
NodeInner::String(sn) => {
let slen = sn.s.len();
concat_opt_exact_str(&mut opt.sb, &sn.s, enc);
if slen > 0 {
add_char_opt_map(&mut opt.map, sn.s[0], enc);
}
opt.len.set(slen as OnigLen, slen as OnigLen);
}
NodeInner::CClass(cc) => {
if cc.mbuf.is_some() || cc.is_not() {
let min = enc.min_enc_len() as OnigLen;
let max = enc.max_enc_len() as OnigLen;
opt.len.set(min, max);
} else {
for i in 0..SINGLE_BYTE_SIZE {
let z = bitset_at(&cc.bs, i);
if (z && !cc.is_not()) || (!z && cc.is_not()) {
add_char_opt_map(&mut opt.map, i as u8, enc);
}
}
opt.len.set(1, 1);
}
}
NodeInner::CType(ct) => {
let max = enc.max_enc_len() as OnigLen;
let min;
if max == 1 {
min = 1;
match ct.ctype {
CTYPE_ANYCHAR => { }
_ if ct.ctype == crate::oniguruma::ONIGENC_CTYPE_WORD as i32 => {
let range = if ct.ascii_mode { 128 } else { SINGLE_BYTE_SIZE };
if ct.not {
for i in 0..range {
if !crate::regenc::onigenc_is_code_word(enc, i as u32) {
add_char_opt_map(&mut opt.map, i as u8, enc);
}
}
for i in range..SINGLE_BYTE_SIZE {
add_char_opt_map(&mut opt.map, i as u8, enc);
}
} else {
for i in 0..range {
if crate::regenc::onigenc_is_code_word(enc, i as u32) {
add_char_opt_map(&mut opt.map, i as u8, enc);
}
}
}
}
_ => {}
}
} else {
min = enc.min_enc_len() as OnigLen;
}
opt.len.set(min, max);
}
NodeInner::Anchor(an) => {
match an.anchor_type {
ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_BEGIN_LINE | ANCR_END_BUF
| ANCR_SEMI_END_BUF | ANCR_END_LINE | ANCR_PREC_READ_NOT | ANCR_LOOK_BEHIND => {
add_opt_anc_info(&mut opt.anc, an.anchor_type);
}
ANCR_PREC_READ => {
if let Some(ref body) = an.body {
let mut xo = OptNode::new();
let r = optimize_nodes(
body,
&mut xo,
enc,
env_case_fold_flag,
env_mm,
scan_env,
);
if r == 0 {
if xo.sb.len > 0 {
opt.spr = xo.sb;
} else if xo.sm.len > 0 {
opt.spr = xo.sm;
}
opt.spr.reach_end = 0;
if xo.map.value > 0 {
opt.map = xo.map;
}
}
}
}
_ => { }
}
}
NodeInner::BackRef(_br) => {
if !node.has_status(ND_ST_CHECKER) {
let min = node_min_byte_len(node, scan_env);
let max = node_max_byte_len(node, scan_env);
opt.len.set(min, max);
}
}
NodeInner::Call(cn) => {
if node.has_status(ND_ST_RECURSION) {
opt.len.set(0, INFINITE_LEN);
} else if !cn.target_node.is_null() {
let target = unsafe { &*cn.target_node };
let r = optimize_nodes(target, opt, enc, env_case_fold_flag, env_mm, scan_env);
if r != 0 {
return r;
}
}
}
NodeInner::Quant(qn) => {
if qn.upper == 0 {
opt.len.set(0, 0);
} else if let Some(ref body) = qn.body {
let mut xo = OptNode::new();
let r = optimize_nodes(body, &mut xo, enc, env_case_fold_flag, env_mm, scan_env);
if r != 0 {
return r;
}
if qn.lower > 0 {
*opt = xo.clone();
if xo.sb.len > 0 && xo.sb.reach_end != 0 {
let mut i = 2;
while i <= qn.lower && !opt.sb.is_full() {
let rc = concat_opt_exact(&mut opt.sb, &xo.sb, enc);
if rc > 0 {
break;
}
i += 1;
}
if i < qn.lower {
opt.sb.reach_end = 0;
}
}
if qn.lower != qn.upper {
opt.sb.reach_end = 0;
opt.sm.reach_end = 0;
}
if qn.lower > 1 {
opt.sm.reach_end = 0;
}
}
let max;
if is_infinite_repeat(qn.upper) {
if env_mm.max == 0 && body.is_anychar() && qn.greedy {
if body.has_status(ND_ST_MULTILINE) {
add_opt_anc_info(&mut opt.anc, ANCR_ANYCHAR_INF_ML);
} else {
add_opt_anc_info(&mut opt.anc, ANCR_ANYCHAR_INF);
}
}
max = if xo.len.max > 0 { INFINITE_LEN } else { 0 };
} else {
max = distance_multiply(xo.len.max, qn.upper);
}
let min = distance_multiply(xo.len.min, qn.lower);
opt.len.set(min, max);
}
}
NodeInner::Bag(bn) => match bn.bag_type {
BagType::StopBacktrack | BagType::Option => {
if let Some(ref body) = bn.body {
let r = optimize_nodes(body, opt, enc, env_case_fold_flag, env_mm, scan_env);
if r != 0 {
return r;
}
}
}
BagType::Memory => {
let opt_count = unsafe {
let bn_ptr = bn as *const BagNode as *mut BagNode;
(*bn_ptr).opt_count += 1;
(*bn_ptr).opt_count
};
if opt_count > MAX_ND_OPT_INFO_REF_COUNT {
let min = if node.has_status(ND_ST_FIXED_MIN) {
bn.min_len
} else {
0
};
let max = if node.has_status(ND_ST_FIXED_MAX) {
bn.max_len
} else {
INFINITE_LEN
};
opt.len.set(min, max);
} else if let Some(ref body) = bn.body {
let r = optimize_nodes(body, opt, enc, env_case_fold_flag, env_mm, scan_env);
if r != 0 {
return r;
}
if is_set_opt_anc_info(&opt.anc, ANCR_ANYCHAR_INF_MASK) {
if mem_status_at(scan_env.backrefed_mem, bn.regnum() as usize) {
remove_opt_anc_info(&mut opt.anc, ANCR_ANYCHAR_INF_MASK);
}
}
}
}
BagType::IfElse => {
if let BagData::IfElse {
ref then_node,
ref else_node,
} = bn.bag_data
{
if else_node.is_some() {
let mut nenv_mm = *env_mm;
if let Some(ref body) = bn.body {
let mut xo = OptNode::new();
let r = optimize_nodes(
body,
&mut xo,
enc,
env_case_fold_flag,
&mut nenv_mm,
scan_env,
);
if r != 0 {
return r;
}
nenv_mm.add(&xo.len);
concat_left_node_opt_info(enc, opt, &mut xo);
}
if let Some(ref then_n) = then_node {
let mut xo = OptNode::new();
let r = optimize_nodes(
then_n,
&mut xo,
enc,
env_case_fold_flag,
&mut nenv_mm,
scan_env,
);
if r != 0 {
return r;
}
concat_left_node_opt_info(enc, opt, &mut xo);
}
if let Some(ref else_n) = else_node {
let mut xo = OptNode::new();
let r = optimize_nodes(
else_n,
&mut xo,
enc,
env_case_fold_flag,
env_mm,
scan_env,
);
if r != 0 {
return r;
}
alt_merge_node_opt_info(opt, &xo, enc);
}
}
}
}
},
NodeInner::Gimmick(_) => {}
}
0
}
fn set_sunday_quick_search_or_bmh_skip_table(
enc: OnigEncoding,
s: &[u8],
skip: &mut [u8; CHAR_MAP_SIZE],
roffset: &mut i32,
) -> i32 {
let mut offset = crate::regenc::enc_get_skip_offset(enc) as i32;
if offset == 7 {
let mut p = 0;
loop {
let len = enclen(enc, &s[p..], p);
if p + len >= s.len() {
offset = if len == 1 { 1 } else { 0 };
break;
}
p += len;
}
}
let slen = s.len() as i32;
if slen + offset >= 255 {
return ONIGERR_PARSER_BUG;
}
*roffset = offset;
for i in 0..CHAR_MAP_SIZE {
skip[i] = (slen + offset) as u8;
}
let mut p = 0;
while p < s.len() {
let clen = {
let l = enclen(enc, &s[p..], p);
if p + l > s.len() {
s.len() - p
} else {
l
}
};
let remaining = (s.len() - p) as i32;
for j in 0..clen {
let z = remaining - j as i32 + (offset - 1);
if z <= 0 {
break;
}
skip[s[p + j] as usize] = z as u8;
}
p += clen;
}
0
}
fn set_optimize_exact(reg: &mut RegexType, e: &OptStr) -> i32 {
if e.len == 0 {
return 0;
}
reg.exact = e.s[..e.len].to_vec();
let allow_reverse = reg.enc.is_allowed_reverse_match(®.exact);
if e.len >= 2 || (e.len >= 1 && allow_reverse) {
let exact_copy = reg.exact.clone();
let r = set_sunday_quick_search_or_bmh_skip_table(
reg.enc,
&exact_copy,
&mut reg.map,
&mut reg.map_offset,
);
if r != 0 {
return r;
}
reg.optimize = if allow_reverse {
OptimizeType::StrFast
} else {
OptimizeType::StrFastStepForward
};
} else {
reg.optimize = OptimizeType::Str;
}
reg.dist_min = e.mm.min;
reg.dist_max = e.mm.max;
if reg.dist_min != INFINITE_LEN {
reg.threshold_len = (reg.dist_min as i32) + (reg.exact.len() as i32);
}
0
}
fn set_optimize_map(reg: &mut RegexType, m: &OptMap) {
reg.map = m.map;
reg.optimize = OptimizeType::Map;
reg.dist_min = m.mm.min;
reg.dist_max = m.mm.max;
if reg.dist_min != INFINITE_LEN {
reg.threshold_len = (reg.dist_min as i32) + (reg.enc.min_enc_len() as i32);
}
let mut bytes = [0u8; 3];
let mut count: u8 = 0;
for i in 0..CHAR_MAP_SIZE {
if m.map[i] != 0 {
if count >= 3 || i >= 0x80 {
count = 0;
break;
}
bytes[count as usize] = i as u8;
count += 1;
}
}
reg.map_bytes = bytes;
reg.map_byte_count = count;
}
fn set_sub_anchor(reg: &mut RegexType, anc: &OptAnc) {
reg.sub_anchor |= anc.left & ANCR_BEGIN_LINE;
reg.sub_anchor |= anc.right & ANCR_END_LINE;
}
fn set_optimize_info_from_tree(root: &Node, reg: &mut RegexType, scan_env: &ParseEnv) -> i32 {
let mut env_mm = MinMaxLen::new();
let mut opt = OptNode::new();
let r = optimize_nodes(
root,
&mut opt,
reg.enc,
reg.case_fold_flag,
&mut env_mm,
scan_env,
);
if r != 0 {
return r;
}
reg.anchor = opt.anc.left
& (ANCR_BEGIN_BUF
| ANCR_BEGIN_POSITION
| ANCR_ANYCHAR_INF
| ANCR_ANYCHAR_INF_ML
| ANCR_LOOK_BEHIND);
if (opt.anc.left & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) != 0 {
reg.anchor &= !ANCR_ANYCHAR_INF_ML;
}
reg.anchor |= opt.anc.right & (ANCR_END_BUF | ANCR_SEMI_END_BUF | ANCR_PREC_READ_NOT);
if (reg.anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) != 0 {
reg.anc_dist_min = opt.len.min;
reg.anc_dist_max = opt.len.max;
}
if opt.sb.len > 0 || opt.sm.len > 0 {
select_opt_exact(reg.enc, &mut opt.sb, &opt.sm);
if opt.map.value > 0 && comp_opt_exact_or_map(&opt.sb, &opt.map) > 0 {
set_optimize_map(reg, &opt.map);
set_sub_anchor(reg, &opt.map.anc);
} else {
let r = set_optimize_exact(reg, &opt.sb);
if r != 0 {
return r;
}
set_sub_anchor(reg, &opt.sb.anc);
}
} else if opt.map.value > 0 {
set_optimize_map(reg, &opt.map);
set_sub_anchor(reg, &opt.map.anc);
} else {
reg.sub_anchor |= opt.anc.left & ANCR_BEGIN_LINE;
if opt.len.max == 0 {
reg.sub_anchor |= opt.anc.right & ANCR_END_LINE;
}
}
0
}
pub fn onig_compile(reg: &mut RegexType, pattern: &[u8]) -> i32 {
reg.ops.clear();
let mut env = ParseEnv {
options: reg.options,
case_fold_flag: reg.case_fold_flag,
enc: reg.enc,
syntax: unsafe { &*reg.syntax },
cap_history: 0,
backtrack_mem: 0,
backrefed_mem: 0,
pattern: std::ptr::null(),
pattern_end: std::ptr::null(),
error: std::ptr::null(),
error_end: std::ptr::null(),
reg: reg as *mut RegexType,
num_call: 0,
num_mem: 0,
num_named: 0,
mem_alloc: 0,
mem_env_static: Default::default(),
mem_env_dynamic: None,
backref_num: 0,
keep_num: 0,
id_num: 0,
save_alloc_num: 0,
saves: None,
unset_addr_list: None,
parse_depth: 0,
flags: 0,
};
let mut root = match crate::regparse::onig_parse_tree(pattern, reg, &mut env) {
Ok(node) => node,
Err(e) => return e,
};
if env.num_named > 0
&& is_syntax_bv(env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP)
&& !opton_capture_group(reg.options)
{
let r = if env.num_named != env.num_mem {
disable_noname_group_capture(&mut root, reg, &mut env)
} else {
numbered_ref_check(&root)
};
if r != 0 {
return r;
}
}
let r = reduce_string_list(&mut root, reg.enc);
if r != 0 {
return r;
}
if env.num_call > 0 {
let r = resolve_call_references(&mut root, reg, &mut env);
if r != 0 {
return r;
}
tune_call(&mut root, 0);
let r = tune_call2(&mut root);
if r != 0 {
return r;
}
recursive_call_check_trav(&mut root, &mut env, 0);
let r = infinite_recursive_call_check_trav(&mut root, &env);
if r != 0 {
return r;
}
tune_called_state(&mut root, 0);
}
let r = tune_tree(&mut root, reg, 0, &mut env);
if r != 0 {
return r;
}
setup_empty_status_mem(&mut root, &mut env);
reg.capture_history = env.cap_history;
reg.push_mem_start = env.backtrack_mem | env.cap_history;
reg.num_mem = env.num_mem;
if mem_status_is_all_on(reg.push_mem_start) {
reg.push_mem_end = env.backrefed_mem | env.cap_history;
} else {
reg.push_mem_end = reg.push_mem_start & (env.backrefed_mem | env.cap_history);
}
reg.num_call = env.id_num;
let r = compile_tree(&root, reg, &env);
if r != 0 {
return r;
}
if !reg.unset_call_addrs.is_empty() {
for &(op_idx, gnum) in ®.unset_call_addrs.clone() {
let gnum = gnum as usize;
if gnum < reg.called_addrs.len() && reg.called_addrs[gnum] >= 0 {
let addr = reg.called_addrs[gnum];
reg.ops[op_idx].payload = OperationPayload::Call { addr };
}
}
}
if env.keep_num > 0 {
add_op(
reg,
OpCode::UpdateVar,
OperationPayload::UpdateVar {
var_type: UpdateVarType::KeepFromStackLast,
id: 0,
clear: false,
},
);
}
add_op(reg, OpCode::End, OperationPayload::None);
if let Some(ref ext) = reg.extp {
if ext.callout_num != 0 {
reg.push_mem_end = reg.push_mem_start;
}
}
let has_callouts = reg.extp.as_ref().map_or(false, |e| e.callout_num != 0);
if reg.push_mem_end != 0
|| reg.num_repeat != 0
|| reg.num_empty_check != 0
|| reg.num_call > 0
|| has_callouts
{
reg.stack_pop_level = StackPopLevel::All;
} else if reg.push_mem_start != 0 {
reg.stack_pop_level = StackPopLevel::MemStart;
} else {
reg.stack_pop_level = StackPopLevel::Free;
}
let r = set_optimize_info_from_tree(&root, reg, &env);
if r != 0 {
return r;
}
0
}
pub fn onig_new(
pattern: &[u8],
option: OnigOptionType,
enc: OnigEncoding,
syntax: &OnigSyntaxType,
) -> Result<RegexType, crate::error::RegexError> {
if option.intersects(ONIG_OPTION_DONT_CAPTURE_GROUP) && option.intersects(ONIG_OPTION_CAPTURE_GROUP) {
return Err(ONIGERR_INVALID_COMBINATION_OF_OPTIONS.into());
}
let mut effective_option = option;
let syn = syntax;
if option.intersects(ONIG_OPTION_NEGATE_SINGLELINE) {
effective_option |= syn.options;
effective_option &= !ONIG_OPTION_SINGLELINE;
} else {
effective_option |= syn.options;
}
let mut case_fold_flag = ONIGENC_CASE_FOLD_MIN;
if effective_option.intersects(ONIG_OPTION_IGNORECASE_IS_ASCII) {
case_fold_flag &=
!(INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR | ONIGENC_CASE_FOLD_TURKISH_AZERI);
case_fold_flag |= ONIGENC_CASE_FOLD_ASCII_ONLY;
}
let mut reg = RegexType {
ops: Vec::new(),
string_pool: Vec::new(),
num_mem: 0,
num_repeat: 0,
num_empty_check: 0,
num_call: 0,
capture_history: 0,
push_mem_start: 0,
push_mem_end: 0,
stack_pop_level: StackPopLevel::Free,
repeat_range: Vec::new(),
enc,
options: effective_option,
syntax: syntax as *const OnigSyntaxType,
case_fold_flag,
name_table: None,
optimize: OptimizeType::None,
threshold_len: 0,
anchor: 0,
anc_dist_min: 0,
anc_dist_max: 0,
sub_anchor: 0,
exact: Vec::new(),
map: [0u8; CHAR_MAP_SIZE],
map_offset: 0,
map_bytes: [0u8; 3],
map_byte_count: 0,
dist_min: 0,
dist_max: 0,
called_addrs: vec![],
unset_call_addrs: vec![],
extp: None,
};
let r = onig_compile(&mut reg, pattern);
if r != 0 {
return Err(r.into());
}
Ok(reg)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::regparse;
use crate::regsyntax::OnigSyntaxOniguruma;
fn make_test_context() -> (RegexType, ParseEnv) {
let reg = RegexType {
ops: Vec::new(),
string_pool: Vec::new(),
num_mem: 0,
num_repeat: 0,
num_empty_check: 0,
num_call: 0,
capture_history: 0,
push_mem_start: 0,
push_mem_end: 0,
stack_pop_level: StackPopLevel::Free,
repeat_range: Vec::new(),
enc: &crate::encodings::utf8::ONIG_ENCODING_UTF8,
options: ONIG_OPTION_NONE,
syntax: &OnigSyntaxOniguruma,
case_fold_flag: ONIGENC_CASE_FOLD_MIN,
name_table: None,
optimize: OptimizeType::None,
threshold_len: 0,
anchor: 0,
anc_dist_min: 0,
anc_dist_max: 0,
sub_anchor: 0,
exact: Vec::new(),
map: [0u8; CHAR_MAP_SIZE],
map_offset: 0,
map_bytes: [0u8; 3],
map_byte_count: 0,
dist_min: 0,
dist_max: 0,
called_addrs: vec![],
unset_call_addrs: vec![],
extp: None,
};
let env = ParseEnv {
options: OnigOptionType::empty(),
case_fold_flag: 0,
enc: &crate::encodings::utf8::ONIG_ENCODING_UTF8,
syntax: &OnigSyntaxOniguruma,
cap_history: 0,
backtrack_mem: 0,
backrefed_mem: 0,
pattern: std::ptr::null(),
pattern_end: std::ptr::null(),
error: std::ptr::null(),
error_end: std::ptr::null(),
reg: std::ptr::null_mut(),
num_call: 0,
num_mem: 0,
num_named: 0,
mem_alloc: 0,
mem_env_static: Default::default(),
mem_env_dynamic: None,
backref_num: 0,
keep_num: 0,
id_num: 0,
save_alloc_num: 0,
saves: None,
unset_addr_list: None,
parse_depth: 0,
flags: 0,
};
(reg, env)
}
fn parse_and_compile(pattern: &[u8]) -> Result<RegexType, i32> {
let (mut reg, mut env) = make_test_context();
let root = regparse::onig_parse_tree(pattern, &mut reg, &mut env)?;
let r = compile_from_tree(&root, &mut reg, &env);
if r != 0 {
return Err(r);
}
Ok(reg)
}
#[test]
fn compile_literal_string() {
let reg = parse_and_compile(b"abc").unwrap();
assert!(!reg.ops.is_empty());
let last = reg.ops.last().unwrap();
assert_eq!(last.opcode, OpCode::End);
}
#[test]
fn compile_alternation() {
let reg = parse_and_compile(b"a|b").unwrap();
assert!(reg.ops.len() >= 4);
assert_eq!(reg.ops[0].opcode, OpCode::Push);
assert_eq!(reg.ops.last().unwrap().opcode, OpCode::End);
}
#[test]
fn compile_star_quantifier() {
let reg = parse_and_compile(b"a*").unwrap();
assert!(reg.ops.len() >= 3);
assert_eq!(reg.ops.last().unwrap().opcode, OpCode::End);
let has_push = reg.ops.iter().any(|op| op.opcode == OpCode::Push);
let has_jump = reg.ops.iter().any(|op| op.opcode == OpCode::Jump);
assert!(has_push, "expected PUSH for a*");
assert!(has_jump, "expected JUMP for a*");
}
#[test]
fn compile_plus_quantifier() {
let reg = parse_and_compile(b"a+").unwrap();
assert!(reg.ops.len() >= 3);
assert_eq!(reg.ops.last().unwrap().opcode, OpCode::End);
}
#[test]
fn compile_capture_group() {
let reg = parse_and_compile(b"(a)").unwrap();
let has_mem_start = reg
.ops
.iter()
.any(|op| op.opcode == OpCode::MemStart || op.opcode == OpCode::MemStartPush);
let has_mem_end = reg
.ops
.iter()
.any(|op| op.opcode == OpCode::MemEnd || op.opcode == OpCode::MemEndPush);
assert!(has_mem_start, "expected MemStart for (a)");
assert!(has_mem_end, "expected MemEnd for (a)");
}
#[test]
fn compile_char_class() {
let reg = parse_and_compile(b"[abc]").unwrap();
let has_cclass = reg.ops.iter().any(|op| op.opcode == OpCode::CClass);
assert!(has_cclass, "expected CClass for [abc]");
}
#[test]
fn compile_anchor_begin() {
let reg = parse_and_compile(b"^a").unwrap();
assert_eq!(reg.ops[0].opcode, OpCode::BeginLine);
}
#[test]
fn compile_word_type() {
let reg = parse_and_compile(b"\\w").unwrap();
let has_word = reg
.ops
.iter()
.any(|op| op.opcode == OpCode::Word || op.opcode == OpCode::WordAscii);
assert!(has_word, "expected Word for \\w");
}
#[test]
fn compile_interval_quantifier() {
let reg = parse_and_compile(b"a{2,5}").unwrap();
let has_push = reg.ops.iter().any(|op| op.opcode == OpCode::Push);
assert!(has_push, "expected Push for a{{2,5}} greedy expansion");
}
#[test]
fn compile_complex_pattern() {
let reg = parse_and_compile(b"^[a-z]+\\d{2,4}$").unwrap();
assert_eq!(reg.ops.last().unwrap().opcode, OpCode::End);
}
#[test]
fn compile_empty_pattern() {
let reg = parse_and_compile(b"").unwrap();
assert_eq!(reg.ops.len(), 1); assert_eq!(reg.ops[0].opcode, OpCode::End);
}
#[test]
fn compile_non_capturing_group() {
let reg = parse_and_compile(b"(?:abc)").unwrap();
let has_mem_start = reg
.ops
.iter()
.any(|op| op.opcode == OpCode::MemStart || op.opcode == OpCode::MemStartPush);
assert!(
!has_mem_start,
"non-capturing group should not have MemStart"
);
assert_eq!(reg.ops.last().unwrap().opcode, OpCode::End);
}
#[test]
fn compile_lookahead() {
let reg = parse_and_compile(b"(?=abc)").unwrap();
let has_mark = reg.ops.iter().any(|op| op.opcode == OpCode::Mark);
let has_cut = reg.ops.iter().any(|op| op.opcode == OpCode::CutToMark);
assert!(has_mark, "expected Mark for lookahead");
assert!(has_cut, "expected CutToMark for lookahead");
}
#[test]
fn compile_negative_lookahead() {
let reg = parse_and_compile(b"(?!abc)").unwrap();
let has_fail = reg.ops.iter().any(|op| op.opcode == OpCode::Fail);
assert!(has_fail, "expected Fail for negative lookahead");
}
#[test]
fn onig_new_basic() {
let reg = onig_new(
b"abc",
ONIG_OPTION_NONE,
&crate::encodings::utf8::ONIG_ENCODING_UTF8,
&OnigSyntaxOniguruma,
)
.unwrap();
assert!(!reg.ops.is_empty());
assert_eq!(reg.ops.last().unwrap().opcode, OpCode::End);
}
#[test]
fn onig_new_with_captures() {
let reg = onig_new(
b"(a)(b)",
ONIG_OPTION_NONE,
&crate::encodings::utf8::ONIG_ENCODING_UTF8,
&OnigSyntaxOniguruma,
)
.unwrap();
assert_eq!(reg.num_mem, 2);
}
#[test]
fn onig_new_stack_pop_level_free() {
let reg = onig_new(
b"abc",
ONIG_OPTION_NONE,
&crate::encodings::utf8::ONIG_ENCODING_UTF8,
&OnigSyntaxOniguruma,
)
.unwrap();
assert_eq!(reg.stack_pop_level, StackPopLevel::Free);
}
#[test]
fn onig_new_invalid_pattern() {
let result = onig_new(
b"(",
ONIG_OPTION_NONE,
&crate::encodings::utf8::ONIG_ENCODING_UTF8,
&OnigSyntaxOniguruma,
);
assert!(result.is_err());
}
#[test]
fn reduce_string_list_merges() {
let (mut reg, mut env) = make_test_context();
let mut root = regparse::onig_parse_tree(b"abc", &mut reg, &mut env).unwrap();
let before_type = root.node_type();
let r = reduce_string_list(&mut root, env.enc);
assert_eq!(r, 0);
assert_eq!(root.node_type(), NodeType::String);
let s = root.as_str().unwrap();
assert_eq!(s.s, b"abc");
}
#[test]
fn reduce_string_list_preserves_non_strings() {
let (mut reg, mut env) = make_test_context();
let mut root = regparse::onig_parse_tree(b"a.b", &mut reg, &mut env).unwrap();
let r = reduce_string_list(&mut root, env.enc);
assert_eq!(r, 0);
assert_eq!(root.node_type(), NodeType::List);
}
#[test]
fn test_never_ending_recursion_direct() {
let mut reg = make_test_context().0;
let r = onig_compile(&mut reg, b"(?<abc>\\g<abc>)");
assert_eq!(r, ONIGERR_NEVER_ENDING_RECURSION);
}
#[test]
fn test_never_ending_recursion_conditional() {
let mut reg = make_test_context().0;
let r = onig_compile(&mut reg, b"(()(?(2)\\g<1>))");
assert_eq!(r, ONIGERR_NEVER_ENDING_RECURSION);
}
}