#![allow(clippy::expect_used)]
use super::*;
use crate::{debug, depth::Depth};
use std::{
cell::RefCell,
fmt::{self, Debug, Display},
str::{self, Utf8Error},
};
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct NodesResult {
spans: Vec<Vec<u8>>,
}
impl NodesResult {
#[must_use]
#[inline(always)]
pub fn get(&self) -> &[impl AsRef<[u8]>] {
&self.spans
}
#[must_use]
#[inline(always)]
pub fn iter_as_utf8(&self) -> impl IntoIterator<Item = Result<&str, Utf8Error>> {
self.spans.iter().map(|x| str::from_utf8(x))
}
#[must_use]
#[inline(always)]
pub fn into_inner(self) -> Vec<Vec<u8>> {
self.spans
}
}
impl From<NodesResult> for Vec<Vec<u8>> {
#[inline(always)]
fn from(result: NodesResult) -> Self {
result.spans
}
}
impl Display for NodesResult {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for span in &self.spans {
let display = String::from_utf8_lossy(span);
writeln!(f, "{display}")?;
}
Ok(())
}
}
pub struct NodesRecorder<'s, B, S> {
internal: RefCell<InternalRecorder<'s, B, S>>,
}
impl<'s, B, S> NodesRecorder<'s, B, S>
where
B: Deref<Target = [u8]>,
S: Sink<Match>,
{
pub(crate) fn build_recorder(sink: &'s mut S) -> Self {
Self {
internal: RefCell::new(InternalRecorder::new(sink)),
}
}
}
impl<'s, B, S> InputRecorder<B> for NodesRecorder<'s, B, S>
where
B: Deref<Target = [u8]>,
S: Sink<Match>,
{
#[inline(always)]
fn record_block_start(&self, new_block: B) {
self.internal.borrow_mut().record_block(new_block)
}
}
impl<'s, B, S> Recorder<B> for NodesRecorder<'s, B, S>
where
B: Deref<Target = [u8]>,
S: Sink<Match>,
{
#[inline]
fn record_match(&self, idx: usize, depth: Depth, ty: MatchedNodeType) -> Result<(), EngineError> {
debug!("Recording match at {idx}");
self.internal.borrow_mut().record_match(idx, depth, ty);
Ok(())
}
#[inline]
fn record_value_terminator(&self, idx: usize, depth: Depth) -> Result<(), EngineError> {
self.internal
.borrow_mut()
.record_value_terminator(idx, depth)
.map_err(|err| EngineError::SinkError(Box::new(err)))
}
}
enum InternalRecorder<'s, B, S> {
Simple(SimpleRecorder<'s, B, S>),
Stack(StackRecorder<'s, B, S>),
Transition,
}
impl<'s, B, S> InternalRecorder<'s, B, S>
where
B: Deref<Target = [u8]>,
S: Sink<Match>,
{
fn new(sink: &'s mut S) -> Self {
Self::Simple(SimpleRecorder::new(sink))
}
#[inline(always)]
fn record_block(&mut self, block: B) {
match self {
Self::Simple(r) => r.record_block(block),
Self::Stack(r) => r.record_block(block),
Self::Transition => unreachable!(),
}
}
#[inline(always)]
fn record_match(&mut self, idx: usize, depth: Depth, ty: MatchedNodeType) {
match self {
Self::Simple(simple) => {
if !simple.try_record_match(idx, depth, ty) {
let simple = match std::mem::replace(self, Self::Transition) {
Self::Simple(s) => s,
Self::Stack(_) | Self::Transition => unreachable!(),
};
let mut stack = simple.transform_to_stack();
stack.record_match(idx, depth, ty);
*self = Self::Stack(stack);
}
}
Self::Stack(stack) => stack.record_match(idx, depth, ty),
Self::Transition => unreachable!(),
}
}
#[allow(clippy::panic_in_result_fn)] #[inline(always)]
fn record_value_terminator(&mut self, idx: usize, depth: Depth) -> Result<(), EngineError> {
match self {
Self::Simple(r) => r.record_value_terminator(idx, depth),
Self::Stack(r) => r.record_value_terminator(idx, depth),
Self::Transition => unreachable!(),
}
}
}
struct SimpleRecorder<'s, B, S> {
idx: usize,
current_block: Option<B>,
node: Option<SimplePartialNode>,
sink: &'s mut S,
}
struct SimplePartialNode {
start_idx: usize,
start_depth: Depth,
buf: Vec<u8>,
ty: MatchedNodeType,
}
impl<'s, B, S> SimpleRecorder<'s, B, S>
where
B: Deref<Target = [u8]>,
S: Sink<Match>,
{
fn new(sink: &'s mut S) -> Self {
Self {
idx: 0,
current_block: None,
node: None,
sink,
}
}
fn record_block(&mut self, block: B) {
if let Some(finished) = self.current_block.as_ref() {
if let Some(node) = self.node.as_mut() {
debug!("Continuing node, idx is {}", self.idx);
append_block(&mut node.buf, finished, self.idx, node.start_idx)
}
self.idx += finished.len();
}
self.current_block = Some(block);
debug!("New block, idx = {}", self.idx);
}
fn record_value_terminator(&mut self, idx: usize, depth: Depth) -> Result<(), EngineError> {
debug!("Value terminator at {idx}, depth {depth}");
if let Some(node) = self.node.as_ref() {
if node.start_depth >= depth {
let mut node = self.node.take().expect("node is Some");
debug!("Mark node as ended at {}", idx + 1);
append_final_block(
&mut node.buf,
self.current_block
.as_ref()
.ok_or(EngineError::MissingOpeningCharacter())?,
self.idx,
node.start_idx,
idx + 1,
);
finalize_node(&mut node.buf, node.ty);
debug!("Committing and outputting node");
self.sink
.add_match(Match {
bytes: node.buf,
span: MatchSpan {
start_idx: node.start_idx,
end_idx: idx + 1,
},
})
.map_err(|err| EngineError::SinkError(Box::new(err)))?;
}
}
Ok(())
}
fn try_record_match(&mut self, idx: usize, depth: Depth, ty: MatchedNodeType) -> bool {
if self.node.is_some() {
debug!("nested match detected, switching to stack");
return false;
}
let node = SimplePartialNode {
start_idx: idx,
start_depth: depth,
buf: vec![],
ty,
};
self.node = Some(node);
true
}
fn transform_to_stack(self) -> StackRecorder<'s, B, S> {
match self.node {
Some(node) => StackRecorder {
idx: self.idx,
match_count: 1,
current_block: self.current_block,
stack: vec![PartialNode {
id: 0,
start_idx: node.start_idx,
start_depth: node.start_depth,
buf: node.buf,
ty: node.ty,
}],
output_queue: OutputQueue::new(),
sink: self.sink,
},
None => StackRecorder {
idx: self.idx,
match_count: 0,
current_block: self.current_block,
stack: vec![],
output_queue: OutputQueue::new(),
sink: self.sink,
},
}
}
}
struct StackRecorder<'s, B, S> {
idx: usize,
match_count: usize,
current_block: Option<B>,
stack: Vec<PartialNode>,
output_queue: OutputQueue,
sink: &'s mut S,
}
struct PartialNode {
id: usize,
start_idx: usize,
start_depth: Depth,
buf: Vec<u8>,
ty: MatchedNodeType,
}
struct OutputQueue {
offset: usize,
nodes: Vec<Option<Match>>,
}
impl OutputQueue {
fn new() -> Self {
Self {
offset: 0,
nodes: vec![],
}
}
fn insert(&mut self, id: usize, node: Match) {
let actual_idx = id - self.offset;
while self.nodes.len() <= actual_idx {
self.nodes.push(None);
}
self.nodes[actual_idx] = Some(node);
}
fn output_to<S>(&mut self, sink: &mut S) -> Result<(), S::Error>
where
S: Sink<Match>,
{
self.offset += self.nodes.len();
for node in self.nodes.drain(..) {
sink.add_match(node.expect("output_to called only after all matches are complete"))?;
}
Ok(())
}
}
impl<'s, B, S> StackRecorder<'s, B, S>
where
B: Deref<Target = [u8]>,
S: Sink<Match>,
{
fn record_block(&mut self, block: B) {
if let Some(finished) = self.current_block.as_ref() {
for node in &mut self.stack {
debug!("Continuing node: {node:?}, idx is {}", self.idx);
append_block(&mut node.buf, finished, self.idx, node.start_idx)
}
self.idx += finished.len();
}
self.current_block = Some(block);
debug!("New block, idx = {}", self.idx);
}
fn record_match(&mut self, idx: usize, depth: Depth, ty: MatchedNodeType) {
let node = PartialNode {
id: self.match_count,
start_idx: idx,
start_depth: depth,
buf: vec![],
ty,
};
debug!("New node {node:?}");
self.match_count += 1;
self.stack.push(node);
}
#[inline]
fn record_value_terminator(&mut self, idx: usize, depth: Depth) -> Result<(), EngineError> {
debug!("Value terminator at {idx}, depth {depth}");
while let Some(node) = self.stack.last() {
if node.start_depth >= depth {
debug!("Mark node {node:?} as ended at {}", idx + 1);
let mut node = self.stack.pop().expect("last was Some, pop must succeed");
append_final_block(
&mut node.buf,
self.current_block
.as_ref()
.ok_or(EngineError::MissingOpeningCharacter())?,
self.idx,
node.start_idx,
idx + 1,
);
finalize_node(&mut node.buf, node.ty);
debug!("Committing node: {node:?}");
self.output_queue.insert(
node.id,
Match {
bytes: node.buf,
span: MatchSpan {
start_idx: node.start_idx,
end_idx: idx + 1,
},
},
);
} else {
break;
}
}
if self.stack.is_empty() {
debug!("Outputting batch of nodes.");
self.output_queue
.output_to(self.sink)
.map_err(|err| EngineError::SinkError(Box::new(err)))?;
}
Ok(())
}
}
#[inline(always)]
fn append_block(dest: &mut Vec<u8>, src: &[u8], src_start: usize, read_start: usize) {
if read_start >= src_start + src.len() {
return;
}
let to_extend = if read_start > src_start {
let in_block_start = read_start - src_start;
&src[in_block_start..]
} else {
src
};
dest.extend(to_extend);
}
#[inline(always)]
fn append_final_block(dest: &mut Vec<u8>, src: &[u8], src_start: usize, read_start: usize, read_end: usize) {
debug_assert!(read_end >= src_start);
let in_block_start = if read_start > src_start {
read_start - src_start
} else {
0
};
let in_block_end = read_end - src_start;
dest.extend(&src[in_block_start..in_block_end]);
}
#[inline(always)]
fn finalize_node(buf: &mut Vec<u8>, ty: MatchedNodeType) {
debug!("Finalizing node");
if ty == MatchedNodeType::Atomic {
let mut i = buf.len() - 2;
while buf[i] == b' ' || buf[i] == b'\t' || buf[i] == b'\n' || buf[i] == b'\r' {
i -= 1;
}
buf.truncate(i + 1);
}
}
impl Debug for PartialNode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("PartialNode")
.field("start_idx", &self.start_idx)
.field("start_depth", &self.start_depth)
.field("ty", &self.ty)
.field("buf", &str::from_utf8(&self.buf).unwrap_or("[invalid utf8]"))
.finish()
}
}