tastty-driver 0.1.0

Terminal automation driver built on tastty
//! Per-span scan helpers.

use regex::Regex;
use tastty::{AbsolutePosition, LogicalLineSpan};

use super::{Capture, SearchMatch};

/// One byte-offset boundary in a span's haystack text, paired with
/// the absolute cell coordinate that produced the bytes starting at
/// that offset.
///
/// Each entry describes the half-open byte range
/// `byte_start..byte_end` that a single cell contributed to the text.
#[derive(Clone, Copy, Debug)]
pub(super) struct CellBoundary {
    /// Length of the haystack text just before this cell's bytes were
    /// appended, i.e. the offset of the cell's first byte.
    byte_start: usize,
    /// Length of the haystack text just after this cell's bytes were
    /// appended (exclusive end of the cell's byte range).
    byte_end: usize,
    /// Position yielded alongside the cell by the span's cell iterator.
    pos: AbsolutePosition,
}

/// Searchable text for one logical-line span plus the table that maps
/// its byte offsets back to cell positions.
///
/// Built by [`build_span_haystack`] and consumed by [`scan_span`].
pub(super) struct SpanHaystack {
    /// Concatenated cell contents. A cell whose contents are empty but
    /// which sits before the last populated cell contributes a single
    /// space; cells after the last populated one contribute nothing.
    pub(super) text: String,
    /// One entry per cell that contributed to `text`, in iteration
    /// order, so the byte ranges are ascending and contiguous.
    boundaries: Vec<CellBoundary>,
}

/// Flatten a logical-line span into regex-searchable text and record,
/// for every cell that contributed bytes, which byte range it owns.
///
/// Cells come from `span.cells()`, so whatever that iterator skips
/// (the original notes wide-cell continuations) is skipped here too.
/// Everything after the last cell reporting `has_contents()` is
/// dropped, which makes `$` anchor at the last populated cell instead
/// of at the right edge of the row. A cell inside the kept prefix
/// whose contents are empty is rendered as one space so it still owns
/// a byte in the text.
pub(super) fn build_span_haystack(span: &LogicalLineSpan<'_>) -> SpanHaystack {
    // The cells are materialised once because the trim point can only
    // be found by searching from the right-hand end.
    let cells: Vec<_> = span.cells().collect();
    let kept = cells
        .iter()
        .rposition(|(_, cell)| cell.has_contents())
        .map_or(0, |last| last + 1);

    let mut text = String::new();
    let mut boundaries: Vec<CellBoundary> = Vec::new();
    for (pos, cell) in cells.into_iter().take(kept) {
        let byte_start = text.len();
        let glyphs = cell.contents();
        if !glyphs.is_empty() {
            text.push_str(glyphs);
        } else {
            // Blank interior cell: keep column alignment with a space.
            text.push(' ');
        }
        boundaries.push(CellBoundary {
            byte_start,
            byte_end: text.len(),
            pos,
        });
    }

    SpanHaystack { text, boundaries }
}

pub(super) fn scan_span(
    regex: &Regex,
    haystack: &SpanHaystack,
    names: &[Option<String>],
    out: &mut Vec<SearchMatch>,
) {
    let text = haystack.text.as_str();
    let mut byte_cursor = 0usize;

    while byte_cursor <= text.len() {
        let Some(caps) = regex.captures_at(text, byte_cursor) else {
            break;
        };
        let m_top = caps.get(0).expect("captures_at yields a top-level match");
        let m_range = m_top.range();

        // Empty match: skip recording (no inclusive AbsolutePosition
        // pair describes a zero-width hit) and resync by advancing
        // one codepoint past the match end. Patterns like `\b`,
        // `(?:)?`, or `(.*)?` would otherwise stall the iterator at
        // this byte. We advance by `c.len_utf8()` rather than by a
        // single byte so the regex engine never has to skip a
        // mid-codepoint cursor; grapheme-cluster fidelity is
        // preserved at the per-cell boundary table rather than here,
        // since cells are already keyed by lead-half position.
        let advance_to = if m_range.is_empty() {
            match text[m_range.end..].chars().next() {
                Some(c) => m_range.end + c.len_utf8(),
                None => break,
            }
        } else {
            let (start_pos, end_pos) =
                byte_range_to_positions(m_range.start, m_range.end, &haystack.boundaries);
            let match_text = text[m_range.start..m_range.end].to_string();
            let mut captures: Vec<Capture> = Vec::new();
            for (idx, group) in caps.iter().enumerate().skip(1) {
                let Some(group) = group else {
                    continue;
                };
                let g_range = group.range();
                if g_range.is_empty() {
                    continue;
                }
                let (cs, ce) =
                    byte_range_to_positions(g_range.start, g_range.end, &haystack.boundaries);
                let cap_text = text[g_range.start..g_range.end].to_string();
                let name = names.get(idx).cloned().flatten();
                captures.push(Capture {
                    name,
                    index: idx,
                    start: cs,
                    end: ce,
                    text: cap_text,
                });
            }
            out.push(SearchMatch {
                start: start_pos,
                end: end_pos,
                text: match_text,
                captures,
            });
            m_range.end
        };

        if advance_to <= byte_cursor {
            // Defensive: ensure forward progress even if the
            // captures_at result somehow lands behind the cursor.
            break;
        }
        byte_cursor = advance_to;
    }
}

/// Translate the half-open haystack byte range `byte_start..byte_end`
/// into the inclusive pair of [`AbsolutePosition`]s it covers on the
/// cell grid: the cell owning the first byte and the cell owning the
/// last byte.
///
/// Callers must pass a non-empty range lying inside the table produced
/// by [`build_span_haystack`]; emptiness is checked in debug builds
/// only.
fn byte_range_to_positions(
    byte_start: usize,
    byte_end: usize,
    boundaries: &[CellBoundary],
) -> (AbsolutePosition, AbsolutePosition) {
    debug_assert!(byte_end > byte_start);
    // `byte_end` is exclusive, so the last byte of the range is one
    // before it; that byte's owning cell is the inclusive end.
    (
        boundary_at(byte_start, boundaries),
        boundary_at(byte_end - 1, boundaries),
    )
}

/// Look up the position of the cell whose byte range contains
/// `byte_offset`.
///
/// `boundaries` must be ordered by ascending, non-overlapping byte
/// ranges, as produced by [`build_span_haystack`]. If no entry ends
/// after `byte_offset`, debug builds fail an assertion and release
/// builds fall back to the last entry. Indexing panics on an empty
/// table.
fn boundary_at(byte_offset: usize, boundaries: &[CellBoundary]) -> AbsolutePosition {
    // Every entry that ends at or before the offset lies wholly to its
    // left; the first entry past that run is the one that owns the byte.
    let owner = boundaries.partition_point(|cell| cell.byte_end <= byte_offset);

    // The table is built to cover every byte of the haystack, so running
    // off its end means the table and the text disagree. Surface that in
    // debug builds instead of silently hiding it behind the clamp below.
    debug_assert!(
        owner < boundaries.len(),
        "byte offset {byte_offset} not covered by any cell boundary"
    );

    let last = boundaries.len().saturating_sub(1);
    boundaries[owner.min(last)].pos
}