Skip to main content

perl_parser_core/syntax/
text_line.rs

//! Text-line cursor helpers.
//!
//! This crate has a single responsibility: map cursor offsets to line
//! boundaries and provide conservative token-boundary primitives for
//! single-line scanning.
6
7#![deny(unsafe_code)]
8#![warn(rust_2018_idioms)]
9#![warn(missing_docs)]
10#![warn(clippy::all)]
11
/// Return the byte span of the line containing `cursor_pos`.
///
/// The result is the half-open range `(start, end)`: `start` is the offset of
/// the line's first byte and `end` is the offset of the terminating `\n` (or
/// `text.len()` when the line is the last one), so the newline itself is
/// never part of the span.
///
/// `cursor_pos` is clamped to `text.len()`. It may point into the middle of a
/// multi-byte UTF-8 sequence; the scan works on raw bytes, so such offsets do
/// not panic. A cursor sitting exactly on a `\n` is treated as belonging to
/// the line which that newline terminates.
#[must_use]
pub fn line_bounds_at(text: &str, cursor_pos: usize) -> (usize, usize) {
    let bytes = text.as_bytes();
    let cursor = cursor_pos.min(bytes.len());

    // `\n` is ASCII and never occurs inside a multi-byte UTF-8 sequence, so a
    // byte-wise search is exact and avoids slicing `text` at a non-boundary.
    let (before, after) = bytes.split_at(cursor);
    let start = before
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |idx| idx + 1);
    let end = after
        .iter()
        .position(|&b| b == b'\n')
        .map_or(bytes.len(), |idx| cursor + idx);

    (start, end)
}
23
/// Report whether `byte` may appear in an identifier.
///
/// Accepts exactly the ASCII letters, the ASCII digits, and the underscore
/// (`[A-Za-z0-9_]`); every other byte, including all non-ASCII bytes, yields
/// `false`.
#[must_use]
pub fn is_identifier_byte(byte: u8) -> bool {
    matches!(byte, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_')
}
29
30/// Return `true` when token `keyword` bytes in `[start, start + len)` are
31/// bounded on both sides by non-identifier bytes.
32#[must_use]
33pub fn is_keyword_boundary(bytes: &[u8], start: usize, len: usize) -> bool {
34    if start > bytes.len() {
35        return false;
36    }
37
38    let end = start.saturating_add(len);
39    if end > bytes.len() {
40        return false;
41    }
42
43    if start > 0 && is_identifier_byte(bytes[start - 1]) {
44        return false;
45    }
46
47    if end < bytes.len() && is_identifier_byte(bytes[end]) {
48        return false;
49    }
50
51    true
52}
53
/// Return the first offset at or after `idx` whose byte is not ASCII
/// whitespace (as defined by [`u8::is_ascii_whitespace`]).
///
/// If every remaining byte is whitespace the result is `bytes.len()`. An
/// `idx` that already lies past the end of `bytes` is returned unchanged.
#[must_use]
pub fn skip_ascii_whitespace(bytes: &[u8], idx: usize) -> usize {
    // `get` yields `None` for an out-of-range start, in which case nothing is skipped.
    bytes.get(idx..).map_or(idx, |rest| {
        let skipped = rest
            .iter()
            .position(|b| !b.is_ascii_whitespace())
            .unwrap_or(rest.len());
        idx + skipped
    })
}