rustik-highlight 0.1.0

Rustik code highlighter.
Documentation
//! Small shared helpers for text and lookup normalization.
//!
//! The highlighter works in byte ranges but must avoid splitting UTF-8 code
//! points when regex captures or caller ranges land inside a character. This
//! module also centralizes lowercase lookup-key handling for syntax inference.

use std::path::Path;

/// Rounds `index` up to the nearest UTF-8 character boundary in `text`.
///
/// An `index` beyond the end of `text` is clamped to `text.len()`, and an
/// `index` that already sits on a boundary is returned unchanged.
pub(crate) fn next_char_boundary(text: &str, index: usize) -> usize {
    let end = text.len();
    // `end` is always a boundary, so it doubles as the fallback when the
    // forward scan finds nothing (including the empty, clamped range).
    (index.min(end)..end)
        .find(|&offset| text.is_char_boundary(offset))
        .unwrap_or(end)
}

/// Rounds `index` down to the nearest UTF-8 character boundary in `text`.
///
/// An `index` beyond the end of `text` is clamped to `text.len()`, and an
/// `index` that already sits on a boundary is returned unchanged.
pub(crate) fn previous_char_boundary(text: &str, index: usize) -> usize {
    let start = index.min(text.len());
    // Offset 0 is always a boundary, so the backwards scan cannot come up
    // empty; the fallback only exists to satisfy the type.
    (0..=start)
        .rev()
        .find(|&offset| text.is_char_boundary(offset))
        .unwrap_or(0)
}

/// Strips every trailing `\r` and `\n` from `line`.
///
/// Only the end of the slice is touched; terminators in the middle of
/// `line` are kept. The result borrows from `line`.
#[inline]
pub(crate) fn trim_line_end(line: &str) -> &str {
    line.trim_end_matches(|ch: char| matches!(ch, '\r' | '\n'))
}

/// Collects the byte offset at which each line of `input` begins.
///
/// Empty input yields no offsets. Otherwise the first offset is `0`, and a
/// further offset is recorded after every `\n` that is not the final byte,
/// so a trailing newline does not open an extra empty line.
pub(crate) fn line_starts(input: &str) -> Vec<usize> {
    let len = input.len();
    if len == 0 {
        return Vec::new();
    }
    let after_newlines = input
        .bytes()
        .enumerate()
        // Skip a newline in the last position: nothing starts after it.
        .filter(|&(position, byte)| byte == b'\n' && position + 1 < len)
        .map(|(position, _)| position + 1);
    std::iter::once(0).chain(after_newlines).collect()
}

/// Builds a lookup key by ASCII-lowercasing `input`.
///
/// Only ASCII letters are folded; non-ASCII characters are copied through
/// unchanged.
#[inline]
pub(crate) fn key(input: &str) -> String {
    let mut lowered = String::from(input);
    lowered.make_ascii_lowercase();
    lowered
}

/// Yields the syntax lookup keys derived from `path`.
///
/// The file name comes first, followed by the extension; each is passed
/// through [`key`]. A component that is missing or is not valid UTF-8 is
/// skipped.
pub(crate) fn path_keys(path: &Path) -> impl Iterator<Item = String> + '_ {
    let file_name = path.file_name().and_then(|name| name.to_str());
    let extension = path.extension().and_then(|extension| extension.to_str());
    // Each `Option` contributes zero or one item, file name before extension.
    file_name.into_iter().chain(extension).map(key)
}