use crate::lex::token::core::Token;
use std::ops::Range as ByteRange;
/// Abstraction over values that carry the lexer tokens they were built from,
/// each paired with the byte range that token occupies in the source text.
#[allow(dead_code)]
pub trait SourceTokenProvider {
    /// Returns the `(token, byte-range)` pairs backing this value.
    fn source_tokens(&self) -> &[(Token, ByteRange<usize>)];
}
/// Concatenates the source tokens of every provider, in slice order, into a
/// single owned vector. Each `(Token, ByteRange)` pair is cloned out of its
/// provider's backing slice.
#[allow(dead_code)]
pub fn unroll<T: SourceTokenProvider>(tokens: &[T]) -> Vec<(Token, ByteRange<usize>)> {
    let mut collected = Vec::new();
    for provider in tokens {
        collected.extend_from_slice(provider.source_tokens());
    }
    collected
}
/// Flattens a slice of token vectors into one vector, preserving both the
/// order of the outer slice and the order within each inner vector.
#[allow(dead_code)]
pub fn flatten_token_vecs(
    token_vecs: &[Vec<(Token, ByteRange<usize>)>],
) -> Vec<(Token, ByteRange<usize>)> {
    // Pre-size the output so the pushes below never reallocate.
    let total: usize = token_vecs.iter().map(Vec::len).sum();
    let mut flattened = Vec::with_capacity(total);
    for tokens in token_vecs {
        flattened.extend_from_slice(tokens);
    }
    flattened
}
/// Computes the smallest byte range covering every token in `tokens`.
///
/// The result spans from the minimum `start` to the maximum `end`, so any
/// gaps between non-contiguous tokens are included in the range. Returns
/// `0..0` for an empty slice.
pub fn compute_bounding_box(tokens: &[(Token, ByteRange<usize>)]) -> ByteRange<usize> {
    tokens
        .iter()
        .map(|(_, range)| (range.start, range.end))
        // Single pass: fold the running minimum start and maximum end
        // together instead of scanning the slice once per extremum.
        .reduce(|(start, end), (s, e)| (start.min(s), end.max(e)))
        // `reduce` yields None exactly when the slice is empty, which
        // replaces the old explicit is_empty() check and the unreachable
        // unwrap_or(0) fallbacks.
        .map_or(0..0, |(start, end)| start..end)
}
/// Returns the substring of `source` covered by `range` as an owned `String`.
///
/// # Panics
/// Panics if `range` is out of bounds for `source` or if either endpoint
/// does not fall on a UTF-8 character boundary.
pub fn extract_text(range: ByteRange<usize>, source: &str) -> String {
    String::from(&source[range])
}
/// Returns the byte-based column of `offset`: the distance in bytes from the
/// start of the line containing `offset` (i.e. from just after the most
/// recent `'\n'` before it, or from the start of `source` if there is none).
pub fn compute_column(offset: usize, source: &str) -> usize {
    // Scan only the bytes before `offset`. A byte-level search is equivalent
    // to the char-level scan: '\n' is a single ASCII byte, and UTF-8
    // continuation bytes can never equal 0x0A.
    let scanned = &source.as_bytes()[..offset.min(source.len())];
    let line_start = scanned
        .iter()
        .rposition(|&b| b == b'\n')
        .map_or(0, |newline| newline + 1);
    offset - line_start
}
/// Convenience wrapper: extracts from `source` the text spanned by the
/// bounding box of `tokens`, including any bytes in gaps between
/// non-contiguous tokens.
#[allow(dead_code)]
pub fn tokens_to_text(tokens: &[(Token, ByteRange<usize>)], source: &str) -> String {
    extract_text(compute_bounding_box(tokens), source)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Test double that exposes a fixed token list via `SourceTokenProvider`.
    struct MockToken {
        tokens: Vec<(Token, ByteRange<usize>)>,
    }

    impl SourceTokenProvider for MockToken {
        fn source_tokens(&self) -> &[(Token, ByteRange<usize>)] {
            &self.tokens
        }
    }

    /// Fixture shorthand: a `Token::Text` entry for `s` covering `range`.
    fn text(s: &str, range: ByteRange<usize>) -> (Token, ByteRange<usize>) {
        (Token::Text(s.to_string()), range)
    }

    /// Fixture shorthand: a single-space `Token::Whitespace` entry covering `range`.
    fn space(range: ByteRange<usize>) -> (Token, ByteRange<usize>) {
        (Token::Whitespace(1), range)
    }

    #[test]
    fn test_compute_bounding_box_single_token() {
        let tokens = vec![text("hello", 0..5)];
        assert_eq!(compute_bounding_box(&tokens), 0..5);
    }

    #[test]
    fn test_compute_bounding_box_multiple_contiguous() {
        let tokens = vec![text("hello", 0..5), space(5..6), text("world", 6..11)];
        assert_eq!(compute_bounding_box(&tokens), 0..11);
    }

    #[test]
    fn test_compute_bounding_box_non_contiguous() {
        // The bounding box spans the gap between the two tokens.
        let tokens = vec![text("hello", 0..5), text("world", 10..15)];
        assert_eq!(compute_bounding_box(&tokens), 0..15);
    }

    #[test]
    fn test_compute_bounding_box_empty_returns_zero_range() {
        let tokens: Vec<(Token, ByteRange<usize>)> = Vec::new();
        assert_eq!(compute_bounding_box(&tokens), 0..0);
    }

    #[test]
    fn test_extract_text_simple() {
        let source = "hello world";
        assert_eq!(extract_text(0..5, source), "hello");
        assert_eq!(extract_text(6..11, source), "world");
    }

    #[test]
    fn test_extract_text_multiline() {
        let source = "line one\nline two\nline three";
        assert_eq!(extract_text(0..8, source), "line one");
        assert_eq!(extract_text(9..17, source), "line two");
    }

    #[test]
    fn test_extract_text_unicode() {
        // "世界" occupies bytes 6..12 (three bytes per character).
        let source = "hello 世界";
        assert_eq!(extract_text(6..12, source), "世界");
    }

    #[test]
    fn test_unroll_single_token() {
        let mock = MockToken {
            tokens: vec![text("hello", 0..5)],
        };
        let unrolled = unroll(&[mock]);
        assert_eq!(unrolled.len(), 1);
        assert_eq!(unrolled[0].1, 0..5);
    }

    #[test]
    fn test_unroll_multiple_tokens() {
        let first = MockToken {
            tokens: vec![text("hello", 0..5)],
        };
        let second = MockToken {
            tokens: vec![space(5..6), text("world", 6..11)],
        };
        let unrolled = unroll(&[first, second]);
        let ranges: Vec<_> = unrolled.iter().map(|(_, r)| r.clone()).collect();
        assert_eq!(ranges, vec![0..5, 5..6, 6..11]);
    }

    #[test]
    fn test_tokens_to_text_convenience() {
        let source = "hello world";
        let tokens = vec![text("hello", 0..5), space(5..6)];
        assert_eq!(tokens_to_text(&tokens, source), "hello ");
    }

    #[test]
    fn test_flatten_token_vecs_empty() {
        let vecs: Vec<Vec<(Token, ByteRange<usize>)>> = Vec::new();
        assert!(flatten_token_vecs(&vecs).is_empty());
    }

    #[test]
    fn test_flatten_token_vecs_single() {
        let vecs = vec![vec![text("hello", 0..5), space(5..6)]];
        let flattened = flatten_token_vecs(&vecs);
        assert_eq!(flattened.len(), 2);
        assert_eq!(flattened[0].1, 0..5);
        assert_eq!(flattened[1].1, 5..6);
    }

    #[test]
    fn test_flatten_token_vecs_multiple() {
        let vecs = vec![
            vec![text("hello", 0..5)],
            vec![space(5..6), text("world", 6..11)],
        ];
        let flattened = flatten_token_vecs(&vecs);
        let ranges: Vec<_> = flattened.iter().map(|(_, r)| r.clone()).collect();
        assert_eq!(ranges, vec![0..5, 5..6, 6..11]);
    }
}