use crate::lex::token::normalization::utilities::{compute_bounding_box, extract_text};
use crate::lex::token::Token;
use std::ops::Range as ByteRange;
#[derive(Debug, Clone)]
/// Text content of a paragraph, reconstructed from lexed tokens.
///
/// Produced by [`extract_paragraph_data`]; visible only inside
/// `crate::lex::building`.
pub(in crate::lex::building) struct ParagraphData {
    // One entry per input token line: the extracted source text for that
    // line together with the line's bounding byte range in the source.
    pub text_lines: Vec<(String, ByteRange<usize>)>,
    // Bounding byte range covering every token in the paragraph;
    // `0..0` when the paragraph contained no tokens at all.
    pub overall_byte_range: ByteRange<usize>,
}
/// Builds a [`ParagraphData`] from the token lines of one paragraph.
///
/// For every line, the bounding byte range of its tokens is computed and the
/// corresponding slice of `source` is extracted. The overall range is the
/// bounding box of *all* tokens across every line, or `0..0` when the
/// paragraph holds no tokens.
pub(in crate::lex::building) fn extract_paragraph_data(
    token_lines: Vec<Vec<(Token, ByteRange<usize>)>>,
    source: &str,
) -> ParagraphData {
    // Single pass: collect the per-line text while funnelling the tokens
    // into one flat list used for the overall bounding box afterwards.
    let mut text_lines = Vec::with_capacity(token_lines.len());
    let mut flattened: Vec<(Token, ByteRange<usize>)> = Vec::new();
    for line_tokens in token_lines {
        let line_range = compute_bounding_box(&line_tokens);
        let line_text = extract_text(line_range.clone(), source);
        text_lines.push((line_text, line_range));
        flattened.extend(line_tokens);
    }
    // An empty paragraph gets the sentinel range 0..0 rather than calling
    // the bounding-box helper with nothing to bound.
    let overall_byte_range = if flattened.is_empty() {
        0..0
    } else {
        compute_bounding_box(&flattened)
    };
    ParagraphData {
        text_lines,
        overall_byte_range,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single one-token line should yield matching per-line and overall
    /// byte ranges plus the extracted slice of the source.
    #[test]
    fn test_extract_paragraph_data() {
        let src = "hello world";
        let lines = vec![vec![(Token::Text("hello".to_string()), 0..5)]];

        let result = extract_paragraph_data(lines, src);

        let (ref text, ref range) = result.text_lines[0];
        assert_eq!(result.text_lines.len(), 1);
        assert_eq!(text, "hello");
        assert_eq!(*range, 0..5);
        assert_eq!(result.overall_byte_range, 0..5);
    }
}