use super::parameter::{parse_parameter, ParameterData};
use crate::lex::annotation::split_label_tokens_with_ranges;
use crate::lex::token::normalization::utilities::{compute_bounding_box, extract_text};
use crate::lex::token::Token;
use std::ops::Range as ByteRange;
/// Result of splitting an annotation header into its label and parameters.
///
/// Produced by [`extract_data`]; all fields are derived from the header tokens
/// and the original source text.
#[derive(Clone, Debug)]
pub(in crate::lex::building) struct DataExtraction {
    /// Label text sliced out of the source and trimmed of surrounding
    /// whitespace. Empty when no non-whitespace label token was found.
    pub label_text: String,
    /// Byte range associated with the label tokens (`0..0` when the label
    /// text is empty).
    pub label_byte_range: ByteRange<usize>,
    /// Parameters parsed after the label, in the order they were encountered.
    pub parameters: Vec<ParameterData>,
}
/// Extracts the label and the parameter list from annotation header tokens.
///
/// The header is first split with `split_label_tokens_with_ranges`, which
/// yields the label tokens, the index at which parameter parsing starts, and a
/// flag telling whether a label was found. Leading whitespace/indentation
/// tokens are dropped from the label before its bounding box is computed, and
/// the label text is the trimmed source slice for that bounding box.
///
/// Parameters are then parsed one after another with `parse_parameter`,
/// starting at the index returned by the split, until either the tokens are
/// exhausted or `parse_parameter` returns `None`.
///
/// # Panics
///
/// Panics if `tokens` is empty, or if the split reports that the header has no
/// label.
pub(in crate::lex::building) fn extract_data(
    tokens: Vec<(Token, ByteRange<usize>)>,
    source: &str,
) -> DataExtraction {
    assert!(
        !tokens.is_empty(),
        "Annotation header tokens cannot be empty; parser must ensure labels are present"
    );

    let (label_tokens, mut cursor, has_label) = split_label_tokens_with_ranges(&tokens);
    assert!(
        has_label,
        "Annotation header must include a label before parameters"
    );

    // Only *leading* whitespace/indentation is discarded here; anything after
    // the first meaningful token stays part of the label span.
    // NOTE(review): the text below is trimmed on both sides while the range is
    // used as returned by `compute_bounding_box`, so the range may cover
    // trailing whitespace that the text does not contain — confirm intended.
    let label_span_tokens: Vec<_> = label_tokens
        .iter()
        .skip_while(|(token, _)| matches!(token, Token::Whitespace(_) | Token::Indentation))
        .cloned()
        .collect();

    let (label_text, label_byte_range) = if label_span_tokens.is_empty() {
        // Nothing but whitespace (or no label tokens at all): empty label.
        (String::new(), 0..0)
    } else {
        let span = compute_bounding_box(&label_span_tokens);
        let text = extract_text(span.clone(), source).trim().to_string();
        (text, span)
    };

    // Consume parameters greedily; the first failed parse ends the list.
    let mut parameters = Vec::new();
    while cursor < tokens.len() {
        match parse_parameter(&tokens, cursor, source) {
            Some((parameter, next_cursor)) => {
                parameters.push(parameter);
                cursor = next_cursor;
            }
            None => break,
        }
    }

    DataExtraction {
        label_text,
        label_byte_range,
        parameters,
    }
}