use std::{
collections::{HashMap, VecDeque},
io::BufRead,
};
use crate::{Token, Tokenizer, TokenizerError};
#[derive(Debug, PartialEq, Clone)]
pub enum ParserError {
TokenizerError(TokenizerError),
OpenBraceNoKey,
}
pub type ElementIndex = usize;
#[derive(Debug, PartialEq, Clone)]
pub struct Element {
pub key: String,
pub tokens: Vec<String>,
pub children: Vec<usize>,
pub parent_index: Option<usize>,
}
impl Element {
pub fn new(key: String) -> Self {
Self {
key,
tokens: Vec::new(),
children: Vec::new(),
parent_index: None,
}
}
}
#[derive(Default, Debug, PartialEq, Clone)]
pub struct ElementAmphitheatre {
elements: Vec<Element>,
}
impl ElementAmphitheatre {
pub fn new() -> Self {
Self {
elements: Vec::new(),
}
}
pub fn insert(&mut self, element: Element) -> usize {
let index = self.elements.len();
self.elements.push(element);
index
}
pub fn get(&self, index: ElementIndex) -> Option<&Element> {
self.elements.get(index)
}
pub fn get_mut(&mut self, index: ElementIndex) -> Option<&mut Element> {
self.elements.get_mut(index)
}
pub fn get_by_key(&self, key: &str) -> Option<&Element> {
self.elements.iter().find(|element| element.key == key)
}
pub fn with_capacity(n: usize) -> Self {
Self {
elements: Vec::with_capacity(n),
}
}
pub fn capacity(&self) -> usize {
self.elements.capacity()
}
pub fn reserve(&mut self, additional: usize) {
self.elements.reserve(additional);
}
pub fn count(&self) -> usize {
self.elements.len()
}
pub fn is_empty(&self) -> bool {
self.elements.is_empty()
}
pub fn iter(&self) -> std::slice::Iter<'_, Element> {
self.elements.iter()
}
pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, Element> {
self.elements.iter_mut()
}
pub fn clear(&mut self) {
self.elements.clear();
}
pub fn as_slice(&self) -> &[Element] {
self.elements.as_slice()
}
pub fn as_mut_slice(&mut self) -> &mut [Element] {
self.elements.as_mut_slice()
}
#[inline]
#[must_use]
pub fn get_handle(&self, index: ElementIndex) -> Option<ElementHandle<'_>> {
if self.get(index).is_some() {
Some(ElementHandle::new(self, index))
} else {
None
}
}
#[inline]
#[must_use]
pub fn get_handle_by_key(&self, key: &str) -> Option<ElementHandle<'_>> {
self.elements
.iter()
.enumerate()
.find(|(_, element)| element.key == key)
.map(|(index, _)| ElementHandle::new(self, index))
}
pub fn extract_subtree(&self, index: ElementIndex) -> Option<ElementAttribute> {
let element = self.get(index)?.clone();
if element.children.is_empty() {
return Some(ElementAttribute::Leaf(Box::new(LeafAttribute {
key: element.key,
tokens: element.tokens,
})));
}
let mut subtree = ElementAmphitheatre::new();
let mut queue = VecDeque::new();
queue.push_back((element, None));
let mut root_index = None;
while let Some((element, parent_index)) = queue.pop_front() {
let index = subtree.insert(element);
let element = subtree.get_mut(index)?;
element.parent_index = parent_index;
for child_index in &element.children {
let child = self.get(*child_index)?.clone();
queue.push_back((child, Some(index)));
}
element.children.clear();
if parent_index.is_none() {
root_index = Some(index);
continue;
}
if let Some(parent) = subtree.get_mut(parent_index.unwrap()) {
parent.children.push(index);
}
}
Some(ElementAttribute::SubTree(Box::new(SubTreeAttribute {
amphitheatre: subtree,
root_element_index: root_index.unwrap(),
})))
}
}
#[derive(Debug, Clone, Copy)]
pub struct ElementHandle<'a> {
arena: &'a ElementAmphitheatre,
index: ElementIndex,
}
impl<'a> ElementHandle<'a> {
#[inline]
#[must_use]
pub fn new(arena: &'a ElementAmphitheatre, index: ElementIndex) -> Self {
assert!(
arena.get(index).is_some(),
"The element index is not valid in the given arena: index={}",
index
);
Self { arena, index }
}
#[inline]
#[must_use]
pub fn arena(&self) -> &'a ElementAmphitheatre {
self.arena
}
#[inline]
#[must_use]
pub fn index(&self) -> ElementIndex {
self.index
}
#[inline]
#[must_use]
pub fn element(&self) -> &'a Element {
self.arena
.get(self.index)
.expect("Element index should be valid")
}
#[inline]
pub fn to_attribute(&self) -> Option<ElementAttribute> {
self.arena().extract_subtree(self.index)
}
#[inline]
#[must_use]
pub fn key(&self) -> &'a str {
&self.element().key
}
#[inline]
#[must_use]
pub fn tokens(&self) -> &'a [String] {
&self.element().tokens
}
#[inline]
pub fn has_children(&self) -> bool {
!self.element().children.is_empty()
}
#[inline]
#[must_use]
pub fn children(&self) -> ElementChildren<'a> {
ElementChildren {
arena: self.arena,
indices: self.element().children.iter(),
}
}
#[inline]
#[must_use]
pub fn children_by_key(&self, key: &str) -> ElementChildrenByKey<'a> {
ElementChildrenByKey {
key: key.to_string(),
children_iter: self.children(),
}
}
#[inline]
#[must_use]
pub fn first_child_by_key(&self, key: &str) -> Option<Self> {
self.children_by_key(key).next()
}
#[inline]
#[must_use]
pub fn parent(&self) -> Option<Self> {
self.element()
.parent_index
.map(|parent_index| Self::new(self.arena, parent_index))
}
#[inline]
#[must_use]
pub fn first_child(&self) -> Option<Self> {
self.element()
.children
.first()
.map(|&index| Self::new(self.arena, index))
}
#[inline]
#[must_use]
pub fn last_child(&self) -> Option<Self> {
self.element()
.children
.last()
.map(|&index| Self::new(self.arena, index))
}
#[inline]
#[must_use]
pub fn previous_sibling(&self) -> Option<Self> {
let parent = self.parent()?;
let parent_element = parent.element();
let position = parent_element
.children
.iter()
.position(|&idx| idx == self.index)?;
if position > 0 {
let prev_index = parent_element.children[position - 1];
Some(Self::new(self.arena, prev_index))
} else {
None
}
}
#[inline]
#[must_use]
pub fn next_sibling(&self) -> Option<Self> {
let parent = self.parent()?;
let parent_element = parent.element();
let position = parent_element
.children
.iter()
.position(|&idx| idx == self.index)?;
if position + 1 < parent_element.children.len() {
let next_index = parent_element.children[position + 1];
Some(Self::new(self.arena, next_index))
} else {
None
}
}
}
#[derive(Debug, PartialEq, Clone)]
pub struct LeafAttribute {
pub key: String,
pub tokens: Vec<String>,
}
#[derive(Debug, PartialEq, Clone)]
pub struct SubTreeAttribute {
pub amphitheatre: ElementAmphitheatre,
pub root_element_index: usize,
}
#[derive(Debug, PartialEq, Clone)]
pub enum ElementAttribute {
Leaf(Box<LeafAttribute>),
SubTree(Box<SubTreeAttribute>),
}
impl ElementAttribute {
pub fn is_leaf(&self) -> bool {
matches!(self, ElementAttribute::Leaf(_))
}
pub fn is_sub_tree(&self) -> bool {
matches!(self, ElementAttribute::SubTree(_))
}
pub fn get_key(&self) -> &str {
match self {
ElementAttribute::Leaf(leaf) => &leaf.key,
ElementAttribute::SubTree(sub_tree) => {
&sub_tree
.amphitheatre
.get(sub_tree.root_element_index)
.expect("Root element index should exist in SubTree")
.key
}
}
}
pub fn get_tokens(&self) -> &[String] {
match self {
ElementAttribute::Leaf(leaf) => &leaf.tokens,
ElementAttribute::SubTree(sub_tree) => {
&sub_tree
.amphitheatre
.get(sub_tree.root_element_index)
.expect("Root element index should exist in SubTree")
.tokens
}
}
}
pub fn get_children(&self) -> HashMap<String, Vec<ElementAttribute>> {
match self {
ElementAttribute::Leaf(_) => HashMap::new(),
ElementAttribute::SubTree(st) => {
let arena = &st.amphitheatre;
let Some(root) = arena.get(st.root_element_index) else {
return HashMap::new();
};
let mut out = HashMap::new();
for &child_idx in &root.children {
let Some(sub) = arena.extract_subtree(child_idx) else {
continue;
};
let key = arena
.get(child_idx)
.map(|e| e.key.clone())
.unwrap_or_default();
out.entry(key).or_insert(Vec::new()).push(sub);
}
out
}
}
}
pub fn get_children_distinct(&self) -> HashMap<String, ElementAttribute> {
match self {
ElementAttribute::Leaf(_) => HashMap::new(),
ElementAttribute::SubTree(st) => {
let arena = &st.amphitheatre;
let Some(root) = arena.get(st.root_element_index) else {
return HashMap::new();
};
let mut out = HashMap::new();
for &child_idx in &root.children {
let Some(sub) = arena.extract_subtree(child_idx) else {
continue;
};
let key = arena
.get(child_idx)
.map(|e| e.key.clone())
.unwrap_or_default();
out.insert(key, sub);
}
out
}
}
}
}
impl<'a> From<ElementHandle<'a>> for Option<ElementAttribute> {
fn from(value: ElementHandle<'a>) -> Self {
value.to_attribute()
}
}
#[derive(Debug, PartialEq)]
pub enum ElementParseError {
MissingValueToken,
ParseError(String),
}
impl TryFrom<ElementHandle<'_>> for u32 {
type Error = ElementParseError;
fn try_from(value: ElementHandle<'_>) -> Result<Self, Self::Error> {
let value = value
.tokens()
.first()
.ok_or(ElementParseError::MissingValueToken)?;
let result = value
.parse::<u32>()
.map_err(|e| ElementParseError::ParseError(e.to_string()))?;
Ok(result)
}
}
impl TryFrom<ElementHandle<'_>> for String {
type Error = ElementParseError;
fn try_from(value: ElementHandle<'_>) -> Result<Self, Self::Error> {
let value = value
.tokens()
.first()
.ok_or(ElementParseError::MissingValueToken)?;
Ok(value.to_string())
}
}
#[derive(Clone)]
pub struct ElementChildren<'a> {
arena: &'a ElementAmphitheatre,
indices: std::slice::Iter<'a, usize>,
}
impl<'a> Iterator for ElementChildren<'a> {
type Item = ElementHandle<'a>;
fn next(&mut self) -> Option<Self::Item> {
let index = *self.indices.next()?;
Some(ElementHandle::new(self.arena, index))
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.indices.size_hint()
}
}
impl ExactSizeIterator for ElementChildren<'_> {}
impl std::iter::FusedIterator for ElementChildren<'_> {}
impl<'a> std::fmt::Debug for ElementChildren<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("ElementChildren").finish()
}
}
#[derive(Clone)]
pub struct ElementChildrenByKey<'a> {
key: String,
children_iter: ElementChildren<'a>,
}
impl<'a> Iterator for ElementChildrenByKey<'a> {
type Item = ElementHandle<'a>;
fn next(&mut self) -> Option<Self::Item> {
self.children_iter.find(|child| child.key() == self.key)
}
}
impl std::iter::FusedIterator for ElementChildrenByKey<'_> {}
impl<'a> std::fmt::Debug for ElementChildrenByKey<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
f.debug_struct("ElementChildrenByKey")
.field("key", &self.key)
.finish()
}
}
pub struct Parser<R: BufRead> {
tokenizer: Tokenizer<R>,
element_arena: ElementAmphitheatre,
}
impl<R: BufRead> Parser<R> {
pub fn new(tokenizer: Tokenizer<R>) -> Self {
Self {
tokenizer,
element_arena: ElementAmphitheatre::new(),
}
}
pub fn load(mut self) -> Result<ElementAmphitheatre, ParserError> {
let mut iter = self.iter();
while let Some(result) = iter.next() {
result?;
}
Ok(self.element_arena)
}
pub fn get_arena_ref(&self) -> &ElementAmphitheatre {
&self.element_arena
}
pub fn iter(&'_ mut self) -> ParserIter<'_, R> {
ParserIter {
tokenizer: &mut self.tokenizer,
parser_arena: &mut self.element_arena,
current_scope: None,
current_element: None,
}
}
}
pub struct ParserIter<'a, R: BufRead> {
tokenizer: &'a mut Tokenizer<R>,
parser_arena: &'a mut ElementAmphitheatre,
current_scope: Option<usize>,
current_element: Option<Element>,
}
impl<'a, R: BufRead> ParserIter<'a, R> {
pub fn create_element(&mut self, key: String) {
let mut new_element = Element::new(key);
new_element.parent_index = self.current_scope;
self.current_element = Some(new_element);
}
pub fn insert_element(&mut self, element: Element) -> usize {
let parent_index = element.parent_index;
let index = self.parser_arena.insert(element);
if let Some(parent_index) = parent_index {
if let Some(parent) = self.parser_arena.get_mut(parent_index) {
parent.children.push(index);
}
}
index
}
}
impl<'a, R: BufRead> Iterator for ParserIter<'a, R> {
type Item = Result<usize, ParserError>;
fn next(&mut self) -> Option<Self::Item> {
while let Some(token) = self.tokenizer.next() {
let valid_token = match token {
Ok(t) => t,
Err(e) => return Some(Err(ParserError::TokenizerError(e))),
};
match valid_token.data {
Token::OpenBrace => {
if self.current_element.is_none() {
return Some(Err(ParserError::OpenBraceNoKey));
}
let element: Element = self.current_element.take().unwrap();
let index = self.insert_element(element);
self.current_scope = Some(index);
return Some(Ok(index));
}
Token::CloseBrace => {
if let Some(index) = self.current_scope {
if let Some(element) = self.parser_arena.get(index) {
self.current_scope = element.parent_index;
continue;
}
}
return None;
}
Token::Data(data) => {
if let Some(ref mut element) = self.current_element {
element.tokens.push(data);
}
}
Token::Key(key) => {
if let Some(element) = self.current_element.take() {
let index = self.insert_element(element);
self.create_element(key);
return Some(Ok(index));
}
self.create_element(key);
}
_ => {
continue;
}
}
}
if let Some(element) = self.current_element.take() {
let index = self.insert_element(element);
return Some(Ok(index));
}
None
}
}
#[cfg(test)]
mod tests {
use std::io::BufReader;
use super::*;
#[test]
fn test_parser_load_empty() {
let input = "";
let tokenizer = Tokenizer::new(BufReader::new(input.as_bytes()));
let parser = Parser::new(tokenizer);
let elements = parser.load().unwrap();
assert_eq!(elements.elements.len(), 0);
}
#[test]
fn test_parser_read_line_key() {
let input = "Key: Value\n";
let tokenizer = Tokenizer::new(BufReader::new(input.as_bytes()));
let parser = Parser::new(tokenizer);
let elements = parser.load().unwrap();
assert_eq!(elements.elements.len(), 1);
assert_eq!(elements.elements[0].key, "Key");
assert_eq!(elements.elements[0].tokens, vec!["Value"]);
}
#[test]
fn test_parser_read_line() {
let input = r#"
FBXHeaderExtension: {
FBXHeaderVersion: 1003
}"#;
let tokenizer = Tokenizer::new(BufReader::new(input.as_bytes()));
let parser = Parser::new(tokenizer);
let elements = parser.load().unwrap();
assert_eq!(elements.elements.len(), 2);
assert_eq!(elements.elements[0].key, "FBXHeaderExtension");
assert_eq!(elements.elements[0].tokens.len(), 0);
assert_eq!(elements.elements[0].parent_index, None);
assert_eq!(elements.elements[1].key, "FBXHeaderVersion");
assert_eq!(elements.elements[1].tokens, vec!["1003"]);
assert_eq!(elements.elements[1].parent_index, Some(0));
}
fn arena_with_root_and_two_children() -> ElementAmphitheatre {
let mut arena = ElementAmphitheatre::new();
let root = arena.insert(Element {
key: "Root".into(),
tokens: vec!["root_tok".into()],
children: vec![],
parent_index: None,
});
let a = arena.insert(Element {
key: "ChildA".into(),
tokens: vec!["1".into()],
children: vec![],
parent_index: Some(root),
});
let b = arena.insert(Element {
key: "ChildB".into(),
tokens: vec!["2".into(), "3".into()],
children: vec![],
parent_index: Some(root),
});
arena.get_mut(root).unwrap().children = vec![a, b];
arena
}
#[test]
fn extract_subtree_oob_returns_none() {
let arena = ElementAmphitheatre::new();
assert!(arena.extract_subtree(0).is_none());
}
#[test]
fn extract_subtree_leaf_element_attribute() {
let mut arena = ElementAmphitheatre::new();
let idx = arena.insert(Element {
key: "Version".into(),
tokens: vec!["7500".into()],
children: vec![],
parent_index: Some(999),
});
let attr = arena.extract_subtree(idx).expect("leaf");
assert!(attr.is_leaf());
assert!(!attr.is_sub_tree());
assert_eq!(attr.get_key(), "Version");
assert_eq!(attr.get_tokens(), &["7500".to_string()]);
assert!(attr.get_children_distinct().is_empty());
match &attr {
ElementAttribute::Leaf(leaf) => {
assert_eq!(leaf.key, "Version");
assert_eq!(leaf.tokens, vec!["7500".to_string()]);
}
ElementAttribute::SubTree(_) => panic!("expected leaf"),
}
}
#[test]
fn extract_subtree_nested_yields_subtree_element_attribute() {
let arena = arena_with_root_and_two_children();
let root_handle = arena.get_handle(0).unwrap();
let attr = root_handle.to_attribute().expect("subtree");
assert!(attr.is_sub_tree());
assert!(!attr.is_leaf());
assert_eq!(attr.get_key(), "Root");
assert_eq!(attr.get_tokens(), &["root_tok".to_string()]);
let children = attr.get_children_distinct();
assert_eq!(children.len(), 2);
let child_a = children.get("ChildA").expect("ChildA");
assert!(child_a.is_leaf());
assert_eq!(child_a.get_key(), "ChildA");
assert_eq!(child_a.get_tokens(), &["1".to_string()]);
let child_b = children.get("ChildB").expect("ChildB");
assert!(child_b.is_leaf());
assert_eq!(child_b.get_tokens(), &["2".to_string(), "3".to_string()]);
}
#[test]
fn extract_subtree_inner_node_is_leaf_in_new_arena() {
let arena = arena_with_root_and_two_children();
let attr = arena.extract_subtree(1).expect("first child");
assert!(attr.is_leaf());
assert_eq!(attr.get_key(), "ChildA");
}
#[test]
fn extract_subtree_subtree_has_consistent_parent_child_links() {
let arena = arena_with_root_and_two_children();
let ElementAttribute::SubTree(st) = arena.extract_subtree(0).expect("root") else {
panic!("expected SubTree");
};
let sub = &st.amphitheatre;
let root = sub.get(st.root_element_index).expect("root in subtree");
assert_eq!(root.key, "Root");
assert_eq!(root.children.len(), 2);
for &ci in &root.children {
let c = sub.get(ci).expect("child");
assert_eq!(c.parent_index, Some(st.root_element_index));
}
}
#[test]
fn extract_subtree_from_parsed_fbx_header_matches_leaf_and_subtree() {
let input = r#"
FBXHeaderExtension: {
FBXHeaderVersion: 1003
}"#;
let tokenizer = Tokenizer::new(BufReader::new(input.as_bytes()));
let parser = Parser::new(tokenizer);
let arena = parser.load().unwrap();
let leaf = arena.extract_subtree(1).expect("version leaf");
assert!(leaf.is_leaf());
assert_eq!(leaf.get_key(), "FBXHeaderVersion");
assert_eq!(leaf.get_tokens(), &["1003".to_string()]);
let subtree = arena.extract_subtree(0).expect("header subtree");
assert!(subtree.is_sub_tree());
assert_eq!(subtree.get_key(), "FBXHeaderExtension");
assert_eq!(subtree.get_tokens().len(), 0);
let kids = subtree.get_children_distinct();
assert_eq!(kids.len(), 1);
let nested = kids.get("FBXHeaderVersion").expect("nested");
assert!(nested.is_leaf());
assert_eq!(nested.get_tokens(), &["1003".to_string()]);
}
#[test]
fn extract_subtree_deep_chain_preserves_tokens_at_each_level() {
let mut arena = ElementAmphitheatre::new();
let i0 = arena.insert(Element {
key: "L0".into(),
tokens: vec![],
children: vec![],
parent_index: None,
});
let i1 = arena.insert(Element {
key: "L1".into(),
tokens: vec!["t1".into()],
children: vec![],
parent_index: Some(i0),
});
let i2 = arena.insert(Element {
key: "L2".into(),
tokens: vec!["t2".into()],
children: vec![],
parent_index: Some(i1),
});
arena.get_mut(i0).unwrap().children.push(i1);
arena.get_mut(i1).unwrap().children.push(i2);
let mid = arena.extract_subtree(i1).expect("middle subtree");
assert!(mid.is_sub_tree());
assert_eq!(mid.get_key(), "L1");
assert_eq!(mid.get_tokens(), &["t1".to_string()]);
let mid_children = mid.get_children_distinct();
let inner = mid_children.get("L2").expect("L2");
assert!(inner.is_leaf());
assert_eq!(inner.get_key(), "L2");
assert_eq!(inner.get_tokens(), &["t2".to_string()]);
}
}