/// Fidelity setting attached to a document.
///
/// Within this file the only behavior tied to the level is
/// [`FidelityLevel::allows_compression`]; what each level means to downstream
/// consumers is not defined here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FidelityLevel {
    /// The only level for which compression is disallowed.
    Lossless,
    /// Compression is allowed.
    Semantic,
    /// Compression is allowed.
    Compressed,
}

impl FidelityLevel {
    /// Reports whether this level permits compression.
    ///
    /// Returns `false` for [`FidelityLevel::Lossless`] and `true` for every
    /// other level.
    pub fn allows_compression(self) -> bool {
        // Exhaustive on purpose: adding a variant forces a decision here.
        match self {
            FidelityLevel::Lossless => false,
            FidelityLevel::Semantic | FidelityLevel::Compressed => true,
        }
    }
}
/// A single element of a document.
#[derive(Debug, Clone)]
pub enum DocNode {
    /// A heading.
    Header {
        /// Heading level as supplied by the producer; no range is enforced here.
        level: u8,
        /// Heading text.
        text: String,
    },
    /// A paragraph of text.
    Para {
        /// Paragraph text.
        text: String,
        /// Weight reported by [`DocNode::importance`]; the scale is not
        /// defined in this file.
        importance: f32,
    },
    /// A table made of a header row and data rows.
    Table {
        /// Column header cells.
        headers: Vec<String>,
        /// Data rows, each a list of cells. Row widths are not validated.
        rows: Vec<Vec<String>>,
    },
    /// A code snippet with an optional language tag.
    Code { lang: Option<String>, body: String },
    /// A list of items, flagged as ordered or unordered.
    List { ordered: bool, items: Vec<String> },
    /// A key/value metadata entry.
    Metadata { key: String, value: String },
}

impl DocNode {
    /// Returns the importance weight of this node.
    ///
    /// Paragraphs return their stored `importance`; every other variant
    /// returns `1.0`.
    pub fn importance(&self) -> f32 {
        match self {
            DocNode::Para { importance, .. } => *importance,
            DocNode::Header { .. }
            | DocNode::Table { .. }
            | DocNode::Code { .. }
            | DocNode::List { .. }
            | DocNode::Metadata { .. } => 1.0,
        }
    }

    /// Returns the total length of the node's textual content.
    ///
    /// Despite the name, the unit is UTF-8 **bytes** (`String::len`), not
    /// Unicode characters, so multi-byte text counts more than once per
    /// character.
    ///
    /// What is counted per variant:
    /// - `Header` / `Para`: `text`
    /// - `Table`: all header cells plus all row cells
    /// - `Code`: `body` only (`lang` is ignored)
    /// - `List`: all items
    /// - `Metadata`: `key` plus `value`
    pub fn char_len(&self) -> usize {
        match self {
            DocNode::Header { text, .. } | DocNode::Para { text, .. } => text.len(),
            DocNode::Code { body, .. } => body.len(),
            DocNode::Metadata { key, value } => key.len() + value.len(),
            DocNode::List { items, .. } => items.iter().map(String::len).sum(),
            DocNode::Table { headers, rows } => {
                let header_bytes: usize = headers.iter().map(String::len).sum();
                let cell_bytes: usize = rows.iter().flatten().map(String::len).sum();
                header_bytes + cell_bytes
            }
        }
    }
}
/// A document held as an ordered list of [`DocNode`]s together with its
/// fidelity setting and an optional token budget.
#[derive(Debug, Clone)]
pub struct IRDocument {
    /// Fidelity level the document was created with.
    pub fidelity: FidelityLevel,
    /// Nodes in insertion order.
    pub nodes: Vec<DocNode>,
    /// Optional budget supplied at construction; it is stored but not
    /// consulted by any method in this file.
    pub token_budget: Option<usize>,
}

impl IRDocument {
    /// Creates an empty document with the given fidelity and token budget.
    pub fn new(fidelity: FidelityLevel, token_budget: Option<usize>) -> Self {
        let nodes = Vec::new();
        Self {
            fidelity,
            nodes,
            token_budget,
        }
    }

    /// Appends `node` to the end of the document.
    pub fn push(&mut self, node: DocNode) {
        self.nodes.push(node);
    }

    /// Returns the sum of [`DocNode::char_len`] over all nodes.
    pub fn total_char_len(&self) -> usize {
        self.nodes.iter().map(DocNode::char_len).sum()
    }

    /// Looks up a metadata value by key.
    ///
    /// Scans the nodes in order and returns the value of the first
    /// `DocNode::Metadata` whose key equals `key`, or `None` when no such
    /// node exists. Non-metadata nodes are skipped.
    pub fn get_metadata(&self, key: &str) -> Option<&str> {
        self.nodes.iter().find_map(|node| match node {
            DocNode::Metadata { key: k, value } if k == key => Some(value.as_str()),
            _ => None,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Only `Lossless` forbids compression.
    #[test]
    fn fidelity_compression_flag() {
        assert!(!FidelityLevel::Lossless.allows_compression());
        assert!(FidelityLevel::Semantic.allows_compression());
        assert!(FidelityLevel::Compressed.allows_compression());
    }

    /// Non-paragraph nodes report `1.0`; paragraphs report their stored weight.
    #[test]
    fn doc_node_importance_defaults() {
        let heading = DocNode::Header {
            level: 1,
            text: String::from("제목"),
        };
        assert_eq!(heading.importance(), 1.0);

        let paragraph = DocNode::Para {
            text: String::from("내용"),
            importance: 0.3,
        };
        assert_eq!(paragraph.importance(), 0.3);
    }

    /// A present key yields its value; an absent key yields `None`.
    #[test]
    fn ir_document_metadata_lookup() {
        let mut document = IRDocument::new(FidelityLevel::Semantic, Some(4096));
        let title_entry = DocNode::Metadata {
            key: String::from("title"),
            value: String::from("테스트 문서"),
        };
        document.push(title_entry);

        let found = document.get_metadata("title");
        assert_eq!(found, Some("테스트 문서"));

        let absent = document.get_metadata("missing");
        assert_eq!(absent, None);
    }

    /// `char_len` counts UTF-8 bytes: each Hangul syllable below is 3 bytes,
    /// so the cells contribute 6 + 6 + 9 + 2 = 23.
    #[test]
    fn table_char_len() {
        let headers = vec![String::from("이름"), String::from("나이")];
        let rows = vec![vec![String::from("홍길동"), String::from("30")]];
        let table = DocNode::Table { headers, rows };
        assert_eq!(table.char_len(), 23);
    }
}