use anyhow::Context as _;
use anyhow::Result;
use graphannis::{
AnnotationGraph,
graph::{AnnoKey, NodeID},
};
use graphannis_core::graph::ANNIS_NS;
use graphannis_core::graph::NODE_NAME_KEY;
use ordermap::OrderSet;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::hash::Hash;
use super::util::token_helper::TokenHelper;
/// A single token node together with its position and its annotations.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct Token {
/// Value of the node-name annotation (`NODE_NAME_KEY`) of the token node.
pub node_name: String,
/// Start position supplied by the caller of [`Token::from_graph`].
/// NOTE(review): presumably an offset into the text; unit and
/// inclusiveness are not visible in this file — confirm against callers.
pub start: usize,
/// End position supplied by the caller of [`Token::from_graph`].
/// NOTE(review): see `start`; confirm whether this is inclusive or exclusive.
pub end: usize,
/// Annotation values keyed by annotation key. [`Token::from_graph`] stores
/// every node annotation here without filtering by namespace.
pub labels: BTreeMap<AnnoKey, String>,
}
/// A span node, the tokens it covers and its (non-`annis`) annotations.
#[derive(Clone, Debug)]
pub struct Span {
/// Value of the node-name annotation (`NODE_NAME_KEY`) of the span node.
pub node_name: String,
/// IDs of the covered token, kept in the order in which
/// `TokenHelper::covered_token` returned them (see [`Span::from_graph`]).
/// NOTE(review): the name suggests this is token order — confirm in `TokenHelper`.
pub sorted_covered_token_ids: OrderSet<NodeID>,
/// Annotation values keyed by annotation key. [`Span::from_graph`] omits
/// all annotations whose namespace is `ANNIS_NS`.
pub labels: BTreeMap<AnnoKey, String>,
}
/// A group of spans that were merged into one row.
///
/// When built via `From<Span>` and extended via `merge_span`, all spans in a
/// row cover pairwise disjoint token and share the same set of annotation keys.
#[derive(Clone, Debug)]
pub struct SpanRow {
/// The spans of this row, in the order in which they were added.
pub spans: Vec<Span>,
/// IDs of all token covered by any span of this row; used to detect
/// overlaps before another span is merged.
pub occupied_token: HashSet<NodeID>,
/// Annotation keys of the span this row was created from. A span can only
/// be merged when its label keys are exactly this set.
pub anno_keys: BTreeSet<AnnoKey>,
}
impl Token {
    /// Builds a [`Token`] for the node `node_id` of `graph`.
    ///
    /// `start` and `end` are stored unchanged. All node annotations of the
    /// node (regardless of their namespace) become entries of `labels`.
    ///
    /// # Errors
    ///
    /// Returns an error when the annotation storage cannot be queried or when
    /// the node has no node-name annotation.
    pub fn from_graph(
        node_id: NodeID,
        start: usize,
        end: usize,
        graph: &AnnotationGraph,
    ) -> Result<Self> {
        let node_annos = graph.get_node_annos();

        // The node name is resolved first, so a missing name is reported
        // before any failure of the full annotation lookup.
        let name = node_annos
            .get_value_for_item(&node_id, &NODE_NAME_KEY)?
            .with_context(|| format!("No node name for node with id {node_id}"))?
            .to_string();

        let labels: BTreeMap<_, _> = node_annos
            .get_annotations_for_item(&node_id)?
            .into_iter()
            .map(|anno| (anno.key, anno.val.to_string()))
            .collect();

        Ok(Self {
            node_name: name,
            start,
            end,
            labels,
        })
    }
}
impl Span {
    /// Builds a [`Span`] for the node `node_id` of `graph`.
    ///
    /// Annotations in the `ANNIS_NS` namespace are left out of `labels`. The
    /// covered token are obtained from `tok_helper` and stored in the order
    /// in which it returns them.
    ///
    /// # Errors
    ///
    /// Returns an error when the annotation storage cannot be queried, when
    /// the node has no node-name annotation or when the covered token cannot
    /// be determined.
    pub fn from_graph(
        node_id: NodeID,
        tok_helper: &TokenHelper<'_>,
        graph: &AnnotationGraph,
    ) -> Result<Self> {
        let node_annos = graph.get_node_annos();

        // Fallible steps run in this order: node name, annotations, covered token.
        let name = node_annos
            .get_value_for_item(&node_id, &NODE_NAME_KEY)?
            .with_context(|| format!("No node name for node with id {node_id}"))?
            .to_string();

        let labels: BTreeMap<_, _> = node_annos
            .get_annotations_for_item(&node_id)?
            .into_iter()
            .filter(|anno| anno.key.ns != ANNIS_NS)
            .map(|anno| (anno.key, anno.val.to_string()))
            .collect();

        // Collecting inserts element by element, so the first occurrence of
        // an ID determines its position in the set.
        let sorted_covered_token_ids = tok_helper.covered_token(node_id)?.into_iter().collect();

        Ok(Self {
            node_name: name,
            sorted_covered_token_ids,
            labels,
        })
    }
}
impl From<Span> for SpanRow {
fn from(span: Span) -> Self {
Self {
anno_keys: span.labels.keys().cloned().collect(),
occupied_token: span.sorted_covered_token_ids.iter().copied().collect(),
spans: vec![span],
}
}
}
impl SpanRow {
    /// Checks whether `span` could be added to this row.
    ///
    /// This is the case when `span` covers none of the token already occupied
    /// by the row and its label keys are exactly the row's annotation keys.
    pub(crate) fn can_merge_span(&self, span: &Span) -> bool {
        let covered = &span.sorted_covered_token_ids;

        // Iterate over the smaller set and probe the larger one.
        let overlaps = if self.occupied_token.len() <= covered.len() {
            self.occupied_token.iter().any(|id| covered.contains(id))
        } else {
            covered.iter().any(|id| self.occupied_token.contains(id))
        };

        // Both key collections iterate in sorted order, so an element-wise
        // comparison is a set equality check.
        !overlaps && self.anno_keys.iter().eq(span.labels.keys())
    }

    /// Adds a copy of `other` to this row if [`Self::can_merge_span`] allows it.
    ///
    /// Returns `Ok(true)` when the span was added (its token are then marked
    /// as occupied) and `Ok(false)` when the row was left unchanged.
    pub(crate) fn merge_span(&mut self, other: &Span) -> Result<bool> {
        if !self.can_merge_span(other) {
            return Ok(false);
        }

        self.occupied_token
            .extend(other.sorted_covered_token_ids.iter().copied());
        self.spans.push(other.clone());
        Ok(true)
    }
}