use super::{OntologySource, SemanticEntry, SemanticPool};
/// Alternate name for [`SemanticEntry`]; the two types are interchangeable.
pub type EntityReference = SemanticEntry;
/// Per-token handle onto a run of consecutive ids in a [`SemanticPool`].
///
/// The run starts at `semantic_start` and spans `semantic_count` ids. The
/// `Default` value has a count of zero, i.e. no semantic candidates attached.
#[derive(Debug, Clone, Default)]
pub struct TokenExtension {
/// First pool id of the run.
pub semantic_start: u32,
/// Number of consecutive pool ids in the run (a `u8`, so at most 255).
pub semantic_count: u8,
}
impl TokenExtension {
    /// Creates an extension with no semantic candidates (start `0`, count `0`).
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates an extension covering `count` consecutive pool ids beginning at `start`.
    pub fn with_semantics(start: u32, count: u8) -> Self {
        Self {
            semantic_start: start,
            semantic_count: count,
        }
    }

    /// Returns `true` when at least one candidate id is attached.
    pub fn has_semantics(&self) -> bool {
        self.semantic_count > 0
    }

    /// Number of candidate ids attached to this token.
    pub fn candidate_count(&self) -> usize {
        usize::from(self.semantic_count)
    }

    /// Resolves every attached id against `pool`, in ascending id order.
    ///
    /// Ids for which `pool.get` yields nothing are skipped, so the result may
    /// hold fewer than [`candidate_count`](Self::candidate_count) entries. Ids
    /// that would exceed `u32::MAX` are skipped as well instead of overflowing.
    /// A missing confidence is reported as `0.0`, and every entry is tagged
    /// with [`OntologySource::Wikidata`].
    pub fn get_entries(&self, pool: &SemanticPool) -> Vec<SemanticEntry> {
        // A zero capacity does not allocate, so the empty case stays cheap.
        let mut entries = Vec::with_capacity(self.candidate_count());
        entries.extend(
            self.offsets()
                .filter_map(|offset| self.entry_at(pool, offset)),
        );
        entries
    }

    /// Returns the first attached id that resolves in `pool`, if any.
    ///
    /// This is the same value as the first element of
    /// [`get_entries`](Self::get_entries), but the lookup stops at the first
    /// hit instead of materialising every candidate.
    // NOTE(review): "best" presumably relies on the pool storing candidates in
    // preference order; nothing in this file sorts by confidence — confirm.
    pub fn best_entry(&self, pool: &SemanticPool) -> Option<SemanticEntry> {
        self.offsets()
            .find_map(|offset| self.entry_at(pool, offset))
    }

    /// Offsets (relative to `semantic_start`) of all attached candidates.
    fn offsets(&self) -> std::ops::Range<u32> {
        0..u32::from(self.semantic_count)
    }

    /// Looks up the candidate at `semantic_start + offset`, or `None` when the
    /// id overflows `u32` or the pool has no URI for it.
    fn entry_at(&self, pool: &SemanticPool, offset: u32) -> Option<SemanticEntry> {
        // checked_add: a start close to u32::MAX must not panic (debug) or
        // wrap around to unrelated low ids (release).
        let id = self.semantic_start.checked_add(offset)?;
        let uri = pool.get(id)?;
        let confidence = pool.get_confidence(id).unwrap_or(0.0);
        Some(SemanticEntry::new(
            uri,
            confidence,
            OntologySource::Wikidata,
        ))
    }
}
/// A morpheme record (surface text, tag and optional lexical fields) paired
/// with a [`TokenExtension`] linking it to semantic candidates.
#[derive(Debug, Clone)]
pub struct ExtendedMorpheme {
/// Surface string of the morpheme.
pub surface: String,
/// Tag string; presumably the part of speech (the unit tests pass "名詞") — confirm with callers.
pub pos: String,
/// Optional reading; `None` unless set via `with_reading`.
pub reading: Option<String>,
/// Optional pronunciation; `None` unless set via `with_pronunciation`.
pub pronunciation: Option<String>,
/// Optional base form; `None` unless set via `with_base_form`.
pub base_form: Option<String>,
/// Semantic candidate range attached to this morpheme (empty by default).
pub extension: TokenExtension,
}
impl ExtendedMorpheme {
    /// Creates a morpheme with the given surface and tag.
    ///
    /// All optional fields start as `None` and the semantic extension is empty.
    pub fn new(surface: impl Into<String>, pos: impl Into<String>) -> Self {
        Self {
            surface: surface.into(),
            pos: pos.into(),
            reading: None,
            pronunciation: None,
            base_form: None,
            extension: TokenExtension::new(),
        }
    }

    /// Sets the reading and returns the updated morpheme.
    // `#[must_use]` matches the other `with_*` builders: the method consumes
    // `self`, so dropping the result silently discards the morpheme.
    #[must_use]
    pub fn with_reading(mut self, reading: impl Into<String>) -> Self {
        self.reading = Some(reading.into());
        self
    }

    /// Sets the pronunciation and returns the updated morpheme.
    #[must_use]
    pub fn with_pronunciation(mut self, pronunciation: impl Into<String>) -> Self {
        self.pronunciation = Some(pronunciation.into());
        self
    }

    /// Sets the base form and returns the updated morpheme.
    #[must_use]
    pub fn with_base_form(mut self, base_form: impl Into<String>) -> Self {
        self.base_form = Some(base_form.into());
        self
    }

    /// Replaces the semantic extension with one covering `count` pool ids
    /// starting at `start`, and returns the updated morpheme.
    #[must_use]
    pub fn with_semantics(mut self, start: u32, count: u8) -> Self {
        self.extension = TokenExtension::with_semantics(start, count);
        self
    }

    /// Returns `true` when the attached extension has at least one candidate.
    pub fn has_semantics(&self) -> bool {
        self.extension.has_semantics()
    }

    /// Collects the URI of every semantic entry the extension resolves in `pool`.
    ///
    /// The order follows `TokenExtension::get_entries`; the result is empty
    /// when nothing is attached or nothing resolves.
    pub fn semantic_uris(&self, pool: &SemanticPool) -> Vec<String> {
        self.extension
            .get_entries(pool)
            .into_iter()
            .map(|entry| entry.uri)
            .collect()
    }
}
/// Accumulates [`ExtendedMorpheme`] values and hands them back as a `Vec`.
#[derive(Debug, Default)]
pub struct ExtendedResultBuilder {
/// Morphemes added so far, in insertion order.
morphemes: Vec<ExtendedMorpheme>,
}
impl ExtendedResultBuilder {
    /// Starts a builder that holds no morphemes yet.
    pub fn new() -> Self {
        Self {
            morphemes: Vec::new(),
        }
    }

    /// Appends `morpheme` after those already added and hands the builder
    /// back so calls can be chained.
    pub fn add(&mut self, morpheme: ExtendedMorpheme) -> &mut Self {
        self.morphemes.push(morpheme);
        self
    }

    /// Consumes the builder and yields the morphemes in insertion order.
    pub fn build(self) -> Vec<ExtendedMorpheme> {
        let Self { morphemes } = self;
        morphemes
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly created extension carries no candidates.
    #[test]
    fn test_token_extension_default() {
        let empty = TokenExtension::new();
        assert!(!empty.has_semantics());
        assert_eq!(empty.candidate_count(), 0);
    }

    /// `with_semantics` records both the start id and the count.
    #[test]
    fn test_token_extension_with_semantics() {
        let (start, count) = (10, 3);
        let populated = TokenExtension::with_semantics(start, count);
        assert!(populated.has_semantics());
        assert_eq!(populated.candidate_count(), 3);
        assert_eq!(populated.semantic_start, 10);
    }

    /// Chained builder calls set exactly the fields they name.
    #[test]
    fn test_extended_morpheme_builder() {
        let base = ExtendedMorpheme::new("東京", "名詞");
        let morpheme = base.with_reading("トウキョウ").with_semantics(1, 2);
        assert_eq!(morpheme.surface, "東京");
        assert_eq!(morpheme.pos, "名詞");
        assert_eq!(morpheme.reading.as_deref(), Some("トウキョウ"));
        assert!(morpheme.has_semantics());
    }

    /// The result builder returns every morpheme that was added.
    #[test]
    fn test_result_builder() {
        let mut builder = ExtendedResultBuilder::new();
        builder
            .add(ExtendedMorpheme::new("東京", "名詞"))
            .add(ExtendedMorpheme::new("都", "名詞"));
        let morphemes = builder.build();
        assert_eq!(morphemes.len(), 2);
    }
}