use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
use serde::{Deserialize, Serialize};
use crate::dictionary::schema::Schema;
/// Word cost that `Metadata::default()` stores in `default_word_cost`.
const DEFAULT_WORD_COST: i16 = -10000;
/// Left context ID that `Metadata::default()` stores in `default_left_context_id`.
// NOTE(review): the origin of 1288 is not documented in this file — confirm
// which dictionary's context-ID table it refers to.
const DEFAULT_LEFT_CONTEXT_ID: u16 = 1288;
/// Right context ID that `Metadata::default()` stores in `default_right_context_id`.
// NOTE(review): same value as the left context ID; origin not shown here.
const DEFAULT_RIGHT_CONTEXT_ID: u16 = 1288;
/// Placeholder string that `Metadata::default()` stores in `default_field_value`.
const DEFAULT_FIELD_VALUE: &str = "*";
/// Descriptive information about a model, optionally attached to
/// [`Metadata`] through its `model_info` field.
///
/// The type derives both the serde traits and the rkyv archive traits, so it
/// can be carried in either representation alongside the metadata.
// NOTE(review): the per-field descriptions below are inferred from the field
// names only; nothing in this file populates or reads them — confirm against
// the code that builds `ModelInfo`.
#[derive(Clone, Serialize, Deserialize, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct ModelInfo {
    /// Number of features (presumably of the trained model — confirm).
    pub feature_count: usize,
    /// Number of labels (presumably of the trained model — confirm).
    pub label_count: usize,
    /// Largest left context ID (presumably as seen by the model — confirm).
    pub max_left_context_id: usize,
    /// Largest right context ID (presumably as seen by the model — confirm).
    pub max_right_context_id: usize,
    /// Size of the connection matrix, kept as text rather than a number.
    // NOTE(review): the exact string format is not established in this file.
    pub connection_matrix_size: String,
    /// Version string.
    pub version: String,
    /// Number of training iterations (presumably those actually run — confirm).
    pub training_iterations: u64,
    /// Regularization value (presumably the training hyper-parameter — confirm).
    pub regularization: f64,
    /// Time of the last update.
    // NOTE(review): unit and epoch are not established in this file — confirm.
    pub updated_at: u64,
}
/// Settings that describe a dictionary: its name, text encoding, the default
/// values applied to entries, CSV-handling flags, and the schemas of the main
/// and user dictionaries.
///
/// The type derives the serde traits (it is read from JSON by
/// `Metadata::load`) as well as the rkyv archive traits.
// NOTE(review): the consumers of the individual flags live outside this file;
// the per-field notes below restate the field names and should be confirmed
// against the code that reads them.
#[derive(Clone, Serialize, Deserialize, Archive, RkyvSerialize, RkyvDeserialize)]
pub struct Metadata {
    /// Dictionary name (`"default"` for `Metadata::default()`).
    pub name: String,
    /// Name of the text encoding (`"UTF-8"` for `Metadata::default()`).
    pub encoding: String,
    /// Word cost used when none is given (presumably per entry — confirm).
    pub default_word_cost: i16,
    /// Left context ID used when none is given (presumably per entry — confirm).
    pub default_left_context_id: u16,
    /// Right context ID used when none is given (presumably per entry — confirm).
    pub default_right_context_id: u16,
    /// Placeholder used for a field without a value (`"*"` by default).
    pub default_field_value: String,
    /// Flag for flexible CSV handling (exact semantics defined by the reader).
    pub flexible_csv: bool,
    /// Flag for skipping rows with an invalid cost or ID (semantics defined by
    /// the reader).
    pub skip_invalid_cost_or_id: bool,
    /// Flag for normalizing entry details (semantics defined by the reader).
    pub normalize_details: bool,
    /// Schema of the main dictionary.
    pub dictionary_schema: Schema,
    /// Schema of the user dictionary.
    pub user_dictionary_schema: Schema,
    /// Optional model information; left out of the serde output entirely when
    /// it is `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_info: Option<ModelInfo>,
}
impl Default for Metadata {
    /// Returns metadata named `"default"` with `"UTF-8"` encoding, the
    /// module-level default cost / context IDs / field value, every flag
    /// switched off, the default dictionary schema, and a user dictionary
    /// schema of `surface`, `reading`, `pronunciation`.
    fn default() -> Self {
        let dictionary_schema = Schema::default();

        // Column names of the default user dictionary schema, in order.
        let user_dictionary_fields: Vec<String> = ["surface", "reading", "pronunciation"]
            .iter()
            .copied()
            .map(String::from)
            .collect();
        let user_dictionary_schema = Schema::new(user_dictionary_fields);

        Self::new(
            String::from("default"),
            String::from("UTF-8"),
            DEFAULT_WORD_COST,
            DEFAULT_LEFT_CONTEXT_ID,
            DEFAULT_RIGHT_CONTEXT_ID,
            String::from(DEFAULT_FIELD_VALUE),
            false, // flexible_csv
            false, // skip_invalid_cost_or_id
            false, // normalize_details
            dictionary_schema,
            user_dictionary_schema,
        )
    }
}
impl Metadata {
#[allow(clippy::too_many_arguments)]
pub fn new(
name: String,
encoding: String,
simple_word_cost: i16,
default_left_context_id: u16,
default_right_context_id: u16,
default_field_value: String,
flexible_csv: bool,
skip_invalid_cost_or_id: bool,
normalize_details: bool,
schema: Schema,
userdic_schema: Schema,
) -> Self {
Self {
encoding,
default_word_cost: simple_word_cost,
default_left_context_id,
default_right_context_id,
default_field_value,
dictionary_schema: schema,
name,
flexible_csv,
skip_invalid_cost_or_id,
normalize_details,
user_dictionary_schema: userdic_schema,
model_info: None,
}
}
pub fn load(data: &[u8]) -> crate::LinderaResult<Self> {
if data.is_empty() {
return Err(crate::error::LinderaErrorKind::Io
.with_error(anyhow::anyhow!("Empty metadata data")));
}
serde_json::from_slice(data).map_err(|err| {
crate::error::LinderaErrorKind::Deserialize
.with_error(anyhow::anyhow!(err))
.add_context("Failed to deserialize metadata from JSON")
})
}
pub fn load_or_default(data: &[u8], default_fn: fn() -> Self) -> Self {
if data.is_empty() {
default_fn()
} else {
match Self::load(data) {
Ok(metadata) => metadata,
Err(_) => default_fn(),
}
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds metadata whose scalar fields all differ from the defaults and
    /// from each other, so a mis-wired or swapped field in `Metadata::new`
    /// shows up in the assertions.
    fn sample_metadata() -> Metadata {
        Metadata::new(
            "TestDict".to_string(),
            "EUC-JP".to_string(),
            -5000,
            10,
            20,
            "UNK".to_string(),
            true,
            false,
            true,
            Schema::default(),
            Schema::new(vec!["surface".to_string(), "reading".to_string()]),
        )
    }

    /// Replacement used by the `load_or_default` tests; its name makes it
    /// distinguishable from both parsed and `Default` metadata.
    fn fallback_metadata() -> Metadata {
        let mut metadata = Metadata::default();
        metadata.name = "fallback".to_string();
        metadata
    }

    #[test]
    fn test_metadata_default() {
        let metadata = Metadata::default();
        assert_eq!(metadata.name, "default");
        assert_eq!(metadata.encoding, "UTF-8");
        assert_eq!(metadata.default_word_cost, DEFAULT_WORD_COST);
        assert_eq!(metadata.default_left_context_id, DEFAULT_LEFT_CONTEXT_ID);
        assert_eq!(metadata.default_right_context_id, DEFAULT_RIGHT_CONTEXT_ID);
        assert_eq!(metadata.default_field_value, DEFAULT_FIELD_VALUE);
        assert!(!metadata.flexible_csv);
        assert!(!metadata.skip_invalid_cost_or_id);
        assert!(!metadata.normalize_details);
        assert!(metadata.model_info.is_none());
    }

    #[test]
    fn test_metadata_new() {
        let metadata = sample_metadata();
        assert_eq!(metadata.name, "TestDict");
        assert_eq!(metadata.encoding, "EUC-JP");
        assert_eq!(metadata.default_word_cost, -5000);
        // Left and right use different values so that a swap is detected.
        assert_eq!(metadata.default_left_context_id, 10);
        assert_eq!(metadata.default_right_context_id, 20);
        assert_eq!(metadata.default_field_value, "UNK");
        assert!(metadata.flexible_csv);
        assert!(!metadata.skip_invalid_cost_or_id);
        assert!(metadata.normalize_details);
        assert!(metadata.model_info.is_none());
    }

    #[test]
    fn test_metadata_serialization() {
        let metadata = Metadata::default();
        let serialized = serde_json::to_string(&metadata).unwrap();
        assert!(serialized.contains("default"));
        assert!(serialized.contains("schema"));
        assert!(serialized.contains("name"));
        // `model_info` is `None` here and must therefore be left out.
        assert!(!serialized.contains("model_info"));

        let deserialized: Metadata = serde_json::from_str(&serialized).unwrap();
        assert_eq!(deserialized.name, "default");
        assert_eq!(deserialized.encoding, "UTF-8");
        assert_eq!(deserialized.default_word_cost, DEFAULT_WORD_COST);
        assert!(deserialized.model_info.is_none());
    }

    #[test]
    fn test_metadata_load_round_trip() {
        let serialized = serde_json::to_string(&sample_metadata()).unwrap();
        // Matched by hand so the test does not require the error type to
        // implement `Debug`.
        let loaded = match Metadata::load(serialized.as_bytes()) {
            Ok(metadata) => metadata,
            Err(_) => panic!("valid metadata JSON should load"),
        };
        assert_eq!(loaded.name, "TestDict");
        assert_eq!(loaded.encoding, "EUC-JP");
        assert_eq!(loaded.default_word_cost, -5000);
        assert_eq!(loaded.default_left_context_id, 10);
        assert_eq!(loaded.default_right_context_id, 20);
        assert_eq!(loaded.default_field_value, "UNK");
        assert!(loaded.flexible_csv);
        assert!(!loaded.skip_invalid_cost_or_id);
        assert!(loaded.normalize_details);
    }

    #[test]
    fn test_metadata_load_rejects_empty_input() {
        assert!(Metadata::load(b"").is_err());
    }

    #[test]
    fn test_metadata_load_rejects_invalid_json() {
        assert!(Metadata::load(b"{ not json").is_err());
    }

    #[test]
    fn test_metadata_load_or_default_falls_back() {
        let from_empty = Metadata::load_or_default(b"", fallback_metadata);
        assert_eq!(from_empty.name, "fallback");

        let from_invalid = Metadata::load_or_default(b"{ not json", fallback_metadata);
        assert_eq!(from_invalid.name, "fallback");
    }

    #[test]
    fn test_metadata_load_or_default_prefers_valid_data() {
        let serialized = serde_json::to_string(&sample_metadata()).unwrap();
        let loaded = Metadata::load_or_default(serialized.as_bytes(), fallback_metadata);
        assert_eq!(loaded.name, "TestDict");
    }
}