use serde::{de, Deserialize, Serialize, Serializer};
/// Response to an `_analyze` request.
///
/// Externally tagged by serde: the single key of the JSON object selects
/// the variant — `"tokens"` for a plain token stream, `"detail"` when the
/// response carries a per-stage analysis breakdown.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum AnalyzeResponse {
    /// Plain token stream: `{"tokens": [...]}`.
    #[serde(rename = "tokens")]
    Standard(Vec<Token>),
    /// Detailed breakdown: `{"detail": {...}}`.
    #[serde(rename = "detail")]
    Explained(ExplainedResponse),
}
/// A single token produced by the analysis chain.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Token {
    /// The token text itself.
    pub token: String,
    /// Offset of the start of the token in the original text.
    pub start_offset: u32,
    /// Offset of the end of the token in the original text.
    pub end_offset: u32,
    /// Token type reported by the analyzer. The JSON field is `type`,
    /// which is a Rust keyword, hence the rename.
    #[serde(rename = "type")]
    pub ty: TokenType,
    /// Position of the token within the token stream.
    pub position: u32,
    /// Raw bytes of the token; `None` when absent from the response.
    pub bytes: Option<String>,
    /// Keyword flag; `None` when absent from the response.
    pub keyword: Option<bool>,
    /// Number of positions the token spans; `None` when absent.
    pub position_length: Option<u32>,
    /// Term frequency; `None` when absent from the response.
    pub term_frequency: Option<u32>,
}
/// Per-stage analysis breakdown carried by the `"detail"` variant of
/// [`AnalyzeResponse`].
///
/// NOTE(review): fields here are private, unlike `Token`'s public fields —
/// accessor methods may exist elsewhere in the crate; confirm before
/// changing visibility.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct ExplainedResponse {
    /// Whether a custom analyzer produced this analysis.
    custom_analyzer: bool,
    /// Analyzer stage output, if present in the response.
    analyzer: Option<AnalysisObject>,
    /// Character-filter stages (JSON field `charfilters`); defaults to
    /// empty when the field is missing.
    #[serde(default, rename = "charfilters")]
    char_filters: Vec<CharFilter>,
    /// Tokenizer stage output, if present in the response.
    tokenizer: Option<AnalysisObject>,
    /// Token-filter stages (JSON field `tokenfilters`); defaults to
    /// empty when the field is missing.
    #[serde(default, rename = "tokenfilters")]
    token_filters: Vec<AnalysisObject>,
}
/// A named analysis stage (analyzer, tokenizer, or token filter) together
/// with the tokens it produced.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct AnalysisObject {
    /// Name of the stage, e.g. `"lowercase"`.
    name: String,
    /// Tokens emitted by this stage.
    tokens: Vec<Token>,
}
/// A character-filter stage and the text it produced.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct CharFilter {
    /// Name of the character filter, e.g. `"html_strip"`.
    name: String,
    /// Text after the filter was applied.
    filtered_text: Vec<String>,
}
/// Type tag attached to each [`Token`].
///
/// Wire-format strings (see the `Serialize`/`Deserialize` impls below) are
/// listed per variant; any unrecognized string is preserved verbatim in
/// [`TokenType::Other`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    /// `<ALPHANUM>`
    Alphanum,
    /// `SYNONYM`
    Synonym,
    /// `word`
    Word,
    /// `<HANGUL>`
    Hangul,
    /// `<NUM>`
    Num,
    /// `<EMAIL>`
    Email,
    /// `<APOSTROPHE>`
    Apostrophe,
    /// `<DOUBLE>`
    Double,
    /// `<KATAKANA>`
    Katakana,
    /// `<ACRONYM>`
    Acronym,
    /// `gram`
    Gram,
    /// `fingerprint`
    Fingerprint,
    /// `shingle`
    Shingle,
    /// Any token type not covered above, stored as received.
    Other(String),
}
impl Default for TokenType {
fn default() -> Self {
Self::Alphanum
}
}
impl Serialize for TokenType {
    /// Serializes the token type as its wire-format string.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Map each variant to the exact string the server uses.
        let repr: &str = match self {
            Self::Alphanum => "<ALPHANUM>",
            Self::Synonym => "SYNONYM",
            Self::Word => "word",
            Self::Hangul => "<HANGUL>",
            Self::Num => "<NUM>",
            Self::Email => "<EMAIL>",
            Self::Apostrophe => "<APOSTROPHE>",
            Self::Double => "<DOUBLE>",
            Self::Katakana => "<KATAKANA>",
            Self::Acronym => "<ACRONYM>",
            Self::Gram => "gram",
            Self::Fingerprint => "fingerprint",
            Self::Shingle => "shingle",
            // Unknown types round-trip as the string they arrived with.
            Self::Other(other) => other.as_str(),
        };
        repr.serialize(serializer)
    }
}
impl<'de> Deserialize<'de> for TokenType {
    /// Deserializes a token type from its wire-format string.
    ///
    /// Known strings map to their dedicated variants; anything else is
    /// preserved verbatim in [`TokenType::Other`].
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: de::Deserializer<'de>,
    {
        let raw = String::deserialize(deserializer)?;
        Ok(match raw.as_str() {
            "<ALPHANUM>" => Self::Alphanum,
            "SYNONYM" => Self::Synonym,
            "word" => Self::Word,
            "<HANGUL>" => Self::Hangul,
            "<NUM>" => Self::Num,
            "<EMAIL>" => Self::Email,
            "<APOSTROPHE>" => Self::Apostrophe,
            "<DOUBLE>" => Self::Double,
            "<KATAKANA>" => Self::Katakana,
            "<ACRONYM>" => Self::Acronym,
            "gram" => Self::Gram,
            "fingerprint" => Self::Fingerprint,
            "shingle" => Self::Shingle,
            // Unknown type: reuse the already-owned `String` instead of
            // re-allocating with `to_string()` (the original did the
            // latter). NLL permits moving `raw` here because this arm
            // does not use the borrowed `&str`.
            _ => return Ok(Self::Other(raw)),
        })
    }
}
#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    /// Builds a `Token` with the required fields set and every optional
    /// field left at its default (`None`).
    fn make_token(text: &str, start_offset: u32, end_offset: u32, ty: TokenType, position: u32) -> Token {
        Token {
            token: text.to_string(),
            start_offset,
            end_offset,
            ty,
            position,
            ..Token::default()
        }
    }

    #[test]
    fn deserialize_standard() {
        let json_response = json!({
            "tokens": [
                {
                    "token": "test1",
                    "start_offset": 0,
                    "end_offset": 6,
                    "type": "<ALPHANUM>",
                    "position": 0
                },
                {
                    "token": "test2",
                    "start_offset": 7,
                    "end_offset": 11,
                    "type": "<ALPHANUM>",
                    "position": 1
                }
            ]
        });

        let expected = AnalyzeResponse::Standard(vec![
            make_token("test1", 0, 6, TokenType::Alphanum, 0),
            make_token("test2", 7, 11, TokenType::Alphanum, 1),
        ]);

        let parsed: AnalyzeResponse = serde_json::from_value(json_response).unwrap();
        assert_eq!(expected, parsed);
    }

    #[test]
    fn deserialize_explained() {
        let json_response = json!({
            "detail": {
                "custom_analyzer": true,
                "charfilters": [
                    {
                        "name": "html_strip",
                        "filtered_text": [
                            "test"
                        ]
                    }
                ],
                "tokenizer": {
                    "name": "lowercase",
                    "tokens": [
                        {
                            "token": "test",
                            "start_offset": 0,
                            "end_offset": 6,
                            "type": "SYNONYM",
                            "position": 0
                        }
                    ]
                },
                "tokenfilters": [
                    {
                        "name": "__anonymous__stop",
                        "tokens": [
                            {
                                "token": "test",
                                "start_offset": 0,
                                "end_offset": 6,
                                "type": "SYNONYM",
                                "position": 0
                            }
                        ]
                    }
                ]
            }
        });

        let token = make_token("test", 0, 6, TokenType::Synonym, 0);
        let expected = AnalyzeResponse::Explained(ExplainedResponse {
            custom_analyzer: true,
            analyzer: None,
            char_filters: vec![CharFilter {
                name: "html_strip".to_string(),
                filtered_text: vec!["test".to_string()],
            }],
            tokenizer: Some(AnalysisObject {
                name: "lowercase".to_string(),
                tokens: vec![token.clone()],
            }),
            token_filters: vec![AnalysisObject {
                name: "__anonymous__stop".to_string(),
                tokens: vec![token],
            }],
        });

        let parsed: AnalyzeResponse = serde_json::from_value(json_response).unwrap();
        assert_eq!(expected, parsed);
    }
}