use serde::ser::{Serialize, SerializeStruct, Serializer};
use crate::util::*;
/// Request body for the analyze API: text to analyze plus how to analyze it.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
pub struct Analyze {
    /// Text to analyze — a single string or a list of strings.
    text: StringOrVecString,
    /// Which analysis to apply (analyzer, normalizer, or field reference).
    /// `flatten` lifts its single key (e.g. `"analyzer"`) to the top level
    /// of the serialized body; skipped entirely when `None`.
    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip", flatten)]
    analysis: Option<Analysis>,
    /// Token attribute names to include; omitted from JSON when empty.
    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
    attributes: Vec<String>,
    /// Whether to request detailed analysis output; omitted when `None`.
    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
    explain: Option<bool>,
}
/// An ad-hoc analyzer definition: a tokenizer plus optional filter chains.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
pub struct CustomAnalyzer {
    /// Name of the tokenizer to use.
    tokenizer: String,
    /// Character filters, each a name or an inline definition object;
    /// omitted from JSON when empty.
    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
    char_filter: Vec<StringOrObject>,
    /// Token filters, each a name or an inline definition object;
    /// omitted from JSON when empty.
    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
    filter: Vec<StringOrObject>,
}
/// An ad-hoc normalizer definition: filter chains without a tokenizer.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Default)]
pub struct CustomNormalizer {
    /// Character filters, each a name or an inline definition object;
    /// omitted from JSON when empty.
    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
    char_filter: Vec<StringOrObject>,
    /// Token filters, each a name or an inline definition object;
    /// omitted from JSON when empty.
    #[serde(default, skip_serializing_if = "ShouldSkip::should_skip")]
    filter: Vec<StringOrObject>,
}
/// The analysis to apply to the text. Serialization is hand-written (see the
/// `Serialize` impl below): named variants become a single-key object and
/// custom variants serialize their inner struct directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Analysis {
    /// Serializes as `{"analyzer": <name>}`.
    BuiltInAnalyzer(String),
    /// Serializes the inner [`CustomAnalyzer`] fields directly.
    CustomAnalyzer(CustomAnalyzer),
    /// Serializes as `{"normalizer": <name>}`.
    BuiltInNormalizer(String),
    /// Serializes the inner [`CustomNormalizer`] fields directly.
    CustomNormalizer(CustomNormalizer),
    /// Serializes as `{"field": <name>}`.
    Field(String),
}
/// Either a plain string (e.g. a filter name) or an arbitrary JSON object
/// (an inline definition). `untagged`: serialized with no wrapper key.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(untagged)]
pub enum StringOrObject {
    /// A bare string value.
    String(String),
    /// An arbitrary JSON object value.
    Object(serde_json::Value),
}
/// Either a single string or a list of strings. `untagged`: serialized as a
/// bare string or a bare JSON array with no wrapper key.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
#[serde(untagged)]
pub enum StringOrVecString {
    /// A single string.
    String(String),
    /// Multiple strings.
    VecString(Vec<String>),
}
impl Analyze {
    /// Creates a new analyze request for the given text; all other fields
    /// start out unset.
    pub fn new<S>(text: S) -> Self
    where
        S: Into<StringOrVecString>,
    {
        Self {
            text: text.into(),
            ..Default::default()
        }
    }

    /// Sets the analysis to apply (analyzer, normalizer, or field reference).
    pub fn analyzer<S>(mut self, analyzer: S) -> Self
    where
        S: Into<Analysis>,
    {
        self.analysis = Some(analyzer.into());
        self
    }

    /// Appends token attribute names to the request.
    pub fn attributes<I>(mut self, attributes: I) -> Self
    where
        I: IntoIterator,
        I::Item: ToString,
    {
        for attribute in attributes {
            self.attributes.push(attribute.to_string());
        }
        self
    }

    /// Sets whether detailed analysis output should be requested.
    pub fn explain(mut self, explain: bool) -> Self {
        self.explain = Some(explain);
        self
    }
}
impl CustomNormalizer {
    /// Creates an empty custom normalizer definition.
    pub fn new() -> Self {
        Self::default()
    }

    /// Appends character filters (names or inline definitions).
    pub fn char_filter<I>(mut self, char_filter: I) -> Self
    where
        I: IntoIterator,
        I::Item: Into<StringOrObject>,
    {
        for item in char_filter {
            self.char_filter.push(item.into());
        }
        self
    }

    /// Appends token filters (names or inline definitions).
    pub fn filter<I>(mut self, filter: I) -> Self
    where
        I: IntoIterator,
        I::Item: Into<StringOrObject>,
    {
        for item in filter {
            self.filter.push(item.into());
        }
        self
    }
}
impl CustomAnalyzer {
    /// Creates a custom analyzer definition using the given tokenizer and
    /// no filters.
    pub fn new<S>(tokenizer: S) -> Self
    where
        S: ToString,
    {
        Self {
            tokenizer: tokenizer.to_string(),
            ..Self::default()
        }
    }

    /// Appends character filters (names or inline definitions).
    pub fn char_filter<I>(mut self, char_filter: I) -> Self
    where
        I: IntoIterator,
        I::Item: Into<StringOrObject>,
    {
        self.char_filter.extend(char_filter.into_iter().map(|f| f.into()));
        self
    }

    /// Appends token filters (names or inline definitions).
    pub fn filter<I>(mut self, filter: I) -> Self
    where
        I: IntoIterator,
        I::Item: Into<StringOrObject>,
    {
        self.filter.extend(filter.into_iter().map(|f| f.into()));
        self
    }
}
impl Analysis {
    /// Analysis that reuses the analyzer of an existing field.
    pub fn field<S>(name: S) -> Self
    where
        S: ToString,
    {
        Self::Field(name.to_string())
    }

    /// Analysis using a built-in analyzer, referenced by name.
    pub fn analyzer<S>(name: S) -> Self
    where
        S: ToString,
    {
        Self::BuiltInAnalyzer(name.to_string())
    }

    /// Analysis using a built-in normalizer, referenced by name.
    pub fn normalizer<S>(name: S) -> Self
    where
        S: ToString,
    {
        Self::BuiltInNormalizer(name.to_string())
    }
}
impl From<&str> for StringOrObject {
    /// Wraps a borrowed string slice as an owned `String` variant.
    fn from(s: &str) -> Self {
        Self::String(String::from(s))
    }
}

impl From<String> for StringOrObject {
    /// Wraps an owned string without copying.
    fn from(s: String) -> Self {
        Self::String(s)
    }
}

impl From<serde_json::Value> for StringOrObject {
    /// Wraps an arbitrary JSON value as the `Object` variant.
    fn from(json: serde_json::Value) -> Self {
        Self::Object(json)
    }
}
impl From<CustomAnalyzer> for Analysis {
    /// Lifts a custom analyzer into an [`Analysis`].
    fn from(analyzer: CustomAnalyzer) -> Self {
        Self::CustomAnalyzer(analyzer)
    }
}

impl From<CustomNormalizer> for Analysis {
    /// Lifts a custom normalizer into an [`Analysis`].
    fn from(normalizer: CustomNormalizer) -> Self {
        Self::CustomNormalizer(normalizer)
    }
}
impl From<String> for StringOrVecString {
    /// Wraps an owned string without copying.
    fn from(s: String) -> Self {
        Self::String(s)
    }
}

impl From<&str> for StringOrVecString {
    /// Copies a borrowed string slice into the `String` variant.
    fn from(s: &str) -> Self {
        Self::String(s.to_owned())
    }
}

impl From<Vec<&str>> for StringOrVecString {
    /// Copies each slice into an owned `Vec<String>`.
    fn from(values: Vec<&str>) -> Self {
        Self::VecString(values.into_iter().map(String::from).collect())
    }
}

impl<const N: usize> From<[&str; N]> for StringOrVecString {
    /// Copies each slice of a fixed-size array into an owned `Vec<String>`.
    fn from(values: [&str; N]) -> Self {
        Self::VecString(values.iter().map(|s| s.to_string()).collect())
    }
}

impl From<&[&str]> for StringOrVecString {
    /// Copies each slice of a borrowed slice into an owned `Vec<String>`.
    fn from(values: &[&str]) -> Self {
        Self::VecString(values.iter().copied().map(String::from).collect())
    }
}
impl Serialize for Analysis {
    /// Named variants serialize as a single-key object (`analyzer`,
    /// `normalizer`, or `field`); custom variants delegate to the inner
    /// struct's own `Serialize` impl.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Emits `{ key: value }` as a one-field struct named `struct_name`.
        fn single_key<S: Serializer>(
            serializer: S,
            struct_name: &'static str,
            key: &'static str,
            value: &str,
        ) -> Result<S::Ok, S::Error> {
            let mut state = serializer.serialize_struct(struct_name, 1)?;
            state.serialize_field(key, value)?;
            state.end()
        }

        match self {
            Self::BuiltInAnalyzer(name) => {
                single_key(serializer, "analysis_analyzer", "analyzer", name)
            }
            Self::CustomAnalyzer(custom) => custom.serialize(serializer),
            Self::BuiltInNormalizer(name) => {
                single_key(serializer, "analysis_normalizer", "normalizer", name)
            }
            Self::CustomNormalizer(custom) => custom.serialize(serializer),
            Self::Field(name) => single_key(serializer, "analysis_field", "field", name),
        }
    }
}
impl Default for StringOrVecString {
    /// Defaults to an empty single string.
    fn default() -> Self {
        Self::String(String::new())
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Verifies the serialized JSON body for every `Analysis` kind, plus the
    // skipping of unset optional fields.
    #[test]
    fn serialization() {
        // Bare request: only `text` is emitted; optional fields are skipped.
        assert_serialize(
            Analyze::new("analyze these pants"),
            json!({
                "text": "analyze these pants"
            }),
        );
        // Built-in analyzer: flattened to a top-level `analyzer` key.
        assert_serialize(
            Analyze::new("analyze these pants").analyzer(Analysis::analyzer("test_default")),
            json!({
                "text": "analyze these pants",
                "analyzer": "test_default"
            }),
        );
        // Custom analyzer: its fields are flattened to the top level, and
        // `attributes`/`explain`/multi-string `text` round-trip.
        assert_serialize(
            Analyze::new(["here is one to test", "and here is another one"])
                .analyzer(
                    CustomAnalyzer::new("lowercase")
                        .char_filter(["html_strip", "test_strip"])
                        .filter([json!({"type": "stop", "stopwords": ["a", "is", "this"]})]),
                )
                .attributes(["score", "keyword"])
                .explain(true),
            json!({
                "attributes": [
                    "score",
                    "keyword"
                ],
                "char_filter": [
                    "html_strip",
                    "test_strip"
                ],
                "filter" : [{"type": "stop", "stopwords": ["a", "is", "this"]}],
                "tokenizer": "lowercase",
                "explain": true,
                "text": ["here is one to test", "and here is another one"]
            }),
        );
        // Built-in normalizer: flattened to a top-level `normalizer` key.
        assert_serialize(
            Analyze::new("analyze these pants").analyzer(Analysis::normalizer("asciifolding")),
            json!({
                "text": "analyze these pants",
                "normalizer": "asciifolding"
            }),
        );
        // Custom normalizer: like the custom analyzer, but with no tokenizer.
        assert_serialize(
            Analyze::new(["here is one to test", "and here is another one"])
                .analyzer(
                    CustomNormalizer::new()
                        .char_filter(["html_strip", "test_strip"])
                        .filter([json!({"type": "stop", "stopwords": ["a", "is", "this"]})]),
                )
                .attributes(["score", "keyword"])
                .explain(true),
            json!({
                "attributes": [
                    "score",
                    "keyword"
                ],
                "char_filter": [
                    "html_strip",
                    "test_strip"
                ],
                "filter" : [{"type": "stop", "stopwords": ["a", "is", "this"]}],
                "explain": true,
                "text": ["here is one to test", "and here is another one"]
            }),
        );
        // Field reference: flattened to a top-level `field` key.
        assert_serialize(
            Analyze::new("analyze these pants").analyzer(Analysis::field("title")),
            json!({
                "text": "analyze these pants",
                "field": "title"
            }),
        );
    }
}