1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
//! Defines the pipeline which processes text for inclusion in the index. Most users do not need
//! to use this module directly.

use serde::ser::{Serialize, SerializeSeq, Serializer};

/// Splits a text string into a vector of individual tokens.
pub fn tokenize(text: &str) -> Vec<String> {
    text.split(|c: char| c.is_whitespace() || c == '-')
        .filter(|s| !s.is_empty())
        .map(|s| s.trim().to_lowercase())
        .collect()
}

/// The function type used for each step in a pipeline.
pub type PipelineFn = fn(String) -> Option<String>;

/// A sequence of `PipelineFn`s which are run on tokens to prepare them for searching.
#[derive(Debug, Deserialize)]
pub struct Pipeline {
    #[serde(skip_deserializing)]
    pub queue: Vec<(String, PipelineFn)>,
}

impl Serialize for Pipeline {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut seq = serializer.serialize_seq(Some(self.queue.len()))?;
        for &(ref name, _) in &self.queue {
            seq.serialize_element(&name)?;
        }
        seq.end()
    }
}

impl Default for Pipeline {
    fn default() -> Self {
        ::lang::en::make_pipeline()
    }
}

impl Pipeline {
    /// Returns a pipeline for the given [`Language`](../lang/enum.Language.html).
    #[deprecated(since = "2.2.0", note = "Use `Language::make_pipeline()`")]
    pub fn for_language(lang: ::lang::Language) -> Pipeline {
        lang.make_pipeline()
    }

    /// Run the Pipeline against the given vector of tokens. The returned vector may be shorter
    /// than the input if a pipeline function returns `None` for a token.
    pub fn run(&self, tokens: Vec<String>) -> Vec<String> {
        let mut ret = vec![];
        for token in tokens {
            let mut token = Some(token);
            for &(_, func) in &self.queue {
                if let Some(t) = token {
                    token = func(t);
                } else {
                    break;
                }
            }
            if let Some(t) = token {
                ret.push(t);
            }
        }
        ret
    }
}