imdb_index/
search.rs

1use std::cmp;
2use std::f64;
3use std::fmt;
4use std::result;
5use std::str::FromStr;
6
7use csv;
8use lazy_static::lazy_static;
9use regex::Regex;
10use serde::{Deserialize, Deserializer, Serialize, Serializer};
11use strsim;
12
13use crate::error::{Error, Result};
14use crate::index::{Index, MediaEntity, NameQuery, NameScorer};
15use crate::record::{Episode, Rating, Title, TitleKind};
16use crate::scored::{Scored, SearchResults};
17use crate::util::{csv_file, IMDB_BASICS};
18
/// A handle that permits searching IMDb media records with relevance ranking.
///
/// A searcher is constructed by providing it a handle to an IMDb
/// [`Index`](struct.Index.html). The `Index` is responsible for managing the
/// lower level data access, while the `Searcher` provides high level routines
/// for ranking results.
///
/// The primary interface to a `Searcher` is its `search` method, which takes
/// as input a [`Query`](struct.Query.html) and returns a ranked list of
/// [`MediaEntity`](struct.MediaEntity.html) as output.
#[derive(Debug)]
pub struct Searcher {
    /// The index that every search issued through this searcher reads from.
    /// It is owned by the searcher and can be borrowed back via `index`.
    idx: Index,
}
33
34impl Searcher {
35    /// Create a new searcher for the given `Index`.
36    ///
37    /// A single searcher can be used to execute many queries.
38    ///
39    /// An existing `Index` can be opened with `Index::open`, and a new `Index`
40    /// can be created with `Index::create`.
41    pub fn new(idx: Index) -> Searcher {
42        Searcher { idx }
43    }
44
45    /// Execute a search with the given `Query`.
46    ///
47    /// Generally, the results returned are ranked in relevance order, where
48    /// each result has a score associated with it. The score is between
49    /// `0` and `1.0` (inclusive), where a score of `1.0` means "most similar"
50    /// and a score of `0` means "least similar."
51    ///
52    /// Depending on the query, the behavior of search can vary:
53    ///
54    /// * When the query specifies a similarity function, then the results are
55    ///   ranked by that function.
56    /// * When the query contains a name to search by and a name scorer, then
57    ///   results are ranked by the name scorer. If the query specifies a
58    ///   similarity function, then results are first ranked by the name
59    ///   scorer, and then re-ranked by the similarity function.
60    /// * When no name or no name scorer are specified by the query, then
61    ///   this search will do a (slow) exhaustive search over all media records
62    ///   in IMDb. As a special case, if the query contains a TV show ID, then
63    ///   only records in that TV show are searched, and this is generally
64    ///   fast.
65    /// * If the query is empty, then no results are returned.
66    ///
67    /// If there was a problem reading the underlying index or the IMDb data,
68    /// then an error is returned.
69    pub fn search(
70        &mut self,
71        query: &Query,
72    ) -> Result<SearchResults<MediaEntity>> {
73        if query.is_empty() {
74            return Ok(SearchResults::new());
75        }
76        let mut results = match query.name_query() {
77            None => self.search_exhaustive(query)?,
78            Some(nameq) => self.search_with_name(query, &nameq)?,
79        };
80        results.trim(query.size);
81        results.normalize();
82        Ok(results)
83    }
84
85    /// Return a mutable reference to the underlying index for this searcher.
86    pub fn index(&mut self) -> &mut Index {
87        &mut self.idx
88    }
89
90    fn search_with_name(
91        &mut self,
92        query: &Query,
93        name_query: &NameQuery,
94    ) -> Result<SearchResults<MediaEntity>> {
95        let mut results = SearchResults::new();
96        for r in self.idx.search(name_query)? {
97            if query.similarity.is_none() && results.len() >= query.size {
98                break;
99            }
100            let (score, title) = r.into_pair();
101            let entity = self.idx.entity_from_title(title)?;
102            if query.matches(&entity) {
103                results.push(Scored::new(entity).with_score(score));
104            }
105        }
106        if !query.similarity.is_none() {
107            results.rescore(|e| self.similarity(query, &e.title().title));
108        }
109        Ok(results)
110    }
111
112    fn search_exhaustive(
113        &mut self,
114        query: &Query,
115    ) -> Result<SearchResults<MediaEntity>> {
116        if let Some(ref tvshow_id) = query.tvshow_id {
117            return self.search_with_tvshow(query, tvshow_id);
118        }
119
120        let mut rdr = csv_file(self.idx.data_dir().join(IMDB_BASICS))?;
121        if !query.has_filters() {
122            let mut nresults = SearchResults::new();
123            let mut record = csv::StringRecord::new();
124            while rdr.read_record(&mut record).map_err(Error::csv)? {
125                let id_title = (record[0].to_string(), record[2].to_string());
126                nresults.push(Scored::new(id_title));
127            }
128            nresults.rescore(|t| self.similarity(query, &t.1));
129
130            let mut results = SearchResults::new();
131            for nresult in nresults.into_vec().into_iter().take(query.size) {
132                let (score, (id, _)) = nresult.into_pair();
133                let entity = match self.idx.entity(&id)? {
134                    None => continue,
135                    Some(entity) => entity,
136                };
137                results.push(Scored::new(entity).with_score(score));
138            }
139            Ok(results)
140        } else if query.needs_only_title() {
141            let mut tresults = SearchResults::new();
142            for result in rdr.deserialize() {
143                let title: Title = result.map_err(Error::csv)?;
144                if query.matches_title(&title) {
145                    tresults.push(Scored::new(title));
146                }
147            }
148            tresults.rescore(|t| self.similarity(query, &t.title));
149
150            let mut results = SearchResults::new();
151            for tresult in tresults.into_vec().into_iter().take(query.size) {
152                let (score, title) = tresult.into_pair();
153                let entity = self.idx.entity_from_title(title)?;
154                results.push(Scored::new(entity).with_score(score));
155            }
156            Ok(results)
157        } else {
158            let mut results = SearchResults::new();
159            for result in rdr.deserialize() {
160                let title = result.map_err(Error::csv)?;
161                let entity = self.idx.entity_from_title(title)?;
162                if query.matches(&entity) {
163                    results.push(Scored::new(entity));
164                }
165            }
166            results.rescore(|e| self.similarity(query, &e.title().title));
167            Ok(results)
168        }
169    }
170
171    fn search_with_tvshow(
172        &mut self,
173        query: &Query,
174        tvshow_id: &str,
175    ) -> Result<SearchResults<MediaEntity>> {
176        let mut results = SearchResults::new();
177        for ep in self.idx.seasons(tvshow_id)? {
178            let entity = match self.idx.entity(&ep.id)? {
179                None => continue,
180                Some(entity) => entity,
181            };
182            if query.matches(&entity) {
183                results.push(Scored::new(entity));
184            }
185        }
186        if !query.similarity.is_none() {
187            results.rescore(|e| self.similarity(query, &e.title().title));
188        }
189        Ok(results)
190    }
191
192    fn similarity(&self, query: &Query, name: &str) -> f64 {
193        match query.name {
194            None => 0.0,
195            Some(ref qname) => query.similarity.similarity(qname, name),
196        }
197    }
198}
199
/// A query that can be used to search IMDb media records.
///
/// A query typically consists of a fuzzy name query along with zero or more
/// filters. If a query lacks a fuzzy name query, then this will generally
/// result in an exhaustive search of all IMDb media records, which can be
/// slow.
///
/// Filters are matched conjunctively. That is, a search result must satisfy
/// every filter on a query to match.
///
/// Empty queries always return no results.
///
/// The `Serialize` and `Deserialize` implementations for this type use the
/// free-form query syntax.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Query {
    /// The free-form name to search for, if any.
    name: Option<String>,
    /// The scorer used to rank name index results. When `None`, the name
    /// index is not consulted.
    name_scorer: Option<NameScorer>,
    /// The similarity function used to rank (or re-rank) results.
    similarity: Similarity,
    /// The maximum number of results a search returns.
    size: usize,
    /// Title kinds to accept. An empty list accepts every kind.
    kinds: Vec<TitleKind>,
    /// Inclusive bounds checked against a title's start and end years.
    year: Range<u32>,
    /// Inclusive bounds on a title's number of votes.
    votes: Range<u32>,
    /// Inclusive bounds on an episode's season number.
    season: Range<u32>,
    /// Inclusive bounds on an episode's episode number.
    episode: Range<u32>,
    /// The IMDb ID of the TV show that results must belong to, if any.
    tvshow_id: Option<String>,
}
227
228impl Default for Query {
229    fn default() -> Query {
230        Query::new()
231    }
232}
233
234impl Query {
235    /// Create a new empty query.
236    pub fn new() -> Query {
237        Query {
238            name: None,
239            name_scorer: Some(NameScorer::default()),
240            similarity: Similarity::default(),
241            size: 30,
242            kinds: vec![],
243            year: Range::none(),
244            votes: Range::none(),
245            season: Range::none(),
246            episode: Range::none(),
247            tvshow_id: None,
248        }
249    }
250
251    /// Return true if and only if this query is empty.
252    ///
253    /// Searching with an empty query always yields no results.
254    pub fn is_empty(&self) -> bool {
255        self.name.as_ref().map_or(true, |n| n.is_empty())
256            && self.kinds.is_empty()
257            && self.year.is_none()
258            && self.votes.is_none()
259            && self.season.is_none()
260            && self.episode.is_none()
261            && self.tvshow_id.is_none()
262    }
263
264    /// Set the name to query by.
265    ///
266    /// The name given here is normalized and broken down into components
267    /// automatically to facilitate fuzzy searching.
268    ///
269    /// Note that if no name is provided in a query, then it is possible that
270    /// searching with the query will require exhaustively looking at every
271    /// record in IMDb. This will be slower.
272    pub fn name(mut self, name: &str) -> Query {
273        self.name = Some(name.to_string());
274        self
275    }
276
277    /// Set the scorer to use for name searches.
278    ///
279    /// The name scorer is used to rank results from searching the IMDb name
280    /// index. If no name query is given, then this scorer is not used.
281    ///
282    /// If `None` is provided here, then the name index will not be used. This
283    /// will likely cause an exhaustive search of all IMDb records, which can
284    /// be slow. The use case for providing a name query without a name scorer
285    /// is if you, for example, wanted to rank all of the records in IMDb
286    /// by the Levenshtein distance between your query and every other record
287    /// in IMDb. Normally, when the name index is used, only the (small number)
288    /// of results returned by searching the name are ranked. Typically, these
289    /// sorts of queries are useful for evaluation purposes, but not much else.
290    pub fn name_scorer(mut self, scorer: Option<NameScorer>) -> Query {
291        self.name_scorer = scorer;
292        self
293    }
294
295    /// Set the similarity function.
296    ///
297    /// The similarity function can be selected from a predefined set of
298    /// choices defined by the
299    /// [`Similarity`](enum.Similarity.html) type.
300    ///
301    /// When a similarity function is used, then any results from searching
302    /// the name index are re-ranked according to their similarity with the
303    /// query.
304    ///
305    /// By default, no similarity function is used.
306    pub fn similarity(mut self, sim: Similarity) -> Query {
307        self.similarity = sim;
308        self
309    }
310
311    /// Set the maximum number of results to be returned by a search.
312    ///
313    /// Note that setting this number too high (e.g., `> 10,000`) can impact
314    /// performance. This is a normal restriction found in most information
315    /// retrieval systems. That is, deep paging through result sets is
316    /// expensive.
317    pub fn size(mut self, size: usize) -> Query {
318        self.size = size;
319        self
320    }
321
322    /// Add a title kind to filter by.
323    ///
324    /// Multiple title kinds can be added to query, and search results must
325    /// match at least one of them.
326    ///
327    /// Note that it is not possible to remove title kinds from an existing
328    /// query. Instead, build a new query from scratch.
329    pub fn kind(mut self, kind: TitleKind) -> Query {
330        if !self.kinds.contains(&kind) {
331            self.kinds.push(kind);
332        }
333        self
334    }
335
336    /// Set the lower inclusive bound on a title's year.
337    ///
338    /// This applies to either the title's start or end years.
339    pub fn year_ge(mut self, year: u32) -> Query {
340        self.year.start = Some(year);
341        self
342    }
343
344    /// Set the upper inclusive bound on a title's year.
345    ///
346    /// This applies to either the title's start or end years.
347    pub fn year_le(mut self, year: u32) -> Query {
348        self.year.end = Some(year);
349        self
350    }
351
352    /// Set the lower inclusive bound on a title's number of votes.
353    pub fn votes_ge(mut self, votes: u32) -> Query {
354        self.votes.start = Some(votes);
355        self
356    }
357
358    /// Set the upper inclusive bound on a title's number of votes.
359    pub fn votes_le(mut self, votes: u32) -> Query {
360        self.votes.end = Some(votes);
361        self
362    }
363
364    /// Set the lower inclusive bound on a title's season.
365    ///
366    /// This automatically limits all results to episodes.
367    pub fn season_ge(mut self, season: u32) -> Query {
368        self.season.start = Some(season);
369        self
370    }
371
372    /// Set the upper inclusive bound on a title's season.
373    ///
374    /// This automatically limits all results to episodes.
375    pub fn season_le(mut self, season: u32) -> Query {
376        self.season.end = Some(season);
377        self
378    }
379
380    /// Set the lower inclusive bound on a title's episode number.
381    ///
382    /// This automatically limits all results to episodes.
383    pub fn episode_ge(mut self, episode: u32) -> Query {
384        self.episode.start = Some(episode);
385        self
386    }
387
388    /// Set the upper inclusive bound on a title's episode number.
389    ///
390    /// This automatically limits all results to episodes.
391    pub fn episode_le(mut self, episode: u32) -> Query {
392        self.episode.end = Some(episode);
393        self
394    }
395
396    /// Restrict results to episodes belonging to the TV show given by its
397    /// IMDb ID.
398    ///
399    /// This automatically limits all results to episodes.
400    pub fn tvshow_id(mut self, tvshow_id: &str) -> Query {
401        self.tvshow_id = Some(tvshow_id.to_string());
402        self
403    }
404
405    /// Returns true if and only if the given entity matches this query.
406    ///
407    /// Note that this only applies filters in this query. e.g., The name
408    /// aspect of the query, if one exists, is ignored.
409    fn matches(&self, ent: &MediaEntity) -> bool {
410        self.matches_title(&ent.title())
411            && self.matches_rating(ent.rating())
412            && self.matches_episode(ent.episode())
413    }
414
415    /// Returns true if and only if the given title matches this query.
416    ///
417    /// This ignores non-title filters.
418    fn matches_title(&self, title: &Title) -> bool {
419        if !self.kinds.is_empty() && !self.kinds.contains(&title.kind) {
420            return false;
421        }
422        if !self.year.contains(title.start_year.as_ref())
423            && !self.year.contains(title.end_year.as_ref())
424        {
425            return false;
426        }
427        true
428    }
429
430    /// Returns true if and only if the given rating matches this query.
431    ///
432    /// This ignores non-rating filters.
433    ///
434    /// If a rating filter is present and `None` is given, then this always
435    /// returns `false`.
436    fn matches_rating(&self, rating: Option<&Rating>) -> bool {
437        if !self.votes.contains(rating.map(|r| &r.votes)) {
438            return false;
439        }
440        true
441    }
442
443    /// Returns true if and only if the given episode matches this query.
444    ///
445    /// This ignores non-episode filters.
446    ///
447    /// If an episode filter is present and `None` is given, then this always
448    /// returns `false`.
449    fn matches_episode(&self, ep: Option<&Episode>) -> bool {
450        if !self.season.contains(ep.and_then(|e| e.season.as_ref())) {
451            return false;
452        }
453        if !self.episode.contains(ep.and_then(|e| e.episode.as_ref())) {
454            return false;
455        }
456        if let Some(ref tvshow_id) = self.tvshow_id {
457            if ep.map_or(true, |e| tvshow_id != &e.tvshow_id) {
458                return false;
459            }
460        }
461        true
462    }
463
464    /// Build a name query suitable for this query.
465    ///
466    /// The name query returned may request many more results than the result
467    /// size maximum on this query.
468    fn name_query(&self) -> Option<NameQuery> {
469        let name = match self.name.as_ref() {
470            None => return None,
471            Some(name) => &**name,
472        };
473        let scorer = match self.name_scorer {
474            None => return None,
475            Some(scorer) => scorer,
476        };
477        // We want our name query to return a healthy set of results, even if
478        // it's well beyond the result set size requested by the user. This is
479        // primarily because a name search doesn't incorporate filters itself,
480        // which simplifies the implementation. Therefore, we need to request
481        // more results than what we need in case our filter is aggressive.
482        let size = cmp::max(1000, self.size);
483        Some(NameQuery::new(name).with_size(size).with_scorer(scorer))
484    }
485
486    /// Returns true if and only if this query has any filters.
487    ///
488    /// When a query lacks filters, then the result set can be completely
489    /// determined by searching the name index and applying a similarity
490    /// function, if present. This can make exhaustive searches, particularly
491    /// the ones used during an evaluation, a bit faster.
492    fn has_filters(&self) -> bool {
493        self.needs_rating()
494            || self.needs_episode()
495            || !self.kinds.is_empty()
496            || !self.year.is_none()
497    }
498
499    /// Returns true if and only this query has only title filters.
500    ///
501    /// When true, this can make exhaustive searches faster by avoiding the
502    /// need to fetch the rating and/or episode for every title in IMDb.
503    fn needs_only_title(&self) -> bool {
504        !self.needs_rating() && !self.needs_episode()
505    }
506
507    /// Returns true if and only if this query has a rating filter.
508    fn needs_rating(&self) -> bool {
509        !self.votes.is_none()
510    }
511
512    /// Returns true if and only if this query has an episode filter.
513    fn needs_episode(&self) -> bool {
514        !self.season.is_none()
515            || !self.episode.is_none()
516            || !self.tvshow_id.is_none()
517    }
518}
519
520impl Serialize for Query {
521    fn serialize<S>(&self, s: S) -> result::Result<S::Ok, S::Error>
522    where
523        S: Serializer,
524    {
525        s.serialize_str(&self.to_string())
526    }
527}
528
529impl<'a> Deserialize<'a> for Query {
530    fn deserialize<D>(d: D) -> result::Result<Query, D::Error>
531    where
532        D: Deserializer<'a>,
533    {
534        use serde::de::Error;
535
536        let querystr = String::deserialize(d)?;
537        querystr
538            .parse()
539            .map_err(|e: self::Error| D::Error::custom(e.to_string()))
540    }
541}
542
543impl FromStr for Query {
544    type Err = Error;
545
546    fn from_str(qstr: &str) -> Result<Query> {
547        lazy_static! {
548            // The 'directive', 'terms' and 'space' groups are all mutually
549            // exclusive. When 'directive' matches, we parse it using DIRECTIVE
550            // in a subsequent step. When 'terms' matches, we add them to the
551            // name query. Then 'space' matches, we ignore it.
552            static ref PARTS: Regex = Regex::new(
553                r"\{(?P<directive>[^}]+)\}|(?P<terms>[^{}\s]+)|(?P<space>\s+)"
554            ).unwrap();
555
556            // Parse a directive of the form '{name:val}' or '{kind}'.
557            static ref DIRECTIVE: Regex = Regex::new(
558                r"^(?:(?P<name>[^:]+):(?P<val>.+)|(?P<kind>.+))$"
559            ).unwrap();
560        }
561        let mut terms = vec![];
562        let mut q = Query::new();
563        for caps in PARTS.captures_iter(qstr) {
564            if caps.name("space").is_some() {
565                continue;
566            } else if let Some(m) = caps.name("terms") {
567                terms.push(m.as_str().to_string());
568                continue;
569            }
570
571            let dcaps = DIRECTIVE.captures(&caps["directive"]).unwrap();
572            if let Some(m) = dcaps.name("kind") {
573                q = q.kind(m.as_str().parse()?);
574                continue;
575            }
576
577            let (name, val) = (dcaps["name"].trim(), dcaps["val"].trim());
578            match name {
579                "size" => {
580                    q.size = val.parse().map_err(Error::number)?;
581                }
582                "year" => {
583                    q.year = val.parse()?;
584                }
585                "votes" => {
586                    q.votes = val.parse()?;
587                }
588                "season" => {
589                    q.season = val.parse()?;
590                }
591                "episode" => {
592                    q.episode = val.parse()?;
593                }
594                "tvseries" | "tvshow" | "show" => {
595                    q.tvshow_id = Some(val.to_string());
596                }
597                "sim" | "similarity" => {
598                    q.similarity = val.parse()?;
599                }
600                "scorer" => {
601                    if val == "none" {
602                        q.name_scorer = None;
603                    } else {
604                        q.name_scorer = Some(val.parse()?);
605                    }
606                }
607                unk => return Err(Error::unknown_directive(unk)),
608            }
609        }
610        if !terms.is_empty() {
611            q = q.name(&terms.join(" "));
612        }
613        Ok(q)
614    }
615}
616
617impl fmt::Display for Query {
618    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
619        match self.name_scorer {
620            None => f.write_str("{scorer:none}")?,
621            Some(ref scorer) => write!(f, "{{scorer:{}}}", scorer)?,
622        }
623        write!(f, " {{sim:{}}}", self.similarity)?;
624        write!(f, " {{size:{}}}", self.size)?;
625
626        let mut kinds: Vec<&TitleKind> = self.kinds.iter().collect();
627        kinds.sort();
628        for kind in kinds {
629            write!(f, " {{{}}}", kind)?;
630        }
631        if !self.year.is_none() {
632            write!(f, " {{year:{}}}", self.year)?;
633        }
634        if !self.votes.is_none() {
635            write!(f, " {{votes:{}}}", self.votes)?;
636        }
637        if !self.season.is_none() {
638            write!(f, " {{season:{}}}", self.season)?;
639        }
640        if !self.episode.is_none() {
641            write!(f, " {{episode:{}}}", self.episode)?;
642        }
643        if let Some(ref tvshow_id) = self.tvshow_id {
644            write!(f, " {{show:{}}}", tvshow_id)?;
645        }
646        if let Some(ref name) = self.name {
647            write!(f, " {}", name)?;
648        }
649        Ok(())
650    }
651}
652
/// A ranking function to use when searching IMDb records.
///
/// A similarity ranking function computes a score between `0.0` and `1.0` (not
/// including `0` but including `1.0`) for a query and a candidate result. The
/// score is determined by the corresponding names for a query and a candidate,
/// and a higher score indicates more similarity.
///
/// This ranking function can be used to increase the precision of a set
/// of results. In particular, when a similarity function is provided to
/// a [`Query`](struct.Query.html), then any results returned by querying
/// the IMDb name index will be rescored according to this function. If no
/// similarity function is provided, then the results will be ranked according
/// to scores produced by the name index.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Similarity {
    /// Do not use a similarity function. This is the default.
    None,
    /// Computes the Levenshtein edit distance between two names and converts
    /// it to a similarity.
    Levenshtein,
    /// Computes the Jaro edit distance between two names and converts it to a
    /// similarity.
    Jaro,
    /// Computes the Jaro-Winkler edit distance between two names and converts
    /// it to a similarity.
    JaroWinkler,
}
680
681impl Similarity {
682    /// Returns a list of s trings representing the possible similarity
683    /// function names.
684    pub fn possible_names() -> &'static [&'static str] {
685        &["none", "levenshtein", "jaro", "jarowinkler"]
686    }
687
688    /// Returns true if and only if no similarity function was selected.
689    pub fn is_none(&self) -> bool {
690        *self == Similarity::None
691    }
692
693    /// Computes the similarity between the given strings according to the
694    /// underlying similarity function. If no similarity function is present,
695    /// then this always returns `1.0`.
696    ///
697    /// The returned value is always in the range `(0, 1]`.
698    pub fn similarity(&self, q1: &str, q2: &str) -> f64 {
699        let sim = match *self {
700            Similarity::None => 1.0,
701            Similarity::Levenshtein => {
702                let distance = strsim::levenshtein(q1, q2) as f64;
703                // We do a simple conversion of distance to similarity. This
704                // will produce very low scores even for very similar names,
705                // but callers may normalize scores.
706                //
707                // We also add `1` to the denominator to avoid division by
708                // zero. Incidentally, this causes the similarity of identical
709                // strings to be exactly 1.0, which is what we want.
710                1.0 / (1.0 + distance)
711            }
712            Similarity::Jaro => strsim::jaro(q1, q2),
713            Similarity::JaroWinkler => strsim::jaro_winkler(q1, q2),
714        };
715        // Don't permit a score to actually be zero. This prevents division
716        // by zero during normalization if all results have a score of zero.
717        if sim < f64::EPSILON {
718            f64::EPSILON
719        } else {
720            sim
721        }
722    }
723}
724
725impl Default for Similarity {
726    fn default() -> Similarity {
727        Similarity::None
728    }
729}
730
731impl fmt::Display for Similarity {
732    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
733        match *self {
734            Similarity::None => write!(f, "none"),
735            Similarity::Levenshtein => write!(f, "levenshtein"),
736            Similarity::Jaro => write!(f, "jaro"),
737            Similarity::JaroWinkler => write!(f, "jarowinkler"),
738        }
739    }
740}
741
742impl FromStr for Similarity {
743    type Err = Error;
744
745    fn from_str(s: &str) -> Result<Similarity> {
746        match s {
747            "none" => Ok(Similarity::None),
748            "levenshtein" => Ok(Similarity::Levenshtein),
749            "jaro" => Ok(Similarity::Jaro),
750            "jarowinkler" | "jaro-winkler" => Ok(Similarity::JaroWinkler),
751            unk => Err(Error::unknown_sim(unk)),
752        }
753    }
754}
755
/// An inclusive range filter over any partially ordered type `T`.
///
/// Either end of the range may be left unbounded by setting it to `None`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
struct Range<T> {
    /// The inclusive lower bound, or `None` if unbounded below.
    start: Option<T>,
    /// The inclusive upper bound, or `None` if unbounded above.
    end: Option<T>,
}

impl<T> Range<T> {
    /// Create a range that is unbounded on both ends.
    pub fn none() -> Range<T> {
        Range { start: None, end: None }
    }

    /// Returns true if and only if neither end of this range is bounded.
    pub fn is_none(&self) -> bool {
        match (&self.start, &self.end) {
            (&None, &None) => true,
            _ => false,
        }
    }
}

impl<T: PartialOrd> Range<T> {
    /// Returns true if and only if the given value falls inside this range.
    ///
    /// A missing value (`None`) is only contained in a range that is
    /// unbounded on both ends.
    pub fn contains(&self, t: Option<&T>) -> bool {
        match t {
            None => self.is_none(),
            // A missing bound places no restriction on its side.
            Some(value) => {
                self.start.as_ref().map_or(true, |lo| lo <= value)
                    && self.end.as_ref().map_or(true, |hi| value <= hi)
            }
        }
    }
}

impl<T: fmt::Display + PartialEq> fmt::Display for Range<T> {
    /// Write the range as `start-end`, leaving out any end that is
    /// unbounded. A range whose ends are equal is written as that single
    /// value.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match (self.start.as_ref(), self.end.as_ref()) {
            (None, None) => f.write_str("-"),
            (Some(lo), None) => write!(f, "{}-", lo),
            (None, Some(hi)) => write!(f, "-{}", hi),
            (Some(lo), Some(hi)) => {
                if lo == hi {
                    write!(f, "{}", lo)
                } else {
                    write!(f, "{}-{}", lo, hi)
                }
            }
        }
    }
}
801
802impl<E: std::error::Error + Send + Sync + 'static, T: FromStr<Err = E>> FromStr
803    for Range<T>
804{
805    type Err = Error;
806
807    fn from_str(range: &str) -> Result<Range<T>> {
808        // One wonders what happens if we need to support ranges consisting
809        // of negative numbers. Thankfully, it seems we needn't do that for
810        // the IMDb data.
811        let (start, end) = match range.find('-') {
812            None => {
813                // For no particular reason, parse it twice so that we don't
814                // need a `Clone` bound.
815                let start = range.parse().map_err(Error::number)?;
816                let end = range.parse().map_err(Error::number)?;
817                return Ok(Range { start: Some(start), end: Some(end) });
818            }
819            Some(i) => {
820                let (start, end) = range.split_at(i);
821                (start.trim(), end[1..].trim())
822            }
823        };
824        Ok(match (start.is_empty(), end.is_empty()) {
825            (true, true) => Range::none(),
826            (true, false) => Range {
827                start: None,
828                end: Some(end.parse().map_err(Error::number)?),
829            },
830            (false, true) => Range {
831                start: Some(start.parse().map_err(Error::number)?),
832                end: None,
833            },
834            (false, false) => Range {
835                start: Some(start.parse().map_err(Error::number)?),
836                end: Some(end.parse().map_err(Error::number)?),
837            },
838        })
839    }
840}
841
#[cfg(test)]
mod tests {
    use serde_json;

    use super::*;

    /// Parse `text` as a `Query`, panicking if it is rejected.
    fn parse_query(text: &str) -> Query {
        text.parse().unwrap()
    }

    /// Parse `text` as a `Range<u32>`, panicking if it is rejected.
    fn parse_range(text: &str) -> Range<u32> {
        text.parse().unwrap()
    }

    /// Build a `Range<u32>` directly from its optional bounds.
    fn bounds(start: Option<u32>, end: Option<u32>) -> Range<u32> {
        Range { start, end }
    }

    // Every textual range form that sets at least one bound.
    #[test]
    fn ranges() {
        assert_eq!(parse_range("5-10"), bounds(Some(5), Some(10)));
        assert_eq!(parse_range("5-"), bounds(Some(5), None));
        assert_eq!(parse_range("-10"), bounds(None, Some(10)));
        assert_eq!(parse_range("5-5"), bounds(Some(5), Some(5)));
        assert_eq!(parse_range("5"), bounds(Some(5), Some(5)));
    }

    // Free text and `{...}` directives, alone and interleaved.
    #[test]
    fn query_parser() {
        assert_eq!(
            parse_query("foo bar baz"),
            Query::new().name("foo bar baz")
        );

        assert_eq!(parse_query("{movie}"), Query::new().kind(TitleKind::Movie));

        assert_eq!(
            parse_query("{movie} {tvshow}"),
            Query::new().kind(TitleKind::Movie).kind(TitleKind::TVSeries)
        );

        assert_eq!(
            parse_query("{movie}{tvshow}"),
            Query::new().kind(TitleKind::Movie).kind(TitleKind::TVSeries)
        );

        assert_eq!(
            parse_query("foo {movie} bar {tvshow} baz"),
            Query::new()
                .name("foo bar baz")
                .kind(TitleKind::Movie)
                .kind(TitleKind::TVSeries)
        );

        assert_eq!(parse_query("{size:5}"), Query::new().size(5));

        assert_eq!(parse_query("{ size : 5 }"), Query::new().size(5));

        assert_eq!(
            parse_query("{year:1990}"),
            Query::new().year_ge(1990).year_le(1990)
        );

        assert_eq!(parse_query("{year:1990-}"), Query::new().year_ge(1990));

        assert_eq!(parse_query("{year:-1990}"), Query::new().year_le(1990));

        assert_eq!(parse_query("{year:-}"), Query::new());
    }

    // Directives that must be rejected outright.
    #[test]
    fn query_parser_error() {
        let rejected = |text: &str| text.parse::<Query>().is_err();

        assert!(rejected("{blah}"));
        assert!(rejected("{size:a}"));
        assert!(rejected("{year:}"));
    }

    // Unbalanced braces are expected to parse as a plain name.
    #[test]
    fn query_parser_weird() {
        assert_eq!(parse_query("{movie"), Query::new().name("movie"));
        assert_eq!(parse_query("movie}"), Query::new().name("movie"));
    }

    // The rendered form of a query built with several options.
    #[test]
    fn query_display() {
        let built = Query::new()
            .name("foo bar baz")
            .size(31)
            .season_ge(4)
            .season_le(5)
            .kind(TitleKind::TVSeries)
            .kind(TitleKind::Movie)
            .similarity(Similarity::Jaro);
        let rendered = built.to_string();

        assert_eq!(
            rendered,
            "{scorer:okapibm25} {sim:jaro} {size:31} {movie} {tvSeries} {season:4-5} foo bar baz"
        );
    }

    // A query embedded in a struct serializes to a single JSON string.
    #[test]
    fn query_serialize() {
        #[derive(Serialize)]
        struct Envelope {
            query: Query,
        }

        let envelope = Envelope {
            query: Query::new()
                .name("foo bar baz")
                .name_scorer(None)
                .size(31)
                .season_ge(4)
                .season_le(4),
        };
        let json = serde_json::to_string(&envelope).unwrap();

        assert_eq!(
            json,
            r#"{"query":"{scorer:none} {sim:none} {size:31} {season:4} foo bar baz"}"#
        );
    }

    // A JSON string deserializes to the same query as parsing it directly.
    #[test]
    fn query_deserialize() {
        #[derive(Deserialize)]
        struct Envelope {
            query: Query,
        }

        let want = parse_query("{size:30} {season:4} {show} foo bar baz");
        let decoded: Envelope = serde_json::from_str(
            r#"{"query": "foo {size:30} bar {season:4} baz {show}"}"#,
        )
        .unwrap();

        assert_eq!(decoded.query, want);
    }
}