1use std::cmp;
2use std::f64;
3use std::fmt;
4use std::result;
5use std::str::FromStr;
6
7use csv;
8use lazy_static::lazy_static;
9use regex::Regex;
10use serde::{Deserialize, Deserializer, Serialize, Serializer};
11use strsim;
12
13use crate::error::{Error, Result};
14use crate::index::{Index, MediaEntity, NameQuery, NameScorer};
15use crate::record::{Episode, Rating, Title, TitleKind};
16use crate::scored::{Scored, SearchResults};
17use crate::util::{csv_file, IMDB_BASICS};
18
19#[derive(Debug)]
30pub struct Searcher {
31 idx: Index,
32}
33
34impl Searcher {
35 pub fn new(idx: Index) -> Searcher {
42 Searcher { idx }
43 }
44
45 pub fn search(
70 &mut self,
71 query: &Query,
72 ) -> Result<SearchResults<MediaEntity>> {
73 if query.is_empty() {
74 return Ok(SearchResults::new());
75 }
76 let mut results = match query.name_query() {
77 None => self.search_exhaustive(query)?,
78 Some(nameq) => self.search_with_name(query, &nameq)?,
79 };
80 results.trim(query.size);
81 results.normalize();
82 Ok(results)
83 }
84
85 pub fn index(&mut self) -> &mut Index {
87 &mut self.idx
88 }
89
90 fn search_with_name(
91 &mut self,
92 query: &Query,
93 name_query: &NameQuery,
94 ) -> Result<SearchResults<MediaEntity>> {
95 let mut results = SearchResults::new();
96 for r in self.idx.search(name_query)? {
97 if query.similarity.is_none() && results.len() >= query.size {
98 break;
99 }
100 let (score, title) = r.into_pair();
101 let entity = self.idx.entity_from_title(title)?;
102 if query.matches(&entity) {
103 results.push(Scored::new(entity).with_score(score));
104 }
105 }
106 if !query.similarity.is_none() {
107 results.rescore(|e| self.similarity(query, &e.title().title));
108 }
109 Ok(results)
110 }
111
112 fn search_exhaustive(
113 &mut self,
114 query: &Query,
115 ) -> Result<SearchResults<MediaEntity>> {
116 if let Some(ref tvshow_id) = query.tvshow_id {
117 return self.search_with_tvshow(query, tvshow_id);
118 }
119
120 let mut rdr = csv_file(self.idx.data_dir().join(IMDB_BASICS))?;
121 if !query.has_filters() {
122 let mut nresults = SearchResults::new();
123 let mut record = csv::StringRecord::new();
124 while rdr.read_record(&mut record).map_err(Error::csv)? {
125 let id_title = (record[0].to_string(), record[2].to_string());
126 nresults.push(Scored::new(id_title));
127 }
128 nresults.rescore(|t| self.similarity(query, &t.1));
129
130 let mut results = SearchResults::new();
131 for nresult in nresults.into_vec().into_iter().take(query.size) {
132 let (score, (id, _)) = nresult.into_pair();
133 let entity = match self.idx.entity(&id)? {
134 None => continue,
135 Some(entity) => entity,
136 };
137 results.push(Scored::new(entity).with_score(score));
138 }
139 Ok(results)
140 } else if query.needs_only_title() {
141 let mut tresults = SearchResults::new();
142 for result in rdr.deserialize() {
143 let title: Title = result.map_err(Error::csv)?;
144 if query.matches_title(&title) {
145 tresults.push(Scored::new(title));
146 }
147 }
148 tresults.rescore(|t| self.similarity(query, &t.title));
149
150 let mut results = SearchResults::new();
151 for tresult in tresults.into_vec().into_iter().take(query.size) {
152 let (score, title) = tresult.into_pair();
153 let entity = self.idx.entity_from_title(title)?;
154 results.push(Scored::new(entity).with_score(score));
155 }
156 Ok(results)
157 } else {
158 let mut results = SearchResults::new();
159 for result in rdr.deserialize() {
160 let title = result.map_err(Error::csv)?;
161 let entity = self.idx.entity_from_title(title)?;
162 if query.matches(&entity) {
163 results.push(Scored::new(entity));
164 }
165 }
166 results.rescore(|e| self.similarity(query, &e.title().title));
167 Ok(results)
168 }
169 }
170
171 fn search_with_tvshow(
172 &mut self,
173 query: &Query,
174 tvshow_id: &str,
175 ) -> Result<SearchResults<MediaEntity>> {
176 let mut results = SearchResults::new();
177 for ep in self.idx.seasons(tvshow_id)? {
178 let entity = match self.idx.entity(&ep.id)? {
179 None => continue,
180 Some(entity) => entity,
181 };
182 if query.matches(&entity) {
183 results.push(Scored::new(entity));
184 }
185 }
186 if !query.similarity.is_none() {
187 results.rescore(|e| self.similarity(query, &e.title().title));
188 }
189 Ok(results)
190 }
191
192 fn similarity(&self, query: &Query, name: &str) -> f64 {
193 match query.name {
194 None => 0.0,
195 Some(ref qname) => query.similarity.similarity(qname, name),
196 }
197 }
198}
199
200#[derive(Clone, Debug, Eq, Hash, PartialEq)]
215pub struct Query {
216 name: Option<String>,
217 name_scorer: Option<NameScorer>,
218 similarity: Similarity,
219 size: usize,
220 kinds: Vec<TitleKind>,
221 year: Range<u32>,
222 votes: Range<u32>,
223 season: Range<u32>,
224 episode: Range<u32>,
225 tvshow_id: Option<String>,
226}
227
228impl Default for Query {
229 fn default() -> Query {
230 Query::new()
231 }
232}
233
234impl Query {
235 pub fn new() -> Query {
237 Query {
238 name: None,
239 name_scorer: Some(NameScorer::default()),
240 similarity: Similarity::default(),
241 size: 30,
242 kinds: vec![],
243 year: Range::none(),
244 votes: Range::none(),
245 season: Range::none(),
246 episode: Range::none(),
247 tvshow_id: None,
248 }
249 }
250
251 pub fn is_empty(&self) -> bool {
255 self.name.as_ref().map_or(true, |n| n.is_empty())
256 && self.kinds.is_empty()
257 && self.year.is_none()
258 && self.votes.is_none()
259 && self.season.is_none()
260 && self.episode.is_none()
261 && self.tvshow_id.is_none()
262 }
263
264 pub fn name(mut self, name: &str) -> Query {
273 self.name = Some(name.to_string());
274 self
275 }
276
277 pub fn name_scorer(mut self, scorer: Option<NameScorer>) -> Query {
291 self.name_scorer = scorer;
292 self
293 }
294
295 pub fn similarity(mut self, sim: Similarity) -> Query {
307 self.similarity = sim;
308 self
309 }
310
311 pub fn size(mut self, size: usize) -> Query {
318 self.size = size;
319 self
320 }
321
322 pub fn kind(mut self, kind: TitleKind) -> Query {
330 if !self.kinds.contains(&kind) {
331 self.kinds.push(kind);
332 }
333 self
334 }
335
336 pub fn year_ge(mut self, year: u32) -> Query {
340 self.year.start = Some(year);
341 self
342 }
343
344 pub fn year_le(mut self, year: u32) -> Query {
348 self.year.end = Some(year);
349 self
350 }
351
352 pub fn votes_ge(mut self, votes: u32) -> Query {
354 self.votes.start = Some(votes);
355 self
356 }
357
358 pub fn votes_le(mut self, votes: u32) -> Query {
360 self.votes.end = Some(votes);
361 self
362 }
363
364 pub fn season_ge(mut self, season: u32) -> Query {
368 self.season.start = Some(season);
369 self
370 }
371
372 pub fn season_le(mut self, season: u32) -> Query {
376 self.season.end = Some(season);
377 self
378 }
379
380 pub fn episode_ge(mut self, episode: u32) -> Query {
384 self.episode.start = Some(episode);
385 self
386 }
387
388 pub fn episode_le(mut self, episode: u32) -> Query {
392 self.episode.end = Some(episode);
393 self
394 }
395
396 pub fn tvshow_id(mut self, tvshow_id: &str) -> Query {
401 self.tvshow_id = Some(tvshow_id.to_string());
402 self
403 }
404
405 fn matches(&self, ent: &MediaEntity) -> bool {
410 self.matches_title(&ent.title())
411 && self.matches_rating(ent.rating())
412 && self.matches_episode(ent.episode())
413 }
414
415 fn matches_title(&self, title: &Title) -> bool {
419 if !self.kinds.is_empty() && !self.kinds.contains(&title.kind) {
420 return false;
421 }
422 if !self.year.contains(title.start_year.as_ref())
423 && !self.year.contains(title.end_year.as_ref())
424 {
425 return false;
426 }
427 true
428 }
429
430 fn matches_rating(&self, rating: Option<&Rating>) -> bool {
437 if !self.votes.contains(rating.map(|r| &r.votes)) {
438 return false;
439 }
440 true
441 }
442
443 fn matches_episode(&self, ep: Option<&Episode>) -> bool {
450 if !self.season.contains(ep.and_then(|e| e.season.as_ref())) {
451 return false;
452 }
453 if !self.episode.contains(ep.and_then(|e| e.episode.as_ref())) {
454 return false;
455 }
456 if let Some(ref tvshow_id) = self.tvshow_id {
457 if ep.map_or(true, |e| tvshow_id != &e.tvshow_id) {
458 return false;
459 }
460 }
461 true
462 }
463
464 fn name_query(&self) -> Option<NameQuery> {
469 let name = match self.name.as_ref() {
470 None => return None,
471 Some(name) => &**name,
472 };
473 let scorer = match self.name_scorer {
474 None => return None,
475 Some(scorer) => scorer,
476 };
477 let size = cmp::max(1000, self.size);
483 Some(NameQuery::new(name).with_size(size).with_scorer(scorer))
484 }
485
486 fn has_filters(&self) -> bool {
493 self.needs_rating()
494 || self.needs_episode()
495 || !self.kinds.is_empty()
496 || !self.year.is_none()
497 }
498
499 fn needs_only_title(&self) -> bool {
504 !self.needs_rating() && !self.needs_episode()
505 }
506
507 fn needs_rating(&self) -> bool {
509 !self.votes.is_none()
510 }
511
512 fn needs_episode(&self) -> bool {
514 !self.season.is_none()
515 || !self.episode.is_none()
516 || !self.tvshow_id.is_none()
517 }
518}
519
520impl Serialize for Query {
521 fn serialize<S>(&self, s: S) -> result::Result<S::Ok, S::Error>
522 where
523 S: Serializer,
524 {
525 s.serialize_str(&self.to_string())
526 }
527}
528
529impl<'a> Deserialize<'a> for Query {
530 fn deserialize<D>(d: D) -> result::Result<Query, D::Error>
531 where
532 D: Deserializer<'a>,
533 {
534 use serde::de::Error;
535
536 let querystr = String::deserialize(d)?;
537 querystr
538 .parse()
539 .map_err(|e: self::Error| D::Error::custom(e.to_string()))
540 }
541}
542
543impl FromStr for Query {
544 type Err = Error;
545
546 fn from_str(qstr: &str) -> Result<Query> {
547 lazy_static! {
548 static ref PARTS: Regex = Regex::new(
553 r"\{(?P<directive>[^}]+)\}|(?P<terms>[^{}\s]+)|(?P<space>\s+)"
554 ).unwrap();
555
556 static ref DIRECTIVE: Regex = Regex::new(
558 r"^(?:(?P<name>[^:]+):(?P<val>.+)|(?P<kind>.+))$"
559 ).unwrap();
560 }
561 let mut terms = vec![];
562 let mut q = Query::new();
563 for caps in PARTS.captures_iter(qstr) {
564 if caps.name("space").is_some() {
565 continue;
566 } else if let Some(m) = caps.name("terms") {
567 terms.push(m.as_str().to_string());
568 continue;
569 }
570
571 let dcaps = DIRECTIVE.captures(&caps["directive"]).unwrap();
572 if let Some(m) = dcaps.name("kind") {
573 q = q.kind(m.as_str().parse()?);
574 continue;
575 }
576
577 let (name, val) = (dcaps["name"].trim(), dcaps["val"].trim());
578 match name {
579 "size" => {
580 q.size = val.parse().map_err(Error::number)?;
581 }
582 "year" => {
583 q.year = val.parse()?;
584 }
585 "votes" => {
586 q.votes = val.parse()?;
587 }
588 "season" => {
589 q.season = val.parse()?;
590 }
591 "episode" => {
592 q.episode = val.parse()?;
593 }
594 "tvseries" | "tvshow" | "show" => {
595 q.tvshow_id = Some(val.to_string());
596 }
597 "sim" | "similarity" => {
598 q.similarity = val.parse()?;
599 }
600 "scorer" => {
601 if val == "none" {
602 q.name_scorer = None;
603 } else {
604 q.name_scorer = Some(val.parse()?);
605 }
606 }
607 unk => return Err(Error::unknown_directive(unk)),
608 }
609 }
610 if !terms.is_empty() {
611 q = q.name(&terms.join(" "));
612 }
613 Ok(q)
614 }
615}
616
617impl fmt::Display for Query {
618 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
619 match self.name_scorer {
620 None => f.write_str("{scorer:none}")?,
621 Some(ref scorer) => write!(f, "{{scorer:{}}}", scorer)?,
622 }
623 write!(f, " {{sim:{}}}", self.similarity)?;
624 write!(f, " {{size:{}}}", self.size)?;
625
626 let mut kinds: Vec<&TitleKind> = self.kinds.iter().collect();
627 kinds.sort();
628 for kind in kinds {
629 write!(f, " {{{}}}", kind)?;
630 }
631 if !self.year.is_none() {
632 write!(f, " {{year:{}}}", self.year)?;
633 }
634 if !self.votes.is_none() {
635 write!(f, " {{votes:{}}}", self.votes)?;
636 }
637 if !self.season.is_none() {
638 write!(f, " {{season:{}}}", self.season)?;
639 }
640 if !self.episode.is_none() {
641 write!(f, " {{episode:{}}}", self.episode)?;
642 }
643 if let Some(ref tvshow_id) = self.tvshow_id {
644 write!(f, " {{show:{}}}", tvshow_id)?;
645 }
646 if let Some(ref name) = self.name {
647 write!(f, " {}", name)?;
648 }
649 Ok(())
650 }
651}
652
/// The function used to score how similar a candidate name is to the name
/// in a query.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Similarity {
    /// No similarity scoring: every pair of names gets the same score.
    None,
    /// Scoring derived from the Levenshtein edit distance.
    Levenshtein,
    /// Scoring by the Jaro metric.
    Jaro,
    /// Scoring by the Jaro-Winkler metric.
    JaroWinkler,
}
680
681impl Similarity {
682 pub fn possible_names() -> &'static [&'static str] {
685 &["none", "levenshtein", "jaro", "jarowinkler"]
686 }
687
688 pub fn is_none(&self) -> bool {
690 *self == Similarity::None
691 }
692
693 pub fn similarity(&self, q1: &str, q2: &str) -> f64 {
699 let sim = match *self {
700 Similarity::None => 1.0,
701 Similarity::Levenshtein => {
702 let distance = strsim::levenshtein(q1, q2) as f64;
703 1.0 / (1.0 + distance)
711 }
712 Similarity::Jaro => strsim::jaro(q1, q2),
713 Similarity::JaroWinkler => strsim::jaro_winkler(q1, q2),
714 };
715 if sim < f64::EPSILON {
718 f64::EPSILON
719 } else {
720 sim
721 }
722 }
723}
724
725impl Default for Similarity {
726 fn default() -> Similarity {
727 Similarity::None
728 }
729}
730
731impl fmt::Display for Similarity {
732 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
733 match *self {
734 Similarity::None => write!(f, "none"),
735 Similarity::Levenshtein => write!(f, "levenshtein"),
736 Similarity::Jaro => write!(f, "jaro"),
737 Similarity::JaroWinkler => write!(f, "jarowinkler"),
738 }
739 }
740}
741
742impl FromStr for Similarity {
743 type Err = Error;
744
745 fn from_str(s: &str) -> Result<Similarity> {
746 match s {
747 "none" => Ok(Similarity::None),
748 "levenshtein" => Ok(Similarity::Levenshtein),
749 "jaro" => Ok(Similarity::Jaro),
750 "jarowinkler" | "jaro-winkler" => Ok(Similarity::JaroWinkler),
751 unk => Err(Error::unknown_sim(unk)),
752 }
753 }
754}
755
/// An inclusive range in which either bound may be missing.
///
/// A range with neither bound set places no restriction at all.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
struct Range<T> {
    /// The inclusive lower bound, if any.
    start: Option<T>,
    /// The inclusive upper bound, if any.
    end: Option<T>,
}
764
765impl<T> Range<T> {
766 pub fn none() -> Range<T> {
767 Range { start: None, end: None }
768 }
769
770 pub fn is_none(&self) -> bool {
771 self.start.is_none() && self.end.is_none()
772 }
773}
774
775impl<T: PartialOrd> Range<T> {
776 pub fn contains(&self, t: Option<&T>) -> bool {
777 let t = match t {
778 None => return self.is_none(),
779 Some(t) => t,
780 };
781 match (&self.start, &self.end) {
782 (&None, &None) => true,
783 (&Some(ref s), &None) => s <= t,
784 (&None, &Some(ref e)) => t <= e,
785 (&Some(ref s), &Some(ref e)) => s <= t && t <= e,
786 }
787 }
788}
789
790impl<T: fmt::Display + PartialEq> fmt::Display for Range<T> {
791 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
792 match (&self.start, &self.end) {
793 (&None, &None) => write!(f, "-"),
794 (&Some(ref s), &None) => write!(f, "{}-", s),
795 (&None, &Some(ref e)) => write!(f, "-{}", e),
796 (&Some(ref s), &Some(ref e)) if s == e => write!(f, "{}", s),
797 (&Some(ref s), &Some(ref e)) => write!(f, "{}-{}", s, e),
798 }
799 }
800}
801
802impl<E: std::error::Error + Send + Sync + 'static, T: FromStr<Err = E>> FromStr
803 for Range<T>
804{
805 type Err = Error;
806
807 fn from_str(range: &str) -> Result<Range<T>> {
808 let (start, end) = match range.find('-') {
812 None => {
813 let start = range.parse().map_err(Error::number)?;
816 let end = range.parse().map_err(Error::number)?;
817 return Ok(Range { start: Some(start), end: Some(end) });
818 }
819 Some(i) => {
820 let (start, end) = range.split_at(i);
821 (start.trim(), end[1..].trim())
822 }
823 };
824 Ok(match (start.is_empty(), end.is_empty()) {
825 (true, true) => Range::none(),
826 (true, false) => Range {
827 start: None,
828 end: Some(end.parse().map_err(Error::number)?),
829 },
830 (false, true) => Range {
831 start: Some(start.parse().map_err(Error::number)?),
832 end: None,
833 },
834 (false, false) => Range {
835 start: Some(start.parse().map_err(Error::number)?),
836 end: Some(end.parse().map_err(Error::number)?),
837 },
838 })
839 }
840}
841
#[cfg(test)]
mod tests {
    use serde_json;

    use super::*;

    /// Parses `s` as a query, panicking when it is rejected.
    fn query(s: &str) -> Query {
        s.parse().unwrap()
    }

    /// Parses `s` as a `u32` range, panicking when it is rejected.
    fn range(s: &str) -> Range<u32> {
        s.parse().unwrap()
    }

    /// A query accepting both movies and TV series, added in that order.
    fn movie_and_series() -> Query {
        Query::new().kind(TitleKind::Movie).kind(TitleKind::TVSeries)
    }

    // Every accepted spelling of a range maps onto the expected bounds.
    #[test]
    fn ranges() {
        let cases = [
            ("5-10", Some(5), Some(10)),
            ("5-", Some(5), None),
            ("-10", None, Some(10)),
            ("5-5", Some(5), Some(5)),
            ("5", Some(5), Some(5)),
        ];
        for &(input, start, end) in cases.iter() {
            assert_eq!(range(input), Range { start, end });
        }
    }

    // Terms and directives may be mixed freely, with or without whitespace.
    #[test]
    fn query_parser() {
        assert_eq!(query("foo bar baz"), Query::new().name("foo bar baz"));

        assert_eq!(query("{movie}"), Query::new().kind(TitleKind::Movie));
        assert_eq!(query("{movie} {tvshow}"), movie_and_series());
        assert_eq!(query("{movie}{tvshow}"), movie_and_series());
        assert_eq!(
            query("foo {movie} bar {tvshow} baz"),
            Query::new()
                .name("foo bar baz")
                .kind(TitleKind::Movie)
                .kind(TitleKind::TVSeries)
        );

        assert_eq!(query("{size:5}"), Query::new().size(5));
        assert_eq!(query("{ size : 5 }"), Query::new().size(5));

        assert_eq!(
            query("{year:1990}"),
            Query::new().year_ge(1990).year_le(1990)
        );
        assert_eq!(query("{year:1990-}"), Query::new().year_ge(1990));
        assert_eq!(query("{year:-1990}"), Query::new().year_le(1990));
        assert_eq!(query("{year:-}"), Query::new());
    }

    // Unknown kinds and malformed values are rejected.
    #[test]
    fn query_parser_error() {
        for input in ["{blah}", "{size:a}", "{year:}"].iter() {
            assert!(input.parse::<Query>().is_err());
        }
    }

    // An unbalanced brace is dropped and the rest is treated as a term.
    #[test]
    fn query_parser_weird() {
        assert_eq!(query("{movie"), Query::new().name("movie"));
        assert_eq!(query("movie}"), Query::new().name("movie"));
    }

    // Kinds are written in sorted order, whatever order they were added in.
    #[test]
    fn query_display() {
        let q = Query::new()
            .name("foo bar baz")
            .size(31)
            .season_ge(4)
            .season_le(5)
            .kind(TitleKind::TVSeries)
            .kind(TitleKind::Movie)
            .similarity(Similarity::Jaro);
        let expected =
            "{scorer:okapibm25} {sim:jaro} {size:31} {movie} {tvSeries} {season:4-5} foo bar baz";
        assert_eq!(q.to_string(), expected);
    }

    // A query serializes as its display string.
    #[test]
    fn query_serialize() {
        #[derive(Serialize)]
        struct Test {
            query: Query,
        }
        let query = Query::new()
            .name("foo bar baz")
            .name_scorer(None)
            .size(31)
            .season_ge(4)
            .season_le(4);
        let got = serde_json::to_string(&Test { query }).unwrap();

        let expected = r#"{"query":"{scorer:none} {sim:none} {size:31} {season:4} foo bar baz"}"#;
        assert_eq!(got, expected);
    }

    // Deserializing a string gives the same query as parsing it directly.
    #[test]
    fn query_deserialize() {
        #[derive(Deserialize)]
        struct Test {
            query: Query,
        }

        let json = r#"{"query": "foo {size:30} bar {season:4} baz {show}"}"#;
        let expected = query("{size:30} {season:4} {show} foo bar baz");

        let got: Test = serde_json::from_str(json).unwrap();
        assert_eq!(got.query, expected);
    }
}