use std::cmp;
use std::f64;
use std::fmt;
use std::result;
use std::str::FromStr;
use csv;
use failure::Fail;
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use strsim;
use crate::error::{Error, Result};
use crate::index::{MediaEntity, Index, NameQuery, NameScorer};
use crate::record::{Episode, Rating, Title, TitleKind};
use crate::scored::{Scored, SearchResults};
use crate::util::{IMDB_BASICS, csv_file};
/// Runs `Query` searches against an `Index`.
///
/// Name searches and entity lookups are delegated to the wrapped index. The
/// exhaustive search paths additionally read the `IMDB_BASICS` CSV file found
/// in the index's data directory.
#[derive(Debug)]
pub struct Searcher {
/// The index every search and entity lookup goes through.
idx: Index,
}
impl Searcher {
pub fn new(idx: Index) -> Searcher {
Searcher { idx }
}
pub fn search(
&mut self,
query: &Query,
) -> Result<SearchResults<MediaEntity>> {
if query.is_empty() {
return Ok(SearchResults::new());
}
let mut results = match query.name_query() {
None => self.search_exhaustive(query)?,
Some(nameq) => self.search_with_name(query, &nameq)?,
};
results.trim(query.size);
results.normalize();
Ok(results)
}
pub fn index(&mut self) -> &mut Index {
&mut self.idx
}
fn search_with_name(
&mut self,
query: &Query,
name_query: &NameQuery,
) -> Result<SearchResults<MediaEntity>> {
let mut results = SearchResults::new();
for r in self.idx.search(name_query)? {
if query.similarity.is_none() && results.len() >= query.size {
break;
}
let (score, title) = r.into_pair();
let entity = self.idx.entity_from_title(title)?;
if query.matches(&entity) {
results.push(Scored::new(entity).with_score(score));
}
}
if !query.similarity.is_none() {
results.rescore(|e| self.similarity(query, &e.title().title));
}
Ok(results)
}
fn search_exhaustive(
&mut self,
query: &Query,
) -> Result<SearchResults<MediaEntity>> {
if let Some(ref tvshow_id) = query.tvshow_id {
return self.search_with_tvshow(query, tvshow_id);
}
let mut rdr = csv_file(self.idx.data_dir().join(IMDB_BASICS))?;
if !query.has_filters() {
let mut nresults = SearchResults::new();
let mut record = csv::StringRecord::new();
while rdr.read_record(&mut record).map_err(Error::csv)? {
let id_title = (record[0].to_string(), record[2].to_string());
nresults.push(Scored::new(id_title));
}
nresults.rescore(|t| self.similarity(query, &t.1));
let mut results = SearchResults::new();
for nresult in nresults.into_vec().into_iter().take(query.size) {
let (score, (id, _)) = nresult.into_pair();
let entity = match self.idx.entity(&id)? {
None => continue,
Some(entity) => entity,
};
results.push(Scored::new(entity).with_score(score));
}
Ok(results)
} else if query.needs_only_title() {
let mut tresults = SearchResults::new();
for result in rdr.deserialize() {
let title: Title = result.map_err(Error::csv)?;
if query.matches_title(&title) {
tresults.push(Scored::new(title));
}
}
tresults.rescore(|t| self.similarity(query, &t.title));
let mut results = SearchResults::new();
for tresult in tresults.into_vec().into_iter().take(query.size) {
let (score, title) = tresult.into_pair();
let entity = self.idx.entity_from_title(title)?;
results.push(Scored::new(entity).with_score(score));
}
Ok(results)
} else {
let mut results = SearchResults::new();
for result in rdr.deserialize() {
let title = result.map_err(Error::csv)?;
let entity = self.idx.entity_from_title(title)?;
if query.matches(&entity) {
results.push(Scored::new(entity));
}
}
results.rescore(|e| self.similarity(query, &e.title().title));
Ok(results)
}
}
fn search_with_tvshow(
&mut self,
query: &Query,
tvshow_id: &str,
) -> Result<SearchResults<MediaEntity>> {
let mut results = SearchResults::new();
for ep in self.idx.seasons(tvshow_id)? {
let entity = match self.idx.entity(&ep.id)? {
None => continue,
Some(entity) => entity,
};
if query.matches(&entity) {
results.push(Scored::new(entity));
}
}
if !query.similarity.is_none() {
results.rescore(|e| self.similarity(query, &e.title().title));
}
Ok(results)
}
fn similarity(&self, query: &Query, name: &str) -> f64 {
match query.name {
None => 0.0,
Some(ref qname) => query.similarity.similarity(qname, name),
}
}
}
/// A search query: an optional free-form name plus a set of filters.
///
/// Queries are built with the chained builder methods or parsed from a string
/// via `FromStr`, and are serialized as the string produced by `Display`.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Query {
/// The free-form name to search for, if any.
name: Option<String>,
/// The scorer for name-index searches. When `None`, no name query is built
/// and the search falls back to an exhaustive scan.
name_scorer: Option<NameScorer>,
/// The similarity function used to rescore results against `name`.
similarity: Similarity,
/// The maximum number of results to return.
size: usize,
/// Title kinds to accept. An empty list accepts every kind.
kinds: Vec<TitleKind>,
/// Accepted years; a title matches if its start year or end year is in range.
year: Range<u32>,
/// Accepted rating vote counts.
votes: Range<u32>,
/// Accepted season numbers.
season: Range<u32>,
/// Accepted episode numbers.
episode: Range<u32>,
/// When set, only episodes belonging to this TV show ID match.
tvshow_id: Option<String>,
}
impl Default for Query {
fn default() -> Query {
Query::new()
}
}
impl Query {
pub fn new() -> Query {
Query {
name: None,
name_scorer: Some(NameScorer::default()),
similarity: Similarity::default(),
size: 30,
kinds: vec![],
year: Range::none(),
votes: Range::none(),
season: Range::none(),
episode: Range::none(),
tvshow_id: None,
}
}
pub fn is_empty(&self) -> bool {
self.name.as_ref().map_or(true, |n| n.is_empty())
&& self.kinds.is_empty()
&& self.year.is_none()
&& self.votes.is_none()
&& self.season.is_none()
&& self.episode.is_none()
&& self.tvshow_id.is_none()
}
pub fn name(mut self, name: &str) -> Query {
self.name = Some(name.to_string());
self
}
pub fn name_scorer(mut self, scorer: Option<NameScorer>) -> Query {
self.name_scorer = scorer;
self
}
pub fn similarity(mut self, sim: Similarity) -> Query {
self.similarity = sim;
self
}
pub fn size(mut self, size: usize) -> Query {
self.size = size;
self
}
pub fn kind(mut self, kind: TitleKind) -> Query {
if !self.kinds.contains(&kind) {
self.kinds.push(kind);
}
self
}
pub fn year_ge(mut self, year: u32) -> Query {
self.year.start = Some(year);
self
}
pub fn year_le(mut self, year: u32) -> Query {
self.year.end = Some(year);
self
}
pub fn votes_ge(mut self, votes: u32) -> Query {
self.votes.start = Some(votes);
self
}
pub fn votes_le(mut self, votes: u32) -> Query {
self.votes.end = Some(votes);
self
}
pub fn season_ge(mut self, season: u32) -> Query {
self.season.start = Some(season);
self
}
pub fn season_le(mut self, season: u32) -> Query {
self.season.end = Some(season);
self
}
pub fn episode_ge(mut self, episode: u32) -> Query {
self.episode.start = Some(episode);
self
}
pub fn episode_le(mut self, episode: u32) -> Query {
self.episode.end = Some(episode);
self
}
pub fn tvshow_id(mut self, tvshow_id: &str) -> Query {
self.tvshow_id = Some(tvshow_id.to_string());
self
}
fn matches(&self, ent: &MediaEntity) -> bool {
self.matches_title(&ent.title())
&& self.matches_rating(ent.rating())
&& self.matches_episode(ent.episode())
}
fn matches_title(&self, title: &Title) -> bool {
if !self.kinds.is_empty() && !self.kinds.contains(&title.kind) {
return false;
}
if !self.year.contains(title.start_year.as_ref())
&& !self.year.contains(title.end_year.as_ref())
{
return false;
}
true
}
fn matches_rating(&self, rating: Option<&Rating>) -> bool {
if !self.votes.contains(rating.map(|r| &r.votes)) {
return false;
}
true
}
fn matches_episode(&self, ep: Option<&Episode>) -> bool {
if !self.season.contains(ep.and_then(|e| e.season.as_ref())) {
return false;
}
if !self.episode.contains(ep.and_then(|e| e.episode.as_ref())) {
return false;
}
if let Some(ref tvshow_id) = self.tvshow_id {
if ep.map_or(true, |e| tvshow_id != &e.tvshow_id) {
return false;
}
}
true
}
fn name_query(&self) -> Option<NameQuery> {
let name = match self.name.as_ref() {
None => return None,
Some(name) => &**name,
};
let scorer = match self.name_scorer {
None => return None,
Some(scorer) => scorer,
};
let size = cmp::max(1000, self.size);
Some(NameQuery::new(name).with_size(size).with_scorer(scorer))
}
fn has_filters(&self) -> bool {
self.needs_rating()
|| self.needs_episode()
|| !self.kinds.is_empty()
|| !self.year.is_none()
}
fn needs_only_title(&self) -> bool {
!self.needs_rating() && !self.needs_episode()
}
fn needs_rating(&self) -> bool {
!self.votes.is_none()
}
fn needs_episode(&self) -> bool {
!self.season.is_none()
|| !self.episode.is_none()
|| !self.tvshow_id.is_none()
}
}
impl Serialize for Query {
    /// Serialize the query as the string produced by its `Display` impl.
    fn serialize<S>(&self, s: S) -> result::Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let rendered = self.to_string();
        s.serialize_str(&rendered)
    }
}
impl<'a> Deserialize<'a> for Query {
fn deserialize<D>(d: D) -> result::Result<Query, D::Error>
where D: Deserializer<'a>
{
use serde::de::Error;
let querystr = String::deserialize(d)?;
querystr.parse().map_err(|e: self::Error| {
D::Error::custom(e.to_string())
})
}
}
impl FromStr for Query {
type Err = Error;
fn from_str(qstr: &str) -> Result<Query> {
lazy_static! {
static ref PARTS: Regex = Regex::new(
r"\{(?P<directive>[^}]+)\}|(?P<terms>[^{}\s]+)|(?P<space>\s+)"
).unwrap();
static ref DIRECTIVE: Regex = Regex::new(
r"^(?:(?P<name>[^:]+):(?P<val>.+)|(?P<kind>.+))$"
).unwrap();
}
let mut terms = vec![];
let mut q = Query::new();
for caps in PARTS.captures_iter(qstr) {
if caps.name("space").is_some() {
continue;
} else if let Some(m) = caps.name("terms") {
terms.push(m.as_str().to_string());
continue;
}
let dcaps = DIRECTIVE.captures(&caps["directive"]).unwrap();
if let Some(m) = dcaps.name("kind") {
q = q.kind(m.as_str().parse()?);
continue;
}
let (name, val) = (dcaps["name"].trim(), dcaps["val"].trim());
match name {
"size" => { q.size = val.parse().map_err(Error::number)?; }
"year" => { q.year = val.parse()?; }
"votes" => { q.votes = val.parse()?; }
"season" => { q.season = val.parse()?; }
"episode" => { q.episode = val.parse()?; }
"tvseries" | "tvshow" | "show" => {
q.tvshow_id = Some(val.to_string());
}
"sim" | "similarity" => {
q.similarity = val.parse()?;
}
"scorer" => {
if val == "none" {
q.name_scorer = None;
} else {
q.name_scorer = Some(val.parse()?);
}
}
unk => return Err(Error::unknown_directive(unk)),
}
}
if !terms.is_empty() {
q = q.name(&terms.join(" "));
}
Ok(q)
}
}
impl fmt::Display for Query {
    /// Write the query in the syntax accepted by `FromStr`.
    ///
    /// The scorer, similarity and size directives are always written. Kinds
    /// are written in sorted order, followed by each range filter that is
    /// set, the TV show ID, and finally the name.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if let Some(scorer) = self.name_scorer.as_ref() {
            write!(f, "{{scorer:{}}}", scorer)?;
        } else {
            f.write_str("{scorer:none}")?;
        }
        write!(f, " {{sim:{}}}", self.similarity)?;
        write!(f, " {{size:{}}}", self.size)?;

        // Sort so the output does not depend on the order kinds were added.
        let mut sorted_kinds: Vec<&TitleKind> = self.kinds.iter().collect();
        sorted_kinds.sort();
        for kind in sorted_kinds {
            write!(f, " {{{}}}", kind)?;
        }

        let (year, votes) = (&self.year, &self.votes);
        let (season, episode) = (&self.season, &self.episode);
        if !year.is_none() {
            write!(f, " {{year:{}}}", year)?;
        }
        if !votes.is_none() {
            write!(f, " {{votes:{}}}", votes)?;
        }
        if !season.is_none() {
            write!(f, " {{season:{}}}", season)?;
        }
        if !episode.is_none() {
            write!(f, " {{episode:{}}}", episode)?;
        }

        match self.tvshow_id {
            Some(ref tvshow_id) => write!(f, " {{show:{}}}", tvshow_id)?,
            None => {}
        }
        match self.name {
            Some(ref name) => write!(f, " {}", name)?,
            None => {}
        }
        Ok(())
    }
}
/// The string similarity function used to rescore search results against the
/// queried name.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum Similarity {
/// No similarity ranking; every pair of names scores `1.0`.
None,
/// Score derived from Levenshtein distance as `1 / (1 + distance)`.
Levenshtein,
/// Score computed by `strsim::jaro`.
Jaro,
/// Score computed by `strsim::jaro_winkler`.
JaroWinkler,
}
impl Similarity {
    /// Return the canonical names of all similarity functions, as written by
    /// `Display`.
    pub fn possible_names() -> &'static [&'static str] {
        const NAMES: &[&str] = &["none", "levenshtein", "jaro", "jarowinkler"];
        NAMES
    }

    /// Return true when this is `Similarity::None`.
    pub fn is_none(&self) -> bool {
        match *self {
            Similarity::None => true,
            Similarity::Levenshtein
            | Similarity::Jaro
            | Similarity::JaroWinkler => false,
        }
    }

    /// Compute the similarity between `q1` and `q2`.
    ///
    /// The result is never smaller than `f64::EPSILON`, so a score is never
    /// zero.
    pub fn similarity(&self, q1: &str, q2: &str) -> f64 {
        let score = match *self {
            Similarity::None => 1.0,
            Similarity::Jaro => strsim::jaro(q1, q2),
            Similarity::JaroWinkler => strsim::jaro_winkler(q1, q2),
            Similarity::Levenshtein => {
                // Adding 1 to the distance avoids dividing by zero and makes
                // identical strings score exactly 1.0.
                let edits = strsim::levenshtein(q1, q2) as f64;
                1.0 / (1.0 + edits)
            }
        };
        let floor = f64::EPSILON;
        if score < floor {
            floor
        } else {
            score
        }
    }
}
impl Default for Similarity {
    /// By default no similarity function is applied.
    fn default() -> Self {
        Similarity::None
    }
}
impl fmt::Display for Similarity {
    /// Write the canonical lowercase name of the similarity function.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match *self {
            Similarity::None => "none",
            Similarity::Levenshtein => "levenshtein",
            Similarity::Jaro => "jaro",
            Similarity::JaroWinkler => "jarowinkler",
        };
        f.write_str(label)
    }
}
impl FromStr for Similarity {
type Err = Error;
fn from_str(s: &str) -> Result<Similarity> {
match s {
"none" => Ok(Similarity::None),
"levenshtein" => Ok(Similarity::Levenshtein),
"jaro" => Ok(Similarity::Jaro),
"jarowinkler" | "jaro-winkler" => Ok(Similarity::JaroWinkler),
unk => Err(Error::unknown_sim(unk)),
}
}
}
/// An inclusive range whose lower and upper bounds are each optional.
///
/// A range with neither bound set is the "none" range and places no
/// restriction on values.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
struct Range<T> {
    /// Inclusive lower bound, if any.
    start: Option<T>,
    /// Inclusive upper bound, if any.
    end: Option<T>,
}

impl<T> Range<T> {
    /// Create a range with no bounds.
    pub fn none() -> Range<T> {
        Range { start: None, end: None }
    }

    /// Return true when neither bound is set.
    pub fn is_none(&self) -> bool {
        match (&self.start, &self.end) {
            (None, None) => true,
            _ => false,
        }
    }
}

impl<T: PartialOrd> Range<T> {
    /// Return true when `t` lies within this range (bounds inclusive).
    ///
    /// A missing value (`None`) is only contained in a range with no bounds.
    pub fn contains(&self, t: Option<&T>) -> bool {
        match t {
            None => self.is_none(),
            Some(value) => {
                // An absent bound never excludes anything.
                self.start.as_ref().map_or(true, |lo| lo <= value)
                    && self.end.as_ref().map_or(true, |hi| value <= hi)
            }
        }
    }
}

impl<T: fmt::Display + PartialEq> fmt::Display for Range<T> {
    /// Write the range as `start-end`, omitting whichever bound is unset.
    /// A range whose bounds are equal is written as that single value.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match (self.start.as_ref(), self.end.as_ref()) {
            (Some(lo), Some(hi)) if lo == hi => write!(f, "{}", lo),
            (Some(lo), Some(hi)) => write!(f, "{}-{}", lo, hi),
            (Some(lo), None) => write!(f, "{}-", lo),
            (None, Some(hi)) => write!(f, "-{}", hi),
            (None, None) => write!(f, "-"),
        }
    }
}
impl<E: Fail, T: FromStr<Err=E>> FromStr for Range<T> {
type Err = Error;
fn from_str(range: &str) -> Result<Range<T>> {
let (start, end) = match range.find('-') {
None => {
let start = range.parse().map_err(Error::number)?;
let end = range.parse().map_err(Error::number)?;
return Ok(Range { start: Some(start), end: Some(end) });
}
Some(i) => {
let (start, end) = range.split_at(i);
(start.trim(), end[1..].trim())
}
};
Ok(match (start.is_empty(), end.is_empty()) {
(true, true) => Range::none(),
(true, false) => {
Range {
start: None,
end: Some(end.parse().map_err(Error::number)?),
}
}
(false, true) => {
Range {
start: Some(start.parse().map_err(Error::number)?),
end: None,
}
}
(false, false) => {
Range {
start: Some(start.parse().map_err(Error::number)?),
end: Some(end.parse().map_err(Error::number)?),
}
}
})
}
}
// Unit tests for range parsing and for parsing, displaying and
// (de)serializing queries.
#[cfg(test)]
mod tests {
use serde_json;
use super::*;
// Every accepted spelling of a range parses to the expected bounds; a bare
// value sets both bounds to that value.
#[test]
fn ranges() {
let r: Range<u32> = "5-10".parse().unwrap();
assert_eq!(r, Range { start: Some(5), end: Some(10) });
let r: Range<u32> = "5-".parse().unwrap();
assert_eq!(r, Range { start: Some(5), end: None });
let r: Range<u32> = "-10".parse().unwrap();
assert_eq!(r, Range { start: None, end: Some(10) });
let r: Range<u32> = "5-5".parse().unwrap();
assert_eq!(r, Range { start: Some(5), end: Some(5) });
let r: Range<u32> = "5".parse().unwrap();
assert_eq!(r, Range { start: Some(5), end: Some(5) });
}
// Free-form terms become the name, and directives may appear anywhere in the
// query, with or without surrounding whitespace.
#[test]
fn query_parser() {
let q: Query = "foo bar baz".parse().unwrap();
assert_eq!(q, Query::new().name("foo bar baz"));
let q: Query = "{movie}".parse().unwrap();
assert_eq!(q, Query::new().kind(TitleKind::Movie));
let q: Query = "{movie} {tvshow}".parse().unwrap();
assert_eq!(q, Query::new()
.kind(TitleKind::Movie).kind(TitleKind::TVSeries));
let q: Query = "{movie}{tvshow}".parse().unwrap();
assert_eq!(q, Query::new()
.kind(TitleKind::Movie).kind(TitleKind::TVSeries));
let q: Query = "foo {movie} bar {tvshow} baz".parse().unwrap();
assert_eq!(q, Query::new()
.name("foo bar baz")
.kind(TitleKind::Movie)
.kind(TitleKind::TVSeries));
let q: Query = "{size:5}".parse().unwrap();
assert_eq!(q, Query::new().size(5));
// Whitespace around the directive name and value is ignored.
let q: Query = "{ size : 5 }".parse().unwrap();
assert_eq!(q, Query::new().size(5));
let q: Query = "{year:1990}".parse().unwrap();
assert_eq!(q, Query::new().year_ge(1990).year_le(1990));
let q: Query = "{year:1990-}".parse().unwrap();
assert_eq!(q, Query::new().year_ge(1990));
let q: Query = "{year:-1990}".parse().unwrap();
assert_eq!(q, Query::new().year_le(1990));
// A bare `-` is the unbounded range, which leaves the query unchanged.
let q: Query = "{year:-}".parse().unwrap();
assert_eq!(q, Query::new());
}
// Unknown kinds, non-numeric values and missing values are rejected.
#[test]
fn query_parser_error() {
assert!("{blah}".parse::<Query>().is_err());
assert!("{size:a}".parse::<Query>().is_err());
assert!("{year:}".parse::<Query>().is_err());
}
// An unbalanced brace is dropped and the remaining text is treated as a name.
#[test]
fn query_parser_weird() {
let q: Query = "{movie".parse().unwrap();
assert_eq!(q, Query::new().name("movie"));
let q: Query = "movie}".parse().unwrap();
assert_eq!(q, Query::new().name("movie"));
}
// Display writes directives in a fixed order, with kinds sorted regardless of
// the order in which they were added.
#[test]
fn query_display() {
let q = Query::new()
.name("foo bar baz")
.size(31)
.season_ge(4).season_le(5)
.kind(TitleKind::TVSeries)
.kind(TitleKind::Movie)
.similarity(Similarity::Jaro);
let expected =
"{scorer:okapibm25} {sim:jaro} {size:31} {movie} {tvSeries} {season:4-5} foo bar baz";
assert_eq!(q.to_string(), expected);
}
// A query serializes to JSON as its Display string.
#[test]
fn query_serialize() {
#[derive(Serialize)]
struct Test {
query: Query,
}
let query = Query::new()
.name("foo bar baz")
.name_scorer(None)
.size(31)
.season_ge(4).season_le(4);
let got = serde_json::to_string(&Test { query }).unwrap();
let expected = r#"{"query":"{scorer:none} {sim:none} {size:31} {season:4} foo bar baz"}"#;
assert_eq!(got, expected);
}
// A query deserializes from a JSON string by parsing it; the position of
// directives relative to the name terms does not matter.
#[test]
fn query_deserialize() {
let json = r#"{"query": "foo {size:30} bar {season:4} baz {show}"}"#;
let expected =
"{size:30} {season:4} {show} foo bar baz".parse().unwrap();
#[derive(Deserialize)]
struct Test {
query: Query,
}
let got: Test = serde_json::from_str(json).unwrap();
assert_eq!(got.query, expected);
}
}