use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
use std::fmt::Debug;
use std::fs;
use url::Url;
/// A successful referer match, as returned by `Parser::lookup`.
///
/// `'p` is the lifetime of the parser whose database strings are borrowed;
/// `'u` is the lifetime of the URL that was looked up.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Entry<'p, 'u> {
    /// Medium (top-level database key) under which the matched source is listed.
    pub medium: &'p str,
    /// Name of the matched source.
    pub source: &'p str,
    /// Full domain of the looked-up URL, even when a shorter suffix was the key that matched.
    pub domain: &'u str,
    /// Value of the first query pair whose key is one of the source's configured
    /// parameter names; `None` when the source has no parameters or none matched.
    pub search_term: Option<Cow<'u, str>>,
}
/// One source's record as it appears in the YAML database deserialized by `Parser::new`.
#[derive(serde::Deserialize)]
struct SourceEntry {
/// Lookup keys for this source. `Parser::new` inserts each one into its key map, and
/// `Parser::lookup` probes that map with both `host` and `host` + path-prefix strings.
domains: Vec<String>,
/// Query-parameter names later compared against a URL's query keys to extract a
/// search term; `None` when the database lists no parameters for this source.
parameters: Option<Vec<String>>,
}
/// Flattened database record: one per (medium, source) pair, stored in `Parser::entries`.
struct RefererEntry {
/// Top-level database key this source was listed under.
medium: String,
/// Second-level database key, i.e. the source's name.
source: String,
/// Query-parameter names used to pick the search term out of a URL's query string;
/// with `None`, no search term is extracted.
parameters: Option<Vec<String>>,
}
/// Errors that `Parser::new` can return while loading the referer database.
///
/// Both variants are `transparent`: they forward to the wrapped error rather than
/// adding a message of their own.
#[derive(thiserror::Error, Debug)]
pub enum Error {
/// An I/O error, e.g. the database file could not be opened.
#[error(transparent)]
IO(#[from] std::io::Error),
/// `serde_yaml` reported an error while deserializing the database.
#[error(transparent)]
YAML(#[from] serde_yaml::Error),
}
/// In-memory referer database, indexed for lookup by URL.
pub struct Parser {
/// Every known source; positions in this vector are the values stored in `map`.
entries: Vec<RefererEntry>,
/// Maps each domain key from the database to the index of its source in `entries`.
map: HashMap<String, usize>,
}
impl Parser {
pub fn new(path: &str) -> Result<Self, Error> {
let r = fs::File::open(path)?;
let database: BTreeMap<String, BTreeMap<String, SourceEntry>> = serde_yaml::from_reader(r)?;
let mut entries = Vec::new();
let mut map = HashMap::new();
for (medium, sources) in &database {
for (source_name, source_entry) in sources {
let idx = entries.len();
entries.push(RefererEntry {
medium: medium.clone(),
source: source_name.clone(),
parameters: source_entry.parameters.clone(),
});
for domain in &source_entry.domains {
map.insert(domain.clone(), idx);
}
}
}
Ok(Parser { entries, map })
}
pub fn lookup<'p, 'u>(&'p self, url: &'u Url) -> Option<Entry<'p, 'u>> {
let domain = url.domain()?;
let path = url.path();
let has_path = path.len() > 1;
if has_path {
let mut host = domain;
loop {
let mut p = path;
loop {
let key = format!("{}{}", host, p);
if let Some(&idx) = self.map.get(&key) {
return Some(self.build_entry(domain, &self.entries[idx], url));
}
match p.rfind('/') {
Some(0) | None => break, Some(pos) => p = &p[..pos],
}
}
match host.find('.') {
Some(pos) => host = &host[pos + 1..],
None => break,
}
}
}
let mut host = domain;
loop {
if let Some(&idx) = self.map.get(host) {
return Some(self.build_entry(domain, &self.entries[idx], url));
}
match host.find('.') {
Some(pos) => host = &host[pos + 1..],
None => return None,
}
}
}
fn build_entry<'p, 'u>(
&'p self,
domain: &'u str,
entry: &'p RefererEntry,
url: &'u Url,
) -> Entry<'p, 'u> {
let search_term = entry.parameters.as_ref().and_then(|params| {
url.query_pairs()
.find(|(key, _)| params.iter().any(|p| p == key.as_ref()))
.map(|(_, value)| value)
});
Entry {
medium: &entry.medium,
source: &entry.source,
domain,
search_term,
}
}
}