//! Pronunciation lookup backed by a CMUdict-format dictionary file.
//!
//! A [`Cmudict`] is created from a dictionary file on disk (or from a copy
//! downloaded into a temporary directory) and answers word lookups with the
//! `Rule` type re-exported from `cmudict_core`. The free function [`rhymes`]
//! compares the endings of two rules.
#![deny(missing_docs)]
extern crate cmudict_core;
extern crate indexed_line_reader;
extern crate reqwest;
extern crate tempdir;
extern crate radix_trie;
#[macro_use] extern crate failure;
#[macro_use] extern crate log;
use std::str::FromStr;
use std::sync::Mutex;
use std::cell::RefCell;
use std::io::{BufReader, BufRead, Seek, SeekFrom};
use std::fs::{OpenOptions, File};
use std::convert::AsRef;
use std::path::{Path, PathBuf};
use std::collections::HashSet;
use radix_trie::Trie;
use tempdir::TempDir;
use indexed_line_reader::IndexedLineReader;
pub use cmudict_core::{Rule, Stress, Symbol};
pub use errors::*;
mod errors;
/// Shared, line-addressable reader over the dictionary file.
///
/// The `Mutex` serialises access between threads; the `RefCell` supplies the
/// mutable borrow that seeking and reading require when the owner is only
/// reachable through `&self`.
type Index = Mutex<RefCell<IndexedLineReader<BufReader<File>>>>;
/// A pronunciation dictionary backed by a file on disk.
///
/// Entries are not held in memory: a small prefix index narrows each lookup
/// to a range of lines, which are then read from the file on demand.
#[derive(Debug)]
pub struct Cmudict {
// Maps a short prefix of a word (at most its first two bytes) to a
// `(start, end)` pair of line numbers in the dictionary file.
index: Trie<String, (usize, usize)>,
// Path the dictionary was opened from.
fname: PathBuf,
// Reader used by lookups to seek to a line number and read that line.
line_index: Index,
}
impl Cmudict {
    /// Opens the dictionary file at `dict` and builds the prefix index for it.
    ///
    /// The file is read once to build the index and then kept open so that
    /// [`Cmudict::get`] can read individual lines on demand.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, or if reading it fails
    /// while the index is being built.
    pub fn new<P: AsRef<Path>>(dict: P) -> Result<Cmudict> {
        let path = dict.as_ref();
        let index = make_index(&path)?;
        let file = OpenOptions::new().read(true).open(&path)?;
        // 100 is the granularity handed to the line reader's own index.
        let reader = IndexedLineReader::new(BufReader::new(file), 100);
        Ok(Cmudict {
            index: index,
            fname: path.into(),
            line_index: Mutex::new(RefCell::new(reader)),
        })
    }

    /// Downloads `cmudict.dict` from the cmusphinx GitHub repository into a
    /// fresh temporary directory and opens it with [`Cmudict::new`].
    ///
    /// # Errors
    ///
    /// Returns an error if the temporary directory or file cannot be created,
    /// if the HTTP request or the copy to disk fails, or if [`Cmudict::new`]
    /// fails on the downloaded file.
    pub fn download() -> Result<Cmudict> {
        // NOTE(review): `tmpdir` is dropped when this function returns, which
        // presumably removes the directory; lookups then rely on the file
        // handle opened by `new` staying usable. Confirm on non-Unix targets.
        let tmpdir = TempDir::new("cmudict")?;
        let path = tmpdir.path().join("cmudict.dict");
        let mut file = OpenOptions::new().create(true).write(true).open(&path)?;
        let mut r = reqwest::get("https://raw.githubusercontent.com/cmusphinx/cmudict/master/cmudict.dict")?;
        r.copy_to(&mut file)?;
        Cmudict::new(&path)
    }

    /// Returns the `(start, end)` line range recorded for the prefix of `s`.
    ///
    /// The prefix is at most the first two bytes of `s`, shortened to the
    /// nearest character boundary so that multi-byte characters can never
    /// cause a slicing panic. For ASCII input this is simply `&s[..2]` (or
    /// all of `s` when it is shorter).
    fn get_index_val(&self, s: &str) -> Option<(usize, usize)> {
        let mut cut = if s.len() < 2 { s.len() } else { 2 };
        // Index 0 is always a boundary, so this terminates.
        while !s.is_char_boundary(cut) {
            cut -= 1;
        }
        self.index.get(&s[..cut]).cloned()
    }

    /// Looks up the pronunciation rule for the word `s`.
    ///
    /// The lines in the index range for the prefix of `s` are read one at a
    /// time, and the first line whose leading space-delimited token equals
    /// `s` exactly is parsed into a `Rule`. Because the comparison is against
    /// the raw token, alternate entries such as `word(2)` are not matched by
    /// looking up `word`.
    ///
    /// Returns `None` when the prefix is not indexed, when no line in the
    /// range matches, or when locking, seeking, reading or parsing fails
    /// (lock, seek and parse failures are reported through `error!`).
    pub fn get(&self, s: &str) -> Option<Rule> {
        let (start, end) = match self.get_index_val(s) {
            Some(range) => range,
            None => return None,
        };

        let mut lineno = start as u64;
        loop {
            // The lock is held across the seek *and* the read so that another
            // thread cannot reposition the reader in between.
            let line = {
                let guard = match self.line_index.lock() {
                    Ok(guard) => guard,
                    Err(e) => {
                        error!("error while locking: {:?}", e);
                        return None;
                    }
                };
                let mut reader = guard.borrow_mut();
                match reader.seek(SeekFrom::Start(lineno)) {
                    // Reaching `end` means the indexed range is exhausted.
                    Ok(l) if l == end as u64 => break,
                    Err(e) => {
                        error!("error while seeking: {:?}", e);
                        break;
                    }
                    Ok(_) => {}
                }
                let mut line = String::new();
                if reader.read_line(&mut line).is_err() {
                    break;
                }
                line
            };

            let word = match left(&line) {
                Some(word) => word,
                None => break,
            };
            if word != s {
                lineno += 1;
                continue;
            }
            match Rule::from_str(&line) {
                Ok(rule) => return Some(rule),
                Err(e) => {
                    error!("error creating rule: {:?}", e);
                    break;
                }
            }
        }
        None
    }
}
/// Reports whether two rules end in the same sounds.
///
/// Each pronunciation is cut at its last symbol for which `is_syllable()` is
/// true, and the two tails (that symbol and everything after it) are compared
/// for equality. If either pronunciation has no such symbol the result is
/// `false`.
pub fn rhymes(one: &Rule, two: &Rule) -> bool {
    let first = one.pronunciation();
    let second = two.pronunciation();

    let first_tail_at = first.iter().rposition(|sym| sym.is_syllable());
    let second_tail_at = second.iter().rposition(|sym| sym.is_syllable());

    match (first_tail_at, second_tail_at) {
        (Some(i), Some(j)) => first[i..] == second[j..],
        _ => false,
    }
}
/// Returns the part of `s` that precedes its first space, or all of `s` when
/// it contains no space. The result is always `Some`.
fn left(s: &str) -> Option<&str> {
    let end = s.find(' ').unwrap_or(s.len());
    Some(&s[..end])
}
/// Builds the prefix index for the dictionary file at `file`.
///
/// Every entry's label (the first space-delimited token with any trailing
/// `(n)` variant marker removed) is reduced to a prefix of at most two bytes,
/// shortened to a character boundary. Each run of consecutive lines sharing a
/// prefix is recorded under *that* prefix as `(start, end)`, where `start` is
/// the zero-based line number of the first line of the run and `end` is the
/// line number at which the next run begins (or the total number of lines
/// for the final run).
///
/// Lines starting with `;;` and lines with an empty label are ignored and do
/// not interrupt a run. If a prefix shows up again after its run has ended,
/// only the first run is indexed.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or a line cannot be read.
fn make_index<P: AsRef<Path>>(file: P) -> Result<Trie<String, (usize, usize)>> {
    let file = OpenOptions::new().read(true).open(&file)?;
    let reader = BufReader::new(file);
    let mut seen = HashSet::new();
    let mut map = Trie::new();
    // Prefix and first line number of the run currently being scanned.
    let mut open: Option<(String, usize)> = None;
    let mut total = 0;

    for (idx, line) in reader.lines().enumerate() {
        let line = line?;
        total = idx + 1;
        if line.starts_with(";;") {
            continue;
        }

        let label = split_label(line.splitn(2, ' ').next().unwrap_or(""));
        // At most two bytes, backed off so the slice never splits a character.
        let mut cut = if label.len() < 2 { label.len() } else { 2 };
        while !label.is_char_boundary(cut) {
            cut -= 1;
        }
        let word = &label[..cut];
        if word.is_empty() {
            continue;
        }

        let same_run = match open {
            Some((ref prefix, _)) => prefix.as_str() == word,
            None => false,
        };
        if same_run {
            continue;
        }

        // A different prefix ends the open run at this line.
        if let Some((prefix, first)) = open.take() {
            map.insert(prefix, (first, idx));
        }
        // Only the first run of a prefix is indexed; a later reappearance
        // cannot be expressed as part of the same contiguous range.
        if seen.insert(word.to_string()) {
            open = Some((word.to_string(), idx));
        }
    }

    // The final run extends to the end of the file.
    if let Some((prefix, first)) = open {
        map.insert(prefix, (first, total));
    }
    Ok(map)
}
/// Strips a trailing variant marker such as `(2)` from a dictionary label.
///
/// Returns everything before the last `(` in `s`, or `s` unchanged when it
/// contains no `(`.
fn split_label(s: &str) -> &str {
    match s.rfind('(') {
        Some(paren) => &s[..paren],
        None => s,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;
    use std::sync::Arc;
    use cmudict_core::{Rule, Symbol, Stress};

    /// Opens the dictionary fixture shared by the file-based tests.
    fn open_fixture() -> Cmudict {
        Cmudict::new("./resources/cmudict.dict").expect("Could not create Cmudict")
    }

    /// Known words resolve to their expected rules; an unknown word does not.
    #[test]
    fn test_basics() {
        let dict = open_fixture();

        let expected_apple = Rule::new(
            "apple".to_string(),
            vec![
                Symbol::AE(Stress::Primary),
                Symbol::P,
                Symbol::AH(Stress::None),
                Symbol::L,
            ],
        );
        let apple = dict.get("apple");
        assert!(apple.is_some());
        assert_eq!(apple, Some(expected_apple));

        assert!(dict.get("abf").is_none());

        let expected_unfit = Rule::new(
            "unfit".to_string(),
            vec![
                Symbol::AH(Stress::None),
                Symbol::N,
                Symbol::F,
                Symbol::IH(Stress::Primary),
                Symbol::T,
            ],
        );
        let unfit = dict.get("unfit");
        assert!(unfit.is_some());
        assert_eq!(unfit, Some(expected_unfit));
    }

    /// Two words with the same ending are reported as rhyming.
    #[test]
    fn test_rhymes() {
        let dict = open_fixture();
        let magazine = dict.get("magazine").expect("Couldn't find 'magazine'");
        let magdalene = dict.get("magdalene").expect("Couldn't find 'magdalene'");
        assert!(rhymes(&magazine, &magdalene));
    }

    /// A downloaded dictionary answers lookups like a local one.
    #[test]
    fn using_tempdir() {
        let dict = Cmudict::download().expect("Could not create Cmudict");

        let expected_abc = Rule::new(
            "abc".to_string(),
            vec![
                Symbol::EY(Stress::Primary),
                Symbol::B,
                Symbol::IY(Stress::Secondary),
                Symbol::S,
                Symbol::IY(Stress::Secondary),
            ],
        );
        let abc = dict.get("abc");
        assert!(abc.is_some());
        assert_eq!(abc, Some(expected_abc));

        assert!(dict.get("abf").is_none());
    }

    /// One dictionary can be shared across threads for concurrent lookups.
    #[test]
    fn threads() {
        let dict = Arc::new(open_fixture());
        let words = ["hello", "apple", "rust"];

        // Spawn every worker before joining any of them.
        let handles: Vec<_> = words
            .iter()
            .map(|&word| {
                let dict = dict.clone();
                thread::spawn(move || {
                    let result = dict.get(word);
                    assert!(result.is_some());
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }
    }
}