#![deny(missing_docs)]
use std::{io::Cursor, fs::File};
use std::io::{BufRead, BufReader};
use std::path::Path;
use std::str::FromStr;
use std::{collections::HashMap};
#[cfg(feature = "serde")]
use serde::{Serialize, Deserialize};
mod core;
mod errors;
pub use crate::core::{Rule, Stress, Symbol};
pub use errors::{Error, ParseError, ParseResult, Result};
/// A pronunciation dictionary held in memory, mapping a word label to its parsed [`Rule`]s.
///
/// Build one from a file with [`Cmudict::new`] or from text through the [`FromStr`]
/// implementation, then look words up with [`Cmudict::get`].
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct Cmudict {
// Keyed by the entry's label with a trailing parenthesised variant marker such as
// `(2)` removed, so every variant of a word is stored under the same key, in the
// order the entries were read.
map: HashMap<String, Vec<Rule>>,
}
impl Cmudict {
    /// Opens the dictionary file at `dict` and builds a `Cmudict` from its contents.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened, or if reading or parsing any of
    /// its lines fails while the mapping is being built.
    pub fn new<P: AsRef<Path>>(dict: P) -> Result<Cmudict> {
        let reader = BufReader::new(File::open(dict)?);
        make_mapping(reader).map(|map| Cmudict { map })
    }

    /// Looks up every rule stored for the word `s`.
    ///
    /// The lookup is an exact match against the stored key; `None` is returned when the
    /// dictionary has no entry for `s`.
    pub fn get(&self, s: &str) -> Option<&[Rule]> {
        self.map.get(s).map(Vec::as_slice)
    }
}
impl FromStr for Cmudict {
type Err = Error;
fn from_str(s: &str) -> ::std::result::Result<Self, Self::Err> {
let cursor = Cursor::new(s);
Ok(Self {
map: make_mapping(cursor)?,
})
}
}
/// Reports whether any pronunciation in `ones` rhymes with any pronunciation in `twos`.
///
/// Two pronunciations are treated as rhyming when the run of symbols starting at the
/// last symbol for which `is_syllable()` is true, and continuing to the end, is equal
/// in both. A pronunciation containing no such symbol never matches, and the result is
/// `false` when either slice is empty.
pub fn rhymes(ones: &[Rule], twos: &[Rule]) -> bool {
    ones.iter().any(|first| {
        let first = first.pronunciation();
        twos.iter().any(|second| {
            let second = second.pronunciation();
            let first_start = first.iter().rposition(|s| s.is_syllable());
            let second_start = second.iter().rposition(|s| s.is_syllable());
            match (first_start, second_start) {
                // Compare the tails from each final syllable symbol onwards.
                (Some(i), Some(j)) => first[i..] == second[j..],
                _ => false,
            }
        })
    })
}
/// Returns the part of `s` that precedes the first `#`, or all of `s` when it has none.
///
/// Whitespace directly before the `#` is kept.
fn left(s: &str) -> &str {
    match s.find('#') {
        Some(hash) => &s[..hash],
        None => s,
    }
}
/// Builds the label-to-rules table from the dictionary text supplied by `reader`.
///
/// Each line is handled as follows:
///
/// * a line starting with `;;` is a comment and is skipped;
/// * everything from the first `#` onwards is discarded as a trailing comment;
/// * a line that is empty or whitespace-only once the comment is removed is skipped,
///   so blank lines do not reach the rule parser;
/// * the remaining text is parsed with `Rule::from_str` and the rule is appended to the
///   list stored under the line's first space-delimited token, with any variant marker
///   removed by `split_label`.
///
/// # Errors
///
/// Returns an error as soon as reading a line from `reader` fails or `Rule::from_str`
/// rejects an entry.
fn make_mapping<R: BufRead>(reader: R) -> Result<HashMap<String, Vec<Rule>>> {
    let mut map: HashMap<String, Vec<Rule>> = HashMap::new();
    for line in reader.lines() {
        let line = line?;
        if line.starts_with(";;") {
            continue;
        }
        let entry = left(&line);
        if entry.trim().is_empty() {
            // Nothing but whitespace and/or a `#` comment: there is no entry to parse.
            continue;
        }
        // The label is whatever precedes the first space; a line without a space is
        // all label.
        let label = match line.find(' ') {
            Some(end) => &line[..end],
            None => line.as_str(),
        };
        let rule = Rule::from_str(entry)?;
        map.entry(split_label(label).to_string())
            .or_insert_with(Vec::new)
            .push(rule);
    }
    Ok(map)
}
/// Strips a trailing variant marker such as `(2)` from a dictionary label.
///
/// A marker is only recognised when the label ends in `(`, one or more ASCII digits,
/// and `)`, and at least one character precedes the `(`. Any other label is returned
/// unchanged, so labels that merely contain a parenthesis (for example
/// `(begin-parens`) are not truncated and the result is never emptied by stripping.
fn split_label(s: &str) -> &str {
    if let Some(open) = s.rfind('(') {
        // Text after the last `(`; for a real marker this is the digits plus `)`.
        let marker = &s[open + 1..];
        let is_variant = open > 0
            && marker.len() > 1
            && marker.ends_with(')')
            && marker[..marker.len() - 1].bytes().all(|b| b.is_ascii_digit());
        if is_variant {
            return &s[..open];
        }
    }
    s
}
#[cfg(test)]
mod tests {
    use super::core::{Rule, Stress, Symbol};
    use super::*;
    use pretty_assertions::assert_eq;
    use std::sync::Arc;
    use std::thread;

    /// Loads the dictionary fixture that every test in this module reads.
    fn fixture() -> Cmudict {
        Cmudict::new("./resources/cmudict.dict").expect("Could not create Cmudict")
    }

    /// Shorthand for building the `Rule` a lookup is expected to return.
    fn rule(label: &str, pronunciation: Vec<Symbol>) -> Rule {
        Rule::new(label.to_string(), pronunciation)
    }

    /// Checks present and absent lookups, and that variants share one key.
    #[test]
    fn test_basics() {
        let dict = fixture();

        let apple = dict.get("apple");
        assert!(apple.is_some());
        let expected_apple = rule(
            "apple",
            vec![
                Symbol::AE(Stress::Primary),
                Symbol::P,
                Symbol::AH(Stress::None),
                Symbol::L,
            ],
        );
        assert_eq!(apple.unwrap().first().unwrap(), &expected_apple);

        assert!(dict.get("abf").is_none());

        let unfit = dict.get("unfit");
        assert!(unfit.is_some());
        let expected_unfit = rule(
            "unfit",
            vec![
                Symbol::AH(Stress::None),
                Symbol::N,
                Symbol::F,
                Symbol::IH(Stress::Primary),
                Symbol::T,
            ],
        );
        assert_eq!(unfit.unwrap().first().unwrap(), &expected_unfit);

        // Both the plain entry and its `(2)` variant must be found under "every".
        let every = dict.get("every").unwrap();
        let expected_every = vec![
            rule(
                "every",
                vec![
                    Symbol::EH(Stress::Primary),
                    Symbol::V,
                    Symbol::ER(Stress::None),
                    Symbol::IY(Stress::None),
                ],
            ),
            rule(
                "every(2)",
                vec![
                    Symbol::EH(Stress::Primary),
                    Symbol::V,
                    Symbol::R,
                    Symbol::IY(Stress::None),
                ],
            ),
        ];
        assert_eq!(every, &expected_every);
    }

    /// Checks one rhyming pair and one non-rhyming pair.
    #[test]
    fn rhyming() {
        let dict = fixture();
        let elf = dict.get("elf").unwrap();

        assert!(rhymes(elf, dict.get("shelf").unwrap()));
        assert!(!rhymes(elf, dict.get("fish").unwrap()));
    }

    /// Checks that one dictionary can be queried from several threads through an `Arc`.
    #[test]
    fn threads() {
        let dict = Arc::new(fixture());
        let words = ["hello", "apple", "rust"];

        // Spawn every lookup first, then join, so the threads can overlap.
        let handles: Vec<_> = words
            .iter()
            .map(|&word| {
                let dict = Arc::clone(&dict);
                thread::spawn(move || {
                    let result = dict.get(word);
                    assert!(result.is_some());
                })
            })
            .collect();

        for handle in handles {
            handle.join().unwrap();
        }
    }
}