#![deny(unsafe_code)]
use std::option::Option;
/// Half-open byte range `(start, end)` into the line that backs a [`DictEntry`].
type Slice = (usize, usize);

/// A dictionary entry stored as its original line plus the byte ranges of its fields.
///
/// `T` is whatever holds the line text (for example `String` or `&str`); the
/// accessors only require `T: AsRef<str>`. No field is copied out of the line:
/// every accessor returns a sub-slice of it.
#[derive(Clone, Copy, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct DictEntry<T> {
    /// The complete line that all ranges below index into.
    line: T,
    /// Range of the first space-delimited field.
    traditional: Slice,
    /// Range of the second space-delimited field.
    simplified: Slice,
    /// Range of the text between `[` and `]`.
    pinyin: Slice,
    /// Range from just after the opening `/` to the end of the line.
    definitions: Slice,
}

/// Debug output is built from the field accessors, so it is available for every
/// line storage type the accessors support, not only `String`.
impl<T: AsRef<str>> std::fmt::Debug for DictEntry<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // The individual definitions are joined with `~` into one string for display.
        let definitions = self.definitions().collect::<Vec<_>>().join("~");
        write!(
            f,
            "DictEntry {{ traditional: `{:?}`, simplified: `{:?}`, pinyin: `{:?}`, definitions: [{:?}] }}",
            self.traditional(),
            self.simplified(),
            self.pinyin(),
            definitions
        )
    }
}

impl<T: AsRef<str>> DictEntry<T> {
    /// Returns the part of the backing line covered by `range`.
    ///
    /// Panics if `range` is not a valid char-boundary range of the line; the
    /// ranges stored by `parse_dict_entry` come from `char_indices` of the same
    /// line.
    fn field(&self, range: Slice) -> &str {
        &self.line.as_ref()[range.0..range.1]
    }

    /// The traditional-script form (first field of the line).
    pub fn traditional(&self) -> &str {
        self.field(self.traditional)
    }

    /// The simplified-script form (second field of the line).
    pub fn simplified(&self) -> &str {
        self.field(self.simplified)
    }

    /// The pronunciation text found between the square brackets.
    pub fn pinyin(&self) -> &str {
        self.field(self.pinyin)
    }

    /// Iterates over the individual definitions.
    ///
    /// Leading and trailing `/` are stripped from the definitions field before it
    /// is split on `/`, so the surrounding separators do not produce empty items.
    pub fn definitions<'a>(&'a self) -> impl Iterator<Item = &'a str> {
        self.field(self.definitions).trim_matches('/').split('/')
    }
}
/// Parses one dictionary line into a [`DictEntry`] that borrows nothing but `line` itself.
///
/// The expected layout is
///
/// ```text
/// TRADITIONAL SIMPLIFIED [PINYIN] /DEFINITION/DEFINITION/
/// ```
///
/// with exactly one space between the fields. The entry keeps `line` and records
/// the byte range of each field instead of copying text out of it.
///
/// Returns `None` when the line is empty, starts with `#`, or deviates from the
/// layout above. The simplified, pinyin and definitions fields must each contain
/// at least one character, because their first character is consumed without
/// being inspected.
pub fn parse_dict_entry<T: AsRef<str>>(line: T) -> Option<DictEntry<T>> {
    type Cursor<'a> = std::iter::Peekable<std::str::CharIndices<'a>>;

    /// Advances until the next character is `stop`, leaving it unconsumed, and
    /// returns its byte offset. Yields `None` if the input ends first.
    fn scan_to(cursor: &mut Cursor, stop: char) -> Option<usize> {
        loop {
            let &(offset, ch) = cursor.peek()?;
            if ch == stop {
                return Some(offset);
            }
            cursor.next();
        }
    }

    /// Consumes one character and succeeds only if it is `expected`.
    fn eat(cursor: &mut Cursor, expected: char) -> Option<()> {
        match cursor.next() {
            Some((_, ch)) if ch == expected => Some(()),
            _ => None,
        }
    }

    let text = line.as_ref();
    // Empty lines and `#` lines are never entries.
    if text.is_empty() || text.starts_with('#') {
        return None;
    }
    let mut cursor = text.char_indices().peekable();

    // TRADITIONAL: everything up to the first space (may be empty).
    let traditional_start = cursor.peek()?.0;
    let traditional_end = scan_to(&mut cursor, ' ')?;
    eat(&mut cursor, ' ')?;

    // SIMPLIFIED: the first character is taken unconditionally, then up to the next space.
    let simplified_start = cursor.next()?.0;
    let simplified_end = scan_to(&mut cursor, ' ')?;
    eat(&mut cursor, ' ')?;

    // [PINYIN]: the first character after `[` is taken unconditionally, then up to `]`.
    eat(&mut cursor, '[')?;
    let pinyin_start = cursor.next()?.0;
    let pinyin_end = scan_to(&mut cursor, ']')?;
    eat(&mut cursor, ']')?;

    // " /" introduces the definitions, which run to the end of the line.
    eat(&mut cursor, ' ')?;
    eat(&mut cursor, '/')?;
    let definitions_start = cursor.next()?.0;
    let definitions_end = text.len();

    Some(DictEntry {
        line,
        traditional: (traditional_start, traditional_end),
        simplified: (simplified_start, simplified_end),
        pinyin: (pinyin_start, pinyin_end),
        definitions: (definitions_start, definitions_end),
    })
}
/// Returns `true` when `line` begins with `#`.
///
/// Metadata lines (`#!…`) also begin with `#`, so they count as comments here.
pub fn is_comment(line: &str) -> bool {
    line.starts_with('#')
}
/// Returns `true` when `line` begins with the two characters `#!`.
pub fn is_metadata(line: &str) -> bool {
    line.starts_with("#!")
}
/// The classification of a single input line, as produced by `parse_line`.
#[derive(Debug)]
pub enum Line {
/// A line starting with `#` that is not metadata; holds the trimmed text after the `#`.
Comment(String),
/// A line starting with `#!`; holds the trimmed text before and after the first `=`.
Metadata(String, String),
/// A line that `parse_dict_entry` accepted as a dictionary entry.
Entry(DictEntry<String>),
/// A line with no characters at all.
Empty,
/// A line that could not be parsed.
Incorrect,
}
/// Classifies one line of input and parses it accordingly.
///
/// * an empty line becomes [`Line::Empty`];
/// * a `#!key=value` line becomes [`Line::Metadata`] with key and value trimmed,
///   split at the first `=`;
/// * any other `#` line becomes [`Line::Comment`] with the trimmed text after `#`;
/// * everything else is handed to `parse_dict_entry` and becomes [`Line::Entry`].
///
/// Lines that fit none of these shapes yield [`Line::Incorrect`]. That includes a
/// `#!` line without any `=`, which is reported rather than causing a panic.
pub fn parse_line<T: AsRef<str>>(line: T) -> Line {
    let line = line.as_ref();
    if line.is_empty() {
        Line::Empty
    } else if is_metadata(line) {
        // `is_metadata` has confirmed the two-byte ASCII prefix `#!`, so byte 2
        // is a char boundary.
        let body = line[2..].trim();
        let mut parts = body.splitn(2, '=');
        match (parts.next(), parts.next()) {
            (Some(key), Some(value)) => {
                Line::Metadata(key.trim().to_string(), value.trim().to_string())
            }
            // No `=` present: malformed metadata.
            _ => Line::Incorrect,
        }
    } else if is_comment(line) {
        // `is_comment` has confirmed the one-byte prefix `#`.
        Line::Comment(line[1..].trim().into())
    } else {
        parse_dict_entry(line.to_string()).map_or(Line::Incorrect, Line::Entry)
    }
}
use std::io::BufRead;
pub fn parse_reader<T: std::io::Read>(f: T) -> impl Iterator<Item = DictEntry<String>> {
let lines = std::io::BufReader::new(f).lines();
let lines = lines.filter_map(|l| l.ok());
let lines = lines.filter(|l| !is_comment(l));
lines.filter_map(|x| parse_dict_entry(x))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses one well-formed line and checks each accessor against the expected field.
    #[test]
    fn test_parse_dict_entry() {
        let entry = parse_dict_entry("睡覺 睡觉 [shui4 jiao4] /to go to bed/to sleep/").unwrap();

        let fields = [
            (entry.traditional(), "睡覺"),
            (entry.simplified(), "睡觉"),
            (entry.pinyin(), "shui4 jiao4"),
        ];
        for &(actual, expected) in fields.iter() {
            assert_eq!(actual, expected);
        }

        let mut definitions = entry.definitions();
        assert_eq!(definitions.next(), Some("to go to bed"));
        assert_eq!(definitions.next(), Some("to sleep"));
    }
}