1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
use super::jyutping::LaxJyutPings;
use itertools::Itertools;
use serde::Deserialize;
use serde::Serialize;
use std::collections::{HashMap, HashSet};
/// A dictionary maps a `usize` key to its [Entry]
///
/// NOTE(review): the key is presumably the entry's `id` — confirm where the map is built
pub type Dict = HashMap<usize, Entry>;
/// An entry contains some information about a word.
///
/// \[id\] the word's unique identifier used by words.hk: 116878
///
/// \[variants\] variants of the word: 㗎:gaa3,咖:gaa3,𡃉:gaa3
///
/// \[poses\] parts of speech of the word: 動詞, 名詞, 形容詞
///
/// \[labels\] labels on the word: 術語, 俚語, 專名
///
/// \[sims\] synonyms of the word: 武士 is a synonym of 騎士
///
/// \[ants\] antonyms of the word: 放電 is an antonym of 充電
///
/// \[refs\] urls to references for this entry: <http://dictionary.reference.com/browse/tart?s=t>
///
/// \[imgs\] urls to images for this entry: <https://upload.wikimedia.org/wikipedia/commons/7/79/Naihuangbao.jpg>
///
/// \[defs\] a list of definitions for this word
///
/// \[published\] publication flag of this entry
/// (NOTE(review): presumably whether the entry is published on words.hk — confirm against the parser)
///
#[derive(Debug, PartialEq)]
pub struct Entry {
pub id: usize,
pub variants: Variants,
pub poses: Vec<String>,
pub labels: Vec<String>,
pub sims: Vec<String>,
pub ants: Vec<String>,
pub refs: Vec<String>,
pub imgs: Vec<String>,
pub defs: Vec<Def>,
pub published: bool,
}
/// The variants of a word: a newtype around a list of [Variant]s
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Variants(pub Vec<Variant>);
impl Variants {
    /// Borrow the written form of every variant, in stored order
    ///
    /// Duplicates are kept; the returned slices borrow from `self`.
    pub fn to_words(&self) -> Vec<&str> {
        self.0.iter().map(|variant| variant.word.as_str()).collect()
    }

    /// Borrow the written forms of all variants as a set
    ///
    /// Identical words collapse into a single element and the stored order
    /// is not preserved; the returned slices borrow from `self`.
    pub fn to_words_set(&self) -> HashSet<&str> {
        // `map` already yields an iterator, so it can be collected directly
        // without an extra `into_iter()` conversion.
        self.0.iter().map(|variant| variant.word.as_str()).collect()
    }
}
/// A variant of a \[word\] with \[prs\] (pronunciations)
///
/// \[word\] the written form of this variant: 㗎
///
/// \[prs\] the pronunciations of this variant, stored as [LaxJyutPings]: gaa3
///
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Variant {
pub word: String,
pub prs: LaxJyutPings,
}
/// The two kinds of segment: text or link. See [Segment]
///
/// \[Text\] normal text
///
/// \[Link\] a link to another entry
///
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SegmentType {
// Normal text
Text,
// A link to another entry
Link,
}
/// A segment is either a piece of text or a link, stored as a pair of
/// its [SegmentType] and its content string
///
/// Text: 非常鬆軟。(量詞:件/籠)
///
/// Link: A link to the entry 雞蛋 would be #雞蛋
///
pub type Segment = (SegmentType, String);
/// A line consists of one or more [Segment]s, kept in reading order
///
/// Empty line: `vec![(Text, "")]`
///
/// Simple line: `vec![(Text, "用嚟圍喺BB牀邊嘅布(量詞:塊)")]`
///
/// Mixed line: `vec![(Text, "一種加入"), (Link, "蝦籽"), (Text, "整嘅廣東麪")]`
///
pub type Line = Vec<Segment>;
/// Flatten a [Line] into plain text
///
/// The content of every segment is concatenated in order with no separator.
/// The [SegmentType] of each segment is ignored, so the content of a link is
/// emitted like ordinary text. A line without segments yields an empty string.
pub fn line_to_string(line: &Line) -> String {
    // Borrow each segment's text and collect straight into one `String`
    // instead of cloning every segment into a temporary `String` first.
    line.iter().map(|(_, text)| text.as_str()).collect()
}
/// A clause consists of one or more [Line]s. Appears in explanations and example sentences
///
/// Single-line clause: `vec![vec![(Text, "一行白鷺上青天")]]`
///
/// Multi-line clause: `vec![vec![(Text, "一行白鷺上青天")], vec![(Text, "兩個黃鸝鳴翠柳")]]`
///
pub type Clause = Vec<Line>; // can be multiline
pub fn clause_to_string(clause: &Clause) -> String {
clause.iter().map(|line| line_to_string(line)).join("\n")
}
/// A definition of a word
///
/// Here's an example of the definition of the word 年畫
///
/// \[yue\] Cantonese explanation of the word's meaning: 東亞民間慶祝#新春 嘅畫種(量詞:幅)
///
/// \[eng\] English explanation of the word's meaning (optional): new year picture in East Asia
///
/// \[alts\] Word with similar meaning in other languages: jpn:年画;ねんが, kor:세화, vie:Tranh tết
///
/// \[egs\] Example sentences usually with Jyutping pronunciations and English translations
///
#[derive(Debug, PartialEq)]
pub struct Def {
// Always present, unlike the English explanation
pub yue: Clause,
// `None` when the definition has no English explanation
pub eng: Option<Clause>,
// One (language tag, clause) pair per alternative language
pub alts: Vec<AltClause>,
pub egs: Vec<Eg>,
}
/// A clause in an alternative language other than Cantonese and English,
/// stored as a pair of language tag and clause
///
/// \[[AltLang]\] language tag
///
/// \[[Clause]\] A sequence of texts and links
///
pub type AltClause = (AltLang, Clause);
/// Language tags for alternative languages other than Cantonese and English
///
/// From my observation, the tags seem to be alpha-3 codes in [ISO 639-2]
///
/// [ISO 639-2]: https://www.loc.gov/standards/iso639-2/php/code_list.php
///
#[derive(Debug, PartialEq, Clone, Copy, Serialize, Deserialize)]
pub enum AltLang {
Jpn, // Japanese
Kor, // Korean
Por, // Portuguese
Vie, // Vietnamese
Lat, // Latin
Fra, // French
}
impl AltLang {
/// Convert [AltLang] to a language name in Cantonese
pub fn to_yue_name(&self) -> String {
match self {
AltLang::Jpn => "日文",
AltLang::Kor => "韓文",
AltLang::Por => "葡萄牙文",
AltLang::Vie => "越南文",
AltLang::Lat => "拉丁文",
AltLang::Fra => "法文",
}
.to_string()
}
}
/// An example sentence in Mandarin, Cantonese, and/or English
///
/// Each of the three fields is optional.
///
/// \[zho\] Mandarin example with optional Jyutping pronunciation: 可否見面? (ho2 fau2 gin3 min6?)
///
/// \[yue\] Cantonese example with optional Jyutping pronunciation: 可唔可以見面? (ho2 m4 ho2 ji5 gin3 min6?)
///
/// \[eng\] English example: Can we meet up?
///
#[derive(Debug, Clone, PartialEq)]
pub struct Eg {
pub zho: Option<PrLine>,
pub yue: Option<PrLine>,
// A plain [Line]: the English example carries no pronunciation
pub eng: Option<Line>,
}
/// An example sentence with optional Jyutping pronunciation, stored as a
/// pair of the sentence [Line] and the pronunciation string (if any)
///
/// Eg: 可唔可以見面? (ho2 m4 ho2 ji5 gin3 min6?)
///
pub type PrLine = (Line, Option<String>);