hyphenation/load.rs
1/*! # Reading and loading hyphenation dictionaries
2
3To hyphenate words in a given language, it is first necessary to load
4the relevant hyphenation dictionary into memory. This module offers
5convenience methods for common retrieval patterns, courtesy of the
6[`Load`] trait.
7
8```
9use hyphenation::Load;
10use hyphenation::{Standard, Language};
11```
12
13The primary function of [`Load`] is to deserialize dictionaries from
14buffers – usually, file buffers.
15
16```ignore
17use std::io;
18use std::fs::File;
19
20let path_to_dict = "/path/to/english-dictionary.bincode";
21let dict_file = File::open(path_to_dict) ?;
22let mut reader = io::BufReader::new(dict_file);
23let english_us = Standard::from_reader(Language::EnglishUS, &mut reader) ?;
24```
25
Dictionaries can be loaded from the file system rather more succinctly with
27the [`from_path`] shorthand:
28
29```ignore
30let path_to_dict = "/path/to/english-dictionary.bincode";
31let english_us = Standard::from_path(Language::EnglishUS, path_to_dict) ?;
32```
33
34Dictionaries bundled with the `hyphenation` library are copied to Cargo's
35output directory at build time. To locate them, look for a `dictionaries`
36folder under `target`:
37
38```text
39$ find target -name "dictionaries"
40target/debug/build/hyphenation-33034db3e3b5f3ce/out/dictionaries
41```
42
43
44## Embedding
45
46Optionally, hyphenation dictionaries can be embedded in the compiled
47artifact by enabling the `embed_all` feature. Embedded dictionaries can be
48accessed directly from memory.
49
50```ignore
51use hyphenation::{Standard, Language, Load};
52
53let english_us = Standard::from_embedded(Language::EnglishUS) ?;
54```
55
56Note that embedding significantly increases the size of the compiled artifact.
57
58
59[`Load`]: trait.Load.html
60[`from_path`]: trait.Load.html#method.from_path
61*/
62
63use bincode as bin;
64#[cfg(any(feature = "embed_all", feature = "embed_en-us"))]
65use crate::resources::ResourceId;
66use std::error;
67use std::fmt;
68use std::fs::File;
69use std::io;
70use std::path::Path;
71use std::result;
72
73use hyphenation_commons::dictionary::{extended::Extended, Standard};
74use hyphenation_commons::Language;
75
76
77/// Convenience methods for the retrieval of hyphenation dictionaries.
78pub trait Load: Sized {
79 /// Read and deserialize the dictionary at the given path, verifying that it
80 /// belongs to the expected language.
81 fn from_path<P>(lang : Language, path : P) -> Result<Self>
82 where P : AsRef<Path>
83 {
84 let file = File::open(path)?;
85 Self::from_reader(lang, &mut io::BufReader::new(file))
86 }
87
88 /// Deserialize a dictionary from the provided reader, verifying that it
89 /// belongs to the expected language.
90 fn from_reader<R>(lang : Language, reader : &mut R) -> Result<Self>
91 where R : io::Read;
92
93 /// Deserialize a dictionary from the provided reader.
94 fn any_from_reader<R>(reader : &mut R) -> Result<Self>
95 where R : io::Read;
96
97 #[cfg(any(feature = "embed_all", feature = "embed_en-us"))]
98 /// Deserialize the embedded dictionary for the given language.
99 fn from_embedded(lang : Language) -> Result<Self>;
100}
101
// Generates the `Load` implementation for a dictionary type.
//
// `$dict` is the dictionary type; `$suffix` is the middle component of the
// embedded resource name (`<language code>.<suffix>.bincode`) looked up by
// `from_embedded`.
macro_rules! impl_load {
    ($dict:ty, $suffix:expr) => {
        impl Load for $dict {
            /// Deserialize a dictionary from `reader` and reject it with
            /// `Error::LanguageMismatch` unless it reports `lang` as its
            /// language.
            fn from_reader<R>(lang : Language, reader : &mut R) -> Result<Self>
            where R : io::Read
            {
                let dict = Self::any_from_reader(reader)?;
                let found = dict.language();
                if found != lang {
                    return Err(Error::LanguageMismatch { expected : lang, found });
                }
                Ok(dict)
            }

            /// Deserialize a dictionary from `reader`, whatever its language.
            fn any_from_reader<R>(reader : &mut R) -> Result<Self>
            where R : io::Read
            {
                Ok(bin::deserialize_from(reader)?)
            }

            /// Deserialize the embedded dictionary registered under the
            /// code of `lang` and this type's resource suffix.
            #[cfg(any(feature = "embed_all", feature = "embed_en-us"))]
            fn from_embedded(lang : Language) -> Result<Self> {
                let bytes = retrieve_resource(lang.code(), $suffix)?;
                Ok(bin::deserialize(bytes)?)
            }
        }
    };
}
133
// Provide `Load` for both dictionary kinds. The string literal is the suffix
// that `from_embedded` places between the language code and `.bincode` when
// naming the embedded resource to retrieve.
impl_load! { Standard, "standard" }
impl_load! { Extended, "extended" }
136
137
/// Look up the embedded resource named `<code>.<suffix>.bincode` and return
/// its bytes.
///
/// # Errors
///
/// Returns `Error::Resource` when no embedded resource has that name.
#[cfg(any(feature = "embed_all", feature = "embed_en-us"))]
fn retrieve_resource<'a>(code : &str, suffix : &str) -> Result<&'a [u8]> {
    let resource_name = format!("{}.{}.bincode", code, suffix);
    ResourceId::from_name(&resource_name)
        .map(|id| id.load())
        .ok_or(Error::Resource)
}
147
148
/// Result type returned by the dictionary-loading functions of this module,
/// with [`Error`] as the failure type.
pub type Result<T> = result::Result<T, Error>;
150
/// Failure modes of dictionary loading.
#[derive(Debug)]
pub enum Error {
    /// The dictionary could not be deserialized; wraps the error reported by
    /// `bincode`.
    Deserialization(bin::Error),
    /// The dictionary could not be read; wraps the underlying I/O error
    /// (for instance, a failure to open the file given to `from_path`).
    IO(io::Error),
    /// The loaded dictionary is for the wrong language.
    LanguageMismatch {
        /// The language the caller asked for.
        expected : Language,
        /// The language reported by the deserialized dictionary.
        found : Language,
    },
    /// The embedded dictionary could not be retrieved: no embedded resource
    /// matched the requested name.
    Resource,
}
166
167impl error::Error for Error {
168 fn source(&self) -> Option<&(dyn error::Error + 'static)> {
169 match *self {
170 Error::Deserialization(ref e) => Some(e),
171 Error::IO(ref e) => Some(e),
172 _ => None,
173 }
174 }
175}
176
177impl fmt::Display for Error {
178 fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
179 match *self {
180 Error::Deserialization(ref e) => e.fmt(f),
181 Error::IO(ref e) => e.fmt(f),
182 Error::LanguageMismatch { expected, found } => write!(
183 f,
184 "\
185Language mismatch: attempted to load a dictionary for `{}`, but found
186a dictionary for `{}` instead.",
187 expected, found
188 ),
189 Error::Resource => f.write_str("the embedded dictionary could not be retrieved"),
190 }
191 }
192}
193
194impl From<io::Error> for Error {
195 fn from(err : io::Error) -> Error { Error::IO(err) }
196}
197
198impl From<bin::Error> for Error {
199 fn from(err : bin::Error) -> Error { Error::Deserialization(err) }
200}