spandex_hyphenation/
load.rs

1/*! # Reading and loading hyphenation dictionaries
2
3To hyphenate words in a given language, it is first necessary to load
4the relevant hyphenation dictionary into memory. This module offers
5convenience methods for common retrieval patterns, courtesy of the
6[`Load`] trait.
7
8```
9use hyphenation::Load;
10use hyphenation::{Standard, Language};
11```
12
13The primary function of [`Load`] is to deserialize dictionaries from
14buffers – usually, file buffers.
15
16```norun
17use std::io;
18use std::fs::File;
19
20let path_to_dict = "/path/to/english-dictionary.bincode";
21let dict_file = File::open(path_to_dict) ?;
22let mut reader = io::BufReader::new(dict_file);
23let english_us = Standard::from_reader(Language::EnglishUS, &mut reader) ?;
24```
25
26Dictionaries can be loaded from the file system rather more succinctly with
27the [`from_path`] shorthand:
28
29```norun
30let path_to_dict = "/path/to/english-dictionary.bincode";
31let english_us = Standard::from_path(Language::EnglishUS, path_to_dict) ?;
32```
33
34Dictionaries bundled with the `hyphenation` library are copied to Cargo's
35output directory at build time. To locate them, look for a `dictionaries`
36folder under `target`:
37
38```ignore
39$ find target -name "dictionaries"
40target/debug/build/hyphenation-33034db3e3b5f3ce/out/dictionaries
41```
42
43
44## Embedding
45
46Optionally, hyphenation dictionaries can be embedded in the compiled
47artifact by enabling the `embed_all` feature. Embedded dictionaries can be
48accessed directly from memory.
49
50```ignore
51use hyphenation::{Standard, Language, Load};
52
53let english_us = Standard::from_embedded(Language::EnglishUS) ?;
54```
55
56Note that embedding significantly increases the size of the compiled artifact.
57
58
59[`Load`]: trait.Load.html
60[`from_path`]: trait.Load.html#method.from_path
61*/
62
63#[cfg(feature = "embed_all")] use resources::ResourceId;
64use bincode as bin;
65use std::error;
66use std::fmt;
67use std::io;
68use std::fs::File;
69use std::path::Path;
70use std::result;
71
72use hyphenation_commons::Language;
73use hyphenation_commons::dictionary::{Standard, Extended};
74
75
76/// Convenience methods for the retrieval of hyphenation dictionaries.
77pub trait Load : Sized {
78    /// Read and deserialize the dictionary at the given path, verifying that it
79    /// effectively belongs to the requested language.
80    fn from_path<P>(lang : Language, path : P) -> Result<Self>
81    where P : AsRef<Path> {
82        let file = File::open(path) ?;
83        Self::from_reader(lang, &mut io::BufReader::new(file))
84    }
85
86    /// Deserialize a dictionary from the provided reader, verifying that it
87    /// effectively belongs to the requested language.
88    fn from_reader<R>(lang : Language, reader : &mut R) -> Result<Self>
89    where R : io::Read;
90
91    /// Deserialize a dictionary from the provided reader.
92    fn any_from_reader<R>(reader : &mut R) -> Result<Self>
93    where R : io::Read;
94
95    #[cfg(feature = "embed_all")]
96    /// Deserialize the embedded dictionary.
97    fn from_embedded(lang : Language) -> Result<Self>;
98
99}
100
101macro_rules! impl_load {
102    ($dict:ty, $suffix:expr) => {
103        impl Load for $dict {
104            fn from_reader<R>(lang : Language, reader : &mut R) -> Result<Self>
105            where R : io::Read {
106                let dict : Self = bin::config().limit(5_000_000).deserialize_from(reader) ?;
107                let (found, expected) = (dict.language, lang);
108                if found != expected {
109                    Err(Error::LanguageMismatch { expected, found })
110                } else { Ok(dict) }
111            }
112
113            fn any_from_reader<R>(reader : &mut R) -> Result<Self>
114            where R : io::Read {
115                let dict : Self = bin::config().limit(5_000_000).deserialize_from(reader) ?;
116                Ok(dict)
117            }
118
119            #[cfg(feature = "embed_all")]
120            fn from_embedded(lang : Language) -> Result<Self> {
121                let dict_bytes = retrieve_resource(lang.code(), $suffix) ?;
122                let dict = bin::deserialize(dict_bytes) ?;
123                Ok(dict)
124            }
125        }
126    }
127}
128
129impl_load! { Standard, "standard" }
130impl_load! { Extended, "extended" }
131
132
/// Look up the embedded resource named `<code>.<suffix>.bincode` and return
/// its bytes, or `Error::Resource` when no resource has that name.
#[cfg(feature = "embed_all")]
fn retrieve_resource<'a>(code: &str, suffix: &str) -> Result<&'a [u8]> {
    let resource_name = format!("{}.{}.bincode", code, suffix);
    let lookup: Option<ResourceId> = ResourceId::from_name(&resource_name);
    if let Some(resource) = lookup {
        Ok(resource.load())
    } else {
        Err(Error::Resource)
    }
}
142
143
144pub type Result<T> = result::Result<T, Error>;
145
146/// Failure modes of dictionary loading.
147#[derive(Debug)]
148pub enum Error {
149    /// The dictionary could not be deserialized.
150    Deserialization(bin::Error),
151    /// The dictionary could not be read.
152    IO(io::Error),
153    /// The loaded dictionary is for the wrong language.
154    LanguageMismatch { expected : Language, found : Language },
155    /// The embedded dictionary could not be retrieved.
156    Resource
157}
158
159impl error::Error for Error {
160    fn description(&self) -> &str {
161        match *self {
162            Error::Deserialization(ref e) => e.description(),
163            Error::IO(ref e) => e.description(),
164            Error::LanguageMismatch { .. } => "loaded a dictionary for the wrong language",
165            Error::Resource => "embedded dictionary could not be retrieved"
166        }
167    }
168}
169
170impl fmt::Display for Error {
171    fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
172        match *self {
173            Error::Deserialization(ref e) => e.fmt(f),
174            Error::IO(ref e) => e.fmt(f),
175            Error::LanguageMismatch { expected, found } =>
176                write!(f, "\
177Language mismatch: attempted to load a dictionary for `{}`, but found
178a dictionary for `{}` instead.", expected, found),
179            Error::Resource => {
180                let e = self as &dyn error::Error;
181                e.description().fmt(f)
182            }
183        }
184    }
185}
186
187impl From<io::Error> for Error {
188    fn from(err : io::Error) -> Error { Error::IO(err) }
189}
190
191impl From<bin::Error> for Error {
192    fn from(err : bin::Error) -> Error { Error::Deserialization(err) }
193}