hyphenation/
load.rs

1/*! # Reading and loading hyphenation dictionaries
2
3To hyphenate words in a given language, it is first necessary to load
4the relevant hyphenation dictionary into memory. This module offers
5convenience methods for common retrieval patterns, courtesy of the
6[`Load`] trait.
7
8```
9use hyphenation::Load;
10use hyphenation::{Standard, Language};
11```
12
13The primary function of [`Load`] is to deserialize dictionaries from
14buffers – usually, file buffers.
15
16```ignore
17use std::io;
18use std::fs::File;
19
20let path_to_dict = "/path/to/english-dictionary.bincode";
21let dict_file = File::open(path_to_dict) ?;
22let mut reader = io::BufReader::new(dict_file);
23let english_us = Standard::from_reader(Language::EnglishUS, &mut reader) ?;
24```
25
Dictionaries can be loaded from the file system rather more succinctly with
27the [`from_path`] shorthand:
28
29```ignore
30let path_to_dict = "/path/to/english-dictionary.bincode";
31let english_us = Standard::from_path(Language::EnglishUS, path_to_dict) ?;
32```
33
34Dictionaries bundled with the `hyphenation` library are copied to Cargo's
35output directory at build time. To locate them, look for a `dictionaries`
36folder under `target`:
37
38```text
39$ find target -name "dictionaries"
40target/debug/build/hyphenation-33034db3e3b5f3ce/out/dictionaries
41```
42
43
44## Embedding
45
46Optionally, hyphenation dictionaries can be embedded in the compiled
47artifact by enabling the `embed_all` feature. Embedded dictionaries can be
48accessed directly from memory.
49
50```ignore
51use hyphenation::{Standard, Language, Load};
52
53let english_us = Standard::from_embedded(Language::EnglishUS) ?;
54```
55
56Note that embedding significantly increases the size of the compiled artifact.
57
58
59[`Load`]: trait.Load.html
60[`from_path`]: trait.Load.html#method.from_path
61*/
62
63use bincode as bin;
64#[cfg(any(feature = "embed_all", feature = "embed_en-us"))]
65use crate::resources::ResourceId;
66use std::error;
67use std::fmt;
68use std::fs::File;
69use std::io;
70use std::path::Path;
71use std::result;
72
73use hyphenation_commons::dictionary::{extended::Extended, Standard};
74use hyphenation_commons::Language;
75
76
77/// Convenience methods for the retrieval of hyphenation dictionaries.
78pub trait Load: Sized {
79    /// Read and deserialize the dictionary at the given path, verifying that it
80    /// belongs to the expected language.
81    fn from_path<P>(lang : Language, path : P) -> Result<Self>
82        where P : AsRef<Path>
83    {
84        let file = File::open(path)?;
85        Self::from_reader(lang, &mut io::BufReader::new(file))
86    }
87
88    /// Deserialize a dictionary from the provided reader, verifying that it
89    /// belongs to the expected language.
90    fn from_reader<R>(lang : Language, reader : &mut R) -> Result<Self>
91        where R : io::Read;
92
93    /// Deserialize a dictionary from the provided reader.
94    fn any_from_reader<R>(reader : &mut R) -> Result<Self>
95        where R : io::Read;
96
97    #[cfg(any(feature = "embed_all", feature = "embed_en-us"))]
98    /// Deserialize the embedded dictionary for the given language.
99    fn from_embedded(lang : Language) -> Result<Self>;
100}
101
102macro_rules! impl_load {
103    ($dict:ty, $suffix:expr) => {
104        impl Load for $dict {
105            fn from_reader<R>(lang : Language, reader : &mut R) -> Result<Self>
106                where R : io::Read
107            {
108                let dict : Self = bin::deserialize_from(reader)?;
109                let (found, expected) = (dict.language(), lang);
110                if found != expected {
111                    Err(Error::LanguageMismatch { expected, found })
112                } else {
113                    Ok(dict)
114                }
115            }
116
117            fn any_from_reader<R>(reader : &mut R) -> Result<Self>
118                where R : io::Read
119            {
120                let dict : Self = bin::deserialize_from(reader)?;
121                Ok(dict)
122            }
123
124            #[cfg(any(feature = "embed_all", feature = "embed_en-us"))]
125            fn from_embedded(lang : Language) -> Result<Self> {
126                let dict_bytes = retrieve_resource(lang.code(), $suffix)?;
127                let dict = bin::deserialize(dict_bytes)?;
128                Ok(dict)
129            }
130        }
131    };
132}
133
134impl_load! { Standard, "standard" }
135impl_load! { Extended, "extended" }
136
137
/// Fetch the bytes of the embedded resource named `<code>.<suffix>.bincode`.
///
/// Fails with `Error::Resource` when no embedded resource carries that name.
#[cfg(any(feature = "embed_all", feature = "embed_en-us"))]
fn retrieve_resource<'a>(code : &str, suffix : &str) -> Result<&'a [u8]> {
    let resource_name = format!("{}.{}.bincode", code, suffix);
    let id : ResourceId = ResourceId::from_name(&resource_name).ok_or(Error::Resource)?;
    Ok(id.load())
}
147
148
149pub type Result<T> = result::Result<T, Error>;
150
151/// Failure modes of dictionary loading.
152#[derive(Debug)]
153pub enum Error {
154    /// The dictionary could not be deserialized.
155    Deserialization(bin::Error),
156    /// The dictionary could not be read.
157    IO(io::Error),
158    /// The loaded dictionary is for the wrong language.
159    LanguageMismatch {
160        expected : Language,
161        found :    Language,
162    },
163    /// The embedded dictionary could not be retrieved.
164    Resource,
165}
166
167impl error::Error for Error {
168    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
169        match *self {
170            Error::Deserialization(ref e) => Some(e),
171            Error::IO(ref e) => Some(e),
172            _ => None,
173        }
174    }
175}
176
177impl fmt::Display for Error {
178    fn fmt(&self, f : &mut fmt::Formatter) -> fmt::Result {
179        match *self {
180            Error::Deserialization(ref e) => e.fmt(f),
181            Error::IO(ref e) => e.fmt(f),
182            Error::LanguageMismatch { expected, found } => write!(
183                                                                  f,
184                                                                  "\
185Language mismatch: attempted to load a dictionary for `{}`, but found
186a dictionary for `{}` instead.",
187                                                                  expected, found
188            ),
189            Error::Resource => f.write_str("the embedded dictionary could not be retrieved"),
190        }
191    }
192}
193
194impl From<io::Error> for Error {
195    fn from(err : io::Error) -> Error { Error::IO(err) }
196}
197
198impl From<bin::Error> for Error {
199    fn from(err : bin::Error) -> Error { Error::Deserialization(err) }
200}