lindera_dictionary/
util.rs

1use std::fs::File;
2use std::io::{Read, Write};
3use std::ops::Deref;
4use std::path::Path;
5
6#[cfg(feature = "mmap")]
7use memmap2::Mmap;
8
9use anyhow::anyhow;
10use encoding_rs::Encoding;
11use serde::{Deserialize, Serialize};
12
13use crate::LinderaResult;
14#[cfg(feature = "compress")]
15use crate::compress::compress;
16use crate::decompress::Algorithm;
17use crate::error::LinderaErrorKind;
18
/// Compresses `buffer` with `algorithm` and writes the encoded result to `writer`.
///
/// The value returned by `compress` is serialized with bincode's legacy
/// configuration directly into `writer`.
///
/// # Errors
///
/// Returns a `LinderaErrorKind::Compression` error when compression fails, and
/// a `LinderaErrorKind::Io` error when the encoded payload cannot be written.
#[cfg(feature = "compress")]
pub fn compress_write<W: Write>(
    buffer: &[u8],
    algorithm: Algorithm,
    writer: &mut W,
) -> LinderaResult<()> {
    let payload = compress(buffer, algorithm).map_err(|err| {
        let context = format!("Failed to compress data with {algorithm:?} algorithm");
        LinderaErrorKind::Compression
            .with_error(err)
            .add_context(context)
    })?;

    let config = bincode::config::legacy();
    // The byte count reported by bincode is not needed by callers.
    let _bytes_written = bincode::serde::encode_into_std_write(&payload, writer, config)
        .map_err(|err| {
            LinderaErrorKind::Io
                .with_error(err)
                .add_context("Failed to write compressed data to output")
        })?;

    Ok(())
}
42
43#[cfg(not(feature = "compress"))]
44pub fn compress_write<W: Write>(
45    buffer: &[u8],
46    _algorithm: Algorithm,
47    writer: &mut W,
48) -> LinderaResult<()> {
49    writer.write_all(buffer).map_err(|err| {
50        LinderaErrorKind::Io
51            .with_error(err)
52            .add_context("Failed to write data to output")
53    })?;
54
55    Ok(())
56}
57
58pub fn read_file(filename: &Path) -> LinderaResult<Vec<u8>> {
59    let mut input_read = File::open(filename).map_err(|err| {
60        LinderaErrorKind::Io
61            .with_error(err)
62            .add_context(format!("Failed to open file: {}", filename.display()))
63    })?;
64    let mut buffer = Vec::new();
65    input_read.read_to_end(&mut buffer).map_err(|err| {
66        LinderaErrorKind::Io.with_error(err).add_context(format!(
67            "Failed to read file contents: {}",
68            filename.display()
69        ))
70    })?;
71    Ok(buffer)
72}
73
/// Opens `filename` and maps it into memory as a read-only [`Mmap`].
///
/// Only available with the `mmap` feature.
///
/// # Errors
///
/// Returns a `LinderaErrorKind::Io` error, annotated with the path, when the
/// file cannot be opened or when the mapping cannot be created.
#[cfg(feature = "mmap")]
pub fn mmap_file(filename: &Path) -> LinderaResult<Mmap> {
    let handle = File::open(filename).map_err(|err| {
        let context = format!(
            "Failed to open file for memory mapping: {}",
            filename.display()
        );
        LinderaErrorKind::Io.with_error(err).add_context(context)
    })?;

    // SAFETY: `Mmap::map` is unsafe because the mapped bytes may change (or the
    // mapping may fault) if the underlying file is modified or truncated while
    // it is mapped. This function cannot enforce that; it relies on the file
    // being left untouched for the lifetime of the returned mapping.
    let mapped = unsafe { Mmap::map(&handle) };

    let mmap = mapped.map_err(|err| {
        let context = format!("Failed to memory map file: {}", filename.display());
        LinderaErrorKind::Io.with_error(err).add_context(context)
    })?;

    Ok(mmap)
}
89
90pub fn read_file_with_encoding(filepath: &Path, encoding_name: &str) -> LinderaResult<String> {
91    let encoding = Encoding::for_label_no_replacement(encoding_name.as_bytes());
92    let encoding = encoding.ok_or_else(|| {
93        LinderaErrorKind::Decode.with_error(anyhow!("Invalid encoding: {encoding_name}"))
94    })?;
95
96    let buffer = read_file(filepath)?;
97    Ok(encoding.decode(&buffer).0.into_owned())
98}
99
/// A byte buffer that can be backed by one of several kinds of storage.
///
/// Whatever the backing storage, the contents are exposed as a `[u8]` through
/// the `Deref` implementation.
pub enum Data {
    /// Bytes borrowed for the `'static` lifetime.
    Static(&'static [u8]),
    /// Bytes owned in a heap-allocated vector.
    Vec(Vec<u8>),
    /// Bytes backed by a memory map (only with the `mmap` feature).
    #[cfg(feature = "mmap")]
    Map(Mmap),
}
106
107impl Deref for Data {
108    type Target = [u8];
109    fn deref(&self) -> &Self::Target {
110        match self {
111            Data::Static(s) => s,
112            Data::Vec(v) => v,
113            #[cfg(feature = "mmap")]
114            Data::Map(m) => m,
115        }
116    }
117}
118
119impl From<&'static [u8]> for Data {
120    fn from(s: &'static [u8]) -> Self {
121        Self::Static(s)
122    }
123}
124
125impl<T: Deref<Target = [u8]>> From<&'static T> for Data {
126    fn from(t: &'static T) -> Self {
127        Self::Static(t)
128    }
129}
130
131impl From<Vec<u8>> for Data {
132    fn from(v: Vec<u8>) -> Self {
133        Self::Vec(v)
134    }
135}
136
/// Takes ownership of a memory map (only with the `mmap` feature).
#[cfg(feature = "mmap")]
impl From<Mmap> for Data {
    fn from(mapping: Mmap) -> Self {
        Data::Map(mapping)
    }
}
143
144impl Clone for Data {
145    fn clone(&self) -> Self {
146        match self {
147            Data::Static(s) => Data::Static(s),
148            Data::Vec(v) => Data::Vec(v.clone()),
149            #[cfg(feature = "mmap")]
150            Data::Map(m) => Data::Vec(m.to_vec()),
151        }
152    }
153}
154
155impl Serialize for Data {
156    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
157    where
158        S: serde::Serializer,
159    {
160        match self {
161            Self::Static(s) => serializer.serialize_bytes(s),
162            Self::Vec(v) => serializer.serialize_bytes(v),
163            #[cfg(feature = "mmap")]
164            Self::Map(m) => serializer.serialize_bytes(m),
165        }
166    }
167}
168
169impl<'de> Deserialize<'de> for Data {
170    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
171    where
172        D: serde::Deserializer<'de>,
173    {
174        Vec::<u8>::deserialize(deserializer).map(Self::Vec)
175    }
176}