lindera_dictionary/
util.rs

1use std::fs::File;
2use std::io::{Read, Write};
3use std::ops::Deref;
4use std::path::Path;
5
6#[cfg(feature = "mmap")]
7use memmap2::Mmap;
8
9use anyhow::anyhow;
10use encoding_rs::Encoding;
11use serde::{Deserialize, Serialize};
12
13use crate::LinderaResult;
14#[cfg(feature = "compress")]
15use crate::compress::compress;
16use crate::decompress::Algorithm;
17use crate::error::LinderaErrorKind;
18
19use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
20
/// Compresses `buffer` with `algorithm`, serializes the compressed value with
/// rkyv and writes the resulting bytes to `writer`.
///
/// # Errors
///
/// Returns an error built from
/// - `LinderaErrorKind::Compression` when `compress` fails,
/// - `LinderaErrorKind::Serialize` when rkyv serialization fails,
/// - `LinderaErrorKind::Io` when writing to `writer` fails.
#[cfg(feature = "compress")]
pub fn compress_write<W: Write>(
    buffer: &[u8],
    algorithm: Algorithm,
    writer: &mut W,
) -> LinderaResult<()> {
    // Step 1: compress the raw input.
    let compressed = compress(buffer, algorithm).map_err(|cause| {
        let context = format!("Failed to compress data with {algorithm:?} algorithm");
        LinderaErrorKind::Compression
            .with_error(cause)
            .add_context(context)
    })?;

    // Step 2: turn the compressed value into its rkyv byte representation.
    let serialized = rkyv::to_bytes::<rkyv::rancor::Error>(&compressed).map_err(|cause| {
        let wrapped = anyhow!(cause);
        LinderaErrorKind::Serialize
            .with_error(wrapped)
            .add_context("Failed to serialize compressed data")
    })?;

    // Step 3: emit the serialized bytes in a single write.
    writer.write_all(&serialized).map_err(|cause| {
        LinderaErrorKind::Io
            .with_error(cause)
            .add_context("Failed to write compressed data to output")
    })?;

    Ok(())
}
50
51#[cfg(not(feature = "compress"))]
52pub fn compress_write<W: Write>(
53    buffer: &[u8],
54    _algorithm: Algorithm,
55    writer: &mut W,
56) -> LinderaResult<()> {
57    writer.write_all(buffer).map_err(|err| {
58        LinderaErrorKind::Io
59            .with_error(err)
60            .add_context("Failed to write data to output")
61    })?;
62
63    Ok(())
64}
65
66pub fn read_file(filename: &Path) -> LinderaResult<Vec<u8>> {
67    let mut input_read = File::open(filename).map_err(|err| {
68        LinderaErrorKind::Io
69            .with_error(err)
70            .add_context(format!("Failed to open file: {}", filename.display()))
71    })?;
72    let mut buffer = Vec::new();
73    input_read.read_to_end(&mut buffer).map_err(|err| {
74        LinderaErrorKind::Io.with_error(err).add_context(format!(
75            "Failed to read file contents: {}",
76            filename.display()
77        ))
78    })?;
79    Ok(buffer)
80}
81
/// Opens `filename` and returns a memory map of it.
///
/// Only available when the `mmap` feature is enabled.
///
/// # Errors
///
/// Returns an error built from `LinderaErrorKind::Io` when the file cannot be
/// opened or when creating the mapping fails.
#[cfg(feature = "mmap")]
pub fn mmap_file(filename: &Path) -> LinderaResult<Mmap> {
    let handle = File::open(filename).map_err(|cause| {
        let context = format!(
            "Failed to open file for memory mapping: {}",
            filename.display()
        );
        LinderaErrorKind::Io.with_error(cause).add_context(context)
    })?;

    // SAFETY: NOTE(review) — `Mmap::map` is an unsafe API; soundness presumably
    // relies on the file not being truncated or modified while the mapping is
    // alive. Nothing in this function enforces that — confirm with callers.
    let mapping = unsafe { Mmap::map(&handle) };

    let mapped = mapping.map_err(|cause| {
        let context = format!("Failed to memory map file: {}", filename.display());
        LinderaErrorKind::Io.with_error(cause).add_context(context)
    })?;

    Ok(mapped)
}
97
98pub fn read_file_with_encoding(filepath: &Path, encoding_name: &str) -> LinderaResult<String> {
99    let encoding = Encoding::for_label_no_replacement(encoding_name.as_bytes());
100    let encoding = encoding.ok_or_else(|| {
101        LinderaErrorKind::Decode.with_error(anyhow!("Invalid encoding: {encoding_name}"))
102    })?;
103
104    let buffer = read_file(filepath)?;
105    Ok(encoding.decode(&buffer).0.into_owned())
106}
107
108use std::sync::Arc;
109
/// A byte buffer that can be backed by one of several storage kinds.
///
/// Every variant exposes its contents as `[u8]` through the `Deref`
/// implementation in this file, so consumers do not need to care which
/// backing store is in use.
#[derive(Clone)]
pub enum Data {
    /// Bytes borrowed for the `'static` lifetime.
    Static(&'static [u8]),
    /// Bytes owned on the heap.
    Vec(Vec<u8>),
    /// A memory-mapped file shared behind an `Arc`
    /// (only present when the `mmap` feature is enabled).
    #[cfg(feature = "mmap")]
    Map(Arc<Mmap>),
}
117
118impl Archive for Data {
119    type Archived = rkyv::vec::ArchivedVec<u8>;
120    type Resolver = rkyv::vec::VecResolver;
121
122    fn resolve(&self, resolver: Self::Resolver, out: rkyv::Place<Self::Archived>) {
123        rkyv::vec::ArchivedVec::resolve_from_slice(self.deref(), resolver, out);
124    }
125}
126
127impl<S> RkyvSerialize<S> for Data
128where
129    S: rkyv::rancor::Fallible + rkyv::ser::Writer + rkyv::ser::Allocator + ?Sized,
130{
131    fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
132        rkyv::vec::ArchivedVec::serialize_from_slice(self.deref(), serializer)
133    }
134}
135
136impl<D: rkyv::rancor::Fallible + ?Sized> RkyvDeserialize<Data, D> for rkyv::vec::ArchivedVec<u8> {
137    fn deserialize(&self, _deserializer: &mut D) -> Result<Data, D::Error> {
138        let mut vec = Vec::with_capacity(self.len());
139        vec.extend_from_slice(self.as_slice());
140        Ok(Data::Vec(vec))
141    }
142}
143
144impl Deref for Data {
145    type Target = [u8];
146    fn deref(&self) -> &Self::Target {
147        match self {
148            Data::Static(s) => s,
149            Data::Vec(v) => v,
150            #[cfg(feature = "mmap")]
151            Data::Map(m) => m,
152        }
153    }
154}
155
156impl From<&'static [u8]> for Data {
157    fn from(s: &'static [u8]) -> Self {
158        Self::Static(s)
159    }
160}
161
162impl<T: Deref<Target = [u8]>> From<&'static T> for Data {
163    fn from(t: &'static T) -> Self {
164        Self::Static(t)
165    }
166}
167
168impl From<Vec<u8>> for Data {
169    fn from(v: Vec<u8>) -> Self {
170        Self::Vec(v)
171    }
172}
173
/// Takes ownership of a memory map and places it behind an `Arc`
/// (only available when the `mmap` feature is enabled).
#[cfg(feature = "mmap")]
impl From<Mmap> for Data {
    fn from(mapping: Mmap) -> Self {
        let shared = Arc::new(mapping);
        Data::Map(shared)
    }
}
180
181impl Serialize for Data {
182    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
183    where
184        S: serde::Serializer,
185    {
186        serializer.serialize_bytes(self.deref())
187    }
188}
189
190impl<'de> Deserialize<'de> for Data {
191    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
192    where
193        D: serde::Deserializer<'de>,
194    {
195        let v = <Vec<u8> as serde::Deserialize>::deserialize(deserializer)?;
196        Ok(Data::Vec(v))
197    }
198}