Skip to main content

lindera_dictionary/
util.rs

1use std::fs::File;
2use std::io::{Read, Write};
3use std::ops::Deref;
4use std::path::Path;
5
6#[cfg(feature = "mmap")]
7use memmap2::Mmap;
8
9use anyhow::anyhow;
10use encoding_rs::Encoding;
11use serde::{Deserialize, Serialize};
12
13use crate::LinderaResult;
14use crate::error::LinderaErrorKind;
15
16use rkyv::{Archive, Deserialize as RkyvDeserialize, Serialize as RkyvSerialize};
17
18/// Write data directly to the writer.
19pub fn write_data<W: Write>(buffer: &[u8], writer: &mut W) -> LinderaResult<()> {
20    writer.write_all(buffer).map_err(|err| {
21        LinderaErrorKind::Io
22            .with_error(err)
23            .add_context("Failed to write data to output")
24    })?;
25    Ok(())
26}
27
28pub fn read_file(filename: &Path) -> LinderaResult<Vec<u8>> {
29    let mut input_read = File::open(filename).map_err(|err| {
30        LinderaErrorKind::Io
31            .with_error(err)
32            .add_context(format!("Failed to open file: {}", filename.display()))
33    })?;
34    let mut buffer = Vec::new();
35    input_read.read_to_end(&mut buffer).map_err(|err| {
36        LinderaErrorKind::Io.with_error(err).add_context(format!(
37            "Failed to read file contents: {}",
38            filename.display()
39        ))
40    })?;
41    Ok(buffer)
42}
43
/// Opens `filename` and maps it into memory as a read-only [`Mmap`].
///
/// Only compiled when the `mmap` feature is enabled.
///
/// # Errors
///
/// Returns a `LinderaErrorKind::Io` error, annotated with the file path, if
/// the file cannot be opened or if creating the mapping fails.
#[cfg(feature = "mmap")]
pub fn mmap_file(filename: &Path) -> LinderaResult<Mmap> {
    let handle = File::open(filename).map_err(|io_err| {
        let context = format!(
            "Failed to open file for memory mapping: {}",
            filename.display()
        );
        LinderaErrorKind::Io.with_error(io_err).add_context(context)
    })?;

    // SAFETY: `Mmap::map` is unsafe because the mapped bytes are only sound to
    // read while the underlying file is not modified or truncated elsewhere.
    // NOTE(review): nothing in this function enforces that; it presumably
    // relies on the mapped files being treated as read-only while in use —
    // confirm against callers.
    let mapped = unsafe { Mmap::map(&handle) };

    let mapping = mapped.map_err(|io_err| {
        let context = format!("Failed to memory map file: {}", filename.display());
        LinderaErrorKind::Io.with_error(io_err).add_context(context)
    })?;
    Ok(mapping)
}
59
60pub fn read_file_with_encoding(filepath: &Path, encoding_name: &str) -> LinderaResult<String> {
61    let encoding = Encoding::for_label_no_replacement(encoding_name.as_bytes());
62    let encoding = encoding.ok_or_else(|| {
63        LinderaErrorKind::Decode.with_error(anyhow!("Invalid encoding: {encoding_name}"))
64    })?;
65
66    let buffer = read_file(filepath)?;
67    Ok(encoding.decode(&buffer).0.into_owned())
68}
69
70use std::sync::Arc;
71
/// A block of bytes that may live in static memory, in an owned buffer, or
/// (with the `mmap` feature) in a shared memory map.
///
/// Whatever the variant, the contents are exposed as a `[u8]` slice through
/// the `Deref` implementation in this file.
#[derive(Clone)]
pub enum Data {
    /// Bytes borrowed for the `'static` lifetime.
    Static(&'static [u8]),
    /// Bytes held in an owned, heap-allocated vector.
    Vec(Vec<u8>),
    /// Memory-mapped bytes behind an `Arc`, so cloning `Data` clones the
    /// handle and shares the same mapping.
    #[cfg(feature = "mmap")]
    Map(Arc<Mmap>),
}
79
80impl Archive for Data {
81    type Archived = rkyv::vec::ArchivedVec<u8>;
82    type Resolver = rkyv::vec::VecResolver;
83
84    fn resolve(&self, resolver: Self::Resolver, out: rkyv::Place<Self::Archived>) {
85        rkyv::vec::ArchivedVec::resolve_from_slice(self.deref(), resolver, out);
86    }
87}
88
89impl<S> RkyvSerialize<S> for Data
90where
91    S: rkyv::rancor::Fallible + rkyv::ser::Writer + rkyv::ser::Allocator + ?Sized,
92{
93    fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
94        rkyv::vec::ArchivedVec::serialize_from_slice(self.deref(), serializer)
95    }
96}
97
98impl<D: rkyv::rancor::Fallible + ?Sized> RkyvDeserialize<Data, D> for rkyv::vec::ArchivedVec<u8> {
99    fn deserialize(&self, _deserializer: &mut D) -> Result<Data, D::Error> {
100        let mut vec = Vec::with_capacity(self.len());
101        vec.extend_from_slice(self.as_slice());
102        Ok(Data::Vec(vec))
103    }
104}
105
106impl Deref for Data {
107    type Target = [u8];
108    fn deref(&self) -> &Self::Target {
109        match self {
110            Data::Static(s) => s,
111            Data::Vec(v) => v,
112            #[cfg(feature = "mmap")]
113            Data::Map(m) => m,
114        }
115    }
116}
117
118impl From<&'static [u8]> for Data {
119    fn from(s: &'static [u8]) -> Self {
120        Self::Static(s)
121    }
122}
123
124impl<T: Deref<Target = [u8]>> From<&'static T> for Data {
125    fn from(t: &'static T) -> Self {
126        Self::Static(t)
127    }
128}
129
130impl From<Vec<u8>> for Data {
131    fn from(v: Vec<u8>) -> Self {
132        Self::Vec(v)
133    }
134}
135
/// Takes ownership of a memory map and places it behind an `Arc`, as the
/// `Data::Map` variant requires.
#[cfg(feature = "mmap")]
impl From<Mmap> for Data {
    fn from(m: Mmap) -> Self {
        let shared = Arc::new(m);
        Data::Map(shared)
    }
}
142
143impl Serialize for Data {
144    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
145    where
146        S: serde::Serializer,
147    {
148        serializer.serialize_bytes(self.deref())
149    }
150}
151
152impl<'de> Deserialize<'de> for Data {
153    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
154    where
155        D: serde::Deserializer<'de>,
156    {
157        let v = <Vec<u8> as serde::Deserialize>::deserialize(deserializer)?;
158        Ok(Data::Vec(v))
159    }
160}