finalfusion/
io.rs

//! Traits for I/O.
//!
//! This module provides traits for reading embeddings
//! (`ReadEmbeddings`), reading embeddings metadata (`ReadMetadata`),
//! memory mapping embeddings (`MmapEmbeddings`), and writing
//! embeddings (`WriteEmbeddings`). The `Result` type returned by the
//! trait methods is imported from the crate's `error` module.
8
9use std::fs::File;
10use std::io::{BufReader, Read, Seek, Write};
11
12use crate::error::Result;
13
/// Load embeddings stored in the finalfusion format.
///
/// Types that can be deserialized from finalfusion data implement
/// this trait. Implementations are provided for the vocabulary and
/// storage types in this crate.
///
/// # Examples
///
/// ```
/// use std::fs::File;
///
/// use finalfusion::prelude::*;
///
/// let mut f = File::open("testdata/similarity.fifu").unwrap();
/// let embeddings: Embeddings<VocabWrap, StorageWrap> =
///     Embeddings::read_embeddings(&mut f).unwrap();
/// ```
pub trait ReadEmbeddings: Sized {
    /// Construct `Self` from the finalfusion data supplied by `read`.
    ///
    /// The reader has to support seeking in addition to reading.
    /// Failures are reported through the returned `Result`.
    fn read_embeddings<R: Read + Seek>(read: &mut R) -> Result<Self>;
}
38
/// Load only the metadata of finalfusion embeddings.
///
/// Use this trait when just the metadata stored alongside embeddings
/// in the finalfusion format is needed. This is typically faster
/// than [`ReadEmbeddings::read_embeddings`].
///
/// # Examples
///
/// ```
/// use std::fs::File;
///
/// use finalfusion::prelude::*;
/// use finalfusion::metadata::Metadata;
/// use finalfusion::io::ReadMetadata;
///
/// let mut f = File::open("testdata/similarity.fifu").unwrap();
/// let metadata: Option<Metadata> =
///     ReadMetadata::read_metadata(&mut f).unwrap();
/// ```
pub trait ReadMetadata: Sized {
    /// Construct `Self` from the metadata found in `read`.
    ///
    /// The reader has to support seeking in addition to reading.
    /// Failures are reported through the returned `Result`.
    fn read_metadata<R: Read + Seek>(read: &mut R) -> Result<Self>;
}
65
/// Load finalfusion embeddings with a memory-mapped embedding matrix.
///
/// Implementors read finalfusion embeddings while [memory
/// mapping](https://en.wikipedia.org/wiki/Mmap) the embedding matrix
/// rather than copying it into memory. This leads to considerable
/// memory savings, since the operating system will load the relevant
/// pages from disk on demand.
pub trait MmapEmbeddings: Sized {
    /// Construct `Self` from the buffered file `read`, memory mapping
    /// the embedding matrix.
    ///
    /// Failures are reported through the returned `Result`.
    fn mmap_embeddings(read: &mut BufReader<File>) -> Result<Self>;
}
78
/// Serialize embeddings to the finalfusion format.
///
/// Implementors can be written out as finalfusion data. Writing in
/// finalfusion format is supported regardless of the original format
/// of the embeddings.
pub trait WriteEmbeddings {
    /// Write the embeddings to `write` in finalfusion format.
    ///
    /// The writer has to support seeking in addition to writing.
    /// Failures are reported through the returned `Result`.
    fn write_embeddings<W: Write + Seek>(&self, write: &mut W) -> Result<()>;

    /// Compute a length for the serialized embeddings, given `offset`.
    ///
    /// NOTE(review): presumably this is the number of bytes
    /// `write_embeddings` emits when writing starts at position
    /// `offset` in the output; confirm against the implementations.
    fn write_embeddings_len(&self, offset: u64) -> u64;
}
88
89    fn write_embeddings_len(&self, offset: u64) -> u64;
90}