libquilt/storage/
file.rs

1// Copyright 2018-2019 Joe Neeman.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8//
9// See the LICENSE-APACHE or LICENSE-MIT files at the top-level directory
10// of this distribution.
11
12use crate::storage::Storage;
13use crate::NodeId;
14
15/// A `File` is a special case of a [`Graggle`](crate::Graggle), in which there is just a linear order.
16///
17/// This struct offers convenient (read-only) access to a `File`, allowing the contents and ids of
18/// nodes to be access by indexing.
19///
20/// The most convenient way to get a [`File`] is through [`Repo::file`](crate::Repo::file), but they can also
21/// be built from raw bytes (using [`File::from_bytes`]).
22#[derive(Clone, Debug, Eq, PartialEq)]
23pub struct File {
24    ids: Vec<NodeId>,
25    // The contents of the file, in one long vector.
26    contents: Vec<u8>,
27    // The ith node is in contents[boundaries[i]..boundaries[i+1]]. In particular, boundaries is
28    // always one longer than ids.
29    boundaries: Vec<usize>,
30}
31
32impl File {
33    /// Creates a `File` from a slice of node ids. The contents of those nodes will be retrieved
34    /// from `storage`.
35    pub(crate) fn from_ids(ids: &[NodeId], storage: &Storage) -> File {
36        let mut contents = Vec::new();
37        let mut boundaries = Vec::new();
38        for id in ids {
39            boundaries.push(contents.len());
40            contents.extend_from_slice(storage.contents(id));
41        }
42        boundaries.push(contents.len());
43        File {
44            contents,
45            boundaries,
46            ids: ids.to_owned(),
47        }
48    }
49
50    /// Creates a [`File`] from the raw bytes, by dividing them into lines.
51    ///
52    /// The [`NodeId`]s will be synthesized: they will have empty [`PatchId`](crate::PatchId)s, and
53    /// their node indices will be consecutive, starting from zero.
54    pub fn from_bytes(bytes: &[u8]) -> File {
55        let contents = bytes.to_owned();
56
57        // Finds the positions of the beginnings of all the lines, including the position of the
58        // EOF if there isn't a newline at the end of the file.
59        let mut boundaries = vec![0];
60        boundaries.extend(
61            bytes
62                .iter()
63                .enumerate()
64                .filter(|&(_, &b)| b == b'\n')
65                .map(|x| x.0 + 1),
66        );
67        if let Some(&last) = bytes.last() {
68            if last != b'\n' {
69                boundaries.push(bytes.len());
70            }
71        }
72
73        let ids = (0..(boundaries.len() as u64 - 1))
74            .map(NodeId::cur)
75            .collect();
76
77        File {
78            ids,
79            contents,
80            boundaries,
81        }
82    }
83
84    /// How many nodes does this file have?
85    ///
86    /// Currently, "nodes" is synonymous with "lines", but that may not necessarily be the case in
87    /// the future (for example, we could diff files based on words instead of lines).
88    pub fn num_nodes(&self) -> usize {
89        self.ids.len()
90    }
91
92    /// Gets the contents of the node at the given index. This includes the `\n` character, if
93    /// there was one.
94    pub fn node(&self, idx: usize) -> &[u8] {
95        let start = self.boundaries[idx];
96        let end = self.boundaries[idx + 1];
97        &self.contents[start..end]
98    }
99
100    /// Gets the id of the node at the given index.
101    pub fn node_id(&self, idx: usize) -> &NodeId {
102        &self.ids[idx]
103    }
104
105    /// Gets the whole file, as an array of bytes.
106    pub fn as_bytes(&self) -> &[u8] {
107        &self.contents[..]
108    }
109}
110
// Unit tests for `File::from_bytes` and the read accessors. They inspect the private
// `boundaries` and `ids` fields directly to pin down the line-splitting behaviour.
#[cfg(test)]
mod tests {
    use super::File;

    // Empty input: only the initial boundary is present, and there are no nodes.
    #[test]
    fn from_bytes_empty() {
        let f = File::from_bytes(b"");
        assert_eq!(f.boundaries, vec![0]);
        assert_eq!(f.num_nodes(), 0);
        assert_eq!(f.ids.len(), 0);
        assert_eq!(f.as_bytes(), b"");
    }

    // A lone newline is a single node whose contents are just that newline.
    #[test]
    fn from_bytes_one_empty_line() {
        let f = File::from_bytes(b"\n");
        assert_eq!(f.boundaries, vec![0, 1]);
        assert_eq!(f.num_nodes(), 1);
        assert_eq!(f.ids.len(), 1);
        assert_eq!(f.node(0), b"\n");
    }

    // Input without a trailing newline still produces a node that ends at EOF.
    #[test]
    fn from_bytes_one_line_no_newline() {
        let f = File::from_bytes(b"test");
        assert_eq!(f.boundaries, vec![0, 4]);
        assert_eq!(f.num_nodes(), 1);
        assert_eq!(f.ids.len(), 1);
        assert_eq!(f.node(0), b"test");
    }

    // A terminated line includes its newline in the node contents.
    #[test]
    fn from_bytes_one_line() {
        let f = File::from_bytes(b"test\n");
        assert_eq!(f.boundaries, vec![0, 5]);
        assert_eq!(f.num_nodes(), 1);
        assert_eq!(f.ids.len(), 1);
        assert_eq!(f.node(0), b"test\n");
    }

    // Two terminated lines: the trailing newline does not create an extra, empty node.
    #[test]
    fn from_bytes_two_lines() {
        let f = File::from_bytes(b"test1\ntest2\n");
        assert_eq!(f.boundaries, vec![0, 6, 12]);
        assert_eq!(f.num_nodes(), 2);
        assert_eq!(f.ids.len(), 2);
        assert_eq!(f.node(0), b"test1\n");
        assert_eq!(f.node(1), b"test2\n");
        assert_eq!(f.as_bytes(), b"test1\ntest2\n");
    }

    // Mixed case: the first line is terminated, the last one is not. This is the only case in
    // which both the newline scan and the EOF check contribute boundaries.
    #[test]
    fn from_bytes_two_lines_no_trailing_newline() {
        let f = File::from_bytes(b"test1\ntest2");
        assert_eq!(f.boundaries, vec![0, 6, 11]);
        assert_eq!(f.num_nodes(), 2);
        assert_eq!(f.ids.len(), 2);
        assert_eq!(f.node(0), b"test1\n");
        assert_eq!(f.node(1), b"test2");
        assert_eq!(f.as_bytes(), b"test1\ntest2");
    }
}