libquilt/storage/file.rs
// Copyright 2018-2019 Joe Neeman.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
//
// See the LICENSE-APACHE or LICENSE-MIT files at the top-level directory
// of this distribution.
11
12use crate::storage::Storage;
13use crate::NodeId;
14
15/// A `File` is a special case of a [`Graggle`](crate::Graggle), in which there is just a linear order.
16///
17/// This struct offers convenient (read-only) access to a `File`, allowing the contents and ids of
18/// nodes to be access by indexing.
19///
20/// The most convenient way to get a [`File`] is through [`Repo::file`](crate::Repo::file), but they can also
21/// be built from raw bytes (using [`File::from_bytes`]).
22#[derive(Clone, Debug, Eq, PartialEq)]
23pub struct File {
24 ids: Vec<NodeId>,
25 // The contents of the file, in one long vector.
26 contents: Vec<u8>,
27 // The ith node is in contents[boundaries[i]..boundaries[i+1]]. In particular, boundaries is
28 // always one longer than ids.
29 boundaries: Vec<usize>,
30}
31
32impl File {
33 /// Creates a `File` from a slice of node ids. The contents of those nodes will be retrieved
34 /// from `storage`.
35 pub(crate) fn from_ids(ids: &[NodeId], storage: &Storage) -> File {
36 let mut contents = Vec::new();
37 let mut boundaries = Vec::new();
38 for id in ids {
39 boundaries.push(contents.len());
40 contents.extend_from_slice(storage.contents(id));
41 }
42 boundaries.push(contents.len());
43 File {
44 contents,
45 boundaries,
46 ids: ids.to_owned(),
47 }
48 }
49
50 /// Creates a [`File`] from the raw bytes, by dividing them into lines.
51 ///
52 /// The [`NodeId`]s will be synthesized: they will have empty [`PatchId`](crate::PatchId)s, and
53 /// their node indices will be consecutive, starting from zero.
54 pub fn from_bytes(bytes: &[u8]) -> File {
55 let contents = bytes.to_owned();
56
57 // Finds the positions of the beginnings of all the lines, including the position of the
58 // EOF if there isn't a newline at the end of the file.
59 let mut boundaries = vec![0];
60 boundaries.extend(
61 bytes
62 .iter()
63 .enumerate()
64 .filter(|&(_, &b)| b == b'\n')
65 .map(|x| x.0 + 1),
66 );
67 if let Some(&last) = bytes.last() {
68 if last != b'\n' {
69 boundaries.push(bytes.len());
70 }
71 }
72
73 let ids = (0..(boundaries.len() as u64 - 1))
74 .map(NodeId::cur)
75 .collect();
76
77 File {
78 ids,
79 contents,
80 boundaries,
81 }
82 }
83
84 /// How many nodes does this file have?
85 ///
86 /// Currently, "nodes" is synonymous with "lines", but that may not necessarily be the case in
87 /// the future (for example, we could diff files based on words instead of lines).
88 pub fn num_nodes(&self) -> usize {
89 self.ids.len()
90 }
91
92 /// Gets the contents of the node at the given index. This includes the `\n` character, if
93 /// there was one.
94 pub fn node(&self, idx: usize) -> &[u8] {
95 let start = self.boundaries[idx];
96 let end = self.boundaries[idx + 1];
97 &self.contents[start..end]
98 }
99
100 /// Gets the id of the node at the given index.
101 pub fn node_id(&self, idx: usize) -> &NodeId {
102 &self.ids[idx]
103 }
104
105 /// Gets the whole file, as an array of bytes.
106 pub fn as_bytes(&self) -> &[u8] {
107 &self.contents[..]
108 }
109}
110
#[cfg(test)]
mod tests {
    use super::File;

    // Checks that both ways of counting the nodes of `file` agree with `expected`.
    fn assert_node_count(file: &File, expected: usize) {
        assert_eq!(file.num_nodes(), expected);
        assert_eq!(file.ids.len(), expected);
    }

    // Empty input: a single boundary and no nodes.
    #[test]
    fn from_bytes_empty() {
        let file = File::from_bytes(b"");
        assert_eq!(file.boundaries, vec![0]);
        assert_node_count(&file, 0);
    }

    // A lone newline is one (empty) line.
    #[test]
    fn from_bytes_one_empty_line() {
        let file = File::from_bytes(b"\n");
        assert_eq!(file.boundaries, vec![0, 1]);
        assert_node_count(&file, 1);
    }

    // A line without a trailing newline is still a node.
    #[test]
    fn from_bytes_one_line_no_newline() {
        let file = File::from_bytes(b"test");
        assert_eq!(file.boundaries, vec![0, 4]);
        assert_node_count(&file, 1);
        assert_eq!(file.node(0), b"test");
    }

    // A terminated line keeps its newline as part of the node.
    #[test]
    fn from_bytes_one_line() {
        let file = File::from_bytes(b"test\n");
        assert_eq!(file.boundaries, vec![0, 5]);
        assert_node_count(&file, 1);
        assert_eq!(file.node(0), b"test\n");
    }

    // Two terminated lines give two nodes and no extra trailing node.
    #[test]
    fn from_bytes_two_lines() {
        let file = File::from_bytes(b"test1\ntest2\n");
        assert_eq!(file.boundaries, vec![0, 6, 12]);
        assert_node_count(&file, 2);
        assert_eq!(file.node(0), b"test1\n");
        assert_eq!(file.node(1), b"test2\n");
    }
}