rsyn/flist.rs
1// Copyright 2020 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! File lists and entries.
16
17use std::cmp::Ordering;
18use std::convert::TryInto;
19use std::fmt;
20
21use anyhow::Context;
22use chrono::{Local, TimeZone};
23
24#[allow(unused_imports)]
25use log::{debug, error, info, trace, warn};
26
27use crate::varint::ReadVarint;
28use crate::Result;
29
// Bit flags carried in the status byte that precedes every file list entry.
// Flags that are not yet interpreted by this module are kept as comments.

// const STATUS_TOP_LEVEL_DIR: u8 = 1 << 0;
/// The entry's mode is not sent; it is copied from the previous entry.
const STATUS_REPEAT_MODE: u8 = 1 << 1;
// const STATUS_REPEAT_UID: u8 = 1 << 3;
// NOTE(review): this was previously written as 0x08, the same value as
// STATUS_REPEAT_UID. rsync's XMIT_SAME_GID appears to be bit 4 (0x10) --
// confirm against rsync.h before enabling.
// const STATUS_REPEAT_GID: u8 = 1 << 4;
/// A leading part of the entry's name is copied from the previous entry.
const STATUS_REPEAT_PARTIAL_NAME: u8 = 1 << 5;
/// The name length is sent as a 32-bit integer rather than a single byte.
const STATUS_LONG_NAME: u8 = 1 << 6;
/// The entry's mtime is not sent; it is copied from the previous entry.
const STATUS_REPEAT_MTIME: u8 = 1 << 7;

/// An owned string of bytes in no particular encoding.
type ByteString = Vec<u8>;
39
40/// Description of a single file (or directory or symlink etc).
41///
42/// The `Display` trait formats an entry like in `ls -l`, and like in rsync
43/// directory listings.
44#[derive(Debug, PartialEq, Eq)]
45pub struct FileEntry {
46 // Corresponds to rsync |file_struct|.
47 /// Name of this file, as a byte string.
48 name: Vec<u8>,
49
50 /// Length of the file, in bytes.
51 pub file_len: u64,
52
53 /// Unix mode, containing the file type and permissions.
54 pub mode: u32,
55
56 /// Modification time, in seconds since the Unix epoch.
57 mtime: u32,
58
59 /// If this is a symlink, the target.
60 link_target: Option<ByteString>,
61 // TODO: Other file_struct fields.
62 // TODO: Work out what |basedir| is and maybe include that.
63}
64
65impl FileEntry {
66 /// Returns the file name, as a byte string, in the (remote) OS's encoding.
67 ///
68 /// rsync doesn't constrain the encoding, so this will typically, but not
69 /// necessarily be UTF-8.
70 // TODO: Also offer it as an OSString?
71 pub fn name_bytes(&self) -> &[u8] {
72 &self.name
73 }
74
75 /// Returns the file name, with un-decodable bytes converted to Unicode
76 /// replacement characters.
77 ///
78 /// For the common case of UTF-8 names, this is simply the name, but
79 /// if the remote end uses a different encoding the name may be mangled.
80 ///
81 /// This is suitable for printing, but might not be suitable for use as a
82 /// destination file name.
83 pub fn name_lossy_string(&self) -> std::borrow::Cow<'_, str> {
84 String::from_utf8_lossy(&self.name)
85 }
86
87 /// Returns true if this entry describes a plain file.
88 pub fn is_file(&self) -> bool {
89 unix_mode::is_file(self.mode)
90 }
91
92 /// Returns true if this entry describes a directory.
93 pub fn is_dir(&self) -> bool {
94 unix_mode::is_dir(self.mode)
95 }
96
97 /// Returns true if this entry describes a symlink.
98 pub fn is_symlink(&self) -> bool {
99 unix_mode::is_symlink(self.mode)
100 }
101
102 /// Returns the modification time, in seconds since the Unix epoch.
103 pub fn unix_mtime(&self) -> u32 {
104 self.mtime
105 }
106
107 /// Returns the modification time as a chrono::DateTime associated to the
108 /// local timezone.
109 pub fn mtime(&self) -> chrono::DateTime<Local> {
110 Local.timestamp(self.mtime as i64, 0)
111 }
112}
113
114/// Display this entry in a format like that of `ls`, and like `rsync` uses in
115/// listing directories:
116///
117/// ```text
118/// drwxr-x--- 420 2020-05-02 07:25:17 rsyn
119/// ```
120///
121/// The modification time is shown in the local timezone.
122impl fmt::Display for FileEntry {
123 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124 write!(
125 f,
126 "{:08} {:11} {:19} {}",
127 unix_mode::to_string(self.mode),
128 self.file_len,
129 self.mtime().format("%Y-%m-%d %H:%M:%S"),
130 self.name_lossy_string(),
131 )
132 }
133}
134
135/// A list of files returned from a server.
136pub type FileList = Vec<FileEntry>;
137
138/// Read a file list off the wire, and return it in the order it was received.
139pub(crate) fn read_file_list(r: &mut ReadVarint) -> Result<FileList> {
140 // Corresponds to rsync |receive_file_entry|.
141 // TODO: Support receipt of uid and gid with -o, -g.
142 // TODO: Support devices, links, etc.
143
144 let mut v: Vec<FileEntry> = Vec::new();
145 while let Some(entry) = receive_file_entry(r, v.last())? {
146 v.push(entry)
147 }
148 debug!("End of file list");
149 Ok(v)
150}
151
152fn receive_file_entry(
153 r: &mut ReadVarint,
154 previous: Option<&FileEntry>,
155) -> Result<Option<FileEntry>> {
156 let status = r
157 .read_u8()
158 .context("Failed to read file entry status byte")?;
159 trace!("File list status {:#x}", status);
160 if status == 0 {
161 return Ok(None);
162 }
163
164 let inherit_name_bytes = if (status & STATUS_REPEAT_PARTIAL_NAME) != 0 {
165 r.read_u8().context("Failed to read inherited name bytes")? as usize
166 } else {
167 0
168 };
169
170 let name_len = if status & STATUS_LONG_NAME != 0 {
171 r.read_i32()? as usize
172 } else {
173 r.read_u8()? as usize
174 };
175 let mut name = r.read_byte_string(name_len)?;
176 if inherit_name_bytes > 0 {
177 let mut new_name = previous.unwrap().name.clone();
178 new_name.truncate(inherit_name_bytes);
179 new_name.append(&mut name);
180 name = new_name;
181 }
182 trace!(" filename: {:?}", String::from_utf8_lossy(&name));
183 assert!(!name.is_empty());
184
185 let file_len: u64 = r
186 .read_i64()?
187 .try_into()
188 .context("Received negative file_len")?;
189 trace!(" file_len: {}", file_len);
190
191 let mtime = if status & STATUS_REPEAT_MTIME == 0 {
192 r.read_i32()? as u32
193 } else {
194 previous.unwrap().mtime
195 };
196 trace!(" mtime: {}", mtime);
197
198 let mode = if status & STATUS_REPEAT_MODE == 0 {
199 r.read_i32()? as u32
200 } else {
201 previous.unwrap().mode
202 };
203 trace!(" mode: {:#o}", mode);
204
205 // TODO: If the relevant options are set, read uid, gid, device, link target.
206
207 Ok(Some(FileEntry {
208 name,
209 file_len,
210 mtime,
211 mode,
212 link_target: None,
213 }))
214}
215
/// Orders two entry names the way protocol 27 sorts them.
///
/// rsync's own comparison is intricate, mostly because of how it shares
/// parts of strings, but for names that occur in practice it appears to
/// reduce to a plain `strcmp`: a byte-wise lexicographic comparison.
fn filename_compare_27(a: &[u8], b: &[u8]) -> Ordering {
    Ord::cmp(a, b)
}
224
225pub(crate) fn sort(file_list: &mut [FileEntry]) {
226 // Compare to rsync `file_compare`.
227 // TODO: Clean the list of duplicates, like in rsync `clean_flist`.
228 file_list.sort_by(|a, b| filename_compare_27(&a.name, &b.name));
229 debug!("File list sort done");
230 for (i, entry) in file_list.iter().enumerate() {
231 debug!("[{:8}] {:?}", i, entry.name_lossy_string())
232 }
233}
234
#[cfg(test)]
mod test {
    use super::*;
    use regex::Regex;

    /// The `Display` output has the `ls -l`-like column layout.
    #[test]
    fn file_entry_display_like_ls() {
        let entry = FileEntry {
            mode: 0o0040750,
            file_len: 420,
            mtime: 1588429517,
            name: b"rsyn".to_vec(),
            link_target: None,
        };
        // The mtime is in the local timezone, and we need the tests to pass
        // regardless of timezone. Rust Chrono doesn't seem to provide a way
        // to override it for testing. Let's just assert that the pattern is
        // plausible.
        //
        // This does assume there are no timezones with a less-than-whole
        // minute offset. (There are places like South Australia with a
        // fractional-hour offset.)
        let entry_display = format!("{}", entry);
        // The length is right-aligned in an 11-column field, so "420" is
        // preceded by 8 padding spaces plus the 1 separator space.
        assert!(
            Regex::new(r"drwxr-x--- {9}420 2020-05-0[123] \d\d:\d\d:17 rsyn")
                .unwrap()
                .is_match(&entry_display),
            "{:?} doesn't match expected format",
            entry_display
        );
    }

    // TODO: Test reading and decoding from a varint stream.

    /// Examples from verbose output of rsync 2.6.1.
    ///
    /// `EXAMPLE` is already in the expected order, so comparing any two names
    /// must give the same result as comparing their positions.
    #[test]
    fn ordering_examples() {
        const EXAMPLE: &[&[u8]] = &[
            b"./",
            b".git/",
            b".git/HEAD",
            b".github/",
            b".github/workflows/",
            b".github/workflows/rust.yml",
            b".gitignore",
            b"CONTRIBUTING.md",
            b"src/",
            b"src/lib.rs",
        ];
        for (i, a) in EXAMPLE.iter().enumerate() {
            for (j, b) in EXAMPLE.iter().enumerate() {
                assert_eq!(filename_compare_27(a, b), i.cmp(&j))
            }
        }
    }
}