Skip to main content

rsyn/
flist.rs

1// Copyright 2020 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! File lists and entries.
16
17use std::cmp::Ordering;
18use std::convert::TryInto;
19use std::fmt;
20
21use anyhow::Context;
22use chrono::{Local, TimeZone};
23
24#[allow(unused_imports)]
25use log::{debug, error, info, trace, warn};
26
27use crate::varint::ReadVarint;
28use crate::Result;
29
// Bits of the status byte that starts each file entry on the wire. A status
// byte of zero marks the end of the file list.
//
// Flags that are not yet interpreted are kept here, commented out, for
// reference.

// const STATUS_TOP_LEVEL_DIR: u8 = 0x01;
/// The mode is not sent for this entry: reuse the previous entry's mode.
const STATUS_REPEAT_MODE: u8 = 0x02;
// const STATUS_REPEAT_UID: u8 = 0x08;
// const STATUS_REPEAT_GID: u8 = 0x10; // rsync's XMIT_SAME_GID is 1<<4, distinct from the UID bit.
/// A one-byte count follows the status byte, giving the number of leading
/// bytes of the previous entry's name that this entry's name starts with.
const STATUS_REPEAT_PARTIAL_NAME: u8 = 0x20;
/// The name length is sent as a 32-bit integer rather than as a single byte.
const STATUS_LONG_NAME: u8 = 0x40;
/// The mtime is not sent for this entry: reuse the previous entry's mtime.
const STATUS_REPEAT_MTIME: u8 = 0x80;
37
/// A string of raw bytes, in no particular encoding.
type ByteString = Vec<u8>;

/// Description of a single file (or directory or symlink etc).
///
/// The `Display` trait formats an entry like in `ls -l`, and like in rsync
/// directory listings.
///
/// Entries are plain data: they can be cloned and compared for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileEntry {
    // Corresponds to rsync |file_struct|.
    /// Name of this file, as a byte string.
    name: ByteString,

    /// Length of the file, in bytes.
    pub file_len: u64,

    /// Unix mode, containing the file type and permissions.
    pub mode: u32,

    /// Modification time, in seconds since the Unix epoch.
    mtime: u32,

    /// If this is a symlink, the target.
    link_target: Option<ByteString>,
    // TODO: Other file_struct fields.
    // TODO: Work out what |basedir| is and maybe include that.
}
64
65impl FileEntry {
66    /// Returns the file name, as a byte string, in the (remote) OS's encoding.
67    ///
68    /// rsync doesn't constrain the encoding, so this will typically, but not
69    /// necessarily be UTF-8.
70    // TODO: Also offer it as an OSString?
71    pub fn name_bytes(&self) -> &[u8] {
72        &self.name
73    }
74
75    /// Returns the file name, with un-decodable bytes converted to Unicode
76    /// replacement characters.
77    ///
78    /// For the common case of UTF-8 names, this is simply the name, but
79    /// if the remote end uses a different encoding the name may be mangled.
80    ///
81    /// This is suitable for printing, but might not be suitable for use as a
82    /// destination file name.
83    pub fn name_lossy_string(&self) -> std::borrow::Cow<'_, str> {
84        String::from_utf8_lossy(&self.name)
85    }
86
87    /// Returns true if this entry describes a plain file.
88    pub fn is_file(&self) -> bool {
89        unix_mode::is_file(self.mode)
90    }
91
92    /// Returns true if this entry describes a directory.
93    pub fn is_dir(&self) -> bool {
94        unix_mode::is_dir(self.mode)
95    }
96
97    /// Returns true if this entry describes a symlink.
98    pub fn is_symlink(&self) -> bool {
99        unix_mode::is_symlink(self.mode)
100    }
101
102    /// Returns the modification time, in seconds since the Unix epoch.
103    pub fn unix_mtime(&self) -> u32 {
104        self.mtime
105    }
106
107    /// Returns the modification time as a chrono::DateTime associated to the
108    /// local timezone.
109    pub fn mtime(&self) -> chrono::DateTime<Local> {
110        Local.timestamp(self.mtime as i64, 0)
111    }
112}
113
114/// Display this entry in a format like that of `ls`, and like `rsync` uses in
115/// listing directories:
116///
117/// ```text
118/// drwxr-x---         420 2020-05-02 07:25:17 rsyn
119/// ```
120///
121/// The modification time is shown in the local timezone.
122impl fmt::Display for FileEntry {
123    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
124        write!(
125            f,
126            "{:08} {:11} {:19} {}",
127            unix_mode::to_string(self.mode),
128            self.file_len,
129            self.mtime().format("%Y-%m-%d %H:%M:%S"),
130            self.name_lossy_string(),
131        )
132    }
133}
134
/// A list of files returned from a server.
///
/// This is a plain `Vec` of [`FileEntry`], so the usual vector and slice
/// methods are available on it.
pub type FileList = Vec<FileEntry>;
137
138/// Read a file list off the wire, and return it in the order it was received.
139pub(crate) fn read_file_list(r: &mut ReadVarint) -> Result<FileList> {
140    // Corresponds to rsync |receive_file_entry|.
141    // TODO: Support receipt of uid and gid with -o, -g.
142    // TODO: Support devices, links, etc.
143
144    let mut v: Vec<FileEntry> = Vec::new();
145    while let Some(entry) = receive_file_entry(r, v.last())? {
146        v.push(entry)
147    }
148    debug!("End of file list");
149    Ok(v)
150}
151
152fn receive_file_entry(
153    r: &mut ReadVarint,
154    previous: Option<&FileEntry>,
155) -> Result<Option<FileEntry>> {
156    let status = r
157        .read_u8()
158        .context("Failed to read file entry status byte")?;
159    trace!("File list status {:#x}", status);
160    if status == 0 {
161        return Ok(None);
162    }
163
164    let inherit_name_bytes = if (status & STATUS_REPEAT_PARTIAL_NAME) != 0 {
165        r.read_u8().context("Failed to read inherited name bytes")? as usize
166    } else {
167        0
168    };
169
170    let name_len = if status & STATUS_LONG_NAME != 0 {
171        r.read_i32()? as usize
172    } else {
173        r.read_u8()? as usize
174    };
175    let mut name = r.read_byte_string(name_len)?;
176    if inherit_name_bytes > 0 {
177        let mut new_name = previous.unwrap().name.clone();
178        new_name.truncate(inherit_name_bytes);
179        new_name.append(&mut name);
180        name = new_name;
181    }
182    trace!("  filename: {:?}", String::from_utf8_lossy(&name));
183    assert!(!name.is_empty());
184
185    let file_len: u64 = r
186        .read_i64()?
187        .try_into()
188        .context("Received negative file_len")?;
189    trace!("  file_len: {}", file_len);
190
191    let mtime = if status & STATUS_REPEAT_MTIME == 0 {
192        r.read_i32()? as u32
193    } else {
194        previous.unwrap().mtime
195    };
196    trace!("  mtime: {}", mtime);
197
198    let mode = if status & STATUS_REPEAT_MODE == 0 {
199        r.read_i32()? as u32
200    } else {
201        previous.unwrap().mode
202    };
203    trace!("  mode: {:#o}", mode);
204
205    // TODO: If the relevant options are set, read uid, gid, device, link target.
206
207    Ok(Some(FileEntry {
208        name,
209        file_len,
210        mtime,
211        mode,
212        link_target: None,
213    }))
214}
215
/// Order two entry names the way protocol 27 sorts them.
///
/// rsync's own comparison is tangled up with how it shares parts of strings
/// between entries, but for names that actually occur it seems to come down
/// to a plain `strcmp`, that is, a bytewise lexicographic comparison.
fn filename_compare_27(a: &[u8], b: &[u8]) -> Ordering {
    Ord::cmp(a, b)
}
224
225pub(crate) fn sort(file_list: &mut [FileEntry]) {
226    // Compare to rsync `file_compare`.
227    // TODO: Clean the list of duplicates, like in rsync `clean_flist`.
228    file_list.sort_by(|a, b| filename_compare_27(&a.name, &b.name));
229    debug!("File list sort done");
230    for (i, entry) in file_list.iter().enumerate() {
231        debug!("[{:8}] {:?}", i, entry.name_lossy_string())
232    }
233}
234
#[cfg(test)]
mod test {
    use super::*;
    use regex::Regex;

    #[test]
    fn file_entry_display_like_ls() {
        // The mtime is shown in the local timezone, and the test has to pass
        // whatever that timezone is. Chrono doesn't seem to offer a way to
        // override it for testing, so only check that the output has a
        // plausible shape.
        //
        // This assumes no timezone has an offset that is not a whole number
        // of minutes. (Fractional-hour offsets, as in South Australia, are
        // fine.)
        let expected = Regex::new(r"drwxr-x---         420 2020-05-0[123] \d\d:\d\d:17 rsyn").unwrap();

        let entry = FileEntry {
            name: b"rsyn".to_vec(),
            file_len: 420,
            mode: 0o0040750,
            mtime: 1588429517,
            link_target: None,
        };
        let entry_display = entry.to_string();

        assert!(
            expected.is_match(&entry_display),
            "{:?} doesn't match expected format",
            entry_display
        );
    }

    // TODO: Test reading and decoding from a varint stream.

    /// Names in the order shown by verbose output of rsync 2.6.1: comparing
    /// any two of them must agree with comparing their positions.
    #[test]
    fn ordering_examples() {
        const SORTED_NAMES: &[&[u8]] = &[
            b"./",
            b".git/",
            b".git/HEAD",
            b".github/",
            b".github/workflows/",
            b".github/workflows/rust.yml",
            b".gitignore",
            b"CONTRIBUTING.md",
            b"src/",
            b"src/lib.rs",
        ];
        for (i, &first) in SORTED_NAMES.iter().enumerate() {
            for (j, &second) in SORTED_NAMES.iter().enumerate() {
                let expected = i.cmp(&j);
                assert_eq!(filename_compare_27(first, second), expected);
            }
        }
    }
}