1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
use std::cmp::min;
use std::ffi::OsString;
use std::fs::{self, DirEntry};
use std::io::{self, Read};
use std::path::Path;
use std::{fs::File, path::PathBuf};

use is_executable::IsExecutable;

use crate::{coder, NarError};

/// Encoder which can archive a given path as a NAR file.
///
/// The archive bytes are produced incrementally through the encoder's
/// [`Read`] implementation; [`Encoder::pack`] is a convenience that copies
/// them into a newly created file.
pub struct Encoder {
    /// Pending work items.  The next activity to run is the *last* element,
    /// because `read` takes items off with `pop`.
    stack: Vec<CurrentActivity>,
    /// Size in bytes of the temporary buffer that `write_with_buffer`
    /// allocates when it cannot write straight into the caller's buffer.
    internal_buffer_size: usize,
}

/// Builder for [`Encoder`].
///
/// Obtained from [`Encoder::builder`]; call `build` to get the configured
/// encoder.
pub struct EncoderBuilder<P: AsRef<Path>> {
    /// Path that the built encoder will archive.
    path: P,
    /// Value copied into the built encoder's `internal_buffer_size`;
    /// [`Encoder::builder`] initialises it to 1024.
    internal_buffer_size: usize,
}

/// One unit of pending work on the encoder's stack.  Each call to
/// `Encoder::read` pops one activity and either emits bytes for it, pushes
/// follow-up activities, or both.
#[derive(Debug)]
enum CurrentActivity {
    /// Emit the bytes produced by `coder::start_archive`.
    StartArchive,
    /// Emit the bytes produced by `coder::start_entry`.
    StartEntry,
    /// Inspect the root path and start encoding it as a directory, symlink
    /// or regular file.
    Toplevel {
        path: PathBuf,
    },
    /// Iterate over the entries of a directory.
    WalkingDir {
        /// Directory being walked; entry names are joined onto it.
        dir_path: PathBuf,
        /// Entry names still to visit.  The next one is taken with `pop`,
        /// i.e. from the end of the vector (`list_dir_files` sorts the names
        /// in descending order, so they are visited in ascending order).
        files_rev: Vec<OsString>,
    },
    /// Stream the contents of an open file into the output.
    EncodingFile {
        file: File,
    },
    /// Emit `padding` zero bytes (scheduled after a file's contents so its
    /// length is rounded up to a multiple of 8).
    WritePadding {
        padding: u64,
    },
    /// Emit bytes that were already encoded but did not fit into the
    /// caller's buffer on a previous `read`.
    WriteMoreBytes {
        bytes: Vec<u8>,
    },
    /// Emit the bytes produced by `coder::close_dir_entry`.
    CloseDirEntry,
    /// Emit the bytes produced by `coder::close_entry`.
    CloseEntry,
}

impl Encoder {
    /// Create a new encoder for file hierarchy at the given path.
    ///
    /// This only records the path and sets up the initial work stack; the
    /// file system is not touched until the encoder is read from.  The
    /// internal buffer size defaults to 1024 bytes.
    pub fn new<P: AsRef<Path>>(path: P) -> Self {
        Self {
            // Activities are popped from the end of the vector, so they are
            // listed here in reverse order of execution.
            stack: vec![
                CurrentActivity::CloseEntry,
                CurrentActivity::Toplevel {
                    path: path.as_ref().to_path_buf(),
                },
                CurrentActivity::StartEntry,
                CurrentActivity::StartArchive,
            ],
            internal_buffer_size: 1024,
        }
    }

    /// Create a builder for this encoder that can take additional
    /// options.
    pub fn builder<P: AsRef<Path>>(path: P) -> EncoderBuilder<P> {
        EncoderBuilder {
            path,
            internal_buffer_size: 1024,
        }
    }

    /// Archive to the given path.
    ///
    /// This is equivalent to creating the file, and [`io::copy`]ing
    /// to it.
    ///
    /// # Errors
    ///
    /// Returns [`NarError::PackError`] if the path already exists (including
    /// when it is a symlink).  Any other I/O error raised while creating the
    /// file or while encoding is converted into a [`NarError`].
    pub fn pack<P: AsRef<Path>>(&mut self, dst: P) -> Result<(), NarError> {
        let dst = dst.as_ref();
        // `create_new` performs the existence check and the creation as one
        // atomic operation, so a file that appears between "check" and
        // "create" can never be truncated by us.
        let mut nar = match fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(dst)
        {
            Ok(file) => file,
            Err(err) if err.kind() == io::ErrorKind::AlreadyExists => {
                return Err(NarError::PackError(format!(
                    "Destination {} already exists. Delete it first.",
                    dst.display()
                )));
            }
            Err(err) => return Err(err.into()),
        };
        io::copy(self, &mut nar)?;
        Ok(())
    }
}

impl<P: AsRef<Path>> EncoderBuilder<P> {
    pub fn build(&self) -> Encoder {
        let Self {
            path,
            internal_buffer_size,
        } = self;
        let mut enc = Encoder::new(path.as_ref());
        enc.internal_buffer_size = *internal_buffer_size;
        enc
    }

    /// Configure the internal buffer size.  This should be at least
    /// 200 bytes larger than the longest filename.  Panics if the
    /// given number is smaller than 200.
    pub fn internal_buffer_size(&mut self, x: usize) -> &mut Self {
        assert!(
            x >= 200,
            "internal_buffer_size should be at least 200 bytes larger than the longest filename you have"
        );
        self.internal_buffer_size = x;
        self
    }
}

impl Encoder {
    /// Begin encoding the regular file at `path`.
    ///
    /// Opens the file, schedules its contents (plus the zero padding needed
    /// to round its length up to a multiple of 8) on the stack, and encodes
    /// the file header into `buf`.  When `dir_entry` is given, the directory
    /// entry header for that name is encoded first.
    ///
    /// Returns the number of bytes placed into `buf`.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be opened or its metadata cannot be read, or
    /// if encoding the header fails.
    fn start_encoding_file<P: AsRef<Path>>(
        &mut self,
        buf: &mut [u8],
        path: P,
        dir_entry: Option<OsString>,
    ) -> Result<usize, io::Error> {
        let path = path.as_ref();
        let executable = path.is_executable();
        let file_handle = File::open(path).map_err(annotate_err_with_path(path))?;
        let file_len = file_handle
            .metadata()
            .map_err(annotate_err_with_path(path))?
            .len();
        // Bytes missing to the next multiple of 8; written this way so that
        // no intermediate sum can overflow.
        let padding = (8 - file_len % 8) % 8;
        // The stack is LIFO: padding is pushed first so that it is emitted
        // after the file contents.
        if padding > 0 {
            self.stack.push(CurrentActivity::WritePadding { padding });
        }
        self.stack
            .push(CurrentActivity::EncodingFile { file: file_handle });
        self.write_with_buffer(buf, move |out| {
            let mut len = 0;
            if let Some(file) = dir_entry {
                len += coder::start_dir_entry(&mut out[len..], file)?;
            }
            len += coder::write_file_regular(&mut out[len..], executable)?;
            len += coder::write_u64_le(&mut out[len..], file_len)?;
            Ok(len)
        })
    }

    /// Begin encoding the directory at `path`.
    ///
    /// Lists the directory, schedules a walk over its entries on the stack,
    /// and encodes the directory header into `buf`.  When `dir_entry` is
    /// given, the directory entry header for that name is encoded first.
    ///
    /// Returns the number of bytes placed into `buf`.
    ///
    /// # Errors
    ///
    /// Fails if the directory cannot be listed or if encoding the header
    /// fails.
    fn start_encoding_dir<P: AsRef<Path>>(
        &mut self,
        buf: &mut [u8],
        path: P,
        dir_entry: Option<OsString>,
    ) -> Result<usize, io::Error> {
        let path = path.as_ref();
        let files_rev = list_dir_files(path).map_err(annotate_err_with_path(path))?;
        self.stack.push(CurrentActivity::WalkingDir {
            dir_path: path.to_path_buf(),
            files_rev,
        });
        self.write_with_buffer(buf, move |out| {
            let mut len = 0;
            if let Some(file) = dir_entry {
                len += coder::start_dir_entry(&mut out[len..], file)?;
            }
            len += coder::start_dir(&mut out[len..])?;
            Ok(len)
        })
    }

    /// Encode the symlink at `path`.
    ///
    /// Reads the link target and encodes the symlink node into `buf`.  When
    /// `dir_entry` is given, the directory entry header for that name is
    /// encoded first.
    ///
    /// Returns the number of bytes placed into `buf`.
    ///
    /// # Errors
    ///
    /// Fails if the link target cannot be read or if encoding fails.
    fn start_encoding_symlink<P: AsRef<Path>>(
        &mut self,
        buf: &mut [u8],
        path: P,
        dir_entry: Option<OsString>,
    ) -> Result<usize, io::Error> {
        let path = path.as_ref();
        let target: PathBuf = fs::read_link(path).map_err(annotate_err_with_path(path))?;
        self.write_with_buffer(buf, move |out| {
            let mut len = 0;
            if let Some(file) = dir_entry {
                len += coder::start_dir_entry(&mut out[len..], file)?;
            }
            len += coder::write_symlink(&mut out[len..], target)?;
            Ok(len)
        })
    }

    /// Execute a write-into-buffer operation.
    ///
    /// If `dst_buf` is at least `internal_buffer_size` bytes long, `f` writes
    /// straight into it.  Otherwise `f` writes into a temporary buffer of
    /// `internal_buffer_size` bytes, as much as fits is copied into
    /// `dst_buf`, and the remainder is stored in a
    /// `CurrentActivity::WriteMoreBytes` on `self.stack` so that later reads
    /// emit it.
    ///
    /// Returns the number of bytes placed into `dst_buf`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `f`.
    fn write_with_buffer<F>(&mut self, dst_buf: &mut [u8], f: F) -> io::Result<usize>
    where
        F: FnOnce(&mut [u8]) -> io::Result<usize>,
    {
        // Compare against the configured size rather than a fixed constant,
        // so that a caller-chosen larger internal buffer is honoured for
        // every destination buffer smaller than it.
        if dst_buf.len() >= self.internal_buffer_size {
            return f(dst_buf);
        }

        let mut scratch = vec![0; self.internal_buffer_size];
        let len = f(&mut scratch)?;
        let to_write_len = min(len, dst_buf.len());
        dst_buf[..to_write_len].copy_from_slice(&scratch[..to_write_len]);
        if len > to_write_len {
            // Keep only the not-yet-delivered tail, reusing the scratch
            // allocation instead of copying into a second vector.
            scratch.truncate(len);
            scratch.drain(..to_write_len);
            self.stack
                .push(CurrentActivity::WriteMoreBytes { bytes: scratch });
        }
        Ok(to_write_len)
    }
}

impl Read for Encoder {
    /// Produce the next chunk of the archive.
    ///
    /// Pops the next pending activity off the stack and performs it, which
    /// writes some bytes into `buf` and/or schedules follow-up activities.
    /// Returns the number of bytes written; `Ok(0)` signals the end of the
    /// archive (or that `buf` is empty).
    ///
    /// # Errors
    ///
    /// Fails on any file system error encountered while walking the
    /// hierarchy, on unsupported file types, and on encoding errors.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Nothing can be delivered into an empty buffer.  Return before
        // popping so that no pending activity is consumed (an open file
        // would otherwise be mistaken for exhausted and dropped).
        if buf.is_empty() {
            return Ok(0);
        }
        match self.stack.pop() {
            None => Ok(0),
            Some(CurrentActivity::StartArchive) => {
                self.write_with_buffer(buf, coder::start_archive)
            }
            Some(CurrentActivity::StartEntry) => {
                self.write_with_buffer(buf, coder::start_entry)
            }
            Some(CurrentActivity::CloseDirEntry) => {
                self.write_with_buffer(buf, coder::close_dir_entry)
            }
            Some(CurrentActivity::CloseEntry) => {
                self.write_with_buffer(buf, coder::close_entry)
            }
            Some(CurrentActivity::Toplevel { path }) => {
                self.start_encoding_path(buf, path, None)
            }
            Some(CurrentActivity::WalkingDir {
                dir_path,
                mut files_rev,
            }) => match files_rev.pop() {
                // Directory exhausted: move on to whatever is scheduled next.
                None => self.read(buf),
                Some(file) => {
                    let path = dir_path.join(&file);
                    // Re-schedule the rest of the walk, then the closing of
                    // the entry we are about to start (LIFO order).
                    self.stack.push(CurrentActivity::WalkingDir {
                        dir_path,
                        files_rev,
                    });
                    self.stack.push(CurrentActivity::CloseDirEntry);
                    self.start_encoding_path(buf, path, Some(file))
                }
            },
            Some(CurrentActivity::EncodingFile { mut file }) => match file.read(buf) {
                // End of file: continue with the next activity.
                Ok(0) => self.read(buf),
                Ok(len) => {
                    self.stack.push(CurrentActivity::EncodingFile { file });
                    Ok(len)
                }
                Err(err) => {
                    // Put the file back so that a caller retrying the read
                    // (e.g. after `ErrorKind::Interrupted`) resumes this
                    // file instead of silently skipping the rest of it.
                    self.stack.push(CurrentActivity::EncodingFile { file });
                    Err(err)
                }
            },
            Some(CurrentActivity::WritePadding { padding }) => {
                // The cast cannot truncate: the value is at most `buf.len()`.
                let len = min(padding, buf.len() as u64) as usize;
                buf[..len].fill(0);
                let remaining = padding - len as u64;
                if remaining > 0 {
                    self.stack
                        .push(CurrentActivity::WritePadding { padding: remaining });
                }
                Ok(len)
            }
            Some(CurrentActivity::WriteMoreBytes { mut bytes }) => {
                let len = min(bytes.len(), buf.len());
                buf[..len].copy_from_slice(&bytes[..len]);
                if len < bytes.len() {
                    // Drop the delivered prefix in place and keep the tail.
                    bytes.drain(..len);
                    self.stack.push(CurrentActivity::WriteMoreBytes { bytes });
                }
                Ok(len)
            }
        }
    }
}

impl Encoder {
    /// Inspect `path` without following symlinks and start encoding it as a
    /// directory, symlink or regular file.  `dir_entry` is forwarded to the
    /// type-specific encoder unchanged.
    ///
    /// Returns the number of bytes placed into `buf`.
    ///
    /// # Errors
    ///
    /// Fails if the metadata cannot be read, if the path is none of the
    /// three supported file types, or if the type-specific encoder fails.
    fn start_encoding_path(
        &mut self,
        buf: &mut [u8],
        path: PathBuf,
        dir_entry: Option<OsString>,
    ) -> io::Result<usize> {
        let metadata = fs::symlink_metadata(&path).map_err(annotate_err_with_path(&path))?;
        if metadata.is_dir() {
            self.start_encoding_dir(buf, path, dir_entry)
        } else if metadata.is_symlink() {
            self.start_encoding_symlink(buf, path, dir_entry)
        } else if metadata.is_file() {
            self.start_encoding_file(buf, path, dir_entry)
        } else {
            Err(other_io_error(format!(
                "unknown file type {}",
                path.display()
            )))
        }
    }
}

/// List the names of the entries directly inside the directory `path`,
/// sorted in descending order.
///
/// The descending order lets a caller `pop` names off the end of the vector
/// to visit them in ascending order.
///
/// # Errors
///
/// Returns the underlying [`io::Error`] unchanged if the directory cannot be
/// opened or one of its entries cannot be read.  No path context is added
/// here, so callers that annotate the error do not end up with the path
/// repeated twice in the message.
fn list_dir_files(path: &Path) -> Result<Vec<OsString>, io::Error> {
    let mut names = fs::read_dir(path)?
        .map(|entry: io::Result<DirEntry>| entry.map(|e| e.file_name()))
        .collect::<io::Result<Vec<OsString>>>()?;
    names.sort_unstable_by(|a, b| b.cmp(a));
    Ok(names)
}

fn annotate_err_with_path<P: AsRef<Path>>(
    path: P,
) -> impl FnOnce(io::Error) -> io::Error {
    let path = path.as_ref().to_path_buf();
    move |err: io::Error| other_io_error(format!("IO error on {}: {err}", path.display()))
}

/// Create an [`io::Error`] of kind [`io::ErrorKind::Other`] whose message is
/// the given text.
fn other_io_error<S: AsRef<str>>(message: S) -> io::Error {
    let text: &str = message.as_ref();
    let kind = io::ErrorKind::Other;
    io::Error::new(kind, text)
}