http_serve/
dir.rs

1// Copyright (c) 2020 The http-serve developers
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE.txt or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT.txt or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Directory traversal on local filesystems.
10//! Currently Unix-only. Gated behind the `dir` feature.
11
12use http::header::{self, HeaderMap, HeaderValue};
13use memchr::memchr;
14use std::convert::TryInto;
15use std::ffi::CStr;
16use std::fs::File;
17use std::io::{Error, ErrorKind};
18use std::os::unix::{ffi::OsStrExt, io::FromRawFd};
19use std::path::Path;
20use std::sync::Arc;
21
/// A builder for a `FsDir`.
///
/// Obtain one with `FsDir::builder()`, adjust the options, then call `for_path` to open a
/// base directory.
#[derive(Debug)]
pub struct FsDirBuilder {
    /// Whether directories created by this builder look for `.gz` variants of requested paths.
    auto_gzip: bool,
}
26
27impl FsDirBuilder {
28    /// Enables or disables automatic gzipping based on `Accept-Encoding` request headers.
29    ///
30    /// Default is `true`.
31    pub fn auto_gzip(mut self, auto_gzip: bool) -> Self {
32        self.auto_gzip = auto_gzip;
33        self
34    }
35
36    /// Returns a `FsDir` for the given path.
37    pub fn for_path<P: AsRef<Path>>(&self, path: P) -> Result<Arc<FsDir>, Error> {
38        FsDir::open(path.as_ref(), self.auto_gzip)
39    }
40}
41
/// A base directory for local filesystem traversal.
///
/// Holds an open descriptor for the directory; paths passed to `get` are resolved relative
/// to it. The descriptor is closed when the `FsDir` is dropped.
#[derive(Debug)]
pub struct FsDir {
    /// Whether `get` should look for a `.gz` variant when the request allows gzip.
    auto_gzip: bool,
    /// Raw descriptor of the opened base directory.
    fd: std::os::unix::io::RawFd,
}
47
48impl FsDir {
49    pub fn builder() -> FsDirBuilder {
50        FsDirBuilder { auto_gzip: true }
51    }
52
53    fn open(path: &Path, auto_gzip: bool) -> Result<Arc<Self>, Error> {
54        let path = path.as_os_str().as_bytes();
55        if memchr(0, path).is_some() {
56            return Err(Error::new(
57                ErrorKind::InvalidInput,
58                "path contains NUL byte",
59            ));
60        }
61        if path.len() >= libc::PATH_MAX.try_into().unwrap() {
62            return Err(Error::new(ErrorKind::InvalidInput, "path is too long"));
63        }
64        let mut buf = [0u8; libc::PATH_MAX as usize];
65        unsafe { std::ptr::copy_nonoverlapping(path.as_ptr(), buf.as_mut_ptr(), path.len()) };
66        let fd = unsafe {
67            libc::open(
68                buf.as_ptr() as *const libc::c_char,
69                libc::O_DIRECTORY | libc::O_CLOEXEC,
70                0,
71            )
72        };
73        if fd < 0 {
74            return Err(Error::last_os_error());
75        }
76        Ok(Arc::new(FsDir { auto_gzip, fd }))
77    }
78
79    /// Opens a path within this base directory.
80    ///
81    /// If using `auto_gzip` (the default) and `req_hdrs` indicate the client supports `gzip`, will
82    /// look for a `.gz`-suffixed version of this path first and note that in the returned `Node`.
83    /// `.gz`-suffixed directories are ignored.
84    ///
85    /// Validates that `path` has no `..` segments or interior NULs. Currently doesn't check for
86    /// symlinks, however. That may eventually be configurable via the builder.
87    pub async fn get(self: Arc<Self>, path: &str, req_hdrs: &HeaderMap) -> Result<Node, Error> {
88        if let Err(e) = validate_path(path) {
89            return Err(Error::new(ErrorKind::InvalidInput, e));
90        }
91        let mut buf = Vec::with_capacity(path.len() + b".gz\0".len());
92        buf.extend_from_slice(path.as_bytes());
93        let should_gzip = self.auto_gzip && super::should_gzip(req_hdrs);
94        tokio::task::spawn_blocking(move || -> Result<Node, Error> {
95            if should_gzip {
96                let path_len = buf.len();
97                buf.extend_from_slice(&b".gz\0"[..]);
98                match self.open_file(
99                    // This is safe because we've ensured in validate_path that there are no
100                    // interior NULs, and we've just appended a NUL.
101                    unsafe { CStr::from_bytes_with_nul_unchecked(&buf[..]) },
102                ) {
103                    Ok(file) => {
104                        let metadata = file.metadata()?;
105                        if !metadata.is_dir() {
106                            return Ok(Node {
107                                file,
108                                metadata,
109                                auto_gzip: self.auto_gzip,
110                                is_gzipped: true,
111                            });
112                        }
113                    }
114                    Err(ref e) if e.kind() == ErrorKind::NotFound => {}
115                    Err(e) => return Err(e),
116                };
117                buf.truncate(path_len);
118            }
119
120            buf.push(b'\0');
121
122            // As in the case above, we've ensured buf contains exactly one NUL, at the end.
123            let p = unsafe { CStr::from_bytes_with_nul_unchecked(&buf[..]) };
124            let file = self.open_file(p)?;
125            let metadata = file.metadata()?;
126            Ok(Node {
127                file,
128                metadata,
129                auto_gzip: self.auto_gzip,
130                is_gzipped: false,
131            })
132        })
133        .await
134        .unwrap_or_else(|e: tokio::task::JoinError| Err(Error::new(ErrorKind::Other, e)))
135    }
136
137    /// Opens the given file with a path relative to this directory.
138    /// Performs the blocking I/O directly from this thread.
139    fn open_file(&self, path: &CStr) -> Result<File, Error> {
140        let fd =
141            unsafe { libc::openat(self.fd, path.as_ptr(), libc::O_RDONLY | libc::O_CLOEXEC, 0) };
142        if fd < 0 {
143            return Err(Error::last_os_error());
144        }
145        Ok(unsafe { File::from_raw_fd(fd) })
146    }
147}
148
149impl Drop for FsDir {
150    fn drop(&mut self) {
151        unsafe { libc::close(self.fd) };
152    }
153}
154
/// An opened path (aka inode on Unix) as returned by `FsDir::get`.
///
/// This is not necessarily a plain file; it could also be a directory, for example.
///
/// The caller can inspect it as desired. If it is a directory, the caller might pass the result of
/// `into_file()` to `nix::dir::Dir::from`. If it is a plain file, the caller might create an
/// `http_serve::Entity` with `into_file_entity()`.
#[derive(Debug)]
pub struct Node {
    /// The opened file (or directory) handle.
    file: std::fs::File,
    /// Metadata fetched from `file` when the node was opened.
    metadata: std::fs::Metadata,
    /// Whether the originating `FsDir` had automatic gzip enabled.
    auto_gzip: bool,
    /// True if this node is the `.gz` variant found in place of the requested path.
    is_gzipped: bool,
}
168
169impl Node {
170    /// Converts this node to a `std::fs::File`.
171    pub fn into_file(self) -> std::fs::File {
172        self.file
173    }
174
175    /// Converts this node (which must represent a plain file) into a `ChunkedReadFile`.
176    /// The caller is expected to supply all headers. The function `add_encoding_headers`
177    /// may be useful.
178    pub fn into_file_entity<D, E>(
179        self,
180        headers: HeaderMap,
181    ) -> Result<crate::file::ChunkedReadFile<D, E>, Error>
182    where
183        D: 'static + Send + Sync + bytes::Buf + From<Vec<u8>> + From<&'static [u8]>,
184        E: 'static
185            + Send
186            + Sync
187            + Into<Box<dyn std::error::Error + Send + Sync>>
188            + From<Box<dyn std::error::Error + Send + Sync>>,
189    {
190        crate::file::ChunkedReadFile::new_with_metadata(self.file, &self.metadata, headers)
191    }
192
193    /// Returns the (already fetched) metadata for this node.
194    pub fn metadata(&self) -> &std::fs::Metadata {
195        &self.metadata
196    }
197
198    /// Returns the encoding this file is assumed to have applied to the caller's request.
199    /// E.g., if automatic gzip compression is enabled and `index.html.gz` was found when the
200    /// caller requested `index.html`, this will return `Some("gzip")`. If the caller requests
201    /// `index.html.gz`, this will return `None` because the gzip encoding is built in to the
202    /// caller's request.
203    pub fn encoding(&self) -> Option<&'static str> {
204        if self.is_gzipped {
205            Some("gzip")
206        } else {
207            None
208        }
209    }
210
211    /// Returns true iff the content varies with the request's `Accept-Encoding` header value.
212    pub fn encoding_varies(&self) -> bool {
213        self.auto_gzip
214    }
215
216    /// Adds `Content-Encoding` and `Vary` headers for the encoding to `hdrs`.
217    ///
218    /// Note if there are other `Vary` header components known to the caller, this method is
219    /// inappropriate.
220    pub fn add_encoding_headers(&self, hdrs: &mut HeaderMap) {
221        if let Some(e) = self.encoding() {
222            hdrs.insert(header::CONTENT_ENCODING, HeaderValue::from_static(e));
223        }
224        if self.auto_gzip {
225            hdrs.insert(header::VARY, HeaderValue::from_static("accept-encoding"));
226        }
227    }
228}
229
/// Ensures path is safe: no NUL bytes, not absolute, no `..` segments.
///
/// Segments are the pieces between `/` separators, so only a segment that is exactly `..`
/// is rejected; names that merely contain dots (`..foo`, `...`) are accepted, as are empty
/// segments and the empty path.
///
/// On failure, returns a static message describing the first problem found, checked in the
/// order listed above.
fn validate_path(path: &str) -> Result<(), &'static str> {
    if path.as_bytes().contains(&0) {
        return Err("path contains NUL byte");
    }
    if path.starts_with('/') {
        return Err("path is absolute");
    }
    if path.split('/').any(|seg| seg == "..") {
        return Err("path contains .. segment");
    }
    Ok(())
}
252
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a temporary directory and an `FsDir` rooted at it.
    ///
    /// The `TempDir` handle is returned as well so the caller decides how long it lives.
    fn temp_fsdir() -> (tempfile::TempDir, Arc<FsDir>) {
        let tmp = tempfile::tempdir().unwrap();
        let fsdir = FsDir::builder().for_path(tmp.path()).unwrap();
        (tmp, fsdir)
    }

    /// Requests `path` with empty request headers and returns the error, panicking if the
    /// request unexpectedly succeeds.
    async fn expect_get_error(fsdir: &Arc<FsDir>, path: &str) -> Error {
        match Arc::clone(fsdir).get(path, &HeaderMap::new()).await {
            Ok(_) => panic!("should have failed"),
            Err(e) => e,
        }
    }

    /// A path containing a NUL byte is rejected as invalid input.
    #[tokio::test(flavor = "multi_thread")]
    async fn path_with_interior_nul() {
        let (_tmp, fsdir) = temp_fsdir();
        let err = expect_get_error(&fsdir, "foo\0bar").await;
        assert_eq!(err.kind(), ErrorKind::InvalidInput);
        assert_eq!(err.to_string(), "path contains NUL byte");
    }

    /// A `..` segment is rejected wherever it appears in the path.
    #[tokio::test(flavor = "multi_thread")]
    async fn path_with_parent_dir_segment() {
        let (_tmp, fsdir) = temp_fsdir();

        let bad_paths = ["..", "../foo", "foo/../bar", "foo/.."];
        for bad in bad_paths.iter() {
            let err = expect_get_error(&fsdir, bad).await;
            assert_eq!(err.kind(), ErrorKind::InvalidInput);
            assert_eq!(err.to_string(), "path contains .. segment");
        }
    }

    /// An absolute path is rejected as invalid input.
    #[tokio::test(flavor = "multi_thread")]
    async fn absolute_path() {
        let (_tmp, fsdir) = temp_fsdir();
        let err = expect_get_error(&fsdir, "/etc/passwd").await;
        assert_eq!(err.kind(), ErrorKind::InvalidInput);
        assert_eq!(err.to_string(), "path is absolute");
    }

    /// A regular file can be opened, and the node's metadata reports its length.
    #[tokio::test(flavor = "multi_thread")]
    async fn regular_file() {
        tokio::spawn(async move {
            let (tmp, fsdir) = temp_fsdir();
            let name = "foo.txt";
            let contents = b"1234";
            std::fs::write(tmp.path().join(name), contents).unwrap();
            let node = fsdir.get(name, &HeaderMap::new()).await.unwrap();
            assert_eq!(node.metadata().len(), contents.len() as u64);
        })
        .await
        .unwrap()
    }

    /// Requesting a path that doesn't exist yields a `NotFound` error.
    #[tokio::test(flavor = "multi_thread")]
    async fn missing_file() {
        tokio::spawn(async move {
            let (_tmp, fsdir) = temp_fsdir();
            let outcome = fsdir.get("nonexistent.txt", &HeaderMap::new()).await;
            match outcome {
                Ok(_) => panic!("nonexistent file found?!?"),
                Err(e) => assert_eq!(e.kind(), ErrorKind::NotFound),
            };
        })
        .await
        .unwrap()
    }

    /// A symlink as the final path component is followed.
    #[tokio::test(flavor = "multi_thread")]
    async fn symlink_allowed_in_last_path_component() {
        tokio::spawn(async move {
            let (tmp, fsdir) = temp_fsdir();
            let link = tmp.path().join("foo.txt");
            ::std::os::unix::fs::symlink("/etc/passwd", link).unwrap();
            fsdir.get("foo.txt", &HeaderMap::new()).await.unwrap();
        })
        .await
        .unwrap()
    }

    /// A symlink in a non-final path component is followed.
    #[tokio::test(flavor = "multi_thread")]
    async fn symlink_allowed_in_earlier_path_component() {
        tokio::spawn(async move {
            let (tmp, fsdir) = temp_fsdir();
            let link = tmp.path().join("etc");
            ::std::os::unix::fs::symlink("/etc", link).unwrap();
            fsdir.get("etc/passwd", &HeaderMap::new()).await.unwrap();
        })
        .await
        .unwrap()
    }
}
352}