1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
//! This crate is a thin CGI/FCGI wrapper. It turns your program into an
//! adaptive CGI script, capable of being invoked as CGI or FCGI in a variety
//! of configurations.
//!
//! This is *not* a full web framework. It performs minimal validation, and no
//! parsing beyond the bare minimum required to pass one or more CGI-style
//! requests to your handler. Examples of things that `outer_cgi` **does not
//! do**:
//!
//! - Validate environment variables, beyond checking that `GATEWAY_INTERFACE`
//! begins with `"CGI/"`, when invoked as a CGI.
//! - Parse query strings or cookies.
//! - Provide a template engine.
//! - Provide any database interfaces.
//!
//! Here is what it **does** do:
//!
//! - Seamlessly supports operation as either CGI or FCGI.
//! - FCGI may either be spawned in the "standard" way (where stdin is a listen
//! socket) or by explicitly binding to either a TCP port or UNIX socket.
//! - The UNIX version supports the following additional features:
//!     - `setuid`, `setgid`, and `chroot` for privilege reduction.
//!     - Logging to `syslog`, either out of necessity (from being spawned as
//! FCGI by another process) or by user request.
//!     - Daemonization.
//!
//! You write your code as a simple CGI script, using `outer_cgi`'s
//! replacements for `stdin`, `stdout`, and `env`. `outer_cgi` then allows the
//! webmaster to deploy your script in whatever configuration is most suitable.
//!
//! ```rust,no_run
//! extern crate outer_cgi;
//! use std::collections::HashMap;
//! use outer_cgi::IO;
//!
//! fn handler(io: &mut IO, env: HashMap<String, String>) -> anyhow::Result<i32> {
//!     io.write_all(format!(r#"Content-type: text/plain; charset=utf-8
//!
//! Hello World! Your request method was "{}"!
//! "#, env.get("REQUEST_METHOD").unwrap()).as_bytes())?;
//!     Ok(0)
//! }
//!
//! pub fn main() {
//!     outer_cgi::main(|_|{}, handler)
//! }
//! ```
//!
//! See the [Common Gateway Interface][1] specification for more information.
//!
//! According to the RFC, the current working directory SHOULD be the directory
//! containing the script. It's up to the webmaster to ensure this is the case
//! when running as an FCGI.
//!
//! [1]: https://tools.ietf.org/html/rfc3875

use std::{
    collections::HashMap,
    io,
    io::{Read, BufRead, Write, BufWriter},
    net::{SocketAddr, IpAddr, TcpStream, TcpListener},
    panic::RefUnwindSafe,
    path::PathBuf,
};

#[cfg(unix)] mod unix;

mod fcgi;
mod options;

use options::*;

/// A source of incoming connections for the FCGI side of the crate.
///
/// Used internally to allow Rust TcpListener to coexist with a UNIX domain
/// socket. We also wrap the `FCGI_WEB_SERVER_ADDRS` checking in one of these.
///
/// Implementors must be `Send`.
#[doc(hidden)]
pub trait Listener : Send {
    /// Blocks until a new connection arrives. Returns a stream for the new
    /// connection. (This may not actually be a `TcpStream` on UNIX, but a UNIX
    /// domain socket in `TcpStream`'s clothing! Some day I should replace this
    /// with a trait like `IO` instead... but then it ends up in a box on the
    /// heap... sigh.)
    ///
    /// # Errors
    ///
    /// Failures are reported as an `io::Error`; what counts as a failure is
    /// up to the implementation.
    fn accept_connection(&mut self) -> io::Result<TcpStream>;
}

/// A plain `TcpListener` hands out every connection it accepts; the peer
/// address reported by `accept` is discarded.
impl Listener for TcpListener {
    fn accept_connection(&mut self) -> io::Result<TcpStream> {
        let (stream, _peer) = self.accept()?;
        Ok(stream)
    }
}

/// A TCP listener paired with a whitelist of peer IP addresses.
///
/// The whitelist is parsed from a comma-separated string (in this file, the
/// value of the `FCGI_WEB_SERVER_ADDRS` environment variable).
struct ParanoidTcpListener {
    /// The underlying listening socket.
    listener: TcpListener,
    /// Peer addresses parsed from the whitelist string.
    whitelist: Vec<IpAddr>,
}
impl ParanoidTcpListener {
    /// Parses a comma-separated list of IP addresses.
    ///
    /// Each entry may be surrounded by spaces (only the space character is
    /// stripped, not other whitespace). Both IPv4 and IPv6 literals are
    /// accepted, since entries are parsed as `IpAddr`.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Other` if any entry, after trimming,
    /// is not a valid IP address. This includes empty entries, so an empty
    /// string or a trailing comma is rejected.
    fn make_whitelist(whitelist: &str) -> io::Result<Vec<IpAddr>> {
        // Collecting into a `Result` stops at the first entry that fails to
        // parse, which is what we want: one bad entry invalidates the list.
        whitelist.split(',')
            .map(|entry| {
                entry.trim_matches(' ').parse::<IpAddr>().map_err(|_| {
                    io::Error::new(io::ErrorKind::Other,
                                   "Invalid address in \
                                    FCGI_WEB_SERVER_ADDRS")
                })
            })
            .collect()
    }
    /// Binds a new TCP listener to `addr`, restricted to `whitelist`.
    ///
    /// The whitelist is parsed *before* binding, so a malformed list is
    /// reported without ever opening the socket.
    ///
    /// # Errors
    ///
    /// Fails if the whitelist is malformed (see `make_whitelist`) or if
    /// binding to `addr` fails.
    fn new(addr: SocketAddr, whitelist: &str)
           -> io::Result<ParanoidTcpListener> {
        let whitelist = ParanoidTcpListener::make_whitelist(whitelist)?;
        let listener = TcpListener::bind(addr)?;
        Ok(ParanoidTcpListener { listener, whitelist })
    }
    /// Wraps an already-bound `listener`, restricted to `whitelist`.
    ///
    /// # Errors
    ///
    /// Fails if the whitelist is malformed (see `make_whitelist`).
    // NOTE(review): nothing in this file calls `with`; presumably it is used
    // from a platform-specific module, hence the `allow` — confirm.
    #[allow(unused)]
    fn with(listener: TcpListener, whitelist: &str)
           -> io::Result<ParanoidTcpListener> {
        let whitelist = ParanoidTcpListener::make_whitelist(whitelist)?;
        Ok(ParanoidTcpListener { listener, whitelist })
    }
}

/// Accepts connections, but only returns those whose peer IP address is on
/// the whitelist.
impl Listener for ParanoidTcpListener {
    fn accept_connection(&mut self) -> io::Result<TcpStream> {
        loop {
            let (stream, peer) = self.listener.accept()?;
            if self.whitelist.contains(&peer.ip()) {
                return Ok(stream)
            }
            // Peer is not whitelisted: `stream` goes out of scope here and we
            // go back to waiting for the next connection.
        }
    }
}

/// Wraps the stdin and stdout streams of a standard CGI invocation.
///
/// This trait adds no methods of its own: a handler reads the request body
/// through the `BufRead` supertrait and writes the response through the
/// `Write` supertrait.
///
/// See the [Common Gateway Interface][1] specification for more information.
///
/// [1]: https://tools.ietf.org/html/rfc3875
pub trait IO : BufRead + Write {
}

/// Glues a separate input stream and output stream into one value that is
/// both readable and writable.
struct DualIO<R: BufRead, W: Write> {
    /// Input side; all reads are forwarded here.
    i: R,
    /// Output side; all writes are forwarded here.
    o: W,
}

/// Reads come straight from the input side.
impl<In, Out> Read for DualIO<In, Out>
where In: BufRead, Out: Write {
    fn read(&mut self, dest: &mut[u8]) -> io::Result<usize> {
        let source = &mut self.i;
        source.read(dest)
    }
}

/// Buffered reads use the input side's own buffer.
impl<In, Out> BufRead for DualIO<In, Out>
where In: BufRead, Out: Write {
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        let source = &mut self.i;
        source.fill_buf()
    }
    fn consume(&mut self, used: usize) {
        let source = &mut self.i;
        source.consume(used)
    }
}

/// Writes and flushes go straight to the output side.
impl<In, Out> Write for DualIO<In, Out>
where In: BufRead, Out: Write {
    fn write(&mut self, data: &[u8]) -> io::Result<usize> {
        let sink = &mut self.o;
        sink.write(data)
    }
    fn flush(&mut self) -> io::Result<()> {
        let sink = &mut self.o;
        sink.flush()
    }
}

// `IO` declares no methods of its own, so this empty impl is all that is
// needed for a `DualIO` to be passed where a handler expects `&mut dyn IO`.
impl<R: BufRead, W: Write> IO for DualIO<R, W> {
}

/// The entry point of `outer_cgi`: call this first (and preferably call
/// nothing else) from your program's `main` function. It takes care of
/// argument parsing, worker thread spawning, etc., and calls the handler you
/// provide once for each request.
///
/// `init` runs once, before any request is handled. Its argument is the
/// maximum number of parallel connections this instance will handle. Use it
/// for initialization (reading templates, setting up database connection
/// pools, etc.). If you have nothing to set up, pass `|_|{}`.
///
/// `handler` is given the standard CGI streams and the CGI environment
/// variables as parameters, and should use those rather than Rust's usual
/// `stdin`/`stdout`/`env` facilities. `outer_cgi` tries to keep the usual
/// `stderr` facility (`eprintln!` etc.) usable for logging error information.
///
/// This function never returns. It handles as many requests as possible and
/// then calls `std::process::exit`: with the resulting status code on
/// success, or with status 1 after printing the error to `stderr` on failure.
///
/// Don't do anything else in your script's `main` before calling
/// `outer_cgi::main`: `stdin`/`stdout`/`stderr` may be in an incoherent
/// state, and threads spawned beforehand may silently die in some
/// configurations. Perform any per-process setup on-demand, the first time
/// your `handler` is called, instead.
///
/// See the [Common Gateway Interface][1] specification and the module-level
/// documentation for more information.
///
/// [1]: https://tools.ietf.org/html/rfc3875
pub fn main<I, H>(init: I, handler: H) -> !
where I: 'static + Fn(u32),
      H: 'static + Fn(&mut dyn IO, HashMap<String, String>) -> anyhow::Result<i32>
    + Sync + Send + Copy + RefUnwindSafe {
    let status = match sub_main(init, handler) {
        Ok(code) => code,
        Err(error) => {
            eprintln!("Unexpected error: {}", error);
            1
        },
    };
    std::process::exit(status)
}

fn sub_main<I, H>(init: I, handler: H) -> anyhow::Result<i32>
where I: 'static + Fn(u32),
      H: 'static + Fn(&mut dyn IO, HashMap<String, String>) -> anyhow::Result<i32>
    + Sync + Send + Copy + RefUnwindSafe {
    let args: Vec<String> = std::env::args().collect();
    let static_env: HashMap<String, String> = std::env::vars().collect();
    if args.len() <= 1 {
        if let Some(existing_listener) = fix_fds(&static_env) {
            // FCGI server spawned by someone else. We'll handle one request
            // at a time.
            init(1);
            Ok(fcgi::listen_loop(existing_listener,
                                 handler,
                                 fcgi::Options { max_connections:1 },
                                 &static_env))
        }
        else if let Some(_) = static_env.get("GATEWAY_INTERFACE") {
            init(1);
            // A bit convoluted to satisfy the borrow checker.
            if !static_env.get("GATEWAY_INTERFACE").unwrap()
            .starts_with("CGI/") {
                // Some unknown foreign gateway interface
                eprintln!("Unknown GATEWAY_INTERFACE type");
                return Ok(1)
            }
            // CGI process spawned by web server. Simplest case.
            let stdin = io::stdin();
            let stdout = io::stdout();
            let mut io = DualIO {
                i: stdin.lock(),
                o: BufWriter::new(stdout.lock()),
            };
            handler(&mut io, static_env)
        }
        else {
            print_usage();
            Ok(1)
        }
    }
    else {
        match args[1].as_str() {
            "fcgi-tcp" => {
                let mut bind_options: BindOptions<SocketAddr>
                    = BindOptions::new();
                let mut fcgi_options = fcgi::Options::new();
                let mut os_options = os_options();
                if !handle_command_line(&mut [
                    &mut bind_options,
                    &mut fcgi_options,
                    &mut os_options,
                ], args[2..].iter()) {
                    print_usage();
                    Ok(1)
                }
                else if bind_options.addr.is_none() {
                    eprintln!("Please specify an address and port to bind to \
                               using --bind");
                    print_usage();
                    Ok(1)
                }
                else {
                    let listener: Box<dyn Listener> =
                    if let Some(list)=static_env.get("FCGI_WEB_SERVER_ADDRS") {
                        Box::new(ParanoidTcpListener::new(bind_options.addr
                                                          .unwrap(), list)?)
                    }
                    else {
                        Box::new(TcpListener::bind(bind_options.addr
                                                   .unwrap())?)
                    };
                    os_options.post_setup()?;
                    init(fcgi_options.max_connections);
                    Ok(fcgi::listen_loop(listener, handler, fcgi_options,
                                         &static_env))
                }
            },
            #[cfg(unix)] "fcgi-unix" => {
                let mut bind_options: BindOptions<PathBuf>
                    = BindOptions::new();
                let mut unix_socket_options = unix::UnixSocketOptions::new();
                let mut fcgi_options = fcgi::Options::new();
                let mut os_options = os_options();
                if !handle_command_line(&mut [
                    &mut bind_options,
                    &mut unix_socket_options,
                    &mut fcgi_options,
                    &mut os_options,
                ], args[2..].iter()) {
                    print_usage();
                    Ok(1)
                }
                else if bind_options.addr.is_none() {
                    eprintln!("Please specify a filesystem path to bind to \
                               using --bind");
                    print_usage();
                    Ok(1)
                }
                else {
                    if let Some(_) = static_env.get("FCGI_WEB_SERVER_ADDRS") {
                        eprintln!("WARNING: Value of FCGI_WEB_SERVER_ADDRS is \
                                   ignored for non-TCP sockets!");
                    }
                    let listener
                        = unix::listen(bind_options.addr.unwrap().as_path(),
                                       unix_socket_options)?;
                    os_options.post_setup()?;
                    init(fcgi_options.max_connections);
                    Ok(fcgi::listen_loop(Box::new(listener), handler,
                                         fcgi_options, &static_env))
                }
            },
            x => {
                eprintln!("Unknown mode: {}", x);
                print_usage();
                Ok(1)
            },
        }
    }
}