1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
//! Determine proxy parameters for a URL from the environment.
//
// Copyright (c) 2016 Ivan Nejgebauer <inejge@gmail.com>
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
//!
//! Environment variables are one way to request the use of an HTTP proxy server for
//! outgoing connections in many command-line applications. _Which_ environment variable
//! will be used depends on the target URL and the convention used by the application
//! (or, customarily, the connection library that it uses.)
//!
//! This crate aims to replicate the convention of the __curl__ library and offer it
//! behind a simple API: in most cases, a single function, `for_url()`, which accepts
//! a target URL and returns the proxy parameters, if applicable. The method for determining 
//! the parameters is described in detail in that function's documentation.
//!
//! # Getting Started
//!
//! Add the following to the `[dependencies]` section of your `Cargo.toml`:
//!
//! ```toml
//! env_proxy = "0.1"
//! ```
//!
//! Also, import the crate to your crate root:
//!
//! ```
//! extern crate env_proxy;
//! ```
//!
//! # Examples
//!
//! To determine proxy parameters for `http://www.example.org`:
//!
//! ```
//! # extern crate url;
//! # extern crate env_proxy;
//! # fn main() {
//! use env_proxy;
//! use url::Url;
//!
//! let url = Url::parse("http://www.example.org").unwrap();
//! if let Some(proxy) = env_proxy::for_url(&url) {
//!     println!("Proxy host: {}", proxy.0);
//!     println!("Proxy port: {}", proxy.1);
//! }
//! # }
//! ```


#[cfg(test)]
#[macro_use] extern crate lazy_static;
extern crate url;

use std::env::var_os;
use url::Url;

fn matches_no_proxy(url: &Url) -> bool {
    let mut maybe_no_proxy = var_os("no_proxy").map(|ref v| v.to_str().unwrap_or("").to_string());
    if maybe_no_proxy.is_none() {
	maybe_no_proxy = var_os("NO_PROXY").map(|ref v| v.to_str().unwrap_or("").to_string());
    }
    if let Some(no_proxy) = maybe_no_proxy {
	if no_proxy == "*" {
	    return true;
	}
	if let Some(host) = url.host_str() {
	    'elems: for elem in no_proxy.split(|c| c == ',' || c == ' ') {
		if elem == "" || elem == "." {
		    continue;
		}
		let ch1 = elem.chars().next().unwrap();
		let mut elem_iter = elem.chars();
		if ch1 == '.' {
		    elem_iter.next();
		}
		let mut elem_iter = elem_iter.rev();
		let mut host_iter = host.chars().rev();
		while let Some(elem_ch) = elem_iter.next() {
		    if let Some(host_ch) = host_iter.next() {
			let host_ch = host_ch as u32;
			let elem_ch = match elem_ch as u32 {
			    uppercase @ 0x41 ... 0x5a => uppercase + 0x20,
			    anything => anything
			};
			if elem_ch == host_ch {
			    continue;
			}
			continue 'elems;
		    } else {
			continue 'elems;
		    }
		}
		match host_iter.next() {
		    None => return true,
		    Some(host_ch) if host_ch == '.' => return true,
		    _ => ()
		}
	    }
	}
    }
    false
}

/// Determine proxy parameters for a URL by examining the environment variables.
///
/// __Attention__: in a multithreaded program, care should be taken not to change the environment
/// in multiple threads simultaneously without some form of serialization.
///
/// Most environment variables described here can be defined either with an all-lowercase or an
/// all-uppercase name. If both versions are defined, the all-lowercase name takes precedence
/// (e.g., __no_proxy__ will be used even if __NO_PROXY__ is defined.) The only exception is
/// __http_proxy__, where only the lowercase name is checked for. This text will use the
/// lowercase variants for simplicity.
///
/// If __no_proxy__ is defined, check the host part of the URL against its components and return
/// `None` if there is any match. The value of __no_proxy__ should be a space- or comma-separated
/// list of host/domain names or IP addresses for which no proxying should be done, or a single
/// '&#8239;__*__&#8239;' (asterisk) which means that proxying is disabled for all hosts. Empty names
/// are skipped. Names beginning with a dot are not treated specially; matching is always done
/// by full domain name component. A name consisting of a bare dot is skipped (this is different
/// from __curl__'s behavior.) The rules are summarized in the following table:
///
/// |             |example.org|.example.org|
/// |-------------|:---------:|:----------:|
/// |example.org  |  &#x2714; |  &#x2714;  |
/// |a.example.org|  &#x2714; |  &#x2714;  |
/// |xample.org   |  &#x2718; |  &#x2718;  |
/// |org          |  &#x2718; |  &#x2718;  |
///
/// For the __ftp__ protocol scheme, __ftp_proxy__ is checked first; for __https__, __https_proxy__
/// is checked. Both schemes will fall back to __http_proxy__, then __all_proxy__ if the former is
/// undefined. For __http__, __http_proxy__ is checked first, then __all_proxy__. For all other schemes
/// only __all_proxy__ is checked. In this context, "checked" means that the value of a variable is used
/// if present, and the search for further definitions stops.
///
/// The return value, if not `None`, is a tuple consisting of the proxy hostname and the port, which
/// are obtained from the chosen environment variable parsed as a URL.
///
/// If the target URL matches __no_proxy__, or if the hostname cannot be extracted from the URL,
/// the function returns `None`. If the port is not explicitly defined in the proxy URL, the value 8080
/// is used, which corresponds to __http-alt__ in the IANA port registry. This is different from __curl__,
/// which uses port 1080 as the default.

pub fn for_url(url: &Url) -> Option<(String, u16)> {
    if matches_no_proxy(url) {
        return None;
    }

    // Read one variable; a value that is not valid Unicode counts as defined but empty.
    let read = |name: &str| var_os(name).map(|raw| raw.to_str().unwrap_or("").to_string());
    // The all-lowercase name wins over the all-uppercase one.
    let read_pair = |lower: &str, upper: &str| read(lower).or_else(|| read(upper));
    // Only the lowercase `http_proxy` is consulted.
    let http = || read("http_proxy");
    let all = || read_pair("all_proxy", "ALL_PROXY");

    // The first variable that is defined decides the outcome, even if its value is unusable.
    let chosen = match url.scheme() {
        "https" => read_pair("https_proxy", "HTTPS_PROXY").or_else(http).or_else(all),
        "http" => http().or_else(all),
        "ftp" => read_pair("ftp_proxy", "FTP_PROXY").or_else(http).or_else(all),
        _ => all(),
    };
    let value = match chosen {
        Some(value) => value,
        None => return None,
    };
    // Strip the same leading/trailing characters (space and C0 controls) that the URL parser
    // ignores, so that the textual port check in `explicit_port` sees a clean value.
    let value = value.trim_matches(|c: char| c <= ' ');

    proxy_from(value).or_else(|| {
        if value.contains("://") {
            None
        } else {
            // curl accepts proxy values without a scheme ("proxy.example.com:3128") and
            // treats them as HTTP proxies; retry with the scheme spelled out.
            proxy_from(&format!("http://{}", value))
        }
    })
}

/// Parse `candidate` as a URL and extract the proxy host and port from it.
///
/// Returns `None` if `candidate` does not parse or the parsed URL has no host. The port is the
/// one written in `candidate`; 8080 is used only when no port is written.
fn proxy_from(candidate: &str) -> Option<(String, u16)> {
    let parsed = match Url::parse(candidate) {
        Ok(parsed) => parsed,
        Err(_) => return None,
    };
    let host = match parsed.host_str() {
        Some(host) => host.to_string(),
        None => return None,
    };
    let port = match parsed.port() {
        Some(port) => port,
        // `Url::port()` also returns `None` when the URL spells out the default port of its
        // scheme (e.g. "http://proxy:80"). Recover that case from the text so an explicit
        // port is never replaced by 8080. The textual result is trusted only when it agrees
        // with the parser's idea of the scheme's default port.
        None => match (explicit_port(candidate), parsed.port_or_known_default()) {
            (Some(written), Some(default)) if written == default => written,
            _ => 8080,
        },
    };
    Some((host, port))
}

/// Extract a port number that is spelled out in the authority part of `raw`.
///
/// Returns `None` when `raw` has no `://` separator, when no port is written, or when the
/// text after the last colon of the authority is not a valid `u16`.
fn explicit_port(raw: &str) -> Option<u16> {
    let rest = match raw.find("://") {
        Some(pos) => &raw[pos + 3..],
        None => return None,
    };
    // The authority ends at the first path, query or fragment delimiter.
    let authority = rest
        .split(|c| c == '/' || c == '?' || c == '#')
        .next()
        .unwrap_or("");
    // Drop any `user:password@` prefix.
    let host_port = authority.rsplit('@').next().unwrap_or("");
    match host_port.rfind(':') {
        // A `]` after the last colon means that colon belongs to an IPv6 literal.
        Some(colon) if !host_port[colon..].contains(']') => host_port[colon + 1..].parse().ok(),
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::env::{remove_var, set_var};
    use std::sync::{LockResult, Mutex, MutexGuard};
    use url::Url;

    // The environment belongs to the whole process while the test harness runs tests on
    // several threads, so every test holds this lock for its full duration.
    lazy_static! {
        static ref LOCK: Mutex<()> = Mutex::new(());
    }

    /// Every variable the tests set; all of them are cleared before each test.
    const PROXY_VARS: [&'static str; 9] = [
        "http_proxy",
        "https_proxy",
        "HTTPS_PROXY",
        "ftp_proxy",
        "FTP_PROXY",
        "all_proxy",
        "ALL_PROXY",
        "no_proxy",
        "NO_PROXY",
    ];

    /// Remove all proxy-related variables from the environment.
    fn scrub_env() {
        for name in PROXY_VARS.iter() {
            remove_var(*name);
        }
    }

    /// Take the environment lock, then start from a clean environment. The returned value
    /// must be kept alive until the test is done.
    fn clean_env() -> LockResult<MutexGuard<'static, ()>> {
        let guard = LOCK.lock();
        scrub_env();
        guard
    }

    /// Parse a target URL, panicking on malformed test input.
    fn target(text: &str) -> Url {
        Url::parse(text).ok().unwrap()
    }

    /// Build the value `for_url` is expected to return for a proxy at `host:port`.
    fn proxy(host: &str, port: u16) -> Option<(String, u16)> {
        Some((host.to_string(), port))
    }

    /// With `http_proxy` defined, assert that `no_proxy_value` exempts `target_url`.
    fn assert_bypassed(no_proxy_value: &str, target_url: &str) {
        set_var("no_proxy", no_proxy_value);
        set_var("http_proxy", "http://proxy.example.com:8080");
        let u = target(target_url);
        assert!(for_url(&u).is_none());
    }

    #[test]
    fn no_proxy_simple_name() {
        let _env = clean_env();
        assert_bypassed("example.org", "http://example.org");
    }

    #[test]
    fn no_proxy_global() {
        let _env = clean_env();
        assert_bypassed("*", "http://example.org");
    }

    #[test]
    fn no_proxy_subdomain() {
        let _env = clean_env();
        assert_bypassed("example.org", "http://www.example.org");
    }

    #[test]
    fn no_proxy_subdomain_dot() {
        let _env = clean_env();
        assert_bypassed(".example.org", "http://www.example.org");
    }

    #[test]
    fn no_proxy_multiple_list() {
        let _env = clean_env();
        assert_bypassed("com, .example.org, net", "http://www.example.org");
    }

    #[test]
    fn http_proxy_specific() {
        let _env = clean_env();
        set_var("http_proxy", "http://proxy.example.com:8080");
        set_var("all_proxy", "http://proxy.example.org:8081");
        let u = target("http://www.example.org");
        assert_eq!(for_url(&u), proxy("proxy.example.com", 8080));
    }

    #[test]
    fn http_proxy_fallback() {
        let _env = clean_env();
        let u = target("http://www.example.org");
        set_var("ALL_PROXY", "http://proxy.example.com:8080");
        assert_eq!(for_url(&u), proxy("proxy.example.com", 8080));
        // The lowercase variant takes precedence once it is defined.
        set_var("all_proxy", "http://proxy.example.org:8081");
        assert_eq!(for_url(&u), proxy("proxy.example.org", 8081));
    }

    #[test]
    fn https_proxy_specific() {
        let _env = clean_env();
        let u = target("https://www.example.org");
        set_var("HTTPS_PROXY", "http://proxy.example.com:8080");
        set_var("http_proxy", "http://proxy.example.org:8081");
        set_var("all_proxy", "http://proxy.example.org:8081");
        assert_eq!(for_url(&u), proxy("proxy.example.com", 8080));
        // The lowercase variant takes precedence once it is defined.
        set_var("https_proxy", "http://proxy.example.com:8081");
        assert_eq!(for_url(&u), proxy("proxy.example.com", 8081));
    }

    #[test]
    fn https_proxy_fallback() {
        let _env = clean_env();
        let u = target("https://www.example.org");
        set_var("http_proxy", "http://proxy.example.com:8080");
        set_var("ALL_PROXY", "http://proxy.example.org:8081");
        assert_eq!(for_url(&u), proxy("proxy.example.com", 8080));
        // Without http_proxy the lookup falls through to ALL_PROXY ...
        remove_var("http_proxy");
        assert_eq!(for_url(&u), proxy("proxy.example.org", 8081));
        // ... and all_proxy, once defined, wins over ALL_PROXY.
        set_var("all_proxy", "http://proxy.example.org:8082");
        assert_eq!(for_url(&u), proxy("proxy.example.org", 8082));
    }

    #[test]
    fn ftp_proxy_specific() {
        let _env = clean_env();
        let u = target("ftp://www.example.org");
        set_var("FTP_PROXY", "http://proxy.example.com:8080");
        set_var("http_proxy", "http://proxy.example.org:8081");
        set_var("all_proxy", "http://proxy.example.org:8081");
        assert_eq!(for_url(&u), proxy("proxy.example.com", 8080));
        // The lowercase variant takes precedence once it is defined.
        set_var("ftp_proxy", "http://proxy.example.com:8081");
        assert_eq!(for_url(&u), proxy("proxy.example.com", 8081));
    }

    #[test]
    fn ftp_proxy_fallback() {
        let _env = clean_env();
        let u = target("ftp://www.example.org");
        set_var("http_proxy", "http://proxy.example.com:8080");
        set_var("ALL_PROXY", "http://proxy.example.org:8081");
        assert_eq!(for_url(&u), proxy("proxy.example.com", 8080));
        // Without http_proxy the lookup falls through to ALL_PROXY ...
        remove_var("http_proxy");
        assert_eq!(for_url(&u), proxy("proxy.example.org", 8081));
        // ... and all_proxy, once defined, wins over ALL_PROXY.
        set_var("all_proxy", "http://proxy.example.org:8082");
        assert_eq!(for_url(&u), proxy("proxy.example.org", 8082));
    }
}