1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
//! Gemini Requests
//!
//! Gemini requests consist of a single URL (which means the scheme must be
//! present). The format of `gemini://` URLs is defined in Section 1.2 of [the
//! Gemini spec](https://gemini.circumlunar.space/docs/specification.html).

use std::{borrow::Cow, fmt::Display, marker::PhantomData};

use thiserror::Error;
pub use url::Url;

pub use request_type::{Any, Gemini};

mod request_type;

/// A Gemini request.
///
/// A Gemini request consists of a single URL. That URL is not required to use
/// the `gemini://` scheme: a Gemini server may be asked for a resource under
/// another scheme (for example `gopher`) so that it can proxy the request.
///
/// The type parameter `T` is a marker only; no value of type `T` is ever
/// stored. The [`AnyRequest`] and [`GeminiRequest`] aliases name the two
/// marker instantiations exposed by this module.
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub struct Request<T: request_type::RequestType> {
    /// The URL being requested.
    url: Url,
    /// Zero-sized marker tying this request to its `RequestType`.
    _phantom: PhantomData<T>,
}

/// A request whose URL may use any scheme.
///
/// This is the type returned by `Request::from_uri` and `Request::from_url`.
pub type AnyRequest = Request<request_type::Any>;
/// A request that is known to be a valid Gemini URL.
///
/// Obtained from `AnyRequest::into_gemini_request` or
/// `Request::gemini_request`.
pub type GeminiRequest = Request<request_type::Gemini>;

/// Error to indicate a failure in constructing a `Request`.
///
/// Within this module the error is produced when:
///
/// - the input cannot be parsed by the `url` crate,
/// - the parsed URL is longer than `Request::MAX_URL_LEN` (1024) bytes,
/// - an `AnyRequest` does not satisfy `is_gemini_request` when it is
///   converted with `into_gemini_request`, or
/// - (with the `parsers` feature) the request line is not valid UTF-8.
///
/// The error deliberately carries no detail about which of these occurred.
#[derive(Debug, Copy, Clone, Error)]
pub struct InvalidRequest {
    // Private unit field: keeps the struct from being built with a literal
    // outside this module, so `InvalidRequest::new` is the only constructor.
    _priv: (),
}

impl Display for InvalidRequest {
    /// Writes the fixed message `invalid request`.
    ///
    /// The message is emitted verbatim; width, fill and precision flags on
    /// the formatter are not applied.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str("invalid request")
    }
}

impl InvalidRequest {
    fn new() -> Self {
        InvalidRequest { _priv: () }
    }
}

impl<T: request_type::RequestType> Request<T> {
    /// Maximum length, in bytes, of a request URL.
    pub const MAX_URL_LEN: usize = 1024;
    /// Default port for the Gemini protocol.
    pub const DEFAULT_PORT: u16 = 1965;
    /// The Gemini scheme, which can be prepended to a URI to attempt to see if
    /// it is a valid Gemini URL.
    pub const GEMINI_SCHEME: &'static str = "gemini";

    /// Wrap `url` in a `Request` with marker `U`, enforcing `MAX_URL_LEN`.
    ///
    /// Shared by every constructor so the length rule lives in one place.
    fn checked<U: request_type::RequestType>(url: Url) -> Result<Request<U>, InvalidRequest> {
        if url.as_str().len() > Self::MAX_URL_LEN {
            Err(InvalidRequest::new())
        } else {
            Ok(Request {
                url,
                _phantom: PhantomData,
            })
        }
    }

    /// Attempt to construct a `Request` from a given URI that may not have a
    /// scheme.
    ///
    /// If `uri` contains `://` it is parsed as given; otherwise
    /// `gemini://` is prepended before parsing.
    ///
    /// # Errors
    ///
    /// Returns `InvalidRequest` if the (possibly prefixed) text is rejected by
    /// `Url::parse`, or if the parsed URL is longer than `Self::MAX_URL_LEN`.
    pub fn from_uri(uri: &str) -> Result<AnyRequest, InvalidRequest> {
        let uri = if uri.contains("://") {
            Cow::from(uri)
        } else {
            format!("{}://{}", Self::GEMINI_SCHEME, uri).into()
        };
        let url = Url::parse(&uri).map_err(|_| InvalidRequest::new())?;
        Self::from_url(url)
    }

    /// Attempt to construct an `AnyRequest` from a given `Url`.
    ///
    /// # Errors
    ///
    /// Returns an error if the URL's length exceeds `Self::MAX_URL_LEN`.
    pub fn from_url(url: Url) -> Result<AnyRequest, InvalidRequest> {
        Self::checked::<request_type::Any>(url)
    }

    /// Construct a `GeminiRequest` from the necessary components.
    ///
    /// `port` defaults to `Self::DEFAULT_PORT` when `None`. `path` may be
    /// given with or without its leading `/`, so the value returned by
    /// `GeminiRequest::path` can be passed back in unchanged.
    ///
    /// # Errors
    ///
    /// Returns `InvalidRequest` if:
    ///
    /// - the assembled text is rejected by `Url::parse`,
    /// - the parsed URL is longer than `Self::MAX_URL_LEN`, or
    /// - the parsed URL is not a Gemini request carrying the requested port
    ///   (for example because `host` smuggled in userinfo, a port or a path).
    pub fn gemini_request(
        host: &str,
        port: Option<u16>,
        path: &str,
    ) -> Result<GeminiRequest, InvalidRequest> {
        let port = port.unwrap_or(Self::DEFAULT_PORT);
        // The template below already supplies the separating '/'; dropping one
        // leading slash from `path` avoids producing `//path`.
        let path = path.strip_prefix('/').unwrap_or(path);
        let url = format!("{}://{}:{}/{}", Self::GEMINI_SCHEME, host, port, path);
        let url = Url::parse(url.as_str()).map_err(|_| InvalidRequest::new())?;
        let request = Self::checked::<request_type::Gemini>(url)?;
        // `GeminiRequest::host` and `GeminiRequest::port` unwrap these parts,
        // so refuse to hand out a value for which they would be missing or
        // would not match what the caller asked for.
        if request.is_gemini_request() && request.url.port() == Some(port) {
            Ok(request)
        } else {
            Err(InvalidRequest::new())
        }
    }

    /// Returns `true` if this `Request` can be interpreted as containing
    /// a resource located via the `gemini://` scheme.
    ///
    /// That requires the scheme to be `gemini`, an authority with a host, no
    /// username or password, and a URL that is not "cannot-be-a-base".
    pub fn is_gemini_request(&self) -> bool {
        self.scheme() == Self::GEMINI_SCHEME
            && self.url.has_authority()
            && self.url.username().is_empty()
            && self.url.password().is_none()
            && self.url.host().is_some()
            && !self.url.cannot_be_a_base()
    }

    /// Return the scheme for the underlying url.
    pub fn scheme(&self) -> &str {
        self.url.scheme()
    }

    /// Return a reference to the underlying url.
    pub fn url(&self) -> &Url {
        &self.url
    }
}

impl AnyRequest {
    /// Convert this request into a `GeminiRequest`.
    ///
    /// The request must satisfy `is_gemini_request`. If its URL has no
    /// explicit port, `DEFAULT_PORT` is written into the URL so that the
    /// resulting `GeminiRequest` always carries one.
    ///
    /// # Errors
    ///
    /// Returns `InvalidRequest` when `is_gemini_request` is `false`.
    pub fn into_gemini_request(mut self) -> Result<GeminiRequest, InvalidRequest> {
        if !self.is_gemini_request() {
            return Err(InvalidRequest::new());
        }

        if self.url.port().is_none() {
            // `is_gemini_request` has already established that the URL has a
            // host, can be a base and uses the `gemini` scheme, so setting a
            // port is not expected to fail here.
            self.url.set_port(Some(Self::DEFAULT_PORT)).unwrap();
        }

        let Request { url, .. } = self;
        Ok(Request {
            url,
            _phantom: PhantomData,
        })
    }
}

impl GeminiRequest {
    /// Return the hostname associated with the Gemini request.
    ///
    /// # Panics
    ///
    /// Panics if the underlying URL has no host. Requests produced by
    /// `AnyRequest::into_gemini_request` have been checked to have one.
    pub fn host(&self) -> &str {
        let host = self.url().host_str();
        host.unwrap()
    }

    /// Return the path portion of the Gemini request url.
    pub fn path(&self) -> &str {
        self.url().path()
    }

    /// Return the port associated with the Gemini request.
    ///
    /// # Panics
    ///
    /// Panics if the underlying URL carries no explicit port.
    /// `AnyRequest::into_gemini_request` sets one when it is missing.
    pub fn port(&self) -> u16 {
        let port = self.url().port();
        port.unwrap()
    }
}

/// `nom`-based parser for raw request lines.
#[cfg(feature = "parsers")]
pub mod parse {
    use std::str;

    use nom::{
        bytes::streaming::{tag, take_until},
        combinator::map_res,
        error::context,
        sequence::terminated,
        IResult,
    };

    use super::*;

    /// `nom` parser for a request string.
    ///
    /// Consumes everything up to and including the first `\r\n` and hands the
    /// bytes before it to `AnyRequest::from_uri`. Invalid utf-8 will be
    /// rejected, but non-"gemini://" urls are allowed as per the spec.
    pub fn request(input: &[u8]) -> IResult<&[u8], AnyRequest> {
        /// Decode the bytes preceding the line terminator into a request.
        fn decode(line: &[u8]) -> Result<AnyRequest, InvalidRequest> {
            str::from_utf8(line)
                .map_err(|_| InvalidRequest::new())
                .and_then(AnyRequest::from_uri)
        }

        // The request line proper: anything up to CRLF, with the CRLF dropped.
        let line = terminated(take_until("\r\n"), tag("\r\n"));
        context("request", map_res(line, decode))(input)
    }

    #[cfg(test)]
    mod test {
        use super::*;

        #[test]
        fn test_gemini_request() {
            let (_, parsed) = request(b"gemini://foo.bar.baz:1966/path\r\n").unwrap();
            assert!(parsed.is_gemini_request());
        }

        #[test]
        fn test_gemini_request_no_port() {
            let (_, parsed) = request(b"gemini://foo.bar.baz/path\r\n").unwrap();
            assert!(parsed.is_gemini_request());
        }

        #[test]
        fn test_generic_request() {
            let outcome = request(b"http://goggle.com/snoop\r\n");
            assert!(outcome.is_ok());
        }

        // NB: it is unclear whether this behavior is desirable. The Gemini
        // spec requires a scheme, so there could instead be a strict,
        // canonical parser that demands one, plus a separate lenient URI
        // parser that tries to read a "link-like thing" as a Gemini link.
        #[test]
        fn test_no_scheme() {
            let (_, parsed) = request(b"foo.bar.baz/path\r\n").unwrap();
            assert!(parsed.is_gemini_request());
        }
    }
}