Skip to main content

vectorizer_sdk/rpc/
endpoint.rs

1//! Canonical URL parser for the SDK's connection string.
2//!
3//! The contract (from every `phase6_sdk-*-rpc/proposal.md`):
4//!
5//! - `vectorizer://host:port` → RPC on the given port.
6//! - `vectorizer://host` (no port) → RPC on default port 15503.
7//! - `host:port` (no scheme) → RPC.
8//! - `http://host:port` / `https://host:port` → REST (legacy fallback).
9//! - Anything else → [`ParseError::UnsupportedScheme`].
10//!
11//! URLs that carry credentials in the userinfo (`user:pass@host`) are
12//! REJECTED — credentials cross the wire in the `HELLO` handshake, NOT
13//! in the URL. This avoids accidentally logging or shell-history-saving
14//! a token-bearing URL.
15
/// Port assumed for RPC endpoints whose connection string names no
/// port. Mirrors `RpcConfig::default_port()` in the server crate and
/// is documented in wire spec § 12.
pub const DEFAULT_RPC_PORT: u16 = 15503;
19
/// Port the REST (HTTP) interface listens on by default. Mirrors
/// `ServerConfig::default()` in the server crate.
pub const DEFAULT_HTTP_PORT: u16 = 15002;
23
/// The outcome of parsing a connection string: which transport to
/// speak and where to reach the server.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Endpoint {
    /// Connect with VectorizerRPC to `host:port`.
    Rpc {
        /// Hostname or IP literal. A bracketed IPv6 literal keeps its
        /// brackets (e.g. `[::1]`).
        host: String,
        /// TCP port; [`DEFAULT_RPC_PORT`] is used when the connection
        /// string did not specify one.
        port: u16,
    },
    /// Connect over REST using the stored URL, which is ready to be
    /// handed to the HTTP transport (`reqwest`).
    Rest {
        /// Absolute `http://…` or `https://…` URL. The scheme is
        /// lower-cased by the parser; everything after `://` (host,
        /// port, path) is kept as the caller wrote it.
        url: String,
    },
}
43
44/// Reasons [`parse_endpoint`] can fail.
45#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
46pub enum ParseError {
47    /// The URL string was empty.
48    #[error("endpoint URL is empty")]
49    Empty,
50
51    /// The URL used a scheme other than `vectorizer://`, `http://`,
52    /// or `https://`.
53    #[error("unsupported URL scheme '{scheme}'; expected 'vectorizer', 'http', or 'https'")]
54    UnsupportedScheme {
55        /// The unrecognised scheme.
56        scheme: String,
57    },
58
59    /// The URL's authority section couldn't be parsed (e.g. missing
60    /// host, malformed port).
61    #[error("invalid authority in URL '{raw}': {reason}")]
62    InvalidAuthority {
63        /// The original URL.
64        raw: String,
65        /// What went wrong.
66        reason: String,
67    },
68
69    /// The URL carried credentials in the userinfo section. These
70    /// MUST go through the HELLO handshake, not the URL.
71    #[error(
72        "URL carries credentials in the userinfo section; \
73         pass credentials to the HELLO handshake instead of embedding them in the URL"
74    )]
75    CredentialsInUrl,
76}
77
78/// Parse a connection string into a typed [`Endpoint`].
79///
80/// See the module docstring for the contract. Returns the first
81/// matching endpoint shape; never falls through silently.
82pub fn parse_endpoint(url: &str) -> Result<Endpoint, ParseError> {
83    let trimmed = url.trim();
84    if trimmed.is_empty() {
85        return Err(ParseError::Empty);
86    }
87
88    // Split on the first "://" to recognise an explicit scheme.
89    if let Some((scheme, rest)) = trimmed.split_once("://") {
90        let scheme_lower = scheme.to_ascii_lowercase();
91        match scheme_lower.as_str() {
92            "vectorizer" => parse_rpc_authority(rest),
93            "http" | "https" => parse_rest(scheme_lower.as_str(), rest, trimmed),
94            _ => Err(ParseError::UnsupportedScheme {
95                scheme: scheme.to_owned(),
96            }),
97        }
98    } else {
99        // No scheme — treat as bare `host[:port]` for RPC.
100        parse_rpc_authority(trimmed)
101    }
102}
103
104/// Parse the post-`vectorizer://` part as `host[:port]` and return
105/// an [`Endpoint::Rpc`].
106fn parse_rpc_authority(authority: &str) -> Result<Endpoint, ParseError> {
107    if authority.is_empty() {
108        return Err(ParseError::InvalidAuthority {
109            raw: authority.to_owned(),
110            reason: "missing host".to_owned(),
111        });
112    }
113    if authority.contains('@') {
114        return Err(ParseError::CredentialsInUrl);
115    }
116    // Trim a trailing path; we don't support paths on the RPC scheme.
117    let host_port = authority.split(['/', '?', '#']).next().unwrap_or(authority);
118    if host_port.is_empty() {
119        return Err(ParseError::InvalidAuthority {
120            raw: authority.to_owned(),
121            reason: "missing host".to_owned(),
122        });
123    }
124
125    let (host, port) = if let Some(idx) = host_port.rfind(':') {
126        // Don't treat IPv6-bracket colons as port separators.
127        if host_port.starts_with('[') {
128            // IPv6 literal: [::1]:1234. Locate the closing bracket
129            // before splitting on the LAST colon after it.
130            let close = host_port
131                .find(']')
132                .ok_or_else(|| ParseError::InvalidAuthority {
133                    raw: authority.to_owned(),
134                    reason: "unterminated IPv6 literal '['".to_owned(),
135                })?;
136            let host_part = &host_port[..=close];
137            let after_bracket = &host_port[close + 1..];
138            if after_bracket.is_empty() {
139                (host_part.to_owned(), DEFAULT_RPC_PORT)
140            } else if let Some(port_str) = after_bracket.strip_prefix(':') {
141                let port = port_str
142                    .parse::<u16>()
143                    .map_err(|e| ParseError::InvalidAuthority {
144                        raw: authority.to_owned(),
145                        reason: format!("invalid port: {e}"),
146                    })?;
147                (host_part.to_owned(), port)
148            } else {
149                return Err(ParseError::InvalidAuthority {
150                    raw: authority.to_owned(),
151                    reason: format!("expected ':<port>' after IPv6 literal, got '{after_bracket}'"),
152                });
153            }
154        } else {
155            let host = &host_port[..idx];
156            let port_str = &host_port[idx + 1..];
157            if host.is_empty() {
158                return Err(ParseError::InvalidAuthority {
159                    raw: authority.to_owned(),
160                    reason: "missing host before ':<port>'".to_owned(),
161                });
162            }
163            let port = port_str
164                .parse::<u16>()
165                .map_err(|e| ParseError::InvalidAuthority {
166                    raw: authority.to_owned(),
167                    reason: format!("invalid port: {e}"),
168                })?;
169            (host.to_owned(), port)
170        }
171    } else {
172        // No colon → no explicit port → use the default.
173        (host_port.to_owned(), DEFAULT_RPC_PORT)
174    };
175
176    Ok(Endpoint::Rpc { host, port })
177}
178
179/// Parse an `http(s)://` URL into [`Endpoint::Rest`]. We rebuild the
180/// URL rather than echoing `raw` because some callers might pass
181/// trailing whitespace or odd casing on the scheme; the rebuild
182/// normalises both.
183fn parse_rest(scheme: &str, rest: &str, raw: &str) -> Result<Endpoint, ParseError> {
184    if rest.is_empty() {
185        return Err(ParseError::InvalidAuthority {
186            raw: raw.to_owned(),
187            reason: "missing host".to_owned(),
188        });
189    }
190    if rest.contains('@') {
191        return Err(ParseError::CredentialsInUrl);
192    }
193    let url = format!("{scheme}://{rest}");
194    Ok(Endpoint::Rest { url })
195}
196
#[cfg(test)]
// Tests unwrap freely: a panic is exactly the failure signal we want.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// Build the expected RPC endpoint for comparisons.
    fn rpc(host: &str, port: u16) -> Endpoint {
        Endpoint::Rpc {
            host: host.into(),
            port,
        }
    }

    /// Build the expected REST endpoint for comparisons.
    fn rest(url: &str) -> Endpoint {
        Endpoint::Rest { url: url.into() }
    }

    /// Assert that `input` fails with `InvalidAuthority` and that the
    /// reason mentions `needle`.
    fn assert_invalid_authority(input: &str, needle: &str) {
        match parse_endpoint(input).unwrap_err() {
            ParseError::InvalidAuthority { reason, .. } => {
                assert!(
                    reason.contains(needle),
                    "input {input:?}: got reason: {reason}"
                );
            }
            other => panic!("input {input:?}: expected InvalidAuthority, got {other:?}"),
        }
    }

    #[test]
    fn rpc_with_explicit_host_and_port() {
        let ep = parse_endpoint("vectorizer://example.com:9000").unwrap();
        assert_eq!(ep, rpc("example.com", 9000));
    }

    #[test]
    fn rpc_without_port_defaults_to_15503() {
        let ep = parse_endpoint("vectorizer://example.com").unwrap();
        assert_eq!(ep, rpc("example.com", DEFAULT_RPC_PORT));
        assert_eq!(DEFAULT_RPC_PORT, 15503);
    }

    #[test]
    fn bare_host_port_without_scheme_is_rpc() {
        let ep = parse_endpoint("localhost:15503").unwrap();
        assert_eq!(ep, rpc("localhost", 15503));
    }

    #[test]
    fn http_url_routes_to_rest_endpoint() {
        let ep = parse_endpoint("http://localhost:15002").unwrap();
        assert_eq!(ep, rest("http://localhost:15002"));

        let ep = parse_endpoint("https://api.example.com").unwrap();
        assert_eq!(ep, rest("https://api.example.com"));
    }

    #[test]
    fn unsupported_scheme_is_rejected_by_name() {
        let err = parse_endpoint("ftp://server.example.com").unwrap_err();
        match err {
            ParseError::UnsupportedScheme { scheme } => assert_eq!(scheme, "ftp"),
            other => panic!("expected UnsupportedScheme, got {other:?}"),
        }
    }

    #[test]
    fn empty_string_is_rejected() {
        let err = parse_endpoint("").unwrap_err();
        assert_eq!(err, ParseError::Empty);

        let err = parse_endpoint("   ").unwrap_err();
        assert_eq!(err, ParseError::Empty);
    }

    #[test]
    fn url_with_userinfo_credentials_is_rejected() {
        // RPC scheme: token@host
        let err = parse_endpoint("vectorizer://user:pass@host:15503").unwrap_err();
        assert_eq!(err, ParseError::CredentialsInUrl);

        // REST scheme: same protection so callers can't shell-history
        // a token-bearing URL by accident.
        let err = parse_endpoint("https://user:secret@api.example.com").unwrap_err();
        assert_eq!(err, ParseError::CredentialsInUrl);
    }

    #[test]
    fn malformed_port_is_rejected() {
        let err = parse_endpoint("vectorizer://host:not-a-port").unwrap_err();
        match err {
            ParseError::InvalidAuthority { raw, reason } => {
                assert!(raw.contains("host:not-a-port"));
                assert!(reason.contains("invalid port"), "got reason: {reason}");
            }
            other => panic!("expected InvalidAuthority, got {other:?}"),
        }
    }

    #[test]
    fn ipv6_literal_with_port_works() {
        let ep = parse_endpoint("vectorizer://[::1]:15503").unwrap();
        assert_eq!(ep, rpc("[::1]", 15503));
    }

    #[test]
    fn ipv6_literal_without_port_defaults() {
        let ep = parse_endpoint("vectorizer://[::1]").unwrap();
        assert_eq!(ep, rpc("[::1]", DEFAULT_RPC_PORT));
    }

    #[test]
    fn scheme_matching_ignores_case_and_surrounding_whitespace() {
        let ep = parse_endpoint("  VECTORIZER://example.com:9000\n").unwrap();
        assert_eq!(ep, rpc("example.com", 9000));

        // The REST URL is rebuilt with a lower-cased scheme.
        let ep = parse_endpoint("HTTP://localhost:15002").unwrap();
        assert_eq!(ep, rest("http://localhost:15002"));
    }

    #[test]
    fn unsupported_scheme_keeps_original_casing() {
        let err = parse_endpoint("FTP://server.example.com").unwrap_err();
        assert_eq!(
            err,
            ParseError::UnsupportedScheme {
                scheme: "FTP".into(),
            }
        );
    }

    #[test]
    fn rpc_path_query_and_fragment_are_ignored() {
        let ep = parse_endpoint("vectorizer://example.com:9000/some/path?x=1#frag").unwrap();
        assert_eq!(ep, rpc("example.com", 9000));

        let ep = parse_endpoint("vectorizer://example.com/some/path").unwrap();
        assert_eq!(ep, rpc("example.com", DEFAULT_RPC_PORT));
    }

    #[test]
    fn rest_url_preserves_path_and_query() {
        let ep = parse_endpoint("https://api.example.com/v1/search?limit=10").unwrap();
        assert_eq!(ep, rest("https://api.example.com/v1/search?limit=10"));
    }

    #[test]
    fn port_out_of_range_or_empty_is_rejected() {
        assert_invalid_authority("vectorizer://host:70000", "invalid port");
        assert_invalid_authority("host:", "invalid port");
        assert_invalid_authority("vectorizer://[::1]:", "invalid port");
    }

    #[test]
    fn missing_host_is_rejected() {
        assert_invalid_authority("vectorizer://", "missing host");
        assert_invalid_authority("vectorizer://:15503", "missing host");
        assert_invalid_authority("http://", "missing host");
    }

    #[test]
    fn malformed_ipv6_literal_is_rejected() {
        assert_invalid_authority("vectorizer://[::1", "unterminated IPv6 literal");
        assert_invalid_authority("vectorizer://[::1]9000", "expected ':<port>'");
    }
}
323}