// code_moniker_core/core/uri/parse.rs

1use super::{UriConfig, UriError};
2use crate::core::moniker::{Moniker, MonikerBuilder};
3
4pub fn from_uri(uri: &str, config: &UriConfig<'_>) -> Result<Moniker, UriError> {
5	let rest = uri
6		.strip_prefix(config.scheme)
7		.ok_or_else(|| UriError::MissingScheme(config.scheme.to_string()))?;
8	let bytes = rest.as_bytes();
9
10	let mut i = 0;
11	let (project, project_end) = read_name(bytes, i, |c| c == b'/')?;
12	if project.is_empty() {
13		return Err(UriError::MissingProject);
14	}
15	std::str::from_utf8(&project).map_err(|_| UriError::NonUtf8Project)?;
16	i = project_end;
17
18	let mut builder = MonikerBuilder::new();
19	builder.project(&project);
20
21	while i < bytes.len() {
22		debug_assert_eq!(bytes[i], b'/');
23		i += 1;
24		let seg_start = i;
25		let (kind, kind_end) = read_kind(bytes, i, seg_start)?;
26		i = kind_end;
27		if i >= bytes.len() || bytes[i] != b':' {
28			return Err(UriError::MissingKindSeparator(seg_start));
29		}
30		i += 1;
31		let (name, name_end) = read_name(bytes, i, |c| c == b'/')?;
32		if name.is_empty() && kind.is_empty() {
33			return Err(UriError::EmptySegment(seg_start));
34		}
35		std::str::from_utf8(&name).map_err(|_| UriError::NonUtf8Segment)?;
36		builder.segment(&kind, &name);
37		i = name_end;
38	}
39
40	Ok(builder.build())
41}
42
43fn read_kind(bytes: &[u8], start: usize, seg_start: usize) -> Result<(Vec<u8>, usize), UriError> {
44	let mut i = start;
45	while i < bytes.len() && bytes[i] != b':' && bytes[i] != b'/' {
46		i += 1;
47	}
48	let kind = &bytes[start..i];
49	if kind.is_empty() {
50		return Err(UriError::EmptySegment(seg_start));
51	}
52	if !is_plain_identifier(kind) {
53		return Err(UriError::InvalidKind(
54			String::from_utf8_lossy(kind).into_owned(),
55		));
56	}
57	Ok((kind.to_vec(), i))
58}
59
/// Returns `true` when `bytes` is a non-empty ASCII identifier: an ASCII letter
/// followed by any number of ASCII letters, ASCII digits, or underscores.
fn is_plain_identifier(bytes: &[u8]) -> bool {
	match bytes.split_first() {
		// The leading byte must be a letter; digits and `_` are only allowed after it.
		Some((head, tail)) => {
			head.is_ascii_alphabetic()
				&& tail
					.iter()
					.all(|&b| b == b'_' || b.is_ascii_alphanumeric())
		}
		None => false,
	}
}
72
73fn read_name(
74	bytes: &[u8],
75	start: usize,
76	is_terminator: impl Fn(u8) -> bool,
77) -> Result<(Vec<u8>, usize), UriError> {
78	if start < bytes.len() && bytes[start] == b'`' {
79		let mut i = start + 1;
80		let mut out = Vec::new();
81		loop {
82			if i >= bytes.len() {
83				return Err(UriError::UnterminatedBacktick(start));
84			}
85			if bytes[i] == b'`' {
86				if i + 1 < bytes.len() && bytes[i + 1] == b'`' {
87					out.push(b'`');
88					i += 2;
89				} else {
90					i += 1;
91					break;
92				}
93			} else {
94				out.push(bytes[i]);
95				i += 1;
96			}
97		}
98		if i < bytes.len() && !is_terminator(bytes[i]) {
99			return Err(UriError::TrailingAfterBacktick(start));
100		}
101		Ok((out, i))
102	} else {
103		let mut i = start;
104		while i < bytes.len() && !is_terminator(bytes[i]) {
105			i += 1;
106		}
107		Ok((bytes[start..i].to_vec(), i))
108	}
109}
110
#[cfg(test)]
mod tests {
	use super::super::test_helpers::*;
	use super::*;
	use proptest::prelude::*;

	/// Runs `from_uri` against the default test configuration.
	fn parse(uri: &str) -> Result<Moniker, UriError> {
		from_uri(uri, &default_config())
	}

	// ---- accepted inputs -------------------------------------------------

	#[test]
	fn from_uri_project_only() {
		let moniker = parse("esac+moniker://my-app").unwrap();
		assert_eq!(moniker.as_view().project(), b"my-app");
		assert_eq!(moniker.as_view().segment_count(), 0);
	}

	#[test]
	fn from_uri_path_chain() {
		let moniker =
			parse("esac+moniker://my-app/path:main/path:com/path:acme/class:Foo").unwrap();
		let view = moniker.as_view();
		let segments: Vec<_> = view.segments().collect();
		assert_eq!(segments.len(), 4);
		assert_eq!(segments[0].kind, b"path");
		assert_eq!(segments[0].name, b"main");
		assert_eq!(segments[3].kind, b"class");
		assert_eq!(segments[3].name, b"Foo");
	}

	#[test]
	fn from_uri_method_with_arity_in_name() {
		let moniker = parse("esac+moniker://app/class:Foo/method:bar(2)").unwrap();
		let segments: Vec<_> = moniker.as_view().segments().collect();
		assert_eq!(segments[1].kind, b"method");
		assert_eq!(segments[1].name, b"bar(2)");
	}

	#[test]
	fn from_uri_typed_signature_in_name() {
		let moniker =
			parse("esac+moniker://app/class:UserService/method:findById(String)").unwrap();
		let segments: Vec<_> = moniker.as_view().segments().collect();
		assert_eq!(segments[1].name, b"findById(String)");
	}

	#[test]
	fn from_uri_backtick_name() {
		// A quoted name may contain the `/` segment separator.
		let moniker = parse("esac+moniker://app/path:`util/test.ts`").unwrap();
		let segments: Vec<_> = moniker.as_view().segments().collect();
		assert_eq!(segments[0].name, b"util/test.ts");
	}

	#[test]
	fn from_uri_doubled_backtick() {
		// A doubled backtick inside a quoted name decodes to one backtick.
		let moniker = parse("esac+moniker://app/class:`weird``name`").unwrap();
		let first = moniker.as_view().segments().next().unwrap();
		assert_eq!(first.name, b"weird`name");
	}

	// ---- rejected inputs -------------------------------------------------

	#[test]
	fn from_uri_rejects_missing_scheme() {
		let err = parse("esac://app").unwrap_err();
		if let UriError::MissingScheme(expected) = &err {
			assert_eq!(expected.as_str(), "esac+moniker://");
		} else {
			panic!("unexpected error: {err:?}");
		}
	}

	#[test]
	fn from_uri_rejects_missing_project() {
		let err = parse("esac+moniker:///path:foo").unwrap_err();
		assert_eq!(err, UriError::MissingProject);
	}

	#[test]
	fn from_uri_rejects_missing_kind_separator() {
		let outcome = parse("esac+moniker://app/just_a_name");
		assert!(matches!(
			outcome.unwrap_err(),
			UriError::MissingKindSeparator(_)
		));
	}

	#[test]
	fn from_uri_rejects_invalid_kind_starting_with_digit() {
		let outcome = parse("esac+moniker://app/9bad:name");
		assert!(matches!(outcome.unwrap_err(), UriError::InvalidKind(_)));
	}

	#[test]
	fn from_uri_rejects_unterminated_backtick() {
		let outcome = parse("esac+moniker://app/path:`unterminated");
		assert!(matches!(
			outcome.unwrap_err(),
			UriError::UnterminatedBacktick(_)
		));
	}

	#[test]
	fn from_uri_rejects_trailing_data_after_backtick() {
		let outcome = parse("esac+moniker://`x`A");
		assert!(matches!(
			outcome.unwrap_err(),
			UriError::TrailingAfterBacktick(_)
		));
	}

	// ---- property tests: the parser must never panic ---------------------

	proptest! {
		#![proptest_config(ProptestConfig {
			cases: 256,
			..ProptestConfig::default()
		})]

		#[test]
		fn from_uri_never_panics(input in ".{0,512}") {
			let _ = parse(&input);
		}

		#[test]
		fn from_uri_with_scheme_never_panics(suffix in ".{0,512}") {
			let s = format!("esac+moniker://{suffix}");
			let _ = parse(&s);
		}

		#[test]
		fn from_uri_lossy_bytes_never_panics(bytes in proptest::collection::vec(any::<u8>(), 0..512)) {
			let s = String::from_utf8_lossy(&bytes);
			let _ = parse(&s);
		}
	}
}