1use std::sync::LazyLock;
2
3use regex::Regex;
4
/// Normalizes and resolves web-style paths.
///
/// The resolver carries a single piece of configuration: the separator that
/// the host file system is assumed to use.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PathResolver {
    /// Separator character of the host file system.
    ///
    /// When this is a backslash, [`PathResolver::posixify`] rewrites
    /// backslashes in incoming paths to forward slashes; for any other value
    /// paths are left untouched. The `Default` implementation uses
    /// `std::path::MAIN_SEPARATOR`.
    pub file_separator: char,
}
31
32impl Default for PathResolver {
33 fn default() -> Self {
34 Self {
35 file_separator: std::path::MAIN_SEPARATOR,
36 }
37 }
38}
39
40impl PathResolver {
41 pub fn posixify(&self, path: &str) -> String {
43 if self.file_separator == '\\' && path.contains('\\') {
44 path.replace('\\', "/")
45 } else {
46 path.to_string()
47 }
48 }
49
50 pub fn web_path(&self, target: &str, start: Option<&str>) -> String {
61 let mut target = self.posixify(target);
62 let start = start.map(|start| self.posixify(start));
63
64 let mut uri_prefix: Option<String> = None;
65
66 if !(start.is_none() || self.is_web_root(&target)) {
67 (target, uri_prefix) = extract_uri_prefix(&format!(
68 "{start}{maybe_add_slash}{target}",
69 start = start.as_deref().unwrap_or_default(),
70 maybe_add_slash = start
71 .as_ref()
72 .map(|s| if s.ends_with("/") { "" } else { "/" })
73 .unwrap_or_default()
74 ));
75 }
76
77 let (target_segments, target_root) = self.partition_path(&target, WebPath(true));
78
79 let mut resolved_segments: Vec<String> = vec![];
80
81 for segment in target_segments {
82 if segment == ".." {
83 if resolved_segments.is_empty() {
84 if let Some(target_root) = target_root.as_ref()
85 && target_root != "./"
86 {
87 } else {
89 resolved_segments.push(segment);
90 }
91 } else if let Some(last_segment) = resolved_segments.last()
92 && last_segment == ".."
93 {
94 resolved_segments.push(segment);
95 } else {
96 resolved_segments.pop();
97 }
98 } else {
99 resolved_segments.push(segment);
100 }
101 }
102
103 let resolved_path = self
104 .join_path(&resolved_segments, target_root.as_deref())
105 .replace(" ", "%20");
106
107 format!(
108 "{uri_prefix}{resolved_path}",
109 uri_prefix = uri_prefix.unwrap_or_default()
110 )
111 }
112
113 fn partition_path(&self, path: &str, web: WebPath) -> (Vec<String>, Option<String>) {
125 let posix_path = self.posixify(path);
128
129 let root: Option<String> = if web.0 {
130 if self.is_web_root(&posix_path) {
131 Some("/".to_owned())
132 } else if posix_path.starts_with("./") {
133 Some("./".to_owned())
134 } else {
135 None
136 }
137 } else {
138 todo!(
139 "Port this: {}",
140 r#"
141 elsif root? posix_path
142 # ex. //sample/path
143 if unc? posix_path
144 root = DOUBLE_SLASH
145 # ex. /sample/path
146 elsif posix_path.start_with? SLASH
147 root = SLASH
148 # ex. uri:classloader:sample/path (or uri:classloader:/sample/path)
149 elsif posix_path.start_with? URI_CLASSLOADER
150 root = posix_path.slice 0, URI_CLASSLOADER.length
151 # ex. C:/sample/path (or file:///sample/path in browser environment)
152 else
153 root = posix_path.slice 0, (posix_path.index SLASH) + 1
154 end
155 # ex. ./sample/path
156 elsif posix_path.start_with? DOT_SLASH
157 root = DOT_SLASH
158 end
159 # otherwise ex. sample/path
160 "#
161 );
162 };
163
164 let path_after_root = if let Some(root) = &root {
165 &posix_path[root.len()..]
166 } else {
167 &posix_path
168 };
169
170 let path_segments: Vec<String> = path_after_root
171 .split('/')
172 .filter(|s| *s != ".")
173 .map(|s| s.to_owned())
174 .collect();
175
176 (path_segments, root)
179 }
180
181 fn join_path(&self, segments: &[String], root: Option<&str>) -> String {
186 format!(
187 "{root}{segments}",
188 root = root.unwrap_or_default(),
189 segments = segments.join("/"),
190 )
191 }
192
193 pub fn is_web_root(&self, path: &str) -> bool {
196 path.starts_with('/')
197 }
198}
199
200fn extract_uri_prefix(s: &str) -> (String, Option<String>) {
208 if s.contains(':')
209 && let Some(prefix) = URI_SNIFF.find(s)
210 {
211 (
212 s[prefix.len()..].to_string(),
213 Some(prefix.as_str().to_owned()),
214 )
215 } else {
216 (s.to_string(), None)
217 }
218}
219
220static URI_SNIFF: LazyLock<Regex> = LazyLock::new(|| {
222 #[allow(clippy::unwrap_used)]
223 Regex::new(
224 r#"(?x)
225 ^ # Anchor: start of string
226
227 \p{Alphabetic} # First character: a Unicode letter
228
229 [\p{Alphabetic} # Followed by one or more of:
230 \p{Number} # - Unicode letters or numbers
231 . # - Period
232 \+ # - Plus sign
233 \- # - Hyphen
234 ]+ # One or more of the above
235
236 : # Followed by a literal colon
237
238 /{0,2} # Followed by zero, one, or two literal slashes
239 "#,
240 )
241 .unwrap()
242});
243
/// Flag passed to `PathResolver::partition_path` to select how the root of a
/// path is detected: `WebPath(true)` selects web-path rules, while
/// `WebPath(false)` selects the mode that is not yet implemented there.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct WebPath(pub(crate) bool);
246
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]

    use crate::parser::PathResolver;

    // Tests for `PathResolver::posixify`, using explicit separators so the
    // outcome does not depend on the host platform.
    mod posixify {
        use pretty_assertions_sorted::assert_eq;

        use crate::parser::PathResolver;

        #[test]
        fn replaces_backslashes_if_windowsish() {
            let pr = PathResolver {
                file_separator: '\\',
            };

            assert_eq!(pr.posixify("abc/def\\ghi"), "abc/def/ghi");
        }

        #[test]
        fn doesnt_replace_backslashes_if_posixish() {
            let pr = PathResolver {
                file_separator: '/',
            };

            assert_eq!(pr.posixify("abc/def\\ghi"), "abc/def\\ghi");
        }

        #[test]
        fn doesnt_replace_backslashes_if_none_exist() {
            let pr = PathResolver {
                file_separator: '\\',
            };

            assert_eq!(pr.posixify("abc/def"), "abc/def");
        }
    }

    // Tests for `PathResolver::web_path`. None of the inputs contain a
    // backslash, so the platform-default separator does not affect them.
    mod web_path {
        use pretty_assertions_sorted::assert_eq;

        use crate::parser::PathResolver;

        #[test]
        fn test_cases_from_asciidoctor_rb() {
            let pr = PathResolver::default();

            // Plain, `./`-rooted and `/`-rooted targets pass through as-is.
            assert_eq!(pr.web_path("images", None), "images");
            assert_eq!(pr.web_path("./images", None), "./images");
            assert_eq!(pr.web_path("/images", None), "/images");

            // `..` cancels the preceding segment; the `./` root is kept.
            assert_eq!(
                pr.web_path("./images/../assets/images", None),
                "./assets/images"
            );

            // `..` directly under the `/` root is discarded.
            assert_eq!(pr.web_path("/../images", None), "/images");

            // A `/`-rooted target ignores `start`; leading `..` segments are
            // retained when the root is `./`.
            assert_eq!(pr.web_path("/../images", Some("assets")), "/images");
            assert_eq!(pr.web_path("../images", Some("./")), "./../images");
            assert_eq!(pr.web_path("../../images", Some("./")), "./../../images");

            // Leading `..` segments coming from `start` are retained.
            assert_eq!(
                pr.web_path("tiger.png", Some("../assets/images")),
                "../assets/images/tiger.png"
            );

            // Relative target joined onto a relative start.
            assert_eq!(
                pr.web_path("images/photo.jpg", Some("docs/guide")),
                "docs/guide/images/photo.jpg"
            );
            assert_eq!(pr.web_path("photo.jpg", Some("images")), "images/photo.jpg");
            assert_eq!(
                pr.web_path("../photo.jpg", Some("images/folder")),
                "images/photo.jpg"
            );
            assert_eq!(
                pr.web_path("../../photo.jpg", Some("docs/images/folder")),
                "docs/photo.jpg"
            );

            // Start is a URI: the scheme prefix is kept in front of the
            // resolved path.
            assert_eq!(
                pr.web_path("images/photo.jpg", Some("http://example.com/base")),
                "http://example.com/base/images/photo.jpg"
            );
            assert_eq!(
                pr.web_path("../images/logo.png", Some("https://cdn.example.com/assets")),
                "https://cdn.example.com/images/logo.png"
            );
            assert_eq!(
                pr.web_path("docs/guide.pdf", Some("file:///Users/docs")),
                "file:///Users/docs/docs/guide.pdf"
            );
            assert_eq!(
                pr.web_path("assets/style.css", Some("ftp://files.example.com/web")),
                "ftp://files.example.com/web/assets/style.css"
            );

            // Targets beginning with `/` are returned without being joined to
            // `start`.
            assert_eq!(
                pr.web_path("/absolute/path.jpg", Some("http://example.com/base")),
                "/absolute/path.jpg"
            );
            assert_eq!(
                pr.web_path("/images/photo.jpg", Some("docs/guide")),
                "/images/photo.jpg"
            );
            assert_eq!(pr.web_path("/", Some("any/path")), "/");

            // No start supplied.
            assert_eq!(pr.web_path("images/photo.jpg", None), "images/photo.jpg");
            assert_eq!(pr.web_path("../photo.jpg", None), "../photo.jpg");

            // `.` segments are dropped and `..` segments are collapsed after
            // joining.
            assert_eq!(
                pr.web_path("./photo.jpg", Some("images")),
                "images/photo.jpg"
            );
            assert_eq!(
                pr.web_path("folder/./photo.jpg", Some("images")),
                "images/folder/photo.jpg"
            );
            assert_eq!(
                pr.web_path("folder/../photo.jpg", Some("images")),
                "images/photo.jpg"
            );

            // Several `..` segments in one target.
            assert_eq!(
                pr.web_path("../../../photo.jpg", Some("docs/images/folder/sub")),
                "docs/photo.jpg"
            );
            assert_eq!(
                pr.web_path("folder/../../photo.jpg", Some("docs/images")),
                "docs/photo.jpg"
            );
            assert_eq!(
                pr.web_path("./folder/../photo.jpg", Some("images")),
                "images/photo.jpg"
            );

            // Start with and without a trailing slash gives the same result.
            assert_eq!(
                pr.web_path("photo.jpg", Some("images/")),
                "images/photo.jpg"
            );
            assert_eq!(pr.web_path("photo.jpg", Some("images")), "images/photo.jpg");

            // `..` segments resolved against a URI start.
            assert_eq!(
                pr.web_path("../styles/main.css", Some("https://example.com/assets/css")),
                "https://example.com/assets/styles/main.css"
            );
            assert_eq!(
                pr.web_path(
                    "../../images/logo.png",
                    Some("http://site.com/docs/guide/examples")
                ),
                "http://site.com/docs/images/logo.png"
            );

            // Spaces are replaced with `%20`.
            assert_eq!(
                pr.web_path("my file.jpg", Some("images")),
                "images/my%20file.jpg"
            );
            assert_eq!(
                pr.web_path("folder with spaces/file.jpg", Some("docs")),
                "docs/folder%20with%20spaces/file.jpg"
            );

            // A target starting with `//` begins with `/`, so `start` is
            // ignored.
            assert_eq!(
                pr.web_path(
                    "//cdn.example.com/assets/image.jpg",
                    Some("http://example.com")
                ),
                "//cdn.example.com/assets/image.jpg"
            );

            // Empty target. A present start (even an empty one) still
            // contributes the joining slash.
            assert_eq!(pr.web_path("", Some("docs/images")), "docs/images/");
            assert_eq!(pr.web_path("", Some("")), "/");
            assert_eq!(pr.web_path("", None), "");

            // URI start that includes a port, and `..` against a URI start.
            assert_eq!(
                pr.web_path("api/v1/data", Some("https://api.example.com:8080/base")),
                "https://api.example.com:8080/base/api/v1/data"
            );
            assert_eq!(
                pr.web_path("../v2/data", Some("https://api.example.com/api/v1")),
                "https://api.example.com/api/v2/data"
            );

            // `file:` URI starts.
            assert_eq!(
                pr.web_path("document.pdf", Some("file:///C:/Users/docs")),
                "file:///C:/Users/docs/document.pdf"
            );
            assert_eq!(
                pr.web_path("../shared/doc.pdf", Some("file:///home/user/documents")),
                "file:///home/user/shared/doc.pdf"
            );
        }
    }

    // `is_web_root` is true only for paths that begin with `/`.
    #[test]
    fn is_web_root() {
        let pr = PathResolver::default();
        assert!(pr.is_web_root("/blah"));
        assert!(!pr.is_web_root(""));
        assert!(!pr.is_web_root("./blah"));
    }
}